Skip to content

Commit 9e54b5e

Browse files
committed
Improve: Type choices in search.rs
1 parent 844e3b2 commit 9e54b5e

File tree

3 files changed

+16
-38
lines changed

3 files changed

+16
-38
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"ashvardanian",
44
"cntfrq",
55
"cntvct",
6+
"codegen",
67
"colocations",
78
"combinators",
89
"Condvar",

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,4 @@ bench = false
4040
rayon = "1.10"
4141
rand = "0.9"
4242
tokio = { version = "1.46.1", features = ["rt-multi-thread", "sync"] }
43-
simsimd = "6.4.10"
43+
simsimd = "6.5.0"

scripts/search.rs

Lines changed: 14 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use std::env;
1919
use std::time::Instant;
2020

2121
use fork_union as fu;
22+
use simsimd::{Distance, SpatialSimilarity};
2223

2324
/// Embedding dimensions - fixed at compile time for better performance
2425
const EMBEDDING_DIMENSIONS: usize = 768;
@@ -32,21 +33,21 @@ type Embedding = [f32; EMBEDDING_DIMENSIONS];
3233
/// Result of a search operation - stored on stack to avoid heap allocations
3334
#[derive(Debug, Clone, Copy)]
3435
struct SearchResult {
35-
best_similarity: f32,
36+
best_similarity: Distance,
3637
best_index: usize,
3738
numa_node: usize,
3839
}
3940

4041
impl SearchResult {
4142
fn new(numa_node: usize) -> Self {
4243
Self {
43-
best_similarity: f32::NEG_INFINITY,
44+
best_similarity: Distance::NEG_INFINITY,
4445
best_index: 0,
4546
numa_node,
4647
}
4748
}
4849

49-
fn update_if_better(&mut self, similarity: f32, index: usize) {
50+
fn update_if_better(&mut self, similarity: Distance, index: usize) {
5051
if similarity > self.best_similarity {
5152
self.best_similarity = similarity;
5253
self.best_index = index;
@@ -118,32 +119,6 @@ fn create_distributed_embeddings(
118119
Some(distributed_vec)
119120
}
120121

121-
/// Fast cosine similarity using SimSIMD (placeholder for now - would need actual SimSIMD binding)
122-
#[inline]
123-
fn cosine_similarity_simd(a: &Embedding, b: &Embedding) -> f32 {
124-
// TODO: Replace with actual SimSIMD call when binding is available
125-
// For now, use optimized manual implementation
126-
let mut dot_product = 0.0f32;
127-
let mut norm_a = 0.0f32;
128-
let mut norm_b = 0.0f32;
129-
130-
// Unroll loop for better vectorization
131-
for i in (0..EMBEDDING_DIMENSIONS).step_by(4) {
132-
let end = (i + 4).min(EMBEDDING_DIMENSIONS);
133-
for j in i..end {
134-
dot_product += a[j] * b[j];
135-
norm_a += a[j] * a[j];
136-
norm_b += b[j] * b[j];
137-
}
138-
}
139-
140-
if norm_a == 0.0 || norm_b == 0.0 {
141-
0.0
142-
} else {
143-
dot_product / (norm_a.sqrt() * norm_b.sqrt())
144-
}
145-
}
146-
147122
/// Performs NUMA-aware search using Fork Union's for_threads API for optimal colocation
148123
fn numa_aware_search(
149124
storage: &DistributedEmbeddings,
@@ -187,7 +162,7 @@ fn numa_aware_search(
187162
// Search vectors assigned to this thread
188163
for local_vector_idx in range {
189164
if let Some(vector) = node_vectors.get(local_vector_idx) {
190-
let similarity = cosine_similarity_simd(query, vector);
165+
let similarity = f32::cosine(query, vector).unwrap();
191166
// Convert local index to global round-robin index using the new method
192167
let global_index =
193168
storage.local_to_global_index(colocation_index, local_vector_idx);
@@ -251,7 +226,7 @@ fn worst_case_search(
251226
// Search vectors assigned to this thread, regardless of NUMA locality
252227
for local_vector_idx in range {
253228
if let Some(vector) = node_vectors.get(local_vector_idx) {
254-
let similarity = cosine_similarity_simd(query, vector);
229+
let similarity = f32::cosine(query, vector).unwrap();
255230
// Convert to global index for consistent comparison
256231
let global_index =
257232
storage.local_to_global_index(numa_node, local_vector_idx);
@@ -288,7 +263,7 @@ fn benchmark_search<F>(
288263
println!("\n=== {} ===", name);
289264

290265
let start = Instant::now();
291-
let mut total_similarity = 0.0f32;
266+
let mut total_similarity: Distance = 0.0;
292267

293268
for (i, query) in queries.iter().enumerate() {
294269
let result = search_fn(storage, query, pool);
@@ -304,7 +279,7 @@ fn benchmark_search<F>(
304279
}
305280

306281
let duration = start.elapsed();
307-
let avg_similarity = total_similarity / queries.len() as f32;
282+
let avg_similarity = total_similarity / queries.len() as Distance;
308283

309284
println!(
310285
"Completed {} queries in {:.2}ms",
@@ -351,7 +326,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
351326
let mut pool = fu::ThreadPool::try_spawn(threads)?;
352327

353328
// Initialize NUMA-aware vector storage
354-
println!("\n📚 Initializing vector storage...");
329+
println!("");
330+
println!("📚 Initializing vector storage...");
355331
let storage = create_distributed_embeddings(&mut pool, memory_scope_percent)
356332
.ok_or("Failed to initialize NUMA vector storage")?;
357333
println!(
@@ -362,7 +338,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
362338

363339
// Generate random queries with fixed-size vectors
364340
let query_count = 100; // Fixed number of queries for consistent benchmarking
365-
println!("\n🎯 Generating {} random queries...", query_count);
341+
println!("");
342+
println!("🎯 Generating {} random queries...", query_count);
366343
let mut rng = rng();
367344
let mut queries = Vec::with_capacity(query_count);
368345

@@ -391,7 +368,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
391368
worst_case_search,
392369
);
393370

394-
println!("\n✅ Search benchmarking completed!");
395-
println!("Note: SimSIMD integration is ready for implementation when bindings are available.");
371+
println!("");
372+
println!("✅ Search benchmarking completed!");
396373
Ok(())
397374
}

0 commit comments

Comments
 (0)