@@ -19,6 +19,7 @@ use std::env;
 use std::time::Instant;
 
 use fork_union as fu;
+use simsimd::{Distance, SpatialSimilarity};
 
 /// Embedding dimensions - fixed at compile time for better performance
 const EMBEDDING_DIMENSIONS: usize = 768;
@@ -32,21 +33,21 @@ type Embedding = [f32; EMBEDDING_DIMENSIONS];
 /// Result of a search operation - stored on stack to avoid heap allocations
 #[derive(Debug, Clone, Copy)]
 struct SearchResult {
-    best_similarity: f32,
+    best_similarity: Distance,
     best_index: usize,
     numa_node: usize,
 }
 
 impl SearchResult {
     fn new(numa_node: usize) -> Self {
         Self {
-            best_similarity: f32::NEG_INFINITY,
+            best_similarity: Distance::NEG_INFINITY,
             best_index: 0,
             numa_node,
         }
     }
 
-    fn update_if_better(&mut self, similarity: f32, index: usize) {
+    fn update_if_better(&mut self, similarity: Distance, index: usize) {
         if similarity > self.best_similarity {
             self.best_similarity = similarity;
             self.best_index = index;
@@ -118,32 +119,6 @@ fn create_distributed_embeddings(
     Some(distributed_vec)
 }
 
-/// Fast cosine similarity using SimSIMD (placeholder for now - would need actual SimSIMD binding)
-#[inline]
-fn cosine_similarity_simd(a: &Embedding, b: &Embedding) -> f32 {
-    // TODO: Replace with actual SimSIMD call when binding is available
-    // For now, use optimized manual implementation
-    let mut dot_product = 0.0f32;
-    let mut norm_a = 0.0f32;
-    let mut norm_b = 0.0f32;
-
-    // Unroll loop for better vectorization
-    for i in (0..EMBEDDING_DIMENSIONS).step_by(4) {
-        let end = (i + 4).min(EMBEDDING_DIMENSIONS);
-        for j in i..end {
-            dot_product += a[j] * b[j];
-            norm_a += a[j] * a[j];
-            norm_b += b[j] * b[j];
-        }
-    }
-
-    if norm_a == 0.0 || norm_b == 0.0 {
-        0.0
-    } else {
-        dot_product / (norm_a.sqrt() * norm_b.sqrt())
-    }
-}
-
 /// Performs NUMA-aware search using Fork Union's for_threads API for optimal colocation
 fn numa_aware_search(
     storage: &DistributedEmbeddings,
@@ -187,7 +162,7 @@ fn numa_aware_search(
         // Search vectors assigned to this thread
         for local_vector_idx in range {
             if let Some(vector) = node_vectors.get(local_vector_idx) {
-                let similarity = cosine_similarity_simd(query, vector);
+                let similarity = f32::cosine(query, vector).unwrap();
                 // Convert local index to global round-robin index using the new method
                 let global_index =
                     storage.local_to_global_index(colocation_index, local_vector_idx);
@@ -251,7 +226,7 @@ fn worst_case_search(
         // Search vectors assigned to this thread, regardless of NUMA locality
         for local_vector_idx in range {
             if let Some(vector) = node_vectors.get(local_vector_idx) {
-                let similarity = cosine_similarity_simd(query, vector);
+                let similarity = f32::cosine(query, vector).unwrap();
                 // Convert to global index for consistent comparison
                 let global_index =
                     storage.local_to_global_index(numa_node, local_vector_idx);
@@ -288,7 +263,7 @@ fn benchmark_search<F>(
     println!("\n=== {} ===", name);
 
     let start = Instant::now();
-    let mut total_similarity = 0.0f32;
+    let mut total_similarity: Distance = 0.0;
 
     for (i, query) in queries.iter().enumerate() {
         let result = search_fn(storage, query, pool);
@@ -304,7 +279,7 @@ fn benchmark_search<F>(
     }
 
     let duration = start.elapsed();
-    let avg_similarity = total_similarity / queries.len() as f32;
+    let avg_similarity = total_similarity / queries.len() as Distance;
 
     println!(
         "Completed {} queries in {:.2}ms",
@@ -351,7 +326,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let mut pool = fu::ThreadPool::try_spawn(threads)?;
 
     // Initialize NUMA-aware vector storage
-    println!("\n📚 Initializing vector storage...");
+    println!("");
+    println!("📚 Initializing vector storage...");
     let storage = create_distributed_embeddings(&mut pool, memory_scope_percent)
         .ok_or("Failed to initialize NUMA vector storage")?;
     println!(
@@ -362,7 +338,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     // Generate random queries with fixed-size vectors
     let query_count = 100; // Fixed number of queries for consistent benchmarking
-    println!("\n🎯 Generating {} random queries...", query_count);
+    println!("");
+    println!("🎯 Generating {} random queries...", query_count);
     let mut rng = rng();
     let mut queries = Vec::with_capacity(query_count);
 
@@ -391,7 +368,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         worst_case_search,
     );
 
-    println!("\n✅ Search benchmarking completed!");
-    println!("Note: SimSIMD integration is ready for implementation when bindings are available.");
+    println!("");
+    println!("✅ Search benchmarking completed!");
     Ok(())
 }
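
For reference, a minimal standalone sketch of the simsimd call this diff adopts. The example vectors and the main wrapper below are illustrative, not part of the PR; the sketch only assumes the crate's SpatialSimilarity trait and its Distance alias (an f64), as imported above:

use simsimd::{Distance, SpatialSimilarity};

fn main() {
    // Two small example vectors; the benchmark itself passes 768-dimensional embeddings.
    let a: Vec<f32> = vec![1.0, 2.0, 3.0];
    let b: Vec<f32> = vec![4.0, 5.0, 6.0];

    // `f32::cosine` returns `None` if the slices differ in length,
    // otherwise a `Distance` (f64) computed by SimSIMD's SIMD kernels.
    let d: Distance = f32::cosine(&a, &b).expect("vectors must share a length");
    println!("cosine: {d}");
}

This is the same per-vector pattern now used in numa_aware_search and worst_case_search via f32::cosine(query, vector).unwrap().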