@@ -103,6 +103,12 @@ pub fn RefCountedSet(
         /// unlikely. Roughly a (1/table_cap)^32 -- with any normal
         /// table capacity that is so unlikely that it's not worth
         /// handling.
+        ///
+        /// However, that assumes a uniform hash function, which
+        /// is not guaranteed and can be subverted with a crafted
+        /// input. We handle this gracefully by returning an error
+        /// anywhere we're about to insert if there's any item
+        /// with a PSL in the last slot of the stats array.
         psl_stats: [32]Id = [_]Id{0} ** 32,
 
         /// The backing store of items
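For context on the invariant being extended here: `psl_stats[i]` counts the items whose probe sequence length (PSL) is `i`, i.e. how far an item landed from its home bucket, so a non-zero last slot means some item has already reached the largest PSL the array can account for. A minimal standalone sketch of the bucket/PSL relationship (the `table_mask` value here is illustrative, not from the diff):

```zig
const std = @import("std");

/// An item with hash `hash` that has been displaced `psl` times lives
/// in bucket (hash +% psl) & table_mask, and psl_stats[psl] counts how
/// many items sit at that probe distance.
fn bucketFor(hash: u64, psl: u64, table_mask: u64) usize {
    return @intCast((hash +% psl) & table_mask);
}

test "probe position" {
    // A table with 8 buckets has mask 0b111.
    try std.testing.expectEqual(@as(usize, 5), bucketFor(3, 2, 0b111));
    // The wrapping add keeps the position valid even for a hash near maxInt.
    try std.testing.expectEqual(@as(usize, 1), bucketFor(std.math.maxInt(u64), 2, 0b111));
}
```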
@@ -237,6 +243,16 @@ pub fn RefCountedSet(
                 return id;
             }
 
+            // While it should be statistically impossible to exceed the
+            // bounds of `psl_stats`, the hash function is not perfect,
+            // and in that case we want to remain stable. If we're about
+            // to insert an item and something already has a PSL of
+            // `len - 1`, we may end up with a PSL of `len`, which would
+            // exceed the bounds. In that case, we claim to be out of memory.
+            if (self.psl_stats[self.psl_stats.len - 1] > 0) {
+                return AddError.OutOfMemory;
+            }
+
             // If the item doesn't exist, we need an available ID.
             if (self.next_id >= self.layout.cap) {
                 // Arbitrarily chosen, threshold for rehashing.
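The guard is cheap: a single array read before any mutation. A toy model of the reasoning (assumed types, not the diff's own), showing why refusing to insert while any item sits at PSL `len - 1` keeps every `psl_stats` index in bounds:

```zig
const std = @import("std");

// If some item already has PSL len - 1, the insertion loop could
// displace an item to PSL len, which would index past the end of
// psl_stats; the guard refuses the insert before that can happen.
fn canInsert(psl_stats: *const [32]u32) bool {
    return psl_stats[psl_stats.len - 1] == 0;
}

test "guard refuses inserts at the PSL ceiling" {
    var stats = [_]u32{0} ** 32;
    try std.testing.expect(canInsert(&stats));
    stats[31] = 1; // one item already probed 31 slots from home
    try std.testing.expect(!canInsert(&stats));
}
```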
@@ -284,6 +300,11 @@ pub fn RefCountedSet(
 
             if (id < self.next_id) {
                 if (items[id].meta.ref == 0) {
+                    // See comment in `addContext` for details.
+                    if (self.psl_stats[self.psl_stats.len - 1] > 0) {
+                        return AddError.OutOfMemory;
+                    }
+
                     self.deleteItem(base, id, ctx);
 
                     const added_id = self.upsert(base, value, id, ctx);
@@ -419,7 +440,7 @@ pub fn RefCountedSet(
 
             if (item.meta.bucket > self.layout.table_cap) return;
 
-            if (table[item.meta.bucket] != id) return;
+            assert(table[item.meta.bucket] == id);
 
             if (comptime @hasDecl(Context, "deleted")) {
                 // Inform the context struct that we're
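The switch from a silent early return to an assert is deliberate: `table[item.meta.bucket] != id` can only happen if the set's internal state is already corrupt, so it should fail loudly in safe builds rather than be papered over. A small reminder of the semantics (assumed table layout, not the diff's types):

```zig
const std = @import("std");
const assert = std.debug.assert;

// std.debug.assert is equivalent to `if (!ok) unreachable;`:
// a panic with a stack trace in Debug/ReleaseSafe, and an
// optimizer hint in ReleaseFast/ReleaseSmall.
fn checkBucket(table: []const u32, bucket: usize, id: u32) void {
    assert(table[bucket] == id);
}

test "consistent bucket passes the assert" {
    const table = [_]u32{ 0, 7, 0, 0 };
    checkBucket(&table, 1, 7);
}
```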
@@ -449,6 +470,8 @@ pub fn RefCountedSet(
             }
 
             table[p] = 0;
+
+            self.assertIntegrity(base, ctx);
         }
 
         /// Find an item in the table and return its ID.
@@ -463,7 +486,7 @@ pub fn RefCountedSet(
             const hash: u64 = ctx.hash(value);
 
             for (0..self.max_psl + 1) |i| {
-                const p: usize = @intCast((hash + i) & self.layout.table_mask);
+                const p: usize = @intCast((hash +% i) & self.layout.table_mask);
                 const id = table[p];
 
                 // Empty bucket, our item cannot have probed to
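The `+` to `+%` change matters in Zig: plain `+` on a `u64` is illegal behavior on overflow (a panic in safe builds), and since `hash` spans the full `u64` range, `hash + i` can overflow for large hashes. `+%` is wrapping addition, which is the intended behavior here because the sum is masked down to a table index anyway. A standalone illustration:

```zig
const std = @import("std");

test "wrapping add keeps probing defined for extreme hashes" {
    const hash: u64 = std.math.maxInt(u64); // a perfectly valid hash value
    const mask: u64 = 0xff;
    // `hash + 1` would overflow (a panic in safe builds);
    // `hash +% 1` wraps to 0 and the mask yields a valid bucket.
    const p: usize = @intCast((hash +% 1) & mask);
    try std.testing.expectEqual(@as(usize, 0), p);
}
```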
@@ -538,11 +561,10 @@ pub fn RefCountedSet(
             var held_id: Id = new_id;
             var held_item: *Item = &new_item;
 
-            var chosen_p: ?Id = null;
             var chosen_id: Id = new_id;
 
             for (0..self.layout.table_cap - 1) |i| {
-                const p: Id = @intCast((hash + i) & self.layout.table_mask);
+                const p: Id = @intCast((hash +% i) & self.layout.table_mask);
                 const id = table[p];
 
                 // Empty bucket, put our held item into it and break.
@@ -557,48 +579,43 @@ pub fn RefCountedSet(
                 const item = &items[id];
 
                 // If there's a dead item then we resurrect it
-                // for our value so that we can re-use its ID.
+                // for our value so that we can re-use its ID,
+                // unless its ID is greater than the one we're
+                // given (i.e. we prefer smaller IDs).
                 if (item.meta.ref == 0) {
                     if (comptime @hasDecl(Context, "deleted")) {
                         // Inform the context struct that we're
                         // deleting the dead item's value for good.
                         ctx.deleted(item.value);
                     }
 
-                    chosen_id = id;
+                    // Reap the dead item.
+                    self.psl_stats[item.meta.psl] -= 1;
+                    item.* = .{};
+
+                    // Only resurrect this item if it has a
+                    // smaller ID than the one we were given.
+                    if (id < new_id) chosen_id = id;
 
+                    // Put the currently held item into the
+                    // bucket of the item that we just reaped.
+                    table[p] = held_id;
                     held_item.meta.bucket = p;
-                    self.psl_stats[item.meta.psl] -= 1;
                     self.psl_stats[held_item.meta.psl] += 1;
                     self.max_psl = @max(self.max_psl, held_item.meta.psl);
 
-                    // If we're not still holding our new item then we
-                    // need to make sure that we put the re-used ID in
-                    // the right place, where we previously put new_id.
-                    if (chosen_p) |c| {
-                        table[c] = id;
-                        table[p] = held_id;
-                    } else {
-                        // If we're still holding our new item then we
-                        // don't actually have to do anything, because
-                        // the table already has the correct ID here.
-                    }
-
                     break;
                 }
 
                 // This item has a lower PSL, swap it out with our held item.
                 if (item.meta.psl < held_item.meta.psl) {
-                    if (held_id == new_id) {
-                        chosen_p = p;
-                        new_item.meta.bucket = p;
-                    }
-
+                    // Put our held item in the bucket.
                     table[p] = held_id;
-                    items[held_id].meta.bucket = p;
+                    held_item.meta.bucket = p;
                     self.psl_stats[held_item.meta.psl] += 1;
                     self.max_psl = @max(self.max_psl, held_item.meta.psl);
 
+                    // Pick up the item that has a lower PSL.
                     held_id = id;
                     held_item = item;
                     self.psl_stats[item.meta.psl] -= 1;
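Two things change in this loop. First, the dead-item branch now fully reaps the corpse (decrementing its PSL count and zeroing it), re-uses its ID only when that ID is smaller than the assigned one, and writes `table[p] = held_id` at the moment of displacement instead of patching the table after the fact through `chosen_p`. Second, the swap branch updates the bucket through `held_item` rather than `items[held_id]`, presumably because `held_item` already points either at the local `new_item` (not yet stored in the array) or at the live `items[id]`, so one store covers both cases the old code special-cased. The displacement rule itself is classic Robin Hood hashing; reduced to its core (assumed field types, not the diff's):

```zig
const std = @import("std");

// The held entry displaces an occupant only when the occupant is
// "richer" (closer to its home bucket), which keeps probe-sequence
// lengths tightly clustered across the table.
fn shouldDisplace(occupant_psl: u8, held_psl: u8) bool {
    return occupant_psl < held_psl;
}

test "robin hood displacement" {
    try std.testing.expect(shouldDisplace(0, 3)); // rich occupant yields
    try std.testing.expect(!shouldDisplace(3, 3)); // equal PSLs keep probing
}
```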
@@ -608,8 +625,60 @@ pub fn RefCountedSet(
                 held_item.meta.psl += 1;
             }
 
+            // Our chosen ID may have changed if we decided
+            // to re-use a dead item's ID, so we make sure
+            // the chosen bucket contains the correct ID.
+            table[new_item.meta.bucket] = chosen_id;
+
+            // Finally, place our new item into our array.
             items[chosen_id] = new_item;
+
+            self.assertIntegrity(base, ctx);
+
             return chosen_id;
         }
+
+        fn assertIntegrity(
+            self: *const Self,
+            base: anytype,
+            ctx: Context,
+        ) void {
+            // Disabled because this is excessively slow; only enable
+            // when debugging a RefCountedSet issue or modifying its logic.
+            if (false and std.debug.runtime_safety) {
+                const table = self.table.ptr(base);
+                const items = self.items.ptr(base);
+
+                var psl_stats: [32]Id = [_]Id{0} ** 32;
+
+                for (items[0..self.layout.cap], 0..) |item, id| {
+                    if (item.meta.bucket < std.math.maxInt(Id)) {
+                        assert(table[item.meta.bucket] == id);
+                        psl_stats[item.meta.psl] += 1;
+                    }
+                }
+
+                std.testing.expectEqualSlices(Id, &psl_stats, &self.psl_stats) catch assert(false);
+
+                assert(std.mem.eql(Id, &psl_stats, &self.psl_stats));
+
+                psl_stats = [_]Id{0} ** 32;
+
+                for (table[0..self.layout.table_cap], 0..) |id, bucket| {
+                    const item = items[id];
+                    if (item.meta.bucket < std.math.maxInt(Id)) {
+                        assert(item.meta.bucket == bucket);
+
+                        const hash: u64 = ctx.hash(item.value);
+                        const p: usize = @intCast((hash +% item.meta.psl) & self.layout.table_mask);
+                        assert(p == bucket);
+
+                        psl_stats[item.meta.psl] += 1;
+                    }
+                }
+
+                std.testing.expectEqualSlices(Id, &psl_stats, &self.psl_stats) catch assert(false);
+            }
+        }
 
     };
 }
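A usage note on `assertIntegrity`: because `false and std.debug.runtime_safety` is comptime-known to be `false`, the whole body is compiled away in every build mode. Re-enabling the checks while debugging is a one-word edit, as sketched here:

```zig
const std = @import("std");

test "integrity checks are gated on runtime safety" {
    // Dropping the `false and` from the guard in assertIntegrity
    // re-enables the checks in Debug/ReleaseSafe builds only:
    if (std.debug.runtime_safety) {
        // e.g. self.assertIntegrity(base, ctx) would run here.
    }
}
```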