@@ -1380,14 +1380,23 @@ heap_alloc :: proc "contextless" (size: int, zero_memory: bool = true) -> (ptr:
1380
1380
if superpage == nil {
1381
1381
continue consume_loop
1382
1382
}
1383
- // `should_remove` determines whether or not we keep the cache
1384
- // entry around, pending comparison of the consistency of the
1385
- // number of free bins we expect to have versus the free bins
1386
- // we do merge.
1383
+
1384
+ // First, we will remove the cache entry, to avoid issues with
1385
+ // freeing operations that happen simultaneously while we're
1386
+ // merging.
1387
+ //
1388
+ // For example, if we merge the freed bins in one slab while a
1389
+ // lower-index slab receives a remote free, we would otherwise
1390
+ // be unaware of that, and if the superpage was still marked as
1391
+ // being in the cache, the freeing thread would not schedule
1392
+ // it.
1387
1393
//
1388
- // This is a heuristic to help with mergers that happen during
1389
- // simultaneous remote frees.
1390
- should_remove := true
1394
+ // The performance hit should be negligible in the event that
1395
+ // we have to reschedule the superpage.
1396
+ intrinsics.atomic_store_explicit (&cache.superpages_with_remote_frees[i], nil , .Release)
1397
+ // Seq_Cst, to prevent these two from being seen out of order.
1398
+ intrinsics.atomic_store_explicit (&superpage.remote_free_set, false , .Seq_Cst)
1399
+ should_reschedule := false
1391
1400
1392
1401
for j := 0 ; j < HEAP_SLAB_COUNT; /**/ {
1393
1402
slab := heap_superpage_index_slab (superpage, j)
@@ -1396,24 +1405,14 @@ heap_alloc :: proc "contextless" (size: int, zero_memory: bool = true) -> (ptr:
1396
1405
// signals that it has remote frees. This is for the sake
1397
1406
// of speed, given how large the bitmaps can be.
1398
1407
merge_block: if bin_size > 0 && slab.free_bins == 0 && intrinsics.atomic_load_explicit (&slab.remote_free_bins_scheduled, .Acquire) > 0 {
1399
- bins_left := heap_merge_remote_frees (slab)
1400
- if bins_left != 0 {
1401
- // Here we've detected that there are still remote
1402
- // frees left, so the entry is left in the cache
1403
- // for the next round of merges.
1404
- //
1405
- // NOTE: If we were to loop back and try to
1406
- // re-merge the unmerged bins, we could end up in a
1407
- // situation where this thread is stalled under
1408
- // heavy load.
1409
- should_remove = false
1410
- }
1408
+ heap_merge_remote_frees (slab)
1411
1409
if slab.free_bins == 0 {
1412
1410
// No bins were freed at all, which is possible due
1413
1411
// to the parallel nature of this code. The freeing
1414
1412
// thread could have signalled its intent, but we
1415
1413
// merged before it had a chance to flip the
1416
1414
// necessary bit.
1415
+ should_reschedule = true
1417
1416
break merge_block
1418
1417
}
1419
1418
intrinsics.atomic_store_explicit (&slab.is_full, false , .Release)
@@ -1437,12 +1436,25 @@ heap_alloc :: proc "contextless" (size: int, zero_memory: bool = true) -> (ptr:
1437
1436
}
1438
1437
}
1439
1438
1440
- if should_remove {
1441
- // NOTE: The order of operations here is important to keep
1442
- // the cache from overflowing. The entry must first be
1443
- // removed, then the superpage has its flag cleared.
1444
- intrinsics.atomic_store_explicit (&cache.superpages_with_remote_frees[i], nil , .Release)
1445
- intrinsics.atomic_store_explicit (&superpage.remote_free_set, false , .Seq_Cst)
1439
+ if should_reschedule {
1440
+ // This is the logic found in `heap_remote_cache_add_remote_free_superpage`, simplified.
1441
+ if !intrinsics.atomic_exchange_explicit (&superpage.remote_free_set, true , .Acq_Rel) {
1442
+ cache_reschedule := local_heap_cache
1443
+ reschedule_loop: for {
1444
+ for j := 0 ; j < len (cache_reschedule.superpages_with_remote_frees); j += 1 {
1445
+ old, swapped := intrinsics.atomic_compare_exchange_strong_explicit (&cache_reschedule.superpages_with_remote_frees[j], nil , superpage, .Acq_Rel, .Relaxed)
1446
+ assert_contextless (old != superpage, " The heap allocator found a duplicate of a superpage in its superpages_with_remote_frees while rescheduling." )
1447
+ if swapped {
1448
+ intrinsics.atomic_add_explicit (&local_heap_cache.remote_free_count, 1 , .Release)
1449
+ break reschedule_loop
1450
+ }
1451
+ }
1452
+ next_cache_block := intrinsics.atomic_load_explicit (&cache_reschedule.next_cache_block, .Acquire)
1453
+ assert_contextless (next_cache_block != nil , " The heap allocator failed to find free space for a new entry in its superpages_with_remote_frees cache." )
1454
+ cache_reschedule = next_cache_block
1455
+ }
1456
+ }
1457
+ } else {
1446
1458
removed += 1
1447
1459
}
1448
1460
0 commit comments