
Commit 22ec9d9

Favor rescheduling superpage cache entry instead
This should be more robust during merges that happen simultaneously with remote frees. Previously, we would preserve the cache entry, but there is an edge case where a slab of a lower index could be remotely freed and missed during a merge.
1 parent f755b9b commit 22ec9d9
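For context, here is a rough, hypothetical sketch of the pattern the commit moves to: the consumer clears the cache slot and the item's "scheduled" flag before draining it, so a racing producer can always re-add the entry, and the consumer re-schedules the entry itself only in the rare case where the drain found nothing. The names Work_Queue, Work_Item, schedule_item, and consume_items are invented for illustration; the real code operates on superpages, slabs, and chained cache blocks rather than this flat slot array.

// Minimal sketch of the "clear first, reschedule if needed" pattern.
// All names here are hypothetical; this is not the allocator's API.
package main

import "base:intrinsics"

SLOT_COUNT :: 8

Work_Item :: struct {
	scheduled: bool, // Set while the item occupies a slot, so producers don't add it twice.
	pending:   int,  // Work accumulated by producers, drained by the consumer.
}

Work_Queue :: struct {
	slots: [SLOT_COUNT]^Work_Item,
}

// Producer side: claim the scheduled flag, then CAS the item into an empty slot.
schedule_item :: proc "contextless" (q: ^Work_Queue, item: ^Work_Item) {
	if intrinsics.atomic_exchange_explicit(&item.scheduled, true, .Acq_Rel) {
		return // Already scheduled by another thread.
	}
	for i := 0; i < SLOT_COUNT; i += 1 {
		_, swapped := intrinsics.atomic_compare_exchange_strong_explicit(&q.slots[i], nil, item, .Acq_Rel, .Relaxed)
		if swapped {
			return
		}
	}
	// A real implementation (like the allocator's cache) would chain another
	// block of slots here instead of giving up.
}

// Consumer side (assumed to be a single thread, like the allocator's
// thread-local cache): clear the slot and the flag *before* draining, so a
// producer racing with us can re-schedule the item; if the drain found
// nothing, reschedule it ourselves rather than dropping the signal.
consume_items :: proc "contextless" (q: ^Work_Queue) {
	for i := 0; i < SLOT_COUNT; i += 1 {
		item := intrinsics.atomic_load_explicit(&q.slots[i], .Acquire)
		if item == nil {
			continue
		}
		intrinsics.atomic_store_explicit(&q.slots[i], nil, .Release)
		intrinsics.atomic_store_explicit(&item.scheduled, false, .Seq_Cst)

		drained := intrinsics.atomic_exchange_explicit(&item.pending, 0, .Acq_Rel)
		if drained == 0 {
			// The producer signalled intent but had not yet published its
			// work; put the item back so a later pass picks it up.
			schedule_item(q, item)
		}
	}
}

main :: proc() {
	// Single-threaded exercise of the path; in practice producers and the
	// consumer run on different threads.
	queue: Work_Queue
	item:  Work_Item

	intrinsics.atomic_add_explicit(&item.pending, 1, .Release)
	schedule_item(&queue, &item)
	consume_items(&queue)
}

The design choice mirrors the new comments in the diff below: removing the entry up front means a remote free that lands mid-merge can schedule the superpage again on its own, and the only cost of the race is an occasional redundant reschedule.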

File tree: 1 file changed, +37 -25 lines


base/runtime/heap_allocator_implementation.odin

Lines changed: 37 additions & 25 deletions
@@ -1380,14 +1380,23 @@ heap_alloc :: proc "contextless" (size: int, zero_memory: bool = true) -> (ptr:
 			if superpage == nil {
 				continue consume_loop
 			}
-			// `should_remove` determines whether or not we keep the cache
-			// entry around, pending comparison of the consistency of the
-			// number of free bins we expect to have versus the free bins
-			// we do merge.
+
+			// First, we will remove the cache entry, to avoid issues with
+			// freeing operations that happen simultaneously while we're
+			// merging.
+			//
+			// For example, if we merge the freed bins in one slab while a
+			// lower-index slab receives a remote free, we would otherwise
+			// be unaware of that, and if the superpage was still marked as
+			// being in the cache, the freeing thread would not schedule
+			// it.
 			//
-			// This is a heuristic to help with mergers that happen during
-			// simultaneous remote frees.
-			should_remove := true
+			// The performance hit should be negligible in the event that
+			// we have to reschedule the superpage.
+			intrinsics.atomic_store_explicit(&cache.superpages_with_remote_frees[i], nil, .Release)
+			// Seq_Cst, to prevent these two from being seen out of order.
+			intrinsics.atomic_store_explicit(&superpage.remote_free_set, false, .Seq_Cst)
+			should_reschedule := false

 			for j := 0; j < HEAP_SLAB_COUNT; /**/ {
 				slab := heap_superpage_index_slab(superpage, j)
@@ -1396,24 +1405,14 @@ heap_alloc :: proc "contextless" (size: int, zero_memory: bool = true) -> (ptr:
 				// signals that it has remote frees. This is for the sake
 				// of speed, given how large the bitmaps can be.
 				merge_block: if bin_size > 0 && slab.free_bins == 0 && intrinsics.atomic_load_explicit(&slab.remote_free_bins_scheduled, .Acquire) > 0 {
-					bins_left := heap_merge_remote_frees(slab)
-					if bins_left != 0 {
-						// Here we've detected that there are still remote
-						// frees left, so the entry is left in the cache
-						// for the next round of merges.
-						//
-						// NOTE: If we were to loop back and try to
-						// re-merge the unmerged bins, we could end up in a
-						// situation where this thread is stalled under
-						// heavy load.
-						should_remove = false
-					}
+					heap_merge_remote_frees(slab)
 					if slab.free_bins == 0 {
 						// No bins were freed at all, which is possible due
 						// to the parallel nature of this code. The freeing
 						// thread could have signalled its intent, but we
 						// merged before it had a chance to flip the
 						// necessary bit.
+						should_reschedule = true
 						break merge_block
 					}
 					intrinsics.atomic_store_explicit(&slab.is_full, false, .Release)
@@ -1437,12 +1436,25 @@ heap_alloc :: proc "contextless" (size: int, zero_memory: bool = true) -> (ptr:
 				}
 			}

-			if should_remove {
-				// NOTE: The order of operations here is important to keep
-				// the cache from overflowing. The entry must first be
-				// removed, then the superpage has its flag cleared.
-				intrinsics.atomic_store_explicit(&cache.superpages_with_remote_frees[i], nil, .Release)
-				intrinsics.atomic_store_explicit(&superpage.remote_free_set, false, .Seq_Cst)
+			if should_reschedule {
+				// This is the logic found in `heap_remote_cache_add_remote_free_superpage`, simplified.
+				if !intrinsics.atomic_exchange_explicit(&superpage.remote_free_set, true, .Acq_Rel) {
+					cache_reschedule := local_heap_cache
+					reschedule_loop: for {
+						for j := 0; j < len(cache_reschedule.superpages_with_remote_frees); j += 1 {
+							old, swapped := intrinsics.atomic_compare_exchange_strong_explicit(&cache_reschedule.superpages_with_remote_frees[j], nil, superpage, .Acq_Rel, .Relaxed)
+							assert_contextless(old != superpage, "The heap allocator found a duplicate of a superpage in its superpages_with_remote_frees while rescheduling.")
+							if swapped {
+								intrinsics.atomic_add_explicit(&local_heap_cache.remote_free_count, 1, .Release)
+								break reschedule_loop
+							}
+						}
+						next_cache_block := intrinsics.atomic_load_explicit(&cache_reschedule.next_cache_block, .Acquire)
+						assert_contextless(next_cache_block != nil, "The heap allocator failed to find free space for a new entry in its superpages_with_remote_frees cache.")
+						cache_reschedule = next_cache_block
+					}
+				}
+			} else {
 				removed += 1
 			}
