Skip to content

Add native lock-free dynamic heap allocator #4749

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 43 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
13dca77
Add `get_current_thread_id` to `base`
Feoramund Jan 20, 2025
f142043
Add virtual memory procedures to `base`
Feoramund Jan 20, 2025
a4a7766
Add native heap allocator
Feoramund Jan 21, 2025
102fc3f
Add heap allocator tests to CI
Feoramund Jan 21, 2025
6d99d95
Fix indentation
Feoramund Jan 27, 2025
17b38c1
Strengthen `Consume` to `Acquire` in heap allocator
Feoramund Jan 28, 2025
76a2877
Hoist reused calculations out into variables
Feoramund Jan 28, 2025
1e62f2c
Rearrange slab iteration conditionals
Feoramund Jan 28, 2025
4c59019
Restrict superpage allocation on Darwin to AMD64 and 2MiB
Feoramund Jan 28, 2025
ef72a37
Use `VM_INHERIT_COPY` instead
Feoramund Jan 28, 2025
e9bf350
Add heap allocator exception for Orca
Feoramund Jan 28, 2025
541443f
Don't build feoramalloc on web platforms
Feoramund Jan 28, 2025
0532b73
Simplify loop
Feoramund Feb 2, 2025
30e6006
Strengthen order to prevent reordering
Feoramund Feb 4, 2025
c52c298
Unify heap allocators
Feoramund Feb 7, 2025
e86bd35
Optimize virtual memory resizing on Darwin
Feoramund Feb 10, 2025
247ce3c
Favor rescheduling superpage cache entry instead
Feoramund Feb 10, 2025
0947914
Use `mach_task_self_` global instead
Feoramund Feb 10, 2025
4a31e1f
Ensure remapped memory has desired protection on Darwin
Feoramund Feb 10, 2025
f5226d0
Revert to unoptimized behavior on Darwin
Feoramund Feb 10, 2025
ee3d859
Remove `HEAP_PANIC_ON_FREE_NIL`
Feoramund Feb 11, 2025
8638bd3
Fix slab iteration bug in `heap_cache_register_superpage`
Feoramund Mar 24, 2025
44a7015
Fix in-place expansion of wide slabs
Feoramund Mar 28, 2025
2d0547d
Remove unneeded call to `min`
Feoramund Mar 30, 2025
acd5d0d
Fix slab iteration bug in the remote free collection phase
Feoramund Mar 31, 2025
5e773aa
Adjust heap allocator benchmarks
Feoramund Apr 8, 2025
a0425ca
Do not return superpages that had remote frees to the OS
Feoramund Apr 13, 2025
fa1f6ab
Don't atomically subtract zero from `remote_free_count`
Feoramund Apr 9, 2025
dcd9bd7
Make slab map caching linear time instead of quadratic
Feoramund Apr 7, 2025
e038c3c
Make superpage caching linear time instead of quadratic
Feoramund Apr 9, 2025
92fdb4d
Cache contiguous free slabs per heap superpage
Feoramund Apr 9, 2025
920487b
Zero only needed part of memory when changing size categories
Feoramund Apr 14, 2025
94bfdd4
Use cache-friendly array to update next free slab index
Feoramund Apr 14, 2025
2a33e1c
Forbid freeing of superpages that were not fully used once
Feoramund Apr 14, 2025
b056b9e
Rewrite the heap allocator
Feoramund May 6, 2025
78006ad
Use AddressSanitizer with new heap allocator
Feoramund May 9, 2025
acc877b
Don't free virtual memory when ThreadSanitizer is active
Feoramund May 9, 2025
674853f
Remove heap coverage code in favor of external tooling
Feoramund May 9, 2025
b8cf6ac
Fix rebase glitch
Feoramund May 9, 2025
97a08e5
Pass `argc` and `argv` for shared libraries on UNIX-likes
Feoramund May 10, 2025
4b08e8c
Support getting page sizes at runtime
Feoramund May 10, 2025
94e04c4
Don't compile assembly with `nasm` on non-x86 platforms
Feoramund May 12, 2025
f0f3d72
Fix syscall-based virtual memory allocation on NetBSD
Feoramund May 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ jobs:
./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_arm64
./odin check vendor/sdl3 -vet -strict-style -disallow-do -target:netbsd_amd64 -no-entry-point
./odin check vendor/sdl3 -vet -strict-style -disallow-do -target:netbsd_arm64 -no-entry-point
./odin run tests/heap_allocator -vet -strict-style -disallow-do -debug -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
Expand Down Expand Up @@ -65,6 +66,7 @@ jobs:
gmake -C vendor/miniaudio/src
./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
./odin check vendor/sdl3 -vet -strict-style -disallow-do -target:freebsd_amd64 -no-entry-point
./odin run tests/heap_allocator -vet -strict-style -disallow-do -debug -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
Expand Down Expand Up @@ -123,6 +125,8 @@ jobs:
run: ./odin check examples/all -strict-style -vet -disallow-do
- name: Odin check vendor/sdl3
run: ./odin check vendor/sdl3 -strict-style -vet -disallow-do -no-entry-point
- name: Odin heap allocator tests
run: ./odin run tests/heap_allocator -vet -strict-style -disallow-do -debug -sanitize:thread -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
- name: Normal Core library tests
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
- name: Optimized Core library tests
Expand Down Expand Up @@ -211,6 +215,11 @@ jobs:
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point
- name: Odin heap allocator tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
odin run tests/heap_allocator -vet -strict-style -disallow-do -debug -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests
- name: Core library tests
shell: cmd
run: |
Expand Down Expand Up @@ -305,6 +314,9 @@ jobs:
- name: Odin run -debug
run: ./odin run examples/demo -debug -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath

- name: Odin heap allocator tests
run: ./odin run tests/heap_allocator -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath -debug -- -allocator=feoramalloc -vmem-tests -serial-tests -parallel-tests

- name: Normal Core library tests
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath

Expand Down
3 changes: 0 additions & 3 deletions base/runtime/default_allocators_general.odin
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ when ODIN_DEFAULT_TO_NIL_ALLOCATOR {
} else when ODIN_DEFAULT_TO_PANIC_ALLOCATOR {
default_allocator_proc :: panic_allocator_proc
default_allocator :: panic_allocator
} else when ODIN_OS != .Orca && (ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32) {
default_allocator :: default_wasm_allocator
default_allocator_proc :: wasm_allocator_proc
} else {
default_allocator :: heap_allocator
default_allocator_proc :: heap_allocator_proc
Expand Down
3 changes: 2 additions & 1 deletion base/runtime/entry_unix.odin
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ import "base:intrinsics"

when ODIN_BUILD_MODE == .Dynamic {
@(link_name="_odin_entry_point", linkage="strong", require/*, link_section=".init"*/)
_odin_entry_point :: proc "c" () {
_odin_entry_point :: proc "c" (argc: i32, argv: [^]cstring) {
args__ = argv[:argc]
context = default_context()
#force_no_inline _startup_runtime()
intrinsics.__entry_point()
Expand Down
137 changes: 47 additions & 90 deletions base/runtime/heap_allocator.odin
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#+build !js
#+build !orca
#+build !wasi
package runtime

import "base:intrinsics"
Expand All @@ -9,111 +12,65 @@ heap_allocator :: proc() -> Allocator {
}
}

heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
size, alignment: int,
old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
//
// NOTE(tetra, 2020-01-14): The heap doesn't respect alignment.
// Instead, we overallocate by `alignment + size_of(rawptr) - 1`, and insert
// padding. We also store the original pointer returned by heap_alloc right before
// the pointer we return to the user.
//

aligned_alloc :: proc(size, alignment: int, old_ptr: rawptr, old_size: int, zero_memory := true) -> ([]byte, Allocator_Error) {
// Not(flysand): We need to reserve enough space for alignment, which
// includes the user data itself, the space to store the pointer to
// allocation start, as well as the padding required to align both
// the user data and the pointer.
a := max(alignment, align_of(rawptr))
space := a-1 + size_of(rawptr) + size
allocated_mem: rawptr

force_copy := old_ptr != nil && alignment > align_of(rawptr)

if old_ptr != nil && !force_copy {
original_old_ptr := ([^]rawptr)(old_ptr)[-1]
allocated_mem = heap_resize(original_old_ptr, space)
} else {
allocated_mem = heap_alloc(space, zero_memory)
}
aligned_mem := rawptr(([^]u8)(allocated_mem)[size_of(rawptr):])

ptr := uintptr(aligned_mem)
aligned_ptr := (ptr + uintptr(a)-1) & ~(uintptr(a)-1)
if allocated_mem == nil {
aligned_free(old_ptr)
aligned_free(allocated_mem)
heap_allocator_proc :: proc(
allocator_data: rawptr,
mode: Allocator_Mode,
size, alignment: int,
old_memory: rawptr,
old_size: int,
loc := #caller_location,
) -> ([]byte, Allocator_Error) {
assert(alignment <= ODIN_HEAP_MAX_ALIGNMENT, "Heap allocation alignment beyond ODIN_HEAP_MAX_ALIGNMENT bytes is not supported.", loc = loc)
assert(alignment >= 0, "Alignment must be greater than or equal to zero.", loc = loc)
switch mode {
case .Alloc:
// All allocations are aligned to at least their size up to
// `HEAP_MAX_ALIGNMENT`, and by virtue of binary arithmetic, any
// address aligned to N will also be aligned to N>>1.
//
// Therefore, we have no book-keeping costs for alignment.
ptr := heap_alloc(max(size, alignment))
if ptr == nil {
return nil, .Out_Of_Memory
}

aligned_mem = rawptr(aligned_ptr)
([^]rawptr)(aligned_mem)[-1] = allocated_mem

if force_copy {
mem_copy_non_overlapping(aligned_mem, old_ptr, min(old_size, size))
aligned_free(old_ptr)
}

return byte_slice(aligned_mem, size), nil
}

aligned_free :: proc(p: rawptr) {
if p != nil {
heap_free(([^]rawptr)(p)[-1])
return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Alloc_Non_Zeroed:
ptr := heap_alloc(max(size, alignment), zero_memory = false)
if ptr == nil {
return nil, .Out_Of_Memory
}
}

aligned_resize :: proc(p: rawptr, old_size: int, new_size: int, new_alignment: int, zero_memory := true) -> (new_memory: []byte, err: Allocator_Error) {
if p == nil {
return aligned_alloc(new_size, new_alignment, nil, old_size, zero_memory)
return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Resize:
ptr := heap_resize(old_memory, old_size, max(size, alignment))
if ptr == nil {
return nil, .Out_Of_Memory
}

new_memory = aligned_alloc(new_size, new_alignment, p, old_size, zero_memory) or_return

// NOTE: heap_resize does not zero the new memory, so we do it
if zero_memory && new_size > old_size {
new_region := raw_data(new_memory[old_size:])
intrinsics.mem_zero(new_region, new_size - old_size)
return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Resize_Non_Zeroed:
ptr := heap_resize(old_memory, old_size, max(size, alignment), zero_memory = false)
if ptr == nil {
return nil, .Out_Of_Memory
}
return
}

switch mode {
case .Alloc, .Alloc_Non_Zeroed:
return aligned_alloc(size, alignment, nil, 0, mode == .Alloc)

return transmute([]byte)Raw_Slice{ data = ptr, len = size }, nil
case .Free:
aligned_free(old_memory)

heap_free(old_memory)
case .Free_All:
return nil, .Mode_Not_Implemented

case .Resize, .Resize_Non_Zeroed:
return aligned_resize(old_memory, old_size, size, alignment, mode == .Resize)

case .Query_Features:
set := (^Allocator_Mode_Set)(old_memory)
if set != nil {
set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Resize, .Resize_Non_Zeroed, .Query_Features}
set^ = {
.Alloc,
.Alloc_Non_Zeroed,
.Resize,
.Resize_Non_Zeroed,
.Free,
.Query_Features,
}
}
return nil, nil

case .Query_Info:
return nil, .Mode_Not_Implemented
}

return nil, nil
}


heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
return _heap_alloc(size, zero_memory)
}

heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
return _heap_resize(ptr, new_size)
}

heap_free :: proc "contextless" (ptr: rawptr) {
_heap_free(ptr)
}
89 changes: 89 additions & 0 deletions base/runtime/heap_allocator_control.odin
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#+build !js
#+build !orca
#+build !wasi
package runtime

import "base:intrinsics"

/*
Reduce the amount of dynamically allocated memory held by the current thread as much as possible.

Walks every segment owned by the calling thread's heap, frees any in-use slab
whose bins are all free, and frees (or lets `heap_free_slab` free) any segment
that ends up entirely empty. A no-op when the thread has no local heap yet.
*/
compact_local_heap :: proc "contextless" () {
	if local_heap == nil {
		// This thread has never allocated from the heap; nothing to compact.
		return
	}

	// Pull in bins freed by other threads first, so that slabs which became
	// empty only through remote frees are seen as fully free below.
	// NOTE(review): assumes this merges into `local_heap` — confirm against
	// the definition of `heap_merge_remote_free_list`.
	heap_merge_remote_free_list()

	for segment := local_heap.segments; segment != nil; /**/ {
		// Snapshot the link first: the segment may be released (and its
		// backing memory unmapped) before the bottom of this iteration.
		next := segment.next_segment

		segment_will_free_itself := segment.may_return
		free_slabs := segment.free_slabs
		max_slabs := len(segment.slabs)

		for i in 0..<max_slabs {
			slab := &segment.slabs[i]
			// `bin_size > 0` marks a slab that is in use; one whose bins are
			// all free can be handed back to the segment.
			if slab.bin_size > 0 && slab.free_bins == slab.max_bins {
				free_slabs += 1
				heap_free_slab(segment, slab)
				if free_slabs == max_slabs {
					// We must break now, as the segment's memory could have
					// been returned to the operating system and we may
					// continue iterating over invalid memory.
					break
				}
			}
		}

		// We check it this way because `heap_free_slab` will automatically
		// free the segment if the conditions are right, otherwise we need to
		// do it.
		if free_slabs == max_slabs && !segment_will_free_itself {
			heap_free_segment(segment)
		}

		segment = next
	}
}

/*
Adopt all empty segments in the orphanage and release them back to the operating system.
*/
heap_release_empty_orphans :: proc "contextless" () {
	segment: ^Heap_Segment

	// First, take control of the linked list by replacing it with a nil
	// pointer and a zero count.
	//
	// The head is a tagged pointer: the low `ODIN_HEAP_ORPHANAGE_COUNT_BITS`
	// of the pointer field carry the list's element count, and `version` is
	// bumped on every successful swap (presumably an ABA guard — confirm
	// against the `Tagged_Pointer` definition).
	old_head := transmute(Tagged_Pointer)intrinsics.atomic_load_explicit(cast(^u64)&heap_orphanage.empty, .Relaxed)
	for {
		count := old_head.pointer & ODIN_HEAP_ORPHANAGE_COUNT_BITS
		untagged_head := uintptr(old_head.pointer) & ~uintptr(ODIN_HEAP_ORPHANAGE_COUNT_BITS)

		segment = cast(^Heap_Segment)uintptr(untagged_head)
		if segment == nil {
			// Empty orphanage: the count tag must agree with the nil pointer.
			assert_contextless(count == 0, "The heap allocator saw a nil pointer on the orphanage for empty segments but the count was not zero.")
			break
		}

		new_head := Tagged_Pointer{
			pointer = 0, // nil pointer with zero count
			version = old_head.version + 1,
		}

		old_head_, swapped := intrinsics.atomic_compare_exchange_weak_explicit(cast(^u64)&heap_orphanage.empty, transmute(u64)old_head, transmute(u64)new_head, .Acq_Rel, .Relaxed)
		if swapped {
			// NOTE(review): storing nil into the popped head's `next_segment`
			// before the walk below severs the rest of the list — only the
			// first segment is ever released and any segments after it become
			// unreachable. Confirm the empty orphanage can hold at most one
			// segment; otherwise the tail leaks and this store should be
			// removed (or the walk performed before it).
			intrinsics.atomic_store_explicit(&segment.next_segment, nil, .Release)
			break
		}
		// CAS failed: another thread changed the head; retry with the value
		// we just observed.
		old_head = transmute(Tagged_Pointer)old_head_
	}

	// Now walk the list of segments and release them.
	for segment != nil {
		// Snapshot the link before `free_virtual_memory` unmaps the segment.
		next := segment.next_segment
		assert_contextless(segment.free_slabs == len(segment.slabs), "The heap allocator found a segment in the orphanage that should have been empty.")
		free_virtual_memory(segment, segment.size)
		segment = next
	}
}
Loading
Loading