Skip to content

Commit f71f1ea

Browse files
committed
compiler: don't queue too much AIR/MIR
Without this cap, unlucky scheduling and/or details of what pipeline stages perform best on the host machine could cause many gigabytes of MIR to be stuck in the queue. At a certain point, pause the main thread until some of the functions in flight have been processed.
1 parent b106b45 commit f71f1ea

File tree

3 files changed

+55
-0
lines changed

3 files changed

+55
-0
lines changed

src/Compilation.zig

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4607,12 +4607,17 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job) JobError!void {
46074607
};
46084608
assert(zcu.pending_codegen_jobs.rmw(.Add, 1, .monotonic) > 0); // the "Code Generation" node hasn't been ended
46094609
zcu.codegen_prog_node.increaseEstimatedTotalItems(1);
4610+
// This value is used as a heuristic to avoid queueing too much AIR/MIR at once (hence
4611+
// using a lot of memory). If this would cause too many AIR bytes to be in-flight, we
4612+
// will block on the `dispatchZcuLinkTask` call below.
4613+
const air_bytes: u32 = @intCast(air.instructions.len * 5 + air.extra.items.len * 4);
46104614
if (comp.separateCodegenThreadOk()) {
46114615
// `workerZcuCodegen` takes ownership of `air`.
46124616
comp.thread_pool.spawnWgId(&comp.link_task_wait_group, workerZcuCodegen, .{ comp, func.func, air, shared_mir });
46134617
comp.dispatchZcuLinkTask(tid, .{ .link_func = .{
46144618
.func = func.func,
46154619
.mir = shared_mir,
4620+
.air_bytes = air_bytes,
46164621
} });
46174622
} else {
46184623
{
@@ -4624,6 +4629,7 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job) JobError!void {
46244629
comp.dispatchZcuLinkTask(tid, .{ .link_func = .{
46254630
.func = func.func,
46264631
.mir = shared_mir,
4632+
.air_bytes = air_bytes,
46274633
} });
46284634
air.deinit(gpa);
46294635
}

src/link.zig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,6 +1267,11 @@ pub const ZcuTask = union(enum) {
12671267
/// the codegen job to ensure that the linker receives functions in a deterministic order,
12681268
/// allowing reproducible builds.
12691269
mir: *SharedMir,
1270+
/// This is not actually used by `doZcuTask`. Instead, `Queue` uses this value as a heuristic
1271+
/// to avoid queueing too much AIR/MIR for codegen/link at a time. Essentially, we cap the
1272+
/// total number of AIR bytes which are being processed at once, preventing unbounded memory
1273+
/// usage when AIR is produced faster than it is processed.
1274+
air_bytes: u32,
12701275

12711276
pub const SharedMir = struct {
12721277
/// This is initially `.pending`. When `value` is populated, the codegen thread will set

src/link/Queue.zig

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,21 @@ wip_zcu: std.ArrayListUnmanaged(ZcuTask),
3939
/// index into `wip_zcu` which we have reached.
4040
wip_zcu_idx: usize,
4141

42+
/// The sum of all `air_bytes` for all currently-queued `ZcuTask.link_func` tasks. Because
43+
/// MIR bytes are approximately proportional to AIR bytes, this acts to limit the amount of
44+
/// AIR and MIR which is queued for codegen and link respectively, to prevent excessive
45+
/// memory usage if analysis produces AIR faster than it can be processed by codegen/link.
46+
/// The cap is `max_air_bytes_in_flight`.
47+
/// Guarded by `mutex`.
48+
air_bytes_in_flight: u32,
49+
/// If nonzero, then a call to `enqueueZcu` is blocked waiting to add a `link_func` task, but
50+
/// cannot until `air_bytes_in_flight` is no greater than `max_air_bytes_in_flight` minus this value.
51+
/// Guarded by `mutex`.
52+
air_bytes_waiting: u32,
53+
/// After setting `air_bytes_waiting`, `enqueueZcu` will wait on this condition (with `mutex`).
54+
/// Once `air_bytes_waiting` more bytes fit under `max_air_bytes_in_flight`, this condition should be signaled.
55+
air_bytes_cond: std.Thread.Condition,
56+
4257
/// Guarded by `mutex`.
4358
state: union(enum) {
4459
/// The link thread is currently running or queued to run.
@@ -52,6 +67,11 @@ state: union(enum) {
5267
wait_for_mir: *ZcuTask.LinkFunc.SharedMir,
5368
},
5469

70+
/// In the worst observed case, MIR is around 50 times as large as AIR. More typically, the ratio is
71+
/// around 20. Going by that 50x multiplier, and assuming we want to consume no more than 500 MiB of
72+
/// memory on AIR/MIR, we see a limit of around 10 MiB of AIR in-flight.
73+
const max_air_bytes_in_flight = 10 * 1024 * 1024; // 10 MiB of AIR
74+
5575
/// The initial `Queue` state, containing no tasks, expecting no prelink tasks, and with no running worker thread.
5676
/// The `pending_prelink_tasks` and `queued_prelink` fields may be modified as needed before calling `start`.
5777
pub const empty: Queue = .{
@@ -64,6 +84,9 @@ pub const empty: Queue = .{
6484
.wip_zcu = .empty,
6585
.wip_zcu_idx = 0,
6686
.state = .finished,
87+
.air_bytes_in_flight = 0,
88+
.air_bytes_waiting = 0,
89+
.air_bytes_cond = .{},
6790
};
6891
/// `comp` is needed to correctly deinit any pending `ZcuTask`s.
6992
pub fn deinit(q: *Queue, comp: *Compilation) void {
@@ -131,6 +154,16 @@ pub fn enqueueZcu(q: *Queue, comp: *Compilation, task: ZcuTask) Allocator.Error!
131154
{
132155
q.mutex.lock();
133156
defer q.mutex.unlock();
157+
// If this is a `link_func` task, we might need to wait for `air_bytes_in_flight` to fall.
158+
if (task == .link_func) {
159+
const max_in_flight = max_air_bytes_in_flight -| task.link_func.air_bytes;
160+
while (q.air_bytes_in_flight > max_in_flight) {
161+
q.air_bytes_waiting = task.link_func.air_bytes;
162+
q.air_bytes_cond.wait(&q.mutex);
163+
q.air_bytes_waiting = 0;
164+
}
165+
q.air_bytes_in_flight += task.link_func.air_bytes;
166+
}
134167
try q.queued_zcu.append(comp.gpa, task);
135168
switch (q.state) {
136169
.running, .wait_for_mir => return,
@@ -220,6 +253,17 @@ fn flushTaskQueue(tid: usize, q: *Queue, comp: *Compilation) void {
220253
return;
221254
}
222255
link.doZcuTask(comp, tid, task);
256+
if (task == .link_func) {
257+
// Decrease `air_bytes_in_flight`, since we've finished processing this MIR.
258+
q.mutex.lock();
259+
defer q.mutex.unlock();
260+
q.air_bytes_in_flight -= task.link_func.air_bytes;
261+
if (q.air_bytes_waiting != 0 and
262+
q.air_bytes_in_flight <= max_air_bytes_in_flight -| q.air_bytes_waiting)
263+
{
264+
q.air_bytes_cond.signal();
265+
}
266+
}
223267
task.deinit(comp.zcu.?);
224268
q.wip_zcu_idx += 1;
225269
}

0 commit comments

Comments
 (0)