Skip to content

Commit 445694c

Browse files
committed
Auto merge of #141406 - RalfJung:less-force-allocate, r=oli-obk
interpret: do not force_allocate all return places A while ago I cleaned up our `PlaceTy` a little, but as a side-effect of that, return places had to always be force-allocated. That turns out to cause quite a few extra allocations, and for a project we are doing where we marry Miri with a model checker, that means a lot of extra work -- local variables are just so much easier to reason about than allocations. So, this PR brings back the ability to have the return place be just a local of the caller. To make this work cleanly I had to rework stack pop handling a bit, which also changes the output of Miri in some cases as the span for errors occurring during a particular phase of stack pop changed. With these changes, a no-std binary with a function of functions that just take and return scalar types and that uses no pointers now does not move *any* local variables into memory. :) r? `@oli-obk`
2 parents d717586 + 1d514f5 commit 445694c

File tree

10 files changed

+54
-53
lines changed

10 files changed

+54
-53
lines changed

src/concurrency/thread.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -218,34 +218,37 @@ impl<'tcx> Thread<'tcx> {
218218
}
219219
}
220220

221-
/// Return the top user-relevant frame, if there is one.
221+
/// Return the top user-relevant frame, if there is one. `skip` indicates how many top frames
222+
/// should be skipped.
222223
/// Note that the choice to return `None` here when there is no user-relevant frame is part of
223224
/// justifying the optimization that only pushes of user-relevant frames require updating the
224225
/// `top_user_relevant_frame` field.
225-
fn compute_top_user_relevant_frame(&self) -> Option<usize> {
226+
fn compute_top_user_relevant_frame(&self, skip: usize) -> Option<usize> {
226227
self.stack
227228
.iter()
228229
.enumerate()
229230
.rev()
231+
.skip(skip)
230232
.find_map(|(idx, frame)| if frame.extra.is_user_relevant { Some(idx) } else { None })
231233
}
232234

233-
/// Re-compute the top user-relevant frame from scratch.
234-
pub fn recompute_top_user_relevant_frame(&mut self) {
235-
self.top_user_relevant_frame = self.compute_top_user_relevant_frame();
235+
/// Re-compute the top user-relevant frame from scratch. `skip` indicates how many top frames
236+
/// should be skipped.
237+
pub fn recompute_top_user_relevant_frame(&mut self, skip: usize) {
238+
self.top_user_relevant_frame = self.compute_top_user_relevant_frame(skip);
236239
}
237240

238241
/// Set the top user-relevant frame to the given value. Must be equal to what
239242
/// `get_top_user_relevant_frame` would return!
240243
pub fn set_top_user_relevant_frame(&mut self, frame_idx: usize) {
241-
debug_assert_eq!(Some(frame_idx), self.compute_top_user_relevant_frame());
244+
debug_assert_eq!(Some(frame_idx), self.compute_top_user_relevant_frame(0));
242245
self.top_user_relevant_frame = Some(frame_idx);
243246
}
244247

245248
/// Returns the topmost frame that is considered user-relevant, or the
246249
/// top of the stack if there is no such frame, or `None` if the stack is empty.
247250
pub fn top_user_relevant_frame(&self) -> Option<usize> {
248-
debug_assert_eq!(self.top_user_relevant_frame, self.compute_top_user_relevant_frame());
251+
debug_assert_eq!(self.top_user_relevant_frame, self.compute_top_user_relevant_frame(0));
249252
// This can be called upon creation of an allocation. We create allocations while setting up
250253
// parts of the Rust runtime when we do not have any stack frames yet, so we need to handle
251254
// empty stacks.

src/eval.rs

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -354,11 +354,10 @@ pub fn create_ecx<'tcx>(
354354
argvs.push(arg_place.to_ref(&ecx));
355355
}
356356
// Make an array with all these pointers, in the Miri memory.
357-
let argvs_layout = ecx.layout_of(Ty::new_array(
358-
tcx,
359-
Ty::new_imm_ptr(tcx, tcx.types.u8),
360-
u64::try_from(argvs.len()).unwrap(),
361-
))?;
357+
let u8_ptr_type = Ty::new_imm_ptr(tcx, tcx.types.u8);
358+
let u8_ptr_ptr_type = Ty::new_imm_ptr(tcx, u8_ptr_type);
359+
let argvs_layout =
360+
ecx.layout_of(Ty::new_array(tcx, u8_ptr_type, u64::try_from(argvs.len()).unwrap()))?;
362361
let argvs_place = ecx.allocate(argvs_layout, MiriMemoryKind::Machine.into())?;
363362
for (idx, arg) in argvs.into_iter().enumerate() {
364363
let place = ecx.project_field(&argvs_place, idx)?;
@@ -373,10 +372,8 @@ pub fn create_ecx<'tcx>(
373372
ecx.mark_immutable(&argc_place);
374373
ecx.machine.argc = Some(argc_place.ptr());
375374

376-
let argv_place = ecx.allocate(
377-
ecx.layout_of(Ty::new_imm_ptr(tcx, tcx.types.unit))?,
378-
MiriMemoryKind::Machine.into(),
379-
)?;
375+
let argv_place =
376+
ecx.allocate(ecx.layout_of(u8_ptr_ptr_type)?, MiriMemoryKind::Machine.into())?;
380377
ecx.write_pointer(argvs_place.ptr(), &argv_place)?;
381378
ecx.mark_immutable(&argv_place);
382379
ecx.machine.argv = Some(argv_place.ptr());
@@ -398,7 +395,9 @@ pub fn create_ecx<'tcx>(
398395
}
399396
ecx.mark_immutable(&cmd_place);
400397
}
401-
ecx.mplace_to_ref(&argvs_place)?
398+
let imm = argvs_place.to_ref(&ecx);
399+
let layout = ecx.layout_of(u8_ptr_ptr_type)?;
400+
ImmTy::from_immediate(imm, layout)
402401
};
403402

404403
// Return place (in static memory so that it does not count as leak).

src/helpers.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
470470
caller_fn_abi,
471471
&args.iter().map(|a| FnArg::Copy(a.clone().into())).collect::<Vec<_>>(),
472472
/*with_caller_location*/ false,
473-
&dest,
473+
&dest.into(),
474474
stack_pop,
475475
)
476476
}

src/intrinsics/mod.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
2222
&mut self,
2323
instance: ty::Instance<'tcx>,
2424
args: &[OpTy<'tcx>],
25-
dest: &MPlaceTy<'tcx>,
25+
dest: &PlaceTy<'tcx>,
2626
ret: Option<mir::BasicBlock>,
2727
unwind: mir::UnwindAction,
2828
) -> InterpResult<'tcx, Option<ty::Instance<'tcx>>> {
@@ -45,7 +45,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
4545
let intrinsic_name = this.tcx.item_name(instance.def_id());
4646
let intrinsic_name = intrinsic_name.as_str();
4747

48-
match this.emulate_intrinsic_by_name(intrinsic_name, instance.args, args, dest, ret)? {
48+
// FIXME: avoid allocating memory
49+
let dest = this.force_allocation(dest)?;
50+
51+
match this.emulate_intrinsic_by_name(intrinsic_name, instance.args, args, &dest, ret)? {
4952
EmulateItemResult::NotSupported => {
5053
// We haven't handled the intrinsic, let's see if we can use a fallback body.
5154
if this.tcx.intrinsic(instance.def_id()).unwrap().must_be_overridden {

src/machine.rs

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,7 +1115,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
11151115
instance: ty::Instance<'tcx>,
11161116
abi: &FnAbi<'tcx, Ty<'tcx>>,
11171117
args: &[FnArg<'tcx, Provenance>],
1118-
dest: &MPlaceTy<'tcx>,
1118+
dest: &PlaceTy<'tcx>,
11191119
ret: Option<mir::BasicBlock>,
11201120
unwind: mir::UnwindAction,
11211121
) -> InterpResult<'tcx, Option<(&'tcx mir::Body<'tcx>, ty::Instance<'tcx>)>> {
@@ -1142,7 +1142,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
11421142
fn_val: DynSym,
11431143
abi: &FnAbi<'tcx, Ty<'tcx>>,
11441144
args: &[FnArg<'tcx, Provenance>],
1145-
dest: &MPlaceTy<'tcx>,
1145+
dest: &PlaceTy<'tcx>,
11461146
ret: Option<mir::BasicBlock>,
11471147
unwind: mir::UnwindAction,
11481148
) -> InterpResult<'tcx> {
@@ -1155,7 +1155,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
11551155
ecx: &mut MiriInterpCx<'tcx>,
11561156
instance: ty::Instance<'tcx>,
11571157
args: &[OpTy<'tcx>],
1158-
dest: &MPlaceTy<'tcx>,
1158+
dest: &PlaceTy<'tcx>,
11591159
ret: Option<mir::BasicBlock>,
11601160
unwind: mir::UnwindAction,
11611161
) -> InterpResult<'tcx, Option<ty::Instance<'tcx>>> {
@@ -1634,15 +1634,21 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
16341634
interp_ok(())
16351635
}
16361636

1637-
fn before_stack_pop(
1638-
ecx: &InterpCx<'tcx, Self>,
1639-
frame: &Frame<'tcx, Self::Provenance, Self::FrameExtra>,
1640-
) -> InterpResult<'tcx> {
1637+
fn before_stack_pop(ecx: &mut InterpCx<'tcx, Self>) -> InterpResult<'tcx> {
1638+
let frame = ecx.frame();
16411639
// We want this *before* the return value copy, because the return place itself is protected
16421640
// until we do `end_call` here.
16431641
if ecx.machine.borrow_tracker.is_some() {
16441642
ecx.on_stack_pop(frame)?;
16451643
}
1644+
if frame.extra.is_user_relevant {
1645+
// All that we store is whether or not the frame we just removed is local, so now we
1646+
// have no idea where the next topmost local frame is. So we recompute it.
1647+
// (If this ever becomes a bottleneck, we could have `push` store the previous
1648+
// user-relevant frame and restore that here.)
1649+
// We have to skip the frame that is just being popped.
1650+
ecx.active_thread_mut().recompute_top_user_relevant_frame(/* skip */ 1);
1651+
}
16461652
// tracing-tree can autoamtically annotate scope changes, but it gets very confused by our
16471653
// concurrency and what it prints is just plain wrong. So we print our own information
16481654
// instead. (Cc https://github.com/rust-lang/miri/issues/2266)
@@ -1656,15 +1662,8 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
16561662
frame: Frame<'tcx, Provenance, FrameExtra<'tcx>>,
16571663
unwinding: bool,
16581664
) -> InterpResult<'tcx, ReturnAction> {
1659-
if frame.extra.is_user_relevant {
1660-
// All that we store is whether or not the frame we just removed is local, so now we
1661-
// have no idea where the next topmost local frame is. So we recompute it.
1662-
// (If this ever becomes a bottleneck, we could have `push` store the previous
1663-
// user-relevant frame and restore that here.)
1664-
ecx.active_thread_mut().recompute_top_user_relevant_frame();
1665-
}
16661665
let res = {
1667-
// Move `frame`` into a sub-scope so we control when it will be dropped.
1666+
// Move `frame` into a sub-scope so we control when it will be dropped.
16681667
let mut frame = frame;
16691668
let timing = frame.extra.timing.take();
16701669
let res = ecx.handle_stack_pop_unwind(frame.extra, unwinding);

src/shims/foreign_items.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
4343
link_name: Symbol,
4444
abi: &FnAbi<'tcx, Ty<'tcx>>,
4545
args: &[OpTy<'tcx>],
46-
dest: &MPlaceTy<'tcx>,
46+
dest: &PlaceTy<'tcx>,
4747
ret: Option<mir::BasicBlock>,
4848
unwind: mir::UnwindAction,
4949
) -> InterpResult<'tcx, Option<(&'tcx mir::Body<'tcx>, ty::Instance<'tcx>)>> {
@@ -69,8 +69,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
6969
_ => {}
7070
}
7171

72+
// FIXME: avoid allocating memory
73+
let dest = this.force_allocation(dest)?;
74+
7275
// The rest either implements the logic, or falls back to `lookup_exported_symbol`.
73-
match this.emulate_foreign_item_inner(link_name, abi, args, dest)? {
76+
match this.emulate_foreign_item_inner(link_name, abi, args, &dest)? {
7477
EmulateItemResult::NeedsReturn => {
7578
trace!("{:?}", this.dump_place(&dest.clone().into()));
7679
this.return_to_block(ret)?;
@@ -111,7 +114,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
111114
sym: DynSym,
112115
abi: &FnAbi<'tcx, Ty<'tcx>>,
113116
args: &[OpTy<'tcx>],
114-
dest: &MPlaceTy<'tcx>,
117+
dest: &PlaceTy<'tcx>,
115118
ret: Option<mir::BasicBlock>,
116119
unwind: mir::UnwindAction,
117120
) -> InterpResult<'tcx> {

tests/fail/data_race/stack_pop_race.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ struct MakeSend(*const i32);
88
unsafe impl Send for MakeSend {}
99

1010
fn main() {
11-
race(0);
11+
race(0); //~ERROR: Data race detected between (1) non-atomic read on thread `unnamed-1` and (2) deallocation on thread `main`
1212
}
1313

1414
// Using an argument for the ptr to point to, since those do not get StorageDead.
@@ -22,5 +22,4 @@ fn race(local: i32) {
2222
thread::yield_now();
2323
// Deallocating the local (when `main` returns)
2424
// races with the read in the other thread.
25-
// Make sure the error points at this function's end, not just the call site.
26-
} //~ERROR: Data race detected between (1) non-atomic read on thread `unnamed-1` and (2) deallocation on thread `main`
25+
}

tests/fail/data_race/stack_pop_race.stderr

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
error: Undefined Behavior: Data race detected between (1) non-atomic read on thread `unnamed-ID` and (2) deallocation on thread `main` at ALLOC. (2) just happened here
22
--> tests/fail/data_race/stack_pop_race.rs:LL:CC
33
|
4-
LL | }
5-
| ^ Data race detected between (1) non-atomic read on thread `unnamed-ID` and (2) deallocation on thread `main` at ALLOC. (2) just happened here
4+
LL | race(0);
5+
| ^^^^^^^ Data race detected between (1) non-atomic read on thread `unnamed-ID` and (2) deallocation on thread `main` at ALLOC. (2) just happened here
66
|
77
help: and (1) occurred earlier here
88
--> tests/fail/data_race/stack_pop_race.rs:LL:CC
@@ -12,12 +12,7 @@ LL | let _val = unsafe { *ptr.0 };
1212
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
1313
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
1414
= note: BACKTRACE (of the first span):
15-
= note: inside `race` at tests/fail/data_race/stack_pop_race.rs:LL:CC
16-
note: inside `main`
17-
--> tests/fail/data_race/stack_pop_race.rs:LL:CC
18-
|
19-
LL | race(0);
20-
| ^^^^^^^
15+
= note: inside `main` at tests/fail/data_race/stack_pop_race.rs:LL:CC
2116

2217
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
2318

tests/fail/tail_calls/dangling-local-var.stderr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ LL | let local = 0;
1414
help: ALLOC was deallocated here:
1515
--> tests/fail/tail_calls/dangling-local-var.rs:LL:CC
1616
|
17-
LL | become g(ptr)
18-
| ^^^^^^^^^^^^^
17+
LL | f(std::ptr::null());
18+
| ^^^^^^^^^^^^^^^^^^^
1919
= note: BACKTRACE (of the first span):
2020
= note: inside `g` at tests/fail/tail_calls/dangling-local-var.rs:LL:CC
2121
note: inside `main`

tests/pass/alloc-access-tracking.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#![no_std]
22
#![no_main]
3-
//@compile-flags: -Zmiri-track-alloc-id=21 -Zmiri-track-alloc-accesses -Cpanic=abort
4-
//@normalize-stderr-test: "id 21" -> "id $$ALLOC"
3+
//@compile-flags: -Zmiri-track-alloc-id=20 -Zmiri-track-alloc-accesses -Cpanic=abort
4+
//@normalize-stderr-test: "id 20" -> "id $$ALLOC"
55
//@only-target: linux # alloc IDs differ between OSes (due to extern static allocations)
66

77
extern "Rust" {

0 commit comments

Comments
 (0)