Skip to content

Commit e7407d9

Browse files
committed
Top-of-stack (TOS) caching for the interpreter. Work in progress.
1 parent ff2cb21 commit e7407d9

File tree

11 files changed

+5600
-826
lines changed

11 files changed

+5600
-826
lines changed

Include/internal/pycore_interpframe.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ static inline PyFunctionObject *_PyFrame_GetFunction(_PyInterpreterFrame *f) {
4343
}
4444

4545
static inline _PyStackRef *_PyFrame_Stackbase(_PyInterpreterFrame *f) {
46-
return (f->localsplus + _PyFrame_GetCode(f)->co_nlocalsplus);
46+
return (f->localsplus + _PyFrame_GetCode(f)->co_nlocalsplus + 1);
4747
}
4848

4949
static inline _PyStackRef _PyFrame_StackPeek(_PyInterpreterFrame *f) {
@@ -134,7 +134,7 @@ _PyFrame_Initialize(
134134
frame->f_builtins = func_obj->func_builtins;
135135
frame->f_globals = func_obj->func_globals;
136136
frame->f_locals = locals;
137-
frame->stackpointer = frame->localsplus + code->co_nlocalsplus;
137+
frame->stackpointer = frame->localsplus + code->co_nlocalsplus + 1;
138138
frame->frame_obj = NULL;
139139
#ifdef Py_GIL_DISABLED
140140
_PyFrame_InitializeTLBC(tstate, frame, code);
@@ -146,6 +146,7 @@ _PyFrame_Initialize(
146146
frame->owner = FRAME_OWNED_BY_THREAD;
147147
frame->visited = 0;
148148
#ifdef Py_DEBUG
149+
frame->localsplus[code->co_nlocalsplus] = PyStackRef_NULL;
149150
frame->lltrace = 0;
150151
#endif
151152

@@ -317,7 +318,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
317318
#endif
318319
frame->f_locals = NULL;
319320
assert(stackdepth <= code->co_stacksize);
320-
frame->stackpointer = frame->localsplus + code->co_nlocalsplus + stackdepth;
321+
frame->stackpointer = frame->localsplus + code->co_nlocalsplus + stackdepth + 1;
321322
frame->frame_obj = NULL;
322323
#ifdef Py_GIL_DISABLED
323324
_PyFrame_InitializeTLBC(tstate, frame, code);

Objects/codeobject.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
546546
/* derived values */
547547
co->co_nlocalsplus = nlocalsplus;
548548
co->co_nlocals = nlocals;
549-
co->co_framesize = nlocalsplus + con->stacksize + FRAME_SPECIALS_SIZE;
549+
co->co_framesize = nlocalsplus + con->stacksize + FRAME_SPECIALS_SIZE + 1;
550550
co->co_ncellvars = ncellvars;
551551
co->co_nfreevars = nfreevars;
552552
#ifdef Py_GIL_DISABLED

Python/bytecodes.c

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ dummy_func(
204204
ptrdiff_t off = this_instr - _PyFrame_GetBytecode(frame);
205205
frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index;
206206
frame->instr_ptr = bytecode + off;
207-
// Make sure this_instr gets reset correctley for any uops that
207+
// Make sure this_instr gets reset correctly for any uops that
208208
// follow
209209
next_instr = frame->instr_ptr;
210210
DISPATCH();
@@ -874,18 +874,19 @@ dummy_func(
874874
// Deopt unless 0 <= sub < PyList_Size(list)
875875
DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub));
876876
Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0];
877-
#ifdef Py_GIL_DISABLED
878-
PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index);
879-
DEOPT_IF(res_o == NULL);
880-
STAT_INC(BINARY_OP, hit);
881-
res = PyStackRef_FromPyObjectSteal(res_o);
882-
#else
883-
DEOPT_IF(index >= PyList_GET_SIZE(list));
884-
STAT_INC(BINARY_OP, hit);
885-
PyObject *res_o = PyList_GET_ITEM(list, index);
886-
assert(res_o != NULL);
887-
res = PyStackRef_FromPyObjectNew(res_o);
888-
#endif
877+
if (Py_FREE_THREADING) {
878+
PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index);
879+
DEOPT_IF(res_o == NULL);
880+
STAT_INC(BINARY_OP, hit);
881+
res = PyStackRef_FromPyObjectSteal(res_o);
882+
}
883+
else {
884+
DEOPT_IF(index >= PyList_GET_SIZE(list));
885+
STAT_INC(BINARY_OP, hit);
886+
PyObject *res_o = PyList_GET_ITEM(list, index);
887+
assert(res_o != NULL);
888+
res = PyStackRef_FromPyObjectNew(res_o);
889+
}
889890
STAT_INC(BINARY_OP, hit);
890891
DECREF_INPUTS();
891892
}
@@ -1111,7 +1112,6 @@ dummy_func(
11111112
tstate->current_frame = frame->previous;
11121113
assert(!_PyErr_Occurred(tstate));
11131114
PyObject *result = PyStackRef_AsPyObjectSteal(retval);
1114-
SYNC_SP(); /* Not strictly necessary, but prevents warnings */
11151115
return result;
11161116
}
11171117

@@ -3168,26 +3168,31 @@ dummy_func(
31683168
assert(Py_TYPE(iter_o) == &PyListIter_Type);
31693169
PyListObject *seq = it->it_seq;
31703170
assert(seq);
3171+
// The code generator doesn't understand #ifdef Py_GIL_DISABLED
3172+
// so put in some control flow
3173+
if (Py_FREE_THREADING) {
31713174
#ifdef Py_GIL_DISABLED
3172-
assert(_PyObject_IsUniquelyReferenced(iter_o));
3173-
assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) ||
3174-
_PyObject_GC_IS_SHARED(seq));
3175-
STAT_INC(FOR_ITER, hit);
3176-
int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next);
3177-
// A negative result means we lost a race with another thread
3178-
// and we need to take the slow path.
3179-
DEOPT_IF(result < 0);
3180-
if (result == 0) {
3181-
it->it_index = -1;
3182-
/* Jump forward oparg, then skip following END_FOR instruction */
3183-
JUMPBY(oparg + 1);
3184-
DISPATCH();
3185-
}
3186-
it->it_index++;
3187-
#else
3188-
assert(it->it_index < PyList_GET_SIZE(seq));
3189-
next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++));
3175+
assert(_PyObject_IsUniquelyReferenced(iter_o));
3176+
assert(_Py_IsOwnedByCurrentThread((PyObject *)seq) ||
3177+
_PyObject_GC_IS_SHARED(seq));
3178+
STAT_INC(FOR_ITER, hit);
3179+
int result = _PyList_GetItemRefNoLock(seq, it->it_index, &next);
3180+
// A negative result means we lost a race with another thread
3181+
// and we need to take the slow path.
3182+
DEOPT_IF(result < 0);
3183+
if (result == 0) {
3184+
it->it_index = -1;
3185+
/* Jump forward oparg, then skip following END_FOR instruction */
3186+
JUMPBY(oparg + 1);
3187+
DISPATCH();
3188+
}
3189+
it->it_index++;
31903190
#endif
3191+
}
3192+
else {
3193+
assert(it->it_index < PyList_GET_SIZE(seq));
3194+
next = PyStackRef_FromPyObjectNew(PyList_GET_ITEM(seq, it->it_index++));
3195+
}
31913196
}
31923197

31933198
// Only used by Tier 2
@@ -3994,8 +3999,9 @@ dummy_func(
39943999
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
39954000
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
39964001
assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE);
4002+
shim->localsplus[0] = PyStackRef_NULL;
39974003
/* Push self onto stack of shim */
3998-
shim->localsplus[0] = PyStackRef_DUP(self[0]);
4004+
shim->localsplus[1] = PyStackRef_DUP(self[0]);
39994005
_PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
40004006
tstate, init[0], NULL, args-1, oparg+1, NULL, shim);
40014007
DEAD(init);
@@ -4022,7 +4028,6 @@ dummy_func(
40224028
_PUSH_FRAME;
40234029

40244030
inst(EXIT_INIT_CHECK, (should_be_none -- )) {
4025-
assert(STACK_LEVEL() == 2);
40264031
if (!PyStackRef_IsNone(should_be_none)) {
40274032
PyErr_Format(PyExc_TypeError,
40284033
"__init__() should return None, not '%.200s'",

Python/ceval.c

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -151,18 +151,6 @@ dump_item(_PyStackRef item)
151151
printf("<nil>");
152152
return;
153153
}
154-
if (
155-
obj == Py_None
156-
|| PyBool_Check(obj)
157-
|| PyLong_CheckExact(obj)
158-
|| PyFloat_CheckExact(obj)
159-
|| PyUnicode_CheckExact(obj)
160-
) {
161-
if (PyObject_Print(obj, stdout, 0) == 0) {
162-
return;
163-
}
164-
PyErr_Clear();
165-
}
166154
// Don't call __repr__(), it might recurse into the interpreter.
167155
printf("<%s at %p>", Py_TYPE(obj)->tp_name, (void *)obj);
168156
}
@@ -197,6 +185,7 @@ dump_stack(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer)
197185

198186
static void
199187
lltrace_instruction(_PyInterpreterFrame *frame,
188+
_PyStackRef _tos,
200189
_PyStackRef *stack_pointer,
201190
_Py_CODEUNIT *next_instr,
202191
int opcode,
@@ -205,6 +194,9 @@ lltrace_instruction(_PyInterpreterFrame *frame,
205194
if (frame->owner >= FRAME_OWNED_BY_INTERPRETER) {
206195
return;
207196
}
197+
printf("_tos = ");
198+
dump_item(_tos);
199+
printf("; ");
208200
dump_stack(frame, stack_pointer);
209201
const char *opname = _PyOpcode_OpName[opcode];
210202
assert(opname != NULL);
@@ -274,6 +266,7 @@ maybe_lltrace_resume_frame(_PyInterpreterFrame *frame, PyObject *globals)
274266
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
275267
}
276268
}
269+
lltrace = 5;
277270
if (lltrace >= 5) {
278271
lltrace_resume_frame(frame);
279272
}
@@ -950,6 +943,13 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch)
950943
/* This setting is reversed below following _PyEval_EvalFrameDefault */
951944
#endif
952945

946+
#ifdef Py_GIL_DISABLED
947+
#define Py_FREE_THREADING 1
948+
#else
949+
#define Py_FREE_THREADING 0
950+
#endif
951+
952+
953953
#if Py_TAIL_CALL_INTERP
954954
#include "opcode_targets.h"
955955
#include "generated_cases.c.h"
@@ -986,7 +986,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
986986
* These are cached values from the frame and code object. */
987987
_Py_CODEUNIT *next_instr;
988988
_PyStackRef *stack_pointer;
989-
989+
#ifdef Py_DEBUG
990+
entry_frame.localsplus[0] = PyStackRef_NULL;
991+
#endif
990992
#if defined(Py_DEBUG) && !defined(Py_STACKREF_DEBUG)
991993
/* Set these to invalid but identifiable values for debugging. */
992994
entry_frame.f_funcobj = (_PyStackRef){.bits = 0xaaa0};
@@ -997,7 +999,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
997999
#endif
9981000
entry_frame.f_executable = PyStackRef_None;
9991001
entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1;
1000-
entry_frame.stackpointer = entry_frame.localsplus;
1002+
entry_frame.stackpointer = entry_frame.localsplus + 1;
10011003
entry_frame.owner = FRAME_OWNED_BY_INTERPRETER;
10021004
entry_frame.visited = 0;
10031005
entry_frame.return_offset = 0;
@@ -1044,7 +1046,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
10441046
_PyExecutorObject *current_executor = NULL;
10451047
const _PyUOpInstruction *next_uop = NULL;
10461048
#endif
1047-
1049+
_PyStackRef _tos;
10481050
#if Py_TAIL_CALL_INTERP
10491051
return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, 0);
10501052
#else

Python/ceval_macros.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@
110110
/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
111111
#ifdef Py_DEBUG
112112
#define PRE_DISPATCH_GOTO() if (frame->lltrace >= 5) { \
113-
lltrace_instruction(frame, stack_pointer, next_instr, opcode, oparg); }
113+
lltrace_instruction(frame, _tos, stack_pointer, next_instr, opcode, oparg); }
114114
#else
115115
#define PRE_DISPATCH_GOTO() ((void)0)
116116
#endif
@@ -238,7 +238,7 @@ GETITEM(PyObject *v, Py_ssize_t i) {
238238
#endif
239239

240240
#define WITHIN_STACK_BOUNDS() \
241-
(frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE()))
241+
(frame->owner == FRAME_OWNED_BY_INTERPRETER || (STACK_LEVEL() >= -1 && STACK_LEVEL() <= STACK_SIZE()))
242242

243243
/* Data access macros */
244244
#define FRAME_CO_CONSTS (_PyFrame_GetCode(frame)->co_consts)

0 commit comments

Comments
 (0)