Mailing List Archive

GH-118095: Handle `RETURN_GENERATOR` in tier 2 (GH-118180)
https://github.com/python/cpython/commit/f180b31e7629d36265fa36f1560365358b4fd47c
commit: f180b31e7629d36265fa36f1560365358b4fd47c
branch: main
author: Mark Shannon <mark@hotpy.org>
committer: markshannon <mark@hotpy.org>
date: 2024-04-25T11:32:47+01:00
summary:

GH-118095: Handle `RETURN_GENERATOR` in tier 2 (GH-118180)

files:
M Include/internal/pycore_ceval.h
M Include/internal/pycore_frame.h
M Include/internal/pycore_opcode_metadata.h
M Include/internal/pycore_uop_ids.h
M Include/internal/pycore_uop_metadata.h
M Lib/test/test_capi/test_opt.py
M Objects/frameobject.c
M Python/bytecodes.c
M Python/ceval_macros.h
M Python/executor_cases.c.h
M Python/frame.c
M Python/generated_cases.c.h
M Python/optimizer.c
M Python/optimizer_analysis.c
M Python/optimizer_bytecodes.c
M Python/optimizer_cases.c.h

diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
index 946f82ae3c20e3..8d88b5c1d15cb8 100644
--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -182,7 +182,7 @@ static inline void _Py_LeaveRecursiveCall(void) {

extern struct _PyInterpreterFrame* _PyEval_GetFrame(void);

-extern PyObject* _Py_MakeCoro(PyFunctionObject *func);
+PyAPI_FUNC(PyObject *)_Py_MakeCoro(PyFunctionObject *func);

/* Handle signals, pending calls, GIL drop request
and asynchronous exception */
diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h
index 74d9e4cac72c0e..f913928f38bd05 100644
--- a/Include/internal/pycore_frame.h
+++ b/Include/internal/pycore_frame.h
@@ -110,7 +110,17 @@ _PyFrame_NumSlotsForCodeObject(PyCodeObject *code)
return code->co_framesize - FRAME_SPECIALS_SIZE;
}

-void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest);
+static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest)
+{
+ assert(src->stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus);
+ *dest = *src;
+ for (int i = 1; i < src->stacktop; i++) {
+ dest->localsplus[i] = src->localsplus[i];
+ }
+ // Don't leave a dangling pointer to the old frame when creating generators
+ // and coroutines:
+ dest->previous = NULL;
+}

/* Consumes reference to func and locals.
Does not initialize frame->previous, which happens
@@ -256,7 +266,7 @@ _PyThreadState_HasStackSpace(PyThreadState *tstate, int size)
extern _PyInterpreterFrame *
_PyThreadState_PushFrame(PyThreadState *tstate, size_t size);

-void _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame *frame);
+PyAPI_FUNC(void) _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame *frame);

/* Pushes a frame without checking for space.
* Must be guarded by _PyThreadState_HasStackSpace()
diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h
index 5636debbf4a7f2..400d7c334db8e7 100644
--- a/Include/internal/pycore_opcode_metadata.h
+++ b/Include/internal/pycore_opcode_metadata.h
@@ -805,7 +805,7 @@ int _PyOpcode_num_pushed(int opcode, int oparg) {
case RETURN_CONST:
return 0;
case RETURN_GENERATOR:
- return 0;
+ return 1;
case RETURN_VALUE:
return 0;
case SEND:
@@ -1310,6 +1310,7 @@ _PyOpcode_macro_expansion[256] = {
[PUSH_NULL] = { .nuops = 1, .uops = { { _PUSH_NULL, 0, 0 } } },
[RESUME_CHECK] = { .nuops = 1, .uops = { { _RESUME_CHECK, 0, 0 } } },
[RETURN_CONST] = { .nuops = 2, .uops = { { _LOAD_CONST, 0, 0 }, { _POP_FRAME, 0, 0 } } },
+ [RETURN_GENERATOR] = { .nuops = 1, .uops = { { _RETURN_GENERATOR, 0, 0 } } },
[RETURN_VALUE] = { .nuops = 1, .uops = { { _POP_FRAME, 0, 0 } } },
[SETUP_ANNOTATIONS] = { .nuops = 1, .uops = { { _SETUP_ANNOTATIONS, 0, 0 } } },
[SET_ADD] = { .nuops = 1, .uops = { { _SET_ADD, 0, 0 } } },
diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h
index f0558743b32f5e..bb49d6e77d2562 100644
--- a/Include/internal/pycore_uop_ids.h
+++ b/Include/internal/pycore_uop_ids.h
@@ -231,6 +231,7 @@ extern "C" {
#define _PUSH_NULL PUSH_NULL
#define _REPLACE_WITH_TRUE 424
#define _RESUME_CHECK RESUME_CHECK
+#define _RETURN_GENERATOR RETURN_GENERATOR
#define _SAVE_RETURN_OFFSET 425
#define _SEND 426
#define _SEND_GEN SEND_GEN
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h
index 2da4c4d4e21e93..b8cdfae8391460 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -219,6 +219,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_CALL_METHOD_DESCRIPTOR_FAST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
[_MAKE_FUNCTION] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
[_SET_FUNCTION_ATTRIBUTE] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG,
+ [_RETURN_GENERATOR] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG,
[_BUILD_SLICE] = HAS_ARG_FLAG | HAS_ERROR_FLAG,
[_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG,
[_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
@@ -445,6 +446,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_PUSH_NULL] = "_PUSH_NULL",
[_REPLACE_WITH_TRUE] = "_REPLACE_WITH_TRUE",
[_RESUME_CHECK] = "_RESUME_CHECK",
+ [_RETURN_GENERATOR] = "_RETURN_GENERATOR",
[_SAVE_RETURN_OFFSET] = "_SAVE_RETURN_OFFSET",
[_SETUP_ANNOTATIONS] = "_SETUP_ANNOTATIONS",
[_SET_ADD] = "_SET_ADD",
@@ -894,6 +896,8 @@ int _PyUop_num_popped(int opcode, int oparg)
return 1;
case _SET_FUNCTION_ATTRIBUTE:
return 2;
+ case _RETURN_GENERATOR:
+ return 0;
case _BUILD_SLICE:
return 2 + ((oparg == 3) ? 1 : 0);
case _CONVERT_VALUE:
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index c004f463770019..e2e772a52d764e 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -1286,5 +1286,17 @@ def testfunc(n):
self.assertEqual(res, 32 * 32)
self.assertIsNone(ex)

+ def test_return_generator(self):
+ def gen():
+ yield None
+ def testfunc(n):
+ for i in range(n):
+ gen()
+ return i
+ res, ex = self._run_with_optimizer(testfunc, 20)
+ self.assertEqual(res, 19)
+ self.assertIsNotNone(ex)
+ self.assertIn("_RETURN_GENERATOR", get_opnames(ex))
+
if __name__ == "__main__":
unittest.main()
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index d55c246d80dd6a..07b7ef3df46a5c 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -304,11 +304,6 @@ mark_stacks(PyCodeObject *code_obj, int len)
stacks[i] = UNINITIALIZED;
}
stacks[0] = EMPTY_STACK;
- if (code_obj->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR))
- {
- // Generators get sent None while starting:
- stacks[0] = push_value(stacks[0], Object);
- }
int todo = 1;
while (todo) {
todo = 0;
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index c31617d35b02f5..485504914912f9 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -837,12 +837,7 @@ dummy_func(
_PyFrame_StackPush(frame, retval);
LOAD_SP();
LOAD_IP(frame->return_offset);
-#if LLTRACE && TIER_ONE
- lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS());
- if (lltrace < 0) {
- goto exit_unwind;
- }
-#endif
+ LLTRACE_RESUME_FRAME();
}

macro(RETURN_VALUE) =
@@ -3186,12 +3181,7 @@ dummy_func(
tstate->py_recursion_remaining--;
LOAD_SP();
LOAD_IP(0);
-#if LLTRACE && TIER_ONE
- lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS());
- if (lltrace < 0) {
- goto exit_unwind;
- }
-#endif
+ LLTRACE_RESUME_FRAME();
}

macro(CALL_BOUND_METHOD_EXACT_ARGS) =
@@ -3877,7 +3867,7 @@ dummy_func(
}
}

- tier1 inst(RETURN_GENERATOR, (--)) {
+ inst(RETURN_GENERATOR, (-- res)) {
assert(PyFunction_Check(frame->f_funcobj));
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func);
@@ -3887,19 +3877,19 @@ dummy_func(
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
- frame->instr_ptr = next_instr;
+ frame->instr_ptr++;
_PyFrame_Copy(frame, gen_frame);
assert(frame->frame_obj == NULL);
gen->gi_frame_state = FRAME_CREATED;
gen_frame->owner = FRAME_OWNED_BY_GENERATOR;
_Py_LeaveRecursiveCallPy(tstate);
- assert(frame != &entry_frame);
+ res = (PyObject *)gen;
_PyInterpreterFrame *prev = frame->previous;
_PyThreadState_PopFrame(tstate, frame);
frame = tstate->current_frame = prev;
- _PyFrame_StackPush(frame, (PyObject *)gen);
LOAD_IP(frame->return_offset);
- goto resume_frame;
+ LOAD_SP();
+ LLTRACE_RESUME_FRAME();
}

inst(BUILD_SLICE, (start, stop, step if (oparg == 3) -- slice)) {
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
index 224cd1da7d4a0e..871d1747e2bb8d 100644
--- a/Python/ceval_macros.h
+++ b/Python/ceval_macros.h
@@ -86,6 +86,18 @@
#define PRE_DISPATCH_GOTO() ((void)0)
#endif

+#if LLTRACE
+#define LLTRACE_RESUME_FRAME() \
+do { \
+ lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); \
+ if (lltrace < 0) { \
+ goto exit_unwind; \
+ } \
+} while (0)
+#else
+#define LLTRACE_RESUME_FRAME() ((void)0)
+#endif
+
#ifdef Py_GIL_DISABLED
#define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr)
#else
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 7403d6fdaf0e2b..1eb3da9b70002c 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -988,12 +988,7 @@
_PyFrame_StackPush(frame, retval);
LOAD_SP();
LOAD_IP(frame->return_offset);
- #if LLTRACE && TIER_ONE
- lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS());
- if (lltrace < 0) {
- goto exit_unwind;
- }
- #endif
+ LLTRACE_RESUME_FRAME();
break;
}

@@ -3213,12 +3208,7 @@
tstate->py_recursion_remaining--;
LOAD_SP();
LOAD_IP(0);
- #if LLTRACE && TIER_ONE
- lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS());
- if (lltrace < 0) {
- goto exit_unwind;
- }
- #endif
+ LLTRACE_RESUME_FRAME();
break;
}

@@ -3833,6 +3823,35 @@
break;
}

+ case _RETURN_GENERATOR: {
+ PyObject *res;
+ assert(PyFunction_Check(frame->f_funcobj));
+ PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
+ PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func);
+ if (gen == NULL) {
+ JUMP_TO_ERROR();
+ }
+ assert(EMPTY());
+ _PyFrame_SetStackPointer(frame, stack_pointer);
+ _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
+ frame->instr_ptr++;
+ _PyFrame_Copy(frame, gen_frame);
+ assert(frame->frame_obj == NULL);
+ gen->gi_frame_state = FRAME_CREATED;
+ gen_frame->owner = FRAME_OWNED_BY_GENERATOR;
+ _Py_LeaveRecursiveCallPy(tstate);
+ res = (PyObject *)gen;
+ _PyInterpreterFrame *prev = frame->previous;
+ _PyThreadState_PopFrame(tstate, frame);
+ frame = tstate->current_frame = prev;
+ LOAD_IP(frame->return_offset);
+ LOAD_SP();
+ LLTRACE_RESUME_FRAME();
+ stack_pointer[0] = res;
+ stack_pointer += 1;
+ break;
+ }
+
case _BUILD_SLICE: {
PyObject *step = NULL;
PyObject *stop;
diff --git a/Python/frame.c b/Python/frame.c
index f88a8f0d73d3f8..db9d13359a23ca 100644
--- a/Python/frame.c
+++ b/Python/frame.c
@@ -53,18 +53,6 @@ _PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame *frame)
return f;
}

-void
-_PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest)
-{
- assert(src->stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus);
- Py_ssize_t size = ((char*)&src->localsplus[src->stacktop]) - (char *)src;
- memcpy(dest, src, size);
- // Don't leave a dangling pointer to the old frame when creating generators
- // and coroutines:
- dest->previous = NULL;
-}
-
-
static void
take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame)
{
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 058cac8bedd917..0c58f3f87d4041 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -997,12 +997,7 @@
tstate->py_recursion_remaining--;
LOAD_SP();
LOAD_IP(0);
- #if LLTRACE && TIER_ONE
- lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS());
- if (lltrace < 0) {
- goto exit_unwind;
- }
- #endif
+ LLTRACE_RESUME_FRAME();
}
DISPATCH();
}
@@ -1786,12 +1781,7 @@
tstate->py_recursion_remaining--;
LOAD_SP();
LOAD_IP(0);
- #if LLTRACE && TIER_ONE
- lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS());
- if (lltrace < 0) {
- goto exit_unwind;
- }
- #endif
+ LLTRACE_RESUME_FRAME();
}
DISPATCH();
}
@@ -4992,12 +4982,7 @@
_PyFrame_StackPush(frame, retval);
LOAD_SP();
LOAD_IP(frame->return_offset);
- #if LLTRACE && TIER_ONE
- lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS());
- if (lltrace < 0) {
- goto exit_unwind;
- }
- #endif
+ LLTRACE_RESUME_FRAME();
}
DISPATCH();
}
@@ -5006,6 +4991,7 @@
frame->instr_ptr = next_instr;
next_instr += 1;
INSTRUCTION_STATS(RETURN_GENERATOR);
+ PyObject *res;
assert(PyFunction_Check(frame->f_funcobj));
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func);
@@ -5015,19 +5001,22 @@
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe;
- frame->instr_ptr = next_instr;
+ frame->instr_ptr++;
_PyFrame_Copy(frame, gen_frame);
assert(frame->frame_obj == NULL);
gen->gi_frame_state = FRAME_CREATED;
gen_frame->owner = FRAME_OWNED_BY_GENERATOR;
_Py_LeaveRecursiveCallPy(tstate);
- assert(frame != &entry_frame);
+ res = (PyObject *)gen;
_PyInterpreterFrame *prev = frame->previous;
_PyThreadState_PopFrame(tstate, frame);
frame = tstate->current_frame = prev;
- _PyFrame_StackPush(frame, (PyObject *)gen);
LOAD_IP(frame->return_offset);
- goto resume_frame;
+ LOAD_SP();
+ LLTRACE_RESUME_FRAME();
+ stack_pointer[0] = res;
+ stack_pointer += 1;
+ DISPATCH();
}

TARGET(RETURN_VALUE) {
@@ -5050,12 +5039,7 @@
_PyFrame_StackPush(frame, retval);
LOAD_SP();
LOAD_IP(frame->return_offset);
- #if LLTRACE && TIER_ONE
- lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS());
- if (lltrace < 0) {
- goto exit_unwind;
- }
- #endif
+ LLTRACE_RESUME_FRAME();
DISPATCH();
}

diff --git a/Python/optimizer.c b/Python/optimizer.c
index b17c2998e2504b..e5c70f72f9c324 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -697,7 +697,8 @@ translate_bytecode_to_trace(
// Reserve space for nuops (+ _SET_IP + _EXIT_TRACE)
int nuops = expansion->nuops;
RESERVE(nuops + 1); /* One extra for exit */
- if (expansion->uops[nuops-1].uop == _POP_FRAME) {
+ int16_t last_op = expansion->uops[nuops-1].uop;
+ if (last_op == _POP_FRAME || last_op == _RETURN_GENERATOR) {
// Check for trace stack underflow now:
// We can't bail e.g. in the middle of
// LOAD_CONST + _POP_FRAME.
@@ -756,7 +757,7 @@ translate_bytecode_to_trace(
Py_FatalError("garbled expansion");
}

- if (uop == _POP_FRAME) {
+ if (uop == _POP_FRAME || uop == _RETURN_GENERATOR) {
TRACE_STACK_POP();
/* Set the operand to the function or code object returned to,
* to assist optimization passes. (See _PUSH_FRAME below.)
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index a76edd62c94c13..9315d7228b5732 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -369,7 +369,7 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
static PyCodeObject *
get_code(_PyUOpInstruction *op)
{
- assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME);
+ assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME || op->opcode == _RETURN_GENERATOR);
PyCodeObject *co = NULL;
uint64_t operand = op->operand;
if (operand == 0) {
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index 481fb8387af416..8bc56342774790 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -651,6 +651,28 @@ dummy_func(void) {
}
}

+ op(_RETURN_GENERATOR, ( -- res)) {
+ SYNC_SP();
+ ctx->frame->stack_pointer = stack_pointer;
+ frame_pop(ctx);
+ stack_pointer = ctx->frame->stack_pointer;
+ OUT_OF_SPACE_IF_NULL(res = sym_new_unknown(ctx));
+
+ /* Stack space handling */
+ assert(corresponding_check_stack == NULL);
+ assert(co != NULL);
+ int framesize = co->co_framesize;
+ assert(framesize > 0);
+ assert(framesize <= curr_space);
+ curr_space -= framesize;
+
+ co = get_code(this_instr);
+ if (co == NULL) {
+ // might be impossible, but bailing is still safe
+ goto done;
+ }
+ }
+
op(_CHECK_STACK_SPACE, ( --)) {
assert(corresponding_check_stack == NULL);
corresponding_check_stack = this_instr;
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 0a7d96d30ad3e8..4f0941a3cc3e09 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -1840,6 +1840,29 @@
break;
}

+ case _RETURN_GENERATOR: {
+ _Py_UopsSymbol *res;
+ ctx->frame->stack_pointer = stack_pointer;
+ frame_pop(ctx);
+ stack_pointer = ctx->frame->stack_pointer;
+ OUT_OF_SPACE_IF_NULL(res = sym_new_unknown(ctx));
+ /* Stack space handling */
+ assert(corresponding_check_stack == NULL);
+ assert(co != NULL);
+ int framesize = co->co_framesize;
+ assert(framesize > 0);
+ assert(framesize <= curr_space);
+ curr_space -= framesize;
+ co = get_code(this_instr);
+ if (co == NULL) {
+ // might be impossible, but bailing is still safe
+ goto done;
+ }
+ stack_pointer[0] = res;
+ stack_pointer += 1;
+ break;
+ }
+
case _BUILD_SLICE: {
_Py_UopsSymbol *slice;
slice = sym_new_not_null(ctx);

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-leave@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: list-python-checkins@lists.gossamer-threads.com