Mailing List Archive

GH-115419: Tidy up tier 2 optimizer. Merge peephole pass into main pass (GH-117997)
https://github.com/python/cpython/commit/e32f6e9e4b202ce8759b201f7eafe4982c0eaa41
commit: e32f6e9e4b202ce8759b201f7eafe4982c0eaa41
branch: main
author: Mark Shannon <mark@hotpy.org>
committer: markshannon <mark@hotpy.org>
date: 2024-04-18T11:09:30+01:00
summary:

GH-115419: Tidy up tier 2 optimizer. Merge peephole pass into main pass (GH-117997)

files:
M Python/optimizer_analysis.c
M Python/optimizer_bytecodes.c
M Python/optimizer_cases.c.h

diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index a21679f366a74e..90444e33568b9d 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -362,6 +362,30 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
}
}

+/* _PUSH_FRAME/_POP_FRAME's operand can be 0, a PyFunctionObject *, or a
+ * PyCodeObject *. Retrieve the code object if possible.
+ */
+static PyCodeObject *
+get_code(_PyUOpInstruction *op)
+{
+ assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME);
+ PyCodeObject *co = NULL;
+ uint64_t operand = op->operand;
+ if (operand == 0) {
+ return NULL;
+ }
+ if (operand & 1) {
+ co = (PyCodeObject *)(operand & ~1);
+ }
+ else {
+ PyFunctionObject *func = (PyFunctionObject *)operand;
+ assert(PyFunction_Check(func));
+ co = (PyCodeObject *)func->func_code;
+ }
+ assert(PyCode_Check(co));
+ return co;
+}
+
/* 1 for success, 0 for not ready, cannot error at the moment. */
static int
optimize_uops(
@@ -376,6 +400,10 @@ optimize_uops(
_Py_UOpsContext context;
_Py_UOpsContext *ctx = &context;
uint32_t opcode = UINT16_MAX;
+ int curr_space = 0;
+ int max_space = 0;
+ _PyUOpInstruction *first_valid_check_stack = NULL;
+ _PyUOpInstruction *corresponding_check_stack = NULL;

if (_Py_uop_abstractcontext_init(ctx) < 0) {
goto out_of_space;
@@ -416,8 +444,7 @@ optimize_uops(
ctx->frame->stack_pointer = stack_pointer;
assert(STACK_LEVEL() >= 0);
}
- _Py_uop_abstractcontext_fini(ctx);
- return trace_len;
+ Py_UNREACHABLE();

out_of_space:
DPRINTF(3, "\n");
@@ -443,9 +470,17 @@ optimize_uops(
_Py_uop_abstractcontext_fini(ctx);
return 0;
done:
- /* Cannot optimize further, but there would be no benefit
- * in retrying later */
+ /* Either reached the end or cannot optimize further, but there
+ * would be no benefit in retrying later */
_Py_uop_abstractcontext_fini(ctx);
+ if (first_valid_check_stack != NULL) {
+ assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
+ assert(max_space > 0);
+ assert(max_space <= INT_MAX);
+ assert(max_space <= INT32_MAX);
+ first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
+ first_valid_check_stack->operand = max_space;
+ }
return trace_len;
}

@@ -532,124 +567,6 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
Py_UNREACHABLE();
}

-/* _PUSH_FRAME/_POP_FRAME's operand can be 0, a PyFunctionObject *, or a
- * PyCodeObject *. Retrieve the code object if possible.
- */
-static PyCodeObject *
-get_co(_PyUOpInstruction *op)
-{
- assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME);
- PyCodeObject *co = NULL;
- uint64_t operand = op->operand;
- if (operand == 0) {
- return NULL;
- }
- if (operand & 1) {
- co = (PyCodeObject *)(operand & ~1);
- }
- else {
- PyFunctionObject *func = (PyFunctionObject *)operand;
- assert(PyFunction_Check(func));
- co = (PyCodeObject *)func->func_code;
- }
- assert(PyCode_Check(co));
- return co;
-}
-
-static void
-peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
-{
- PyCodeObject *co = _PyFrame_GetCode(frame);
- int curr_space = 0;
- int max_space = 0;
- _PyUOpInstruction *first_valid_check_stack = NULL;
- _PyUOpInstruction *corresponding_check_stack = NULL;
- for (int pc = 0; pc < buffer_size; pc++) {
- int opcode = buffer[pc].opcode;
- switch(opcode) {
- case _LOAD_CONST: {
- assert(co != NULL);
- PyObject *val = PyTuple_GET_ITEM(co->co_consts, buffer[pc].oparg);
- buffer[pc].opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE;
- buffer[pc].operand = (uintptr_t)val;
- break;
- }
- case _CHECK_PEP_523: {
- /* Setting the eval frame function invalidates
- * all executors, so no need to check dynamically */
- if (_PyInterpreterState_GET()->eval_frame == NULL) {
- buffer[pc].opcode = _NOP;
- }
- break;
- }
- case _CHECK_STACK_SPACE: {
- assert(corresponding_check_stack == NULL);
- corresponding_check_stack = &buffer[pc];
- break;
- }
- case _PUSH_FRAME: {
- assert(corresponding_check_stack != NULL);
- co = get_co(&buffer[pc]);
- if (co == NULL) {
- // should be about to _EXIT_TRACE anyway
- goto finish;
- }
- int framesize = co->co_framesize;
- assert(framesize > 0);
- curr_space += framesize;
- if (curr_space < 0 || curr_space > INT32_MAX) {
- // won't fit in signed 32-bit int
- goto finish;
- }
- max_space = curr_space > max_space ? curr_space : max_space;
- if (first_valid_check_stack == NULL) {
- first_valid_check_stack = corresponding_check_stack;
- }
- else {
- // delete all but the first valid _CHECK_STACK_SPACE
- corresponding_check_stack->opcode = _NOP;
- }
- corresponding_check_stack = NULL;
- break;
- }
- case _POP_FRAME: {
- assert(corresponding_check_stack == NULL);
- assert(co != NULL);
- int framesize = co->co_framesize;
- assert(framesize > 0);
- assert(framesize <= curr_space);
- curr_space -= framesize;
- co = get_co(&buffer[pc]);
- if (co == NULL) {
- // might be impossible, but bailing is still safe
- goto finish;
- }
- break;
- }
- case _JUMP_TO_TOP:
- case _EXIT_TRACE:
- goto finish;
-#ifdef Py_DEBUG
- case _CHECK_STACK_SPACE_OPERAND: {
- /* We should never see _CHECK_STACK_SPACE_OPERANDs.
- * They are only created at the end of this pass. */
- Py_UNREACHABLE();
- }
-#endif
- }
- }
- Py_UNREACHABLE();
-finish:
- if (first_valid_check_stack != NULL) {
- assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE);
- assert(max_space > 0);
- assert(max_space <= INT_MAX);
- assert(max_space <= INT32_MAX);
- first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND;
- first_valid_check_stack->operand = max_space;
- }
-}
-
// 0 - failure, no error raised, just fall back to Tier 1
// -1 - failure, and raise error
// > 0 - length of optimized trace
@@ -669,8 +586,6 @@ _Py_uop_analyze_and_optimize(
return err;
}

- peephole_opt(frame, buffer, length);
-
length = optimize_uops(
_PyFrame_GetCode(frame), buffer,
length, curr_stacklen, dependencies);
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
index e38428af108893..f119b8e20719fa 100644
--- a/Python/optimizer_bytecodes.c
+++ b/Python/optimizer_bytecodes.c
@@ -38,12 +38,14 @@ optimize_to_bool(
_Py_UopsSymbol **result_ptr);

extern void
-eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
+eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit);
+
+extern PyCodeObject *get_code(_PyUOpInstruction *op);

static int
dummy_func(void) {

- PyCodeObject *code;
+ PyCodeObject *co;
int oparg;
_Py_UopsSymbol *flag;
_Py_UopsSymbol *left;
@@ -54,10 +56,15 @@ dummy_func(void) {
_Py_UopsSymbol *top;
_Py_UopsSymbol *bottom;
_Py_UOpsAbstractFrame *frame;
+ _Py_UOpsAbstractFrame *new_frame;
_Py_UOpsContext *ctx;
_PyUOpInstruction *this_instr;
_PyBloomFilter *dependencies;
int modified;
+ int curr_space;
+ int max_space;
+ _PyUOpInstruction *first_valid_check_stack;
+ _PyUOpInstruction *corresponding_check_stack;

// BEGIN BYTECODES //

@@ -393,9 +400,10 @@ dummy_func(void) {
}

op(_LOAD_CONST, (-- value)) {
- // There should be no LOAD_CONST. It should be all
- // replaced by peephole_opt.
- Py_UNREACHABLE();
+ PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg);
+ int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE;
+ REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val);
+ OUT_OF_SPACE_IF_NULL(value = sym_new_const(ctx, val));
}

op(_LOAD_CONST_INLINE, (ptr/4 -- value)) {
@@ -590,6 +598,32 @@ dummy_func(void) {
frame_pop(ctx);
stack_pointer = ctx->frame->stack_pointer;
res = retval;
+
+ /* Stack space handling */
+ assert(corresponding_check_stack == NULL);
+ assert(co != NULL);
+ int framesize = co->co_framesize;
+ assert(framesize > 0);
+ assert(framesize <= curr_space);
+ curr_space -= framesize;
+
+ co = get_code(this_instr);
+ if (co == NULL) {
+ // might be impossible, but bailing is still safe
+ goto done;
+ }
+ }
+
+ op(_CHECK_STACK_SPACE, ( --)) {
+ assert(corresponding_check_stack == NULL);
+ corresponding_check_stack = this_instr;
+ }
+
+ op (_CHECK_STACK_SPACE_OPERAND, ( -- )) {
+ (void)framesize;
+ /* We should never see _CHECK_STACK_SPACE_OPERANDs.
+ * They are only created at the end of this pass. */
+ Py_UNREACHABLE();
}

op(_PUSH_FRAME, (new_frame: _Py_UOpsAbstractFrame * -- unused if (0))) {
@@ -598,6 +632,29 @@ dummy_func(void) {
ctx->frame = new_frame;
ctx->curr_frame_depth++;
stack_pointer = new_frame->stack_pointer;
+ co = get_code(this_instr);
+ if (co == NULL) {
+ // should be about to _EXIT_TRACE anyway
+ goto done;
+ }
+
+ /* Stack space handling */
+ int framesize = co->co_framesize;
+ assert(framesize > 0);
+ curr_space += framesize;
+ if (curr_space < 0 || curr_space > INT32_MAX) {
+ // won't fit in signed 32-bit int
+ goto done;
+ }
+ max_space = curr_space > max_space ? curr_space : max_space;
+ if (first_valid_check_stack == NULL) {
+ first_valid_check_stack = corresponding_check_stack;
+ }
+ else {
+ // delete all but the first valid _CHECK_STACK_SPACE
+ corresponding_check_stack->opcode = _NOP;
+ }
+ corresponding_check_stack = NULL;
}

op(_UNPACK_SEQUENCE, (seq -- values[oparg])) {
@@ -662,6 +719,22 @@ dummy_func(void) {
}
}

+ op(_CHECK_PEP_523, (--)) {
+ /* Setting the eval frame function invalidates
+ * all executors, so no need to check dynamically */
+ if (_PyInterpreterState_GET()->eval_frame == NULL) {
+ REPLACE_OP(this_instr, _NOP, 0 ,0);
+ }
+ }
+
+ op(_JUMP_TO_TOP, (--)) {
+ goto done;
+ }
+
+ op(_EXIT_TRACE, (--)) {
+ goto done;
+ }
+

// END BYTECODES //

diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 209be370c4aa38..50f335e0c8a0a2 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -46,9 +46,10 @@

case _LOAD_CONST: {
_Py_UopsSymbol *value;
- // There should be no LOAD_CONST. It should be all
- // replaced by peephole_opt.
- Py_UNREACHABLE();
+ PyObject *val = PyTuple_GET_ITEM(co->co_consts, this_instr->oparg);
+ int opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE;
+ REPLACE_OP(this_instr, opcode, 0, (uintptr_t)val);
+ OUT_OF_SPACE_IF_NULL(value = sym_new_const(ctx, val));
stack_pointer[0] = value;
stack_pointer += 1;
break;
@@ -597,6 +598,18 @@
frame_pop(ctx);
stack_pointer = ctx->frame->stack_pointer;
res = retval;
+ /* Stack space handling */
+ assert(corresponding_check_stack == NULL);
+ assert(co != NULL);
+ int framesize = co->co_framesize;
+ assert(framesize > 0);
+ assert(framesize <= curr_space);
+ curr_space -= framesize;
+ co = get_code(this_instr);
+ if (co == NULL) {
+ // might be impossible, but bailing is still safe
+ goto done;
+ }
stack_pointer[0] = res;
stack_pointer += 1;
break;
@@ -1529,6 +1542,11 @@
}

case _CHECK_PEP_523: {
+ /* Setting the eval frame function invalidates
+ * all executors, so no need to check dynamically */
+ if (_PyInterpreterState_GET()->eval_frame == NULL) {
+ REPLACE_OP(this_instr, _NOP, 0 ,0);
+ }
break;
}

@@ -1547,6 +1565,8 @@
}

case _CHECK_STACK_SPACE: {
+ assert(corresponding_check_stack == NULL);
+ corresponding_check_stack = this_instr;
break;
}

@@ -1610,6 +1630,28 @@
ctx->frame = new_frame;
ctx->curr_frame_depth++;
stack_pointer = new_frame->stack_pointer;
+ co = get_code(this_instr);
+ if (co == NULL) {
+ // should be about to _EXIT_TRACE anyway
+ goto done;
+ }
+ /* Stack space handling */
+ int framesize = co->co_framesize;
+ assert(framesize > 0);
+ curr_space += framesize;
+ if (curr_space < 0 || curr_space > INT32_MAX) {
+ // won't fit in signed 32-bit int
+ goto done;
+ }
+ max_space = curr_space > max_space ? curr_space : max_space;
+ if (first_valid_check_stack == NULL) {
+ first_valid_check_stack = corresponding_check_stack;
+ }
+ else {
+ // delete all but the first valid _CHECK_STACK_SPACE
+ corresponding_check_stack->opcode = _NOP;
+ }
+ corresponding_check_stack = NULL;
break;
}

@@ -1899,6 +1941,7 @@
}

case _JUMP_TO_TOP: {
+ goto done;
break;
}

@@ -1907,6 +1950,11 @@
}

case _CHECK_STACK_SPACE_OPERAND: {
+ uint32_t framesize = (uint32_t)this_instr->operand;
+ (void)framesize;
+ /* We should never see _CHECK_STACK_SPACE_OPERANDs.
+ * They are only created at the end of this pass. */
+ Py_UNREACHABLE();
break;
}

@@ -1915,6 +1963,7 @@
}

case _EXIT_TRACE: {
+ goto done;
break;
}


_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-leave@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: list-python-checkins@lists.gossamer-threads.com