gh-128563: Move GO_TO_INSTRUCTION and PREDICT to cases generator (GH-129115)

Ken Jin 2025-01-22 09:22:25 +08:00 committed by GitHub
parent 767cf70844
commit 86c1a60d5a
5 changed files with 47 additions and 59 deletions
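
In short: the hand-written PREDICTED() and GO_TO_INSTRUCTION() macros in ceval_macros.h go away, and the cases generator now writes the equivalent label and goto directly into the generated interpreter. A rough before/after sketch of the generated C, using names that appear in the hunks below:

    /* Before: the generator emitted macro calls that the preprocessor
       expanded into a PRED_<op> label and a jump to it. */
    PREDICTED(BINARY_OP);        /* expanded to the label PRED_BINARY_OP: */
    GO_TO_INSTRUCTION(CALL_KW);  /* expanded to: goto PRED_CALL_KW; */

    /* After: the generator emits the label and the jump itself,
       with no preprocessor indirection. */
    PREDICTED_BINARY_OP:;
    goto PREDICTED_CALL_KW;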

View File

@@ -412,7 +412,7 @@ def test_predictions(self):
frame->instr_ptr = next_instr;
next_instr += 1;
INSTRUCTION_STATS(OP1);
PREDICTED(OP1);
PREDICTED_OP1:;
_PyStackRef res;
res = Py_None;
stack_pointer[-1] = res;
@@ -646,7 +646,7 @@ def test_macro_instruction(self):
frame->instr_ptr = next_instr;
next_instr += 6;
INSTRUCTION_STATS(OP);
PREDICTED(OP);
PREDICTED_OP:;
_Py_CODEUNIT* const this_instr = next_instr - 6;
(void)this_instr;
_PyStackRef left;

View File

@@ -166,35 +166,6 @@ GETITEM(PyObject *v, Py_ssize_t i) {
#define JUMPBY(x) (next_instr += (x))
#define SKIP_OVER(x) (next_instr += (x))
/* OpCode prediction macros
Some opcodes tend to come in pairs thus making it possible to
predict the second code when the first is run. For example,
COMPARE_OP is often followed by POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE.
Verifying the prediction costs a single high-speed test of a register
variable against a constant. If the pairing was good, then the
processor's own internal branch prediction has a high likelihood of
success, resulting in a nearly zero-overhead transition to the
next opcode. A successful prediction saves a trip through the eval-loop
including its unpredictable switch-case branch. Combined with the
processor's internal branch prediction, a successful PREDICT has the
effect of making the two opcodes run as if they were a single new opcode
with the bodies combined.
If collecting opcode statistics, your choices are to either keep the
predictions turned-on and interpret the results as if some opcodes
had been combined or turn-off predictions so that the opcode frequency
counter updates for both opcodes.
Opcode prediction is disabled with threaded code, since the latter allows
the CPU to record separate branch prediction information for each
opcode.
*/
#define PREDICT_ID(op) PRED_##op
#define PREDICTED(op) PREDICT_ID(op):
/* Stack manipulation macros */
@@ -260,8 +231,6 @@ GETITEM(PyObject *v, Py_ssize_t i) {
GETLOCAL(i) = value; \
PyStackRef_XCLOSE(tmp); } while (0)
#define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op)
#ifdef Py_STATS
#define UPDATE_MISS_STATS(INSTNAME) \
do { \
@@ -281,7 +250,7 @@ GETITEM(PyObject *v, Py_ssize_t i) {
/* This is only a single jump on release builds! */ \
UPDATE_MISS_STATS((INSTNAME)); \
assert(_PyOpcode_Deopt[opcode] == (INSTNAME)); \
GO_TO_INSTRUCTION(INSTNAME); \
goto PREDICTED_##INSTNAME; \
}
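
The comment and macros removed above describe the old PREDICT machinery; the part that survives is the label a specialized instruction's deopt path jumps to, which the DEOPT macro now names directly via PREDICTED_##INSTNAME instead of going through GO_TO_INSTRUCTION. A simplified sketch of that path (assumed handler shape, not the literal generated code; details such as resetting next_instr are omitted):

    TARGET(BINARY_OP_ADD_INT) {
        ...
        if (!guard_both_ints) {               /* simplified guard check */
            UPDATE_MISS_STATS(BINARY_OP);
            assert(_PyOpcode_Deopt[opcode] == BINARY_OP);
            goto PREDICTED_BINARY_OP;         /* was GO_TO_INSTRUCTION(BINARY_OP); */
        }
        ...                                   /* specialized fast path */
    }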

View File

@@ -13,7 +13,7 @@
frame->instr_ptr = next_instr;
next_instr += 6;
INSTRUCTION_STATS(BINARY_OP);
PREDICTED(BINARY_OP);
PREDICTED_BINARY_OP:;
_Py_CODEUNIT* const this_instr = next_instr - 6;
(void)this_instr;
_PyStackRef lhs;
@@ -484,7 +484,7 @@
frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(BINARY_SUBSCR);
PREDICTED(BINARY_SUBSCR);
PREDICTED_BINARY_SUBSCR:;
_Py_CODEUNIT* const this_instr = next_instr - 2;
(void)this_instr;
_PyStackRef container;
@@ -931,7 +931,7 @@
frame->instr_ptr = next_instr;
next_instr += 4;
INSTRUCTION_STATS(CALL);
PREDICTED(CALL);
PREDICTED_CALL:;
_Py_CODEUNIT* const this_instr = next_instr - 4;
(void)this_instr;
_PyStackRef *callable;
@@ -1707,7 +1707,7 @@
frame->instr_ptr = next_instr;
next_instr += 1;
INSTRUCTION_STATS(CALL_FUNCTION_EX);
PREDICTED(CALL_FUNCTION_EX);
PREDICTED_CALL_FUNCTION_EX:;
_Py_CODEUNIT* const this_instr = next_instr - 1;
(void)this_instr;
_PyStackRef func;
@@ -1960,7 +1960,7 @@
frame->instr_ptr = next_instr;
next_instr += 4;
INSTRUCTION_STATS(CALL_KW);
PREDICTED(CALL_KW);
PREDICTED_CALL_KW:;
_Py_CODEUNIT* const this_instr = next_instr - 4;
(void)this_instr;
_PyStackRef *callable;
@@ -3299,7 +3299,7 @@
frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(COMPARE_OP);
PREDICTED(COMPARE_OP);
PREDICTED_COMPARE_OP:;
_Py_CODEUNIT* const this_instr = next_instr - 2;
(void)this_instr;
_PyStackRef left;
@@ -3479,7 +3479,7 @@
frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(CONTAINS_OP);
PREDICTED(CONTAINS_OP);
PREDICTED_CONTAINS_OP:;
_Py_CODEUNIT* const this_instr = next_instr - 2;
(void)this_instr;
_PyStackRef left;
@@ -4000,7 +4000,7 @@
frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(FOR_ITER);
PREDICTED(FOR_ITER);
PREDICTED_FOR_ITER:;
_Py_CODEUNIT* const this_instr = next_instr - 2;
(void)this_instr;
_PyStackRef iter;
@@ -4631,7 +4631,8 @@
frame->instr_ptr = next_instr;
next_instr += 1;
INSTRUCTION_STATS(INSTRUMENTED_CALL_FUNCTION_EX);
GO_TO_INSTRUCTION(CALL_FUNCTION_EX);
goto PREDICTED_CALL_FUNCTION_EX;
}
TARGET(INSTRUMENTED_CALL_KW) {
@@ -4655,7 +4656,7 @@
stack_pointer = _PyFrame_GetStackPointer(frame);
if (err) goto error;
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(CALL_KW);
goto PREDICTED_CALL_KW;
}
TARGET(INSTRUMENTED_END_FOR) {
@@ -4846,7 +4847,7 @@
// cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
// don't want to specialize instrumented instructions
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(LOAD_SUPER_ATTR);
goto PREDICTED_LOAD_SUPER_ATTR;
}
TARGET(INSTRUMENTED_LOAD_SUPER_METHOD) {
@@ -4858,7 +4859,7 @@
// cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
// don't want to specialize instrumented instructions
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
GO_TO_INSTRUCTION(LOAD_SUPER_METHOD);
goto PREDICTED_LOAD_SUPER_METHOD;
}
TARGET(INSTRUMENTED_NOT_TAKEN) {
@@ -5309,7 +5310,7 @@
frame->instr_ptr = next_instr;
next_instr += 10;
INSTRUCTION_STATS(LOAD_ATTR);
PREDICTED(LOAD_ATTR);
PREDICTED_LOAD_ATTR:;
_Py_CODEUNIT* const this_instr = next_instr - 10;
(void)this_instr;
_PyStackRef owner;
@@ -5826,7 +5827,7 @@
frame->instr_ptr = next_instr;
next_instr += 1;
INSTRUCTION_STATS(LOAD_CONST);
PREDICTED(LOAD_CONST);
PREDICTED_LOAD_CONST:;
_Py_CODEUNIT* const this_instr = next_instr - 1;
(void)this_instr;
_PyStackRef value;
@@ -6069,7 +6070,7 @@
frame->instr_ptr = next_instr;
next_instr += 5;
INSTRUCTION_STATS(LOAD_GLOBAL);
PREDICTED(LOAD_GLOBAL);
PREDICTED_LOAD_GLOBAL:;
_Py_CODEUNIT* const this_instr = next_instr - 5;
(void)this_instr;
_PyStackRef *res;
@@ -6219,7 +6220,7 @@
frame->instr_ptr = next_instr;
next_instr += 10;
INSTRUCTION_STATS(LOAD_METHOD);
PREDICTED(LOAD_METHOD);
PREDICTED_LOAD_METHOD:;
_Py_CODEUNIT* const this_instr = next_instr - 10;
(void)this_instr;
_PyStackRef owner;
@@ -6479,7 +6480,7 @@
frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(LOAD_SUPER_ATTR);
PREDICTED(LOAD_SUPER_ATTR);
PREDICTED_LOAD_SUPER_ATTR:;
_Py_CODEUNIT* const this_instr = next_instr - 2;
(void)this_instr;
_PyStackRef global_super_st;
@@ -6609,7 +6610,7 @@
frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(LOAD_SUPER_METHOD);
PREDICTED(LOAD_SUPER_METHOD);
PREDICTED_LOAD_SUPER_METHOD:;
_Py_CODEUNIT* const this_instr = next_instr - 2;
(void)this_instr;
_PyStackRef global_super_st;
@@ -7179,7 +7180,7 @@
frame->instr_ptr = next_instr;
next_instr += 1;
INSTRUCTION_STATS(RESUME);
PREDICTED(RESUME);
PREDICTED_RESUME:;
_Py_CODEUNIT* const this_instr = next_instr - 1;
(void)this_instr;
// _LOAD_BYTECODE
@@ -7330,7 +7331,7 @@
frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(SEND);
PREDICTED(SEND);
PREDICTED_SEND:;
_Py_CODEUNIT* const this_instr = next_instr - 2;
(void)this_instr;
_PyStackRef receiver;
@@ -7571,7 +7572,7 @@
frame->instr_ptr = next_instr;
next_instr += 5;
INSTRUCTION_STATS(STORE_ATTR);
PREDICTED(STORE_ATTR);
PREDICTED_STORE_ATTR:;
_Py_CODEUNIT* const this_instr = next_instr - 5;
(void)this_instr;
_PyStackRef owner;
@@ -7935,7 +7936,7 @@
frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(STORE_SUBSCR);
PREDICTED(STORE_SUBSCR);
PREDICTED_STORE_SUBSCR:;
_Py_CODEUNIT* const this_instr = next_instr - 2;
(void)this_instr;
_PyStackRef container;
@@ -8065,7 +8066,7 @@
frame->instr_ptr = next_instr;
next_instr += 4;
INSTRUCTION_STATS(TO_BOOL);
PREDICTED(TO_BOOL);
PREDICTED_TO_BOOL:;
_Py_CODEUNIT* const this_instr = next_instr - 4;
(void)this_instr;
_PyStackRef value;
@@ -8301,7 +8302,7 @@
frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(UNPACK_SEQUENCE);
PREDICTED(UNPACK_SEQUENCE);
PREDICTED_UNPACK_SEQUENCE:;
_Py_CODEUNIT* const this_instr = next_instr - 2;
(void)this_instr;
_PyStackRef seq;

View File

@@ -127,6 +127,7 @@ def __init__(self, out: CWriter):
"DISPATCH": self.dispatch,
"INSTRUCTION_SIZE": self.instruction_size,
"POP_INPUT": self.pop_input,
"GO_TO_INSTRUCTION": self.go_to_instruction,
}
self.out = out
@@ -402,6 +403,23 @@ def sync_sp(
self._print_storage(storage)
return True
def go_to_instruction(
self,
tkn: Token,
tkn_iter: TokenIterator,
uop: Uop,
storage: Storage,
inst: Instruction | None,
) -> bool:
next(tkn_iter)  # skip "("
name = next(tkn_iter)  # target instruction name
next(tkn_iter)  # skip ")"
next(tkn_iter)  # skip ";"
assert name.kind == "IDENTIFIER"
self.emit("\n")
self.emit(f"goto PREDICTED_{name.text};\n")
return True
def emit_save(self, storage: Storage) -> None:
storage.save(self.out)
self._print_storage(storage)
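
The new handler consumes the GO_TO_INSTRUCTION(...) token sequence from the instruction-definition DSL (Python/bytecodes.c) and emits a plain goto in its place, so the generated C no longer relies on the deleted macro. Using CALL_KW as in the hunk above:

    /* Written in the DSL body of INSTRUMENTED_CALL_KW: */
    GO_TO_INSTRUCTION(CALL_KW);
    /* Emitted by the generator into generated_cases.c.h: */
    goto PREDICTED_CALL_KW;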

View File

@@ -158,7 +158,7 @@ def generate_tier1(
out.emit(f"next_instr += {inst.size};\n")
out.emit(f"INSTRUCTION_STATS({name});\n")
if inst.is_target:
out.emit(f"PREDICTED({name});\n")
out.emit(f"PREDICTED_{name}:;\n")
if needs_this:
out.emit(f"_Py_CODEUNIT* const this_instr = next_instr - {inst.size};\n")
out.emit(unused_guard)
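
For context, the effect of this one-line change on the emitted prologue of a prediction target, reconstructed from the COMPARE_OP hunk above:

    TARGET(COMPARE_OP) {
        frame->instr_ptr = next_instr;
        next_instr += 2;
        INSTRUCTION_STATS(COMPARE_OP);
        PREDICTED_COMPARE_OP:;    /* label written directly; formerly PREDICTED(COMPARE_OP); */
        _Py_CODEUNIT* const this_instr = next_instr - 2;
        (void)this_instr;
        ...
    }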