Skip to content

Commit 2d04e2e

Browse files
feat: compile collected basic blocks (python#62)
* fix: disable BB_TEST_ITER in stencil compiler
* feat: compile collected basic blocks
* fix: memleak
* feat: compile trace exits too
* nit: logging when JIT_DEBUG isn't set
* nit: fix warning

---------

Co-authored-by: Jules <[email protected]>
1 parent 22580dc commit 2d04e2e

File tree

14 files changed

+189
-21
lines changed

14 files changed

+189
-21
lines changed

Include/cpython/code.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ typedef struct _PyTier2BBMetadata {
116116
_Py_CODEUNIT *tier2_start;
117117
// Note, this is the first tier 1 instruction to execute AFTER the BB ends.
118118
_Py_CODEUNIT *tier1_end;
119+
// Tier 2.5 machine code function trampoline pointer
120+
void *machine_code;
119121
} _PyTier2BBMetadata;
120122

121123
// Bump allocator for basic blocks (overallocated)

Include/internal/pycore_code.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ typedef struct {
2020
} _PyBBBranchCache;
2121

2222
#define INLINE_CACHE_ENTRIES_BB_BRANCH CACHE_ENTRIES(_PyBBBranchCache)
23-
2423
#define INLINE_CACHE_ENTRIES_JUMP_BACKWARD CACHE_ENTRIES(_PyBBBranchCache)
2524

25+
2626
/* PEP 659
2727
* Specialization and quickening structs and helper functions
2828
*/
@@ -279,7 +279,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co);
279279
#define BB_TEST_IS_SUCCESSOR(frame) ((frame->bb_test) >> 4)
280280
#define BB_TEST_GET_N_REQUIRES_POP(bb_flag) ((bb_flag) & 0b1111)
281281

282-
extern _Py_CODEUNIT *_PyCode_Tier2Warmup(struct _PyInterpreterFrame *,
282+
PyAPI_FUNC(_Py_CODEUNIT *) _PyCode_Tier2Warmup(struct _PyInterpreterFrame *,
283283
_Py_CODEUNIT *);
284284
extern _Py_CODEUNIT *_PyTier2_GenerateNextBB(
285285
struct _PyInterpreterFrame *frame,

Include/internal/pycore_jit.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,5 @@ typedef enum {
66

77
typedef _PyJITReturnCode (*_PyJITFunction)(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer, _Py_CODEUNIT *next_instr);
88

9-
PyAPI_FUNC(_PyJITFunction)_PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace);
10-
PyAPI_FUNC(void)_PyJIT_Free(_PyJITFunction trace);
9+
PyAPI_FUNC(_PyJITFunction)_PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offsets, int n_jump_targets);
10+
PyAPI_FUNC(void)_PyJIT_Free(_PyJITFunction trace);

Include/internal/pycore_opcode.h

Lines changed: 1 addition & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/opcode.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/opcode.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,4 +532,7 @@ def pseudo_op(name, op, real_ops):
532532
# Storing an unboxed value, overwriting an unboxed local.
533533
'STORE_FAST_UNBOXED_UNBOXED',
534534
# The traditional STORE_FAST is storing a boxed value, overwriting a boxed local.
535+
536+
# Tier 2.5
537+
'EXIT_TRACE',
535538
]

Python/bytecodes.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3286,6 +3286,11 @@ dummy_func(
32863286
Py_UNREACHABLE();
32873287
}
32883288

3289+
// Special sentinel to indicate the end of a machine code trace
3290+
inst(EXIT_TRACE, (--)) {
3291+
Py_UNREACHABLE();
3292+
}
3293+
32893294
// Tier 2 instructions
32903295
// Type propagator assumes this doesn't affect type context
32913296
inst(BB_BRANCH, (unused/10 --)) {

Python/generated_cases.c.h

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/jit.c

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,9 @@ copy_and_patch(unsigned char *memory, const Stencil *stencil, uintptr_t patches[
104104
// The world's smallest compiler?
105105
// Make sure to call _PyJIT_Free on the memory when you're done with it!
106106
_PyJITFunction
107-
_PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace)
107+
_PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace, int *jump_target_trace_offsets, int n_jump_targets)
108108
{
109+
assert(size > 0);
109110
if (!stencils_loaded) {
110111
stencils_loaded = 1;
111112
for (size_t i = 0; i < Py_ARRAY_LENGTH(stencils); i++) {
@@ -120,7 +121,8 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace)
120121
return NULL;
121122
}
122123
// First, loop over everything once to find the total compiled size:
123-
size_t nbytes = trampoline_stencil.nbytes;
124+
// size_t nbytes = trampoline_stencil.nbytes;
125+
size_t nbytes = 0;
124126
for (int i = 0; i < size; i++) {
125127
_Py_CODEUNIT *instruction = trace[i];
126128
const Stencil *stencil = &stencils[instruction->op.code];
@@ -135,14 +137,26 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace)
135137
}
136138
unsigned char *head = memory;
137139
uintptr_t patches[] = GET_PATCHES();
138-
// First, the trampoline:
139-
const Stencil *stencil = &trampoline_stencil;
140-
patches[HOLE_base] = (uintptr_t)head;
141-
patches[HOLE_continue] = (uintptr_t)head + stencil->nbytes;
142-
copy_and_patch(head, stencil, patches);
143-
head += stencil->nbytes;
140+
//// First, the trampoline:
141+
//const Stencil *stencil = &trampoline_stencil;
142+
//patches[HOLE_base] = (uintptr_t)head;
143+
//patches[HOLE_continue] = (uintptr_t)head + stencil->nbytes;
144+
//copy_and_patch(head, stencil, patches);
145+
//head += stencil->nbytes;
144146
// Then, all of the stencils:
147+
int seen_jump_targets = 0;
148+
// Allocate all the entry point (trampoline) stencils,
149+
unsigned char *entry_points = alloc(trampoline_stencil.nbytes * n_jump_targets);
145150
for (int i = 0; i < size; i++) {
151+
// For each jump target, create an entry trampoline.
152+
if (i == jump_target_trace_offsets[seen_jump_targets]) {
153+
seen_jump_targets++;
154+
const Stencil *trampoline = &trampoline_stencil;
155+
patches[HOLE_base] = (uintptr_t)entry_points;
156+
patches[HOLE_continue] = (uintptr_t)head;
157+
copy_and_patch(entry_points, trampoline, patches);
158+
entry_points += trampoline->nbytes;
159+
}
146160
_Py_CODEUNIT *instruction = trace[i];
147161
const Stencil *stencil = &stencils[instruction->op.code];
148162
patches[HOLE_base] = (uintptr_t)head;
@@ -156,5 +170,6 @@ _PyJIT_CompileTrace(int size, _Py_CODEUNIT **trace)
156170
};
157171
// Wow, done already?
158172
assert(memory + nbytes == head);
159-
return (_PyJITFunction)memory;
173+
assert(seen_jump_targets == n_jump_targets);
174+
return (_PyJITFunction)entry_points;
160175
}

Python/opcode_metadata.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,8 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
401401
return 0;
402402
case CACHE:
403403
return 0;
404+
case EXIT_TRACE:
405+
return 0;
404406
case BB_BRANCH:
405407
return 0;
406408
case BB_BRANCH_IF_FLAG_UNSET:
@@ -817,6 +819,8 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
817819
return 0;
818820
case CACHE:
819821
return 0;
822+
case EXIT_TRACE:
823+
return 0;
820824
case BB_BRANCH:
821825
return 0;
822826
case BB_BRANCH_IF_FLAG_UNSET:
@@ -1041,6 +1045,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
10411045
[SWAP] = { true, INSTR_FMT_IB },
10421046
[EXTENDED_ARG] = { true, INSTR_FMT_IB },
10431047
[CACHE] = { true, INSTR_FMT_IX },
1048+
[EXIT_TRACE] = { true, INSTR_FMT_IX },
10441049
[BB_BRANCH] = { true, INSTR_FMT_IBC000000000 },
10451050
[BB_BRANCH_IF_FLAG_UNSET] = { true, INSTR_FMT_IBC000000000 },
10461051
[BB_JUMP_IF_FLAG_UNSET] = { true, INSTR_FMT_IBC000000000 },

0 commit comments

Comments
 (0)