Skip to content

Commit 90d34f0

Browse files
committed
specialize: optimize for single-threaded programs
1 parent 7e75686 commit 90d34f0

File tree

4 files changed

+135
-107
lines changed

4 files changed

+135
-107
lines changed

Python/bytecodes.c

Lines changed: 20 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -342,18 +342,16 @@ dummy_func(
342342
};
343343

344344
inst(BINARY_SUBSCR, (unused/4, container, sub -- unused)) {
345-
_PyMutex_lock(&_PyRuntime.mutex);
346345
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
347-
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
346+
if (DECREMENT_ADAPTIVE_COUNTER(&cache->counter)) {
347+
_PyMutex_lock(&_PyRuntime.mutex);
348348
assert(cframe.use_tracing == 0);
349349
next_instr--;
350350
_Py_Specialize_BinarySubscr(container, sub, next_instr);
351351
_PyMutex_unlock(&_PyRuntime.mutex);
352352
DISPATCH_SAME_OPARG();
353353
}
354354
STAT_INC(BINARY_SUBSCR, deferred);
355-
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
356-
_PyMutex_unlock(&_PyRuntime.mutex);
357355
GO_TO_INSTRUCTION(BINARY_SUBSCR_GENERIC);
358356
}
359357

@@ -490,18 +488,16 @@ dummy_func(
490488
};
491489

492490
inst(STORE_SUBSCR, (unused/1, unused, container, sub -- )) {
493-
_PyMutex_lock(&_PyRuntime.mutex);
494491
_PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr;
495-
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
492+
if (DECREMENT_ADAPTIVE_COUNTER(&cache->counter)) {
493+
_PyMutex_lock(&_PyRuntime.mutex);
496494
assert(cframe.use_tracing == 0);
497495
next_instr--;
498496
_Py_Specialize_StoreSubscr(container, sub, next_instr);
499497
_PyMutex_unlock(&_PyRuntime.mutex);
500498
DISPATCH_SAME_OPARG();
501499
}
502500
STAT_INC(STORE_SUBSCR, deferred);
503-
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
504-
_PyMutex_unlock(&_PyRuntime.mutex);
505501
GO_TO_INSTRUCTION(STORE_SUBSCR_GENERIC);
506502
}
507503

@@ -928,9 +924,9 @@ dummy_func(
928924

929925
// stack effect: (__0 -- __array[oparg])
930926
inst(UNPACK_SEQUENCE) {
931-
_PyMutex_lock(&_PyRuntime.mutex);
932927
_PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr;
933-
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
928+
if (DECREMENT_ADAPTIVE_COUNTER(&cache->counter)) {
929+
_PyMutex_lock(&_PyRuntime.mutex);
934930
assert(cframe.use_tracing == 0);
935931
PyObject *seq = TOP();
936932
next_instr--;
@@ -939,8 +935,6 @@ dummy_func(
939935
DISPATCH_SAME_OPARG();
940936
}
941937
STAT_INC(UNPACK_SEQUENCE, deferred);
942-
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
943-
_PyMutex_unlock(&_PyRuntime.mutex);
944938
GO_TO_INSTRUCTION(UNPACK_SEQUENCE_GENERIC);
945939
}
946940

@@ -1021,9 +1015,9 @@ dummy_func(
10211015
};
10221016

10231017
inst(STORE_ATTR, (unused/1, unused/3, unused, owner --)) {
1024-
_PyMutex_lock(&_PyRuntime.mutex);
10251018
_PyAttrCache *cache = (_PyAttrCache *)next_instr;
1026-
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
1019+
if (DECREMENT_ADAPTIVE_COUNTER(&cache->counter)) {
1020+
_PyMutex_lock(&_PyRuntime.mutex);
10271021
assert(cframe.use_tracing == 0);
10281022
PyObject *name = GETITEM(names, oparg);
10291023
next_instr--;
@@ -1032,8 +1026,6 @@ dummy_func(
10321026
DISPATCH_SAME_OPARG();
10331027
}
10341028
STAT_INC(STORE_ATTR, deferred);
1035-
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
1036-
_PyMutex_unlock(&_PyRuntime.mutex);
10371029
GO_TO_INSTRUCTION(STORE_ATTR_GENERIC);
10381030
}
10391031

@@ -1136,9 +1128,9 @@ dummy_func(
11361128

11371129
// error: LOAD_GLOBAL has irregular stack effect
11381130
inst(LOAD_GLOBAL) {
1139-
_PyMutex_lock(&_PyRuntime.mutex);
11401131
_PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr;
1141-
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
1132+
if (DECREMENT_ADAPTIVE_COUNTER(&cache->counter)) {
1133+
_PyMutex_lock(&_PyRuntime.mutex);
11421134
assert(cframe.use_tracing == 0);
11431135
PyObject *name = GETITEM(names, oparg>>1);
11441136
next_instr--;
@@ -1147,8 +1139,6 @@ dummy_func(
11471139
DISPATCH_SAME_OPARG();
11481140
}
11491141
STAT_INC(LOAD_GLOBAL, deferred);
1150-
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
1151-
_PyMutex_unlock(&_PyRuntime.mutex);
11521142
GO_TO_INSTRUCTION(LOAD_GLOBAL_GENERIC);
11531143
}
11541144

@@ -1537,9 +1527,9 @@ dummy_func(
15371527

15381528
// error: LOAD_ATTR has irregular stack effect
15391529
inst(LOAD_ATTR) {
1540-
_PyMutex_lock(&_PyRuntime.mutex);
15411530
_PyAttrCache *cache = (_PyAttrCache *)next_instr;
1542-
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
1531+
if (DECREMENT_ADAPTIVE_COUNTER(&cache->counter)) {
1532+
_PyMutex_lock(&_PyRuntime.mutex);
15431533
assert(cframe.use_tracing == 0);
15441534
PyObject *owner = TOP();
15451535
PyObject *name = GETITEM(names, oparg>>1);
@@ -1549,8 +1539,6 @@ dummy_func(
15491539
DISPATCH_SAME_OPARG();
15501540
}
15511541
STAT_INC(LOAD_ATTR, deferred);
1552-
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
1553-
_PyMutex_unlock(&_PyRuntime.mutex);
15541542
GO_TO_INSTRUCTION(LOAD_ATTR_GENERIC);
15551543
}
15561544

@@ -1885,18 +1873,16 @@ dummy_func(
18851873
};
18861874

18871875
inst(COMPARE_OP, (unused/2, left, right -- unused)) {
1888-
_PyMutex_lock(&_PyRuntime.mutex);
18891876
_PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr;
1890-
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
1877+
if (DECREMENT_ADAPTIVE_COUNTER(&cache->counter)) {
1878+
_PyMutex_lock(&_PyRuntime.mutex);
18911879
assert(cframe.use_tracing == 0);
18921880
next_instr--;
18931881
_Py_Specialize_CompareOp(left, right, next_instr, oparg);
18941882
_PyMutex_unlock(&_PyRuntime.mutex);
18951883
DISPATCH_SAME_OPARG();
18961884
}
18971885
STAT_INC(COMPARE_OP, deferred);
1898-
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
1899-
_PyMutex_unlock(&_PyRuntime.mutex);
19001886
GO_TO_INSTRUCTION(COMPARE_OP_GENERIC);
19011887
}
19021888

@@ -2301,18 +2287,16 @@ dummy_func(
23012287

23022288
// stack effect: ( -- __0)
23032289
inst(FOR_ITER) {
2304-
_PyMutex_lock(&_PyRuntime.mutex);
23052290
_PyForIterCache *cache = (_PyForIterCache *)next_instr;
2306-
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
2291+
if (DECREMENT_ADAPTIVE_COUNTER(&cache->counter)) {
2292+
_PyMutex_lock(&_PyRuntime.mutex);
23072293
assert(cframe.use_tracing == 0);
23082294
next_instr--;
23092295
_Py_Specialize_ForIter(TOP(), next_instr, oparg);
23102296
_PyMutex_unlock(&_PyRuntime.mutex);
23112297
DISPATCH_SAME_OPARG();
23122298
}
23132299
STAT_INC(FOR_ITER, deferred);
2314-
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
2315-
_PyMutex_unlock(&_PyRuntime.mutex);
23162300
GO_TO_INSTRUCTION(FOR_ITER_GENERIC);
23172301
}
23182302

@@ -2636,9 +2620,9 @@ dummy_func(
26362620

26372621
// stack effect: (__0, __array[oparg] -- )
26382622
inst(CALL) {
2639-
_PyMutex_lock(&_PyRuntime.mutex);
26402623
_PyCallCache *cache = (_PyCallCache *)next_instr;
2641-
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
2624+
if (DECREMENT_ADAPTIVE_COUNTER(&cache->counter)) {
2625+
_PyMutex_lock(&_PyRuntime.mutex);
26422626
assert(cframe.use_tracing == 0);
26432627
int is_meth = is_method(stack_pointer, oparg);
26442628
int nargs = oparg + is_meth;
@@ -2649,8 +2633,6 @@ dummy_func(
26492633
DISPATCH_SAME_OPARG();
26502634
}
26512635
STAT_INC(CALL, deferred);
2652-
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
2653-
_PyMutex_unlock(&_PyRuntime.mutex);
26542636
GO_TO_INSTRUCTION(CALL_GENERIC);
26552637
}
26562638

@@ -3393,18 +3375,16 @@ dummy_func(
33933375
}
33943376

33953377
inst(BINARY_OP, (unused/1, lhs, rhs -- unused)) {
3396-
_PyMutex_lock(&_PyRuntime.mutex);
33973378
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
3398-
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
3379+
if (DECREMENT_ADAPTIVE_COUNTER(&cache->counter)) {
3380+
_PyMutex_lock(&_PyRuntime.mutex);
33993381
assert(cframe.use_tracing == 0);
34003382
next_instr--;
34013383
_Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, &GETLOCAL(0));
34023384
_PyMutex_unlock(&_PyRuntime.mutex);
34033385
DISPATCH_SAME_OPARG();
34043386
}
34053387
STAT_INC(BINARY_OP, deferred);
3406-
DECREMENT_ADAPTIVE_COUNTER(cache->counter);
3407-
_PyMutex_unlock(&_PyRuntime.mutex);
34083388
GO_TO_INSTRUCTION(BINARY_OP_GENERIC);
34093389
}
34103390

Python/ceval.c

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -887,7 +887,7 @@ GETITEM(PyObject *v, Py_ssize_t i) {
887887
/* This is only a single jump on release builds! */ \
888888
UPDATE_MISS_STATS((INSTNAME)); \
889889
assert(_PyOpcode_Deopt[opcode] == (INSTNAME)); \
890-
GO_TO_INSTRUCTION(INSTNAME ## _GENERIC); \
890+
goto INSTNAME ## _DEOPT; \
891891
}
892892

893893
#define DEOPT_UNLOCK_IF(COND, INSTNAME) \
@@ -896,7 +896,7 @@ GETITEM(PyObject *v, Py_ssize_t i) {
896896
UPDATE_MISS_STATS((INSTNAME)); \
897897
assert(_PyOpcode_Deopt[opcode] == (INSTNAME)); \
898898
_Py_critical_section_end(&_cs); \
899-
GO_TO_INSTRUCTION(INSTNAME ## _GENERIC); \
899+
goto INSTNAME ## _DEOPT; \
900900
}
901901

902902

@@ -955,11 +955,17 @@ GETITEM(PyObject *v, Py_ssize_t i) {
955955
#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \
956956
(((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1))
957957

958-
#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
959-
do { \
960-
assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \
961-
(COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \
962-
} while (0);
958+
/* Step the adaptive counter at *ptr down by one unit, or report that it
 * has already run out.
 *
 * Returns 1, leaving *ptr untouched, when ADAPTIVE_COUNTER_IS_ZERO() holds
 * for the loaded value; the instruction bodies in Python/bytecodes.c use a
 * non-zero return as the cue to take the runtime mutex and call the
 * matching _Py_Specialize_* function.  Otherwise subtracts
 * (1 << ADAPTIVE_BACKOFF_BITS), writes the result back and returns 0.
 *
 * NOTE(review): the load and the store are two separate relaxed atomic
 * operations rather than one read-modify-write, so a concurrent update to
 * the same counter could presumably be overwritten -- confirm that a lost
 * decrement is acceptable here.
 */
static _Py_ALWAYS_INLINE int
959+
DECREMENT_ADAPTIVE_COUNTER(uint16_t *ptr)
960+
{
961+
uint16_t counter = _Py_atomic_load_uint16_relaxed(ptr);
962+
if (ADAPTIVE_COUNTER_IS_ZERO(counter)) {
963+
return 1;  /* exhausted: nothing is written back */
964+
}
965+
counter -= (1 << ADAPTIVE_BACKOFF_BITS);  /* low ADAPTIVE_BACKOFF_BITS bits are left unchanged */
966+
_Py_atomic_store_uint16_relaxed(ptr, counter);
967+
return 0;  /* counter was decremented */
968+
}
963969

964970
#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \
965971
do { \
@@ -1334,6 +1340,57 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
13341340
or goto error. */
13351341
Py_UNREACHABLE();
13361342

1343+
BINARY_OP_DEOPT:
1344+
if (!_PyRuntime.multithreaded) {
1345+
GO_TO_INSTRUCTION(BINARY_OP);
1346+
}
1347+
GO_TO_INSTRUCTION(BINARY_OP_GENERIC);
1348+
BINARY_SUBSCR_DEOPT:
1349+
if (!_PyRuntime.multithreaded) {
1350+
GO_TO_INSTRUCTION(BINARY_SUBSCR);
1351+
}
1352+
GO_TO_INSTRUCTION(BINARY_SUBSCR_GENERIC);
1353+
CALL_DEOPT:
1354+
if (!_PyRuntime.multithreaded) {
1355+
GO_TO_INSTRUCTION(CALL);
1356+
}
1357+
GO_TO_INSTRUCTION(CALL_GENERIC);
1358+
COMPARE_OP_DEOPT:
1359+
if (!_PyRuntime.multithreaded) {
1360+
GO_TO_INSTRUCTION(COMPARE_OP);
1361+
}
1362+
GO_TO_INSTRUCTION(COMPARE_OP_GENERIC);
1363+
FOR_ITER_DEOPT:
1364+
if (!_PyRuntime.multithreaded) {
1365+
GO_TO_INSTRUCTION(FOR_ITER);
1366+
}
1367+
GO_TO_INSTRUCTION(FOR_ITER_GENERIC);
1368+
LOAD_ATTR_DEOPT:
1369+
if (!_PyRuntime.multithreaded) {
1370+
GO_TO_INSTRUCTION(LOAD_ATTR);
1371+
}
1372+
GO_TO_INSTRUCTION(LOAD_ATTR_GENERIC);
1373+
LOAD_GLOBAL_DEOPT:
1374+
if (!_PyRuntime.multithreaded) {
1375+
GO_TO_INSTRUCTION(LOAD_GLOBAL);
1376+
}
1377+
GO_TO_INSTRUCTION(LOAD_GLOBAL_GENERIC);
1378+
STORE_ATTR_DEOPT:
1379+
if (!_PyRuntime.multithreaded) {
1380+
GO_TO_INSTRUCTION(STORE_ATTR);
1381+
}
1382+
GO_TO_INSTRUCTION(STORE_ATTR_GENERIC);
1383+
STORE_SUBSCR_DEOPT:
1384+
if (!_PyRuntime.multithreaded) {
1385+
GO_TO_INSTRUCTION(STORE_SUBSCR);
1386+
}
1387+
GO_TO_INSTRUCTION(STORE_SUBSCR_GENERIC);
1388+
UNPACK_SEQUENCE_DEOPT:
1389+
if (!_PyRuntime.multithreaded) {
1390+
GO_TO_INSTRUCTION(UNPACK_SEQUENCE);
1391+
}
1392+
GO_TO_INSTRUCTION(UNPACK_SEQUENCE_GENERIC);
1393+
13371394
unbound_local_error:
13381395
{
13391396
format_exc_check_arg(tstate, PyExc_UnboundLocalError,

0 commit comments

Comments
 (0)