Skip to content

bpo-45829: Specialize BINARY_SUBSCR for __getitem__ implemented in Python. #29592

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 18, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ typedef struct {
uint8_t original_oparg;
uint8_t counter;
uint16_t index;
uint32_t version;
} _PyAdaptiveEntry;


Expand Down Expand Up @@ -266,7 +267,7 @@ int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name
int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache);
int _Py_Specialize_CallFunction(PyObject *callable, _Py_CODEUNIT *instr, int nargs, SpecializedCacheEntry *cache, PyObject *builtins);
void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
SpecializedCacheEntry *cache);
Expand Down
65 changes: 33 additions & 32 deletions Include/opcode.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Lib/opcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ def jabs_op(name, op):
"BINARY_OP_MULTIPLY_INT",
"BINARY_OP_MULTIPLY_FLOAT",
"BINARY_SUBSCR_ADAPTIVE",
"BINARY_SUBSCR_GETITEM",
"BINARY_SUBSCR_LIST_INT",
"BINARY_SUBSCR_TUPLE_INT",
"BINARY_SUBSCR_DICT",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Specialize BINARY_SUBSCR for classes with a ``__getitem__`` method
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Specialize BINARY_SUBSCR for classes with a ``__getitem__`` method
Specialize :opcode:`BINARY_SUBSCR` for classes with a ``__getitem__`` method

implemented in Python
45 changes: 38 additions & 7 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -2075,21 +2075,21 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
}

TARGET(BINARY_SUBSCR_ADAPTIVE) {
if (oparg == 0) {
SpecializedCacheEntry *cache = GET_CACHE();
if (cache->adaptive.counter == 0) {
PyObject *sub = TOP();
PyObject *container = SECOND();
next_instr--;
if (_Py_Specialize_BinarySubscr(container, sub, next_instr) < 0) {
if (_Py_Specialize_BinarySubscr(container, sub, next_instr, cache) < 0) {
goto error;
}
DISPATCH();
}
else {
STAT_INC(BINARY_SUBSCR, deferred);
// oparg is the adaptive cache counter
UPDATE_PREV_INSTR_OPARG(next_instr, oparg - 1);
assert(_Py_OPCODE(next_instr[-1]) == BINARY_SUBSCR_ADAPTIVE);
assert(_Py_OPARG(next_instr[-1]) == oparg - 1);
cache->adaptive.counter--;
assert(cache->adaptive.original_oparg == 0);
oparg = 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this just to clarify that the oparg is still unused (even though we use cache entries)?

STAT_DEC(BINARY_SUBSCR, unquickened);
JUMP_TO_INSTRUCTION(BINARY_SUBSCR);
}
Expand Down Expand Up @@ -2158,6 +2158,37 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
DISPATCH();
}

TARGET(BINARY_SUBSCR_GETITEM) {
PyObject *sub = TOP();
PyObject *container = SECOND();
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
_PyObjectCache *cache1 = &caches[-1].obj;
PyFunctionObject *getitem = (PyFunctionObject *)cache1->obj;
DEOPT_IF(Py_TYPE(container)->tp_version_tag != cache0->version, BINARY_SUBSCR);
DEOPT_IF(getitem->func_version != cache0->index, BINARY_SUBSCR);
PyCodeObject *code = (PyCodeObject *)getitem->func_code;
size_t size = code->co_nlocalsplus + code->co_stacksize + FRAME_SPECIALS_SIZE;
assert(code->co_argcount == 2);
InterpreterFrame *new_frame = _PyThreadState_BumpFramePointer(tstate, size);
if (new_frame == NULL) {
goto error;
}
_PyFrame_InitializeSpecials(new_frame, PyFunction_AS_FRAME_CONSTRUCTOR(getitem),
NULL, code->co_nlocalsplus);
STACK_SHRINK(2);
new_frame->localsplus[0] = container;
new_frame->localsplus[1] = sub;
for (int i = 2; i < code->co_nlocalsplus; i++) {
new_frame->localsplus[i] = NULL;
}
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
frame = cframe.current_frame = new_frame;
new_frame->depth = frame->depth + 1;
goto start_frame;
}

TARGET(LIST_APPEND) {
PyObject *v = POP();
PyObject *list = PEEK(oparg);
Expand Down Expand Up @@ -4914,7 +4945,7 @@ MISS_WITH_CACHE(LOAD_GLOBAL)
MISS_WITH_CACHE(LOAD_METHOD)
MISS_WITH_CACHE(CALL_FUNCTION)
MISS_WITH_CACHE(BINARY_OP)
MISS_WITH_OPARG_COUNTER(BINARY_SUBSCR)
MISS_WITH_CACHE(BINARY_SUBSCR)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing it's still worth keeping the logic for oparg counters, just in case we implement a new family that can uses it in the future?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might as well delete it. It's in the git history.


binary_subscr_dict_error:
{
Expand Down
14 changes: 7 additions & 7 deletions Python/opcode_targets.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

85 changes: 60 additions & 25 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ static uint8_t cache_requirements[256] = {
[LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
[LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
[LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
[BINARY_SUBSCR] = 0,
[BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
[CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
[STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
[BINARY_OP] = 1, // _PyAdaptiveEntry
Expand Down Expand Up @@ -1100,7 +1100,7 @@ _Py_Specialize_LoadGlobal(

#if COLLECT_SPECIALIZATION_STATS_DETAILED
static int
binary_subscr_faiL_kind(PyTypeObject *container_type, PyObject *sub)
binary_subscr_fail_kind(PyTypeObject *container_type, PyObject *sub)
{
if (container_type == &PyUnicode_Type) {
if (PyLong_CheckExact(sub)) {
Expand Down Expand Up @@ -1138,14 +1138,36 @@ binary_subscr_faiL_kind(PyTypeObject *container_type, PyObject *sub)
}
#endif

_Py_IDENTIFIER(__getitem__);

static int
function_kind(PyCodeObject *code) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
function_kind(PyCodeObject *code) {
function_spec_fail_kind(PyCodeObject *code) {

int flags = code->co_flags;
if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
return SPEC_FAIL_GENERATOR;
return -1;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return -1;

}
if ((flags & (CO_VARKEYWORDS | CO_VARARGS)) || code->co_kwonlyargcount) {
return SPEC_FAIL_COMPLEX_PARAMETERS;
}
if ((flags & CO_OPTIMIZED) == 0) {
return SPEC_FAIL_CO_NOT_OPTIMIZED;
}
if (code->co_nfreevars) {
return SPEC_FAIL_FREE_VARS;
}
return 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really like this, since 0 is a valid failure code. Perhaps return -1 on success and check for that instead (it's sort of a weird interface, but whatever).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll add a success code, for clarity.

In many other C programs, yes 0 is a failure.
However, in CPython, 0 is usually a success, as -1 (or any negative number in some cases) is a failure.
E.g. PyObject_RichCompareBool return 0 for False, and -1 for an error.

compile.c is an unfortunate counter example, where some functions return 0 for a failure and others return 0 for a success.

}

int
_Py_Specialize_BinarySubscr(
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr)
PyObject *container, PyObject *sub, _Py_CODEUNIT *instr, SpecializedCacheEntry *cache)
{
_PyAdaptiveEntry *cache0 = &cache->adaptive;
PyTypeObject *container_type = Py_TYPE(container);
if (container_type == &PyList_Type) {
if (PyLong_CheckExact(sub)) {
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, initial_counter_value());
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_LIST_INT, _Py_OPARG(*instr));
goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR,
Expand All @@ -1154,28 +1176,54 @@ _Py_Specialize_BinarySubscr(
}
if (container_type == &PyTuple_Type) {
if (PyLong_CheckExact(sub)) {
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, initial_counter_value());
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_TUPLE_INT, _Py_OPARG(*instr));
goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR,
PySlice_Check(sub) ? SPEC_FAIL_TUPLE_SLICE : SPEC_FAIL_OTHER);
goto fail;
}
if (container_type == &PyDict_Type) {
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, initial_counter_value());
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_DICT, _Py_OPARG(*instr));
goto success;
}
PyTypeObject *cls = Py_TYPE(container);
PyObject *descriptor = _PyType_LookupId(cls, &PyId___getitem__);
if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) {
PyFunctionObject *func = (PyFunctionObject *)descriptor;
PyCodeObject *code = (PyCodeObject *)func->func_code;
int kind = function_kind(code);
if (kind) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, kind);
goto fail;
}
if (code->co_argcount != 2) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
goto fail;
}
assert(cls->tp_version_tag != 0);
cache0->version = cls->tp_version_tag;
int version = _PyFunction_GetVersionForCurrentState(func);
if (version == 0) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
goto fail;
}
cache0->index = version;
cache[-1].obj.obj = descriptor;
*instr = _Py_MAKECODEUNIT(BINARY_SUBSCR_GETITEM, _Py_OPARG(*instr));
goto success;
}
SPECIALIZATION_FAIL(BINARY_SUBSCR,
binary_subscr_faiL_kind(container_type, sub));
goto fail;
binary_subscr_fail_kind(container_type, sub));
fail:
STAT_INC(BINARY_SUBSCR, specialization_failure);
assert(!PyErr_Occurred());
*instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), ADAPTIVE_CACHE_BACKOFF);
cache_backoff(cache0);
return 0;
success:
STAT_INC(BINARY_SUBSCR, specialization_success);
assert(!PyErr_Occurred());
cache0->counter = initial_counter_value();
return 0;
}

Expand All @@ -1194,23 +1242,10 @@ specialize_py_call(
int nargs, SpecializedCacheEntry *cache)
{
_PyCallCache *cache1 = &cache[-1].call;
/* Exclude generator or coroutines for now */
PyCodeObject *code = (PyCodeObject *)func->func_code;
int flags = code->co_flags;
if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_GENERATOR);
return -1;
}
if ((flags & (CO_VARKEYWORDS | CO_VARARGS)) || code->co_kwonlyargcount) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_COMPLEX_PARAMETERS);
return -1;
}
if ((flags & CO_OPTIMIZED) == 0) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CO_NOT_OPTIMIZED);
return -1;
}
if (code->co_nfreevars) {
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_FREE_VARS);
int kind = function_kind(code);
if (kind) {
SPECIALIZATION_FAIL(CALL_FUNCTION, kind);
return -1;
}
int argcount = code->co_argcount;
Expand Down