Mailing List Archive

bpo-42093: Add opcode cache for LOAD_ATTR (GH-22803)
https://github.com/python/cpython/commit/109826c8508dd02e06ae0f1784f1d202495a8680
commit: 109826c8508dd02e06ae0f1784f1d202495a8680
branch: master
author: Pablo Galindo <Pablogsal@gmail.com>
committer: GitHub <noreply@github.com>
date: 2020-10-20T06:22:44+01:00
summary:

bpo-42093: Add opcode cache for LOAD_ATTR (GH-22803)

files:
A Misc/NEWS.d/next/Core and Builtins/2020-10-20-04-24-07.bpo-42093.ooZZNh.rst
M Doc/whatsnew/3.10.rst
M Include/cpython/dictobject.h
M Include/internal/pycore_code.h
M Objects/codeobject.c
M Objects/dictobject.c
M Python/ceval.c

diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index f57e1b412378e..e275a7cb4573f 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -252,6 +252,9 @@ Optimizations
average.
(Contributed by Victor Stinner in :issue:`41006`.)

+* The ``LOAD_ATTR`` instruction now uses the new "per opcode cache" mechanism.
+ It is about 36% faster now. (Contributed by Pablo Galindo and Yury Selivanov
+ in :issue:`42093`.)

Deprecated
==========
diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h
index 5a15630cfbac7..f67c3214ddd9a 100644
--- a/Include/cpython/dictobject.h
+++ b/Include/cpython/dictobject.h
@@ -71,6 +71,7 @@ PyAPI_FUNC(void) _PyDict_DebugMallocStats(FILE *out);

int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
+Py_ssize_t _PyDict_GetItemHint(PyDictObject *, PyObject *, Py_ssize_t, PyObject **);

/* _PyDictView */

diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 88956f109b4f7..f1e89d96b9ebb 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -10,9 +10,16 @@ typedef struct {
uint64_t builtins_ver; /* ma_version of builtin dict */
} _PyOpcache_LoadGlobal;

+typedef struct {
+ PyTypeObject *type; /* type of the owner when the entry was cached */
+ Py_ssize_t hint; /* entry index returned by _PyDict_GetItemHint() */
+ unsigned int tp_version_tag; /* type->tp_version_tag when the entry was cached */
+} _PyOpCodeOpt_LoadAttr;
+
struct _PyOpcache {
union {
_PyOpcache_LoadGlobal lg;
+ _PyOpCodeOpt_LoadAttr la;
} u;
char optimized;
};
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-20-04-24-07.bpo-42093.ooZZNh.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-20-04-24-07.bpo-42093.ooZZNh.rst
new file mode 100644
index 0000000000000..36a12c1c1cb58
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-20-04-24-07.bpo-42093.ooZZNh.rst
@@ -0,0 +1,2 @@
+The ``LOAD_ATTR`` instruction now uses the new "per opcode cache" mechanism and
+it is about 36% faster now. Patch by Pablo Galindo and Yury Selivanov.
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 4ca22fc5029b8..c86d0e1f4ab71 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -301,8 +301,8 @@ _PyCode_InitOpcache(PyCodeObject *co)
unsigned char opcode = _Py_OPCODE(opcodes[i]);
i++; // 'i' is now aligned to (next_instr - first_instr)

- // TODO: LOAD_METHOD, LOAD_ATTR
- if (opcode == LOAD_GLOBAL) {
+ // TODO: LOAD_METHOD
+ if (opcode == LOAD_GLOBAL || opcode == LOAD_ATTR) {
opts++;
co->co_opcache_map[i] = (unsigned char)opts;
if (opts > 254) {
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 6c3fc62d2ecc7..8e74962345286 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1437,6 +1437,71 @@ PyDict_GetItem(PyObject *op, PyObject *key)
return value;
}

+/* Look up `key` in `mp`, first probing the entry at index `hint` (normally
+   the value returned by a previous call) and falling back to a regular
+   hashed lookup when the hint does not match.
+
+   `mp` must be an exact dict, `key` an exact str, and `*value` must be NULL
+   on entry.
+
+   On a hint match the value is stored in `*value` (no reference is taken
+   here) and `hint` is returned.  Otherwise the index reported by the table's
+   lookup function is returned, with `*value` filled in by that function.
+   Returns -1 when the key cannot be hashed or the lookup function reports a
+   negative index.  Errors raised during the lookup are discarded; an
+   exception that was already pending on entry is preserved across the
+   lookup. */
+Py_ssize_t
+_PyDict_GetItemHint(PyDictObject *mp, PyObject *key,
+                    Py_ssize_t hint, PyObject **value)
+{
+    Py_hash_t hash;
+    PyThreadState *tstate;
+
+    assert(*value == NULL);
+    assert(PyDict_CheckExact((PyObject*)mp));
+    assert(PyUnicode_CheckExact(key));
+
+    /* Bound the hint by the number of entries in the keys table.
+       _PyDict_KeysSize() is a size in bytes, so comparing against it let a
+       stale hint index past the end of the entries array. */
+    if (hint >= 0 && hint < mp->ma_keys->dk_nentries) {
+        PyObject *res = NULL;
+
+        PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys) + (size_t)hint;
+        /* Identity comparison only: a different but equal key object falls
+           through to the full lookup below. */
+        if (ep->me_key == key) {
+            if (mp->ma_keys->dk_lookup == lookdict_split) {
+                assert(mp->ma_values != NULL);
+                res = mp->ma_values[(size_t)hint];
+            }
+            else {
+                res = ep->me_value;
+            }
+            if (res != NULL) {
+                *value = res;
+                return hint;
+            }
+        }
+    }
+
+    /* Use the hash cached on the str object when it is available. */
+    hash = ((PyASCIIObject *) key)->hash;
+    if (hash == -1) {
+        hash = PyObject_Hash(key);
+        if (hash == -1) {
+            PyErr_Clear();
+            return -1;
+        }
+    }
+
+    // We can arrive here with a NULL tstate during initialization: try
+    // running "python -Wi" for an example related to string interning
+    tstate = _PyThreadState_UncheckedGet();
+    Py_ssize_t ix = 0;
+    if (tstate != NULL && tstate->curexc_type != NULL) {
+        /* preserve the existing exception */
+        PyObject *err_type, *err_value, *err_tb;
+        PyErr_Fetch(&err_type, &err_value, &err_tb);
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, value);
+        /* ignore errors */
+        PyErr_Restore(err_type, err_value, err_tb);
+        if (ix < 0) {
+            return -1;
+        }
+    }
+    else {
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, value);
+        if (ix < 0) {
+            PyErr_Clear();
+            return -1;
+        }
+    }
+
+    return ix;
+}
+
/* Same as PyDict_GetItemWithError() but with hash supplied by caller.
This returns NULL *with* an exception set if an exception occurred.
It returns NULL *without* an exception set if the key wasn't present.
diff --git a/Python/ceval.c b/Python/ceval.c
index 762de577e6b55..fafbf7524bb84 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -111,6 +111,7 @@ static long dxp[256];
#else
#define OPCACHE_MIN_RUNS 1024 /* create opcache when code executed this time */
#endif
+#define OPCODE_CACHE_MAX_TRIES 20 /* initial 'optimized' count of a LOAD_ATTR cache entry */
#define OPCACHE_STATS 0 /* Enable stats */

#if OPCACHE_STATS
@@ -120,6 +121,12 @@ static size_t opcache_code_objects_extra_mem = 0;
static size_t opcache_global_opts = 0;
static size_t opcache_global_hits = 0;
static size_t opcache_global_misses = 0;
+
+static size_t opcache_attr_opts = 0; /* LOAD_ATTR sites optimized for the first time */
+static size_t opcache_attr_hits = 0; /* lookups where the cached hint was still valid */
+static size_t opcache_attr_misses = 0; /* optimized lookups that left the fast path */
+static size_t opcache_attr_deopts = 0; /* OPCACHE_DEOPT_LOAD_ATTR() invocations */
+static size_t opcache_attr_total = 0; /* LOAD_ATTR executions counted */
#endif


@@ -365,6 +372,25 @@ _PyEval_Fini(void)
opcache_global_opts);

fprintf(stderr, "\n");
+
+ fprintf(stderr, "-- Opcode cache LOAD_ATTR hits = %zd (%d%%)\n",
+ opcache_attr_hits,
+ (int) (100.0 * opcache_attr_hits /
+ opcache_attr_total));
+
+ fprintf(stderr, "-- Opcode cache LOAD_ATTR misses = %zd (%d%%)\n",
+ opcache_attr_misses,
+ (int) (100.0 * opcache_attr_misses /
+ opcache_attr_total));
+
+ fprintf(stderr, "-- Opcode cache LOAD_ATTR opts = %zd\n",
+ opcache_attr_opts);
+
+ fprintf(stderr, "-- Opcode cache LOAD_ATTR deopts = %zd\n",
+ opcache_attr_deopts);
+
+ fprintf(stderr, "-- Opcode cache LOAD_ATTR total = %zd\n",
+ opcache_attr_total);
#endif
}

@@ -1224,16 +1250,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
do { \
co_opcache = NULL; \
if (co->co_opcache != NULL) { \
- unsigned char co_opt_offset = \
+ unsigned char co_opcache_offset = \
co->co_opcache_map[next_instr - first_instr]; \
- if (co_opt_offset > 0) { \
- assert(co_opt_offset <= co->co_opcache_size); \
- co_opcache = &co->co_opcache[co_opt_offset - 1]; \
+ if (co_opcache_offset > 0) { \
+ assert(co_opcache_offset <= co->co_opcache_size); \
+ co_opcache = &co->co_opcache[co_opcache_offset - 1]; \
assert(co_opcache != NULL); \
} \
} \
} while (0)

+/* Deoptimize the opcode at the current instruction: mark its cache entry
+   with optimized = -1 and set the local co_opcache to NULL so the rest of
+   the handler skips the cache.  Does nothing when co_opcache is NULL.
+
+   NOTE(review): OPCACHE_CHECK() indexes co_opcache_map by instruction offset
+   (next_instr - first_instr), but the slot cleared here is indexed by the
+   cache offset read from the map.  That looks like it clears the slot of a
+   different instruction -- confirm whether
+   co_opcache_map[next_instr - first_instr] was intended. */
+#define OPCACHE_DEOPT() \
+ do { \
+ if (co_opcache != NULL) { \
+ co_opcache->optimized = -1; \
+ unsigned char co_opcache_offset = \
+ co->co_opcache_map[next_instr - first_instr]; \
+ assert(co_opcache_offset <= co->co_opcache_size); \
+ co->co_opcache_map[co_opcache_offset] = 0; \
+ co_opcache = NULL; \
+ } \
+ } while (0)
+
+/* Deoptimize a LOAD_ATTR site: record the deopt in the statistics (which
+   expands to nothing unless OPCACHE_STATS is enabled), then run
+   OPCACHE_DEOPT().  Does nothing when co_opcache is NULL. */
+#define OPCACHE_DEOPT_LOAD_ATTR() \
+ do { \
+ if (co_opcache != NULL) { \
+ OPCACHE_STAT_ATTR_DEOPT(); \
+ OPCACHE_DEOPT(); \
+ } \
+ } while (0)
+
+/* Use up one of the remaining tries of this LOAD_ATTR cache entry by
+   decrementing 'optimized', and deoptimize the site once the count reaches
+   zero or below.  The count is set to OPCODE_CACHE_MAX_TRIES when the opcode
+   is first optimized. */
+#define OPCACHE_MAYBE_DEOPT_LOAD_ATTR() \
+ do { \
+ if (co_opcache != NULL && --co_opcache->optimized <= 0) { \
+ OPCACHE_DEOPT_LOAD_ATTR(); \
+ } \
+ } while (0)
+
#if OPCACHE_STATS

#define OPCACHE_STAT_GLOBAL_HIT() \
@@ -1251,12 +1304,43 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
if (co->co_opcache != NULL) opcache_global_opts++; \
} while (0)

+/* LOAD_ATTR cache statistics.  Each macro bumps its counter only when the
+   current code object has an opcode cache (co->co_opcache != NULL). */
+#define OPCACHE_STAT_ATTR_HIT() \
+ do { \
+ if (co->co_opcache != NULL) opcache_attr_hits++; \
+ } while (0)
+
+#define OPCACHE_STAT_ATTR_MISS() \
+ do { \
+ if (co->co_opcache != NULL) opcache_attr_misses++; \
+ } while (0)
+
+#define OPCACHE_STAT_ATTR_OPT() \
+ do { \
+ if (co->co_opcache!= NULL) opcache_attr_opts++; \
+ } while (0)
+
+#define OPCACHE_STAT_ATTR_DEOPT() \
+ do { \
+ if (co->co_opcache != NULL) opcache_attr_deopts++; \
+ } while (0)
+
+#define OPCACHE_STAT_ATTR_TOTAL() \
+ do { \
+ if (co->co_opcache != NULL) opcache_attr_total++; \
+ } while (0)
+
#else /* OPCACHE_STATS */

#define OPCACHE_STAT_GLOBAL_HIT()
#define OPCACHE_STAT_GLOBAL_MISS()
#define OPCACHE_STAT_GLOBAL_OPT()

+/* Statistics disabled: the LOAD_ATTR stat macros expand to nothing. */
+#define OPCACHE_STAT_ATTR_HIT()
+#define OPCACHE_STAT_ATTR_MISS()
+#define OPCACHE_STAT_ATTR_OPT()
+#define OPCACHE_STAT_ATTR_DEOPT()
+#define OPCACHE_STAT_ATTR_TOTAL()
+
#endif

/* Start of code */
@@ -3023,7 +3107,134 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
case TARGET(LOAD_ATTR): {
PyObject *name = GETITEM(names, oparg);
PyObject *owner = TOP();
- PyObject *res = PyObject_GetAttr(owner, name);
+
+ PyTypeObject *type = Py_TYPE(owner);
+ PyObject *res;
+ PyObject **dictptr;
+ PyObject *dict;
+ _PyOpCodeOpt_LoadAttr *la;
+
+ OPCACHE_STAT_ATTR_TOTAL();
+
+ OPCACHE_CHECK();
+ if (co_opcache != NULL && PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG))
+ {
+ if (co_opcache->optimized > 0) {
+ /* Fast path -- cache hit makes LOAD_ATTR ~30% faster */
+ la = &co_opcache->u.la;
+ if (la->type == type && la->tp_version_tag == type->tp_version_tag)
+ {
+ assert(type->tp_dict != NULL);
+ assert(type->tp_dictoffset > 0);
+
+ dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
+ dict = *dictptr;
+ if (dict != NULL && PyDict_CheckExact(dict)) {
+ Py_ssize_t hint = la->hint;
+ Py_INCREF(dict);
+ res = NULL;
+ la->hint = _PyDict_GetItemHint((PyDictObject*)dict, name, hint, &res);
+
+ if (res != NULL) {
+ if (la->hint == hint && hint >= 0) {
+ /* Our hint has helped -- cache hit. */
+ OPCACHE_STAT_ATTR_HIT();
+ } else {
+ /* The hint we provided didn't work.
+ Maybe next time? */
+ OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
+ }
+
+ Py_INCREF(res);
+ SET_TOP(res);
+ Py_DECREF(owner);
+ Py_DECREF(dict);
+ DISPATCH();
+ } else {
+ // This attribute can be missing sometimes -- we
+ // don't want to optimize this lookup.
+ OPCACHE_DEOPT_LOAD_ATTR();
+ Py_DECREF(dict);
+ }
+ } else {
+ // There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact
+ OPCACHE_DEOPT_LOAD_ATTR();
+ }
+ } else {
+ // The type of the object has either been updated,
+ // or is different. Maybe it will stabilize?
+ OPCACHE_MAYBE_DEOPT_LOAD_ATTR();
+ }
+
+ OPCACHE_STAT_ATTR_MISS();
+ }
+
+ if (co_opcache != NULL && /* co_opcache can be NULL after a DEOPT() call. */
+ type->tp_getattro == PyObject_GenericGetAttr)
+ {
+ PyObject *descr;
+ Py_ssize_t ret;
+
+ if (type->tp_dictoffset > 0) {
+ if (type->tp_dict == NULL) {
+ if (PyType_Ready(type) < 0) {
+ Py_DECREF(owner);
+ SET_TOP(NULL);
+ goto error;
+ }
+ }
+
+ descr = _PyType_Lookup(type, name);
+ if (descr == NULL ||
+ descr->ob_type->tp_descr_get == NULL ||
+ !PyDescr_IsData(descr))
+ {
+ dictptr = (PyObject **) ((char *)owner + type->tp_dictoffset);
+ dict = *dictptr;
+
+ if (dict != NULL && PyDict_CheckExact(dict)) {
+ Py_INCREF(dict);
+ res = NULL;
+ ret = _PyDict_GetItemHint((PyDictObject*)dict, name, -1, &res);
+ if (res != NULL) {
+ Py_INCREF(res);
+ Py_DECREF(dict);
+ Py_DECREF(owner);
+ SET_TOP(res);
+
+ if (co_opcache->optimized == 0) {
+ // First time we optimize this opcode.
+ OPCACHE_STAT_ATTR_OPT();
+ co_opcache->optimized = OPCODE_CACHE_MAX_TRIES;
+ }
+
+ la = &co_opcache->u.la;
+ la->type = type;
+ la->tp_version_tag = type->tp_version_tag;
+ la->hint = ret;
+
+ DISPATCH();
+ }
+ Py_DECREF(dict);
+ } else {
+ // There is no dict, or __dict__ doesn't satisfy PyDict_CheckExact
+ OPCACHE_DEOPT_LOAD_ATTR();
+ }
+ } else {
+ // We failed to find an attribute without a data-like descriptor
+ OPCACHE_DEOPT_LOAD_ATTR();
+ }
+ } else {
+ // The object's class does not have a tp_dictoffset we can use
+ OPCACHE_DEOPT_LOAD_ATTR();
+ }
+ } else if (type->tp_getattro != PyObject_GenericGetAttr) {
+ OPCACHE_DEOPT_LOAD_ATTR();
+ }
+ }
+
+ /* slow path */
+ res = PyObject_GetAttr(owner, name);
Py_DECREF(owner);
SET_TOP(res);
if (res == NULL)

_______________________________________________
Python-checkins mailing list
Python-checkins@python.org
https://mail.python.org/mailman/listinfo/python-checkins