Mailing List Archive

gh-117439: Make refleak checking thread-safe without the GIL (#117469)
https://github.com/python/cpython/commit/1a6594f66166206b08f24c3ba633c85f86f99a56
commit: 1a6594f66166206b08f24c3ba633c85f86f99a56
branch: main
author: Sam Gross <colesbury@gmail.com>
committer: colesbury <colesbury@gmail.com>
date: 2024-04-08T12:11:36-04:00
summary:

gh-117439: Make refleak checking thread-safe without the GIL (#117469)

This keeps track of the per-thread total reference count operations in
PyThreadState in the free-threaded builds. The count is merged into the
interpreter's total when the thread exits.

files:
M Include/internal/pycore_object.h
M Include/internal/pycore_tstate.h
M Objects/bytesobject.c
M Objects/dictobject.c
M Objects/object.c
M Objects/tupleobject.c
M Objects/unicodeobject.c
M Python/gc_free_threading.c
M Python/pystate.c

diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index 1e1b664000f108..9aa2e5bf918a7b 100644
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@@ -86,9 +86,9 @@ PyAPI_FUNC(void) _Py_NO_RETURN _Py_FatalRefcountErrorFunc(
built against the pre-3.12 stable ABI. */
PyAPI_DATA(Py_ssize_t) _Py_RefTotal;

-extern void _Py_AddRefTotal(PyInterpreterState *, Py_ssize_t);
-extern void _Py_IncRefTotal(PyInterpreterState *);
-extern void _Py_DecRefTotal(PyInterpreterState *);
+extern void _Py_AddRefTotal(PyThreadState *, Py_ssize_t);
+extern void _Py_IncRefTotal(PyThreadState *);
+extern void _Py_DecRefTotal(PyThreadState *);

# define _Py_DEC_REFTOTAL(interp) \
interp->object_state.reftotal--
@@ -101,7 +101,7 @@ static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n)
return;
}
#ifdef Py_REF_DEBUG
- _Py_AddRefTotal(_PyInterpreterState_GET(), n);
+ _Py_AddRefTotal(_PyThreadState_GET(), n);
#endif
#if !defined(Py_GIL_DISABLED)
op->ob_refcnt += n;
@@ -393,7 +393,7 @@ _Py_TryIncrefFast(PyObject *op) {
_Py_INCREF_STAT_INC();
_Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local);
#ifdef Py_REF_DEBUG
- _Py_IncRefTotal(_PyInterpreterState_GET());
+ _Py_IncRefTotal(_PyThreadState_GET());
#endif
return 1;
}
@@ -416,7 +416,7 @@ _Py_TryIncRefShared(PyObject *op)
&shared,
shared + (1 << _Py_REF_SHARED_SHIFT))) {
#ifdef Py_REF_DEBUG
- _Py_IncRefTotal(_PyInterpreterState_GET());
+ _Py_IncRefTotal(_PyThreadState_GET());
#endif
_Py_INCREF_STAT_INC();
return 1;
diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h
index e268e6fbbb087b..733e3172a1c0ff 100644
--- a/Include/internal/pycore_tstate.h
+++ b/Include/internal/pycore_tstate.h
@@ -38,6 +38,10 @@ typedef struct _PyThreadStateImpl {
struct _brc_thread_state brc;
#endif

+#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
+ Py_ssize_t reftotal; // this thread's total refcount operations
+#endif
+
} _PyThreadStateImpl;


diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index d7b0c6b7b01aa9..d576dd93f05e10 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3118,7 +3118,7 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
if (*pv == NULL) {
#ifdef Py_REF_DEBUG
- _Py_DecRefTotal(_PyInterpreterState_GET());
+ _Py_DecRefTotal(_PyThreadState_GET());
#endif
PyObject_Free(v);
PyErr_NoMemory();
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 9218b1aa470663..e7993e4b051433 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -445,7 +445,7 @@ dictkeys_incref(PyDictKeysObject *dk)
return;
}
#ifdef Py_REF_DEBUG
- _Py_IncRefTotal(_PyInterpreterState_GET());
+ _Py_IncRefTotal(_PyThreadState_GET());
#endif
INCREF_KEYS(dk);
}
@@ -458,7 +458,7 @@ dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk, bool use_qsbr)
}
assert(dk->dk_refcnt > 0);
#ifdef Py_REF_DEBUG
- _Py_DecRefTotal(_PyInterpreterState_GET());
+ _Py_DecRefTotal(_PyThreadState_GET());
#endif
if (DECREF_KEYS(dk) == 1) {
if (DK_IS_UNICODE(dk)) {
@@ -790,7 +790,7 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode)
}
}
#ifdef Py_REF_DEBUG
- _Py_IncRefTotal(_PyInterpreterState_GET());
+ _Py_IncRefTotal(_PyThreadState_GET());
#endif
dk->dk_refcnt = 1;
dk->dk_log2_size = log2_size;
@@ -978,7 +978,7 @@ clone_combined_dict_keys(PyDictObject *orig)
we have it now; calling dictkeys_incref would be an error as
keys->dk_refcnt is already set to 1 (after memcpy). */
#ifdef Py_REF_DEBUG
- _Py_IncRefTotal(_PyInterpreterState_GET());
+ _Py_IncRefTotal(_PyThreadState_GET());
#endif
return keys;
}
@@ -2021,7 +2021,7 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp,

if (oldkeys != Py_EMPTY_KEYS) {
#ifdef Py_REF_DEBUG
- _Py_DecRefTotal(_PyInterpreterState_GET());
+ _Py_DecRefTotal(_PyThreadState_GET());
#endif
assert(oldkeys->dk_kind != DICT_KEYS_SPLIT);
assert(oldkeys->dk_refcnt == 1);
diff --git a/Objects/object.c b/Objects/object.c
index 60642d899bcafa..c8e6f8fc1a2b40 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -73,21 +73,16 @@ get_legacy_reftotal(void)
interp->object_state.reftotal

static inline void
-reftotal_increment(PyInterpreterState *interp)
+reftotal_add(PyThreadState *tstate, Py_ssize_t n)
{
- REFTOTAL(interp)++;
-}
-
-static inline void
-reftotal_decrement(PyInterpreterState *interp)
-{
- REFTOTAL(interp)--;
-}
-
-static inline void
-reftotal_add(PyInterpreterState *interp, Py_ssize_t n)
-{
- REFTOTAL(interp) += n;
+#ifdef Py_GIL_DISABLED
+ _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
+ // relaxed store to avoid data race with read in get_reftotal()
+ Py_ssize_t reftotal = tstate_impl->reftotal + n;
+ _Py_atomic_store_ssize_relaxed(&tstate_impl->reftotal, reftotal);
+#else
+ REFTOTAL(tstate->interp) += n;
+#endif
}

static inline Py_ssize_t get_global_reftotal(_PyRuntimeState *);
@@ -117,7 +112,15 @@ get_reftotal(PyInterpreterState *interp)
{
/* For a single interpreter, we ignore the legacy _Py_RefTotal,
since we can't determine which interpreter updated it. */
- return REFTOTAL(interp);
+ Py_ssize_t total = REFTOTAL(interp);
+#ifdef Py_GIL_DISABLED
+ for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
+ /* This may race with other threads modifications to their reftotal */
+ _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)p;
+ total += _Py_atomic_load_ssize_relaxed(&tstate_impl->reftotal);
+ }
+#endif
+ return total;
}

static inline Py_ssize_t
@@ -129,7 +132,7 @@ get_global_reftotal(_PyRuntimeState *runtime)
HEAD_LOCK(&_PyRuntime);
PyInterpreterState *interp = PyInterpreterState_Head();
for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
- total += REFTOTAL(interp);
+ total += get_reftotal(interp);
}
HEAD_UNLOCK(&_PyRuntime);

@@ -222,32 +225,32 @@ _Py_NegativeRefcount(const char *filename, int lineno, PyObject *op)
void
_Py_INCREF_IncRefTotal(void)
{
- reftotal_increment(_PyInterpreterState_GET());
+ reftotal_add(_PyThreadState_GET(), 1);
}

/* This is used strictly by Py_DECREF(). */
void
_Py_DECREF_DecRefTotal(void)
{
- reftotal_decrement(_PyInterpreterState_GET());
+ reftotal_add(_PyThreadState_GET(), -1);
}

void
-_Py_IncRefTotal(PyInterpreterState *interp)
+_Py_IncRefTotal(PyThreadState *tstate)
{
- reftotal_increment(interp);
+ reftotal_add(tstate, 1);
}

void
-_Py_DecRefTotal(PyInterpreterState *interp)
+_Py_DecRefTotal(PyThreadState *tstate)
{
- reftotal_decrement(interp);
+ reftotal_add(tstate, -1);
}

void
-_Py_AddRefTotal(PyInterpreterState *interp, Py_ssize_t n)
+_Py_AddRefTotal(PyThreadState *tstate, Py_ssize_t n)
{
- reftotal_add(interp, n);
+ reftotal_add(tstate, n);
}

/* This includes the legacy total
@@ -267,7 +270,10 @@ _Py_GetLegacyRefTotal(void)
Py_ssize_t
_PyInterpreterState_GetRefTotal(PyInterpreterState *interp)
{
- return get_reftotal(interp);
+ HEAD_LOCK(&_PyRuntime);
+ Py_ssize_t total = get_reftotal(interp);
+ HEAD_UNLOCK(&_PyRuntime);
+ return total;
}

#endif /* Py_REF_DEBUG */
@@ -345,7 +351,7 @@ _Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno)

if (should_queue) {
#ifdef Py_REF_DEBUG
- _Py_IncRefTotal(_PyInterpreterState_GET());
+ _Py_IncRefTotal(_PyThreadState_GET());
#endif
_Py_brc_queue_object(o);
}
@@ -405,7 +411,7 @@ _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra)
&shared, new_shared));

#ifdef Py_REF_DEBUG
- _Py_AddRefTotal(_PyInterpreterState_GET(), extra);
+ _Py_AddRefTotal(_PyThreadState_GET(), extra);
#endif

_Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0);
@@ -2376,7 +2382,7 @@ void
_Py_NewReference(PyObject *op)
{
#ifdef Py_REF_DEBUG
- reftotal_increment(_PyInterpreterState_GET());
+ _Py_IncRefTotal(_PyThreadState_GET());
#endif
new_reference(op);
}
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index d9dc00da368a84..5ae1ee9a89af84 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -946,7 +946,7 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize)
if (sv == NULL) {
*pv = NULL;
#ifdef Py_REF_DEBUG
- _Py_DecRefTotal(_PyInterpreterState_GET());
+ _Py_DecRefTotal(_PyThreadState_GET());
#endif
PyObject_GC_Del(v);
return -1;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e135638c696fa4..59b350f0a609a6 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -14916,7 +14916,7 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
decrements to these objects will not be registered so they
need to be accounted for in here. */
for (Py_ssize_t i = 0; i < Py_REFCNT(s) - 2; i++) {
- _Py_DecRefTotal(_PyInterpreterState_GET());
+ _Py_DecRefTotal(_PyThreadState_GET());
}
#endif
_Py_SetImmortal(s);
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index 7e4137a8e342b1..111632ffb77641 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -168,7 +168,7 @@ merge_refcount(PyObject *op, Py_ssize_t extra)
refcount += extra;

#ifdef Py_REF_DEBUG
- _Py_AddRefTotal(_PyInterpreterState_GET(), extra);
+ _Py_AddRefTotal(_PyThreadState_GET(), extra);
#endif

// No atomics necessary; all other threads in this interpreter are paused.
@@ -307,7 +307,7 @@ merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
// decref and deallocate the object once we start the world again.
op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT);
#ifdef Py_REF_DEBUG
- _Py_IncRefTotal(_PyInterpreterState_GET());
+ _Py_IncRefTotal(_PyThreadState_GET());
#endif
worklist_push(&state->objs_to_decref, op);
}
diff --git a/Python/pystate.c b/Python/pystate.c
index cee481c564b0cb..4a52f6444ba10a 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -1698,6 +1698,14 @@ tstate_delete_common(PyThreadState *tstate)
decrement_stoptheworld_countdown(&runtime->stoptheworld);
}
}
+
+#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
+ // Add our portion of the total refcount to the interpreter's total.
+ _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
+ tstate->interp->object_state.reftotal += tstate_impl->reftotal;
+ tstate_impl->reftotal = 0;
+#endif
+
HEAD_UNLOCK(runtime);

#ifdef Py_GIL_DISABLED

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-leave@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: list-python-checkins@lists.gossamer-threads.com