Mailing List Archive

python/dist/src/Modules gcmodule.c,2.45,2.46
Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv24796/python/Modules

Modified Files:
gcmodule.c
Log Message:
SF bug #574132: Major GC related performance regression
"The regression" is actually due to that 2.2.1 had a bug that prevented
the regression (which isn't a regression at all) from showing up. "The
regression" is actually a glitch in cyclic gc that's been there forever.

As the generation being collected is analyzed, objects that can't be
collected (because, e.g., we find they're externally referenced, or
are in an unreachable cycle but have a __del__ method) are moved out
of the list of candidates. A tricksy scheme uses negative values of
gc_refs to mark such objects as being moved. However, the exact
negative value set at the start may become "more negative" over time
for objects not in the generation being collected, and the scheme was
checking for an exact match on the negative value originally assigned.
As a result, objects in generations older than the one being collected
could get scanned too, and yanked back into a younger generation. Doing
so doesn't lead to an error, but doesn't do any good, and can burn an
unbounded amount of time doing useless work.

A test case is simple (thanks to Kevin Jacobs for finding it!):

x = []
for i in xrange(200000):
x.append((1,))

Without the patch, this ends up scanning all of x on every gen0 collection,
scans all of x twice on every gen1 collection, and x gets yanked back into
gen1 on every gen0 collection. With the patch, once x gets to gen2, it's
never scanned again until another gen2 collection, and stays in gen2.

Bugfix candidate, although the code has changed enough that I think I'll
need to port it by hand. 2.2.1 also has a different bug that causes
bound method objects not to get tracked at all (so the test case doesn't
burn absurd amounts of time in 2.2.1, but *should* <wink>).


Index: gcmodule.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/gcmodule.c,v
retrieving revision 2.45
retrieving revision 2.46
diff -C2 -d -r2.45 -r2.46
*** gcmodule.c 28 Jun 2002 19:16:04 -0000 2.45
--- gcmodule.c 30 Jun 2002 17:56:40 -0000 2.46
***************
*** 1,4 ****
/*
!
Reference Cycle Garbage Collection
==================================
--- 1,4 ----
/*
!
Reference Cycle Garbage Collection
==================================
***************
*** 73,81 ****
static int debug;

! /* Special gc_refs value */
#define GC_MOVED -123

! /* True if an object has been moved to the older generation */
! #define IS_MOVED(o) ((AS_GC(o))->gc.gc_refs == GC_MOVED)

/* list of uncollectable objects */
--- 73,89 ----
static int debug;

! /* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
! * the objects in the generation being collected, called the "young"
! * generation at that point. As collection proceeds, when it's determined
! * that one of these can't be collected (e.g., because it's reachable from
! * outside, or has a __del__ method), the object is moved out of young, and
! * gc_refs is set to a negative value. The latter is so we can distinguish
! * collection candidates from non-candidates just by looking at the object.
! */
! /* Special gc_refs value, although any negative value means "moved". */
#define GC_MOVED -123

! /* True iff an object is still a candidate for collection. */
! #define STILL_A_CANDIDATE(o) ((AS_GC(o))->gc.gc_refs >= 0)

/* list of uncollectable objects */
***************
*** 117,121 ****
}

! static void
gc_list_move(PyGC_Head *from, PyGC_Head *to)
{
--- 125,129 ----
}

! static void
gc_list_move(PyGC_Head *from, PyGC_Head *to)
{
***************
*** 162,166 ****


! /* Set all gc_refs = ob_refcnt */
static void
update_refs(PyGC_Head *containers)
--- 170,177 ----


! /* Set all gc_refs = ob_refcnt. After this, STILL_A_CANDIDATE(o) is true
! * for all objects in containers, and false for all tracked gc objects not
! * in containers (although see the comment in visit_decref).
! */
static void
update_refs(PyGC_Head *containers)
***************
*** 175,181 ****
visit_decref(PyObject *op, void *data)
{
if (op && PyObject_IS_GC(op)) {
! if (IS_TRACKED(op))
AS_GC(op)->gc.gc_refs--;
}
return 0;
--- 186,204 ----
visit_decref(PyObject *op, void *data)
{
+ /* There's no point to decrementing gc_refs unless
+ * STILL_A_CANDIDATE(op) is true. It would take extra cycles to
+ * check that, though. If STILL_A_CANDIDATE(op) is false,
+ * decrementing gc_refs almost always makes it "even more negative",
+ * so doesn't change that STILL_A_CANDIDATE is false, and no harm is
+ * done. However, it's possible that, after many collections, this
+ * could underflow gc_refs in a long-lived old object. In that case,
+ * visit_move() may move the old object back to the generation
+ * getting collected. That would be a waste of time, but wouldn't
+ * cause an error.
+ */
if (op && PyObject_IS_GC(op)) {
! if (IS_TRACKED(op)) {
AS_GC(op)->gc.gc_refs--;
+ }
}
return 0;
***************
*** 196,200 ****
}

! /* Append objects with gc_refs > 0 to roots list */
static void
move_roots(PyGC_Head *containers, PyGC_Head *roots)
--- 219,223 ----
}

! /* Move objects with gc_refs > 0 to roots list. They can't be collected. */
static void
move_roots(PyGC_Head *containers, PyGC_Head *roots)
***************
*** 217,221 ****
{
if (PyObject_IS_GC(op)) {
! if (IS_TRACKED(op) && !IS_MOVED(op)) {
PyGC_Head *gc = AS_GC(op);
gc_list_remove(gc);
--- 240,244 ----
{
if (PyObject_IS_GC(op)) {
! if (IS_TRACKED(op) && STILL_A_CANDIDATE(op)) {
PyGC_Head *gc = AS_GC(op);
gc_list_remove(gc);
***************
*** 227,231 ****
}

! /* Move objects referenced from reachable to reachable set. */
static void
move_root_reachable(PyGC_Head *reachable)
--- 250,256 ----
}

! /* Move candidates referenced from reachable to reachable set (they're no
! * longer candidates).
! */
static void
move_root_reachable(PyGC_Head *reachable)
***************
*** 243,247 ****
}

! /* return true of object has a finalization method */
static int
has_finalizer(PyObject *op)
--- 268,272 ----
}

! /* return true if object has a finalization method */
static int
has_finalizer(PyObject *op)
***************
*** 270,273 ****
--- 295,299 ----
gc_list_remove(gc);
gc_list_append(gc, finalizers);
+ gc->gc.gc_refs = GC_MOVED;
}
}
***************
*** 283,287 ****
/* careful, finalizers list is growing here */
traverse = FROM_GC(gc)->ob_type->tp_traverse;
! (void) traverse(FROM_GC(gc),
(visitproc)visit_move,
(void *)finalizers);
--- 309,313 ----
/* careful, finalizers list is growing here */
traverse = FROM_GC(gc)->ob_type->tp_traverse;
! (void) traverse(FROM_GC(gc),
(visitproc)visit_move,
(void *)finalizers);
***************
*** 333,337 ****
PyList_Append(garbage, op);
}
! /* object is now reachable again */
gc_list_remove(gc);
gc_list_append(gc, old);
--- 359,364 ----
PyList_Append(garbage, op);
}
! /* object is now reachable again */
! assert(!STILL_A_CANDIDATE(op));
gc_list_remove(gc);
gc_list_append(gc, old);
***************
*** 350,353 ****
--- 377,382 ----
PyGC_Head *gc = unreachable->gc.gc_next;
PyObject *op = FROM_GC(gc);
+
+ assert(STILL_A_CANDIDATE(op));
if (debug & DEBUG_SAVEALL) {
PyList_Append(garbage, op);
***************
*** 364,367 ****
--- 393,397 ----
gc_list_remove(gc);
gc_list_append(gc, old);
+ gc->gc.gc_refs = GC_MOVED;
}
}