From b6ec9924fbbba6d0662d59438c72889761c85563 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 27 Apr 2026 00:19:06 +0500 Subject: [PATCH 1/6] Add heap_size to _gc_runtime_state --- Include/internal/pycore_gc.h | 6 +++++- Include/internal/pycore_interp_structs.h | 6 +++++- Lib/test/test_gc.py | 9 +++++++++ Modules/_testinternalcapi.c | 3 +-- Python/gc.c | 2 ++ 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index e105677cd2e674..bfe52f42f1141c 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -223,12 +223,14 @@ static inline void _PyObject_GC_TRACK( "object is in generation which is garbage collected", filename, lineno, __func__); - PyGC_Head *generation0 = _PyInterpreterState_GET()->gc.generation0; + struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc; + PyGC_Head *generation0 = gcstate->generation0; PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); _PyGCHead_SET_NEXT(last, gc); _PyGCHead_SET_PREV(gc, last); _PyGCHead_SET_NEXT(gc, generation0); generation0->_gc_prev = (uintptr_t)gc; + gcstate->heap_size++; #endif } @@ -263,6 +265,8 @@ static inline void _PyObject_GC_UNTRACK( _PyGCHead_SET_PREV(next, prev); gc->_gc_next = 0; gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED; + struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc; + gcstate->heap_size--; #endif } diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 349044116b9d18..4448744f168db6 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -244,6 +244,9 @@ struct _gc_runtime_state { /* a list of callbacks to be invoked when collection is performed */ PyObject *callbacks; + /* The number of live objects. */ + Py_ssize_t heap_size; + /* This is the number of objects that survived the last full collection. It approximates the number of long lived objects tracked by the GC. 
@@ -278,7 +281,8 @@ struct _gc_runtime_state { { .threshold = 2000, }, \ { .threshold = 10, }, \ { .threshold = 10, }, \ - }, + }, \ + .heap_size = 0, #else #define GC_GENERATION_INIT \ .young = { .threshold = 2000, }, \ diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 88d265cbc21709..3fc084ea6e9c6e 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1288,6 +1288,15 @@ def test_tuple_untrack_counts(self): # Use n // 2 just in case some other objects were collected. self.assertTrue(new_count - count > (n // 2)) + @requires_gil_enabled('need generational GC') + @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") + def test_heap_size(self): + count = _testinternalcapi.get_tracked_heap_size() + l = [] + self.assertEqual(count + 1, _testinternalcapi.get_tracked_heap_size()) + del l + self.assertEqual(count, _testinternalcapi.get_tracked_heap_size()) + class GCCallbackTests(unittest.TestCase): def setUp(self): diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 5319d9c7a4819b..619f9f50574429 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2725,8 +2725,7 @@ has_deferred_refcount(PyObject *self, PyObject *op) static PyObject * get_tracked_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored)) { - // Generational GC doesn't track heap_size, return -1. 
- return PyLong_FromInt64(-1); + return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size); } static PyObject * diff --git a/Python/gc.c b/Python/gc.c index 59bed10c1fb230..f3ed20dda826e6 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -2087,6 +2087,8 @@ PyObject_GC_Del(void *op) PyGC_Head *g = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op)) { gc_list_remove(g); + GCState *gcstate = get_gc_state(); + gcstate->heap_size--; #ifdef Py_DEBUG PyObject *exc = PyErr_GetRaisedException(); if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, From 22814995137104f04ed1a58e5ca16f669cda18b3 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Fri, 1 May 2026 00:28:05 +0500 Subject: [PATCH 2/6] Add heap_size to stats --- Include/internal/pycore_interp_structs.h | 2 ++ Python/gc.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 4448744f168db6..485e82b3312860 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -181,6 +181,8 @@ struct gc_generation { struct gc_generation_stats { PyTime_t ts_start; PyTime_t ts_stop; + /* heap_size on the start of the collection */ + Py_ssize_t heap_size; /* total number of collections */ Py_ssize_t collections; /* total number of collected objects */ diff --git a/Python/gc.c b/Python/gc.c index f3ed20dda826e6..636a8f3859b99c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1405,7 +1405,7 @@ add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats) memcpy(cur_stats, prev_stats, sizeof(struct gc_generation_stats)); cur_stats->ts_start = stats->ts_start; - cur_stats->ts_stop = stats->ts_stop; + cur_stats->heap_size = stats->heap_size; cur_stats->collections += 1; cur_stats->collected += stats->collected; @@ -1413,6 +1413,7 @@ add_stats(GCState *gcstate, int gen, struct gc_generation_stats *stats) cur_stats->candidates += stats->candidates; cur_stats->duration += 
stats->duration; + cur_stats->ts_stop = stats->ts_stop; } /* This is the main function. Read this to understand how the @@ -1465,6 +1466,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) invoke_gc_callback(tstate, "start", generation, &stats); } + stats.heap_size = gcstate->heap_size; // ignore error: don't interrupt the GC if reading the clock fails (void)PyTime_PerfCounterRaw(&stats.ts_start); if (gcstate->debug & _PyGC_DEBUG_STATS) { From 704e94e036eb739c994c08c9605290229961e5f4 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Sun, 3 May 2026 17:01:43 +0500 Subject: [PATCH 3/6] Heap size should be added at end of the struct Co-authored-by: Dino Viehland Co-authored-by: Neil Schemenauer --- Include/internal/pycore_interp_structs.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 485e82b3312860..b84bbbe7b3597e 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -228,11 +228,11 @@ struct _gc_runtime_state { /* linked lists of container objects */ #ifndef Py_GIL_DISABLED struct gc_generation generations[NUM_GENERATIONS]; - PyGC_Head *generation0; #else struct gc_generation young; struct gc_generation old[2]; #endif + /* a permanent generation which won't be collected */ struct gc_generation permanent_generation; struct gc_stats *generation_stats; @@ -249,6 +249,11 @@ struct _gc_runtime_state { /* The number of live objects. */ Py_ssize_t heap_size; + /* dummy members to preserve other offsets */ + Py_ssize_t dummy1; /* was work_to_do */ + int dummy2; /* was visited_space */ + int dummy3; /* was phase */ + /* This is the number of objects that survived the last full collection. It approximates the number of long lived objects tracked by the GC. @@ -274,6 +279,8 @@ struct _gc_runtime_state { /* Mutex held for gc_should_collect_mem_usage(). 
*/ PyMutex mutex; +#else + PyGC_Head *generation0; #endif }; From 83953c58072cc7ab12d985c865d98ebe23cd8fbc Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Sun, 3 May 2026 17:04:00 +0500 Subject: [PATCH 4/6] Move heap_size to the end of the gc_generation_stats struct --- Include/internal/pycore_interp_structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index b84bbbe7b3597e..b9b9d22920f2b9 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -181,8 +181,6 @@ struct gc_generation { struct gc_generation_stats { PyTime_t ts_start; PyTime_t ts_stop; - /* heap_size on the start of the collection */ - Py_ssize_t heap_size; /* total number of collections */ Py_ssize_t collections; /* total number of collected objects */ @@ -193,6 +191,8 @@ struct gc_generation_stats { Py_ssize_t candidates; // Total duration of the collection in seconds: double duration; + /* heap_size at the start of the collection */ + Py_ssize_t heap_size; }; #ifdef Py_GIL_DISABLED From fd93679c4c66395d9f72f913507a8e91a188f337 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Sun, 3 May 2026 17:27:16 +0500 Subject: [PATCH 5/6] Add heap_size to get_gc_stats --- Lib/test/test_gc_stats.py | 2 +- Modules/_remote_debugging/gc_stats.c | 1 + Modules/_remote_debugging/module.c | 4 +++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_gc_stats.py b/Lib/test/test_gc_stats.py index 59365ad45b32c9..bd75924397e76e 100644 --- a/Lib/test/test_gc_stats.py +++ b/Lib/test/test_gc_stats.py @@ -22,7 +22,7 @@ GC_STATS_FIELDS = ( "gen", "iid", "ts_start", "ts_stop", "collections", "collected", - "uncollectable", "candidates", "duration") + "uncollectable", "candidates", "heap_size", "duration") def get_interpreter_identifiers(gc_stats) -> tuple[int,...]: diff --git a/Modules/_remote_debugging/gc_stats.c 
b/Modules/_remote_debugging/gc_stats.c index 852dc866153192..d5d05edb8ecf5e 100644 --- a/Modules/_remote_debugging/gc_stats.c +++ b/Modules/_remote_debugging/gc_stats.c @@ -53,6 +53,7 @@ read_gc_stats(struct gc_stats *stats, int64_t iid, PyObject *result, SET_FIELD(PyLong_FromSsize_t, items->collected); SET_FIELD(PyLong_FromSsize_t, items->uncollectable); SET_FIELD(PyLong_FromSsize_t, items->candidates); + SET_FIELD(PyLong_FromSsize_t, items->heap_size); SET_FIELD(PyFloat_FromDouble, items->duration); diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c index c840c59971c478..3c034fd025dd72 100644 --- a/Modules/_remote_debugging/module.c +++ b/Modules/_remote_debugging/module.c @@ -143,6 +143,7 @@ static PyStructSequence_Field GCStatsInfo_fields[] = { {"collected", "Total number of collected objects"}, {"uncollectable", "Total number of uncollectable objects"}, {"candidates", "Total objects considered and traversed"}, + {"heap_size", "Number of live objects"}, {"duration", "Total collection time, in seconds"}, {NULL} }; @@ -151,7 +152,7 @@ PyStructSequence_Desc GCStatsInfo_desc = { "_remote_debugging.GCStatsInfo", "Information about a garbage collector stats sample", GCStatsInfo_fields, - 9 + 10 }; /* ============================================================================ @@ -1225,6 +1226,7 @@ Returns a list of GCStatsInfo objects with GC statistics data. - collected: Total number of collected objects. - uncollectable: Total number of uncollectable objects. - candidates: Total objects considered and traversed. + - heap_size: number of live objects. - duration: Total collection time, in seconds. 
Raises: From 344f32c1324965f43038673d9a4a387ce07cb032 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Sun, 3 May 2026 17:31:16 +0500 Subject: [PATCH 6/6] Make AC happy --- Modules/_remote_debugging/clinic/module.c.h | 3 ++- Modules/_remote_debugging/module.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Modules/_remote_debugging/clinic/module.c.h b/Modules/_remote_debugging/clinic/module.c.h index 179a7b97dd4e2f..1133db808efaec 100644 --- a/Modules/_remote_debugging/clinic/module.c.h +++ b/Modules/_remote_debugging/clinic/module.c.h @@ -601,6 +601,7 @@ PyDoc_STRVAR(_remote_debugging_GCMonitor_get_gc_stats__doc__, " - collected: Total number of collected objects.\n" " - uncollectable: Total number of uncollectable objects.\n" " - candidates: Total objects considered and traversed.\n" +" - heap_size: number of live objects.\n" " - duration: Total collection time, in seconds.\n" "\n" "Raises:\n" @@ -1563,4 +1564,4 @@ _remote_debugging_get_gc_stats(PyObject *module, PyObject *const *args, Py_ssize exit: return return_value; } -/*[clinic end generated code: output=1151e58683dab9f4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=36674f4cb8a653f3 input=a9049054013a1b77]*/ diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c index 3c034fd025dd72..c694e587e7cccb 100644 --- a/Modules/_remote_debugging/module.c +++ b/Modules/_remote_debugging/module.c @@ -1237,7 +1237,7 @@ Returns a list of GCStatsInfo objects with GC statistics data. static PyObject * _remote_debugging_GCMonitor_get_gc_stats_impl(GCMonitorObject *self, int all_interpreters) -/*[clinic end generated code: output=f73f365725224f7a input=09e647719c65f9e4]*/ +/*[clinic end generated code: output=f73f365725224f7a input=12f7c1a288cf2741]*/ { RemoteDebuggingState *st = RemoteDebugging_GetStateFromType(Py_TYPE(self)); return get_gc_stats(&self->offsets, all_interpreters, st->GCStatsInfo_Type);