diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h
index 286e7bf668312db..178825ab2904a1a 100644
--- a/Include/internal/mimalloc/mimalloc/types.h
+++ b/Include/internal/mimalloc/mimalloc/types.h
@@ -516,6 +516,12 @@ typedef struct mi_abandoned_pool_s {
   // in order to prevent resetting/decommitting segment memory if it might
   // still be read.
   mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0
+
+#if MI_FULL_PAGE_BYTES
+  // Bytes (block_size * capacity) of full pages currently abandoned to this
+  // pool.
+  mi_decl_cache_align _Atomic(intptr_t) full_page_bytes; // = 0
+#endif
 } mi_abandoned_pool_t;
 
 
@@ -588,6 +594,11 @@ struct mi_heap_s {
   uint8_t tag;          // custom identifier for this heap
   uint8_t debug_offset; // number of bytes to preserve when filling freed or uninitialized memory
   bool page_use_qsbr;   // should freeing pages be delayed using QSBR
+#if MI_FULL_PAGE_BYTES
+  // Bytes (block_size * capacity) of pages currently in MI_BIN_FULL state
+  // owned by this heap.
+  _Atomic(intptr_t) full_page_bytes;
+#endif
 };
 
 
diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index e105677cd2e674a..345f2f51519698e 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -337,6 +337,8 @@ extern int _PyGC_VisitStackRef(union _PyStackRef *ref, visitproc visit, void *ar
 #ifdef Py_GIL_DISABLED
 extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp,
                                            gcvisitobjects_t callback, void *arg);
+// Estimate of bytes allocated by mimalloc.
+PyAPI_FUNC(Py_ssize_t) _PyGC_GetHeapBytes(PyInterpreterState *interp);
 #endif
 
 #ifdef __cplusplus
diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index 86f018e328656e2..ae4126c6210aeb7 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -260,11 +260,11 @@ struct _gc_runtime_state {
     /* True if gc.freeze() has been used. */
    int freeze_active;
 
-    /* Memory usage of the process (RSS + swap) after last GC. */
-    Py_ssize_t last_mem;
+    /* Estimate of the number of bytes used by mimalloc after last GC. */
+    Py_ssize_t last_heap_bytes;
 
     /* This accumulates the new object count whenever collection is deferred
-       due to the RSS increase condition not being meet. Reset on collection. */
+       due to memory usage not increasing enough. Reset on collection. */
     Py_ssize_t deferred_count;
 
     /* Mutex held for gc_should_collect_mem_usage(). */
diff --git a/Include/internal/pycore_mimalloc.h b/Include/internal/pycore_mimalloc.h
index d870d01beb702c0..733d37d1ffd53dc 100644
--- a/Include/internal/pycore_mimalloc.h
+++ b/Include/internal/pycore_mimalloc.h
@@ -36,6 +36,13 @@ typedef enum {
 #  define MI_TSAN 1
 #endif
 
+#ifdef Py_GIL_DISABLED
+// Track full-page byte totals on each mi_heap_t and mi_abandoned_pool_t.
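+// The free-threaded GC sums these counters in _PyGC_GetHeapBytes() to
+// estimate heap size when deciding whether a collection is worthwhile.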
+# define MI_FULL_PAGE_BYTES 1
+#endif
+
 #ifdef __cplusplus
 extern "C++" {
 #endif
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 88d265cbc21709d..0469476e58e931f 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -1271,8 +1271,56 @@ def test():
         assert_python_ok("-c", code_inside_function)
 
-    @unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC")
-    @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+
+@unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC")
+@unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+class FreeThreadingTests(unittest.TestCase):
+    # Tests that are specific to the free-threading GC.
+
+    def test_gc_heap_bytes_large_allocs(self):
+        # The free-threaded GC threshold uses _PyGC_GetHeapBytes(), which
+        # sums mimalloc's full_page_bytes counters. Large/huge pages
+        # (>MI_MEDIUM_OBJ_SIZE_MAX, MI_BIN_HUGE) get eagerly promoted to
+        # MI_BIN_FULL by `_mi_malloc_generic` -- without that, mimalloc
+        # would never count these pages, and a cycle holding a large
+        # buffer would not register as memory pressure.
+        gc.collect()
+        baseline = _testinternalcapi.get_gc_heap_bytes()
+        size = 1 << 20  # 1 MiB
+        k = 5
+        data = [bytearray(size) for _ in range(k)]
+        after_alloc = _testinternalcapi.get_gc_heap_bytes()
+        # All k pages should be counted. The request is rounded up to the
+        # page's block size, so the increase should be at least k * size.
+        self.assertGreaterEqual(after_alloc - baseline, k * size)
+        del data
+        gc.collect()
+        after_free = _testinternalcapi.get_gc_heap_bytes()
+        # Freeing the lone block in each huge page un-fulls it. Allow some
+        # slop for unrelated allocations triggered by gc.collect().
+        self.assertLess(abs(after_free - baseline), size)
+
+    def test_gc_heap_bytes_many_small_allocs(self):
+        # Filling small pages should also bump the counter. Small/medium
+        # transitions are lazy (only when a page actually becomes full), so
+        # use enough allocations to fill many pages.
+        gc.collect()
+        baseline = _testinternalcapi.get_gc_heap_bytes()
+        n = 100_000
+        objs = [bytes(4) for _ in range(n)]
+        after_alloc = _testinternalcapi.get_gc_heap_bytes()
+        self.assertGreater(after_alloc - baseline, 1 << 20)
+        del objs
+        gc.collect()
+        after_free = _testinternalcapi.get_gc_heap_bytes()
+        # Should drop substantially once the pages empty out.
+        self.assertLess(after_free - baseline, (after_alloc - baseline) // 2)
+
+    def test_gc_heap_bytes_nonneg(self):
+        # Counter is intptr_t and only increases or decreases via paired
+        # hooks; it must never go negative.
+        self.assertGreaterEqual(_testinternalcapi.get_gc_heap_bytes(), 0)
+
     def test_tuple_untrack_counts(self):
         # This ensures that the free-threaded GC is counting untracked tuples
         # in the "long_lived_total" count. This is required to avoid
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-23-18-39-41.gh-issue-148937.yp--1l.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-23-18-39-41.gh-issue-148937.yp--1l.rst
new file mode 100644
index 000000000000000..523792372bc8e5b
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-23-18-39-41.gh-issue-148937.yp--1l.rst
@@ -0,0 +1,3 @@
+Fix a bug in the free-threaded GC that caused collections to be deferred too
+long. This would result in excess memory usage since cyclic trash was not
+freed quickly enough.
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index a07675bb66d8cc8..cc0854fd5c87e6a 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2642,6 +2642,12 @@ get_long_lived_total(PyObject *self, PyObject *Py_UNUSED(ignored))
     return PyLong_FromInt64(PyInterpreterState_Get()->gc.long_lived_total);
 }
 
+static PyObject *
+get_gc_heap_bytes(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return PyLong_FromSsize_t(_PyGC_GetHeapBytes(PyInterpreterState_Get()));
+}
+
 #endif
 
 static PyObject *
@@ -3007,6 +3013,7 @@ static PyMethodDef module_functions[] = {
     {"get_tlbc", get_tlbc, METH_O, NULL},
     {"get_tlbc_id", get_tlbc_id, METH_O, NULL},
     {"get_long_lived_total", get_long_lived_total, METH_NOARGS},
+    {"get_gc_heap_bytes", get_gc_heap_bytes, METH_NOARGS},
 #endif
 #ifdef _Py_TIER2
     {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},
diff --git a/Objects/mimalloc/heap.c b/Objects/mimalloc/heap.c
index 5fbfb82baa02040..c4ac30cde26f1b4 100644
--- a/Objects/mimalloc/heap.c
+++ b/Objects/mimalloc/heap.c
@@ -270,6 +270,11 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
   _mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
   heap->thread_delayed_free = NULL;
   heap->page_count = 0;
+#if MI_FULL_PAGE_BYTES
+  // All pages have been removed (destroyed, or transferred via
+  // mi_heap_absorb which already moved the bytes to the destination heap).
+  mi_atomic_store_relaxed(&heap->full_page_bytes, (intptr_t)0);
+#endif
 }
 
 // called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources.
@@ -427,6 +432,14 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
   }
   mi_assert_internal(from->page_count == 0);
 
+#if MI_FULL_PAGE_BYTES
+  // The page-state hooks didn't fire for these transfers, so move the
+  // full_page_bytes accounting in bulk. mi_heap_reset_pages(from) below
+  // will zero `from->full_page_bytes`.
+  intptr_t bytes = mi_atomic_load_relaxed(&from->full_page_bytes);
+  mi_atomic_addi(&heap->full_page_bytes, bytes);
+#endif
+
   // and do outstanding delayed frees in the `from` heap
   // note: be careful here as the `heap` field in all those pages no longer point to `from`,
   // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a
diff --git a/Objects/mimalloc/init.c b/Objects/mimalloc/init.c
index 81b241063ff40fc..2ab0cb414347df2 100644
--- a/Objects/mimalloc/init.c
+++ b/Objects/mimalloc/init.c
@@ -103,7 +103,11 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
   NULL, // next
   false,
   0,
-  0
+  0,
+  0,
+#if MI_FULL_PAGE_BYTES
+  MI_ATOMIC_VAR_INIT(0), // full_page_bytes
+#endif
 };
 
 #define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats)))
diff --git a/Objects/mimalloc/page-queue.c b/Objects/mimalloc/page-queue.c
index cb54b3740196e97..d343f9fab196675 100644
--- a/Objects/mimalloc/page-queue.c
+++ b/Objects/mimalloc/page-queue.c
@@ -151,7 +151,7 @@ static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t*
   uint8_t bin = (mi_page_is_in_full(page) ?
                  MI_BIN_FULL : mi_bin(page->xblock_size));
   mi_assert_internal(bin <= MI_BIN_FULL);
   mi_page_queue_t* pq = &heap->pages[bin];
-  mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size);
+  mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size);
   return pq;
 }
 
@@ -264,7 +264,9 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
                      (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) ||
                      (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) ||
                      (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) ||
-                     (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to)));
+                     (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to)) ||
+                     (mi_page_queue_is_huge(from) && mi_page_queue_is_full(to)) ||
+                     (mi_page_queue_is_full(from) && mi_page_queue_is_huge(to)));
 
   mi_heap_t* heap = mi_page_heap(page);
   if (page->prev != NULL) page->prev->next = page->next;
diff --git a/Objects/mimalloc/page.c b/Objects/mimalloc/page.c
index ded59f8eb1ccaac..f8891d375eee085 100644
--- a/Objects/mimalloc/page.c
+++ b/Objects/mimalloc/page.c
@@ -255,6 +255,78 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
   mi_assert_internal(!force || page->local_free == NULL);
 }
 
+/* -----------------------------------------------------------
+  Full-page byte accounting (MI_FULL_PAGE_BYTES)
+
+  Maintain `mi_heap_t.full_page_bytes` (bytes of MI_BIN_FULL pages owned by
+  the heap) and `mi_abandoned_pool_t.full_page_bytes` (bytes of MI_BIN_FULL
+  pages currently abandoned to that pool). Page weight is
+  `mi_page_block_size(page) * page->capacity`. Capacity is stable while a
+  page is in the full queue (`mi_page_extend_free` only runs on non-full
+  queues), so inc and dec see the same value.
+
+  State machine:
+    to-full        : heap += size
+    from-full      : heap -= size
+    abandon a full : heap -= size; pool += size
+    reclaim a full : pool -= size; heap += size
+    free a full    : heap -= size
+
+  The in_full bit is unconditionally cleared by `mi_page_queue_remove`, so
+  `_mi_page_abandon` re-sets it after queue_remove to preserve the "this
+  page's bytes were transferred to the pool" marker through abandonment.
+  `_mi_page_reclaim` then routes such pages straight to MI_BIN_FULL, so
+  `mi_page_queue_push` keeps the bit set; subsequent unfull/free fires the
+  matching dec.
+
+  Large/huge pages (block_size > MI_MEDIUM_OBJ_SIZE_MAX) are 1-block pages
+  in MI_BIN_HUGE; mimalloc never walks that queue on a subsequent alloc, so
+  it would never call `mi_page_to_full` on them. `_mi_malloc_generic`
+  therefore eagerly calls `mi_page_to_full` on a freshly-filled huge page
+  (see the MI_FULL_PAGE_BYTES block at the bottom of that function).
+  Inc/dec then proceed identically to small/medium pages.
+
+  Known minor leak: if a page abandoned-while-full later becomes empty and
+  then freed, the +size we added on abandon is never subtracted.
+----------------------------------------------------------- */
+
+#if MI_FULL_PAGE_BYTES
+static inline intptr_t mi_page_full_size(mi_page_t* page) {
+  return (intptr_t)(mi_page_block_size(page) * (size_t)page->capacity);
+}
+
+static void mi_page_full_inc(mi_page_t* page) {
+  mi_atomic_addi(&mi_page_heap(page)->full_page_bytes, mi_page_full_size(page));
+}
+
+static void mi_page_full_dec(mi_page_t* page) {
+  mi_atomic_addi(&mi_page_heap(page)->full_page_bytes, -mi_page_full_size(page));
+}
+
+// Called from `_mi_page_abandon` *before* the page's heap pointer is cleared.
+// Transfers the page's bytes from its heap to the pool that will own the
+// abandoned page. No-op if the page is not currently in MI_BIN_FULL.
+static void mi_page_full_abandon(mi_page_t* page) {
+  if (!mi_page_is_in_full(page)) return;
+  intptr_t bytes = mi_page_full_size(page);
+  mi_heap_t* heap = mi_page_heap(page);
+  mi_atomic_addi(&heap->full_page_bytes, -bytes);
+  mi_atomic_addi(&heap->tld->segments.abandoned->full_page_bytes, bytes);
+}
+
+// Called from `_mi_page_reclaim` when a page abandoned-while-full is
+// returning to a heap. in_full=true here means "this page's bytes are
+// currently in the pool counter from abandon". Transfer them: pool -= size,
+// new-heap += size. The caller routes the page directly into MI_BIN_FULL,
+// so the in_full bit (and matching dec hook on free/unfull) survives.
+static void mi_page_full_reclaim(mi_page_t* page) {
+  if (!mi_page_is_in_full(page)) return;
+  intptr_t bytes = mi_page_full_size(page);
+  mi_heap_t* heap = mi_page_heap(page);
+  mi_atomic_addi(&heap->tld->segments.abandoned->full_page_bytes, -bytes);
+  mi_atomic_addi(&heap->full_page_bytes, bytes);
+}
+#endif // MI_FULL_PAGE_BYTES
 
 
 /* -----------------------------------------------------------
@@ -271,8 +343,24 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
 #endif
 
-  // TODO: push on full queue immediately if it is full?
-  mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
+  mi_page_queue_t* pq;
+#if MI_FULL_PAGE_BYTES
+  // If the page was abandoned full (in_full preserved as marker), route
+  // it directly to MI_BIN_FULL. Pushing to the size-bucket queue would
+  // rely on a later alloc walking that queue to promote it via
+  // mi_page_to_full -- which happens for small/medium bins but never for
+  // MI_BIN_HUGE, so a reclaimed full huge page would otherwise leave the
+  // pool counter without re-crediting any heap. mi_page_full_reclaim
+  // does the pool-to-heap transfer.
+  if (mi_page_is_in_full(page)) {
+    pq = &heap->pages[MI_BIN_FULL];
+  } else {
+    pq = mi_page_queue(heap, mi_page_block_size(page));
+  }
+  mi_page_full_reclaim(page);
+#else
+  pq = mi_page_queue(heap, mi_page_block_size(page));
+#endif
   mi_page_queue_push(heap, pq, page);
   _PyMem_mi_page_reclaimed(page);
   mi_assert_expensive(_mi_page_is_valid(page));
@@ -360,6 +448,10 @@ void _mi_page_unfull(mi_page_t* page) {
   mi_assert_internal(mi_page_is_in_full(page));
   if (!mi_page_is_in_full(page)) return;
 
+#if MI_FULL_PAGE_BYTES
+  mi_page_full_dec(page);
+#endif
+
   mi_heap_t* heap = mi_page_heap(page);
   mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL];
   mi_page_set_in_full(page, false); // to get the right queue
@@ -374,6 +466,9 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
   mi_assert_internal(!mi_page_is_in_full(page));
   if (mi_page_is_in_full(page)) return;
 
+#if MI_FULL_PAGE_BYTES
+  mi_page_full_inc(page);
+#endif
   mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page);
   _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
 }
@@ -391,6 +486,15 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
 
   mi_heap_t* pheap = mi_page_heap(page);
 
+#if MI_FULL_PAGE_BYTES
+  // Capture in_full while the heap pointer is still valid; transfer the
+  // bytes from heap counter to pool counter. Must run before
+  // mi_page_queue_remove, which clears the in_full bit unconditionally.
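+  // The matching pool-to-heap transfer happens in mi_page_full_reclaim if
+  // another thread later reclaims this page.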
+  bool was_in_full = mi_page_is_in_full(page);
+  mi_page_full_abandon(page);
+#endif
 #ifdef Py_GIL_DISABLED
   if (page->qsbr_node.next != NULL) {
     // remove from QSBR queue, but keep the goal
@@ -406,6 +510,15 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
   mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
   mi_page_set_heap(page, NULL);
 
+#if MI_FULL_PAGE_BYTES
+  // Preserve the in_full marker through abandonment so `_mi_page_reclaim`'s
+  // `mi_page_full_reclaim` call can transfer the bytes back to the
+  // reclaiming heap. Nothing reads in_full on a heap-less page.
+  if (was_in_full) {
+    mi_page_set_in_full(page, true);
+  }
+#endif
+
 #if (MI_DEBUG>1) && !MI_TRACK_ENABLED
   // check there are no references left..
   for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) {
@@ -436,6 +549,17 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
   mi_assert_internal(page->qsbr_goal == 0);
   mi_assert_internal(page->qsbr_node.next == NULL);
 #endif
+#if MI_FULL_PAGE_BYTES
+  // A full page whose last block is freed locally goes through
+  // _mi_page_retire -> _PyMem_mi_page_maybe_free -> _mi_page_free without
+  // ever calling _mi_page_unfull, so the heap's full_page_bytes counter
+  // must be decremented here to maintain the invariant. `heap` is non-NULL
+  // for any page reaching _mi_page_free (abandoned pages take the
+  // segment-level cleanup path instead).
+  if (mi_page_is_in_full(page)) {
+    mi_page_full_dec(page);
+  }
+#endif
 
   // remove from the page list
   // (no need to do _mi_heap_delayed_free first as all blocks are already free)
@@ -963,14 +1087,28 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
   mi_assert_internal(mi_page_block_size(page) >= size);
 
   // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
+  void* p;
   if mi_unlikely(zero && page->xblock_size == 0) {
     // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
-    void* p = _mi_page_malloc(heap, page, size, false);
+    p = _mi_page_malloc(heap, page, size, false);
     mi_assert_internal(p != NULL);
     _mi_memzero_aligned(p, mi_page_usable_block_size(page));
-    return p;
   }
   else {
-    return _mi_page_malloc(heap, page, size, zero);
+    p = _mi_page_malloc(heap, page, size, zero);
+  }
+
+#if MI_FULL_PAGE_BYTES
+  // Eagerly promote a freshly-filled huge page (1 block per page, in
+  // MI_BIN_HUGE) to MI_BIN_FULL so its bytes get counted. See the
+  // "Full-page byte accounting" comment block above.
+  if (p != NULL && !mi_page_immediate_available(page)) {
+    mi_page_queue_t* page_pq = mi_page_queue_of(page);
+    if (mi_page_queue_is_huge(page_pq) && !mi_page_is_in_full(page)) {
+      mi_page_to_full(page, page_pq);
+    }
   }
+#endif
+
+  return p;
 }
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index b4fcd365592aa53..d4a751821987fb2 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -17,30 +17,15 @@
 #include "pydtrace.h"
 
-// Platform-specific includes for get_process_mem_usage().
-#ifdef _WIN32
-    #include <windows.h>
-    #include <psapi.h>  // For GetProcessMemoryInfo
-#elif defined(__linux__)
-    #include <unistd.h>  // For sysconf, getpid
-#elif defined(__APPLE__)
-    #include <mach/mach.h>
-    #include <mach/task_info.h>  // Required for TASK_VM_INFO
-    #include <unistd.h>  // For sysconf, getpid
-#elif defined(__FreeBSD__)
-    #include <sys/types.h>
-    #include <sys/sysctl.h>
-    #include <sys/user.h>  // Requires sys/user.h for kinfo_proc definition
-    #include <kvm.h>
-    #include <unistd.h>  // For sysconf, getpid
-    #include <fcntl.h>  // For O_RDONLY
-    #include <limits.h>  // For _POSIX2_LINE_MAX
-#elif defined(__OpenBSD__)
-    #include <sys/types.h>
-    #include <sys/sysctl.h>
-    #include <sys/user.h>  // For kinfo_proc
-    #include <unistd.h>  // For sysconf, getpid
-#endif
+// Minimum growth in mimalloc heap bytes (estimated from full pages) since the
+// last GC.
+#define GC_HEAP_BYTES_MIN_DELTA (512 * 1024)
+
+// Maximum number of "young" objects before we stop deferring collection due
+// to heap not growing enough. With the default threshold, this is (40*2000)
+// net new objects. This is set to 40x because older versions of Python would
+// do full collections after roughly every 70,000 new container objects.
+#define GC_MAX_DEFER_FACTOR 40
 
 // enable the "mark alive" pass of GC
 #define GC_ENABLE_MARK_ALIVE 1
 
@@ -2016,176 +2001,72 @@ cleanup_worklist(struct worklist *worklist)
     }
 }
 
-// Return the memory usage (typically RSS + swap) of the process, in units of
-// KB. Returns -1 if this operation is not supported or on failure.
-static Py_ssize_t
-get_process_mem_usage(void)
-{
-#ifdef _WIN32
-    // Windows implementation using GetProcessMemoryInfo
-    // Returns WorkingSetSize + PagefileUsage
-    PROCESS_MEMORY_COUNTERS pmc;
-    HANDLE hProcess = GetCurrentProcess();
-    if (NULL == hProcess) {
-        // Should not happen for the current process
-        return -1;
-    }
-
-    // GetProcessMemoryInfo returns non-zero on success
-    if (GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc))) {
-        // Values are in bytes, convert to KB.
-        return (Py_ssize_t)((pmc.WorkingSetSize + pmc.PagefileUsage) / 1024);
-    }
-    else {
-        return -1;
-    }
-
-#elif __linux__
-    FILE* fp = fopen("/proc/self/status", "r");
-    if (fp == NULL) {
-        return -1;
-    }
-
-    char line_buffer[256];
-    long long rss_kb = -1;
-    long long swap_kb = -1;
-
-    while (fgets(line_buffer, sizeof(line_buffer), fp) != NULL) {
-        if (rss_kb == -1 && strncmp(line_buffer, "VmRSS:", 6) == 0) {
-            sscanf(line_buffer + 6, "%lld", &rss_kb);
-        }
-        else if (swap_kb == -1 && strncmp(line_buffer, "VmSwap:", 7) == 0) {
-            sscanf(line_buffer + 7, "%lld", &swap_kb);
+// Return an estimate, in bytes, of how much memory is being used.
+Py_ssize_t
+_PyGC_GetHeapBytes(PyInterpreterState *interp)
+{
+    // Computed from mimalloc full-page byte counters: each mi_heap_t and
+    // mi_abandoned_pool_t carries a `full_page_bytes` field.
+    // Sum:
+    //  - per-tstate heaps for this interpreter (live full pages)
+    //  - the interpreter's abandoned pool (full pages between abandon and reclaim)
+    //  - _mi_heap_main (default heap on the main thread, used pre-tstate and
+    //    for non-Python threads)
+    //  - _mi_abandoned_default (full pages abandoned from default heaps)
+    // Per-thread auto-default heaps used by non-Python threads are not
+    // enumerated; their bytes show up in _mi_abandoned_default once the OS
+    // thread exits. This should be acceptable because almost all Python
+    // allocation is done by tstate-bound heaps.
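+    // Relaxed loads are fine here: the total is only a pacing heuristic
+    // for the GC, so a slightly stale value is acceptable.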
+    intptr_t total = _Py_atomic_load_intptr_relaxed(
+        (intptr_t *)&interp->mimalloc.abandoned_pool.full_page_bytes);
+    total += _Py_atomic_load_intptr_relaxed(
+        (intptr_t *)&_mi_abandoned_default.full_page_bytes);
+    total += _Py_atomic_load_intptr_relaxed(
+        (intptr_t *)&_mi_heap_main_get()->full_page_bytes);
+    HEAD_LOCK(&_PyRuntime);
+    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, p) {
+        _PyThreadStateImpl *t = (_PyThreadStateImpl *)p;
+        if (!_Py_atomic_load_int(&t->mimalloc.initialized)) {
+            continue;
         }
-        if (rss_kb != -1 && swap_kb != -1) {
-            break; // Found both
+        for (int h = 0; h < _Py_MIMALLOC_HEAP_COUNT; h++) {
+            total += _Py_atomic_load_intptr_relaxed(
+                (intptr_t *)&t->mimalloc.heaps[h].full_page_bytes);
         }
     }
-    fclose(fp);
-
-    if (rss_kb != -1 && swap_kb != -1) {
-        return (Py_ssize_t)(rss_kb + swap_kb);
-    }
-    return -1;
-
-#elif defined(__APPLE__)
-    // --- MacOS (Darwin) ---
-    // Returns phys_footprint (RAM + compressed memory)
-    task_vm_info_data_t vm_info;
-    mach_msg_type_number_t count = TASK_VM_INFO_COUNT;
-    kern_return_t kerr;
-
-    kerr = task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vm_info, &count);
-    if (kerr != KERN_SUCCESS) {
-        return -1;
-    }
-    // phys_footprint is in bytes. Convert to KB.
-    return (Py_ssize_t)(vm_info.phys_footprint / 1024);
-
-#elif defined(__FreeBSD__)
-    // NOTE: Returns RSS only. Per-process swap usage isn't readily available
-    long page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
-    if (page_size_kb <= 0) {
-        return -1;
-    }
-
-    // Using /dev/null for vmcore avoids needing dump file.
-    // NULL for kernel file uses running kernel.
-    char errbuf[_POSIX2_LINE_MAX]; // For kvm error messages
-    kvm_t *kd = kvm_openfiles(NULL, "/dev/null", NULL, O_RDONLY, errbuf);
-    if (kd == NULL) {
-        return -1;
-    }
-
-    // KERN_PROC_PID filters for the specific process ID
-    // n_procs will contain the number of processes returned (should be 1 or 0)
-    pid_t pid = getpid();
-    int n_procs;
-    struct kinfo_proc *kp = kvm_getprocs(kd, KERN_PROC_PID, pid, &n_procs);
-    if (kp == NULL) {
-        kvm_close(kd);
-        return -1;
-    }
-
-    Py_ssize_t rss_kb = -1;
-    if (n_procs > 0) {
-        // kp[0] contains the info for our process
-        // ki_rssize is in pages. Convert to KB.
-        rss_kb = (Py_ssize_t)kp->ki_rssize * page_size_kb;
-    }
-    else {
-        // Process with PID not found, shouldn't happen for self.
-        rss_kb = -1;
-    }
-
-    kvm_close(kd);
-    return rss_kb;
-
-#elif defined(__OpenBSD__)
-    // NOTE: Returns RSS only. Per-process swap usage isn't readily available
-    long page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
-    if (page_size_kb <= 0) {
-        return -1;
-    }
-
-    struct kinfo_proc kp;
-    pid_t pid = getpid();
-    int mib[6];
-    size_t len = sizeof(kp);
-
-    mib[0] = CTL_KERN;
-    mib[1] = KERN_PROC;
-    mib[2] = KERN_PROC_PID;
-    mib[3] = pid;
-    mib[4] = sizeof(struct kinfo_proc); // size of the structure we want
-    mib[5] = 1; // want 1 structure back
-    if (sysctl(mib, 6, &kp, &len, NULL, 0) == -1) {
-        return -1;
-    }
-
-    if (len > 0) {
-        // p_vm_rssize is in pages on OpenBSD. Convert to KB.
-        return (Py_ssize_t)kp.p_vm_rssize * page_size_kb;
-    }
-    else {
-        // Process info not returned
-        return -1;
-    }
-#else
-    // Unsupported platform
-    return -1;
-#endif
+    HEAD_UNLOCK(&_PyRuntime);
+    return (Py_ssize_t)total;
 }
 
+// Decide whether memory usage has grown enough to warrant a collection.
 static bool
-gc_should_collect_mem_usage(GCState *gcstate)
+gc_should_collect_mem_usage(PyThreadState *tstate)
 {
-    Py_ssize_t mem = get_process_mem_usage();
-    if (mem < 0) {
-        // Reading process memory usage is not support or failed.
-        return true;
-    }
+    PyInterpreterState *interp = tstate->interp;
+    GCState *gcstate = &interp->gc;
     int threshold = gcstate->young.threshold;
     Py_ssize_t deferred = _Py_atomic_load_ssize_relaxed(&gcstate->deferred_count);
-    if (deferred > threshold * 40) {
-        // Too many new container objects since last GC, even though memory use
-        // might not have increased much. This is intended to avoid resource
-        // exhaustion if some objects consume resources but don't result in a
-        // memory usage increase. We use 40x as the factor here because older
-        // versions of Python would do full collections after roughly every
-        // 70,000 new container objects.
+    if (deferred > threshold * GC_MAX_DEFER_FACTOR) {
+        // Too many new container objects since last GC, even though memory
+        // use might not have increased much. This avoids resource exhaustion
+        // if some objects consume resources but don't result in a memory
+        // usage increase.
         return true;
     }
-    Py_ssize_t last_mem = _Py_atomic_load_ssize_relaxed(&gcstate->last_mem);
-    Py_ssize_t mem_threshold = Py_MAX(last_mem / 10, 128);
-    if ((mem - last_mem) > mem_threshold) {
-        // The process memory usage has increased too much, do a collection.
+    Py_ssize_t cur = _PyGC_GetHeapBytes(interp);
+    Py_ssize_t last = _Py_atomic_load_ssize_relaxed(&gcstate->last_heap_bytes);
+    // Require a 20% increase in full mimalloc pages.
+    Py_ssize_t delta = Py_MAX(last / 5, GC_HEAP_BYTES_MIN_DELTA);
+    if ((cur - last) > delta) {
+        // The heap has grown enough, collect.
         return true;
     }
     else {
-        // The memory usage has not increased enough, defer the collection and
-        // clear the young object count so we don't check memory usage again
-        // on the next call to gc_should_collect().
+        // Memory usage has not grown enough. Defer the collection, rolling the
+        // young count into deferred_count so we don't keep checking on every
+        // call to gc_should_collect().
         PyMutex_Lock(&gcstate->mutex);
         int young_count = _Py_atomic_exchange_int(&gcstate->young.count, 0);
         _Py_atomic_store_ssize_relaxed(&gcstate->deferred_count,
@@ -2196,8 +2077,9 @@ gc_should_collect_mem_usage(GCState *gcstate)
 }
 
 static bool
-gc_should_collect(GCState *gcstate)
+gc_should_collect(PyThreadState *tstate)
 {
+    GCState *gcstate = &tstate->interp->gc;
     int count = _Py_atomic_load_int_relaxed(&gcstate->young.count);
     int threshold = gcstate->young.threshold;
     int gc_enabled = _Py_atomic_load_int_relaxed(&gcstate->enabled);
@@ -2214,7 +2096,7 @@
         // objects.
         return false;
     }
-    return gc_should_collect_mem_usage(gcstate);
+    return gc_should_collect_mem_usage(tstate);
 }
 
 static void
@@ -2231,7 +2113,7 @@ record_allocation(PyThreadState *tstate)
         _Py_atomic_add_int(&gcstate->young.count, (int)gc->alloc_count);
         gc->alloc_count = 0;
 
-        if (gc_should_collect(gcstate) &&
+        if (gc_should_collect(tstate) &&
             !_Py_atomic_load_int_relaxed(&gcstate->collecting))
         {
             _Py_ScheduleGC(tstate);
@@ -2379,10 +2261,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
     // to be freed.
     delete_garbage(state);
 
-    // Store the current memory usage, can be smaller now if breaking cycles
-    // freed some memory.
-    Py_ssize_t last_mem = get_process_mem_usage();
-    _Py_atomic_store_ssize_relaxed(&state->gcstate->last_mem, last_mem);
+    // Record the current heap bytes estimate as the new baseline.
+    Py_ssize_t last_heap_bytes = _PyGC_GetHeapBytes(interp);
+    _Py_atomic_store_ssize_relaxed(&state->gcstate->last_heap_bytes, last_heap_bytes);
 
     // Append objects with legacy finalizers to the "gc.garbage" list.
     handle_legacy_finalizers(state);
@@ -2423,7 +2304,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
         return 0;
     }
 
-    if (reason == _Py_GC_REASON_HEAP && !gc_should_collect(gcstate)) {
+    if (reason == _Py_GC_REASON_HEAP && !gc_should_collect(tstate)) {
         // Don't collect if the threshold is not exceeded.
         _Py_atomic_store_int(&gcstate->collecting, 0);
         return 0;