gh-128923: Use zero to indicate unassigned unique id (#128925)

In the free threading build, the per thread reference counting uses a
unique id for some objects to index into the local reference count
table. Use 0 instead of -1 to indicate that the id is not assigned. This
avoids bugs where zero-initialized heap type objects look like they have
a unique id assigned.
This commit is contained in:
Sam Gross 2025-01-17 10:42:27 -05:00 committed by GitHub
parent 767c89ba7c
commit d66c08aa75
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 110 additions and 33 deletions

View File

@ -347,8 +347,7 @@ PyDictObject *_PyObject_MaterializeManagedDict_LockHeld(PyObject *);
static inline Py_ssize_t static inline Py_ssize_t
_PyDict_UniqueId(PyDictObject *mp) _PyDict_UniqueId(PyDictObject *mp)
{ {
// Offset by one so that _ma_watcher_tag=0 represents an unassigned id return (Py_ssize_t)(mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT);
return (Py_ssize_t)(mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) - 1;
} }
static inline void static inline void

View File

@ -336,20 +336,20 @@ _Py_THREAD_INCREF_OBJECT(PyObject *obj, Py_ssize_t unique_id)
{ {
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
// Unsigned comparison so that `unique_id=-1`, which indicates that // The table index is `unique_id - 1` because 0 is not a valid unique id.
// per-thread refcounting has been disabled on this object, is handled by // Unsigned comparison so that `idx=-1` is handled by the "else".
// the "else". size_t idx = (size_t)(unique_id - 1);
if ((size_t)unique_id < (size_t)tstate->refcounts.size) { if (idx < (size_t)tstate->refcounts.size) {
# ifdef Py_REF_DEBUG # ifdef Py_REF_DEBUG
_Py_INCREF_IncRefTotal(); _Py_INCREF_IncRefTotal();
# endif # endif
_Py_INCREF_STAT_INC(); _Py_INCREF_STAT_INC();
tstate->refcounts.values[unique_id]++; tstate->refcounts.values[idx]++;
} }
else { else {
// The slow path resizes the per-thread refcount array if necessary. // The slow path resizes the per-thread refcount array if necessary.
// It handles the unique_id=-1 case to keep the inlinable function smaller. // It handles the unique_id=0 case to keep the inlinable function smaller.
_PyObject_ThreadIncrefSlow(obj, unique_id); _PyObject_ThreadIncrefSlow(obj, idx);
} }
} }
@ -386,15 +386,15 @@ _Py_THREAD_DECREF_OBJECT(PyObject *obj, Py_ssize_t unique_id)
{ {
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
// Unsigned comparison so that `unique_id=-1`, which indicates that // The table index is `unique_id - 1` because 0 is not a valid unique id.
// per-thread refcounting has been disabled on this object, is handled by // Unsigned comparison so that `idx=-1` is handled by the "else".
// the "else". size_t idx = (size_t)(unique_id - 1);
if ((size_t)unique_id < (size_t)tstate->refcounts.size) { if (idx < (size_t)tstate->refcounts.size) {
# ifdef Py_REF_DEBUG # ifdef Py_REF_DEBUG
_Py_DECREF_DecRefTotal(); _Py_DECREF_DecRefTotal();
# endif # endif
_Py_DECREF_STAT_INC(); _Py_DECREF_STAT_INC();
tstate->refcounts.values[unique_id]--; tstate->refcounts.values[idx]--;
} }
else { else {
// Directly decref the object if the id is not assigned or if // Directly decref the object if the id is not assigned or if

View File

@ -16,7 +16,7 @@ extern "C" {
// Per-thread reference counting is used along with deferred reference // Per-thread reference counting is used along with deferred reference
// counting to avoid scaling bottlenecks due to reference count contention. // counting to avoid scaling bottlenecks due to reference count contention.
// //
// An id of -1 is used to indicate that an object doesn't use per-thread // An id of 0 is used to indicate that an object doesn't use per-thread
// refcounting. This value is used when the object is finalized by the GC // refcounting. This value is used when the object is finalized by the GC
// and during interpreter shutdown to allow the object to be // and during interpreter shutdown to allow the object to be
// deallocated promptly when the object's refcount reaches zero. // deallocated promptly when the object's refcount reaches zero.
@ -45,6 +45,8 @@ struct _Py_unique_id_pool {
Py_ssize_t size; Py_ssize_t size;
}; };
#define _Py_INVALID_UNIQUE_ID 0
// Assigns the next id from the pool of ids. // Assigns the next id from the pool of ids.
extern Py_ssize_t _PyObject_AssignUniqueId(PyObject *obj); extern Py_ssize_t _PyObject_AssignUniqueId(PyObject *obj);
@ -65,7 +67,7 @@ extern void _PyObject_FinalizePerThreadRefcounts(_PyThreadStateImpl *tstate);
extern void _PyObject_FinalizeUniqueIdPool(PyInterpreterState *interp); extern void _PyObject_FinalizeUniqueIdPool(PyInterpreterState *interp);
// Increfs the object, resizing the thread-local refcount array if necessary. // Increfs the object, resizing the thread-local refcount array if necessary.
PyAPI_FUNC(void) _PyObject_ThreadIncrefSlow(PyObject *obj, Py_ssize_t unique_id); PyAPI_FUNC(void) _PyObject_ThreadIncrefSlow(PyObject *obj, size_t idx);
#endif /* Py_GIL_DISABLED */ #endif /* Py_GIL_DISABLED */

View File

@ -67,3 +67,10 @@ class FreezeThis(metaclass=Meta):
Base.value = 3 Base.value = 3
type_freeze(FreezeThis) type_freeze(FreezeThis)
self.assertEqual(FreezeThis.value, 2) self.assertEqual(FreezeThis.value, 2)
def test_manual_heap_type(self):
# gh-128923: test that a manually allocated and initailized heap type
# works correctly
ManualHeapType = _testcapi.ManualHeapType
for i in range(100):
self.assertIsInstance(ManualHeapType(), ManualHeapType)

View File

@ -4149,6 +4149,61 @@ static PyTypeObject ContainerNoGC_type = {
.tp_new = ContainerNoGC_new, .tp_new = ContainerNoGC_new,
}; };
/* Manually allocated heap type */
typedef struct {
PyObject_HEAD
PyObject *dict;
} ManualHeapType;
static int
ManualHeapType_traverse(PyObject *self, visitproc visit, void *arg)
{
ManualHeapType *mht = (ManualHeapType *)self;
Py_VISIT(mht->dict);
return 0;
}
static void
ManualHeapType_dealloc(PyObject *self)
{
ManualHeapType *mht = (ManualHeapType *)self;
PyObject_GC_UnTrack(self);
Py_XDECREF(mht->dict);
PyTypeObject *type = Py_TYPE(self);
Py_TYPE(self)->tp_free(self);
Py_DECREF(type);
}
static PyObject *
create_manual_heap_type(void)
{
// gh-128923: Ensure that a heap type allocated through PyType_Type.tp_alloc
// with minimal initialization works correctly.
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)PyType_Type.tp_alloc(&PyType_Type, 0);
if (heap_type == NULL) {
return NULL;
}
PyTypeObject* type = &heap_type->ht_type;
type->tp_basicsize = sizeof(ManualHeapType);
type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE | Py_TPFLAGS_HAVE_GC;
type->tp_new = PyType_GenericNew;
type->tp_name = "ManualHeapType";
type->tp_dictoffset = offsetof(ManualHeapType, dict);
type->tp_traverse = ManualHeapType_traverse;
type->tp_dealloc = ManualHeapType_dealloc;
heap_type->ht_name = PyUnicode_FromString(type->tp_name);
if (!heap_type->ht_name) {
Py_DECREF(type);
return NULL;
}
heap_type->ht_qualname = Py_NewRef(heap_type->ht_name);
if (PyType_Ready(type) < 0) {
Py_DECREF(type);
return NULL;
}
return (PyObject *)type;
}
static struct PyModuleDef _testcapimodule = { static struct PyModuleDef _testcapimodule = {
PyModuleDef_HEAD_INIT, PyModuleDef_HEAD_INIT,
@ -4283,6 +4338,15 @@ PyInit__testcapi(void)
(PyObject *) &ContainerNoGC_type) < 0) (PyObject *) &ContainerNoGC_type) < 0)
return NULL; return NULL;
PyObject *manual_heap_type = create_manual_heap_type();
if (manual_heap_type == NULL) {
return NULL;
}
if (PyModule_Add(m, "ManualHeapType", manual_heap_type) < 0) {
return NULL;
}
/* Include tests from the _testcapi/ directory */ /* Include tests from the _testcapi/ directory */
if (_PyTestCapi_Init_Vectorcall(m) < 0) { if (_PyTestCapi_Init_Vectorcall(m) < 0) {
return NULL; return NULL;

View File

@ -1911,7 +1911,7 @@ code_dealloc(PyObject *self)
Py_XDECREF(co->co_linetable); Py_XDECREF(co->co_linetable);
Py_XDECREF(co->co_exceptiontable); Py_XDECREF(co->co_exceptiontable);
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
assert(co->_co_unique_id == -1); assert(co->_co_unique_id == _Py_INVALID_UNIQUE_ID);
#endif #endif
if (co->_co_cached != NULL) { if (co->_co_cached != NULL) {
Py_XDECREF(co->_co_cached->_co_code); Py_XDECREF(co->_co_cached->_co_code);

View File

@ -1659,6 +1659,9 @@ _PyDict_EnablePerThreadRefcounting(PyObject *op)
assert(PyDict_Check(op)); assert(PyDict_Check(op));
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
Py_ssize_t id = _PyObject_AssignUniqueId(op); Py_ssize_t id = _PyObject_AssignUniqueId(op);
if (id == _Py_INVALID_UNIQUE_ID) {
return;
}
if ((uint64_t)id >= (uint64_t)DICT_UNIQUE_ID_MAX) { if ((uint64_t)id >= (uint64_t)DICT_UNIQUE_ID_MAX) {
_PyObject_ReleaseUniqueId(id); _PyObject_ReleaseUniqueId(id);
return; return;
@ -1666,8 +1669,7 @@ _PyDict_EnablePerThreadRefcounting(PyObject *op)
PyDictObject *mp = (PyDictObject *)op; PyDictObject *mp = (PyDictObject *)op;
assert((mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) == 0); assert((mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) == 0);
// Plus 1 so that _ma_watcher_tag=0 represents an unassigned id mp->_ma_watcher_tag += (uint64_t)id << DICT_UNIQUE_ID_SHIFT;
mp->_ma_watcher_tag += ((uint64_t)id + 1) << DICT_UNIQUE_ID_SHIFT;
#endif #endif
} }

View File

@ -6160,7 +6160,7 @@ type_dealloc(PyObject *self)
Py_XDECREF(et->ht_module); Py_XDECREF(et->ht_module);
PyMem_Free(et->_ht_tpname); PyMem_Free(et->_ht_tpname);
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
assert(et->unique_id == -1); assert(et->unique_id == _Py_INVALID_UNIQUE_ID);
#endif #endif
et->ht_token = NULL; et->ht_token = NULL;
Py_TYPE(type)->tp_free((PyObject *)type); Py_TYPE(type)->tp_free((PyObject *)type);

View File

@ -86,7 +86,7 @@ _PyObject_AssignUniqueId(PyObject *obj)
if (pool->freelist == NULL) { if (pool->freelist == NULL) {
if (resize_interp_type_id_pool(pool) < 0) { if (resize_interp_type_id_pool(pool) < 0) {
UNLOCK_POOL(pool); UNLOCK_POOL(pool);
return -1; return _Py_INVALID_UNIQUE_ID;
} }
} }
@ -94,7 +94,9 @@ _PyObject_AssignUniqueId(PyObject *obj)
pool->freelist = entry->next; pool->freelist = entry->next;
entry->obj = obj; entry->obj = obj;
_PyObject_SetDeferredRefcount(obj); _PyObject_SetDeferredRefcount(obj);
Py_ssize_t unique_id = (entry - pool->table); // The unique id is one plus the index of the entry in the table.
Py_ssize_t unique_id = (entry - pool->table) + 1;
assert(unique_id > 0);
UNLOCK_POOL(pool); UNLOCK_POOL(pool);
return unique_id; return unique_id;
} }
@ -106,8 +108,9 @@ _PyObject_ReleaseUniqueId(Py_ssize_t unique_id)
struct _Py_unique_id_pool *pool = &interp->unique_ids; struct _Py_unique_id_pool *pool = &interp->unique_ids;
LOCK_POOL(pool); LOCK_POOL(pool);
assert(unique_id >= 0 && unique_id < pool->size); assert(unique_id > 0 && unique_id <= pool->size);
_Py_unique_id_entry *entry = &pool->table[unique_id]; Py_ssize_t idx = unique_id - 1;
_Py_unique_id_entry *entry = &pool->table[idx];
entry->next = pool->freelist; entry->next = pool->freelist;
pool->freelist = entry; pool->freelist = entry;
UNLOCK_POOL(pool); UNLOCK_POOL(pool);
@ -116,18 +119,18 @@ _PyObject_ReleaseUniqueId(Py_ssize_t unique_id)
static Py_ssize_t static Py_ssize_t
clear_unique_id(PyObject *obj) clear_unique_id(PyObject *obj)
{ {
Py_ssize_t id = -1; Py_ssize_t id = _Py_INVALID_UNIQUE_ID;
if (PyType_Check(obj)) { if (PyType_Check(obj)) {
if (PyType_HasFeature((PyTypeObject *)obj, Py_TPFLAGS_HEAPTYPE)) { if (PyType_HasFeature((PyTypeObject *)obj, Py_TPFLAGS_HEAPTYPE)) {
PyHeapTypeObject *ht = (PyHeapTypeObject *)obj; PyHeapTypeObject *ht = (PyHeapTypeObject *)obj;
id = ht->unique_id; id = ht->unique_id;
ht->unique_id = -1; ht->unique_id = _Py_INVALID_UNIQUE_ID;
} }
} }
else if (PyCode_Check(obj)) { else if (PyCode_Check(obj)) {
PyCodeObject *co = (PyCodeObject *)obj; PyCodeObject *co = (PyCodeObject *)obj;
id = co->_co_unique_id; id = co->_co_unique_id;
co->_co_unique_id = -1; co->_co_unique_id = _Py_INVALID_UNIQUE_ID;
} }
else if (PyDict_Check(obj)) { else if (PyDict_Check(obj)) {
PyDictObject *mp = (PyDictObject *)obj; PyDictObject *mp = (PyDictObject *)obj;
@ -141,23 +144,23 @@ void
_PyObject_DisablePerThreadRefcounting(PyObject *obj) _PyObject_DisablePerThreadRefcounting(PyObject *obj)
{ {
Py_ssize_t id = clear_unique_id(obj); Py_ssize_t id = clear_unique_id(obj);
if (id >= 0) { if (id != _Py_INVALID_UNIQUE_ID) {
_PyObject_ReleaseUniqueId(id); _PyObject_ReleaseUniqueId(id);
} }
} }
void void
_PyObject_ThreadIncrefSlow(PyObject *obj, Py_ssize_t unique_id) _PyObject_ThreadIncrefSlow(PyObject *obj, size_t idx)
{ {
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
if (unique_id < 0 || resize_local_refcounts(tstate) < 0) { if (((Py_ssize_t)idx) < 0 || resize_local_refcounts(tstate) < 0) {
// just incref the object directly. // just incref the object directly.
Py_INCREF(obj); Py_INCREF(obj);
return; return;
} }
assert(unique_id < tstate->refcounts.size); assert(idx < (size_t)tstate->refcounts.size);
tstate->refcounts.values[unique_id]++; tstate->refcounts.values[idx]++;
#ifdef Py_REF_DEBUG #ifdef Py_REF_DEBUG
_Py_IncRefTotal((PyThreadState *)tstate); _Py_IncRefTotal((PyThreadState *)tstate);
#endif #endif
@ -217,7 +220,7 @@ _PyObject_FinalizeUniqueIdPool(PyInterpreterState *interp)
if (obj != NULL) { if (obj != NULL) {
Py_ssize_t id = clear_unique_id(obj); Py_ssize_t id = clear_unique_id(obj);
(void)id; (void)id;
assert(id == i); assert(id == i + 1);
} }
} }
PyMem_Free(pool->table); PyMem_Free(pool->table);