gh-128923: Use zero to indicate unassigned unique id (#128925)

In the free threading build, the per thread reference counting uses a
unique id for some objects to index into the local reference count
table. Use 0 instead of -1 to indicate that the id is not assigned. This
avoids bugs where zero-initialized heap type objects look like they have
a unique id assigned.
This commit is contained in:
Sam Gross 2025-01-17 10:42:27 -05:00 committed by GitHub
parent 767c89ba7c
commit d66c08aa75
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 110 additions and 33 deletions

View File

@ -347,8 +347,7 @@ PyDictObject *_PyObject_MaterializeManagedDict_LockHeld(PyObject *);
static inline Py_ssize_t
_PyDict_UniqueId(PyDictObject *mp)
{
// Offset by one so that _ma_watcher_tag=0 represents an unassigned id
return (Py_ssize_t)(mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) - 1;
return (Py_ssize_t)(mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT);
}
static inline void

View File

@ -336,20 +336,20 @@ _Py_THREAD_INCREF_OBJECT(PyObject *obj, Py_ssize_t unique_id)
{
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
// Unsigned comparison so that `unique_id=-1`, which indicates that
// per-thread refcounting has been disabled on this object, is handled by
// the "else".
if ((size_t)unique_id < (size_t)tstate->refcounts.size) {
// The table index is `unique_id - 1` because 0 is not a valid unique id.
// Unsigned comparison so that `idx=-1` is handled by the "else".
size_t idx = (size_t)(unique_id - 1);
if (idx < (size_t)tstate->refcounts.size) {
# ifdef Py_REF_DEBUG
_Py_INCREF_IncRefTotal();
# endif
_Py_INCREF_STAT_INC();
tstate->refcounts.values[unique_id]++;
tstate->refcounts.values[idx]++;
}
else {
// The slow path resizes the per-thread refcount array if necessary.
// It handles the unique_id=-1 case to keep the inlinable function smaller.
_PyObject_ThreadIncrefSlow(obj, unique_id);
// It handles the unique_id=0 case to keep the inlinable function smaller.
_PyObject_ThreadIncrefSlow(obj, idx);
}
}
@ -386,15 +386,15 @@ _Py_THREAD_DECREF_OBJECT(PyObject *obj, Py_ssize_t unique_id)
{
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
// Unsigned comparison so that `unique_id=-1`, which indicates that
// per-thread refcounting has been disabled on this object, is handled by
// the "else".
if ((size_t)unique_id < (size_t)tstate->refcounts.size) {
// The table index is `unique_id - 1` because 0 is not a valid unique id.
// Unsigned comparison so that `idx=-1` is handled by the "else".
size_t idx = (size_t)(unique_id - 1);
if (idx < (size_t)tstate->refcounts.size) {
# ifdef Py_REF_DEBUG
_Py_DECREF_DecRefTotal();
# endif
_Py_DECREF_STAT_INC();
tstate->refcounts.values[unique_id]--;
tstate->refcounts.values[idx]--;
}
else {
// Directly decref the object if the id is not assigned or if

View File

@ -16,7 +16,7 @@ extern "C" {
// Per-thread reference counting is used along with deferred reference
// counting to avoid scaling bottlenecks due to reference count contention.
//
// An id of -1 is used to indicate that an object doesn't use per-thread
// An id of 0 is used to indicate that an object doesn't use per-thread
// refcounting. This value is used when the object is finalized by the GC
// and during interpreter shutdown to allow the object to be
// deallocated promptly when the object's refcount reaches zero.
@ -45,6 +45,8 @@ struct _Py_unique_id_pool {
Py_ssize_t size;
};
#define _Py_INVALID_UNIQUE_ID 0
// Assigns the next id from the pool of ids.
extern Py_ssize_t _PyObject_AssignUniqueId(PyObject *obj);
@ -65,7 +67,7 @@ extern void _PyObject_FinalizePerThreadRefcounts(_PyThreadStateImpl *tstate);
extern void _PyObject_FinalizeUniqueIdPool(PyInterpreterState *interp);
// Increfs the object, resizing the thread-local refcount array if necessary.
PyAPI_FUNC(void) _PyObject_ThreadIncrefSlow(PyObject *obj, Py_ssize_t unique_id);
PyAPI_FUNC(void) _PyObject_ThreadIncrefSlow(PyObject *obj, size_t idx);
#endif /* Py_GIL_DISABLED */

View File

@ -67,3 +67,10 @@ class FreezeThis(metaclass=Meta):
Base.value = 3
type_freeze(FreezeThis)
self.assertEqual(FreezeThis.value, 2)
def test_manual_heap_type(self):
# gh-128923: test that a manually allocated and initailized heap type
# works correctly
ManualHeapType = _testcapi.ManualHeapType
for i in range(100):
self.assertIsInstance(ManualHeapType(), ManualHeapType)

View File

@ -4149,6 +4149,61 @@ static PyTypeObject ContainerNoGC_type = {
.tp_new = ContainerNoGC_new,
};
/* Manually allocated heap type */
typedef struct {
PyObject_HEAD
PyObject *dict;
} ManualHeapType;
static int
ManualHeapType_traverse(PyObject *self, visitproc visit, void *arg)
{
ManualHeapType *mht = (ManualHeapType *)self;
Py_VISIT(mht->dict);
return 0;
}
static void
ManualHeapType_dealloc(PyObject *self)
{
ManualHeapType *mht = (ManualHeapType *)self;
PyObject_GC_UnTrack(self);
Py_XDECREF(mht->dict);
PyTypeObject *type = Py_TYPE(self);
Py_TYPE(self)->tp_free(self);
Py_DECREF(type);
}
static PyObject *
create_manual_heap_type(void)
{
// gh-128923: Ensure that a heap type allocated through PyType_Type.tp_alloc
// with minimal initialization works correctly.
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)PyType_Type.tp_alloc(&PyType_Type, 0);
if (heap_type == NULL) {
return NULL;
}
PyTypeObject* type = &heap_type->ht_type;
type->tp_basicsize = sizeof(ManualHeapType);
type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE | Py_TPFLAGS_HAVE_GC;
type->tp_new = PyType_GenericNew;
type->tp_name = "ManualHeapType";
type->tp_dictoffset = offsetof(ManualHeapType, dict);
type->tp_traverse = ManualHeapType_traverse;
type->tp_dealloc = ManualHeapType_dealloc;
heap_type->ht_name = PyUnicode_FromString(type->tp_name);
if (!heap_type->ht_name) {
Py_DECREF(type);
return NULL;
}
heap_type->ht_qualname = Py_NewRef(heap_type->ht_name);
if (PyType_Ready(type) < 0) {
Py_DECREF(type);
return NULL;
}
return (PyObject *)type;
}
static struct PyModuleDef _testcapimodule = {
PyModuleDef_HEAD_INIT,
@ -4283,6 +4338,15 @@ PyInit__testcapi(void)
(PyObject *) &ContainerNoGC_type) < 0)
return NULL;
PyObject *manual_heap_type = create_manual_heap_type();
if (manual_heap_type == NULL) {
return NULL;
}
if (PyModule_Add(m, "ManualHeapType", manual_heap_type) < 0) {
return NULL;
}
/* Include tests from the _testcapi/ directory */
if (_PyTestCapi_Init_Vectorcall(m) < 0) {
return NULL;

View File

@ -1911,7 +1911,7 @@ code_dealloc(PyObject *self)
Py_XDECREF(co->co_linetable);
Py_XDECREF(co->co_exceptiontable);
#ifdef Py_GIL_DISABLED
assert(co->_co_unique_id == -1);
assert(co->_co_unique_id == _Py_INVALID_UNIQUE_ID);
#endif
if (co->_co_cached != NULL) {
Py_XDECREF(co->_co_cached->_co_code);

View File

@ -1659,6 +1659,9 @@ _PyDict_EnablePerThreadRefcounting(PyObject *op)
assert(PyDict_Check(op));
#ifdef Py_GIL_DISABLED
Py_ssize_t id = _PyObject_AssignUniqueId(op);
if (id == _Py_INVALID_UNIQUE_ID) {
return;
}
if ((uint64_t)id >= (uint64_t)DICT_UNIQUE_ID_MAX) {
_PyObject_ReleaseUniqueId(id);
return;
@ -1666,8 +1669,7 @@ _PyDict_EnablePerThreadRefcounting(PyObject *op)
PyDictObject *mp = (PyDictObject *)op;
assert((mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) == 0);
// Plus 1 so that _ma_watcher_tag=0 represents an unassigned id
mp->_ma_watcher_tag += ((uint64_t)id + 1) << DICT_UNIQUE_ID_SHIFT;
mp->_ma_watcher_tag += (uint64_t)id << DICT_UNIQUE_ID_SHIFT;
#endif
}

View File

@ -6160,7 +6160,7 @@ type_dealloc(PyObject *self)
Py_XDECREF(et->ht_module);
PyMem_Free(et->_ht_tpname);
#ifdef Py_GIL_DISABLED
assert(et->unique_id == -1);
assert(et->unique_id == _Py_INVALID_UNIQUE_ID);
#endif
et->ht_token = NULL;
Py_TYPE(type)->tp_free((PyObject *)type);

View File

@ -86,7 +86,7 @@ _PyObject_AssignUniqueId(PyObject *obj)
if (pool->freelist == NULL) {
if (resize_interp_type_id_pool(pool) < 0) {
UNLOCK_POOL(pool);
return -1;
return _Py_INVALID_UNIQUE_ID;
}
}
@ -94,7 +94,9 @@ _PyObject_AssignUniqueId(PyObject *obj)
pool->freelist = entry->next;
entry->obj = obj;
_PyObject_SetDeferredRefcount(obj);
Py_ssize_t unique_id = (entry - pool->table);
// The unique id is one plus the index of the entry in the table.
Py_ssize_t unique_id = (entry - pool->table) + 1;
assert(unique_id > 0);
UNLOCK_POOL(pool);
return unique_id;
}
@ -106,8 +108,9 @@ _PyObject_ReleaseUniqueId(Py_ssize_t unique_id)
struct _Py_unique_id_pool *pool = &interp->unique_ids;
LOCK_POOL(pool);
assert(unique_id >= 0 && unique_id < pool->size);
_Py_unique_id_entry *entry = &pool->table[unique_id];
assert(unique_id > 0 && unique_id <= pool->size);
Py_ssize_t idx = unique_id - 1;
_Py_unique_id_entry *entry = &pool->table[idx];
entry->next = pool->freelist;
pool->freelist = entry;
UNLOCK_POOL(pool);
@ -116,18 +119,18 @@ _PyObject_ReleaseUniqueId(Py_ssize_t unique_id)
static Py_ssize_t
clear_unique_id(PyObject *obj)
{
Py_ssize_t id = -1;
Py_ssize_t id = _Py_INVALID_UNIQUE_ID;
if (PyType_Check(obj)) {
if (PyType_HasFeature((PyTypeObject *)obj, Py_TPFLAGS_HEAPTYPE)) {
PyHeapTypeObject *ht = (PyHeapTypeObject *)obj;
id = ht->unique_id;
ht->unique_id = -1;
ht->unique_id = _Py_INVALID_UNIQUE_ID;
}
}
else if (PyCode_Check(obj)) {
PyCodeObject *co = (PyCodeObject *)obj;
id = co->_co_unique_id;
co->_co_unique_id = -1;
co->_co_unique_id = _Py_INVALID_UNIQUE_ID;
}
else if (PyDict_Check(obj)) {
PyDictObject *mp = (PyDictObject *)obj;
@ -141,23 +144,23 @@ void
_PyObject_DisablePerThreadRefcounting(PyObject *obj)
{
Py_ssize_t id = clear_unique_id(obj);
if (id >= 0) {
if (id != _Py_INVALID_UNIQUE_ID) {
_PyObject_ReleaseUniqueId(id);
}
}
void
_PyObject_ThreadIncrefSlow(PyObject *obj, Py_ssize_t unique_id)
_PyObject_ThreadIncrefSlow(PyObject *obj, size_t idx)
{
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
if (unique_id < 0 || resize_local_refcounts(tstate) < 0) {
if (((Py_ssize_t)idx) < 0 || resize_local_refcounts(tstate) < 0) {
// just incref the object directly.
Py_INCREF(obj);
return;
}
assert(unique_id < tstate->refcounts.size);
tstate->refcounts.values[unique_id]++;
assert(idx < (size_t)tstate->refcounts.size);
tstate->refcounts.values[idx]++;
#ifdef Py_REF_DEBUG
_Py_IncRefTotal((PyThreadState *)tstate);
#endif
@ -217,7 +220,7 @@ _PyObject_FinalizeUniqueIdPool(PyInterpreterState *interp)
if (obj != NULL) {
Py_ssize_t id = clear_unique_id(obj);
(void)id;
assert(id == i);
assert(id == i + 1);
}
}
PyMem_Free(pool->table);