gh-129701: Fix a data race in `intern_common` in the free threaded build (GH-130089)

* gh-129701: Fix a data race in `intern_common` in the free threaded build

* Use a mutex to avoid potentially returning a non-immortalized string,
  because immortalization happens after the insertion into the interned
  dict.

* Use `Py_DECREF()` calls instead of `Py_SET_REFCNT(s, Py_REFCNT(s) - 2)`
  for thread-safety. This code path isn't performance sensistive, so
  just use `Py_DECREF()` unconditionally for simplicity.
This commit is contained in:
Sam Gross 2025-02-17 08:15:40 -05:00 committed by GitHub
parent fc8c99a8ce
commit b9d2ee687c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 6 deletions

View File

@ -64,6 +64,9 @@ struct _Py_static_objects {
(interp)->cached_objects.NAME
struct _Py_interp_cached_objects {
#ifdef Py_GIL_DISABLED
PyMutex interned_mutex;
#endif
PyObject *interned_strings;
/* object.__reduce__ */

View File

@ -112,6 +112,14 @@ NOTE: In the interpreter's initialization phase, some globals are currently
# define _PyUnicode_CHECK(op) PyUnicode_Check(op)
#endif
#ifdef Py_GIL_DISABLED
# define LOCK_INTERNED(interp) PyMutex_Lock(&_Py_INTERP_CACHED_OBJECT(interp, interned_mutex))
# define UNLOCK_INTERNED(interp) PyMutex_Unlock(&_Py_INTERP_CACHED_OBJECT(interp, interned_mutex))
#else
# define LOCK_INTERNED(interp)
# define UNLOCK_INTERNED(interp)
#endif
static inline char* _PyUnicode_UTF8(PyObject *op)
{
return FT_ATOMIC_LOAD_PTR_ACQUIRE(_PyCompactUnicodeObject_CAST(op)->utf8);
@ -15814,11 +15822,13 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
PyObject *interned = get_interned_dict(interp);
assert(interned != NULL);
LOCK_INTERNED(interp);
PyObject *t;
{
int res = PyDict_SetDefaultRef(interned, s, s, &t);
if (res < 0) {
PyErr_Clear();
UNLOCK_INTERNED(interp);
return s;
}
else if (res == 1) {
@ -15828,6 +15838,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
PyUnicode_CHECK_INTERNED(t) == SSTATE_INTERNED_MORTAL) {
immortalize_interned(t);
}
UNLOCK_INTERNED(interp);
return t;
}
else {
@ -15844,12 +15855,8 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
if (!_Py_IsImmortal(s)) {
/* The two references in interned dict (key and value) are not counted.
unicode_dealloc() and _PyUnicode_ClearInterned() take care of this. */
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
#ifdef Py_REF_DEBUG
/* let's be pedantic with the ref total */
_Py_DecRefTotal(_PyThreadState_GET());
_Py_DecRefTotal(_PyThreadState_GET());
#endif
Py_DECREF(s);
Py_DECREF(s);
}
FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL);
@ -15864,6 +15871,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
immortalize_interned(s);
}
UNLOCK_INTERNED(interp);
return s;
}