diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
index 8ecb0df2f8e5ddc..060d81415aa1f1b 100644
--- a/Lib/test/test_unicodedata.py
+++ b/Lib/test/test_unicodedata.py
@@ -1106,6 +1106,26 @@ def test_failed_import_during_compiling(self):
             "(can't load unicodedata module)"
         self.assertIn(error, result.err.decode("ascii"))
 
+    def test_unicodedata_unload_reload(self):
+        # gh-149449: dropping unicodedata and running gc must not leave the
+        # cached _ucnhash_CAPI pointer dangling.
+        #
+        # Each compile() source is a raw string holding a single backslash,
+        # so its \N{...} escape is resolved when compile() runs (once before
+        # and once after the module is dropped), not when the -c script
+        # itself is compiled.
+        code = (
+            "import gc, sys\n"
+            "assert '\\N{GRINNING FACE}'.encode("
+            "    'ascii', errors='namereplace') == b'\\\\N{GRINNING FACE}'\n"
+            "compile(r\"x = '\\N{LATIN CAPITAL LETTER A}'\", '', 'exec')\n"
+            "del sys.modules['unicodedata']\n"
+            "gc.collect()\n"
+            "assert '\\N{WINKING FACE}'.encode("
+            "    'ascii', errors='namereplace') == b'\\\\N{WINKING FACE}'\n"
+            "compile(r\"x = '\\N{LATIN CAPITAL LETTER B}'\", '', 'exec')\n"
+        )
+        script_helper.assert_python_ok("-c", code)
+
     def test_decimal_numeric_consistent(self):
         # Test that decimal and numeric are consistent,
         # i.e. if a character has a decimal value,
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-23-22-08-01.gh-issue-149449.2lhQFF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-23-22-08-01.gh-issue-149449.2lhQFF.rst
new file mode 100644
index 000000000000000..7d11442468d2077
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-23-22-08-01.gh-issue-149449.2lhQFF.rst
@@ -0,0 +1,3 @@
+Fix a use-after-free crash when the :mod:`unicodedata` module was removed
+from :data:`sys.modules` and garbage-collected between calls that decode
+``\N{...}`` escapes or use the ``namereplace`` codec error handler.
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 55b33a76e7af8a3..8b8ebb2bd9574c7 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1543,32 +1543,19 @@ capi_getcode(const char* name, int namelen, Py_UCS4* code,
     return _check_alias_and_seq(code, with_named_seq);
 }
 
-static void
-unicodedata_destroy_capi(PyObject *capsule)
-{
-    void *capi = PyCapsule_GetPointer(capsule, PyUnicodeData_CAPSULE_NAME);
-    PyMem_Free(capi);
-}
-
 static PyObject *
 unicodedata_create_capi(void)
 {
-    _PyUnicode_Name_CAPI *capi = PyMem_Malloc(sizeof(_PyUnicode_Name_CAPI));
-    if (capi == NULL) {
-        PyErr_NoMemory();
-        return NULL;
-    }
-    capi->getname = capi_getucname;
-    capi->getcode = capi_getcode;
-
-    PyObject *capsule = PyCapsule_New(capi,
-                                      PyUnicodeData_CAPSULE_NAME,
-                                      unicodedata_destroy_capi);
-    if (capsule == NULL) {
-        PyMem_Free(capi);
-    }
-    return capsule;
-};
+    // Table of name-lookup entry points exposed through the capsule.
+    // Statically allocated, and the capsule is created without a destructor,
+    // so that any cached pointers stay valid after unicodedata is removed
+    // from sys.modules and the capsule is gc'd (gh-149449).
+    static _PyUnicode_Name_CAPI capi = {
+        .getname = capi_getucname,
+        .getcode = capi_getcode,
+    };
+    return PyCapsule_New(&capi, PyUnicodeData_CAPSULE_NAME, NULL);
+}
 
 /* -------------------------------------------------------------------- */
 
diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv
index ddfb93a424c0185..bf08e5568205e7a 100644
--- a/Tools/c-analyzer/cpython/ignored.tsv
+++ b/Tools/c-analyzer/cpython/ignored.tsv
@@ -327,6 +327,7 @@ Modules/pyexpat.c	-	error_info_of	-
 Modules/pyexpat.c	-	handler_info	-
 Modules/termios.c	-	termios_constants	-
 Modules/timemodule.c	init_timezone	YEAR	-
+Modules/unicodedata.c	unicodedata_create_capi	capi	-
 Objects/bytearrayobject.c	-	_PyByteArray_empty_string	-
 Objects/complexobject.c	-	c_1	-
 Objects/exceptions.c	-	static_exceptions	-