From ca33d17e282b66d49dea2f9497c75f8d0864e6c0 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sat, 23 May 2026 22:14:17 +0200 Subject: [PATCH 1/3] gh-149449: Fix use-after-free in unicodedata _ucnhash_CAPI capsule The _PyUnicode_Name_CAPI struct was malloc'd per import and freed by the capsule destructor, leaving the per-interpreter cached pointer dangling once unicodedata was removed from sys.modules and gc'd. The \N{...} parser path and the namereplace codec handler then crashed. Allocate the struct in static storage and drop the destructor; the contents are immutable function pointers shared across imports. --- Lib/test/test_unicodedata.py | 16 ++++++++++ ...-05-23-22-08-01.gh-issue-149449.2lhQFF.rst | 3 ++ Modules/unicodedata.c | 31 +++++-------------- 3 files changed, 27 insertions(+), 23 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-05-23-22-08-01.gh-issue-149449.2lhQFF.rst diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 8ecb0df2f8e5ddc..060d81415aa1f1b 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -1106,6 +1106,22 @@ def test_failed_import_during_compiling(self): "(can't load unicodedata module)" self.assertIn(error, result.err.decode("ascii")) + def test_unicodedata_unload_reload(self): + # gh-149449: dropping unicodedata and running gc must not leave the + # cached _ucnhash_CAPI pointer dangling. 
+ code = ( + "import gc, sys\n" + "assert '\\N{GRINNING FACE}'.encode(" + " 'ascii', errors='namereplace') == b'\\\\N{GRINNING FACE}'\n" + "compile(r\"x = '\\N{LATIN CAPITAL LETTER A}'\", '', 'exec')\n" + "del sys.modules['unicodedata']\n" + "gc.collect()\n" + "assert '\\N{WINKING FACE}'.encode(" + " 'ascii', errors='namereplace') == b'\\\\N{WINKING FACE}'\n" + "compile(r\"x = '\\N{LATIN CAPITAL LETTER B}'\", '', 'exec')\n" + ) + script_helper.assert_python_ok("-c", code) + def test_decimal_numeric_consistent(self): # Test that decimal and numeric are consistent, # i.e. if a character has a decimal value, diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-23-22-08-01.gh-issue-149449.2lhQFF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-23-22-08-01.gh-issue-149449.2lhQFF.rst new file mode 100644 index 000000000000000..7d11442468d2077 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-23-22-08-01.gh-issue-149449.2lhQFF.rst @@ -0,0 +1,3 @@ +Fix a use-after-free crash when the :mod:`unicodedata` module was removed +from :data:`sys.modules` and garbage-collected between calls that decode +``\N{...}`` escapes or use the ``namereplace`` codec error handler. 
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index 55b33a76e7af8a3..ef231ebc9b49942 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -1543,32 +1543,17 @@ capi_getcode(const char* name, int namelen, Py_UCS4* code, return _check_alias_and_seq(code, with_named_seq); } -static void -unicodedata_destroy_capi(PyObject *capsule) -{ - void *capi = PyCapsule_GetPointer(capsule, PyUnicodeData_CAPSULE_NAME); - PyMem_Free(capi); -} - static PyObject * unicodedata_create_capi(void) { - _PyUnicode_Name_CAPI *capi = PyMem_Malloc(sizeof(_PyUnicode_Name_CAPI)); - if (capi == NULL) { - PyErr_NoMemory(); - return NULL; - } - capi->getname = capi_getucname; - capi->getcode = capi_getcode; - - PyObject *capsule = PyCapsule_New(capi, - PyUnicodeData_CAPSULE_NAME, - unicodedata_destroy_capi); - if (capsule == NULL) { - PyMem_Free(capi); - } - return capsule; -}; + // Static storage so cached pointers stay valid after unicodedata + // is removed from sys.modules and the capsule is gc'd (gh-149449). 
+ static _PyUnicode_Name_CAPI capi = { + .getname = capi_getucname, + .getcode = capi_getcode, + }; + return PyCapsule_New(&capi, PyUnicodeData_CAPSULE_NAME, NULL); +} /* -------------------------------------------------------------------- */ From 3ccadc9e4a6632b724ae59421b5092d2641a058b Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sun, 24 May 2026 16:45:35 +0200 Subject: [PATCH 2/3] Allow static capi in c-analyzer ignored.tsv --- Tools/c-analyzer/cpython/ignored.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index ddfb93a424c0185..bf08e5568205e7a 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -327,6 +327,7 @@ Modules/pyexpat.c - error_info_of - Modules/pyexpat.c - handler_info - Modules/termios.c - termios_constants - Modules/timemodule.c init_timezone YEAR - +Modules/unicodedata.c unicodedata_create_capi capi - Objects/bytearrayobject.c - _PyByteArray_empty_string - Objects/complexobject.c - c_1 - Objects/exceptions.c - static_exceptions - From b6c2c3a7c35bec68a2745fd108a0c13fa469caf4 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Sun, 24 May 2026 17:49:03 +0200 Subject: [PATCH 3/3] Update Modules/unicodedata.c Co-authored-by: Kumar Aditya --- Modules/unicodedata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index ef231ebc9b49942..8b8ebb2bd9574c7 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -1546,7 +1546,7 @@ capi_getcode(const char* name, int namelen, Py_UCS4* code, static PyObject * unicodedata_create_capi(void) { - // Static storage so cached pointers stay valid after unicodedata + // Statically allocated so that any cached pointers stay valid after unicodedata // is removed from sys.modules and the capsule is gc'd (gh-149449). static _PyUnicode_Name_CAPI capi = { .getname = capi_getucname,