From b83ea144ccf39e2c9346d3a53cf67e154d29b4cb Mon Sep 17 00:00:00 2001
From: Antoine Pitrou
Date: Tue, 20 Nov 2012 22:30:42 +0100
Subject: [PATCH] Issue #1160: Fix compiling large regular expressions on UCS2 builds. Patch by Serhiy Storchaka.

---
 Lib/test/test_re.py |  6 ++++++
 Misc/NEWS           |  3 +++
 Modules/_sre.c      | 11 +++++++----
 Modules/sre.h       |  6 +-----
 4 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 0bceaa292fb4..ff2c95351784 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -425,6 +425,12 @@ def test_bigcharset(self):
         self.assertEqual(re.match(u"([\u2222\u2223])",
                                   u"\u2222", re.UNICODE).group(1), u"\u2222")
 
+    def test_big_codesize(self):
+        # Issue #1160
+        r = re.compile('|'.join(('%d'%x for x in range(10000))))
+        self.assertIsNotNone(r.match('1000'))
+        self.assertIsNotNone(r.match('9999'))
+
     def test_anyall(self):
         self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
                          "a\nb")
diff --git a/Misc/NEWS b/Misc/NEWS
index b5b7a5042ce3..f69dd3c0976c 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -151,6 +151,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #1160: Fix compiling large regular expressions on UCS2 builds.
+  Patch by Serhiy Storchaka.
+
 - Issue #14313: zipfile now raises NotImplementedError when the compression
   type is unknown.
 
diff --git a/Modules/_sre.c b/Modules/_sre.c
index cd9591796283..ab4f269be58c 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -2675,6 +2675,13 @@ _compile(PyObject* self_, PyObject* args)
         PyObject *o = PyList_GET_ITEM(code, i);
         unsigned long value = PyInt_Check(o) ?
             (unsigned long)PyInt_AsLong(o) : PyLong_AsUnsignedLong(o);
+        if (value == (unsigned long)-1 && PyErr_Occurred()) {
+            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "regular expression code size limit exceeded");
+            }
+            break;
+        }
         self->code[i] = (SRE_CODE) value;
         if ((unsigned long) self->code[i] != value) {
             PyErr_SetString(PyExc_OverflowError,
@@ -3035,10 +3042,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-#ifdef Py_UNICODE_WIDE
                 if (max > 65535)
                     FAIL;
-#endif
                 if (!_validate_inner(code, code+skip-4, groups))
                     FAIL;
                 code += skip-4;
@@ -3056,10 +3061,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-#ifdef Py_UNICODE_WIDE
                 if (max > 65535)
                     FAIL;
-#endif
                 if (!_validate_inner(code, code+skip-3, groups))
                     FAIL;
                 code += skip-3;
diff --git a/Modules/sre.h b/Modules/sre.h
index d4af05c045e2..9bfdf7fdfcda 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -14,12 +14,8 @@
 #include "sre_constants.h"
 
 /* size of a code word (must be unsigned short or larger, and
-   large enough to hold a Py_UNICODE character) */
-#ifdef Py_UNICODE_WIDE
+   large enough to hold a UCS4 character) */
 #define SRE_CODE Py_UCS4
-#else
-#define SRE_CODE unsigned short
-#endif
 
 typedef struct {
     PyObject_VAR_HEAD