mirror of https://github.com/python/cpython.git
Issue #13169: The maximal repetition number in a regular expression has been
increased from 65534 to 2147483647 (on 32-bit platforms) or 4294967294 (on 64-bit platforms).
This commit is contained in:
commit
165ee9645b
|
@ -13,6 +13,7 @@
|
||||||
import _sre, sys
|
import _sre, sys
|
||||||
import sre_parse
|
import sre_parse
|
||||||
from sre_constants import *
|
from sre_constants import *
|
||||||
|
from _sre import MAXREPEAT
|
||||||
|
|
||||||
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
|
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
|
||||||
|
|
||||||
|
|
|
@ -15,10 +15,6 @@
|
||||||
|
|
||||||
MAGIC = 20031017
|
MAGIC = 20031017
|
||||||
|
|
||||||
# max code word in this release
|
|
||||||
|
|
||||||
MAXREPEAT = 65535
|
|
||||||
|
|
||||||
# SRE standard exception (access as sre.error)
|
# SRE standard exception (access as sre.error)
|
||||||
# should this really be here?
|
# should this really be here?
|
||||||
|
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from sre_constants import *
|
from sre_constants import *
|
||||||
|
from _sre import MAXREPEAT
|
||||||
|
|
||||||
SPECIAL_CHARS = ".\\[{()*+?^$|"
|
SPECIAL_CHARS = ".\\[{()*+?^$|"
|
||||||
REPEAT_CHARS = "*+?{"
|
REPEAT_CHARS = "*+?{"
|
||||||
|
@ -537,10 +538,14 @@ def _parse(source, state):
|
||||||
continue
|
continue
|
||||||
if lo:
|
if lo:
|
||||||
min = int(lo)
|
min = int(lo)
|
||||||
|
if min >= MAXREPEAT:
|
||||||
|
raise OverflowError("the repetition number is too large")
|
||||||
if hi:
|
if hi:
|
||||||
max = int(hi)
|
max = int(hi)
|
||||||
if max < min:
|
if max >= MAXREPEAT:
|
||||||
raise error("bad repeat interval")
|
raise OverflowError("the repetition number is too large")
|
||||||
|
if max < min:
|
||||||
|
raise error("bad repeat interval")
|
||||||
else:
|
else:
|
||||||
raise error("not supported")
|
raise error("not supported")
|
||||||
# figure out which item to repeat
|
# figure out which item to repeat
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G
|
from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
|
||||||
|
cpython_only
|
||||||
import io
|
import io
|
||||||
import re
|
import re
|
||||||
from re import Scanner
|
from re import Scanner
|
||||||
|
@ -980,6 +981,37 @@ def test_bug_16688(self):
|
||||||
self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
|
self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
|
||||||
self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
|
self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
|
||||||
|
|
||||||
|
def test_repeat_minmax_overflow(self):
|
||||||
|
# Issue #13169
|
||||||
|
string = "x" * 100000
|
||||||
|
self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
|
||||||
|
self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
|
||||||
|
self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
|
||||||
|
self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
|
||||||
|
self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
|
||||||
|
self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
|
||||||
|
# 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
|
||||||
|
self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
|
||||||
|
self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
|
||||||
|
self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
|
||||||
|
self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
|
||||||
|
|
||||||
|
@cpython_only
|
||||||
|
def test_repeat_minmax_overflow_maxrepeat(self):
|
||||||
|
try:
|
||||||
|
from _sre import MAXREPEAT
|
||||||
|
except ImportError:
|
||||||
|
self.skipTest('requires _sre.MAXREPEAT constant')
|
||||||
|
string = "x" * 100000
|
||||||
|
self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
|
||||||
|
self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
|
||||||
|
(0, 100000))
|
||||||
|
self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
|
||||||
|
self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
|
||||||
|
self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
|
||||||
|
self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
|
||||||
|
|
||||||
|
|
||||||
def run_re_tests():
|
def run_re_tests():
|
||||||
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
|
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
|
||||||
if verbose:
|
if verbose:
|
||||||
|
|
|
@ -253,6 +253,10 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #13169: The maximal repetition number in a regular expression has been
|
||||||
|
increased from 65534 to 2147483647 (on 32-bit platform) or 4294967294 (on
|
||||||
|
64-bit).
|
||||||
|
|
||||||
- Issue #17143: Fix a missing import in the trace module. Initial patch by
|
- Issue #17143: Fix a missing import in the trace module. Initial patch by
|
||||||
Berker Peksag.
|
Berker Peksag.
|
||||||
|
|
||||||
|
|
|
@ -492,7 +492,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
|
||||||
Py_ssize_t i;
|
Py_ssize_t i;
|
||||||
|
|
||||||
/* adjust end */
|
/* adjust end */
|
||||||
if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
|
if (maxcount < (end - ptr) / state->charsize && maxcount != SRE_MAXREPEAT)
|
||||||
end = ptr + maxcount*state->charsize;
|
end = ptr + maxcount*state->charsize;
|
||||||
|
|
||||||
switch (pattern[0]) {
|
switch (pattern[0]) {
|
||||||
|
@ -1109,7 +1109,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
} else {
|
} else {
|
||||||
/* general case */
|
/* general case */
|
||||||
LASTMARK_SAVE();
|
LASTMARK_SAVE();
|
||||||
while ((Py_ssize_t)ctx->pattern[2] == 65535
|
while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
|
||||||
|| ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
|
|| ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
|
||||||
state->ptr = ctx->ptr;
|
state->ptr = ctx->ptr;
|
||||||
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
|
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
|
||||||
|
@ -1195,7 +1195,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((ctx->count < ctx->u.rep->pattern[2] ||
|
if ((ctx->count < ctx->u.rep->pattern[2] ||
|
||||||
ctx->u.rep->pattern[2] == 65535) &&
|
ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
|
||||||
state->ptr != ctx->u.rep->last_ptr) {
|
state->ptr != ctx->u.rep->last_ptr) {
|
||||||
/* we may have enough matches, but if we can
|
/* we may have enough matches, but if we can
|
||||||
match another item, do so */
|
match another item, do so */
|
||||||
|
@ -1273,7 +1273,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
LASTMARK_RESTORE();
|
LASTMARK_RESTORE();
|
||||||
|
|
||||||
if (ctx->count >= ctx->u.rep->pattern[2]
|
if (ctx->count >= ctx->u.rep->pattern[2]
|
||||||
&& ctx->u.rep->pattern[2] != 65535)
|
&& ctx->u.rep->pattern[2] != SRE_MAXREPEAT)
|
||||||
RETURN_FAILURE;
|
RETURN_FAILURE;
|
||||||
|
|
||||||
ctx->u.rep->count = ctx->count;
|
ctx->u.rep->count = ctx->count;
|
||||||
|
@ -3037,7 +3037,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
|
||||||
GET_ARG; max = arg;
|
GET_ARG; max = arg;
|
||||||
if (min > max)
|
if (min > max)
|
||||||
FAIL;
|
FAIL;
|
||||||
if (max > 65535)
|
if (max > SRE_MAXREPEAT)
|
||||||
FAIL;
|
FAIL;
|
||||||
if (!_validate_inner(code, code+skip-4, groups))
|
if (!_validate_inner(code, code+skip-4, groups))
|
||||||
FAIL;
|
FAIL;
|
||||||
|
@ -3056,7 +3056,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
|
||||||
GET_ARG; max = arg;
|
GET_ARG; max = arg;
|
||||||
if (min > max)
|
if (min > max)
|
||||||
FAIL;
|
FAIL;
|
||||||
if (max > 65535)
|
if (max > SRE_MAXREPEAT)
|
||||||
FAIL;
|
FAIL;
|
||||||
if (!_validate_inner(code, code+skip-3, groups))
|
if (!_validate_inner(code, code+skip-3, groups))
|
||||||
FAIL;
|
FAIL;
|
||||||
|
@ -3942,6 +3942,12 @@ PyMODINIT_FUNC PyInit__sre(void)
|
||||||
Py_DECREF(x);
|
Py_DECREF(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
|
||||||
|
if (x) {
|
||||||
|
PyDict_SetItemString(d, "MAXREPEAT", x);
|
||||||
|
Py_DECREF(x);
|
||||||
|
}
|
||||||
|
|
||||||
x = PyUnicode_FromString(copyright);
|
x = PyUnicode_FromString(copyright);
|
||||||
if (x) {
|
if (x) {
|
||||||
PyDict_SetItemString(d, "copyright", x);
|
PyDict_SetItemString(d, "copyright", x);
|
||||||
|
|
|
@ -16,6 +16,11 @@
|
||||||
/* size of a code word (must be unsigned short or larger, and
|
/* size of a code word (must be unsigned short or larger, and
|
||||||
large enough to hold a UCS4 character) */
|
large enough to hold a UCS4 character) */
|
||||||
#define SRE_CODE Py_UCS4
|
#define SRE_CODE Py_UCS4
|
||||||
|
#if SIZEOF_SIZE_T > 4
|
||||||
|
# define SRE_MAXREPEAT (~(SRE_CODE)0)
|
||||||
|
#else
|
||||||
|
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_VAR_HEAD
|
PyObject_VAR_HEAD
|
||||||
|
|
Loading…
Reference in New Issue