mirror of https://github.com/python/cpython.git
Issue #17043: The unicode-internal decoder no longer reads past the end of
the input buffer.
This commit is contained in:
commit
d0c79dcda5
|
@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #17043: The unicode-internal decoder no longer reads past the end of
|
||||||
|
input buffer.
|
||||||
|
|
||||||
- Issue #17098: All modules now have __loader__ set even if they pre-exist the
|
- Issue #17098: All modules now have __loader__ set even if they pre-exist the
|
||||||
bootstrapping of importlib.
|
bootstrapping of importlib.
|
||||||
|
|
||||||
|
|
|
@ -5976,6 +5976,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
|
||||||
while (s < end) {
|
while (s < end) {
|
||||||
Py_UNICODE uch;
|
Py_UNICODE uch;
|
||||||
Py_UCS4 ch;
|
Py_UCS4 ch;
|
||||||
|
if (end - s < Py_UNICODE_SIZE) {
|
||||||
|
endinpos = end-starts;
|
||||||
|
reason = "truncated input";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
/* We copy the raw representation one byte at a time because the
|
/* We copy the raw representation one byte at a time because the
|
||||||
pointer may be unaligned (see test_codeccallbacks). */
|
pointer may be unaligned (see test_codeccallbacks). */
|
||||||
((char *) &uch)[0] = s[0];
|
((char *) &uch)[0] = s[0];
|
||||||
|
@ -5985,37 +5990,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
|
||||||
((char *) &uch)[3] = s[3];
|
((char *) &uch)[3] = s[3];
|
||||||
#endif
|
#endif
|
||||||
ch = uch;
|
ch = uch;
|
||||||
|
#ifdef Py_UNICODE_WIDE
|
||||||
/* We have to sanity check the raw data, otherwise doom looms for
|
/* We have to sanity check the raw data, otherwise doom looms for
|
||||||
some malformed UCS-4 data. */
|
some malformed UCS-4 data. */
|
||||||
if (
|
if (ch > 0x10ffff) {
|
||||||
#ifdef Py_UNICODE_WIDE
|
endinpos = s - starts + Py_UNICODE_SIZE;
|
||||||
ch > 0x10ffff ||
|
reason = "illegal code point (> 0x10FFFF)";
|
||||||
#endif
|
goto error;
|
||||||
end-s < Py_UNICODE_SIZE
|
|
||||||
)
|
|
||||||
{
|
|
||||||
startinpos = s - starts;
|
|
||||||
if (end-s < Py_UNICODE_SIZE) {
|
|
||||||
endinpos = end-starts;
|
|
||||||
reason = "truncated input";
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
endinpos = s - starts + Py_UNICODE_SIZE;
|
|
||||||
reason = "illegal code point (> 0x10FFFF)";
|
|
||||||
}
|
|
||||||
if (unicode_decode_call_errorhandler_writer(
|
|
||||||
errors, &errorHandler,
|
|
||||||
"unicode_internal", reason,
|
|
||||||
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
|
||||||
&writer))
|
|
||||||
goto onError;
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
s += Py_UNICODE_SIZE;
|
s += Py_UNICODE_SIZE;
|
||||||
#ifndef Py_UNICODE_WIDE
|
#ifndef Py_UNICODE_WIDE
|
||||||
if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end)
|
if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
|
||||||
{
|
{
|
||||||
Py_UNICODE uch2;
|
Py_UNICODE uch2;
|
||||||
((char *) &uch2)[0] = s[0];
|
((char *) &uch2)[0] = s[0];
|
||||||
|
@ -6032,6 +6018,16 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
|
||||||
goto onError;
|
goto onError;
|
||||||
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch);
|
PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch);
|
||||||
writer.pos++;
|
writer.pos++;
|
||||||
|
continue;
|
||||||
|
|
||||||
|
error:
|
||||||
|
startinpos = s - starts;
|
||||||
|
if (unicode_decode_call_errorhandler_writer(
|
||||||
|
errors, &errorHandler,
|
||||||
|
"unicode_internal", reason,
|
||||||
|
&starts, &end, &startinpos, &endinpos, &exc, &s,
|
||||||
|
&writer))
|
||||||
|
goto onError;
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_XDECREF(errorHandler);
|
Py_XDECREF(errorHandler);
|
||||||
|
|
Loading…
Reference in New Issue