Issue #17043: The unicode-internal decoder no longer reads past the end of

the input buffer.
2013-02-07 16:26:55 +02:00 · 2013-02-07 16:26:55 +02:00 · d0c79dcda5
parent fe197b78a0 03ee12ed72
commit d0c79dcda5
2 changed files with 25 additions and 26 deletions
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
 Core and Builtins
 -----------------
 - Issue #17043: The unicode-internal decoder no longer read past the end of
  input buffer.
 - Issue #17098: All modules now have __loader__ set even if they pre-exist the
  bootstrapping of importlib.
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5976,6 +5976,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
    while (s < end) {
        Py_UNICODE uch;
        Py_UCS4 ch;
        if (end - s < Py_UNICODE_SIZE) {
            endinpos = end-starts;
            reason = "truncated input";
            goto error;
        }
        /* We copy the raw representation one byte at a time because the
           pointer may be unaligned (see test_codeccallbacks). */
        ((char *) &uch)[0] = s[0];
@@ -5985,37 +5990,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
        ((char *) &uch)[3] = s[3];
 #endif
        ch = uch;
-
+#ifdef Py_UNICODE_WIDE
        /* We have to sanity check the raw data, otherwise doom looms for
           some malformed UCS-4 data. */
-        if (
-#ifdef Py_UNICODE_WIDE
-            ch > 0x10ffff ||
-#endif
-            end-s < Py_UNICODE_SIZE
-            )
-        {
-            startinpos = s - starts;
-            if (end-s < Py_UNICODE_SIZE) {
-                endinpos = end-starts;
-                reason = "truncated input";
-            }
-            else {
-                endinpos = s - starts + Py_UNICODE_SIZE;
-                reason = "illegal code point (> 0x10FFFF)";
-            }
-            if (unicode_decode_call_errorhandler_writer(
-                    errors, &errorHandler,
-                    "unicode_internal", reason,
-                    &starts, &end, &startinpos, &endinpos, &exc, &s,
-                    &writer))
-                goto onError;
-            continue;
+        if (ch > 0x10ffff) {
+            endinpos = s - starts + Py_UNICODE_SIZE;
+            reason = "illegal code point (> 0x10FFFF)";
+            goto error;
        }
-
+#endif
        s += Py_UNICODE_SIZE;
 #ifndef Py_UNICODE_WIDE
-        if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end)
+        if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
        {
            Py_UNICODE uch2;
            ((char *) &uch2)[0] = s[0];
@@ -6032,6 +6018,16 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
            goto onError;
        PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch);
        writer.pos++;
        continue;
  error:
        startinpos = s - starts;
        if (unicode_decode_call_errorhandler_writer(
                errors, &errorHandler,
                "unicode_internal", reason,
                &starts, &end, &startinpos, &endinpos, &exc, &s,
                &writer))
            goto onError;
    }
    Py_XDECREF(errorHandler);