c_encoding can never be NULL, which allows some code simplification

2016-02-25 22:34:45 -08:00 · 2016-02-25 22:34:45 -08:00 · 202803a0c0
parent 7812dbccf0
commit 202803a0c0
1 changed file with 44 additions and 48 deletions
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -4002,55 +4002,52 @@ decode_unicode(struct compiling *c, const char *s, size_t len, const char *encod
    char *p;
    const char *end;
-    if (encoding == NULL) {
+    /* check for integer overflow */
-        u = NULL;
+    if (len > PY_SIZE_MAX / 6)
-    } else {
+        return NULL;
-        /* check for integer overflow */
+    /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
-        if (len > PY_SIZE_MAX / 6)
+       "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
-            return NULL;
+    u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
-        /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
+    if (u == NULL)
-           "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
+        return NULL;
-        u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
+    p = buf = PyBytes_AsString(u);
-        if (u == NULL)
+    end = s + len;
-            return NULL;
+    while (s < end) {
-        p = buf = PyBytes_AsString(u);
+        if (*s == '\\') {
-        end = s + len;
+            *p++ = *s++;
-        while (s < end) {
+            if (*s & 0x80) {
-            if (*s == '\\') {
+                strcpy(p, "u005c");
-                *p++ = *s++;
+                p += 5;
                if (*s & 0x80) {
                    strcpy(p, "u005c");
                    p += 5;
                }
            }
            if (*s & 0x80) { /* XXX inefficient */
                PyObject *w;
                int kind;
                void *data;
                Py_ssize_t len, i;
                w = decode_utf8(c, &s, end);
                if (w == NULL) {
                    Py_DECREF(u);
                    return NULL;
                }
                kind = PyUnicode_KIND(w);
                data = PyUnicode_DATA(w);
                len = PyUnicode_GET_LENGTH(w);
                for (i = 0; i < len; i++) {
                    Py_UCS4 chr = PyUnicode_READ(kind, data, i);
                    sprintf(p, "\\U%08x", chr);
                    p += 10;
                }
                /* Should be impossible to overflow */
                assert(p - buf <= Py_SIZE(u));
                Py_DECREF(w);
            } else {
                *p++ = *s++;
            }
        }
-        len = p - buf;
+        if (*s & 0x80) { /* XXX inefficient */
-        s = buf;
+            PyObject *w;
            int kind;
            void *data;
            Py_ssize_t len, i;
            w = decode_utf8(c, &s, end);
            if (w == NULL) {
                Py_DECREF(u);
                return NULL;
            }
            kind = PyUnicode_KIND(w);
            data = PyUnicode_DATA(w);
            len = PyUnicode_GET_LENGTH(w);
            for (i = 0; i < len; i++) {
                Py_UCS4 chr = PyUnicode_READ(kind, data, i);
                sprintf(p, "\\U%08x", chr);
                p += 10;
            }
            /* Should be impossible to overflow */
            assert(p - buf <= Py_SIZE(u));
            Py_DECREF(w);
        } else {
            *p++ = *s++;
        }
    }
    len = p - buf;
    s = buf;
    v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
    Py_XDECREF(u);
    return v;
@@ -4994,8 +4991,7 @@ parsestr(struct compiling *c, const node *n, int *bytesmode, int *fmode)
            }
        }
    }
-    need_encoding = (!*bytesmode && c->c_encoding != NULL &&
-                     strcmp(c->c_encoding, "utf-8") != 0);
+    need_encoding = !*bytesmode && strcmp(c->c_encoding, "utf-8") != 0;
    if (rawmode || strchr(s, '\\') == NULL) {
        if (need_encoding) {
            PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);