Merged revisions 76230 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r76230 | benjamin.peterson | 2009-11-12 17:39:44 -0600 (Thu, 12 Nov 2009) | 2 lines

  fix several compile() issues by translating newlines in the tokenizer
........
Benjamin Peterson 2009-11-13 00:17:59 +00:00
parent a1d23326b1
commit aeaa592516
8 changed files with 106 additions and 35 deletions
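A quick sketch of the user-visible effect (not part of the commit; assuming CPython 3.2+, where the tokenizer now does the translation, and noting that before this revision each of these calls raised SyntaxError):

    # Windows (\r\n) and old Mac (\r) line endings are accepted directly,
    # and 'exec' input no longer has to end with a newline.
    compile("x = 1\r\ny = 2\r\n", "<demo>", "exec")
    compile("x = 1\ry = 2\r", "<demo>", "exec")
    compile("x = 1", "<demo>", "exec")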

View File

@@ -176,11 +176,15 @@ are always available. They are listed here in alphabetical order.
    .. note::
 
-      When compiling a string with multi-line statements, line endings must be
-      represented by a single newline character (``'\n'``), and the input must
-      be terminated by at least one newline character.  If line endings are
-      represented by ``'\r\n'``, use :meth:`str.replace` to change them into
-      ``'\n'``.
+      When compiling a string with multi-line statements in ``'single'`` or
+      ``'eval'`` mode, input must be terminated by at least one newline
+      character.  This is to facilitate detection of incomplete and complete
+      statements in the :mod:`code` module.
+
+   .. versionchanged:: 3.2
+      Allowed use of Windows and Mac newlines.  Also input in ``'exec'`` mode
+      does not have to end in a newline anymore.
 
 .. function:: complex([real[, imag]])
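A minimal sketch of the rule the note keeps (illustrative, not from the patch): the :mod:`code`/:mod:`codeop` machinery relies on the trailing newline to tell a finished statement from one still being typed.

    import codeop

    # Incomplete input: compile_command() returns None, so an interactive
    # loop knows to keep reading lines.
    assert codeop.compile_command("if x:") is None

    # A complete simple statement compiles to a code object.
    assert codeop.compile_command("x = 1") is not None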

View File

@@ -295,10 +295,6 @@ def test_filename(self):
         self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename,
                              compile("a = 1\n", "def", 'single').co_filename)
 
-    def test_no_universal_newlines(self):
-        code = compile_command("'\rfoo\r'", symbol='eval')
-        self.assertEqual(eval(code), '\rfoo\r')
-
 
 def test_main():
     run_unittest(CodeopTests)
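The deleted test pinned the old behavior, in which a bare '\r' inside a string literal survived compilation untouched. Because newline translation now happens on the raw source before tokenization, that guarantee is gone; an illustrative consequence (assumed behavior, not code from the patch):

    # The \r characters are normalized in the source buffer itself, so the
    # literal's value changes along with the line endings.
    code = compile("'\rfoo\r'", "<demo>", "eval")
    assert eval(code) == '\nfoo\n'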

View File

@@ -5,6 +5,19 @@
 
 class TestSpecifics(unittest.TestCase):
 
+    def test_no_ending_newline(self):
+        compile("hi", "<test>", "exec")
+        compile("hi\r", "<test>", "exec")
+
+    def test_empty(self):
+        compile("", "<test>", "exec")
+
+    def test_other_newlines(self):
+        compile("\r\n", "<test>", "exec")
+        compile("\r", "<test>", "exec")
+        compile("hi\r\nstuff\r\ndef f():\n    pass\r", "<test>", "exec")
+        compile("this_is\rreally_old_mac\rdef f():\n  pass", "<test>", "exec")
+
     def test_debug_assignment(self):
         # catch assignments to __debug__
         self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single')

View File

@@ -237,9 +237,9 @@ def walk(tree):
                           (14, '+', 2, 13),
                           (2, '1', 2, 15),
                           (4, '', 2, 16),
-                          (6, '', 2, -1),
-                          (4, '', 2, -1),
-                          (0, '', 2, -1)],
+                          (6, '', 3, -1),
+                          (4, '', 3, -1),
+                          (0, '', 3, -1)],
                          terminals)
 
     def test_extended_unpacking(self):
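The third field of each tuple is the line number: the closing DEDENT (6), NEWLINE (4) and ENDMARKER (0) terminals move from line 2 to line 3 because they now sit after the synthesized final newline. A rough sketch with the legacy parser module (removed in Python 3.10; the two-line source here is illustrative, not the test's actual input):

    import parser

    # No trailing newline in the source; the tokenizer appends one, so the
    # closing terminals report line 3.
    st = parser.suite("a = 1\nb = 1 + 1")
    print(parser.st2list(st, line_info=True, col_info=True))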

View File

@ -26,7 +26,7 @@ def test_issue2301(self):
try: try:
compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec") compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
except SyntaxError as v: except SyntaxError as v:
self.assertEquals(v.text, "print '\u5e74'") self.assertEquals(v.text, "print '\u5e74'\n")
else: else:
self.fail() self.fail()
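The added '\n' in the expected text follows from the same translation: the source line attached to a SyntaxError now comes from the newline-terminated buffer. A hedged illustration with an ordinary syntax error:

    try:
        compile("print 'x'\n", "<demo>", "exec")  # Python 2 print statement
    except SyntaxError as e:
        assert e.text.endswith("\n")  # the offending line keeps its newline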

View File

@@ -46,13 +46,14 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
                                     perrdetail *err_ret, int *flags)
 {
     struct tok_state *tok;
+    int exec_input = start == file_input;
 
     initerr(err_ret, filename);
 
     if (*flags & PyPARSE_IGNORE_COOKIE)
-        tok = PyTokenizer_FromUTF8(s);
+        tok = PyTokenizer_FromUTF8(s, exec_input);
     else
-        tok = PyTokenizer_FromString(s);
+        tok = PyTokenizer_FromString(s, exec_input);
     if (tok == NULL) {
         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
         return NULL;
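exec_input records whether parsing starts at the grammar's file_input symbol, so only whole-module input receives the synthesized trailing newline. Seen from Python (a sketch of the asymmetry, not patch code):

    # 'exec' maps to the file_input start symbol: a missing final newline
    # is supplied by the tokenizer.
    compile("x = 1", "<demo>", "exec")

    # 'eval' uses the eval_input start symbol and gets no such treatment;
    # a one-line expression compiles fine either way.
    compile("1 + 1", "<demo>", "eval")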

View File

@@ -119,6 +119,7 @@ tok_new(void)
     tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
     tok->done = E_OK;
     tok->fp = NULL;
+    tok->input = NULL;
     tok->tabsize = TABSIZE;
     tok->indent = 0;
     tok->indstack[0] = 0;
@@ -145,6 +146,17 @@ tok_new(void)
     return tok;
 }
 
+static char *
+new_string(const char *s, Py_ssize_t len)
+{
+    char* result = (char *)PyMem_MALLOC(len + 1);
+    if (result != NULL) {
+        memcpy(result, s, len);
+        result[len] = '\0';
+    }
+    return result;
+}
+
 #ifdef PGEN
 
 static char *
@@ -159,10 +171,10 @@ decoding_feof(struct tok_state *tok)
     return feof(tok->fp);
 }
 
-static const char *
-decode_str(const char *str, struct tok_state *tok)
+static char *
+decode_str(const char *str, int exec_input, struct tok_state *tok)
 {
-    return str;
+    return new_string(str, strlen(str));
 }
 
 #else /* PGEN */
@@ -177,16 +189,6 @@ error_ret(struct tok_state *tok) /* XXX */
     return NULL;                /* as if it were EOF */
 }
 
-static char *
-new_string(const char *s, Py_ssize_t len)
-{
-    char* result = (char *)PyMem_MALLOC(len + 1);
-    if (result != NULL) {
-        memcpy(result, s, len);
-        result[len] = '\0';
-    }
-    return result;
-}
 
 static char *
 get_normal_name(char *s)        /* for utf-8 and latin-1 */
@@ -635,17 +637,63 @@ translate_into_utf8(const char* str, const char* enc) {
     return utf8;
 }
 
+
+static char *
+translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
+    int skip_next_lf = 0, length = strlen(s), final_length;
+    char *buf, *current;
+    char c;
+    buf = PyMem_MALLOC(length + 2);
+    if (buf == NULL) {
+        tok->done = E_NOMEM;
+        return NULL;
+    }
+    for (current = buf; (c = *s++);) {
+        if (skip_next_lf) {
+            skip_next_lf = 0;
+            if (c == '\n') {
+                c = *s;
+                s++;
+                if (!c)
+                    break;
+            }
+        }
+        if (c == '\r') {
+            skip_next_lf = 1;
+            c = '\n';
+        }
+        *current = c;
+        current++;
+    }
+    /* If this is exec input, add a newline to the end of the string if
+       there isn't one already. */
+    if (exec_input && *current != '\n') {
+        *current = '\n';
+        current++;
+    }
+    *current = '\0';
+    final_length = current - buf;
+    if (final_length < length && final_length)
+        /* should never fail */
+        buf = PyMem_REALLOC(buf, final_length + 1);
+    return buf;
+}
+
 /* Decode a byte string STR for use as the buffer of TOK.
    Look for encoding declarations inside STR, and record them
    inside TOK. */
 
 static const char *
-decode_str(const char *str, struct tok_state *tok)
+decode_str(const char *input, int single, struct tok_state *tok)
 {
     PyObject* utf8 = NULL;
+    const char *str;
     const char *s;
     const char *newl[2] = {NULL, NULL};
     int lineno = 0;
+    tok->input = str = translate_newlines(input, single, tok);
+    if (str == NULL)
+        return NULL;
     tok->enc = NULL;
     tok->str = str;
     if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
@@ -696,12 +744,12 @@ decode_str(const char *str, struct tok_state *tok)
 /* Set up tokenizer for string */
 
 struct tok_state *
-PyTokenizer_FromString(const char *str)
+PyTokenizer_FromString(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
     if (tok == NULL)
         return NULL;
-    str = (char *)decode_str(str, tok);
+    str = (char *)decode_str(str, exec_input, tok);
     if (str == NULL) {
         PyTokenizer_Free(tok);
         return NULL;
@@ -713,11 +761,18 @@ PyTokenizer_FromString(const char *str)
 }
 
 struct tok_state *
-PyTokenizer_FromUTF8(const char *str)
+PyTokenizer_FromUTF8(const char *str, int exec_input)
 {
     struct tok_state *tok = tok_new();
     if (tok == NULL)
         return NULL;
+#ifndef PGEN
+    tok->input = str = translate_newlines(str, exec_input, tok);
+#endif
+    if (str == NULL) {
+        PyTokenizer_Free(tok);
+        return NULL;
+    }
     tok->decoding_state = STATE_RAW;
     tok->read_coding_spec = 1;
     tok->enc = NULL;
@@ -734,7 +789,6 @@ PyTokenizer_FromUTF8(const char *str)
     return tok;
 }
 
-
 /* Set up tokenizer for file */
 
 struct tok_state *
@@ -780,6 +834,8 @@ PyTokenizer_Free(struct tok_state *tok)
 #endif
     if (tok->fp != NULL && tok->buf != NULL)
         PyMem_FREE(tok->buf);
+    if (tok->input)
+        PyMem_FREE((char *)tok->input);
     PyMem_FREE(tok);
 }
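For readers who would rather not trace the C, here is a rough Python equivalent of translate_newlines() (a sketch of its intent; where the C code inspects the byte at the write position, the sketch checks the last emitted character):

    def translate_newlines(s, exec_input):
        buf = []
        skip_next_lf = False
        for c in s:
            if skip_next_lf:
                # A '\r' was just rewritten as '\n'; swallow the '\n' of
                # a "\r\n" pair so it isn't emitted twice.
                skip_next_lf = False
                if c == '\n':
                    continue
            if c == '\r':
                skip_next_lf = True
                c = '\n'
            buf.append(c)
        # For exec input, guarantee the result ends with a newline.
        if exec_input and (not buf or buf[-1] != '\n'):
            buf.append('\n')
        return ''.join(buf)

    assert translate_newlines("a\r\nb\rc", True) == "a\nb\nc\n"
    assert translate_newlines("a\r\n", False) == "a\n"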

View File

@@ -58,10 +58,11 @@ struct tok_state {
 #endif
     const char* enc;            /* Encoding for the current str. */
     const char* str;
+    const char* input;          /* Tokenizer's newline translated copy of the string. */
 };
 
-extern struct tok_state *PyTokenizer_FromString(const char *);
-extern struct tok_state *PyTokenizer_FromUTF8(const char *);
+extern struct tok_state *PyTokenizer_FromString(const char *, int);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
 extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
                                               char *, char *);
 extern void PyTokenizer_Free(struct tok_state *);