mirror of https://github.com/python/cpython.git
Merged revisions 76230 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r76230 | benjamin.peterson | 2009-11-12 17:39:44 -0600 (Thu, 12 Nov 2009) | 2 lines fix several compile() issues by translating newlines in the tokenizer ........
This commit is contained in:
parent
a1d23326b1
commit
aeaa592516
|
@ -176,11 +176,15 @@ are always available. They are listed here in alphabetical order.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
When compiling a string with multi-line statements, line endings must be
|
When compiling a string with multi-line statements in ``'single'`` or
|
||||||
represented by a single newline character (``'\n'``), and the input must
|
``'eval'`` mode, input must be terminated by at least one newline
|
||||||
be terminated by at least one newline character. If line endings are
|
character. This is to facilitate detection of incomplete and complete
|
||||||
represented by ``'\r\n'``, use :meth:`str.replace` to change them into
|
statements in the :mod:`code` module.
|
||||||
``'\n'``.
|
|
||||||
|
|
||||||
|
.. versionchanged:: 3.2
|
||||||
|
Allowed use of Windows and Mac newlines. Also input in ``'exec'`` mode
|
||||||
|
does not have to end in a newline anymore.
|
||||||
|
|
||||||
|
|
||||||
.. function:: complex([real[, imag]])
|
.. function:: complex([real[, imag]])
|
||||||
|
|
|
@ -295,10 +295,6 @@ def test_filename(self):
|
||||||
self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename,
|
self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename,
|
||||||
compile("a = 1\n", "def", 'single').co_filename)
|
compile("a = 1\n", "def", 'single').co_filename)
|
||||||
|
|
||||||
def test_no_universal_newlines(self):
|
|
||||||
code = compile_command("'\rfoo\r'", symbol='eval')
|
|
||||||
self.assertEqual(eval(code), '\rfoo\r')
|
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
run_unittest(CodeopTests)
|
run_unittest(CodeopTests)
|
||||||
|
|
|
@ -5,6 +5,19 @@
|
||||||
|
|
||||||
class TestSpecifics(unittest.TestCase):
|
class TestSpecifics(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_no_ending_newline(self):
|
||||||
|
compile("hi", "<test>", "exec")
|
||||||
|
compile("hi\r", "<test>", "exec")
|
||||||
|
|
||||||
|
def test_empty(self):
|
||||||
|
compile("", "<test>", "exec")
|
||||||
|
|
||||||
|
def test_other_newlines(self):
|
||||||
|
compile("\r\n", "<test>", "exec")
|
||||||
|
compile("\r", "<test>", "exec")
|
||||||
|
compile("hi\r\nstuff\r\ndef f():\n pass\r", "<test>", "exec")
|
||||||
|
compile("this_is\rreally_old_mac\rdef f():\n pass", "<test>", "exec")
|
||||||
|
|
||||||
def test_debug_assignment(self):
|
def test_debug_assignment(self):
|
||||||
# catch assignments to __debug__
|
# catch assignments to __debug__
|
||||||
self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single')
|
self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single')
|
||||||
|
|
|
@ -237,9 +237,9 @@ def walk(tree):
|
||||||
(14, '+', 2, 13),
|
(14, '+', 2, 13),
|
||||||
(2, '1', 2, 15),
|
(2, '1', 2, 15),
|
||||||
(4, '', 2, 16),
|
(4, '', 2, 16),
|
||||||
(6, '', 2, -1),
|
(6, '', 3, -1),
|
||||||
(4, '', 2, -1),
|
(4, '', 3, -1),
|
||||||
(0, '', 2, -1)],
|
(0, '', 3, -1)],
|
||||||
terminals)
|
terminals)
|
||||||
|
|
||||||
def test_extended_unpacking(self):
|
def test_extended_unpacking(self):
|
||||||
|
|
|
@ -26,7 +26,7 @@ def test_issue2301(self):
|
||||||
try:
|
try:
|
||||||
compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
|
compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
|
||||||
except SyntaxError as v:
|
except SyntaxError as v:
|
||||||
self.assertEquals(v.text, "print '\u5e74'")
|
self.assertEquals(v.text, "print '\u5e74'\n")
|
||||||
else:
|
else:
|
||||||
self.fail()
|
self.fail()
|
||||||
|
|
||||||
|
|
|
@ -46,13 +46,14 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
|
||||||
perrdetail *err_ret, int *flags)
|
perrdetail *err_ret, int *flags)
|
||||||
{
|
{
|
||||||
struct tok_state *tok;
|
struct tok_state *tok;
|
||||||
|
int exec_input = start == file_input;
|
||||||
|
|
||||||
initerr(err_ret, filename);
|
initerr(err_ret, filename);
|
||||||
|
|
||||||
if (*flags & PyPARSE_IGNORE_COOKIE)
|
if (*flags & PyPARSE_IGNORE_COOKIE)
|
||||||
tok = PyTokenizer_FromUTF8(s);
|
tok = PyTokenizer_FromUTF8(s, exec_input);
|
||||||
else
|
else
|
||||||
tok = PyTokenizer_FromString(s);
|
tok = PyTokenizer_FromString(s, exec_input);
|
||||||
if (tok == NULL) {
|
if (tok == NULL) {
|
||||||
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
|
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
|
@ -119,6 +119,7 @@ tok_new(void)
|
||||||
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
|
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
|
||||||
tok->done = E_OK;
|
tok->done = E_OK;
|
||||||
tok->fp = NULL;
|
tok->fp = NULL;
|
||||||
|
tok->input = NULL;
|
||||||
tok->tabsize = TABSIZE;
|
tok->tabsize = TABSIZE;
|
||||||
tok->indent = 0;
|
tok->indent = 0;
|
||||||
tok->indstack[0] = 0;
|
tok->indstack[0] = 0;
|
||||||
|
@ -145,6 +146,17 @@ tok_new(void)
|
||||||
return tok;
|
return tok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static char *
|
||||||
|
new_string(const char *s, Py_ssize_t len)
|
||||||
|
{
|
||||||
|
char* result = (char *)PyMem_MALLOC(len + 1);
|
||||||
|
if (result != NULL) {
|
||||||
|
memcpy(result, s, len);
|
||||||
|
result[len] = '\0';
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef PGEN
|
#ifdef PGEN
|
||||||
|
|
||||||
static char *
|
static char *
|
||||||
|
@ -159,10 +171,10 @@ decoding_feof(struct tok_state *tok)
|
||||||
return feof(tok->fp);
|
return feof(tok->fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *
|
static char *
|
||||||
decode_str(const char *str, struct tok_state *tok)
|
decode_str(const char *str, int exec_input, struct tok_state *tok)
|
||||||
{
|
{
|
||||||
return str;
|
return new_string(str, strlen(str));
|
||||||
}
|
}
|
||||||
|
|
||||||
#else /* PGEN */
|
#else /* PGEN */
|
||||||
|
@ -177,16 +189,6 @@ error_ret(struct tok_state *tok) /* XXX */
|
||||||
return NULL; /* as if it were EOF */
|
return NULL; /* as if it were EOF */
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *
|
|
||||||
new_string(const char *s, Py_ssize_t len)
|
|
||||||
{
|
|
||||||
char* result = (char *)PyMem_MALLOC(len + 1);
|
|
||||||
if (result != NULL) {
|
|
||||||
memcpy(result, s, len);
|
|
||||||
result[len] = '\0';
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
static char *
|
static char *
|
||||||
get_normal_name(char *s) /* for utf-8 and latin-1 */
|
get_normal_name(char *s) /* for utf-8 and latin-1 */
|
||||||
|
@ -635,17 +637,63 @@ translate_into_utf8(const char* str, const char* enc) {
|
||||||
return utf8;
|
return utf8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static char *
|
||||||
|
translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
|
||||||
|
int skip_next_lf = 0, length = strlen(s), final_length;
|
||||||
|
char *buf, *current;
|
||||||
|
char c;
|
||||||
|
buf = PyMem_MALLOC(length + 2);
|
||||||
|
if (buf == NULL) {
|
||||||
|
tok->done = E_NOMEM;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
for (current = buf; (c = *s++);) {
|
||||||
|
if (skip_next_lf) {
|
||||||
|
skip_next_lf = 0;
|
||||||
|
if (c == '\n') {
|
||||||
|
c = *s;
|
||||||
|
s++;
|
||||||
|
if (!c)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (c == '\r') {
|
||||||
|
skip_next_lf = 1;
|
||||||
|
c = '\n';
|
||||||
|
}
|
||||||
|
*current = c;
|
||||||
|
current++;
|
||||||
|
}
|
||||||
|
/* If this is exec input, add a newline to the end of the file if
|
||||||
|
there isn't one already. */
|
||||||
|
if (exec_input && *current != '\n') {
|
||||||
|
*current = '\n';
|
||||||
|
current++;
|
||||||
|
}
|
||||||
|
*current = '\0';
|
||||||
|
final_length = current - buf;
|
||||||
|
if (final_length < length && final_length)
|
||||||
|
/* should never fail */
|
||||||
|
buf = PyMem_REALLOC(buf, final_length + 1);
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
/* Decode a byte string STR for use as the buffer of TOK.
|
/* Decode a byte string STR for use as the buffer of TOK.
|
||||||
Look for encoding declarations inside STR, and record them
|
Look for encoding declarations inside STR, and record them
|
||||||
inside TOK. */
|
inside TOK. */
|
||||||
|
|
||||||
static const char *
|
static const char *
|
||||||
decode_str(const char *str, struct tok_state *tok)
|
decode_str(const char *input, int single, struct tok_state *tok)
|
||||||
{
|
{
|
||||||
PyObject* utf8 = NULL;
|
PyObject* utf8 = NULL;
|
||||||
|
const char *str;
|
||||||
const char *s;
|
const char *s;
|
||||||
const char *newl[2] = {NULL, NULL};
|
const char *newl[2] = {NULL, NULL};
|
||||||
int lineno = 0;
|
int lineno = 0;
|
||||||
|
tok->input = str = translate_newlines(input, single, tok);
|
||||||
|
if (str == NULL)
|
||||||
|
return NULL;
|
||||||
tok->enc = NULL;
|
tok->enc = NULL;
|
||||||
tok->str = str;
|
tok->str = str;
|
||||||
if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
|
if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
|
||||||
|
@ -696,12 +744,12 @@ decode_str(const char *str, struct tok_state *tok)
|
||||||
/* Set up tokenizer for string */
|
/* Set up tokenizer for string */
|
||||||
|
|
||||||
struct tok_state *
|
struct tok_state *
|
||||||
PyTokenizer_FromString(const char *str)
|
PyTokenizer_FromString(const char *str, int exec_input)
|
||||||
{
|
{
|
||||||
struct tok_state *tok = tok_new();
|
struct tok_state *tok = tok_new();
|
||||||
if (tok == NULL)
|
if (tok == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
str = (char *)decode_str(str, tok);
|
str = (char *)decode_str(str, exec_input, tok);
|
||||||
if (str == NULL) {
|
if (str == NULL) {
|
||||||
PyTokenizer_Free(tok);
|
PyTokenizer_Free(tok);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -713,11 +761,18 @@ PyTokenizer_FromString(const char *str)
|
||||||
}
|
}
|
||||||
|
|
||||||
struct tok_state *
|
struct tok_state *
|
||||||
PyTokenizer_FromUTF8(const char *str)
|
PyTokenizer_FromUTF8(const char *str, int exec_input)
|
||||||
{
|
{
|
||||||
struct tok_state *tok = tok_new();
|
struct tok_state *tok = tok_new();
|
||||||
if (tok == NULL)
|
if (tok == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
#ifndef PGEN
|
||||||
|
tok->input = str = translate_newlines(str, exec_input, tok);
|
||||||
|
#endif
|
||||||
|
if (str == NULL) {
|
||||||
|
PyTokenizer_Free(tok);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
tok->decoding_state = STATE_RAW;
|
tok->decoding_state = STATE_RAW;
|
||||||
tok->read_coding_spec = 1;
|
tok->read_coding_spec = 1;
|
||||||
tok->enc = NULL;
|
tok->enc = NULL;
|
||||||
|
@ -734,7 +789,6 @@ PyTokenizer_FromUTF8(const char *str)
|
||||||
return tok;
|
return tok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Set up tokenizer for file */
|
/* Set up tokenizer for file */
|
||||||
|
|
||||||
struct tok_state *
|
struct tok_state *
|
||||||
|
@ -780,6 +834,8 @@ PyTokenizer_Free(struct tok_state *tok)
|
||||||
#endif
|
#endif
|
||||||
if (tok->fp != NULL && tok->buf != NULL)
|
if (tok->fp != NULL && tok->buf != NULL)
|
||||||
PyMem_FREE(tok->buf);
|
PyMem_FREE(tok->buf);
|
||||||
|
if (tok->input)
|
||||||
|
PyMem_FREE((char *)tok->input);
|
||||||
PyMem_FREE(tok);
|
PyMem_FREE(tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -58,10 +58,11 @@ struct tok_state {
|
||||||
#endif
|
#endif
|
||||||
const char* enc; /* Encoding for the current str. */
|
const char* enc; /* Encoding for the current str. */
|
||||||
const char* str;
|
const char* str;
|
||||||
|
const char* input; /* Tokenizer's newline translated copy of the string. */
|
||||||
};
|
};
|
||||||
|
|
||||||
extern struct tok_state *PyTokenizer_FromString(const char *);
|
extern struct tok_state *PyTokenizer_FromString(const char *, int);
|
||||||
extern struct tok_state *PyTokenizer_FromUTF8(const char *);
|
extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
|
||||||
extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
|
extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
|
||||||
char *, char *);
|
char *, char *);
|
||||||
extern void PyTokenizer_Free(struct tok_state *);
|
extern void PyTokenizer_Free(struct tok_state *);
|
||||||
|
|
Loading…
Reference in New Issue