Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, but Python incorrectly assumes it is in UTF-8 format.

Will backport.
This commit is contained in:
Neal Norwitz 2005-09-30 04:46:49 +00:00
parent aa93517de8
commit 484d9a409a
4 changed files with 19 additions and 1 deletions

View File

@@ -889,6 +889,15 @@ def testEncodings():
and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>' and doc.toxml('utf-8') == '<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>'
and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>', and doc.toxml('iso-8859-15') == '<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>',
"testEncodings - encoding EURO SIGN") "testEncodings - encoding EURO SIGN")
# Verify that character decoding errors throw exceptions instead of crashing
try:
doc = parseString('<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>')
except UnicodeDecodeError:
pass
else:
print 'parsing with bad encoding should raise a UnicodeDecodeError'
doc.unlink() doc.unlink()
class UserDataHandler: class UserDataHandler:

View File

@@ -305,6 +305,7 @@ Flemming Kjær Jensen
Jiba Jiba
Orjan Johansen Orjan Johansen
Simon Johnston Simon Johnston
Evan Jones
Richard Jones Richard Jones
Irmen de Jong Irmen de Jong
Lucas de Jonge Lucas de Jonge

View File

@@ -153,6 +153,9 @@ present).
Extension Modules Extension Modules
----------------- -----------------
- Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1,
but Python incorrectly assumes it is in UTF-8 format
- Fix parse errors in the readline module when compiling without threads. - Fix parse errors in the readline module when compiling without threads.
- Patch #1288833: Removed thread lock from socket.getaddrinfo on - Patch #1288833: Removed thread lock from socket.getaddrinfo on

View File

@@ -417,6 +417,9 @@ string_intern(xmlparseobject *self, const char* str)
{ {
PyObject *result = STRING_CONV_FUNC(str); PyObject *result = STRING_CONV_FUNC(str);
PyObject *value; PyObject *value;
/* result can be NULL if the unicode conversion failed. */
if (!result)
return result;
if (!self->intern) if (!self->intern)
return result; return result;
value = PyDict_GetItem(self->intern, result); value = PyDict_GetItem(self->intern, result);
@@ -572,7 +575,9 @@ my_StartElementHandler(void *userData,
Py_DECREF(v); Py_DECREF(v);
} }
} }
args = Py_BuildValue("(NN)", string_intern(self, name), container); args = string_intern(self, name);
if (args != NULL)
args = Py_BuildValue("(NN)", args, container);
if (args == NULL) { if (args == NULL) {
Py_DECREF(container); Py_DECREF(container);
return; return;