Integrate the changes from PyXML's version of pyexpat.c, revisions 1.47, 1.48, and 1.49 (name interning support).
This commit is contained in:
Fred Drake 2002-06-27 19:40:48 +00:00
parent d805fefff4
commit b91a36b230
1 changed file with 108 additions and 45 deletions

View File

@ -60,6 +60,7 @@ typedef struct {
int ordered_attributes; /* Return attributes as a list. */ int ordered_attributes; /* Return attributes as a list. */
int specified_attributes; /* Report only specified attributes. */ int specified_attributes; /* Report only specified attributes. */
int in_callback; /* Is a callback active? */ int in_callback; /* Is a callback active? */
PyObject *intern; /* Dictionary to intern strings */
PyObject **handlers; PyObject **handlers;
} xmlparseobject; } xmlparseobject;
@ -123,7 +124,7 @@ set_error(xmlparseobject *self)
Returns None if str is a null pointer. */ Returns None if str is a null pointer. */
static PyObject * static PyObject *
conv_string_to_unicode(XML_Char *str) conv_string_to_unicode(const XML_Char *str)
{ {
/* XXX currently this code assumes that XML_Char is 8-bit, /* XXX currently this code assumes that XML_Char is 8-bit,
and hence in UTF-8. */ and hence in UTF-8. */
@ -132,8 +133,7 @@ conv_string_to_unicode(XML_Char *str)
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
} }
return PyUnicode_DecodeUTF8((const char *)str, return PyUnicode_DecodeUTF8(str, strlen(str),
strlen((const char *)str),
"strict"); "strict");
} }
@ -155,7 +155,7 @@ conv_string_len_to_unicode(const XML_Char *str, int len)
Returns None if str is a null pointer. */ Returns None if str is a null pointer. */
static PyObject * static PyObject *
conv_string_to_utf8(XML_Char *str) conv_string_to_utf8(const XML_Char *str)
{ {
/* XXX currently this code assumes that XML_Char is 8-bit, /* XXX currently this code assumes that XML_Char is 8-bit,
and hence in UTF-8. */ and hence in UTF-8. */
@ -164,7 +164,7 @@ conv_string_to_utf8(XML_Char *str)
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
} }
return PyString_FromString((const char *)str); return PyString_FromString(str);
} }
static PyObject * static PyObject *
@ -275,6 +275,25 @@ call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
? conv_string_to_unicode : conv_string_to_utf8) ? conv_string_to_unicode : conv_string_to_utf8)
#endif #endif
/* Convert str with STRING_CONV_FUNC and, if the parser has an interning
   dictionary (self->intern), return the dictionary's shared copy of the
   converted object so that equal names are represented by one object.

   The first time a given value is seen it is stored in the dictionary as
   both key and value and returned as-is.  When self->intern is NULL the
   converted object is returned without any interning.

   Returns a new reference, or NULL if the conversion or the dictionary
   insertion fails. */
static PyObject*
string_intern(xmlparseobject *self, const char* str)
{
    PyObject *result = STRING_CONV_FUNC(str);
    PyObject *value;

    /* The conversion can fail (for example on a strict UTF-8 decode
       error); a NULL key must never be handed to the dictionary calls. */
    if (!result)
        return NULL;
    if (!self->intern)
        return result;
    /* PyDict_GetItem returns a borrowed reference, or NULL if absent. */
    value = PyDict_GetItem(self->intern, result);
    if (!value) {
        if (PyDict_SetItem(self->intern, result, result) == 0)
            return result;
        /* Insertion failed: release our reference so it is not leaked. */
        Py_DECREF(result);
        return NULL;
    }
    /* Hand out the interned copy and drop the freshly converted duplicate. */
    Py_INCREF(value);
    Py_DECREF(result);
    return value;
}
static void static void
my_StartElementHandler(void *userData, my_StartElementHandler(void *userData,
const XML_Char *name, const XML_Char **atts) const XML_Char *name, const XML_Char **atts)
@ -307,7 +326,7 @@ my_StartElementHandler(void *userData,
return; return;
} }
for (i = 0; i < max; i += 2) { for (i = 0; i < max; i += 2) {
PyObject *n = STRING_CONV_FUNC((XML_Char *) atts[i]); PyObject *n = string_intern(self, (XML_Char *) atts[i]);
PyObject *v; PyObject *v;
if (n == NULL) { if (n == NULL) {
flag_error(self); flag_error(self);
@ -336,7 +355,7 @@ my_StartElementHandler(void *userData,
Py_DECREF(v); Py_DECREF(v);
} }
} }
args = Py_BuildValue("(O&N)", STRING_CONV_FUNC,name, container); args = Py_BuildValue("(NN)", string_intern(self, name), container);
if (args == NULL) { if (args == NULL) {
Py_DECREF(container); Py_DECREF(container);
return; return;
@ -394,13 +413,13 @@ my_##NAME##Handler PARAMS {\
VOID_HANDLER(EndElement, VOID_HANDLER(EndElement,
(void *userData, const XML_Char *name), (void *userData, const XML_Char *name),
("(O&)", STRING_CONV_FUNC, name)) ("(N)", string_intern(self, name)))
VOID_HANDLER(ProcessingInstruction, VOID_HANDLER(ProcessingInstruction,
(void *userData, (void *userData,
const XML_Char *target, const XML_Char *target,
const XML_Char *data), const XML_Char *data),
("(O&O&)",STRING_CONV_FUNC,target, STRING_CONV_FUNC,data)) ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
#ifndef Py_USING_UNICODE #ifndef Py_USING_UNICODE
VOID_HANDLER(CharacterData, VOID_HANDLER(CharacterData,
@ -421,10 +440,10 @@ VOID_HANDLER(UnparsedEntityDecl,
const XML_Char *systemId, const XML_Char *systemId,
const XML_Char *publicId, const XML_Char *publicId,
const XML_Char *notationName), const XML_Char *notationName),
("(O&O&O&O&O&)", ("(NNNNN)",
STRING_CONV_FUNC,entityName, STRING_CONV_FUNC,base, string_intern(self, entityName), string_intern(self, base),
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId, string_intern(self, systemId), string_intern(self, publicId),
STRING_CONV_FUNC,notationName)) string_intern(self, notationName)))
#ifndef Py_USING_UNICODE #ifndef Py_USING_UNICODE
VOID_HANDLER(EntityDecl, VOID_HANDLER(EntityDecl,
@ -437,11 +456,12 @@ VOID_HANDLER(EntityDecl,
const XML_Char *systemId, const XML_Char *systemId,
const XML_Char *publicId, const XML_Char *publicId,
const XML_Char *notationName), const XML_Char *notationName),
("O&iNO&O&O&O&", ("NiNNNNN",
STRING_CONV_FUNC,entityName, is_parameter_entity, string_intern(self, entityName), is_parameter_entity,
conv_string_len_to_utf8(value, value_length), conv_string_len_to_utf8(value, value_length),
STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId, string_intern(self, base), string_intern(self, systemId),
STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName)) string_intern(self, publicId),
string_intern(self, notationName)))
#else #else
VOID_HANDLER(EntityDecl, VOID_HANDLER(EntityDecl,
(void *userData, (void *userData,
@ -453,13 +473,14 @@ VOID_HANDLER(EntityDecl,
const XML_Char *systemId, const XML_Char *systemId,
const XML_Char *publicId, const XML_Char *publicId,
const XML_Char *notationName), const XML_Char *notationName),
("O&iNO&O&O&O&", ("NiNNNNN",
STRING_CONV_FUNC,entityName, is_parameter_entity, string_intern(self, entityName), is_parameter_entity,
(self->returns_unicode (self->returns_unicode
? conv_string_len_to_unicode(value, value_length) ? conv_string_len_to_unicode(value, value_length)
: conv_string_len_to_utf8(value, value_length)), : conv_string_len_to_utf8(value, value_length)),
STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId, string_intern(self, base), string_intern(self, systemId),
STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName)) string_intern(self, publicId),
string_intern(self, notationName)))
#endif #endif
VOID_HANDLER(XmlDecl, VOID_HANDLER(XmlDecl,
@ -473,7 +494,7 @@ VOID_HANDLER(XmlDecl,
static PyObject * static PyObject *
conv_content_model(XML_Content * const model, conv_content_model(XML_Content * const model,
PyObject *(*conv_string)(XML_Char *)) PyObject *(*conv_string)(const XML_Char *))
{ {
PyObject *result = NULL; PyObject *result = NULL;
PyObject *children = PyTuple_New(model->numchildren); PyObject *children = PyTuple_New(model->numchildren);
@ -514,8 +535,8 @@ VOID_HANDLER(ElementDecl,
(void *userData, (void *userData,
const XML_Char *name, const XML_Char *name,
XML_Content *model), XML_Content *model),
("O&O&", ("NO&",
STRING_CONV_FUNC,name, string_intern(self, name),
(self->returns_unicode ? conv_content_model_unicode (self->returns_unicode ? conv_content_model_unicode
: conv_content_model_utf8),model)) : conv_content_model_utf8),model))
#else #else
@ -523,8 +544,8 @@ VOID_HANDLER(ElementDecl,
(void *userData, (void *userData,
const XML_Char *name, const XML_Char *name,
XML_Content *model), XML_Content *model),
("O&O&", ("NO&",
STRING_CONV_FUNC,name, conv_content_model_utf8,model)) string_intern(self, name), conv_content_model_utf8,model))
#endif #endif
VOID_HANDLER(AttlistDecl, VOID_HANDLER(AttlistDecl,
@ -534,8 +555,8 @@ VOID_HANDLER(AttlistDecl,
const XML_Char *att_type, const XML_Char *att_type,
const XML_Char *dflt, const XML_Char *dflt,
int isrequired), int isrequired),
("(O&O&O&O&i)", ("(NNO&O&i)",
STRING_CONV_FUNC,elname, STRING_CONV_FUNC,attname, string_intern(self, elname), string_intern(self, attname),
STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt, STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
isrequired)) isrequired))
@ -545,24 +566,25 @@ VOID_HANDLER(NotationDecl,
const XML_Char *base, const XML_Char *base,
const XML_Char *systemId, const XML_Char *systemId,
const XML_Char *publicId), const XML_Char *publicId),
("(O&O&O&O&)", ("(NNNN)",
STRING_CONV_FUNC,notationName, STRING_CONV_FUNC,base, string_intern(self, notationName), string_intern(self, base),
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId)) string_intern(self, systemId), string_intern(self, publicId)))
VOID_HANDLER(StartNamespaceDecl, VOID_HANDLER(StartNamespaceDecl,
(void *userData, (void *userData,
const XML_Char *prefix, const XML_Char *prefix,
const XML_Char *uri), const XML_Char *uri),
("(O&O&)", STRING_CONV_FUNC,prefix, STRING_CONV_FUNC,uri)) ("(NN)",
string_intern(self, prefix), string_intern(self, uri)))
VOID_HANDLER(EndNamespaceDecl, VOID_HANDLER(EndNamespaceDecl,
(void *userData, (void *userData,
const XML_Char *prefix), const XML_Char *prefix),
("(O&)", STRING_CONV_FUNC,prefix)) ("(N)", string_intern(self, prefix)))
VOID_HANDLER(Comment, VOID_HANDLER(Comment,
(void *userData, const XML_Char *prefix), (void *userData, const XML_Char *data),
("(O&)", STRING_CONV_FUNC,prefix)) ("(O&)", STRING_CONV_FUNC,data))
VOID_HANDLER(StartCdataSection, VOID_HANDLER(StartCdataSection,
(void *userData), (void *userData),
@ -605,9 +627,9 @@ RC_HANDLER(int, ExternalEntityRef,
const XML_Char *systemId, const XML_Char *systemId,
const XML_Char *publicId), const XML_Char *publicId),
int rc=0;, int rc=0;,
("(O&O&O&O&)", ("(O&NNN)",
STRING_CONV_FUNC,context, STRING_CONV_FUNC,base, STRING_CONV_FUNC,context, string_intern(self, base),
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId), string_intern(self, systemId), string_intern(self, publicId)),
rc = PyInt_AsLong(rv);, rc, rc = PyInt_AsLong(rv);, rc,
XML_GetUserData(parser)) XML_GetUserData(parser))
@ -617,8 +639,8 @@ VOID_HANDLER(StartDoctypeDecl,
(void *userData, const XML_Char *doctypeName, (void *userData, const XML_Char *doctypeName,
const XML_Char *sysid, const XML_Char *pubid, const XML_Char *sysid, const XML_Char *pubid,
int has_internal_subset), int has_internal_subset),
("(O&O&O&i)", STRING_CONV_FUNC,doctypeName, ("(NNNi)", string_intern(self, doctypeName),
STRING_CONV_FUNC,sysid, STRING_CONV_FUNC,pubid, string_intern(self, sysid), string_intern(self, pubid),
has_internal_subset)) has_internal_subset))
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()")) VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
@ -856,6 +878,8 @@ xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context, new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
encoding); encoding);
new_parser->handlers = 0; new_parser->handlers = 0;
new_parser->intern = self->intern;
Py_XINCREF(new_parser->intern);
#ifdef Py_TPFLAGS_HAVE_GC #ifdef Py_TPFLAGS_HAVE_GC
PyObject_GC_Track(new_parser); PyObject_GC_Track(new_parser);
#else #else
@ -988,7 +1012,7 @@ XML_Encoding * info)
#endif #endif
static PyObject * static PyObject *
newxmlparseobject(char *encoding, char *namespace_separator) newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
{ {
int i; int i;
xmlparseobject *self; xmlparseobject *self;
@ -1022,6 +1046,8 @@ newxmlparseobject(char *encoding, char *namespace_separator)
else { else {
self->itself = XML_ParserCreate(encoding); self->itself = XML_ParserCreate(encoding);
} }
self->intern = intern;
Py_XINCREF(self->intern);
#ifdef Py_TPFLAGS_HAVE_GC #ifdef Py_TPFLAGS_HAVE_GC
PyObject_GC_Track(self); PyObject_GC_Track(self);
#else #else
@ -1074,6 +1100,7 @@ xmlparse_dealloc(xmlparseobject *self)
} }
free(self->handlers); free(self->handlers);
} }
Py_XDECREF(self->intern);
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6 #if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
/* Code for versions before 1.6 */ /* Code for versions before 1.6 */
free(self); free(self);
@ -1118,6 +1145,16 @@ xmlparse_getattr(xmlparseobject *self, char *name)
return PyInt_FromLong((long) self->returns_unicode); return PyInt_FromLong((long) self->returns_unicode);
if (strcmp(name, "specified_attributes") == 0) if (strcmp(name, "specified_attributes") == 0)
return PyInt_FromLong((long) self->specified_attributes); return PyInt_FromLong((long) self->specified_attributes);
if (strcmp(name, "intern") == 0) {
if (self->intern == NULL) {
Py_INCREF(Py_None);
return Py_None;
}
else {
Py_INCREF(self->intern);
return self->intern;
}
}
handlernum = handlername2int(name); handlernum = handlername2int(name);
@ -1138,6 +1175,7 @@ xmlparse_getattr(xmlparseobject *self, char *name)
PyList_Append(rc, PyString_FromString("ordered_attributes")); PyList_Append(rc, PyString_FromString("ordered_attributes"));
PyList_Append(rc, PyString_FromString("returns_unicode")); PyList_Append(rc, PyString_FromString("returns_unicode"));
PyList_Append(rc, PyString_FromString("specified_attributes")); PyList_Append(rc, PyString_FromString("specified_attributes"));
PyList_Append(rc, PyString_FromString("intern"));
return rc; return rc;
} }
@ -1221,6 +1259,8 @@ static int
xmlparse_clear(xmlparseobject *op) xmlparse_clear(xmlparseobject *op)
{ {
clear_handlers(op, 0); clear_handlers(op, 0);
Py_XDECREF(op->intern);
op->intern = 0;
return 0; return 0;
} }
#endif #endif
@ -1275,10 +1315,14 @@ pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
{ {
char *encoding = NULL; char *encoding = NULL;
char *namespace_separator = NULL; char *namespace_separator = NULL;
static char *kwlist[] = {"encoding", "namespace_separator", NULL}; PyObject *intern = NULL;
PyObject *result;
int intern_decref = 0;
static char *kwlist[] = {"encoding", "namespace_separator",
"intern", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kw, "|zz:ParserCreate", kwlist, if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
&encoding, &namespace_separator)) &encoding, &namespace_separator, &intern))
return NULL; return NULL;
if (namespace_separator != NULL if (namespace_separator != NULL
&& strlen(namespace_separator) > 1) { && strlen(namespace_separator) > 1) {
@ -1287,7 +1331,26 @@ pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
" character, omitted, or None"); " character, omitted, or None");
return NULL; return NULL;
} }
return newxmlparseobject(encoding, namespace_separator); /* Explicitly passing None means no interning is desired.
Not passing anything means that a new dictionary is used. */
if (intern == Py_None)
intern = NULL;
else if (intern == NULL) {
intern = PyDict_New();
if (!intern)
return NULL;
intern_decref = 1;
}
else if (!PyDict_Check(intern)) {
PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
return NULL;
}
result = newxmlparseobject(encoding, namespace_separator, intern);
if (intern_decref) {
Py_DECREF(intern);
}
return result;
} }
PyDoc_STRVAR(pyexpat_ErrorString__doc__, PyDoc_STRVAR(pyexpat_ErrorString__doc__,