Commit 52467b16 authored by Eli Bendersky

Issue #14007: make XMLParser a real subclassable type exported from _elementtree. +cleanups

parent 7e0229e9
...@@ -646,8 +646,8 @@ ElementTree Objects ...@@ -646,8 +646,8 @@ ElementTree Objects
Loads an external XML section into this element tree. *source* is a file Loads an external XML section into this element tree. *source* is a file
name or :term:`file object`. *parser* is an optional parser instance. name or :term:`file object`. *parser* is an optional parser instance.
If not given, the standard XMLParser parser is used. Returns the section If not given, the standard :class:`XMLParser` parser is used. Returns the
root element. section root element.
.. method:: write(file, encoding="us-ascii", xml_declaration=None, method="xml") .. method:: write(file, encoding="us-ascii", xml_declaration=None, method="xml")
...@@ -767,9 +767,9 @@ XMLParser Objects ...@@ -767,9 +767,9 @@ XMLParser Objects
:class:`Element` structure builder for XML source data, based on the expat :class:`Element` structure builder for XML source data, based on the expat
parser. *html* are predefined HTML entities. This flag is not supported by parser. *html* are predefined HTML entities. This flag is not supported by
the current implementation. *target* is the target object. If omitted, the the current implementation. *target* is the target object. If omitted, the
builder uses an instance of the standard TreeBuilder class. *encoding* [1]_ builder uses an instance of the standard :class:`TreeBuilder` class.
is optional. If given, the value overrides the encoding specified in the *encoding* [1]_ is optional. If given, the value overrides the encoding
XML file. specified in the XML file.
.. method:: close() .. method:: close()
......
...@@ -2028,6 +2028,34 @@ class TreeBuilderTest(unittest.TestCase): ...@@ -2028,6 +2028,34 @@ class TreeBuilderTest(unittest.TestCase):
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
class XMLParserTest(unittest.TestCase):
    """Checks ET.XMLParser construction (positional/keyword) and subclassing."""

    sample1 = '<file><line>22</line></file>'

    def _check_sample_element(self, e):
        """Assert that *e* has the structure described by ``sample1``."""
        self.assertEqual(e.tag, 'file')
        self.assertEqual(e[0].tag, 'line')
        self.assertEqual(e[0].text, '22')

    def test_constructor_args(self):
        # Positional form: the leading argument (html) is unsupported, yet
        # the constructor must still accept it without complaint.
        positional = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8')
        positional.feed(self.sample1)
        self._check_sample_element(positional.close())

        # Keyword form: the same three arguments, passed by name.
        by_keyword = ET.XMLParser(encoding='utf-8', html=[{}], target=ET.TreeBuilder())
        by_keyword.feed(self.sample1)
        self._check_sample_element(by_keyword.close())

    def test_subclass(self):
        # A trivial subclass must be usable exactly like the base parser.
        class MyParser(ET.XMLParser):
            pass

        derived = MyParser()
        derived.feed(self.sample1)
        self._check_sample_element(derived.close())
class NoAcceleratorTest(unittest.TestCase): class NoAcceleratorTest(unittest.TestCase):
# Test that the C accelerator was not imported for pyET # Test that the C accelerator was not imported for pyET
def test_correct_import_pyET(self): def test_correct_import_pyET(self):
...@@ -2245,6 +2273,7 @@ def test_main(module=pyET): ...@@ -2245,6 +2273,7 @@ def test_main(module=pyET):
ElementTreeTest, ElementTreeTest,
NamespaceParseTest, NamespaceParseTest,
TreeBuilderTest, TreeBuilderTest,
XMLParserTest,
KeywordArgsTest] KeywordArgsTest]
if module is pyET: if module is pyET:
# Run the tests specific to the Python implementation # Run the tests specific to the Python implementation
......
...@@ -2257,6 +2257,9 @@ static struct PyExpat_CAPI* expat_capi; ...@@ -2257,6 +2257,9 @@ static struct PyExpat_CAPI* expat_capi;
#define EXPAT(func) (XML_##func) #define EXPAT(func) (XML_##func)
#endif #endif
/* Memory-handling suite passed to EXPAT(ParserCreate_MM) in xmlparser_init.
   It supplies PyObject_Malloc, PyObject_Realloc and PyObject_Free as the
   parser's malloc, realloc and free functions, in that order. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
PyObject_Malloc, PyObject_Realloc, PyObject_Free};
typedef struct { typedef struct {
PyObject_HEAD PyObject_HEAD
...@@ -2671,121 +2674,125 @@ expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name, ...@@ -2671,121 +2674,125 @@ expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
} }
/* -------------------------------------------------------------------- */ /* -------------------------------------------------------------------- */
/* constructor and destructor */
static PyObject* static PyObject *
xmlparser(PyObject* self_, PyObject* args, PyObject* kw) xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{ {
XMLParserObject* self; XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
/* FIXME: does this need to be static? */ if (self) {
static XML_Memory_Handling_Suite memory_handler; self->parser = NULL;
self->target = self->entity = self->names = NULL;
PyObject* target = NULL; self->handle_start = self->handle_data = self->handle_end = NULL;
char* encoding = NULL; self->handle_comment = self->handle_pi = self->handle_close = NULL;
static char* kwlist[] = { "target", "encoding", NULL };
if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
&target, &encoding))
return NULL;
#if defined(USE_PYEXPAT_CAPI)
if (!expat_capi) {
PyErr_SetString(
PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
);
return NULL;
} }
#endif return (PyObject *)self;
}
self = PyObject_New(XMLParserObject, &XMLParser_Type); static int
if (self == NULL) xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
return NULL; {
XMLParserObject *self_xp = (XMLParserObject *)self;
PyObject *target = NULL, *html = NULL;
char *encoding = NULL;
/* Keyword names for the "|OOz" format units, in positional order.  The list
   must end with a NULL sentinel: PyArg_ParseTupleAndKeywords() walks it until
   it finds NULL, so an unterminated array is read past its end. */
static char *kwlist[] = {"html", "target", "encoding", NULL};
self->entity = PyDict_New(); if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
if (!self->entity) { &html, &target, &encoding)) {
PyObject_Del(self); return -1;
return NULL;
} }
self->names = PyDict_New(); self_xp->entity = PyDict_New();
if (!self->names) { if (!self_xp->entity)
PyObject_Del(self->entity); return -1;
PyObject_Del(self);
return NULL;
}
memory_handler.malloc_fcn = PyObject_Malloc; self_xp->names = PyDict_New();
memory_handler.realloc_fcn = PyObject_Realloc; if (!self_xp->names) {
memory_handler.free_fcn = PyObject_Free; Py_XDECREF(self_xp->entity);
return -1;
}
self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}"); self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
if (!self->parser) { if (!self_xp->parser) {
PyObject_Del(self->names); Py_XDECREF(self_xp->entity);
PyObject_Del(self->entity); Py_XDECREF(self_xp->names);
PyObject_Del(self);
PyErr_NoMemory(); PyErr_NoMemory();
return NULL; return -1;
} }
/* setup target handlers */ if (target) {
if (!target) { Py_INCREF(target);
} else {
target = treebuilder_new(&TreeBuilder_Type, NULL, NULL); target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
if (!target) { if (!target) {
EXPAT(ParserFree)(self->parser); Py_XDECREF(self_xp->entity);
PyObject_Del(self->names); Py_XDECREF(self_xp->names);
PyObject_Del(self->entity); EXPAT(ParserFree)(self_xp->parser);
PyObject_Del(self); return -1;
return NULL;
} }
} else }
Py_INCREF(target); self_xp->target = target;
self->target = target;
self->handle_start = PyObject_GetAttrString(target, "start"); self_xp->handle_start = PyObject_GetAttrString(target, "start");
self->handle_data = PyObject_GetAttrString(target, "data"); self_xp->handle_data = PyObject_GetAttrString(target, "data");
self->handle_end = PyObject_GetAttrString(target, "end"); self_xp->handle_end = PyObject_GetAttrString(target, "end");
self->handle_comment = PyObject_GetAttrString(target, "comment"); self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
self->handle_pi = PyObject_GetAttrString(target, "pi"); self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
self->handle_close = PyObject_GetAttrString(target, "close"); self_xp->handle_close = PyObject_GetAttrString(target, "close");
PyErr_Clear(); PyErr_Clear();
/* configure parser */ /* configure parser */
EXPAT(SetUserData)(self->parser, self); EXPAT(SetUserData)(self_xp->parser, self_xp);
EXPAT(SetElementHandler)( EXPAT(SetElementHandler)(
self->parser, self_xp->parser,
(XML_StartElementHandler) expat_start_handler, (XML_StartElementHandler) expat_start_handler,
(XML_EndElementHandler) expat_end_handler (XML_EndElementHandler) expat_end_handler
); );
EXPAT(SetDefaultHandlerExpand)( EXPAT(SetDefaultHandlerExpand)(
self->parser, self_xp->parser,
(XML_DefaultHandler) expat_default_handler (XML_DefaultHandler) expat_default_handler
); );
EXPAT(SetCharacterDataHandler)( EXPAT(SetCharacterDataHandler)(
self->parser, self_xp->parser,
(XML_CharacterDataHandler) expat_data_handler (XML_CharacterDataHandler) expat_data_handler
); );
if (self->handle_comment) if (self_xp->handle_comment)
EXPAT(SetCommentHandler)( EXPAT(SetCommentHandler)(
self->parser, self_xp->parser,
(XML_CommentHandler) expat_comment_handler (XML_CommentHandler) expat_comment_handler
); );
if (self->handle_pi) if (self_xp->handle_pi)
EXPAT(SetProcessingInstructionHandler)( EXPAT(SetProcessingInstructionHandler)(
self->parser, self_xp->parser,
(XML_ProcessingInstructionHandler) expat_pi_handler (XML_ProcessingInstructionHandler) expat_pi_handler
); );
EXPAT(SetUnknownEncodingHandler)( EXPAT(SetUnknownEncodingHandler)(
self->parser, self_xp->parser,
(XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
); );
ALLOC(sizeof(XMLParserObject), "create expatparser"); return 0;
}
return (PyObject*) self; static int
xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
{
Py_VISIT(self->handle_close);
Py_VISIT(self->handle_pi);
Py_VISIT(self->handle_comment);
Py_VISIT(self->handle_end);
Py_VISIT(self->handle_data);
Py_VISIT(self->handle_start);
Py_VISIT(self->target);
Py_VISIT(self->entity);
Py_VISIT(self->names);
return 0;
} }
static void static int
xmlparser_dealloc(XMLParserObject* self) xmlparser_gc_clear(XMLParserObject *self)
{ {
EXPAT(ParserFree)(self->parser); EXPAT(ParserFree)(self->parser);
...@@ -2796,17 +2803,20 @@ xmlparser_dealloc(XMLParserObject* self) ...@@ -2796,17 +2803,20 @@ xmlparser_dealloc(XMLParserObject* self)
Py_XDECREF(self->handle_data); Py_XDECREF(self->handle_data);
Py_XDECREF(self->handle_start); Py_XDECREF(self->handle_start);
Py_DECREF(self->target); Py_XDECREF(self->target);
Py_DECREF(self->entity); Py_XDECREF(self->entity);
Py_DECREF(self->names); Py_XDECREF(self->names);
RELEASE(sizeof(XMLParserObject), "destroy expatparser"); return 0;
PyObject_Del(self);
} }
/* -------------------------------------------------------------------- */ static void
/* methods (in alphabetical order) */ xmlparser_dealloc(XMLParserObject* self)
{
PyObject_GC_UnTrack(self);
xmlparser_gc_clear(self);
Py_TYPE(self)->tp_free((PyObject *)self);
}
LOCAL(PyObject*) LOCAL(PyObject*)
expat_parse(XMLParserObject* self, char* data, int data_len, int final) expat_parse(XMLParserObject* self, char* data, int data_len, int final)
...@@ -3098,16 +3108,27 @@ static PyTypeObject XMLParser_Type = { ...@@ -3098,16 +3108,27 @@ static PyTypeObject XMLParser_Type = {
(getattrofunc)xmlparser_getattro, /* tp_getattro */ (getattrofunc)xmlparser_getattro, /* tp_getattro */
0, /* tp_setattro */ 0, /* tp_setattro */
0, /* tp_as_buffer */ 0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
/* tp_flags */
0, /* tp_doc */ 0, /* tp_doc */
0, /* tp_traverse */ (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
0, /* tp_clear */ (inquiry)xmlparser_gc_clear, /* tp_clear */
0, /* tp_richcompare */ 0, /* tp_richcompare */
0, /* tp_weaklistoffset */ 0, /* tp_weaklistoffset */
0, /* tp_iter */ 0, /* tp_iter */
0, /* tp_iternext */ 0, /* tp_iternext */
xmlparser_methods, /* tp_methods */ xmlparser_methods, /* tp_methods */
0, /* tp_members */ 0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)xmlparser_init, /* tp_init */
PyType_GenericAlloc, /* tp_alloc */
xmlparser_new, /* tp_new */
0, /* tp_free */
}; };
#endif #endif
...@@ -3117,9 +3138,6 @@ static PyTypeObject XMLParser_Type = { ...@@ -3117,9 +3138,6 @@ static PyTypeObject XMLParser_Type = {
static PyMethodDef _functions[] = { static PyMethodDef _functions[] = {
{"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS}, {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
#if defined(USE_EXPAT)
{"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
#endif
{NULL, NULL} {NULL, NULL}
}; };
...@@ -3214,9 +3232,16 @@ PyInit__elementtree(void) ...@@ -3214,9 +3232,16 @@ PyInit__elementtree(void)
expat_capi->size < sizeof(struct PyExpat_CAPI) || expat_capi->size < sizeof(struct PyExpat_CAPI) ||
expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION || expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
expat_capi->MINOR_VERSION != XML_MINOR_VERSION || expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
expat_capi->MICRO_VERSION != XML_MICRO_VERSION) expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
expat_capi = NULL; expat_capi = NULL;
} }
}
if (!expat_capi) {
PyErr_SetString(
PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
);
return NULL;
}
#endif #endif
elementtree_parseerror_obj = PyErr_NewException( elementtree_parseerror_obj = PyErr_NewException(
...@@ -3231,5 +3256,10 @@ PyInit__elementtree(void) ...@@ -3231,5 +3256,10 @@ PyInit__elementtree(void)
Py_INCREF((PyObject *)&TreeBuilder_Type); Py_INCREF((PyObject *)&TreeBuilder_Type);
PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type); PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
#if defined(USE_EXPAT)
Py_INCREF((PyObject *)&XMLParser_Type);
PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
#endif
return m; return m;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment