Commit e9927e18, authored by Stefan Behnel and committed by GitHub

bpo-30485: support a default prefix mapping in ElementPath by passing None as prefix (#1823)

parent ffca16e2
...@@ -764,7 +764,8 @@ Element Objects ...@@ -764,7 +764,8 @@ Element Objects
Finds the first subelement matching *match*. *match* may be a tag name Finds the first subelement matching *match*. *match* may be a tag name
or a :ref:`path <elementtree-xpath>`. Returns an element instance or a :ref:`path <elementtree-xpath>`. Returns an element instance
or ``None``. *namespaces* is an optional mapping from namespace prefix or ``None``. *namespaces* is an optional mapping from namespace prefix
to full name. to full name. Pass ``None`` as prefix to move all unprefixed tag names
in the expression into the given namespace.
.. method:: findall(match, namespaces=None) .. method:: findall(match, namespaces=None)
...@@ -772,7 +773,8 @@ Element Objects ...@@ -772,7 +773,8 @@ Element Objects
Finds all matching subelements, by tag name or Finds all matching subelements, by tag name or
:ref:`path <elementtree-xpath>`. Returns a list containing all matching :ref:`path <elementtree-xpath>`. Returns a list containing all matching
elements in document order. *namespaces* is an optional mapping from elements in document order. *namespaces* is an optional mapping from
namespace prefix to full name. namespace prefix to full name. Pass ``None`` as prefix to move all
unprefixed tag names in the expression into the given namespace.
.. method:: findtext(match, default=None, namespaces=None) .. method:: findtext(match, default=None, namespaces=None)
...@@ -782,7 +784,8 @@ Element Objects ...@@ -782,7 +784,8 @@ Element Objects
of the first matching element, or *default* if no element was found. of the first matching element, or *default* if no element was found.
Note that if the matching element has no text content an empty string Note that if the matching element has no text content an empty string
is returned. *namespaces* is an optional mapping from namespace prefix is returned. *namespaces* is an optional mapping from namespace prefix
to full name. to full name. Pass ``None`` as prefix to move all unprefixed tag names
in the expression into the given namespace.
.. method:: getchildren() .. method:: getchildren()
......
...@@ -2463,6 +2463,12 @@ class ElementFindTest(unittest.TestCase): ...@@ -2463,6 +2463,12 @@ class ElementFindTest(unittest.TestCase):
nsmap = {'xx': 'Y'} nsmap = {'xx': 'Y'}
self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1) self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2) self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
nsmap = {'xx': 'X', None: 'Y'}
self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
nsmap = {'xx': 'X', '': 'Y'}
with self.assertRaisesRegex(ValueError, 'namespace prefix'):
root.findall(".//xx:b", namespaces=nsmap)
def test_bad_find(self): def test_bad_find(self):
e = ET.XML(SAMPLE_XML) e = ET.XML(SAMPLE_XML)
......
...@@ -71,16 +71,22 @@ xpath_tokenizer_re = re.compile( ...@@ -71,16 +71,22 @@ xpath_tokenizer_re = re.compile(
) )
def xpath_tokenizer(pattern, namespaces=None): def xpath_tokenizer(pattern, namespaces=None):
default_namespace = namespaces.get(None) if namespaces else None
for token in xpath_tokenizer_re.findall(pattern): for token in xpath_tokenizer_re.findall(pattern):
tag = token[1] tag = token[1]
if tag and tag[0] != "{" and ":" in tag: if tag and tag[0] != "{":
try: if ":" in tag:
prefix, uri = tag.split(":", 1) prefix, uri = tag.split(":", 1)
try:
if not namespaces: if not namespaces:
raise KeyError raise KeyError
yield token[0], "{%s}%s" % (namespaces[prefix], uri) yield token[0], "{%s}%s" % (namespaces[prefix], uri)
except KeyError: except KeyError:
raise SyntaxError("prefix %r not found in prefix map" % prefix) from None raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
elif default_namespace:
yield token[0], "{%s}%s" % (default_namespace, tag)
else:
yield token
else: else:
yield token yield token
...@@ -264,10 +270,19 @@ class _SelectorContext: ...@@ -264,10 +270,19 @@ class _SelectorContext:
def iterfind(elem, path, namespaces=None): def iterfind(elem, path, namespaces=None):
# compile selector pattern # compile selector pattern
cache_key = (path, None if namespaces is None
else tuple(sorted(namespaces.items())))
if path[-1:] == "/": if path[-1:] == "/":
path = path + "*" # implicit all (FIXME: keep this?) path = path + "*" # implicit all (FIXME: keep this?)
cache_key = (path,)
if namespaces:
if '' in namespaces:
raise ValueError("empty namespace prefix must be passed as None, not the empty string")
if None in namespaces:
cache_key += (namespaces[None],) + tuple(sorted(
item for item in namespaces.items() if item[0] is not None))
else:
cache_key += tuple(sorted(namespaces.items()))
try: try:
selector = _cache[cache_key] selector = _cache[cache_key]
except KeyError: except KeyError:
......
Path expressions in xml.etree.ElementTree can now avoid explicit namespace
prefixes for tags (or the "{namespace}tag" notation) by passing a default
namespace with a 'None' prefix.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment