Commit 91c8f590 authored by Guido van Rossum

Sjoerd Mullender:

- Fixed a bug where a syntax error was reported when a document
  started with white space.  (White space at the start of a document
  is valid if there is no XML declaration.)
- Improved the speed quite a bit for documents that don't make use of
  namespaces.
parent 5bc4abe0
...@@ -100,6 +100,7 @@ class XMLParser: ...@@ -100,6 +100,7 @@ class XMLParser:
self.__at_start = 1 self.__at_start = 1
self.__seen_doctype = None self.__seen_doctype = None
self.__seen_starttag = 0 self.__seen_starttag = 0
self.__use_namespaces = 0
self.__namespaces = {'xml':None} # xml is implicitly declared self.__namespaces = {'xml':None} # xml is implicitly declared
# For derived classes only -- enter literal mode (CDATA) till EOF # For derived classes only -- enter literal mode (CDATA) till EOF
...@@ -183,10 +184,10 @@ class XMLParser: ...@@ -183,10 +184,10 @@ class XMLParser:
else: else:
j = n j = n
if i < j: if i < j:
if self.__at_start: data = rawdata[i:j]
if self.__at_start and space.match(data) is None:
self.syntax_error('illegal data at start of file') self.syntax_error('illegal data at start of file')
self.__at_start = 0 self.__at_start = 0
data = rawdata[i:j]
if not self.stack and space.match(data) is None: if not self.stack and space.match(data) is None:
self.syntax_error('data not in content') self.syntax_error('data not in content')
if illegal.search(data): if illegal.search(data):
...@@ -439,6 +440,7 @@ class XMLParser: ...@@ -439,6 +440,7 @@ class XMLParser:
name = res.group(0) name = res.group(0)
if name == 'xml:namespace': if name == 'xml:namespace':
self.syntax_error('old-fashioned namespace declaration') self.syntax_error('old-fashioned namespace declaration')
self.__use_namespaces = -1
# namespace declaration # namespace declaration
# this must come after the <?xml?> declaration (if any) # this must come after the <?xml?> declaration (if any)
# and before the <!DOCTYPE> (if any). # and before the <!DOCTYPE> (if any).
...@@ -489,6 +491,8 @@ class XMLParser: ...@@ -489,6 +491,8 @@ class XMLParser:
# namespace declaration # namespace declaration
ncname = res.group('ncname') ncname = res.group('ncname')
namespace[ncname or ''] = attrvalue or None namespace[ncname or ''] = attrvalue or None
if not self.__use_namespaces:
self.__use_namespaces = len(self.stack)+1
continue continue
if '<' in attrvalue: if '<' in attrvalue:
self.syntax_error("`<' illegal in attribute value") self.syntax_error("`<' illegal in attribute value")
...@@ -518,7 +522,10 @@ class XMLParser: ...@@ -518,7 +522,10 @@ class XMLParser:
k, j = tag.span('attrs') k, j = tag.span('attrs')
attrdict, nsdict, k = self.parse_attributes(tagname, k, j) attrdict, nsdict, k = self.parse_attributes(tagname, k, j)
self.stack.append((tagname, nsdict, nstag)) self.stack.append((tagname, nsdict, nstag))
res = qname.match(tagname) if self.__use_namespaces:
res = qname.match(tagname)
else:
res = None
if res is not None: if res is not None:
prefix, nstag = res.group('prefix', 'local') prefix, nstag = res.group('prefix', 'local')
if prefix is None: if prefix is None:
...@@ -535,27 +542,28 @@ class XMLParser: ...@@ -535,27 +542,28 @@ class XMLParser:
nstag = prefix + ':' + nstag # undo split nstag = prefix + ':' + nstag # undo split
self.stack[-1] = tagname, nsdict, nstag self.stack[-1] = tagname, nsdict, nstag
# translate namespace of attributes # translate namespace of attributes
nattrdict = {} if self.__use_namespaces:
for key, val in attrdict.items(): nattrdict = {}
res = qname.match(key) for key, val in attrdict.items():
if res is not None: res = qname.match(key)
aprefix, key = res.group('prefix', 'local') if res is not None:
if aprefix is None: aprefix, key = res.group('prefix', 'local')
aprefix = '' if aprefix is None:
ans = None aprefix = ''
for t, d, nst in self.stack: ans = None
if d.has_key(aprefix): for t, d, nst in self.stack:
ans = d[aprefix] if d.has_key(aprefix):
if ans is None and aprefix != '': ans = d[aprefix]
ans = self.__namespaces.get(aprefix) if ans is None and aprefix != '':
if ans is not None: ans = self.__namespaces.get(aprefix)
key = ans + ' ' + key if ans is not None:
elif aprefix != '': key = ans + ' ' + key
key = aprefix + ':' + key elif aprefix != '':
elif ns is not None: key = aprefix + ':' + key
key = ns + ' ' + key elif ns is not None:
nattrdict[key] = val key = ns + ' ' + key
attrdict = nattrdict nattrdict[key] = val
attrdict = nattrdict
attributes = self.attributes.get(nstag) attributes = self.attributes.get(nstag)
if attributes is not None: if attributes is not None:
for key in attrdict.keys(): for key in attrdict.keys():
...@@ -634,6 +642,8 @@ class XMLParser: ...@@ -634,6 +642,8 @@ class XMLParser:
self.handle_endtag(nstag, method) self.handle_endtag(nstag, method)
else: else:
self.unknown_endtag(nstag) self.unknown_endtag(nstag)
if self.__use_namespaces == len(self.stack):
self.__use_namespaces = 0
del self.stack[-1] del self.stack[-1]
# Overridable -- handle xml processing instruction # Overridable -- handle xml processing instruction
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment