Commit dccc3d47 authored by Sjoerd Mullender

Better error handling of bad entity references. Before, when an & in
an attribute value was not escaped, you could get two syntax errors:
one about a missing semicolon and one about an unknown entity. Now
you get only one, about a bogus ampersand.
parent 43d2f758
...@@ -181,37 +181,46 @@ class XMLParser: ...@@ -181,37 +181,46 @@ class XMLParser:
res = amp.search(data, i) res = amp.search(data, i)
if res is None: if res is None:
return data return data
res = ref.match(data, res.start(0)) s = res.start(0)
res = ref.match(data, s)
if res is None: if res is None:
self.syntax_error("bogus `&'") self.syntax_error("bogus `&'")
i =i+1 i = s+1
continue continue
i = res.end(0) i = res.end(0)
if data[i - 1] != ';':
self.syntax_error("`;' missing after entity/char reference")
i = i-1
str = res.group(1) str = res.group(1)
pre = data[:res.start(0)] rescan = 0
post = data[i:]
if str[0] == '#': if str[0] == '#':
if str[1] == 'x': if str[1] == 'x':
str = chr(string.atoi(str[2:], 16)) str = chr(string.atoi(str[2:], 16))
else: else:
str = chr(string.atoi(str[1:])) str = chr(string.atoi(str[1:]))
data = pre + str + post if data[i - 1] != ';':
i = res.start(0)+len(str) self.syntax_error("`;' missing after char reference")
i = i-1
elif all: elif all:
if self.entitydefs.has_key(str): if self.entitydefs.has_key(str):
data = pre + self.entitydefs[str] + post str = self.entitydefs[str]
i = res.start(0) # rescan substituted text rescan = 1
elif data[i - 1] != ';':
self.syntax_error("bogus `&'")
i = s + 1 # just past the &
continue
else: else:
self.syntax_error("reference to unknown entity `&%s;'" % str) self.syntax_error("reference to unknown entity `&%s;'" % str)
# can't do it, so keep the entity ref in str = '&' + str + ';'
data = pre + '&' + str + ';' + post elif data[i - 1] != ';':
i = res.start(0) + len(str) + 2 self.syntax_error("bogus `&'")
i = s + 1 # just past the &
continue
# when we get here, str contains the translated text and i points
# to the end of the string that is to be replaced
data = data[:s] + str + data[i:]
if rescan:
i = s
else: else:
# just translating character references i = s + len(str)
pass # i is already postioned correctly
# Internal -- handle data as far as reasonable. May leave state # Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is # and data to be processed by a subsequent call. If 'end' is
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment