Commit ce56c377 authored by Mark Hammond

When bad HTML is encountered, ignore the page rather than failing with a traceback.
parent 05595e9d
...@@ -400,7 +400,15 @@ class Checker: ...@@ -400,7 +400,15 @@ class Checker:
if local_fragment and self.nonames: if local_fragment and self.nonames:
self.markdone(url_pair) self.markdone(url_pair)
return return
page = self.getpage(url_pair) try:
page = self.getpage(url_pair)
except sgmllib.SGMLParseError, msg:
msg = self.sanitize(msg)
self.note(0, "Error parsing %s: %s",
self.format_url(url_pair), msg)
# Don't actually mark the URL as bad - it exists, we just
# can't parse it!
page = None
if page: if page:
# Store the page which corresponds to this URL. # Store the page which corresponds to this URL.
self.name_table[url] = page self.name_table[url] = page
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment