Commit 2a002013 authored by Fred Drake

Add support for reasonable closing of open elements for which end tags
are considered optional.  This does not always place the closing tags
as a human editor would, but makes the structure correct.

Removed support for the empty start & end tags which are no longer reported
by nsgmllib (an SGML feature not used with HTML).
parent d12ac69e
......@@ -7,15 +7,34 @@ BOOLEAN_HTML_ATTRS = [
"compact", "nowrap", "ismap", "declare", "noshade", "checked",
"disabled", "readonly", "multiple", "selected", "noresize",
"defer"
]
]
EMPTY_HTML_TAGS = [
# List of HTML tags with an empty content model; these are
# rendered in minimized form, e.g. <img />.
# From http://www.w3.org/TR/xhtml1/#dtds
# finish_starttag() and finish_endtag() test membership in this list;
# tags found here are not pushed onto (or popped from) self.tagstack.
"base", "meta", "link", "hr", "br", "param", "img", "area",
"input", "col", "basefont", "isindex", "frame",
]
# NOTE(review): the two lines below repeat the two lines above; this
# looks like the before/after sides of a whitespace-only change shown
# without +/- markers -- confirm against the real file.
"input", "col", "basefont", "isindex", "frame",
]
# Paragraph-level HTML elements.  Opening one of these closes any
# paragraph-level element that is still open, and the new element is
# itself paragraph-level.
PARA_LEVEL_HTML_TAGS = [
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "p",
]
# HTML tags that close other open tags from this same list.  They are
# not closed by paragraph-level tags, and they do not close across
# other block-level boundaries.
CLOSING_BLOCK_LEVEL_HTML_TAGS = [
    # list items
    "li",
    # definition-list terms and descriptions
    "dt",
    "dd",
    # table cells and rows
    "td",
    "th",
    "tr",
]
# HTML tags that denote larger sections than paragraphs.
#
# The HTML 4.01 element is spelled "noframes".  The historical
# "noframe" spelling is kept so that any input relying on it is still
# treated as block-level.
BLOCK_LEVEL_HTML_TAGS = [
    "blockquote", "table", "tr", "th", "td", "thead", "tfoot", "tbody",
    "noframe", "noframes", "ul", "ol", "li", "dl", "dt", "dd", "div",
]
from TALGenerator import TALGenerator
......@@ -40,8 +59,8 @@ class HTMLTALParser(SGMLParser):
self.close()
while self.tagstack:
self.finish_endtag(None)
assert self.tagstack == []
assert self.nsstack == []
assert self.tagstack == [], self.tagstack
assert self.nsstack == [], self.nsstack
assert self.nsdict == {}, self.nsdict
def getCode(self):
......@@ -68,21 +87,46 @@ class HTMLTALParser(SGMLParser):
# Handle a start tag: record its xmlns declarations, close any open
# elements whose end tags were omitted, push the tag on self.tagstack
# (except for empty elements) and emit the start tag to the generator.
#
# NOTE(review): this span appears to be a flattened diff hunk -- removed
# and added lines sit side by side without +/- markers and without
# indentation (see the two opposite EMPTY_HTML_TAGS tests below), so it
# is not runnable as shown.
def finish_starttag(self, tag, attrs):
self.scan_xmlns(attrs)
if tag not in EMPTY_HTML_TAGS:
if tag in EMPTY_HTML_TAGS:
# Python 2 print statement writing the tag to stdout.
# NOTE(review): looks like leftover debugging output -- confirm.
print "<%s>" % tag
self.pop_xmlns()
elif tag in CLOSING_BLOCK_LEVEL_HTML_TAGS:
# Scan the stack bottom-up.  close_to ends as the index of the last
# tag seen from CLOSING_BLOCK_LEVEL_HTML_TAGS, or -1 if a tag that is
# only in BLOCK_LEVEL_HTML_TAGS was seen after it.
close_to = -1
for i in range(len(self.tagstack)):
t = self.tagstack[i]
if t in CLOSING_BLOCK_LEVEL_HTML_TAGS:
close_to = i
elif t in BLOCK_LEVEL_HTML_TAGS:
close_to = -1
# Here the open elements are closed first and the new tag is pushed
# afterwards.
self._close_to_level(close_to)
self.tagstack.append(tag)
elif tag in PARA_LEVEL_HTML_TAGS + BLOCK_LEVEL_HTML_TAGS:
# Scan the stack bottom-up.  close_to ends as the index of the first
# paragraph-level tag found after the last block-level tag, or -1 if
# there is none.
close_to = -1
for i in range(len(self.tagstack)):
if self.tagstack[i] in BLOCK_LEVEL_HTML_TAGS:
close_to = -1
elif self.tagstack[i] in PARA_LEVEL_HTML_TAGS:
if close_to == -1:
close_to = i
# NOTE(review): the new tag is pushed BEFORE _close_to_level() runs,
# the reverse of the branch above.  _close_to_level() closes everything
# in self.tagstack[close_to:], which at this point includes the tag
# just pushed -- confirm whether these two statements should be swapped.
self.tagstack.append(tag)
self._close_to_level(close_to)
else:
self.pop_xmlns()
self.tagstack.append(tag)
self.gen.emitStartTag(tag, attrs)
def _close_to_level(self, close_to):
    """Close every open element from stack index close_to upward.

    Elements are closed innermost first by calling finish_endtag() on
    each one.  A close_to of -1 (or lower) means there is nothing to
    close.
    """
    if close_to <= -1:
        return
    # Work from a copy of the slice: finish_endtag() pops entries off
    # self.tagstack while we are iterating.
    pending = self.tagstack[close_to:]
    index = len(pending)
    while index > 0:
        index = index - 1
        self.finish_endtag(pending[index])
# Handle an end tag: for tags not in EMPTY_HTML_TAGS, close any elements
# still open above the named tag, pop it from self.tagstack, pop its
# xmlns scope and emit the end tag to the generator.
#
# NOTE(review): this span appears to be a flattened diff hunk with
# removed and added lines side by side (no +/- markers, no indentation).
# The "if not tag: ... else: ..." lines look like the older form that
# accepted an empty end tag; the second assert/while/pop group looks
# like its replacement.  Confirm against the real file.
def finish_endtag(self, tag):
if tag not in EMPTY_HTML_TAGS:
if not tag:
tag = self.tagstack.pop()
else:
assert tag in self.tagstack
while self.tagstack[-1] != tag:
self.finish_endtag(None)
self.tagstack.pop()
# NOTE(review): this form requires a real tag name.  The
# "while self.tagstack: self.finish_endtag(None)" loop shown earlier in
# this file still passes None, which would fail this assert once the
# "if not tag" branch is gone -- confirm that caller was updated.
assert tag in self.tagstack
# Close whatever is still open above the requested tag, innermost first.
while self.tagstack[-1] != tag:
self.finish_endtag(self.tagstack[-1])
self.tagstack.pop()
self.pop_xmlns()
self.gen.emitEndTag(tag)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment