Commit 062277d2 authored by Guido van Rossum

Pile of major changes -- the tests should all succeed again now:

- NestingError derives from HTMLParseError, and is hence simplified.

- Thread the input position through all the code generation routines;
  all compile-time exceptions now possess lineno and offset.

- Restructured the code that inserts implied end-tags, and made it
  generate output that is the same as the input more often.  This was
  the hardest to get right, and I expect to be working more on it.
parent 167a4f4e
...@@ -91,7 +91,7 @@ import string ...@@ -91,7 +91,7 @@ import string
from TALGenerator import TALGenerator from TALGenerator import TALGenerator
from TALDefs import ZOPE_METAL_NS, ZOPE_TAL_NS, METALError, TALError from TALDefs import ZOPE_METAL_NS, ZOPE_TAL_NS, METALError, TALError
from HTMLParser import HTMLParser from HTMLParser import HTMLParser, HTMLParseError
BOOLEAN_HTML_ATTRS = [ BOOLEAN_HTML_ATTRS = [
# List of Boolean attributes in HTML that may be given in # List of Boolean attributes in HTML that may be given in
...@@ -135,18 +135,12 @@ TIGHTEN_IMPLICIT_CLOSE_TAGS = (PARA_LEVEL_HTML_TAGS ...@@ -135,18 +135,12 @@ TIGHTEN_IMPLICIT_CLOSE_TAGS = (PARA_LEVEL_HTML_TAGS
+ BLOCK_CLOSING_TAG_MAP.keys()) + BLOCK_CLOSING_TAG_MAP.keys())
class NestingError(Exception): class NestingError(HTMLParseError):
"""Exception raised when elements aren't properly nested.""" """Exception raised when elements aren't properly nested."""
def __init__(self, tag, lineno, offset): def __init__(self, tag, position=(None, None)):
self.tag = tag self.tag = tag
self.lineno = lineno HTMLParseError.__init__(self, "unmatched </%s>" % tag, position)
self.offset = offset
def __str__(self):
s = "line %d, offset %d: unmatched </%s>" % (
self.lineno, self.offset, self.tag)
return s
class HTMLTALParser(HTMLParser): class HTMLTALParser(HTMLParser):
...@@ -156,7 +150,7 @@ class HTMLTALParser(HTMLParser): ...@@ -156,7 +150,7 @@ class HTMLTALParser(HTMLParser):
def __init__(self, gen=None): def __init__(self, gen=None):
HTMLParser.__init__(self) HTMLParser.__init__(self)
if gen is None: if gen is None:
gen = TALGenerator() gen = TALGenerator(xml=0)
self.gen = gen self.gen = gen
self.tagstack = [] self.tagstack = []
self.nsstack = [] self.nsstack = []
...@@ -172,7 +166,7 @@ class HTMLTALParser(HTMLParser): ...@@ -172,7 +166,7 @@ class HTMLTALParser(HTMLParser):
self.feed(data) self.feed(data)
self.close() self.close()
while self.tagstack: while self.tagstack:
self.finish_endtag(self.tagstack[-1]) self.implied_endtag(self.tagstack[-1], 2)
assert self.nsstack == [], self.nsstack assert self.nsstack == [], self.nsstack
assert self.nsdict == {}, self.nsdict assert self.nsdict == {}, self.nsdict
...@@ -182,12 +176,43 @@ class HTMLTALParser(HTMLParser): ...@@ -182,12 +176,43 @@ class HTMLTALParser(HTMLParser):
# Overriding HTMLParser methods # Overriding HTMLParser methods
def finish_starttag(self, tag, attrs): def finish_starttag(self, tag, attrs):
self.close_para_tags(tag)
self.tagstack.append(tag)
self.scan_xmlns(attrs)
attrlist, taldict, metaldict = self.extract_attrs(attrs)
self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
self.getpos())
if tag in EMPTY_HTML_TAGS:
self.implied_endtag(tag, -1)
def finish_startendtag(self, tag, attrs):
self.close_para_tags(tag)
self.scan_xmlns(attrs) self.scan_xmlns(attrs)
attrlist, taldict, metaldict = self.extract_attrs(attrs)
if taldict.get("replace") or taldict.get("content"):
self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
self.getpos())
self.gen.emitEndElement(tag)
else:
self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
self.getpos(), isend=1)
self.pop_xmlns()
def finish_endtag(self, tag):
if tag in EMPTY_HTML_TAGS: if tag in EMPTY_HTML_TAGS:
self.pop_xmlns() # </img> etc. in the source is an error
elif BLOCK_CLOSING_TAG_MAP.has_key(tag): raise NestingError(tag, self.getpos())
self.close_enclosed_tags(tag)
self.gen.emitEndElement(tag)
self.pop_xmlns()
self.tagstack.pop()
def close_para_tags(self, tag):
if tag in EMPTY_HTML_TAGS:
return
close_to = -1
if BLOCK_CLOSING_TAG_MAP.has_key(tag):
blocks_to_close = BLOCK_CLOSING_TAG_MAP[tag] blocks_to_close = BLOCK_CLOSING_TAG_MAP[tag]
close_to = -1
for i in range(len(self.tagstack)): for i in range(len(self.tagstack)):
t = self.tagstack[i] t = self.tagstack[i]
if t in blocks_to_close: if t in blocks_to_close:
...@@ -195,51 +220,39 @@ class HTMLTALParser(HTMLParser): ...@@ -195,51 +220,39 @@ class HTMLTALParser(HTMLParser):
close_to = i close_to = i
elif t in BLOCK_LEVEL_HTML_TAGS: elif t in BLOCK_LEVEL_HTML_TAGS:
close_to = -1 close_to = -1
self.close_to_level(close_to)
self.tagstack.append(tag)
elif tag in PARA_LEVEL_HTML_TAGS + BLOCK_LEVEL_HTML_TAGS: elif tag in PARA_LEVEL_HTML_TAGS + BLOCK_LEVEL_HTML_TAGS:
close_to = -1
for i in range(len(self.tagstack)): for i in range(len(self.tagstack)):
if self.tagstack[i] in BLOCK_LEVEL_HTML_TAGS: if self.tagstack[i] in BLOCK_LEVEL_HTML_TAGS:
close_to = -1 close_to = -1
elif self.tagstack[i] in PARA_LEVEL_HTML_TAGS: elif self.tagstack[i] in PARA_LEVEL_HTML_TAGS:
if close_to == -1: if close_to == -1:
close_to = i close_to = i
self.close_to_level(close_to) if close_to >= 0:
self.tagstack.append(tag) while len(self.tagstack) > close_to:
else: self.implied_endtag(self.tagstack[-1], 1)
self.tagstack.append(tag)
attrlist, taldict, metaldict = self.extract_attrs(attrs)
self.gen.emitStartElement(tag, attrlist, taldict, metaldict,
self.getpos())
def finish_endtag(self, tag, implied=0): def close_enclosed_tags(self, tag):
if tag in EMPTY_HTML_TAGS:
return
if tag not in self.tagstack: if tag not in self.tagstack:
lineno, offset = self.getpos() raise NestingError(tag, self.getpos())
raise NestingError(tag, lineno, offset) while tag != self.tagstack[-1]:
while self.tagstack[-1] != tag: self.implied_endtag(self.tagstack[-1], 1)
self.finish_endtag(self.tagstack[-1], implied=1) assert self.tagstack[-1] == tag
def implied_endtag(self, tag, implied):
assert tag == self.tagstack[-1]
assert implied in (-1, 1, 2)
if implied > 0:
if tag in TIGHTEN_IMPLICIT_CLOSE_TAGS:
# Pick out trailing whitespace from the program, and
# insert the close tag before the whitespace.
white = self.gen.unEmitWhitespace()
self.gen.emitEndElement(tag)
if white:
self.gen.emitRawText(white)
else:
self.gen.emitEndElement(tag)
self.tagstack.pop() self.tagstack.pop()
self.pop_xmlns() self.pop_xmlns()
if implied \
and tag in TIGHTEN_IMPLICIT_CLOSE_TAGS \
and self.gen.program \
and self.gen.program[-1][0] == "rawtext":
# Pick out trailing whitespace from the last instruction,
# if it was a "rawtext" instruction, and insert the close
# tag before the whitespace.
data = self.gen.program.pop()[1]
prefix = string.rstrip(data)
white = data[len(prefix):]
if data:
self.gen.emitRawText(prefix)
self.gen.emitEndElement(tag)
if white:
self.gen.emitRawText(white)
else:
self.gen.emitEndElement(tag)
def handle_charref(self, name): def handle_charref(self, name):
self.gen.emitRawText("&#%s;" % name) self.gen.emitRawText("&#%s;" % name)
...@@ -301,10 +314,3 @@ class HTMLTALParser(HTMLParser): ...@@ -301,10 +314,3 @@ class HTMLTALParser(HTMLParser):
taldict[suffix] = value taldict[suffix] = value
attrlist.append(item) attrlist.append(item)
return attrlist, taldict, metaldict return attrlist, taldict, metaldict
def close_to_level(self, close_to):
if close_to > -1:
closing = self.tagstack[close_to:]
closing.reverse()
for t in closing:
self.finish_endtag(t, implied=1)
...@@ -80,20 +80,12 @@ TO DO ...@@ -80,20 +80,12 @@ TO DO
- Bring DummyEngine.py up to specs. - Bring DummyEngine.py up to specs.
- Disallow TAL on start-tags whose end-tag is implied.
- The call to emitSubstitution() in emitEndElement() for "replace"
doesn't pass in anything for attrDict.
- Finish implementing insertStructure(): attribute replacement isn't - Finish implementing insertStructure(): attribute replacement isn't
implemented yet. implemented yet.
- TALInterpreter currently always uses an XML parser to parse inserted - TALInterpreter currently always uses an XML parser to parse inserted
structure; it should use a parser appropriate to the mode. structure; it should use a parser appropriate to the mode.
- Incorporate line number and offset information into remaining
compile-time exceptions.
- HTMLTALParser.py and TALParser.py are silly names. Should be - HTMLTALParser.py and TALParser.py are silly names. Should be
HTMLTALCompiler.py and XMLTALCompiler.py (or maybe shortened, HTMLTALCompiler.py and XMLTALCompiler.py (or maybe shortened,
without "TAL"?) without "TAL"?)
......
...@@ -99,7 +99,7 @@ class DummyCompiler: ...@@ -99,7 +99,7 @@ class DummyCompiler:
class TALGenerator: class TALGenerator:
def __init__(self, expressionCompiler=None): def __init__(self, expressionCompiler=None, xml=1):
if not expressionCompiler: if not expressionCompiler:
expressionCompiler = DummyCompiler() expressionCompiler = DummyCompiler()
self.expressionCompiler = expressionCompiler self.expressionCompiler = expressionCompiler
...@@ -109,6 +109,7 @@ class TALGenerator: ...@@ -109,6 +109,7 @@ class TALGenerator:
self.macros = {} self.macros = {}
self.slots = {} self.slots = {}
self.slotStack = [] self.slotStack = []
self.xml = xml
def getCode(self): def getCode(self):
return self.optimize(self.program), self.macros return self.optimize(self.program), self.macros
...@@ -186,11 +187,15 @@ class TALGenerator: ...@@ -186,11 +187,15 @@ class TALGenerator:
def emit(self, *instruction): def emit(self, *instruction):
self.program.append(instruction) self.program.append(instruction)
def emitStartTag(self, name, attrlist): def emitStartTag(self, name, attrlist, isend=0):
self.program.append(("startTag", name, attrlist)) if isend:
opcode = "startEndTag"
else:
opcode = "startTag"
self.program.append((opcode, name, attrlist))
def emitEndTag(self, name): def emitEndTag(self, name):
if self.program and self.program[-1][0] == "startTag": if self.xml and self.program and self.program[-1][0] == "startTag":
# Minimize empty element # Minimize empty element
self.program[-1] = ("startEndTag",) + self.program[-1][1:] self.program[-1] = ("startEndTag",) + self.program[-1][1:]
else: else:
...@@ -207,8 +212,7 @@ class TALGenerator: ...@@ -207,8 +212,7 @@ class TALGenerator:
m = re.match( m = re.match(
r"(?s)\s*(?:(global|local)\s+)?(%s)\s+(.*)\Z" % NAME_RE, part) r"(?s)\s*(?:(global|local)\s+)?(%s)\s+(.*)\Z" % NAME_RE, part)
if not m: if not m:
raise TALError("invalid define syntax: " + `part`, raise TALError("invalid define syntax: " + `part`, position)
position)
scope, name, expr = m.group(1, 2, 3) scope, name, expr = m.group(1, 2, 3)
scope = scope or "local" scope = scope or "local"
cexpr = self.compileExpression(expr) cexpr = self.compileExpression(expr)
...@@ -222,19 +226,19 @@ class TALGenerator: ...@@ -222,19 +226,19 @@ class TALGenerator:
program = self.popProgram() program = self.popProgram()
self.emit("condition", cexpr, program) self.emit("condition", cexpr, program)
def emitRepeat(self, arg): def emitRepeat(self, arg, position=(None, None)):
m = re.match("(?s)\s*(%s)\s+(.*)\Z" % NAME_RE, arg) m = re.match("(?s)\s*(%s)\s+(.*)\Z" % NAME_RE, arg)
if not m: if not m:
raise TALError("invalid repeat syntax: " + `repeat`) raise TALError("invalid repeat syntax: " + `repeat`, position)
name, expr = m.group(1, 2) name, expr = m.group(1, 2)
cexpr = self.compileExpression(expr) cexpr = self.compileExpression(expr)
program = self.popProgram() program = self.popProgram()
self.emit("loop", name, cexpr, program) self.emit("loop", name, cexpr, program)
def emitSubstitution(self, arg, attrDict={}): def emitSubstitution(self, arg, attrDict={}, position=(None, None)):
key, expr = parseSubstitution(arg) key, expr = parseSubstitution(arg)
if not key: if not key:
raise TALError("Bad syntax in insert/replace: " + `arg`) raise TALError("Bad syntax in content/replace: " + `arg`, position)
cexpr = self.compileExpression(expr) cexpr = self.compileExpression(expr)
program = self.popProgram() program = self.popProgram()
if key == "text": if key == "text":
...@@ -243,10 +247,11 @@ class TALGenerator: ...@@ -243,10 +247,11 @@ class TALGenerator:
assert key == "structure" assert key == "structure"
self.emit("insertStructure", cexpr, attrDict, program) self.emit("insertStructure", cexpr, attrDict, program)
def emitDefineMacro(self, macroName): def emitDefineMacro(self, macroName, position=(None, None)):
program = self.popProgram() program = self.popProgram()
if self.macros.has_key(macroName): if self.macros.has_key(macroName):
raise METALError("duplicate macro definition: %s" % macroName) raise METALError("duplicate macro definition: %s" % macroName,
position)
self.macros[macroName] = program self.macros[macroName] = program
self.emit("defineMacro", macroName, program) self.emit("defineMacro", macroName, program)
...@@ -259,13 +264,36 @@ class TALGenerator: ...@@ -259,13 +264,36 @@ class TALGenerator:
program = self.popProgram() program = self.popProgram()
self.emit("defineSlot", slotName, program) self.emit("defineSlot", slotName, program)
def emitFillSlot(self, slotName): def emitFillSlot(self, slotName, position=(None, None)):
program = self.popProgram() program = self.popProgram()
if self.slots.has_key(slotName): if self.slots.has_key(slotName):
raise METALError("duplicate slot definition: %s" % slotName) raise METALError("duplicate slot definition: %s" % slotName,
position)
self.slots[slotName] = program self.slots[slotName] = program
self.emit("fillSlot", slotName, program) self.emit("fillSlot", slotName, program)
def unEmitWhitespace(self):
collect = []
i = len(self.program) - 1
while i >= 0:
item = self.program[i]
if item[0] != "rawtext":
break
text = item[1]
if not re.match(r"\A\s*\Z", text):
break
collect.append(text)
i = i-1
del self.program[i+1:]
if i >= 0 and self.program[i][0] == "rawtext":
text = self.program[i][1]
m = re.search(r"\s+\Z", text)
if m:
self.program[i] = ("rawtext", text[:m.start()])
collect.append(m.group())
collect.reverse()
return string.join(collect, "")
def unEmitNewlineWhitespace(self): def unEmitNewlineWhitespace(self):
collect = [] collect = []
i = len(self.program) i = len(self.program)
...@@ -306,7 +334,7 @@ class TALGenerator: ...@@ -306,7 +334,7 @@ class TALGenerator:
return newlist return newlist
def emitStartElement(self, name, attrlist, taldict, metaldict, def emitStartElement(self, name, attrlist, taldict, metaldict,
position=(None, None)): position=(None, None), isend=0):
for key in taldict.keys(): for key in taldict.keys():
if key not in KNOWN_TAL_ATTRIBUTES: if key not in KNOWN_TAL_ATTRIBUTES:
raise TALError("bad TAL attribute: " + `key`, position) raise TALError("bad TAL attribute: " + `key`, position)
...@@ -380,46 +408,56 @@ class TALGenerator: ...@@ -380,46 +408,56 @@ class TALGenerator:
if replace: if replace:
todo["repldict"] = repldict todo["repldict"] = repldict
repldict = {} repldict = {}
self.emitStartTag(name, self.replaceAttrs(attrlist, repldict)) self.emitStartTag(name, self.replaceAttrs(attrlist, repldict), isend)
if content: if content:
self.pushProgram() self.pushProgram()
if todo and position != (None, None):
todo["position"] = position
self.todoPush(todo) self.todoPush(todo)
if isend:
self.emitEndElement(name, isend)
def emitEndElement(self, name): def emitEndElement(self, name, isend=0):
todo = self.todoPop() todo = self.todoPop()
if not todo: if not todo:
# Shortcut # Shortcut
self.emitEndTag(name) if not isend:
self.emitEndTag(name)
return return
position = todo.get("position", (None, None))
defineMacro = todo.get("defineMacro")
useMacro = todo.get("useMacro")
defineSlot = todo.get("defineSlot")
fillSlot = todo.get("fillSlot")
content = todo.get("content") content = todo.get("content")
if content:
self.emitSubstitution(content)
self.emitEndTag(name)
repeat = todo.get("repeat") repeat = todo.get("repeat")
replace = todo.get("replace")
condition = todo.get("condition")
define = todo.get("define")
repldict = todo.get("repldict", {})
if content:
self.emitSubstitution(content, {}, position)
if not isend:
self.emitEndTag(name)
if repeat: if repeat:
self.emitRepeat(repeat) self.emitRepeat(repeat, position)
self.emit("endScope") self.emit("endScope")
replace = todo.get("replace")
if replace: if replace:
repldict = todo.get("repldict", {}) self.emitSubstitution(replace, repldict, position)
self.emitSubstitution(replace, repldict)
condition = todo.get("condition")
if condition: if condition:
self.emitCondition(condition) self.emitCondition(condition)
if todo.get("define"): if define:
self.emit("endScope") self.emit("endScope")
defineMacro = todo.get("defineMacro")
useMacro = todo.get("useMacro")
defineSlot = todo.get("defineSlot")
fillSlot = todo.get("fillSlot")
if defineMacro: if defineMacro:
self.emitDefineMacro(defineMacro) self.emitDefineMacro(defineMacro, position)
if useMacro: if useMacro:
self.emitUseMacro(useMacro) self.emitUseMacro(useMacro)
if defineSlot: if defineSlot:
self.emitDefineSlot(defineSlot) self.emitDefineSlot(defineSlot)
if fillSlot: if fillSlot:
self.emitFillSlot(fillSlot) self.emitFillSlot(fillSlot, position)
def test(): def test():
t = TALGenerator() t = TALGenerator()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment