Commit 0b9cbf72 authored by Fred Drake

Define & use a Conversion object. It's still really ugly, but at least there's a token object in here now!  ;-)
parent 16da72a7
...@@ -27,7 +27,7 @@ class LaTeXFormatError(Error): ...@@ -27,7 +27,7 @@ class LaTeXFormatError(Error):
_begin_env_rx = re.compile(r"[\\]begin{([^}]*)}") _begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
_end_env_rx = re.compile(r"[\\]end{([^}]*)}") _end_env_rx = re.compile(r"[\\]end{([^}]*)}")
_begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)") _begin_macro_rx = re.compile("[\\\\]([a-zA-Z]+[*]?)({|\\s*\n?)")
_comment_rx = re.compile("%+ ?(.*)\n *") _comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
_text_rx = re.compile(r"[^]%\\{}]+") _text_rx = re.compile(r"[^]%\\{}]+")
_optional_rx = re.compile(r"\s*[[]([^]]*)[]]") _optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
# _parameter_rx is this complicated to allow {...} inside a parameter; # _parameter_rx is this complicated to allow {...} inside a parameter;
...@@ -50,21 +50,44 @@ def popping(name, point, depth): ...@@ -50,21 +50,44 @@ def popping(name, point, depth):
sys.stderr.write("%s</%s> at %s\n" % (" "*depth, name, point)) sys.stderr.write("%s</%s> at %s\n" % (" "*depth, name, point))
def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0): class Conversion:
def __init__(self, ifp, ofp, table=None, discards=(), autoclosing=()):
self.ofp_stack = [ofp]
self.pop_output()
self.table = table
self.discards = discards
self.autoclosing = autoclosing
self.line = string.join(map(string.rstrip, ifp.readlines()), "\n")
self.err_write = sys.stderr.write
self.preamble = 1
def push_output(self, ofp):
    """Make ofp the current output, remembering the one it replaces.

    The output in effect before the call is appended to self.ofp_stack
    so that pop_output() can restore it later.
    """
    previous = self.ofp
    self.ofp_stack.append(previous)
    # Rebind both the output object and the cached write method.
    self.ofp, self.write = ofp, ofp.write
def pop_output(self):
    """Restore the most recently saved output from self.ofp_stack.

    The popped object becomes self.ofp and its write method becomes
    self.write.
    """
    restored = self.ofp_stack.pop()
    self.ofp = restored
    self.write = restored.write
def subconvert(self, endchar=None, depth=0):
if DEBUG and endchar: if DEBUG and endchar:
sys.stderr.write("subconvert(%s, ..., endchar=%s)\n" self.err_write(
% (`line[:20]`, `endchar`)) "subconvert(%s)\n line = %s\n" % (`endchar`, `line[:20]`))
stack = [] stack = []
line = self.line
while line: while line:
if line[0] == endchar and not stack: if line[0] == endchar and not stack:
if DEBUG: if DEBUG:
sys.stderr.write("subconvert() --> %s\n" % `line[1:21]`) self.err_write("subconvert() --> %s\n" % `line[1:21]`)
return line[1:] self.line = line
return line
m = _comment_rx.match(line) m = _comment_rx.match(line)
if m: if m:
text = m.group(1) text = m.group(1)
if text: if text:
ofp.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n" % encode(text)) self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
% encode(text))
line = line[m.end():] line = line[m.end():]
continue continue
m = _begin_env_rx.match(line) m = _begin_env_rx.match(line)
...@@ -79,17 +102,18 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0): ...@@ -79,17 +102,18 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
if envname == "document": if envname == "document":
# special magic # special magic
for n in stack[1:]: for n in stack[1:]:
if n not in autoclosing: if n not in self.autoclosing:
raise LaTeXFormatError("open element on stack: " + `n`) raise LaTeXFormatError(
"open element on stack: " + `n`)
# should be more careful, but this is easier to code: # should be more careful, but this is easier to code:
stack = [] stack = []
ofp.write(")document\n") self.write(")document\n")
elif envname == stack[-1]: elif envname == stack[-1]:
ofp.write(")%s\n" % envname) self.write(")%s\n" % envname)
del stack[-1] del stack[-1]
popping(envname, "a", len(stack) + depth) popping(envname, "a", len(stack) + depth)
else: else:
sys.stderr.write("stack: %s\n" % `stack`) self.err_write("stack: %s\n" % `stack`)
raise LaTeXFormatError( raise LaTeXFormatError(
"environment close for %s doesn't match" % envname) "environment close for %s doesn't match" % envname)
line = line[m.end():] line = line[m.end():]
...@@ -102,42 +126,39 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0): ...@@ -102,42 +126,39 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
# really magic case! # really magic case!
pos = string.find(line, "\\end{verbatim}") pos = string.find(line, "\\end{verbatim}")
text = line[m.end(1):pos] text = line[m.end(1):pos]
ofp.write("(verbatim\n") self.write("(verbatim\n")
ofp.write("-%s\n" % encode(text)) self.write("-%s\n" % encode(text))
ofp.write(")verbatim\n") self.write(")verbatim\n")
line = line[pos + len("\\end{verbatim}"):] line = line[pos + len("\\end{verbatim}"):]
continue continue
numbered = 1 numbered = 1
opened = 0
if macroname[-1] == "*": if macroname[-1] == "*":
macroname = macroname[:-1] macroname = macroname[:-1]
numbered = 0 numbered = 0
if macroname in autoclosing and macroname in stack: if macroname in self.autoclosing and macroname in stack:
while stack[-1] != macroname: while stack[-1] != macroname:
if stack[-1] and stack[-1] not in discards: top = stack.pop()
ofp.write(")%s\n-\\n\n" % stack[-1]) if top and top not in self.discards:
popping(stack[-1], "b", len(stack) + depth - 1) self.write(")%s\n-\\n\n" % top)
del stack[-1] popping(top, "b", len(stack) + depth)
if macroname not in discards: if macroname not in self.discards:
ofp.write("-\\n\n)%s\n-\\n\n" % macroname) self.write("-\\n\n)%s\n-\\n\n" % macroname)
popping(macroname, "c", len(stack) + depth - 1) popping(macroname, "c", len(stack) + depth - 1)
del stack[-1] del stack[-1]
real_ofp = ofp
if macroname in discards:
ofp = StringIO.StringIO()
# #
conversion = table.get(macroname, ([], 0, 0, 0, 0)) if macroname in self.discards:
params, optional, empty, environ, nocontent = conversion self.push_output(StringIO.StringIO())
if empty: else:
ofp.write("e\n") self.push_output(self.ofp)
elif nocontent: #
empty = 1 params, optional, empty, environ = self.start_macro(macroname)
if not numbered: if not numbered:
ofp.write("Anumbered TOKEN no\n") self.write("Anumbered TOKEN no\n")
opened = 0
# rip off the macroname # rip off the macroname
if params: if params:
if optional and len(params) == 1: if optional and len(params) == 1:
line = line = line[m.end():] line = line[m.end():]
else: else:
line = line[m.end(1):] line = line[m.end(1):]
elif empty: elif empty:
...@@ -145,21 +166,20 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0): ...@@ -145,21 +166,20 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
else: else:
line = line[m.end():] line = line[m.end():]
# #
# Very ugly special case to deal with \item[]. The catch is that # Very ugly special case to deal with \item[]. The catch
# this needs to occur outside the for loop that handles attribute # is that this needs to occur outside the for loop that
# parsing so we can 'continue' the outer loop. # handles attribute parsing so we can 'continue' the outer
# loop.
# #
if optional and type(params[0]) is type(()): if optional and type(params[0]) is type(()):
# the attribute name isn't used in this special case # the attribute name isn't used in this special case
pushing(macroname, "a", depth + len(stack)) pushing(macroname, "a", depth + len(stack))
stack.append(macroname) stack.append(macroname)
ofp.write("(%s\n" % macroname) self.write("(%s\n" % macroname)
m = _start_optional_rx.match(line) m = _start_optional_rx.match(line)
if m: if m:
line = line[m.end():] self.line = line[m.end():]
line = subconvert(line, ofp, table, discards, line = self.subconvert("]", depth + len(stack))
autoclosing, endchar="]",
depth=depth + len(stack))
line = "}" + line line = "}" + line
continue continue
# handle attribute mappings here: # handle attribute mappings here:
...@@ -170,14 +190,14 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0): ...@@ -170,14 +190,14 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
m = _optional_rx.match(line) m = _optional_rx.match(line)
if m: if m:
line = line[m.end():] line = line[m.end():]
ofp.write("A%s TOKEN %s\n" self.write("A%s TOKEN %s\n"
% (attrname, encode(m.group(1)))) % (attrname, encode(m.group(1))))
elif type(attrname) is type(()): elif type(attrname) is type(()):
# This is a sub-element; but don't place the # This is a sub-element; but don't place the
# element we found on the stack (\section-like) # element we found on the stack (\section-like)
pushing(macroname, "b", len(stack) + depth) pushing(macroname, "b", len(stack) + depth)
stack.append(macroname) stack.append(macroname)
ofp.write("(%s\n" % macroname) self.write("(%s\n" % macroname)
macroname = attrname[0] macroname = attrname[0]
m = _start_group_rx.match(line) m = _start_group_rx.match(line)
if m: if m:
...@@ -187,15 +207,14 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0): ...@@ -187,15 +207,14 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
attrname = attrname[0] attrname = attrname[0]
if not opened: if not opened:
opened = 1 opened = 1
ofp.write("(%s\n" % macroname) self.write("(%s\n" % macroname)
pushing(macroname, "c", len(stack) + depth) pushing(macroname, "c", len(stack) + depth)
ofp.write("(%s\n" % attrname) self.write("(%s\n" % attrname)
pushing(attrname, "sub-elem", len(stack) + depth + 1) pushing(attrname, "sub-elem", len(stack) + depth + 1)
line = subconvert(skip_white(line)[1:], ofp, table, self.line = skip_white(line)[1:]
discards, autoclosing, endchar="}", line = subconvert("}", depth + len(stack) + 2)
depth=depth + len(stack) + 2)
popping(attrname, "sub-elem", len(stack) + depth + 1) popping(attrname, "sub-elem", len(stack) + depth + 1)
ofp.write(")%s\n" % attrname) self.write(")%s\n" % attrname)
else: else:
m = _parameter_rx.match(line) m = _parameter_rx.match(line)
if not m: if not m:
...@@ -207,7 +226,7 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0): ...@@ -207,7 +226,7 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
dtype = "TOKEN" dtype = "TOKEN"
else: else:
dtype = "CDATA" dtype = "CDATA"
ofp.write("A%s %s %s\n" self.write("A%s %s %s\n"
% (attrname, dtype, encode(value))) % (attrname, dtype, encode(value)))
line = line[m.end():] line = line[m.end():]
if params and type(params[-1]) is type('') \ if params and type(params[-1]) is type('') \
...@@ -220,26 +239,27 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0): ...@@ -220,26 +239,27 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
% (macroname, line[:12])) % (macroname, line[:12]))
line = line[m.end():] line = line[m.end():]
if not opened: if not opened:
ofp.write("(%s\n" % macroname) self.write("(%s\n" % macroname)
pushing(macroname, "d", len(stack) + depth) pushing(macroname, "d", len(stack) + depth)
if empty: if empty:
line = "}" + line line = "}" + line
stack.append(macroname) stack.append(macroname)
ofp = real_ofp self.pop_output()
continue continue
if line[0] == endchar and not stack: if line[0] == endchar and not stack:
if DEBUG: if DEBUG:
sys.stderr.write("subconvert() --> %s\n" % `line[1:21]`) self.err_write("subconvert() --> %s\n" % `line[1:21]`)
return line[1:] self.line = line[1:]
return self.line
if line[0] == "}": if line[0] == "}":
# end of macro # end of macro or group
macroname = stack[-1] macroname = stack[-1]
conversion = table.get(macroname) conversion = self.table.get(macroname)
if macroname \ if macroname \
and macroname not in discards \ and macroname not in self.discards \
and type(conversion) is not type(""): and type(conversion) is not type(""):
# otherwise, it was just a bare group # otherwise, it was just a bare group
ofp.write(")%s\n" % stack[-1]) self.write(")%s\n" % stack[-1])
popping(macroname, "d", len(stack) + depth - 1) popping(macroname, "d", len(stack) + depth - 1)
del stack[-1] del stack[-1]
line = line[1:] line = line[1:]
...@@ -250,22 +270,22 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0): ...@@ -250,22 +270,22 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
line = line[1:] line = line[1:]
continue continue
if line[0] == "\\" and line[1] in ESCAPED_CHARS: if line[0] == "\\" and line[1] in ESCAPED_CHARS:
ofp.write("-%s\n" % encode(line[1])) self.write("-%s\n" % encode(line[1]))
line = line[2:] line = line[2:]
continue continue
if line[:2] == r"\\": if line[:2] == r"\\":
ofp.write("(BREAK\n)BREAK\n") self.write("(BREAK\n)BREAK\n")
line = line[2:] line = line[2:]
continue continue
m = _text_rx.match(line) m = _text_rx.match(line)
if m: if m:
text = encode(m.group()) text = encode(m.group())
ofp.write("-%s\n" % text) self.write("-%s\n" % text)
line = line[m.end():] line = line[m.end():]
continue continue
# special case because of \item[] # special case because of \item[]
if line[0] == "]": if line[0] == "]":
ofp.write("-]\n") self.write("-]\n")
line = line[1:] line = line[1:]
continue continue
# avoid infinite loops # avoid infinite loops
...@@ -274,24 +294,32 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0): ...@@ -274,24 +294,32 @@ def subconvert(line, ofp, table, discards, autoclosing, endchar=None, depth=0):
extra = "..." extra = "..."
raise LaTeXFormatError("could not identify markup: %s%s" raise LaTeXFormatError("could not identify markup: %s%s"
% (`line[:100]`, extra)) % (`line[:100]`, extra))
while stack and stack[-1] in autoclosing: while stack and stack[-1] in self.autoclosing:
ofp.write("-\\n\n") self.write("-\\n\n")
ofp.write(")%s\n" % stack[-1]) self.write(")%s\n" % stack[-1])
popping(stack[-1], "e", len(stack) + depth - 1) popping(stack.pop(), "e", len(stack) + depth - 1)
del stack[-1]
if stack: if stack:
raise LaTeXFormatError("elements remain on stack: " raise LaTeXFormatError("elements remain on stack: "
+ string.join(stack)) + string.join(stack, ", "))
# otherwise we just ran out of input here... # otherwise we just ran out of input here...
def convert(self):
    """Run the conversion; subconvert()'s return value is discarded."""
    # Outermost pass: no terminating character and nesting depth zero,
    # i.e. exactly subconvert()'s defaults spelled out.
    self.subconvert(None, 0)
def start_macro(self, name):
    """Look up name in self.table and return its conversion settings.

    Returns the tuple (params, optional, empty, environ).  Names missing
    from the table get ([], 0, 0, 0).  When the entry's empty flag is
    set, "e\n" is written to the current output; otherwise a set
    nocontent flag causes empty to be reported as 1.
    """
    fallback = ([], 0, 0, 0, 0)
    entry = self.table.get(name, fallback)
    params, optional, empty, environ, nocontent = entry
    if empty:
        self.write("e\n")
        return params, optional, empty, environ
    if nocontent:
        # No "e" record is written here; only the returned flag changes.
        empty = 1
    return params, optional, empty, environ
def convert(ifp, ofp, table={}, discards=(), autoclosing=()): def convert(ifp, ofp, table={}, discards=(), autoclosing=()):
lines = string.split(ifp.read(), "\n") c = Conversion(ifp, ofp, table, discards, autoclosing)
for i in range(len(lines)):
lines[i] = string.rstrip(lines[i])
data = string.join(lines, "\n")
try: try:
subconvert(data, ofp, table, discards, autoclosing) c.convert()
except IOError, (err, msg): except IOError, (err, msg):
if err != errno.EPIPE: if err != errno.EPIPE:
raise raise
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment