Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
0b9cbf72
Commit
0b9cbf72
authored
May 07, 1999
by
Fred Drake
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Define & use a Conversion object. It's still really ugly, but at
least there's a token object in here now! ;-)
parent
16da72a7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
255 additions
and
227 deletions
+255
-227
Doc/tools/sgmlconv/latex2esis.py
Doc/tools/sgmlconv/latex2esis.py
+255
-227
No files found.
Doc/tools/sgmlconv/latex2esis.py
View file @
0b9cbf72
...
@@ -27,7 +27,7 @@ class LaTeXFormatError(Error):
...
@@ -27,7 +27,7 @@ class LaTeXFormatError(Error):
_begin_env_rx
=
re
.
compile
(
r"[\\]begin{([^}]*)}"
)
_begin_env_rx
=
re
.
compile
(
r"[\\]begin{([^}]*)}"
)
_end_env_rx
=
re
.
compile
(
r"[\\]end{([^}]*)}"
)
_end_env_rx
=
re
.
compile
(
r"[\\]end{([^}]*)}"
)
_begin_macro_rx
=
re
.
compile
(
"[
\
\
\
\
]([a-zA-Z]+[*]?)({|
\
\
s*
\
n
?)"
)
_begin_macro_rx
=
re
.
compile
(
"[
\
\
\
\
]([a-zA-Z]+[*]?)({|
\
\
s*
\
n
?)"
)
_comment_rx
=
re
.
compile
(
"%+ ?(.*)
\
n
*"
)
_comment_rx
=
re
.
compile
(
"%+ ?(.*)
\
n
[
\
t
]
*"
)
_text_rx
=
re
.
compile
(
r"[^]%\\{}]+"
)
_text_rx
=
re
.
compile
(
r"[^]%\\{}]+"
)
_optional_rx
=
re
.
compile
(
r"\
s*[[]([^]]*)[]]
")
_optional_rx
=
re
.
compile
(
r"\
s*[[]([^]]*)[]]
")
# _parameter_rx is this complicated to allow {...} inside a parameter;
# _parameter_rx is this complicated to allow {...} inside a parameter;
...
@@ -50,248 +50,276 @@ def popping(name, point, depth):
...
@@ -50,248 +50,276 @@ def popping(name, point, depth):
sys
.
stderr
.
write
(
"%s</%s> at %s
\
n
"
%
(
" "
*
depth
,
name
,
point
))
sys
.
stderr
.
write
(
"%s</%s> at %s
\
n
"
%
(
" "
*
depth
,
name
,
point
))
def
subconvert
(
line
,
ofp
,
table
,
discards
,
autoclosing
,
endchar
=
None
,
depth
=
0
):
class
Conversion
:
if
DEBUG
and
endchar
:
def
__init__
(
self
,
ifp
,
ofp
,
table
=
None
,
discards
=
(),
autoclosing
=
()):
sys
.
stderr
.
write
(
"subconvert(%s, ..., endchar=%s)
\
n
"
self
.
ofp_stack
=
[
ofp
]
%
(
`line[:20]`
,
`endchar`
))
self
.
pop_output
()
stack
=
[]
self
.
table
=
table
while
line
:
self
.
discards
=
discards
if
line
[
0
]
==
endchar
and
not
stack
:
self
.
autoclosing
=
autoclosing
if
DEBUG
:
self
.
line
=
string
.
join
(
map
(
string
.
rstrip
,
ifp
.
readlines
()),
"
\
n
"
)
sys
.
stderr
.
write
(
"subconvert() --> %s
\
n
"
%
`line[1:21]`
)
self
.
err_write
=
sys
.
stderr
.
write
return
line
[
1
:]
self
.
preamble
=
1
m
=
_comment_rx
.
match
(
line
)
if
m
:
def
push_output
(
self
,
ofp
):
text
=
m
.
group
(
1
)
self
.
ofp_stack
.
append
(
self
.
ofp
)
if
text
:
self
.
ofp
=
ofp
ofp
.
write
(
"(COMMENT
\
n
- %s
\
n
)COMMENT
\
n
-
\
\
n
\
n
"
%
encode
(
text
))
self
.
write
=
ofp
.
write
line
=
line
[
m
.
end
():]
continue
def
pop_output
(
self
):
m
=
_begin_env_rx
.
match
(
line
)
self
.
ofp
=
self
.
ofp_stack
.
pop
()
if
m
:
self
.
write
=
self
.
ofp
.
write
# re-write to use the macro handler
line
=
r"\
%s %s
" % (m.group(1), line[m.end():])
def
subconvert
(
self
,
endchar
=
None
,
depth
=
0
):
continue
if
DEBUG
and
endchar
:
m = _end_env_rx.match(line)
self
.
err_write
(
if m:
"subconvert(%s)
\
n
line = %s
\
n
"
%
(
`endchar`
,
`line[:20]`
))
# end of environment
stack
=
[]
envname = m.group(1)
line
=
self
.
line
if envname == "
document
":
while
line
:
# special magic
if
line
[
0
]
==
endchar
and
not
stack
:
for n in stack[1:]:
if
DEBUG
:
if n not in autoclosing:
self
.
err_write
(
"subconvert() --> %s
\
n
"
%
`line[1:21]`
)
raise LaTeXFormatError("
open
element
on
stack
:
" + `n`)
self
.
line
=
line
# should be more careful, but this is easier to code:
return
line
stack = []
m
=
_comment_rx
.
match
(
line
)
ofp.write("
)
document
\
n
")
if
m
:
elif envname == stack[-1]:
text
=
m
.
group
(
1
)
ofp.write("
)
%
s
\
n
" % envname)
if
text
:
del stack[-1]
self
.
write
(
"(COMMENT
\
n
- %s
\
n
)COMMENT
\
n
-
\
\
n
\
n
"
popping(envname, "
a
", len(stack) + depth)
%
encode
(
text
))
else:
line
=
line
[
m
.
end
():]
sys.stderr.write("
stack
:
%
s
\
n
" % `stack`)
raise LaTeXFormatError(
"
environment
close
for
%
s
doesn
't match" % envname)
line = line[m.end():]
continue
m = _begin_macro_rx.match(line)
if m:
# start of macro
macroname = m.group(1)
if macroname == "verbatim":
# really magic case!
pos = string.find(line, "
\
\
end{verbatim}")
text = line[m.end(1):pos]
ofp.write("(verbatim
\
n
")
ofp.write("-%s
\
n
" % encode(text))
ofp.write(")verbatim
\
n
")
line = line[pos + len("
\
\
end{verbatim}"):]
continue
continue
numbered = 1
m
=
_begin_env_rx
.
match
(
line
)
if macroname[-1] == "*":
if
m
:
macroname = macroname[:-1]
# re-write to use the macro handler
numbered = 0
line
=
r"\
%s %s
" % (m.group(1), line[m.end():])
if macroname in autoclosing and macroname in stack:
continue
while stack[-1] != macroname:
m = _end_env_rx.match(line)
if stack[-1] and stack[-1] not in discards:
if m:
ofp.write(")%s
\
n
-
\
\
n
\
n
" % stack[-1])
# end of environment
popping(stack[-1], "b", len(stack) + depth - 1)
envname = m.group(1)
if envname == "
document
":
# special magic
for n in stack[1:]:
if n not in self.autoclosing:
raise LaTeXFormatError(
"
open
element
on
stack
:
" + `n`)
# should be more careful, but this is easier to code:
stack = []
self.write("
)
document
\
n
")
elif envname == stack[-1]:
self.write("
)
%
s
\
n
" % envname)
del stack[-1]
del stack[-1]
if macroname not in discards:
popping(envname, "
a
", len(stack) + depth)
ofp.write("-
\
\
n
\
n
)%s
\
n
-
\
\
n
\
n
" % macroname)
popping(macroname, "c", len(stack) + depth - 1)
del stack[-1]
real_ofp = ofp
if macroname in discards:
ofp = StringIO.StringIO()
#
conversion = table.get(macroname, ([], 0, 0, 0, 0))
params, optional, empty, environ, nocontent = conversion
if empty:
ofp.write("e
\
n
")
elif nocontent:
empty = 1
if not numbered:
ofp.write("Anumbered TOKEN no
\
n
")
opened = 0
# rip off the macroname
if params:
if optional and len(params) == 1:
line = line = line[m.end():]
else:
else:
line = line[m.end(1):]
self.err_write("
stack
:
%
s
\
n
" % `stack`)
elif empty:
raise LaTeXFormatError(
line = line[m.end(1):]
"
environment
close
for
%
s
doesn
't match" % envname)
else:
line = line[m.end():]
line = line[m.end():]
#
# Very ugly special case to deal with
\
i
t
em[]. The catch is that
# this needs to occur outside the for loop that handles attribute
# parsing so we can '
continue
' the outer loop.
#
if optional and type(params[0]) is type(()):
# the attribute name isn'
t
used
in
this
special
case
pushing
(
macroname
,
"a"
,
depth
+
len
(
stack
))
stack
.
append
(
macroname
)
ofp
.
write
(
"(%s
\
n
"
%
macroname
)
m
=
_start_optional_rx
.
match
(
line
)
if
m
:
line
=
line
[
m
.
end
():]
line
=
subconvert
(
line
,
ofp
,
table
,
discards
,
autoclosing
,
endchar
=
"]"
,
depth
=
depth
+
len
(
stack
))
line
=
"}"
+
line
continue
continue
# handle attribute mappings here:
m = _begin_macro_rx.match(line)
for
attrname
in
params
:
if m:
if
optional
:
# start of macro
optional
=
0
macroname = m.group(1)
if
type
(
attrname
)
is
type
(
""
):
if macroname == "verbatim":
m
=
_optional_rx
.
match
(
line
)
# really magic case!
if
m
:
pos = string.find(line, "
\
\
end{verbatim}")
line
=
line
[
m
.
end
():]
text = line[m.end(1):pos]
ofp
.
write
(
"A%s TOKEN %s
\
n
"
self.write("(verbatim
\
n
")
%
(
attrname
,
encode
(
m
.
group
(
1
))))
self.write("-%s
\
n
" % encode(text))
elif
type
(
attrname
)
is
type
(()):
self.write(")verbatim
\
n
")
# This is a sub-element; but don't place the
line = line[pos + len("
\
\
end{verbatim}"):]
# element we found on the stack (\section-like)
continue
pushing
(
macroname
,
"b"
,
len
(
stack
)
+
depth
)
numbered = 1
opened = 0
if macroname[-1] == "*":
macroname = macroname[:-1]
numbered = 0
if macroname in self.autoclosing and macroname in stack:
while stack[-1] != macroname:
top = stack.pop()
if top and top not in self.discards:
self.write(")%s
\
n
-
\
\
n
\
n
" % top)
popping(top, "b", len(stack) + depth)
if macroname not in self.discards:
self.write("-
\
\
n
\
n
)%s
\
n
-
\
\
n
\
n
" % macroname)
popping(macroname, "c", len(stack) + depth - 1)
del stack[-1]
#
if macroname in self.discards:
self.push_output(StringIO.StringIO())
else:
self.push_output(self.ofp)
#
params, optional, empty, environ = self.start_macro(macroname)
if not numbered:
self.write("Anumbered TOKEN no
\
n
")
# rip off the macroname
if params:
if optional and len(params) == 1:
line = line[m.end():]
else:
line = line[m.end(1):]
elif empty:
line = line[m.end(1):]
else:
line = line[m.end():]
#
# Very ugly special case to deal with
\
i
t
em[]. The catch
# is that this needs to occur outside the for loop that
# handles attribute parsing so we can '
continue
' the outer
# loop.
#
if optional and type(params[0]) is type(()):
# the attribute name isn'
t
used
in
this
special
case
pushing
(
macroname
,
"a"
,
depth
+
len
(
stack
))
stack
.
append
(
macroname
)
stack
.
append
(
macroname
)
ofp
.
write
(
"(%s
\
n
"
%
macroname
)
self
.
write
(
"(%s
\
n
"
%
macroname
)
macroname
=
attrname
[
0
]
m
=
_start_optional_rx
.
match
(
line
)
m
=
_start_group_rx
.
match
(
line
)
if
m
:
if
m
:
self
.
line
=
line
[
m
.
end
():]
line
=
self
.
subconvert
(
"]"
,
depth
+
len
(
stack
))
line
=
"}"
+
line
continue
# handle attribute mappings here:
for
attrname
in
params
:
if
optional
:
optional
=
0
if
type
(
attrname
)
is
type
(
""
):
m
=
_optional_rx
.
match
(
line
)
if
m
:
line
=
line
[
m
.
end
():]
self
.
write
(
"A%s TOKEN %s
\
n
"
%
(
attrname
,
encode
(
m
.
group
(
1
))))
elif
type
(
attrname
)
is
type
(()):
# This is a sub-element; but don't place the
# element we found on the stack (\section-like)
pushing
(
macroname
,
"b"
,
len
(
stack
)
+
depth
)
stack
.
append
(
macroname
)
self
.
write
(
"(%s
\
n
"
%
macroname
)
macroname
=
attrname
[
0
]
m
=
_start_group_rx
.
match
(
line
)
if
m
:
line
=
line
[
m
.
end
():]
elif
type
(
attrname
)
is
type
([]):
# A normal subelement.
attrname
=
attrname
[
0
]
if
not
opened
:
opened
=
1
self
.
write
(
"(%s
\
n
"
%
macroname
)
pushing
(
macroname
,
"c"
,
len
(
stack
)
+
depth
)
self
.
write
(
"(%s
\
n
"
%
attrname
)
pushing
(
attrname
,
"sub-elem"
,
len
(
stack
)
+
depth
+
1
)
self
.
line
=
skip_white
(
line
)[
1
:]
line
=
subconvert
(
"}"
,
depth
+
len
(
stack
)
+
2
)
popping
(
attrname
,
"sub-elem"
,
len
(
stack
)
+
depth
+
1
)
self
.
write
(
")%s
\
n
"
%
attrname
)
else
:
m
=
_parameter_rx
.
match
(
line
)
if
not
m
:
raise
LaTeXFormatError
(
"could not extract parameter %s for %s: %s"
%
(
attrname
,
macroname
,
`line[:100]`
))
value
=
m
.
group
(
1
)
if
_token_rx
.
match
(
value
):
dtype
=
"TOKEN"
else
:
dtype
=
"CDATA"
self
.
write
(
"A%s %s %s
\
n
"
%
(
attrname
,
dtype
,
encode
(
value
)))
line
=
line
[
m
.
end
():]
line
=
line
[
m
.
end
():]
elif
type
(
attrname
)
is
type
([]):
if
params
and
type
(
params
[
-
1
])
is
type
(
''
)
\
# A normal subelement.
and
(
not
empty
)
and
not
environ
:
attrname
=
attrname
[
0
]
# attempt to strip off next '{'
if
not
opened
:
m
=
_start_group_rx
.
match
(
line
)
opened
=
1
ofp
.
write
(
"(%s
\
n
"
%
macroname
)
pushing
(
macroname
,
"c"
,
len
(
stack
)
+
depth
)
ofp
.
write
(
"(%s
\
n
"
%
attrname
)
pushing
(
attrname
,
"sub-elem"
,
len
(
stack
)
+
depth
+
1
)
line
=
subconvert
(
skip_white
(
line
)[
1
:],
ofp
,
table
,
discards
,
autoclosing
,
endchar
=
"}"
,
depth
=
depth
+
len
(
stack
)
+
2
)
popping
(
attrname
,
"sub-elem"
,
len
(
stack
)
+
depth
+
1
)
ofp
.
write
(
")%s
\
n
"
%
attrname
)
else
:
m
=
_parameter_rx
.
match
(
line
)
if
not
m
:
if
not
m
:
raise
LaTeXFormatError
(
raise
LaTeXFormatError
(
"could not extract parameter %s for %s: %s"
"non-empty element '%s' has no content: %s"
%
(
attrname
,
macroname
,
`line[:100]`
))
%
(
macroname
,
line
[:
12
]))
value
=
m
.
group
(
1
)
if
_token_rx
.
match
(
value
):
dtype
=
"TOKEN"
else
:
dtype
=
"CDATA"
ofp
.
write
(
"A%s %s %s
\
n
"
%
(
attrname
,
dtype
,
encode
(
value
)))
line
=
line
[
m
.
end
():]
line
=
line
[
m
.
end
():]
if
params
and
type
(
params
[
-
1
])
is
type
(
''
)
\
if
not
opened
:
and
(
not
empty
)
and
not
environ
:
self
.
write
(
"(%s
\
n
"
%
macroname
)
# attempt to strip off next '{'
pushing
(
macroname
,
"d"
,
len
(
stack
)
+
depth
)
m
=
_start_group_rx
.
match
(
line
)
if
empty
:
if
not
m
:
line
=
"}"
+
line
raise
LaTeXFormatError
(
stack
.
append
(
macroname
)
"non-empty element '%s' has no content: %s"
self
.
pop_output
()
%
(
macroname
,
line
[:
12
]))
continue
if
line
[
0
]
==
endchar
and
not
stack
:
if
DEBUG
:
self
.
err_write
(
"subconvert() --> %s
\
n
"
%
`line[1:21]`
)
self
.
line
=
line
[
1
:]
return
self
.
line
if
line
[
0
]
==
"}"
:
# end of macro or group
macroname
=
stack
[
-
1
]
conversion
=
self
.
table
.
get
(
macroname
)
if
macroname
\
and
macroname
not
in
self
.
discards
\
and
type
(
conversion
)
is
not
type
(
""
):
# otherwise, it was just a bare group
self
.
write
(
")%s
\
n
"
%
stack
[
-
1
])
popping
(
macroname
,
"d"
,
len
(
stack
)
+
depth
-
1
)
del
stack
[
-
1
]
line
=
line
[
1
:]
continue
if
line
[
0
]
==
"{"
:
pushing
(
""
,
"e"
,
len
(
stack
)
+
depth
)
stack
.
append
(
""
)
line
=
line
[
1
:]
continue
if
line
[
0
]
==
"
\
\
"
and
line
[
1
]
in
ESCAPED_CHARS
:
self
.
write
(
"-%s
\
n
"
%
encode
(
line
[
1
]))
line
=
line
[
2
:]
continue
if
line
[:
2
]
==
r"\\"
:
self
.
write
(
"(BREAK
\
n
)BREAK
\
n
"
)
line
=
line
[
2
:]
continue
m
=
_text_rx
.
match
(
line
)
if
m
:
text
=
encode
(
m
.
group
())
self
.
write
(
"-%s
\
n
"
%
text
)
line
=
line
[
m
.
end
():]
line
=
line
[
m
.
end
():]
if
not
opened
:
continue
ofp
.
write
(
"(%s
\
n
"
%
macroname
)
# special case because of \item[]
pushing
(
macroname
,
"d"
,
len
(
stack
)
+
depth
)
if
line
[
0
]
==
"]"
:
if
empty
:
self
.
write
(
"-]
\
n
"
)
line
=
"}"
+
line
line
=
line
[
1
:]
stack
.
append
(
macroname
)
continue
ofp
=
real_ofp
# avoid infinite loops
continue
extra
=
""
if
line
[
0
]
==
endchar
and
not
stack
:
if
len
(
line
)
>
100
:
if
DEBUG
:
extra
=
"..."
sys
.
stderr
.
write
(
"subconvert() --> %s
\
n
"
%
`line[1:21]`
)
raise
LaTeXFormatError
(
"could not identify markup: %s%s"
return
line
[
1
:]
%
(
`line[:100]`
,
extra
))
if
line
[
0
]
==
"}"
:
while
stack
and
stack
[
-
1
]
in
self
.
autoclosing
:
# end of macro
self
.
write
(
"-
\
\
n
\
n
"
)
macroname
=
stack
[
-
1
]
self
.
write
(
")%s
\
n
"
%
stack
[
-
1
])
conversion
=
table
.
get
(
macroname
)
popping
(
stack
.
pop
(),
"e"
,
len
(
stack
)
+
depth
-
1
)
if
macroname
\
if
stack
:
and
macroname
not
in
discards
\
raise
LaTeXFormatError
(
"elements remain on stack: "
and
type
(
conversion
)
is
not
type
(
""
):
+
string
.
join
(
stack
,
", "
))
# otherwise, it was just a bare group
# otherwise we just ran out of input here...
ofp
.
write
(
")%s
\
n
"
%
stack
[
-
1
])
popping
(
macroname
,
"d"
,
len
(
stack
)
+
depth
-
1
)
def
convert
(
self
):
del
stack
[
-
1
]
self
.
subconvert
()
line
=
line
[
1
:]
continue
def
start_macro
(
self
,
name
):
if
line
[
0
]
==
"{"
:
conversion
=
self
.
table
.
get
(
name
,
([],
0
,
0
,
0
,
0
))
pushing
(
""
,
"e"
,
len
(
stack
)
+
depth
)
params
,
optional
,
empty
,
environ
,
nocontent
=
conversion
stack
.
append
(
""
)
if
empty
:
line
=
line
[
1
:]
self
.
write
(
"e
\
n
"
)
continue
elif
nocontent
:
if
line
[
0
]
==
"
\
\
"
and
line
[
1
]
in
ESCAPED_CHARS
:
empty
=
1
ofp
.
write
(
"-%s
\
n
"
%
encode
(
line
[
1
]))
return
params
,
optional
,
empty
,
environ
line
=
line
[
2
:]
continue
if
line
[:
2
]
==
r"\\"
:
ofp
.
write
(
"(BREAK
\
n
)BREAK
\
n
"
)
line
=
line
[
2
:]
continue
m
=
_text_rx
.
match
(
line
)
if
m
:
text
=
encode
(
m
.
group
())
ofp
.
write
(
"-%s
\
n
"
%
text
)
line
=
line
[
m
.
end
():]
continue
# special case because of \item[]
if
line
[
0
]
==
"]"
:
ofp
.
write
(
"-]
\
n
"
)
line
=
line
[
1
:]
continue
# avoid infinite loops
extra
=
""
if
len
(
line
)
>
100
:
extra
=
"..."
raise
LaTeXFormatError
(
"could not identify markup: %s%s"
%
(
`line[:100]`
,
extra
))
while
stack
and
stack
[
-
1
]
in
autoclosing
:
ofp
.
write
(
"-
\
\
n
\
n
"
)
ofp
.
write
(
")%s
\
n
"
%
stack
[
-
1
])
popping
(
stack
[
-
1
],
"e"
,
len
(
stack
)
+
depth
-
1
)
del
stack
[
-
1
]
if
stack
:
raise
LaTeXFormatError
(
"elements remain on stack: "
+
string
.
join
(
stack
))
# otherwise we just ran out of input here...
def
convert
(
ifp
,
ofp
,
table
=
{},
discards
=
(),
autoclosing
=
()):
def
convert
(
ifp
,
ofp
,
table
=
{},
discards
=
(),
autoclosing
=
()):
lines
=
string
.
split
(
ifp
.
read
(),
"
\
n
"
)
c
=
Conversion
(
ifp
,
ofp
,
table
,
discards
,
autoclosing
)
for
i
in
range
(
len
(
lines
)):
lines
[
i
]
=
string
.
rstrip
(
lines
[
i
])
data
=
string
.
join
(
lines
,
"
\
n
"
)
try
:
try
:
subconvert
(
data
,
ofp
,
table
,
discards
,
autoclosing
)
c
.
convert
(
)
except
IOError
,
(
err
,
msg
):
except
IOError
,
(
err
,
msg
):
if
err
!=
errno
.
EPIPE
:
if
err
!=
errno
.
EPIPE
:
raise
raise
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment