Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
96c00b0b
Commit
96c00b0b
authored
May 07, 1999
by
Fred Drake
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Define & use a Conversion object. It's still really ugly, but at
least there's a token object in here now! ;-)
parent
b0bc7f2d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
255 additions
and
227 deletions
+255
-227
Doc/tools/sgmlconv/latex2esis.py
Doc/tools/sgmlconv/latex2esis.py
+255
-227
No files found.
Doc/tools/sgmlconv/latex2esis.py
View file @
96c00b0b
...
...
@@ -27,7 +27,7 @@ class LaTeXFormatError(Error):
_begin_env_rx
=
re
.
compile
(
r"[\\]begin{([^}]*)}"
)
_end_env_rx
=
re
.
compile
(
r"[\\]end{([^}]*)}"
)
_begin_macro_rx
=
re
.
compile
(
"[
\
\
\
\
]([a-zA-Z]+[*]?)({|
\
\
s*
\
n
?)"
)
_comment_rx
=
re
.
compile
(
"%+ ?(.*)
\
n
*"
)
_comment_rx
=
re
.
compile
(
"%+ ?(.*)
\
n
[
\
t
]
*"
)
_text_rx
=
re
.
compile
(
r"[^]%\\{}]+"
)
_optional_rx
=
re
.
compile
(
r"\
s*[[]([^]]*)[]]
")
# _parameter_rx is this complicated to allow {...} inside a parameter;
...
...
@@ -50,248 +50,276 @@ def popping(name, point, depth):
sys
.
stderr
.
write
(
"%s</%s> at %s
\
n
"
%
(
" "
*
depth
,
name
,
point
))
def
subconvert
(
line
,
ofp
,
table
,
discards
,
autoclosing
,
endchar
=
None
,
depth
=
0
):
if
DEBUG
and
endchar
:
sys
.
stderr
.
write
(
"subconvert(%s, ..., endchar=%s)
\
n
"
%
(
`line[:20]`
,
`endchar`
))
stack
=
[]
while
line
:
if
line
[
0
]
==
endchar
and
not
stack
:
if
DEBUG
:
sys
.
stderr
.
write
(
"subconvert() --> %s
\
n
"
%
`line[1:21]`
)
return
line
[
1
:]
m
=
_comment_rx
.
match
(
line
)
if
m
:
text
=
m
.
group
(
1
)
if
text
:
ofp
.
write
(
"(COMMENT
\
n
- %s
\
n
)COMMENT
\
n
-
\
\
n
\
n
"
%
encode
(
text
))
line
=
line
[
m
.
end
():]
continue
m
=
_begin_env_rx
.
match
(
line
)
if
m
:
# re-write to use the macro handler
line
=
r"\
%s %s
" % (m.group(1), line[m.end():])
continue
m = _end_env_rx.match(line)
if m:
# end of environment
envname = m.group(1)
if envname == "
document
":
# special magic
for n in stack[1:]:
if n not in autoclosing:
raise LaTeXFormatError("
open
element
on
stack
:
" + `n`)
# should be more careful, but this is easier to code:
stack = []
ofp.write("
)
document
\
n
")
elif envname == stack[-1]:
ofp.write("
)
%
s
\
n
" % envname)
del stack[-1]
popping(envname, "
a
", len(stack) + depth)
else:
sys.stderr.write("
stack
:
%
s
\
n
" % `stack`)
raise LaTeXFormatError(
"
environment
close
for
%
s
doesn
't match" % envname)
line = line[m.end():]
continue
m = _begin_macro_rx.match(line)
if m:
# start of macro
macroname = m.group(1)
if macroname == "verbatim":
# really magic case!
pos = string.find(line, "
\
\
end{verbatim}")
text = line[m.end(1):pos]
ofp.write("(verbatim
\
n
")
ofp.write("-%s
\
n
" % encode(text))
ofp.write(")verbatim
\
n
")
line = line[pos + len("
\
\
end{verbatim}"):]
class
Conversion
:
def
__init__
(
self
,
ifp
,
ofp
,
table
=
None
,
discards
=
(),
autoclosing
=
()):
self
.
ofp_stack
=
[
ofp
]
self
.
pop_output
()
self
.
table
=
table
self
.
discards
=
discards
self
.
autoclosing
=
autoclosing
self
.
line
=
string
.
join
(
map
(
string
.
rstrip
,
ifp
.
readlines
()),
"
\
n
"
)
self
.
err_write
=
sys
.
stderr
.
write
self
.
preamble
=
1
def
push_output
(
self
,
ofp
):
self
.
ofp_stack
.
append
(
self
.
ofp
)
self
.
ofp
=
ofp
self
.
write
=
ofp
.
write
def
pop_output
(
self
):
self
.
ofp
=
self
.
ofp_stack
.
pop
()
self
.
write
=
self
.
ofp
.
write
def
subconvert
(
self
,
endchar
=
None
,
depth
=
0
):
if
DEBUG
and
endchar
:
self
.
err_write
(
"subconvert(%s)
\
n
line = %s
\
n
"
%
(
`endchar`
,
`line[:20]`
))
stack
=
[]
line
=
self
.
line
while
line
:
if
line
[
0
]
==
endchar
and
not
stack
:
if
DEBUG
:
self
.
err_write
(
"subconvert() --> %s
\
n
"
%
`line[1:21]`
)
self
.
line
=
line
return
line
m
=
_comment_rx
.
match
(
line
)
if
m
:
text
=
m
.
group
(
1
)
if
text
:
self
.
write
(
"(COMMENT
\
n
- %s
\
n
)COMMENT
\
n
-
\
\
n
\
n
"
%
encode
(
text
))
line
=
line
[
m
.
end
():]
continue
numbered = 1
if macroname[-1] == "*":
macroname = macroname[:-1]
numbered = 0
if macroname in autoclosing and macroname in stack:
while stack[-1] != macroname:
if stack[-1] and stack[-1] not in discards:
ofp.write(")%s
\
n
-
\
\
n
\
n
" % stack[-1])
popping(stack[-1], "b", len(stack) + depth - 1)
m
=
_begin_env_rx
.
match
(
line
)
if
m
:
# re-write to use the macro handler
line
=
r"\
%s %s
" % (m.group(1), line[m.end():])
continue
m = _end_env_rx.match(line)
if m:
# end of environment
envname = m.group(1)
if envname == "
document
":
# special magic
for n in stack[1:]:
if n not in self.autoclosing:
raise LaTeXFormatError(
"
open
element
on
stack
:
" + `n`)
# should be more careful, but this is easier to code:
stack = []
self.write("
)
document
\
n
")
elif envname == stack[-1]:
self.write("
)
%
s
\
n
" % envname)
del stack[-1]
if macroname not in discards:
ofp.write("-
\
\
n
\
n
)%s
\
n
-
\
\
n
\
n
" % macroname)
popping(macroname, "c", len(stack) + depth - 1)
del stack[-1]
real_ofp = ofp
if macroname in discards:
ofp = StringIO.StringIO()
#
conversion = table.get(macroname, ([], 0, 0, 0, 0))
params, optional, empty, environ, nocontent = conversion
if empty:
ofp.write("e
\
n
")
elif nocontent:
empty = 1
if not numbered:
ofp.write("Anumbered TOKEN no
\
n
")
opened = 0
# rip off the macroname
if params:
if optional and len(params) == 1:
line = line = line[m.end():]
popping(envname, "
a
", len(stack) + depth)
else:
line = line[m.end(1):]
elif empty:
line = line[m.end(1):]
else:
self.err_write("
stack
:
%
s
\
n
" % `stack`)
raise LaTeXFormatError(
"
environment
close
for
%
s
doesn
't match" % envname)
line = line[m.end():]
#
# Very ugly special case to deal with
\
i
t
em[]. The catch is that
# this needs to occur outside the for loop that handles attribute
# parsing so we can '
continue
' the outer loop.
#
if optional and type(params[0]) is type(()):
# the attribute name isn'
t
used
in
this
special
case
pushing
(
macroname
,
"a"
,
depth
+
len
(
stack
))
stack
.
append
(
macroname
)
ofp
.
write
(
"(%s
\
n
"
%
macroname
)
m
=
_start_optional_rx
.
match
(
line
)
if
m
:
line
=
line
[
m
.
end
():]
line
=
subconvert
(
line
,
ofp
,
table
,
discards
,
autoclosing
,
endchar
=
"]"
,
depth
=
depth
+
len
(
stack
))
line
=
"}"
+
line
continue
# handle attribute mappings here:
for
attrname
in
params
:
if
optional
:
optional
=
0
if
type
(
attrname
)
is
type
(
""
):
m
=
_optional_rx
.
match
(
line
)
if
m
:
line
=
line
[
m
.
end
():]
ofp
.
write
(
"A%s TOKEN %s
\
n
"
%
(
attrname
,
encode
(
m
.
group
(
1
))))
elif
type
(
attrname
)
is
type
(()):
# This is a sub-element; but don't place the
# element we found on the stack (\section-like)
pushing
(
macroname
,
"b"
,
len
(
stack
)
+
depth
)
m = _begin_macro_rx.match(line)
if m:
# start of macro
macroname = m.group(1)
if macroname == "verbatim":
# really magic case!
pos = string.find(line, "
\
\
end{verbatim}")
text = line[m.end(1):pos]
self.write("(verbatim
\
n
")
self.write("-%s
\
n
" % encode(text))
self.write(")verbatim
\
n
")
line = line[pos + len("
\
\
end{verbatim}"):]
continue
numbered = 1
opened = 0
if macroname[-1] == "*":
macroname = macroname[:-1]
numbered = 0
if macroname in self.autoclosing and macroname in stack:
while stack[-1] != macroname:
top = stack.pop()
if top and top not in self.discards:
self.write(")%s
\
n
-
\
\
n
\
n
" % top)
popping(top, "b", len(stack) + depth)
if macroname not in self.discards:
self.write("-
\
\
n
\
n
)%s
\
n
-
\
\
n
\
n
" % macroname)
popping(macroname, "c", len(stack) + depth - 1)
del stack[-1]
#
if macroname in self.discards:
self.push_output(StringIO.StringIO())
else:
self.push_output(self.ofp)
#
params, optional, empty, environ = self.start_macro(macroname)
if not numbered:
self.write("Anumbered TOKEN no
\
n
")
# rip off the macroname
if params:
if optional and len(params) == 1:
line = line[m.end():]
else:
line = line[m.end(1):]
elif empty:
line = line[m.end(1):]
else:
line = line[m.end():]
#
# Very ugly special case to deal with
\
i
t
em[]. The catch
# is that this needs to occur outside the for loop that
# handles attribute parsing so we can '
continue
' the outer
# loop.
#
if optional and type(params[0]) is type(()):
# the attribute name isn'
t
used
in
this
special
case
pushing
(
macroname
,
"a"
,
depth
+
len
(
stack
))
stack
.
append
(
macroname
)
ofp
.
write
(
"(%s
\
n
"
%
macroname
)
macroname
=
attrname
[
0
]
m
=
_start_group_rx
.
match
(
line
)
self
.
write
(
"(%s
\
n
"
%
macroname
)
m
=
_start_optional_rx
.
match
(
line
)
if
m
:
self
.
line
=
line
[
m
.
end
():]
line
=
self
.
subconvert
(
"]"
,
depth
+
len
(
stack
))
line
=
"}"
+
line
continue
# handle attribute mappings here:
for
attrname
in
params
:
if
optional
:
optional
=
0
if
type
(
attrname
)
is
type
(
""
):
m
=
_optional_rx
.
match
(
line
)
if
m
:
line
=
line
[
m
.
end
():]
self
.
write
(
"A%s TOKEN %s
\
n
"
%
(
attrname
,
encode
(
m
.
group
(
1
))))
elif
type
(
attrname
)
is
type
(()):
# This is a sub-element; but don't place the
# element we found on the stack (\section-like)
pushing
(
macroname
,
"b"
,
len
(
stack
)
+
depth
)
stack
.
append
(
macroname
)
self
.
write
(
"(%s
\
n
"
%
macroname
)
macroname
=
attrname
[
0
]
m
=
_start_group_rx
.
match
(
line
)
if
m
:
line
=
line
[
m
.
end
():]
elif
type
(
attrname
)
is
type
([]):
# A normal subelement.
attrname
=
attrname
[
0
]
if
not
opened
:
opened
=
1
self
.
write
(
"(%s
\
n
"
%
macroname
)
pushing
(
macroname
,
"c"
,
len
(
stack
)
+
depth
)
self
.
write
(
"(%s
\
n
"
%
attrname
)
pushing
(
attrname
,
"sub-elem"
,
len
(
stack
)
+
depth
+
1
)
self
.
line
=
skip_white
(
line
)[
1
:]
line
=
subconvert
(
"}"
,
depth
+
len
(
stack
)
+
2
)
popping
(
attrname
,
"sub-elem"
,
len
(
stack
)
+
depth
+
1
)
self
.
write
(
")%s
\
n
"
%
attrname
)
else
:
m
=
_parameter_rx
.
match
(
line
)
if
not
m
:
raise
LaTeXFormatError
(
"could not extract parameter %s for %s: %s"
%
(
attrname
,
macroname
,
`line[:100]`
))
value
=
m
.
group
(
1
)
if
_token_rx
.
match
(
value
):
dtype
=
"TOKEN"
else
:
dtype
=
"CDATA"
self
.
write
(
"A%s %s %s
\
n
"
%
(
attrname
,
dtype
,
encode
(
value
)))
line
=
line
[
m
.
end
():]
elif
type
(
attrname
)
is
type
([]):
# A normal subelement.
attrname
=
attrname
[
0
]
if
not
opened
:
opened
=
1
ofp
.
write
(
"(%s
\
n
"
%
macroname
)
pushing
(
macroname
,
"c"
,
len
(
stack
)
+
depth
)
ofp
.
write
(
"(%s
\
n
"
%
attrname
)
pushing
(
attrname
,
"sub-elem"
,
len
(
stack
)
+
depth
+
1
)
line
=
subconvert
(
skip_white
(
line
)[
1
:],
ofp
,
table
,
discards
,
autoclosing
,
endchar
=
"}"
,
depth
=
depth
+
len
(
stack
)
+
2
)
popping
(
attrname
,
"sub-elem"
,
len
(
stack
)
+
depth
+
1
)
ofp
.
write
(
")%s
\
n
"
%
attrname
)
else
:
m
=
_parameter_rx
.
match
(
line
)
if
params
and
type
(
params
[
-
1
])
is
type
(
''
)
\
and
(
not
empty
)
and
not
environ
:
# attempt to strip off next '{'
m
=
_start_group_rx
.
match
(
line
)
if
not
m
:
raise
LaTeXFormatError
(
"could not extract parameter %s for %s: %s"
%
(
attrname
,
macroname
,
`line[:100]`
))
value
=
m
.
group
(
1
)
if
_token_rx
.
match
(
value
):
dtype
=
"TOKEN"
else
:
dtype
=
"CDATA"
ofp
.
write
(
"A%s %s %s
\
n
"
%
(
attrname
,
dtype
,
encode
(
value
)))
"non-empty element '%s' has no content: %s"
%
(
macroname
,
line
[:
12
]))
line
=
line
[
m
.
end
():]
if
params
and
type
(
params
[
-
1
])
is
type
(
''
)
\
and
(
not
empty
)
and
not
environ
:
# attempt to strip off next '{'
m
=
_start_group_rx
.
match
(
line
)
if
not
m
:
raise
LaTeXFormatError
(
"non-empty element '%s' has no content: %s"
%
(
macroname
,
line
[:
12
]))
if
not
opened
:
self
.
write
(
"(%s
\
n
"
%
macroname
)
pushing
(
macroname
,
"d"
,
len
(
stack
)
+
depth
)
if
empty
:
line
=
"}"
+
line
stack
.
append
(
macroname
)
self
.
pop_output
()
continue
if
line
[
0
]
==
endchar
and
not
stack
:
if
DEBUG
:
self
.
err_write
(
"subconvert() --> %s
\
n
"
%
`line[1:21]`
)
self
.
line
=
line
[
1
:]
return
self
.
line
if
line
[
0
]
==
"}"
:
# end of macro or group
macroname
=
stack
[
-
1
]
conversion
=
self
.
table
.
get
(
macroname
)
if
macroname
\
and
macroname
not
in
self
.
discards
\
and
type
(
conversion
)
is
not
type
(
""
):
# otherwise, it was just a bare group
self
.
write
(
")%s
\
n
"
%
stack
[
-
1
])
popping
(
macroname
,
"d"
,
len
(
stack
)
+
depth
-
1
)
del
stack
[
-
1
]
line
=
line
[
1
:]
continue
if
line
[
0
]
==
"{"
:
pushing
(
""
,
"e"
,
len
(
stack
)
+
depth
)
stack
.
append
(
""
)
line
=
line
[
1
:]
continue
if
line
[
0
]
==
"
\
\
"
and
line
[
1
]
in
ESCAPED_CHARS
:
self
.
write
(
"-%s
\
n
"
%
encode
(
line
[
1
]))
line
=
line
[
2
:]
continue
if
line
[:
2
]
==
r"\\"
:
self
.
write
(
"(BREAK
\
n
)BREAK
\
n
"
)
line
=
line
[
2
:]
continue
m
=
_text_rx
.
match
(
line
)
if
m
:
text
=
encode
(
m
.
group
())
self
.
write
(
"-%s
\
n
"
%
text
)
line
=
line
[
m
.
end
():]
if
not
opened
:
ofp
.
write
(
"(%s
\
n
"
%
macroname
)
pushing
(
macroname
,
"d"
,
len
(
stack
)
+
depth
)
if
empty
:
line
=
"}"
+
line
stack
.
append
(
macroname
)
ofp
=
real_ofp
continue
if
line
[
0
]
==
endchar
and
not
stack
:
if
DEBUG
:
sys
.
stderr
.
write
(
"subconvert() --> %s
\
n
"
%
`line[1:21]`
)
return
line
[
1
:]
if
line
[
0
]
==
"}"
:
# end of macro
macroname
=
stack
[
-
1
]
conversion
=
table
.
get
(
macroname
)
if
macroname
\
and
macroname
not
in
discards
\
and
type
(
conversion
)
is
not
type
(
""
):
# otherwise, it was just a bare group
ofp
.
write
(
")%s
\
n
"
%
stack
[
-
1
])
popping
(
macroname
,
"d"
,
len
(
stack
)
+
depth
-
1
)
del
stack
[
-
1
]
line
=
line
[
1
:]
continue
if
line
[
0
]
==
"{"
:
pushing
(
""
,
"e"
,
len
(
stack
)
+
depth
)
stack
.
append
(
""
)
line
=
line
[
1
:]
continue
if
line
[
0
]
==
"
\
\
"
and
line
[
1
]
in
ESCAPED_CHARS
:
ofp
.
write
(
"-%s
\
n
"
%
encode
(
line
[
1
]))
line
=
line
[
2
:]
continue
if
line
[:
2
]
==
r"\\"
:
ofp
.
write
(
"(BREAK
\
n
)BREAK
\
n
"
)
line
=
line
[
2
:]
continue
m
=
_text_rx
.
match
(
line
)
if
m
:
text
=
encode
(
m
.
group
())
ofp
.
write
(
"-%s
\
n
"
%
text
)
line
=
line
[
m
.
end
():]
continue
# special case because of \item[]
if
line
[
0
]
==
"]"
:
ofp
.
write
(
"-]
\
n
"
)
line
=
line
[
1
:]
continue
# avoid infinite loops
extra
=
""
if
len
(
line
)
>
100
:
extra
=
"..."
raise
LaTeXFormatError
(
"could not identify markup: %s%s"
%
(
`line[:100]`
,
extra
))
while
stack
and
stack
[
-
1
]
in
autoclosing
:
ofp
.
write
(
"-
\
\
n
\
n
"
)
ofp
.
write
(
")%s
\
n
"
%
stack
[
-
1
])
popping
(
stack
[
-
1
],
"e"
,
len
(
stack
)
+
depth
-
1
)
del
stack
[
-
1
]
if
stack
:
raise
LaTeXFormatError
(
"elements remain on stack: "
+
string
.
join
(
stack
))
# otherwise we just ran out of input here...
continue
# special case because of \item[]
if
line
[
0
]
==
"]"
:
self
.
write
(
"-]
\
n
"
)
line
=
line
[
1
:]
continue
# avoid infinite loops
extra
=
""
if
len
(
line
)
>
100
:
extra
=
"..."
raise
LaTeXFormatError
(
"could not identify markup: %s%s"
%
(
`line[:100]`
,
extra
))
while
stack
and
stack
[
-
1
]
in
self
.
autoclosing
:
self
.
write
(
"-
\
\
n
\
n
"
)
self
.
write
(
")%s
\
n
"
%
stack
[
-
1
])
popping
(
stack
.
pop
(),
"e"
,
len
(
stack
)
+
depth
-
1
)
if
stack
:
raise
LaTeXFormatError
(
"elements remain on stack: "
+
string
.
join
(
stack
,
", "
))
# otherwise we just ran out of input here...
def
convert
(
self
):
self
.
subconvert
()
def
start_macro
(
self
,
name
):
conversion
=
self
.
table
.
get
(
name
,
([],
0
,
0
,
0
,
0
))
params
,
optional
,
empty
,
environ
,
nocontent
=
conversion
if
empty
:
self
.
write
(
"e
\
n
"
)
elif
nocontent
:
empty
=
1
return
params
,
optional
,
empty
,
environ
def
convert
(
ifp
,
ofp
,
table
=
{},
discards
=
(),
autoclosing
=
()):
lines
=
string
.
split
(
ifp
.
read
(),
"
\
n
"
)
for
i
in
range
(
len
(
lines
)):
lines
[
i
]
=
string
.
rstrip
(
lines
[
i
])
data
=
string
.
join
(
lines
,
"
\
n
"
)
c
=
Conversion
(
ifp
,
ofp
,
table
,
discards
,
autoclosing
)
try
:
subconvert
(
data
,
ofp
,
table
,
discards
,
autoclosing
)
c
.
convert
(
)
except
IOError
,
(
err
,
msg
):
if
err
!=
errno
.
EPIPE
:
raise
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment