Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
dd21c321
Commit
dd21c321
authored
Jan 08, 1999
by
Fred Drake
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
It's finally gone!
parent
f2b5374b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
2406 deletions
+0
-2406
Doc/tools/partparse.py
Doc/tools/partparse.py
+0
-2406
No files found.
Doc/tools/partparse.py
deleted
100644 → 0
View file @
f2b5374b
#
# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
# and generate texinfo source.
#
# This is *not* a good example of good programming practices. In fact, this
# file could use a complete rewrite, in order to become faster, more
# easily extensible and maintainable.
#
# However, I added some comments in a few places for the pitiful person who
# would ever need to take a look into this file.
#
# Have I been clear enough??
#
# -jh
#
# Yup. I made some performance improvements and hope this lasts a while;
# I don't want to be the schmuck who ends up rewriting it!
#
# -fld
#
# (sometime later...)
#
# Ok, I've re-worked substantial chunks of this. It's only getting worse.
# It just might be gone before the next source release. (Yeah!)
#
# -fld
import
sys
,
string
,
regex
,
getopt
,
os
from
types
import
IntType
,
ListType
,
StringType
,
TupleType
# First whitespace-separated word of sys.version.
release_version = string.split(sys.version)[0]
# Different parse modes for phase 1
# The phase-1 parse modes are numbered consecutively from zero.
(MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
 MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE) = range(7)

# Every known parse mode, in numeric order.
the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
             MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE)
# Show the neighbourhood of the scanned buffer
def epsilon(buf, where):
    """Return ' Context <before>.<after>.' for position `where` in `buf`.

    <before> and <after> are the repr() of up to 10 characters on either
    side of `where`, clipped to the ends of the buffer.
    """
    lo = max(where - 10, 0)
    hi = min(where + 10, len(buf))
    return ' Context %s.%s.' % (repr(buf[lo:where]), repr(buf[where:hi]))
# Should return the line number. never worked
def lin():
    """Return ' Line N.' where N is the repr() of the global `lineno`."""
    global lineno
    return ' Line %s.' % repr(lineno)
# Displays the recursion level.
def lv(lvl):
    """Return ' Level N.' where N is the repr() of recursion level `lvl`."""
    return ' Level %s.' % repr(lvl)
# Combine the three previous functions. Used often.
def lle(lvl, buf, where):
    """Return the level, line and context descriptions glued together.

    This is lv(lvl) + lin() + epsilon(buf, where), the suffix used by most
    error messages.
    """
    level_part = lv(lvl)
    line_part = lin()
    context_part = epsilon(buf, where)
    return level_part + line_part + context_part
# This class is only needed for _symbolic_ representation of the parse mode.
class Mode:
    """Wrapper around a MODE_* number that prints as its symbolic name."""

    def __init__(self, arg):
        # only the numbers listed in the_modes are accepted
        if arg in the_modes:
            self.mode = arg
        else:
            raise ValueError('mode not in the_modes')

    def __cmp__(self, other):
        # anything of another type is taken to be a mode number and is
        # looked up in the module-level `mode` table first
        if type(self) != type(other):
            other = mode[other]
        return cmp(self.mode, other.mode)

    def __repr__(self):
        for number, name in ((MODE_REGULAR, 'MODE_REGULAR'),
                             (MODE_VERBATIM, 'MODE_VERBATIM'),
                             (MODE_CS_SCAN, 'MODE_CS_SCAN'),
                             (MODE_COMMENT, 'MODE_COMMENT'),
                             (MODE_MATH, 'MODE_MATH'),
                             (MODE_DMATH, 'MODE_DMATH'),
                             (MODE_GOBBLEWHITE, 'MODE_GOBBLEWHITE')):
            if self.mode == number:
                return name
        raise ValueError('mode not in the_modes')
# just a wrapper around a class initialisation
# Table mapping each mode number in the_modes to its Mode instance.
mode = {}
for t in the_modes:
    mode[t] = Mode(t)
# After phase 1, the text consists of chunks, with a certain type
# this type will be assigned to the chtype member of the chunk
# the where-field contains the file position where this is found
# and the data field contains (1): a tuple describing the start and end
# positions of the substring (can be used as slice for the buf-variable),
# (2) just a string, mostly generated by the changeit routine,
# or (3) a list, describing a (recursive) subgroup of chunks
# The chunk types are numbered consecutively from zero.
(PLAIN,           # assume plaintext, data = the text
 GROUP,           # group ({}), data = [chunk, chunk,..]
 CSNAME,          # control seq token, data = the command
 COMMENT,         # data is the actual comment
 DMATH,           # displaymath, data = [chunk, chunk,..]
 MATH,            # math, see displaymath
 OTHER,           # char with catcode other, data = char
 ACTIVE,          # active char
 GOBBLEDWHITE,    # gobbled LWSP, after CSNAME
 ENDLINE,         # end-of-line, data = '\n'
 DENDLINE,        # double EOL, data = '\n', indicates \par
 ENV,             # LaTeX-environment, data = (envname, [ch, ch, ch,.])
 CSLINE,          # for texi: next chunk will be one group of args,
                  # which will be set all on 1 line
 IGNORE,          # ignore this data
 ENDENV,          # temp end of group indicator
 IF,              # if-directive, data = (flag, negate, [ch, ch, ch,...])
 ) = range(16)

# Every known chunk type, in numeric order.
the_types = (PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE,
             GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE,
             ENDENV, IF)
# class, just to display symbolic name
class ChunkType:
    """Wrapper around a chunk type number that prints as its symbolic name."""

    def __init__(self, chunk_type):
        # only the numbers listed in the_types are accepted
        if chunk_type in the_types:
            self.chunk_type = chunk_type
        else:
            raise ValueError('chunk_type not in the_types')

    def __cmp__(self, other):
        # anything of another type is taken to be a type number and is
        # looked up in the module-level `chunk_type` table first
        if type(self) != type(other):
            other = chunk_type[other]
        return cmp(self.chunk_type, other.chunk_type)

    def __repr__(self):
        for number, name in ((PLAIN, 'PLAIN'),
                             (GROUP, 'GROUP'),
                             (CSNAME, 'CSNAME'),
                             (COMMENT, 'COMMENT'),
                             (DMATH, 'DMATH'),
                             (MATH, 'MATH'),
                             (OTHER, 'OTHER'),
                             (ACTIVE, 'ACTIVE'),
                             (GOBBLEDWHITE, 'GOBBLEDWHITE'),
                             (DENDLINE, 'DENDLINE'),
                             (ENDLINE, 'ENDLINE'),
                             (ENV, 'ENV'),
                             (CSLINE, 'CSLINE'),
                             (IGNORE, 'IGNORE'),
                             (ENDENV, 'ENDENV'),
                             (IF, 'IF')):
            if self.chunk_type == number:
                return name
        raise ValueError('chunk_type not in the_types')
# ...and the wrapper
# Table mapping each type number in the_types to its ChunkType instance.
chunk_type = {}
for t in the_types:
    chunk_type[t] = ChunkType(t)

# store a type object of the ChunkType-class-instance...
# (Chunk.__init__ compares against it to tell instances from bare numbers)
chunk_type_type = type(chunk_type[PLAIN])
# this class contains a part of the parsed buffer
class Chunk:
    """One piece of the parsed buffer.

    chtype -- the chunk's type; a value that is not already a ChunkType
              instance is looked up in the chunk_type table
    where  -- buffer position where the chunk was found
    data   -- payload; its form depends on chtype
    """

    def __init__(self, chtype, where, data):
        if type(chtype) != chunk_type_type:
            chtype = chunk_type[chtype]
        self.chtype = chtype
        self.where = where
        self.data = data

    # chunk types whose data is passed through s() when a buffer is known
    __datatypes = [chunk_type[CSNAME], chunk_type[PLAIN], chunk_type[CSLINE]]

    def __repr__(self):
        data = self.data
        # __init__ never stores the buffer, so the slice can only be resolved
        # into text when something has attached a `buf` attribute.  Without
        # one, show the raw data rather than dying with AttributeError in
        # the middle of a diagnostic message.
        if self.chtype in self.__datatypes and hasattr(self, 'buf'):
            data = s(self.buf, data)
        return 'chunk' + repr((self.chtype, self.where, data))
# and the wrapper
# lower-case alias; the rest of the file builds chunks through this name
chunk = Chunk

# string exception raised for malformed input
error = 'partparse.error'
#
# TeX's catcodes...
#
# The sixteen category codes are numbered consecutively from zero.
(CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
 CC_ALIGNMENT, CC_ENDLINE, CC_PARAMETER, CC_SUPERSCRIPT,
 CC_SUBSCRIPT, CC_IGNORE, CC_WHITE, CC_LETTER,
 CC_OTHER, CC_ACTIVE, CC_COMMENT, CC_INVALID) = range(16)

# The symbolic name of each catcode, indexed by its number.
cc_names = [
    'CC_ESCAPE', 'CC_LBRACE', 'CC_RBRACE', 'CC_MATHSHIFT',
    'CC_ALIGNMENT', 'CC_ENDLINE', 'CC_PARAMETER', 'CC_SUPERSCRIPT',
    'CC_SUBSCRIPT', 'CC_IGNORE', 'CC_WHITE', 'CC_LETTER',
    'CC_OTHER', 'CC_ACTIVE', 'CC_COMMENT', 'CC_INVALID',
    ]
# Show a list of catcode-name-symbols
def pcl(codelist):
    """Return the catcode names for `codelist` formatted as '[A, B, ...]'."""
    names = []
    for code in codelist:
        names.append(cc_names[code])
    return '[' + string.join(names, ', ') + ']'
# the name of the catcode (ACTIVE, OTHER, etc.)
def pc(code):
    """Return the symbolic name of catcode number `code`."""
    name = cc_names[code]
    return name
# Which catcodes make the parser stop parsing regular plaintext
# make_rc_regular() builds its character class from these categories
regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
                     CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT,
                     CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE]

# categories that may occur inside a control sequence name; make_rc_cs_scan()
# negates this set, so the scan stops at the first character outside it
csname_scancodes = [CC_LETTER]

# categories that are gobbled as white space; make_rc_endwhite() negates this
# set as well
white_scancodes = [CC_WHITE]
##white_scancodes = [CC_WHITE, CC_ENDLINE]

# make a list of all catcode id's, except for catcode ``other''
all_but_other_codes = range(16)
del all_but_other_codes[CC_OTHER]
##print all_but_other_codes

# when does a comment end
comment_stopcodes = [CC_ENDLINE]
# gather all characters together, specified by a list of catcodes
def code2string(cc, codelist):
    """Concatenate the characters of every category in `codelist`.

    `cc` is indexed by category; categories whose entry is empty or None
    contribute nothing.
    """
    result = ''
    for category in codelist:
        chars = cc[category]
        if not chars:
            continue
        result = result + chars
    return result
# automatically generate all characters of catcode other, being the
# complement set: every character code 0..255 that none of the categories
# in all_but_other_codes claims
def make_other_codes(cc):
    # unclaimed[n] stays true while chr(n) belongs to no other category
    unclaimed = [1] * 256
    for category in all_but_other_codes:
        chars = cc[category]
        if chars:
            for c in chars:
                unclaimed[ord(c)] = 0
    result = ''
    for n in range(256):
        if unclaimed[n]:
            result = result + chr(n)
    return result
# catcode dump (which characters have which catcodes).
def dump_cc(name, cc):
    """Check that the catcode table `cc` has exactly 16 entries.

    The dump output itself is disabled, so `name` is not used.  Raises
    TypeError when `cc` has any other length.
    """
    if len(cc) == 16:
        return
    raise TypeError('cc not good cat class')
# In the beginning,....
# the empty table: no category has any characters yet
epoch_cc = [None] * 16
##dump_cc('epoch_cc', epoch_cc)

# INITEX
initex_cc = epoch_cc[:]
initex_cc[CC_ESCAPE] = '\\'
initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
    '\n', '\0', ' '
initex_cc[CC_LETTER] = string.uppercase + string.lowercase
initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
##dump_cc('initex_cc', initex_cc)


# LPLAIN: LaTeX catcode setting (see lplain.tex)
lplain_cc = initex_cc[:]
lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
lplain_cc[CC_MATHSHIFT] = '$'
lplain_cc[CC_ALIGNMENT] = '&'
lplain_cc[CC_PARAMETER] = '#'
lplain_cc[CC_SUPERSCRIPT] = '^\x0B'     # '^' and C-k
lplain_cc[CC_SUBSCRIPT] = '_\x01'       # '_' and C-a
lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
lplain_cc[CC_ACTIVE] = '~\x0C'          # '~' and C-l
# filled in last, so it sees every category assigned above
lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
##dump_cc('lplain_cc', lplain_cc)


# Guido's LaTeX environment catcoded '_' as ``other''
# my own purpose catlist
my_cc = lplain_cc[:]
my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:]   # remove '_' here
my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'         # add it to OTHER list
# with the printing disabled this only checks that the table has 16 entries
dump_cc('my_cc', my_cc)
# needed for un_re, my equivalent for regexp-quote in Emacs
# the characters that un_re() protects with a backslash
re_meaning = '\\[]^$'

def un_re(str):
    """Return `str` with a backslash put in front of every character that
    occurs in re_meaning."""
    result = ''
    for ch in str:
        if ch in re_meaning:
            ch = '\\' + ch
        result = result + ch
    return result
# NOTE the negate ('^') operator in *some* of the regexps below
def make_rc_regular(cc):
    """Compile a regex matching any one character of the categories in
    regular_stopcodes, taken from catcode table `cc`."""
    # problems here if '[]' are included!!
    stop_chars = code2string(cc, regular_stopcodes)
    return regex.compile('[' + stop_chars + ']')
def make_rc_cs_scan(cc):
    """Compile a regex matching any one character that is NOT in the
    categories of csname_scancodes, taken from catcode table `cc`."""
    name_chars = code2string(cc, csname_scancodes)
    return regex.compile('[^' + name_chars + ']')
def make_rc_comment(cc):
    """Compile a regex matching any one character of the categories in
    comment_stopcodes, taken from catcode table `cc`."""
    stop_chars = code2string(cc, comment_stopcodes)
    return regex.compile('[' + stop_chars + ']')
def make_rc_endwhite(cc):
    """Compile a regex matching any one character that is NOT in the
    categories of white_scancodes, taken from catcode table `cc`."""
    white_chars = code2string(cc, white_scancodes)
    return regex.compile('[^' + white_chars + ']')
# regular: normal mode:
# All four scanners are compiled once, from the my_cc catcode table.
rc_regular = make_rc_regular(my_cc)

# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
rc_cs_scan = make_rc_cs_scan(my_cc)
# finds the character that ends a comment
rc_comment = make_rc_comment(my_cc)
# finds the first character that is not white space
rc_endwhite = make_rc_endwhite(my_cc)
# parseit (BUF, PARSEMODE=mode[MODE_REGULAR], START=0, RECURSION-LEVEL=0)
# RECURSION-LEVEL is incremented on entry.
# result contains the list of chunks returned
# together with this list, the buffer position is returned
# RECURSION-LEVEL will be set to zero *again*, when recursively a
# {,D}MATH-mode scan has been entered.
# This has been done in order to better check for environment-mismatches
def
parseit
(
buf
,
parsemode
=
mode
[
MODE_REGULAR
],
start
=
0
,
lvl
=
0
):
global
lineno
result
=
[]
end
=
len
(
buf
)
if
lvl
==
0
and
parsemode
==
mode
[
MODE_REGULAR
]:
lineno
=
1
lvl
=
lvl
+
1
##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
#
# some of the more regular modes...
#
if
parsemode
in
(
mode
[
MODE_REGULAR
],
mode
[
MODE_DMATH
],
mode
[
MODE_MATH
]):
cstate
=
[]
newpos
=
start
curpmode
=
parsemode
while
1
:
where
=
newpos
#print '\tnew round: ' + epsilon(buf, where)
if
where
==
end
:
if
lvl
>
1
or
curpmode
!=
mode
[
MODE_REGULAR
]:
# not the way we started...
raise
EOFError
,
'premature end of file.'
+
lle
(
lvl
,
buf
,
where
)
# the real ending of lvl-1 parse
return
end
,
result
pos
=
rc_regular
.
search
(
buf
,
where
)
if
pos
<
0
:
pos
=
end
if
pos
!=
where
:
newpos
,
c
=
pos
,
chunk
(
PLAIN
,
where
,
(
where
,
pos
))
result
.
append
(
c
)
continue
#
# ok, pos == where and pos != end
#
foundchar
=
buf
[
where
]
if
foundchar
in
my_cc
[
CC_LBRACE
]:
# recursive subgroup parse...
newpos
,
data
=
parseit
(
buf
,
curpmode
,
where
+
1
,
lvl
)
result
.
append
(
chunk
(
GROUP
,
where
,
data
))
elif
foundchar
in
my_cc
[
CC_RBRACE
]:
if
lvl
<=
1
:
raise
error
,
'ENDGROUP while in base level.'
+
lle
(
lvl
,
buf
,
where
)
if
lvl
==
1
and
mode
!=
mode
[
MODE_REGULAR
]:
raise
error
,
'endgroup while in math mode. +lin() + epsilon(buf, where)'
return
where
+
1
,
result
elif
foundchar
in
my_cc
[
CC_ESCAPE
]:
#
# call the routine that actually deals with
# this problem. If do_ret is None, than
# return the value of do_ret
#
# Note that handle_cs might call this routine
# recursively again...
#
do_ret
,
newpos
=
handlecs
(
buf
,
where
,
curpmode
,
lvl
,
result
,
end
)
if
do_ret
!=
None
:
return
do_ret
elif
foundchar
in
my_cc
[
CC_COMMENT
]:
newpos
,
data
=
parseit
(
buf
,
mode
[
MODE_COMMENT
],
where
+
1
,
lvl
)
result
.
append
(
chunk
(
COMMENT
,
where
,
data
))
elif
foundchar
in
my_cc
[
CC_MATHSHIFT
]:
# note that recursive calls to math-mode
# scanning are called with recursion-level 0
# again, in order to check for bad mathend
#
if
where
+
1
!=
end
and
buf
[
where
+
1
]
in
my_cc
[
CC_MATHSHIFT
]:
#
# double mathshift, e.g. '$$'
#
if
curpmode
==
mode
[
MODE_REGULAR
]:
newpos
,
data
=
parseit
(
buf
,
mode
[
MODE_DMATH
],
where
+
2
,
0
)
result
.
append
(
chunk
(
DMATH
,
where
,
data
))
elif
curpmode
==
mode
[
MODE_MATH
]:
raise
error
,
'wrong math delimiiter'
+
lin
()
+
epsilon
(
buf
,
where
)
elif
lvl
!=
1
:
raise
error
,
'bad mathend.'
+
lle
(
lvl
,
buf
,
where
)
else
:
return
where
+
2
,
result
else
:
#
# single math shift, e.g. '$'
#
if
curpmode
==
mode
[
MODE_REGULAR
]:
newpos
,
data
=
parseit
(
buf
,
mode
[
MODE_MATH
],
where
+
1
,
0
)
result
.
append
(
chunk
(
MATH
,
where
,
data
))
elif
curpmode
==
mode
[
MODE_DMATH
]:
raise
error
,
'wrong math delimiiter'
+
lin
()
+
epsilon
(
buf
,
where
)
elif
lvl
!=
1
:
raise
error
,
'bad mathend.'
+
lv
(
lvl
,
buf
,
where
)
else
:
return
where
+
1
,
result
elif
foundchar
in
my_cc
[
CC_IGNORE
]:
print
'warning: ignored char'
,
`foundchar`
newpos
=
where
+
1
elif
foundchar
in
my_cc
[
CC_ACTIVE
]:
result
.
append
(
chunk
(
ACTIVE
,
where
,
foundchar
))
newpos
=
where
+
1
elif
foundchar
in
my_cc
[
CC_INVALID
]:
raise
error
,
'invalid char '
+
`foundchar`
newpos
=
where
+
1
elif
foundchar
in
my_cc
[
CC_ENDLINE
]:
#
# after an end of line, eat the rest of
# whitespace on the beginning of the next line
# this is what LaTeX more or less does
#
# also, try to indicate double newlines (\par)
#
lineno
=
lineno
+
1
savedwhere
=
where
newpos
,
dummy
=
parseit
(
buf
,
mode
[
MODE_GOBBLEWHITE
],
where
+
1
,
lvl
)
if
newpos
!=
end
and
buf
[
newpos
]
in
my_cc
[
CC_ENDLINE
]:
result
.
append
(
chunk
(
DENDLINE
,
savedwhere
,
foundchar
))
else
:
result
.
append
(
chunk
(
ENDLINE
,
savedwhere
,
foundchar
))
else
:
result
.
append
(
chunk
(
OTHER
,
where
,
foundchar
))
newpos
=
where
+
1
elif
parsemode
==
mode
[
MODE_CS_SCAN
]:
#
# scan for a control sequence token. `\ape', `\nut' or `\%'
#
if
start
==
end
:
raise
EOFError
,
'can
\
'
t find end of csname'
pos
=
rc_cs_scan
.
search
(
buf
,
start
)
if
pos
<
0
:
pos
=
end
if
pos
==
start
:
# first non-letter right where we started the search
# ---> the control sequence name consists of one single
# character. Also: don't eat white space...
if
buf
[
pos
]
in
my_cc
[
CC_ENDLINE
]:
lineno
=
lineno
+
1
pos
=
pos
+
1
return
pos
,
(
start
,
pos
)
else
:
spos
=
pos
if
buf
[
pos
]
==
'
\
n
'
:
lineno
=
lineno
+
1
spos
=
pos
+
1
pos2
,
dummy
=
parseit
(
buf
,
mode
[
MODE_GOBBLEWHITE
],
spos
,
lvl
)
return
pos2
,
(
start
,
pos
)
elif
parsemode
==
mode
[
MODE_GOBBLEWHITE
]:
if
start
==
end
:
return
start
,
''
pos
=
rc_endwhite
.
search
(
buf
,
start
)
if
pos
<
0
:
pos
=
start
return
pos
,
(
start
,
pos
)
elif
parsemode
==
mode
[
MODE_COMMENT
]:
pos
=
rc_comment
.
search
(
buf
,
start
)
lineno
=
lineno
+
1
if
pos
<
0
:
print
'no newline perhaps?'
raise
EOFError
,
'can
\
'
t find end of comment'
pos
=
pos
+
1
pos2
,
dummy
=
parseit
(
buf
,
mode
[
MODE_GOBBLEWHITE
],
pos
,
lvl
)
return
pos2
,
(
start
,
pos
)
else
:
raise
error
,
'Unknown mode ('
+
`parsemode`
+
')'
#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
#boxcommands = 'mbox', 'fbox'
#defcommands = 'def', 'newcommand'
# the literal text that closes a verbatim environment ...
endverbstr = '\\end{verbatim}'
# ... and a compiled pattern for it, escaped with un_re()
re_endverb = regex.compile(un_re(endverbstr))
#
# handlecs: helper function for parseit, for the special thing we might
# wanna do after certain command control sequences
# returns: None or return_data, newpos
#
# in the latter case, the calling function is instructed to immediately
# return with the data in return_data
#
def
handlecs
(
buf
,
where
,
curpmode
,
lvl
,
result
,
end
):
global
lineno
# get the control sequence name...
newpos
,
data
=
parseit
(
buf
,
mode
[
MODE_CS_SCAN
],
where
+
1
,
lvl
)
saveddata
=
data
s_buf_data
=
s
(
buf
,
data
)
if
s_buf_data
in
(
'begin'
,
'end'
):
# skip the expected '{' and get the LaTeX-envname '}'
newpos
,
data
=
parseit
(
buf
,
mode
[
MODE_REGULAR
],
newpos
+
1
,
lvl
)
if
len
(
data
)
!=
1
:
raise
error
,
'expected 1 chunk of data.'
+
lle
(
lvl
,
buf
,
where
)
# yucky, we've got an environment
envname
=
s
(
buf
,
data
[
0
].
data
)
s_buf_saveddata
=
s
(
buf
,
saveddata
)
##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
if
s_buf_saveddata
==
'begin'
and
envname
==
'verbatim'
:
# verbatim deserves special treatment
pos
=
re_endverb
.
search
(
buf
,
newpos
)
if
pos
<
0
:
raise
error
,
"%s not found.%s"
\
%
(
`endverbstr`
,
lle
(
lvl
,
buf
,
where
))
result
.
append
(
chunk
(
ENV
,
where
,
(
envname
,
[
chunk
(
PLAIN
,
newpos
,
(
newpos
,
pos
))])))
newpos
=
pos
+
len
(
endverbstr
)
elif
s_buf_saveddata
==
'begin'
:
# start parsing recursively... If that parse returns
# from an '\end{...}', then should the last item of
# the returned data be a string containing the ended
# environment
newpos
,
data
=
parseit
(
buf
,
curpmode
,
newpos
,
lvl
)
if
not
data
or
type
(
data
[
-
1
])
is
not
StringType
:
raise
error
,
"missing 'end'"
+
lle
(
lvl
,
buf
,
where
)
\
+
epsilon
(
buf
,
newpos
)
retenv
=
data
[
-
1
]
del
data
[
-
1
]
if
retenv
!=
envname
:
#[`retenv`, `envname`]
raise
error
,
'environments do not match.%s%s'
\
%
(
lle
(
lvl
,
buf
,
where
),
epsilon
(
buf
,
newpos
))
result
.
append
(
chunk
(
ENV
,
where
,
(
retenv
,
data
)))
else
:
# 'end'... append the environment name, as just
# pointed out, and order parsit to return...
result
.
append
(
envname
)
##print 'POINT of return: ' + epsilon(buf, newpos)
# the tuple will be returned by parseit
return
(
newpos
,
result
),
newpos
# end of \begin ... \end handling
elif
s_buf_data
[
0
:
2
]
==
'if'
:
# another scary monster: the 'if' directive
flag
=
s_buf_data
[
2
:]
# recursively call parseit, just like environment above..
# the last item of data should contain the if-termination
# e.g., 'else' of 'fi'
newpos
,
data
=
parseit
(
buf
,
curpmode
,
newpos
,
lvl
)
if
not
data
or
data
[
-
1
]
not
in
(
'else'
,
'fi'
):
raise
error
,
'wrong if... termination'
+
\
lle
(
lvl
,
buf
,
where
)
+
epsilon
(
buf
,
newpos
)
ifterm
=
data
[
-
1
]
del
data
[
-
1
]
# 0 means dont_negate flag
result
.
append
(
chunk
(
IF
,
where
,
(
flag
,
0
,
data
)))
if
ifterm
==
'else'
:
# do the whole thing again, there is only one way
# to end this one, by 'fi'
newpos
,
data
=
parseit
(
buf
,
curpmode
,
newpos
,
lvl
)
if
not
data
or
data
[
-
1
]
not
in
(
'fi'
,
):
raise
error
,
'wrong if...else... termination'
\
+
lle
(
lvl
,
buf
,
where
)
\
+
epsilon
(
buf
,
newpos
)
ifterm
=
data
[
-
1
]
del
data
[
-
1
]
result
.
append
(
chunk
(
IF
,
where
,
(
flag
,
1
,
data
)))
#done implicitely: return None, newpos
elif
s_buf_data
in
(
'else'
,
'fi'
):
result
.
append
(
s
(
buf
,
data
))
# order calling party to return tuple
return
(
newpos
,
result
),
newpos
# end of \if, \else, ... \fi handling
elif
s
(
buf
,
saveddata
)
==
'verb'
:
x2
=
saveddata
[
1
]
result
.
append
(
chunk
(
CSNAME
,
where
,
data
))
if
x2
==
end
:
raise
error
,
'premature end of command.'
+
lle
(
lvl
,
buf
,
where
)
delimchar
=
buf
[
x2
]
##print 'VERB: delimchar ' + `delimchar`
pos
=
regex
.
compile
(
un_re
(
delimchar
)).
search
(
buf
,
x2
+
1
)
if
pos
<
0
:
raise
error
,
'end of
\
'
verb
\
'
argument ('
+
\
`delimchar`
+
') not found.'
+
\
lle
(
lvl
,
buf
,
where
)
result
.
append
(
chunk
(
GROUP
,
x2
,
[
chunk
(
PLAIN
,
x2
+
1
,
(
x2
+
1
,
pos
))]))
newpos
=
pos
+
1
else
:
result
.
append
(
chunk
(
CSNAME
,
where
,
data
))
return
None
,
newpos
# this is just a function to get the string value if the possible data-tuple
def s(buf, data):
    """Return the text that `data` stands for.

    A string is returned unchanged; a pair of two integers (x1, x2) gives
    buf[x1:x2].  Anything else raises TypeError.
    """
    if type(data) is type(''):
        return data
    is_int_pair = (len(data) == 2
                   and type(data[0]) is type(data[1]) is type(0))
    if not is_int_pair:
        raise TypeError('expected tuple of 2 integers')
    return buf[data[0]:data[1]]
##length, data1, i = getnextarg(length, buf, pp, i + 1)
# make a deep-copy of some chunks
def crcopy(r):
    """Return a new list holding a chunkcopy() of every chunk in `r`."""
    copies = []
    for ch in r:
        copies.append(chunkcopy(ch))
    return copies
# copy a chunk, would better be a method of class Chunk...
def chunkcopy(ch):
    """Return a copy of chunk `ch`.

    The members of a GROUP chunk are copied recursively; for every other
    chunk type the new chunk shares the original's data object.
    """
    if ch.chtype != chunk_type[GROUP]:
        return chunk(ch.chtype, ch.where, ch.data)
    members = []
    for member in ch.data:
        members.append(chunkcopy(member))
    return chunk(GROUP, ch.where, members)
# get next argument for TeX-macro, flatten a group (insert between)
# or return Command Sequence token, or give back one character
def
getnextarg
(
length
,
buf
,
pp
,
item
):
##wobj = Wobj()
##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
while
item
<
length
and
pp
[
item
].
chtype
==
chunk_type
[
ENDLINE
]:
del
pp
[
item
]
length
=
length
-
1
if
item
>=
length
:
raise
error
,
'no next arg.'
+
epsilon
(
buf
,
pp
[
-
1
].
where
)
if
pp
[
item
].
chtype
==
chunk_type
[
GROUP
]:
newpp
=
pp
[
item
].
data
del
pp
[
item
]
length
=
length
-
1
changeit
(
buf
,
newpp
)
length
=
length
+
len
(
newpp
)
pp
[
item
:
item
]
=
newpp
item
=
item
+
len
(
newpp
)
if
len
(
newpp
)
<
10
:
wobj
=
Wobj
()
dumpit
(
buf
,
wobj
.
write
,
newpp
)
##print 'GETNEXTARG: inserted ' + `wobj.data`
return
length
,
item
elif
pp
[
item
].
chtype
==
chunk_type
[
PLAIN
]:
#grab one char
print
'WARNING: grabbing one char'
if
len
(
s
(
buf
,
pp
[
item
].
data
))
>
1
:
pp
.
insert
(
item
,
chunk
(
PLAIN
,
pp
[
item
].
where
,
s
(
buf
,
pp
[
item
].
data
)[:
1
]))
item
,
length
=
item
+
1
,
length
+
1
pp
[
item
].
data
=
s
(
buf
,
pp
[
item
].
data
)[
1
:]
else
:
item
=
item
+
1
return
length
,
item
else
:
ch
=
pp
[
item
]
try
:
str
=
`s(buf, ch.data)`
except
TypeError
:
str
=
`ch.data`
if
len
(
str
)
>
400
:
str
=
str
[:
400
]
+
'...'
print
'GETNEXTARG:'
,
ch
.
chtype
,
'not handled, data '
+
str
return
length
,
item
# this one is needed to find the end of LaTeX's optional argument, like
# item[...]
# matches the ']' that closes an optional argument
re_endopt = regex.compile(']')
# get a LaTeX-optional argument, you know, the square braces '[' and ']'
def getoptarg(length, buf, pp, item):
    # Isolate a LaTeX optional argument ('[...]') that starts at pp[item],
    # editing pp in place.  `length` is the caller's idea of len(pp).
    # Returns (length, item).  When pp[item] is not a PLAIN chunk starting
    # with '[', both values come back unchanged.  Otherwise the brackets are
    # stripped and item ends up just past the chunk holding the text in
    # front of the ']'.
    # Raises error when no ']' is found in the remaining PLAIN chunks.

    # leftover of the debugging print below; the dump itself is still made
    wobj = Wobj()
    dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
    ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'

    if item >= length or \
       pp[item].chtype != chunk_type[PLAIN] or \
       s(buf, pp[item].data)[0] != '[':
        return length, item

    # strip the '['; drop the chunk if nothing else was in it
    pp[item].data = s(buf, pp[item].data)[1:]
    if len(pp[item].data) == 0:
        del pp[item]
        length = length - 1

    while 1:
        if item == length:
            raise error, 'No end of optional arg found'
        if pp[item].chtype == chunk_type[PLAIN]:
            text = s(buf, pp[item].data)
            pos = re_endopt.search(text)
            if pos >= 0:
                # keep what precedes the ']' (or drop the chunk when empty)
                pp[item].data = text[:pos]
                if pos == 0:
                    del pp[item]
                    length = length - 1
                else:
                    item = item + 1
                # what follows the ']' goes into a new chunk of its own
                text = text[pos + 1:]

                # NOTE(review): this literal is reconstructed as space+tab;
                # the extraction may have dropped a leading space -- confirm
                while text and text[0] in ' \t':
                    text = text[1:]

                if text:
                    pp.insert(item, chunk(PLAIN, 0, text))
                    length = length + 1
                return length, item

        item = item + 1
# Wobj just add write-requests to the ``data'' attribute
class Wobj:
    """Write target: every write() call appends its argument to `data`."""
    # class-level default; the first write() gives the instance its own value
    data = ''
    def write(self, data):
        self.data = self.data + data
# ignore these commands
# flattext() deletes these without printing its warning
ignoredcommands = ('hline', 'small', '/', 'tableofcontents', 'Large')

# map commands like these to themselves as plaintext
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF', 'LaTeX', 'POSIX', 'TeX',
               'SliTeX')

# \{ --> {,  \} --> }, etc
themselves = ('{', '}', ',', '.', '@', ' ', '\n') + wordsselves

# these ones also themselves (see argargs macro in myformat.sty)
inargsselves = (',', '[', ']', '(', ')')

# this is how *I* would show the difference between emph and strong
#  a tuple gives the text put before and after the argument
#  code 1 means: fold to uppercase
markcmds = {'code': ('', ''), 'var': 1, 'emph': ('_', '_'),
            'strong': ('*', '*')}

# recognise pattern {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {'rm': 'r', 'it': 'i', 'em': 'emph', 'bf': 'b', 'tt': 't'}
# try to remove macros and return flat text
def flattext(buf, pp):
    # Return the chunks of `pp` as flat text.  The work is done on a copy
    # made with crcopy(), which is then written out through dumpit().
    # Raises the string exception 'FATAL' when the length bookkeeping or
    # the markcmds table is inconsistent.
    # NOTE(review): the block nesting below is reconstructed from a source
    # that had lost its indentation -- confirm against the original.
    pp = crcopy(pp)
    ##print '---> FLATTEXT ' + `pp`
    wobj = Wobj()

    i, length = 0, len(pp)
    while 1:
        # `length` is maintained by hand and must track len(pp)
        if len(pp) != length:
            raise 'FATAL', 'inconsistent length'
        if i >= length:
            break
        ch = pp[i]
        i = i + 1
        if ch.chtype == chunk_type[PLAIN]:
            pass
        elif ch.chtype == chunk_type[CSNAME]:
            s_buf_data = s(buf, ch.data)
            if convertible_csname(s_buf_data):
                # the third element of the conversion is not used here
                ch.chtype, ch.data, nix = conversion(s_buf_data)
                if hist.inargs and s_buf_data in inargsselves:
                    ch.chtype = chunk_type[PLAIN]
            elif len(s_buf_data) == 1 \
                 and s_buf_data in onlylatexspecial:
                ch.chtype = chunk_type[PLAIN]
                # if it is followed by an empty group,
                # remove that group, it was needed for
                # a true space
                if i < length \
                   and pp[i].chtype == chunk_type[GROUP] \
                   and len(pp[i].data) == 0:
                    del pp[i]
                    length = length - 1

            elif s_buf_data in markcmds.keys():
                # flatten the argument and fold it into this chunk as text
                length, newi = getnextarg(length, buf, pp, i)
                str = flattext(buf, pp[i:newi])
                del pp[i:newi]
                length = length - (newi - i)
                ch.chtype = chunk_type[PLAIN]
                markcmd = s_buf_data
                x = markcmds[markcmd]
                if type(x) == TupleType:
                    pre, after = x
                    str = pre + str + after
                elif x == 1:
                    str = string.upper(str)
                else:
                    raise 'FATAL', 'corrupt markcmds'
                ch.data = str
            else:
                if s_buf_data not in ignoredcommands:
                    print 'WARNING: deleting command ' + s_buf_data
                    print 'PP' + `pp[i-1]`
                # drop the command chunk and step back onto its successor
                del pp[i - 1]
                i, length = i - 1, length - 1
        elif ch.chtype == chunk_type[GROUP]:
            # let getnextarg() work on the group, then look again at the
            # same index
            length, newi = getnextarg(length, buf, pp, i - 1)
            i = i - 1
##            str = flattext(buf, crcopy(pp[i-1:newi]))
##            del pp[i:newi]
##            length = length - (newi - i)
##            ch.chtype = chunk_type[PLAIN]
##            ch.data = str
        else:
            pass

    dumpit(buf, wobj.write, pp)
    ##print 'FLATTEXT: RETURNING ' + `wobj.data`
    return wobj.data
# try to generate node names (a bit shorter than the chapter title)
# note that the \nodename command (see elsewhere) overrules these efforts
def invent_node_names(text):
    """Return a node name for the title `text`.

    A few title shapes are shortened; every other title is returned
    unchanged:
      'Built-in X'   (X not 'modules' or 'functions')  -> 'X'
      'A module X'                                      -> 'X'
      'A object X'                                      -> 'A object'
      '... methods and data attributes' (5+ words)      -> first two words
    """
    words = string.split(text)
    count = len(words)
    if count == 2:
        if string.lower(words[0]) == 'built-in' \
           and string.lower(words[1]) not in ('modules', 'functions'):
            return words[1]
    elif count == 3:
        kind = string.lower(words[1])
        if kind == 'module':
            return words[2]
        if kind == 'object':
            return string.join(words[0:2])
    elif count > 4:
        tail = string.lower(string.join(words[-4:]))
        if tail == 'methods and data attributes':
            return string.join(words[:2])
    return text
# one of the characters , ` ' @ { }
re_commas_etc = regex.compile('[,`\'@{}]')

# a (possibly empty) run of blanks
# NOTE(review): the character class is reconstructed as space+tab; the
# extraction may have dropped the space -- confirm
re_whitespace = regex.compile('[ \t]*')
##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
# look if the next non-white stuff is also a command, resulting in skipping
# double endlines (DENDLINE) too, and thus omitting \par's
# Sometimes this is too much, maybe consider DENDLINE's as stop
def next_command_p(length, buf, pp, i, cmdname):
    # Look ahead from pp[i] for the control sequence named `cmdname'.
    #
    # ENDLINE and DENDLINE chunks, and PLAIN chunks made up solely of
    # blanks and tabs, are skipped.  If the first chunk that is not
    # skipped is a CSNAME whose text equals cmdname, the index just past
    # that chunk is returned.  In every other case the result is -1.
    # `length' is not used here; len(pp) is consulted directly.
    while i < len(pp):
        ch = pp[i]
        i = i + 1
        if ch.chtype == chunk_type[ENDLINE] \
           or ch.chtype == chunk_type[DENDLINE]:
            continue
        if ch.chtype == chunk_type[PLAIN]:
            # fetch the text once instead of three times
            text = s(buf, ch.data)
            if re_whitespace.search(text) == 0 \
               and re_whitespace.match(text) == len(text):
                continue
            return -1
        if ch.chtype == chunk_type[CSNAME] \
           and s(buf, ch.data) == cmdname:
            return i    # _after_ the command
        # a different command, or some other kind of chunk
        return -1
    # Ran out of chunks without finding anything significant.  Answer -1
    # explicitly rather than falling off the end of the function, so the
    # result is always an integer for callers that test `< 0'.
    return -1
# things that are special to LaTeX, but not to texi..
# changeit() turns a one-character control sequence found in this string
# into a PLAIN chunk.
onlylatexspecial = '_~^$#&%'

# Empty class used purely as an attribute container.
class Struct:
    pass

# Module-level state shared by the conversion routines; startchange()
# fills in the initial attributes.
hist = Struct()
out = Struct()
def startchange():
    # Reset the bookkeeping attributes on the module-level `hist' and
    # `out' objects.  Only attributes are assigned, the names themselves
    # are never rebound, so no `global' declaration is required.
    for attr in ('inenv', 'nodenames', 'cindex'):
        setattr(hist, attr, [])         # a fresh list for each attribute
    for attr in ('doublenodes', 'doublecindeces'):
        setattr(out, attr, [])

    hist.chaptertype = "chapter"
    hist.this_module = None
    hist.inargs = 0
    hist.itemizenesting = 0
    hist.enumeratenesting = 0
# Prebuilt one-element chunk lists: a blank, a ", " separator and a
# `cindex' CSLINE.  The second chunk() argument is 0 here where other
# code passes ch.where -- presumably the source position; confirm
# against the chunk class.
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]

# the standard variation in symbols for itemize
# (changeit() indexes this list with hist.itemizenesting)
itemizesymbols = ['bullet', 'minus', 'dots']

# same for enumerate
# (changeit() indexes this list with hist.enumeratenesting)
enumeratesymbols = ['1', 'A', 'a']
# Map of things that convert one-to-one. Each entry is a 3-tuple:
#
# new_chtype, new_data, nix_trailing_empty_group
#
d = {}

# LaTeX markup commands that all turn into the `code' command:
for name in ('url', 'module', 'function', 'cfunction', 'keyword',
             'method', 'exception', 'constant', 'email', 'class',
             'member', 'cdata', 'ctype', 'sectcode', 'verb',
             'mimetype', 'newsgroup'):
    d[name] = chunk_type[CSNAME], 'code', 0

# commands that keep their own name:
for name in ('emph', 'var', 'strong', 'code', 'kbd', 'key',
             'dfn', 'samp', 'file', 'r', 'i', 't'):
    d[name] = chunk_type[CSNAME], name, 0

# commands that are renamed individually:
for name, value in [('character', 'samp'),
                    ('program', 'strong'),
                    ('\\', '*')]:
    d[name] = chunk_type[CSNAME], value, 0

# Commands that become plain text.  These loops run after the CSNAME
# entries above, so a name that also occurs in `themselves' or
# `wordsselves' ends up with the PLAIN entry, as before.
for name in themselves:
    d[name] = chunk_type[PLAIN], name, 0
for name in wordsselves:
    # third field 1: also drop an empty group following the command
    d[name] = chunk_type[PLAIN], name, 1
for name in ',[]()':
    d[name] = chunk_type[PLAIN], name, 0

# a lot of these are LaTeX2e additions
for name, value in [('quotedblbase', ',,'),
                    ('quotesinglbase', ','),
                    ('textquotedbl', '"'),
                    ('LaTeXe', 'LaTeX2e'),
                    ('e', '\\'),
                    ('textquotedblleft', "``"),
                    ('textquotedblright', "''"),
                    ('textquoteleft', "`"),
                    ('textquoteright', "'"),
                    ('textbackslash', '\\'),
                    ('textbar', '|'),
                    ('textless', '<'),
                    ('textgreater', '>'),
                    ('textasciicircum', '^'),
                    ('Cpp', 'C++'),
                    ('copyright', '')]:
    d[name] = chunk_type[PLAIN], value, 1

# Only the two bound methods stay visible; the table itself and the loop
# variables are removed from the module namespace.
convertible_csname = d.has_key
conversion = d.get
del d, name, value
##
## \begin{ {func,data,exc}desc }{name}...
## the resulting texi-code is dependent on the contents of indexsubitem
##
# indexsubitem: `['XXX', 'function']`
# funcdesc:
# deffn {`idxsi`} NAME (FUNCARGS)
# indexsubitem: `['XXX', 'method']`
# funcdesc:
# defmethod {`idxsi[0]`} NAME (FUNCARGS)
# indexsubitem: `['in', 'module', 'MODNAME']`
# datadesc:
# defcv data {`idxsi[1:]`} NAME
# excdesc:
# defcv exception {`idxsi[1:]`} NAME
# funcdesc:
# deffn {function of `idxsi[1:]`} NAME (FUNCARGS)
# indexsubitem: `['OBJECT', 'attribute']`
# datadesc
# defcv attribute {`OBJECT`} NAME
## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
## or \funcline{NAME}{ARGS}
##
def do_funcdesc(length, buf, pp, i, index=1):
    # Turn the command chunk at pp[i-1] and its two arguments (function
    # name, parameter list) into a `deffn' CSLINE chunk followed by one
    # GROUP chunk holding "{category} NAME (ARGS)".
    #
    # Both arguments are fetched with getnextarg() and removed from pp;
    # the new GROUP is inserted at pp[i].  Returns (length, i) with
    # length kept equal to len(pp) and i pointing just past the GROUP.
    # hist.command is set to 'deffn'.  `index' is accepted but not used
    # in this function.
    ch = pp[i-1]
    wh = ch.where

    # first argument: the function name
    length, newi = getnextarg(length, buf, pp, i)
    funcname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    # second argument: the parameter list, fetched with hist.inargs set.
    # try/finally restores the flag even when getnextarg() raises.
    save = hist.inargs
    hist.inargs = 1
    try:
        length, newi = getnextarg(length, buf, pp, i)
    finally:
        hist.inargs = save
    the_args = [chunk(PLAIN, wh, '(')] + pp[i:newi] + \
               [chunk(PLAIN, wh, ')')]
    del pp[i:newi]
    length = length - (newi-i)

    # (hist.indexsubitem used to be read here without being used; the
    # read is gone so that a missing attribute cannot abort the call)
    command = 'deffn'
    if hist.this_module:
        cat_class = 'function of ' + hist.this_module
    else:
        cat_class = 'built-in function'
    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)]),
                 chunk(PLAIN, wh, ' '),
                 funcname,
                 chunk(PLAIN, wh, ' ')] + the_args

    pp.insert(i, chunk(GROUP, wh, cslinearg))
    hist.command = command
    return length+1, i+1
## this routine will be called on \begin{excdesc}{NAME}
## or \excline{NAME}
##
def do_excdesc(length, buf, pp, i):
    # Turn the command chunk at pp[i-1] and its single argument (the
    # exception name) into a CSLINE chunk followed by one GROUP chunk
    # holding "{category} NAME".
    #
    # The command is `defvr' when hist.indexsubitem equals
    # ['built-in', 'exception', 'base', 'class'] and `defcv' otherwise.
    # Returns (length, i) with i just past the inserted GROUP;
    # hist.command records the command that was chosen.
    ch = pp[i-1]
    wh = ch.where

    length, argend = getnextarg(length, buf, pp, i)
    excname = chunk(GROUP, wh, pp[i:argend])
    del pp[i:argend]
    length = length - (argend - i)

    # class_class is never given a non-empty value in this routine, so
    # the optional class group below is never emitted
    class_class = ''
    if hist.indexsubitem == ['built-in', 'exception', 'base', 'class']:
        command, cat_class = 'defvr', 'exception base class'
    else:
        command, cat_class = 'defcv', 'exception'

    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    parts = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)]),
             chunk(PLAIN, wh, ' ')]
    if class_class:
        parts = parts + [chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]),
                         chunk(PLAIN, wh, ' ')]
    parts.append(excname)

    pp.insert(i, chunk(GROUP, wh, parts))
    hist.command = command
    return length + 1, i + 1
## same for datadesc or dataline...
def do_datadesc(length, buf, pp, i, index=1):
    # Turn the command chunk at pp[i-1] and its single argument (the
    # data name) into a `defcv' CSLINE chunk followed by one GROUP chunk
    # holding "{category} {class} NAME".
    #
    # Category and class are derived from the words in hist.indexsubitem:
    #   [..., 'attribute'] / [..., 'option'] --> category is the last
    #                                word, class the words before it
    #   ['in', 'module', M]          --> category 'data', class 'module M'
    #   ['data', 'in', 'module', M]  --> category 'data', class 'module M'
    #   anything else                --> category 'data', class all words
    # Returns (length, i) with i just past the inserted GROUP and sets
    # hist.command to 'defcv'.  `index' is accepted but not used here.
    ch = pp[i-1]
    wh = ch.where

    length, newi = getnextarg(length, buf, pp, i)
    dataname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    idxsi = hist.indexsubitem # words
    command = 'defcv'
    cat_class = 'data'
    # `idxsi and ...' guards against an empty word list, which would
    # otherwise make idxsi[-1] raise IndexError; an empty list ends up in
    # the final branch and gives an empty class
    if idxsi and idxsi[-1] in ('attribute', 'option'):
        cat_class = idxsi[-1]
        class_class = string.join(idxsi[:-1])
    elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
        class_class = string.join(idxsi[1:])
    elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
        class_class = string.join(idxsi[2:])
    else:
        class_class = string.join(idxsi)

    ch.chtype = chunk_type[CSLINE]
    ch.data = command

    cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
    cslinearg.append(chunk(PLAIN, wh, ' '))
    if class_class:
        # the class group is left out entirely when there is no class text
        cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
        cslinearg.append(chunk(PLAIN, wh, ' '))
    cslinearg.append(dataname)

    pp.insert(i, chunk(GROUP, wh, cslinearg))
    hist.command = command
    return length+1, i+1
def do_opcodedesc(length, buf, pp, i):
    # Turn the command chunk at pp[i-1] and its first argument (the
    # opcode name) into a `deffn' CSLINE chunk; the chunk that follows
    # the name is wrapped, together with the category text and the name,
    # in a single GROUP that replaces it at pp[i].
    #
    # Returns (length, i); i is NOT advanced past the new GROUP.
    # hist.command is set to 'deffn'.
    ch = pp[i-1]
    wh = ch.where

    length, newi = getnextarg(length, buf, pp, i)
    dataname = chunk(GROUP, wh, pp[i:newi])
    del pp[i:newi]
    length = length - (newi-i)

    # store the chunk_type entry, as every other do_*desc routine does,
    # rather than the bare CSLINE constant
    ch.chtype = chunk_type[CSLINE]
    ch.data = "deffn"

    # NOTE(review): the category text appears twice, once as a PLAIN
    # chunk with backslash-escaped blanks and once as a GROUP; kept
    # exactly as it was -- confirm which form the writer expects.
    cslinearg = [chunk(PLAIN, wh, 'byte\ code\ instruction'),
                 chunk(GROUP, wh, [chunk(PLAIN, wh, "byte code instruction")]),
                 chunk(PLAIN, wh, ' '),
                 dataname,
                 chunk(PLAIN, wh, ' '),
                 pp[i],
                 ]

    pp[i] = chunk(GROUP, wh, cslinearg)
    hist.command = ch.data
    return length, i
def
add_module_index
(
pp
,
length
,
i
,
buf
,
ch
,
extra
,
ref
=
1
):
ch
.
chtype
=
chunk_type
[
CSLINE
]
ch
.
data
=
'pindex'
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
ingroupch
=
pp
[
i
:
newi
]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
if
not
ref
:
if
len
(
ingroupch
)
==
1
:
hist
.
this_module
=
s
(
buf
,
ch
.
data
)
else
:
hist
.
this_module
=
None
print
'add_module_index() error ==>'
,
ingroupch
if
extra
:
ingroupch
.
append
(
chunk
(
PLAIN
,
ch
.
where
,
' '
))
ingroupch
.
append
(
chunk
(
CSNAME
,
ch
.
where
,
'r'
))
ingroupch
.
append
(
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
extra
)]))
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
ingroupch
))
return
length
+
1
,
i
+
1
def
yank_indexsubitem
(
pp
,
length
,
i
,
buf
,
ch
,
cmdname
):
stuff
=
pp
[
i
].
data
if
len
(
stuff
)
!=
1
:
raise
error
,
"first parameter to
\
\
%s too long"
%
cmdname
if
pp
[
i
].
chtype
!=
chunk_type
[
GROUP
]:
raise
error
,
"bad chunk type following
\
\
%s"
\
"
\
n
expected GROUP, got %s"
+
(
cmdname
,
str
(
ch
.
chtype
))
text
=
s
(
buf
,
stuff
[
0
].
data
)
if
text
[:
1
]
!=
'('
or
text
[
-
1
:]
!=
')'
:
raise
error
,
\
'expected indexsubitem enclosed in parenteses'
hist
.
indexsubitem
=
string
.
split
(
text
[
1
:
-
1
])
del
pp
[
i
-
1
:
i
+
1
]
return
length
-
2
,
i
-
1
# regular indices: those that are not set in tt font by default....
# One-element tuple (note the trailing comma).  For an index command
# listed here the \ttindex handling in changeit() wraps the entry in
# `t{...}'; for all other commands it wraps the category text in `r{...}'.
regindices = ('cindex', )
# remove illegal characters from node names
def
rm_commas_etc
(
text
):
result
=
''
changed
=
0
while
1
:
pos
=
re_commas_etc
.
search
(
text
)
if
pos
>=
0
:
changed
=
1
result
=
result
+
text
[:
pos
]
text
=
text
[
pos
+
1
:]
else
:
result
=
result
+
text
break
if
changed
:
print
'Warning: nodename changed to '
+
`result`
return
result
# boolean flags
# Values consulted by changeit() when it meets an IF chunk; a flag name
# that is not a key of this dictionary makes changeit() raise `error'.
flags = {'texi': 1}


# map of \label{} to node names
label_nodes = {}
##
## changeit: the actual routine, that changes the contents of the parsed
## chunks
##
def
changeit
(
buf
,
pp
):
global
onlylatexspecial
,
hist
,
out
i
,
length
=
0
,
len
(
pp
)
while
1
:
# sanity check: length should always equal len(pp)
if
len
(
pp
)
!=
length
:
print
i
,
pp
[
i
]
raise
'FATAL'
,
'inconsistent length. thought '
+
`length`
+
', but should really be '
+
`len(pp)`
if
i
>=
length
:
break
ch
=
pp
[
i
]
i
=
i
+
1
if
type
(
ch
)
is
StringType
:
#normally, only chunks are present in pp,
# but in some cases, some extra info
# has been inserted, e.g., the \end{...} clauses
raise
'FATAL'
,
'got string, probably too many '
+
`end`
if
ch
.
chtype
==
chunk_type
[
GROUP
]:
# check for {\em ...} constructs
data
=
ch
.
data
if
data
and
\
data
[
0
].
chtype
==
chunk_type
[
CSNAME
]
and
\
fontchanges
.
has_key
(
s
(
buf
,
data
[
0
].
data
)):
k
=
s
(
buf
,
data
[
0
].
data
)
del
data
[
0
]
pp
.
insert
(
i
-
1
,
chunk
(
CSNAME
,
ch
.
where
,
fontchanges
[
k
]))
length
,
i
=
length
+
1
,
i
+
1
elif
data
:
if
len
(
data
)
\
and
data
[
0
].
chtype
==
chunk_type
[
GROUP
]
\
and
len
(
data
[
0
].
data
)
\
and
data
[
0
].
data
[
0
].
chtype
==
chunk_type
[
CSNAME
]
\
and
s
(
buf
,
data
[
0
].
data
[
0
].
data
)
==
'e'
:
data
[
0
]
=
data
[
0
].
data
[
0
]
print
"invoking
\
\
e magic group transform..."
else
:
## print "GROUP -- ch.data[0].data =", ch.data[0].data
k
=
s
(
buf
,
data
[
0
].
data
)
if
k
==
"fulllineitems"
:
del
data
[
0
]
pp
[
i
-
1
:
i
]
=
data
i
=
i
-
1
length
=
length
+
len
(
data
)
-
1
continue
# recursively parse the contents of the group
changeit
(
buf
,
data
)
elif
ch
.
chtype
==
chunk_type
[
IF
]:
# \if...
flag
,
negate
,
data
=
ch
.
data
##print 'IF: flag, negate = ' + `flag, negate`
if
flag
not
in
flags
.
keys
():
raise
error
,
'unknown flag '
+
`flag`
value
=
flags
[
flag
]
if
negate
:
value
=
(
not
value
)
del
pp
[
i
-
1
]
length
,
i
=
length
-
1
,
i
-
1
if
value
:
pp
[
i
:
i
]
=
data
length
=
length
+
len
(
data
)
elif
ch
.
chtype
==
chunk_type
[
ENV
]:
# \begin{...} ....
envname
,
data
=
ch
.
data
#push this environment name on stack
hist
.
inenv
.
insert
(
0
,
envname
)
#append an endenv chunk after grouped data
data
.
append
(
chunk
(
ENDENV
,
ch
.
where
,
envname
))
##[`data`]
#delete this object
del
pp
[
i
-
1
]
i
,
length
=
i
-
1
,
length
-
1
#insert found data
pp
[
i
:
i
]
=
data
length
=
length
+
len
(
data
)
if
envname
==
'verbatim'
:
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'example'
),
chunk
(
GROUP
,
ch
.
where
,
[])]
length
,
i
=
length
+
2
,
i
+
2
elif
envname
in
(
'itemize'
,
'list'
,
'fulllineitems'
):
if
hist
.
itemizenesting
>
len
(
itemizesymbols
):
raise
error
,
'too deep itemize nesting'
if
envname
==
'list'
:
del
pp
[
i
:
i
+
2
]
length
=
length
-
2
ingroupch
=
[
chunk
(
CSNAME
,
ch
.
where
,
itemizesymbols
[
hist
.
itemizenesting
])]
hist
.
itemizenesting
=
hist
.
itemizenesting
+
1
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'itemize'
),
chunk
(
GROUP
,
ch
.
where
,
ingroupch
)]
length
,
i
=
length
+
2
,
i
+
2
elif
envname
==
'enumerate'
:
if
hist
.
enumeratenesting
>
len
(
enumeratesymbols
):
raise
error
,
'too deep enumerate nesting'
ingroupch
=
[
chunk
(
PLAIN
,
ch
.
where
,
enumeratesymbols
[
hist
.
enumeratenesting
])]
hist
.
enumeratenesting
=
hist
.
enumeratenesting
+
1
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'enumerate'
),
chunk
(
GROUP
,
ch
.
where
,
ingroupch
)]
length
,
i
=
length
+
2
,
i
+
2
elif
envname
==
'description'
:
ingroupch
=
[
chunk
(
CSNAME
,
ch
.
where
,
'b'
)]
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'table'
),
chunk
(
GROUP
,
ch
.
where
,
ingroupch
)]
length
,
i
=
length
+
2
,
i
+
2
elif
(
envname
==
'tableiii'
)
or
(
envname
==
'tableii'
):
if
(
envname
==
'tableii'
):
ltable
=
2
else
:
ltable
=
3
wh
=
ch
.
where
newcode
=
[]
#delete tabular format description
# e.g., {|l|c|l|}
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
newcode
.
append
(
chunk
(
CSLINE
,
wh
,
'table'
))
ingroupch
=
[
chunk
(
CSNAME
,
wh
,
'asis'
)]
newcode
.
append
(
chunk
(
GROUP
,
wh
,
ingroupch
))
newcode
.
append
(
chunk
(
CSLINE
,
wh
,
'item'
))
#get the name of macro for @item
# e.g., {code}
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
if
newi
-
i
!=
1
:
raise
error
,
'Sorry, expected 1 chunk argument'
if
pp
[
i
].
chtype
!=
chunk_type
[
PLAIN
]:
raise
error
,
'Sorry, expected plain text argument'
hist
.
itemargmacro
=
s
(
buf
,
pp
[
i
].
data
)
if
convertible_csname
(
hist
.
itemargmacro
):
hist
.
itemargmacro
=
conversion
(
hist
.
itemargmacro
)[
1
]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
itembody
=
[]
for
count
in
range
(
ltable
):
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
emphgroup
=
[
chunk
(
CSNAME
,
wh
,
'emph'
),
chunk
(
GROUP
,
0
,
pp
[
i
:
newi
])]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
if
count
==
0
:
itemarg
=
emphgroup
elif
count
==
ltable
-
1
:
itembody
=
itembody
+
\
[
chunk
(
PLAIN
,
wh
,
' --- '
)]
+
emphgroup
else
:
itembody
=
emphgroup
newcode
.
append
(
chunk
(
GROUP
,
wh
,
itemarg
))
newcode
=
newcode
+
itembody
+
[
chunk
(
DENDLINE
,
wh
,
'
\
n
'
)]
pp
[
i
:
i
]
=
newcode
l
=
len
(
newcode
)
length
,
i
=
length
+
l
,
i
+
l
del
newcode
,
l
if
length
!=
len
(
pp
):
raise
'STILL, SOMETHING wrong'
,
`i`
elif
envname
in
(
'methoddesc'
,
'methoddescni'
):
length
,
newi
=
getoptarg
(
length
,
buf
,
pp
,
i
)
ingroupch
=
pp
[
i
:
newi
]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
#
pp
.
insert
(
i
,
chunk
(
PLAIN
,
ch
.
where
,
''
))
i
,
length
=
i
+
1
,
length
+
1
length
,
i
=
do_funcdesc
(
length
,
buf
,
pp
,
i
,
envname
[
-
2
:]
!=
"ni"
)
elif
envname
in
(
'memberdesc'
,
'memberdescni'
):
length
,
newi
=
getoptarg
(
length
,
buf
,
pp
,
i
)
ingroupch
=
pp
[
i
:
newi
]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
#
pp
.
insert
(
i
,
chunk
(
PLAIN
,
ch
.
where
,
''
))
i
,
length
=
i
+
1
,
length
+
1
length
,
i
=
do_datadesc
(
length
,
buf
,
pp
,
i
,
envname
[
-
2
:]
!=
"ni"
)
elif
envname
in
(
'funcdesc'
,
'funcdescni'
,
'classdesc'
):
pp
.
insert
(
i
,
chunk
(
PLAIN
,
ch
.
where
,
''
))
i
,
length
=
i
+
1
,
length
+
1
length
,
i
=
do_funcdesc
(
length
,
buf
,
pp
,
i
,
envname
[
-
2
:]
!=
"ni"
)
elif
envname
==
'excdesc'
:
pp
.
insert
(
i
,
chunk
(
PLAIN
,
ch
.
where
,
''
))
i
,
length
=
i
+
1
,
length
+
1
length
,
i
=
do_excdesc
(
length
,
buf
,
pp
,
i
)
elif
envname
in
(
'datadesc'
,
'datadescni'
):
pp
.
insert
(
i
,
chunk
(
PLAIN
,
ch
.
where
,
''
))
i
,
length
=
i
+
1
,
length
+
1
length
,
i
=
do_datadesc
(
length
,
buf
,
pp
,
i
,
envname
[
-
2
:]
!=
"ni"
)
elif
envname
==
'opcodedesc'
:
pp
.
insert
(
i
,
chunk
(
PLAIN
,
ch
.
where
,
''
))
i
,
length
=
i
+
1
,
length
+
1
length
,
i
=
do_opcodedesc
(
length
,
buf
,
pp
,
i
)
elif
envname
==
'seealso'
:
chunks
=
[
chunk
(
ENDLINE
,
ch
.
where
,
"
\
n
"
),
chunk
(
CSNAME
,
ch
.
where
,
"b"
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
"See also: "
)]),
chunk
(
ENDLINE
,
ch
.
where
,
"
\
n
"
),
chunk
(
ENDLINE
,
ch
.
where
,
"
\
n
"
)]
pp
[
i
-
1
:
i
]
=
chunks
length
=
length
+
len
(
chunks
)
-
1
i
=
i
+
len
(
chunks
)
-
1
elif
envname
in
(
'sloppypar'
,
'flushleft'
,
'document'
):
pass
else
:
print
'WARNING: don
\
'
t know what to do with env '
+
`envname`
elif
ch
.
chtype
==
chunk_type
[
ENDENV
]:
envname
=
ch
.
data
if
envname
!=
hist
.
inenv
[
0
]:
raise
error
,
'
\
'
end
\
'
does not match. Name '
+
`envname`
+
', expected '
+
`hist.inenv[0]`
del
hist
.
inenv
[
0
]
del
pp
[
i
-
1
]
i
,
length
=
i
-
1
,
length
-
1
if
envname
==
'verbatim'
:
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'end'
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
'example'
)])]
i
,
length
=
i
+
2
,
length
+
2
elif
envname
in
(
'itemize'
,
'list'
,
'fulllineitems'
):
hist
.
itemizenesting
=
hist
.
itemizenesting
-
1
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'end'
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
'itemize'
)])]
i
,
length
=
i
+
2
,
length
+
2
elif
envname
==
'enumerate'
:
hist
.
enumeratenesting
=
hist
.
enumeratenesting
-
1
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'end'
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
'enumerate'
)])]
i
,
length
=
i
+
2
,
length
+
2
elif
envname
==
'description'
:
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'end'
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
'table'
)])]
i
,
length
=
i
+
2
,
length
+
2
elif
(
envname
==
'tableiii'
)
or
(
envname
==
'tableii'
):
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'end'
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
'table'
)])]
i
,
length
=
i
+
2
,
length
+
2
pp
.
insert
(
i
,
chunk
(
DENDLINE
,
ch
.
where
,
'
\
n
'
))
i
,
length
=
i
+
1
,
length
+
1
elif
envname
in
(
'funcdesc'
,
'excdesc'
,
'datadesc'
,
'classdesc'
,
'funcdescni'
,
'datadescni'
,
'methoddesc'
,
'memberdesc'
,
'methoddescni'
,
'memberdescni'
,
):
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'end'
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
hist
.
command
)])]
i
,
length
=
i
+
2
,
length
+
2
elif
envname
==
'opcodedesc'
:
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'end'
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
"deffn"
)])]
i
,
length
=
i
+
2
,
length
+
2
elif
envname
in
(
'seealso'
,
'sloppypar'
,
'flushleft'
,
'document'
):
pass
else
:
print
'WARNING: ending env %s has no actions'
%
`envname`
elif
ch
.
chtype
==
chunk_type
[
CSNAME
]:
# control name transformations
s_buf_data
=
s
(
buf
,
ch
.
data
)
if
s_buf_data
==
'optional'
:
pp
[
i
-
1
].
chtype
=
chunk_type
[
PLAIN
]
pp
[
i
-
1
].
data
=
'['
if
(
i
<
length
)
and
\
(
pp
[
i
].
chtype
==
chunk_type
[
GROUP
]):
cp
=
pp
[
i
].
data
pp
[
i
:
i
+
1
]
=
cp
+
[
chunk
(
PLAIN
,
ch
.
where
,
']'
)]
length
=
length
+
len
(
cp
)
elif
s_buf_data
in
ignoredcommands
:
del
pp
[
i
-
1
]
i
,
length
=
i
-
1
,
length
-
1
elif
s_buf_data
==
'@'
and
\
i
!=
length
and
\
pp
[
i
].
chtype
==
chunk_type
[
PLAIN
]
and
\
s
(
buf
,
pp
[
i
].
data
)[
0
]
==
'.'
:
# \@. --> \. --> @.
ch
.
data
=
'.'
del
pp
[
i
]
length
=
length
-
1
elif
convertible_csname
(
s_buf_data
):
ch
.
chtype
,
ch
.
data
,
nix
=
conversion
(
s_buf_data
)
try
:
if
nix
and
pp
[
i
].
chtype
==
chunk_type
[
GROUP
]
\
and
len
(
pp
[
i
].
data
)
==
0
:
del
pp
[
i
]
length
=
length
-
1
except
IndexError
:
pass
elif
s_buf_data
==
'
\
\
'
:
# \\ --> \* --> @*
ch
.
data
=
'*'
elif
len
(
s_buf_data
)
==
1
and
\
s_buf_data
in
onlylatexspecial
:
ch
.
chtype
=
chunk_type
[
PLAIN
]
# check if such a command is followed by
# an empty group: e.g., `\%{}'. If so, remove
# this empty group too
if
i
<
length
and
\
pp
[
i
].
chtype
==
chunk_type
[
GROUP
]
\
and
len
(
pp
[
i
].
data
)
==
0
:
del
pp
[
i
]
length
=
length
-
1
elif
s_buf_data
==
"appendix"
:
hist
.
chaptertype
=
"appendix"
del
pp
[
i
-
1
]
i
,
length
=
i
-
1
,
length
-
1
elif
hist
.
inargs
and
s_buf_data
in
inargsselves
:
# This is the special processing of the
# arguments of the \begin{funcdesc}... or
# \funcline... arguments
# \, --> , \[ --> [, \] --> ]
ch
.
chtype
=
chunk_type
[
PLAIN
]
elif
s_buf_data
==
'setindexsubitem'
:
length
,
i
=
yank_indexsubitem
(
pp
,
length
,
i
,
buf
,
ch
,
'setindexsubitem'
)
elif
s_buf_data
==
'withsubitem'
:
oldsubitem
=
hist
.
indexsubitem
try
:
length
,
i
=
yank_indexsubitem
(
pp
,
length
,
i
,
buf
,
ch
,
'withsubitem'
)
stuff
=
pp
[
i
].
data
del
pp
[
i
]
length
=
length
-
1
changeit
(
buf
,
stuff
)
stuff
=
None
finally
:
hist
.
indexsubitem
=
oldsubitem
elif
s_buf_data
in
(
'textrm'
,
'pytype'
):
stuff
=
pp
[
i
].
data
pp
[
i
-
1
:
i
+
1
]
=
stuff
length
=
length
-
2
+
len
(
stuff
)
stuff
=
None
i
=
i
-
1
elif
s_buf_data
==
'newcommand'
:
print
"ignoring definition of
\
\
"
+
s
(
buf
,
pp
[
i
].
data
[
0
].
data
)
del
pp
[
i
-
1
:
i
+
2
]
i
=
i
-
1
length
=
length
-
3
elif
s_buf_data
==
'renewcommand'
:
print
"ignoring redefinition of
\
\
"
\
+
s
(
buf
,
pp
[
i
].
data
[
0
].
data
)
del
pp
[
i
-
1
:
i
+
2
]
i
=
i
-
1
length
=
length
-
3
elif
s_buf_data
==
'mbox'
:
stuff
=
pp
[
i
].
data
pp
[
i
-
1
:
i
+
1
]
=
stuff
i
=
i
-
1
length
=
length
+
len
(
stuff
)
-
2
stuff
=
None
elif
s_buf_data
==
'version'
:
ch
.
chtype
=
chunk_type
[
PLAIN
]
ch
.
data
=
release_version
elif
s_buf_data
==
'item'
:
ch
.
chtype
=
chunk_type
[
CSLINE
]
length
,
newi
=
getoptarg
(
length
,
buf
,
pp
,
i
)
ingroupch
=
pp
[
i
:
newi
]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
changeit
(
buf
,
ingroupch
)
# catch stuff inside the optional arg
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
ingroupch
))
i
,
length
=
i
+
1
,
length
+
1
elif
s_buf_data
==
'ttindex'
:
idxsi
=
hist
.
indexsubitem
cat_class
=
''
if
len
(
idxsi
)
>=
2
and
idxsi
[
1
]
in
\
(
'method'
,
'function'
,
'protocol'
):
command
=
'findex'
elif
len
(
idxsi
)
>=
2
and
idxsi
[
1
]
in
\
(
'exception'
,
'object'
):
command
=
'vindex'
elif
len
(
idxsi
)
==
3
and
idxsi
[:
2
]
==
[
'in'
,
'module'
]:
command
=
'cindex'
elif
len
(
idxsi
)
==
3
and
idxsi
[:
2
]
==
[
'class'
,
'in'
]:
command
=
'findex'
else
:
print
'WARNING: can
\
'
t categorize '
+
`idxsi`
\
+
' for
\
'
ttindex
\
'
command'
command
=
'cindex'
if
not
cat_class
:
cat_class
=
'(%s)'
%
string
.
join
(
idxsi
)
ch
.
chtype
=
chunk_type
[
CSLINE
]
ch
.
data
=
command
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
arg
=
pp
[
i
:
newi
]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
cat_arg
=
[
chunk
(
PLAIN
,
ch
.
where
,
cat_class
)]
# determine what should be set in roman, and
# what in tt-font
if
command
in
regindices
:
arg
=
[
chunk
(
CSNAME
,
ch
.
where
,
't'
),
chunk
(
GROUP
,
ch
.
where
,
arg
)]
else
:
cat_arg
=
[
chunk
(
CSNAME
,
ch
.
where
,
'r'
),
chunk
(
GROUP
,
ch
.
where
,
cat_arg
)]
ingroupch
=
arg
+
\
[
chunk
(
PLAIN
,
ch
.
where
,
' '
)]
+
\
cat_arg
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
ingroupch
))
length
,
i
=
length
+
1
,
i
+
1
elif
s_buf_data
==
'ldots'
:
# \ldots --> \dots{} --> @dots{}
ch
.
data
=
'dots'
if
i
==
length
\
or
pp
[
i
].
chtype
!=
chunk_type
[
GROUP
]
\
or
pp
[
i
].
data
!=
[]:
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
[]))
i
,
length
=
i
+
1
,
length
+
1
elif
s_buf_data
in
themselves
:
# \UNIX --> &UNIX;
ch
.
chtype
=
chunk_type
[
PLAIN
]
if
i
!=
length
\
and
pp
[
i
].
chtype
==
chunk_type
[
GROUP
]
\
and
pp
[
i
].
data
==
[]:
del
pp
[
i
]
length
=
length
-
1
elif
s_buf_data
==
'manpage'
:
ch
.
data
=
'emph'
sect
=
s
(
buf
,
pp
[
i
+
1
].
data
[
0
].
data
)
pp
[
i
+
1
].
data
=
"(%s)"
%
sect
pp
[
i
+
1
].
chtype
=
chunk_type
[
PLAIN
]
elif
s_buf_data
==
'envvar'
:
# this should do stuff in the index, too...
ch
.
data
=
"$"
ch
.
chtype
=
chunk_type
[
PLAIN
]
pp
[
i
]
=
pp
[
i
].
data
[
0
]
elif
s_buf_data
==
'regexp'
:
ch
.
data
=
'code'
pp
.
insert
(
i
+
1
,
chunk
(
PLAIN
,
ch
.
where
,
'"'
))
pp
.
insert
(
i
-
1
,
chunk
(
PLAIN
,
ch
.
where
,
'"'
))
length
=
length
+
2
i
=
i
+
1
elif
s_buf_data
in
(
'lineiii'
,
'lineii'
):
# This is the most tricky one
# \lineiii{a1}{a2}[{a3}] -->
# @item @<cts. of itemargmacro>{a1}
# a2 [ -- a3]
#
if
not
hist
.
inenv
:
raise
error
,
'no environment for lineiii'
if
(
hist
.
inenv
[
0
]
!=
'tableiii'
)
and
\
(
hist
.
inenv
[
0
]
!=
'tableii'
):
raise
error
,
\
'wrong command (%s) in wrong environment (%s)'
\
%
(
s_buf_data
,
`hist.inenv[0]`
)
ch
.
chtype
=
chunk_type
[
CSLINE
]
ch
.
data
=
'item'
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
ingroupch
=
[
chunk
(
CSNAME
,
0
,
hist
.
itemargmacro
),
chunk
(
GROUP
,
0
,
pp
[
i
:
newi
])]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
ingroupch
))
grouppos
=
i
i
,
length
=
i
+
1
,
length
+
1
length
,
i
=
getnextarg
(
length
,
buf
,
pp
,
i
)
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
if
newi
>
i
:
# we have a 3rd arg
pp
.
insert
(
i
,
chunk
(
PLAIN
,
ch
.
where
,
' --- '
))
i
=
newi
+
1
length
=
length
+
1
if
length
!=
len
(
pp
):
raise
'IN LINEIII IS THE ERR'
,
`i`
elif
s_buf_data
in
(
'chapter'
,
'section'
,
'subsection'
,
'subsubsection'
):
#\xxxsection{A} ---->
# @node A, , ,
# @xxxsection A
## also: remove commas and quotes
hist
.
this_module
=
None
if
s_buf_data
==
"chapter"
:
ch
.
data
=
hist
.
chaptertype
ch
.
chtype
=
chunk_type
[
CSLINE
]
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
afternodenamecmd
=
next_command_p
(
length
,
buf
,
pp
,
newi
,
'nodename'
)
if
afternodenamecmd
<
0
:
cp1
=
crcopy
(
pp
[
i
:
newi
])
pp
[
i
:
newi
]
=
[
chunk
(
GROUP
,
ch
.
where
,
pp
[
i
:
newi
])]
length
,
newi
=
length
-
(
newi
-
i
)
+
1
,
i
+
1
text
=
flattext
(
buf
,
cp1
)
text
=
invent_node_names
(
text
)
else
:
length
,
endarg
=
getnextarg
(
length
,
buf
,
pp
,
afternodenamecmd
)
cp1
=
crcopy
(
pp
[
afternodenamecmd
:
endarg
])
del
pp
[
newi
:
endarg
]
length
=
length
-
(
endarg
-
newi
)
pp
[
i
:
newi
]
=
[
chunk
(
GROUP
,
ch
.
where
,
pp
[
i
:
newi
])]
length
,
newi
=
length
-
(
newi
-
i
)
+
1
,
i
+
1
text
=
flattext
(
buf
,
cp1
)
if
text
[
-
1
]
==
'.'
:
text
=
text
[:
-
1
]
if
text
in
hist
.
nodenames
:
print
'WARNING: node name '
+
`text`
+
' already used'
out
.
doublenodes
.
append
(
text
)
else
:
hist
.
nodenames
.
append
(
text
)
text
=
rm_commas_etc
(
text
)
pp
[
i
-
1
:
i
-
1
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'node'
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
text
+
', , ,'
)
])]
i
,
length
=
newi
+
2
,
length
+
2
elif
s_buf_data
==
'funcline'
:
# fold it to a very short environment
pp
[
i
-
1
:
i
-
1
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'end'
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
hist
.
command
)])]
i
,
length
=
i
+
2
,
length
+
2
length
,
i
=
do_funcdesc
(
length
,
buf
,
pp
,
i
)
elif
s_buf_data
==
'dataline'
:
pp
[
i
-
1
:
i
-
1
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'end'
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
hist
.
command
)])]
i
,
length
=
i
+
2
,
length
+
2
length
,
i
=
do_datadesc
(
length
,
buf
,
pp
,
i
)
elif
s_buf_data
==
'excline'
:
pp
[
i
-
1
:
i
-
1
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'end'
),
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
hist
.
command
)])]
i
,
length
=
i
+
2
,
length
+
2
length
,
i
=
do_excdesc
(
length
,
buf
,
pp
,
i
)
elif
s_buf_data
==
'index'
:
#\index{A} --->
# @cindex A
ch
.
chtype
=
chunk_type
[
CSLINE
]
ch
.
data
=
'cindex'
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
ingroupch
=
pp
[
i
:
newi
]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
ingroupch
))
length
,
i
=
length
+
1
,
i
+
1
elif
s_buf_data
==
'bifuncindex'
:
ch
.
chtype
=
chunk_type
[
CSLINE
]
ch
.
data
=
'findex'
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
ingroupch
=
pp
[
i
:
newi
]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
ingroupch
.
append
(
chunk
(
PLAIN
,
ch
.
where
,
' '
))
ingroupch
.
append
(
chunk
(
CSNAME
,
ch
.
where
,
'r'
))
ingroupch
.
append
(
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
'(built-in function)'
)]))
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
ingroupch
))
length
,
i
=
length
+
1
,
i
+
1
elif
s_buf_data
==
'obindex'
:
ch
.
chtype
=
chunk_type
[
CSLINE
]
ch
.
data
=
'findex'
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
ingroupch
=
pp
[
i
:
newi
]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
ingroupch
.
append
(
chunk
(
PLAIN
,
ch
.
where
,
' '
))
ingroupch
.
append
(
chunk
(
CSNAME
,
ch
.
where
,
'r'
))
ingroupch
.
append
(
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
'(object)'
)]))
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
ingroupch
))
length
,
i
=
length
+
1
,
i
+
1
elif
s_buf_data
==
'opindex'
:
ch
.
chtype
=
chunk_type
[
CSLINE
]
ch
.
data
=
'findex'
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
ingroupch
=
pp
[
i
:
newi
]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
ingroupch
.
append
(
chunk
(
PLAIN
,
ch
.
where
,
' '
))
ingroupch
.
append
(
chunk
(
CSNAME
,
ch
.
where
,
'r'
))
ingroupch
.
append
(
chunk
(
GROUP
,
ch
.
where
,
[
chunk
(
PLAIN
,
ch
.
where
,
'(operator)'
)]))
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
ingroupch
))
length
,
i
=
length
+
1
,
i
+
1
elif
s_buf_data
in
(
'bimodindex'
,
'refbimodindex'
):
length
,
i
=
add_module_index
(
pp
,
length
,
i
,
buf
,
ch
,
'(built-in)'
,
(
s_buf_data
[:
3
]
==
'ref'
))
elif
s_buf_data
in
(
'modindex'
,
'refmodindex'
):
length
,
i
=
add_module_index
(
pp
,
length
,
i
,
buf
,
ch
,
''
,
(
s_buf_data
[:
3
]
==
'ref'
))
elif
s_buf_data
in
(
'stmodindex'
,
'refstmodindex'
):
length
,
i
=
add_module_index
(
pp
,
length
,
i
,
buf
,
ch
,
'(standard)'
,
(
s_buf_data
[:
3
]
==
'ref'
))
elif
s_buf_data
in
(
'exmodindex'
,
'refexmodindex'
):
length
,
i
=
add_module_index
(
pp
,
length
,
i
,
buf
,
ch
,
'(extension)'
,
(
s_buf_data
[:
3
]
==
'ref'
))
elif
s_buf_data
==
'stindex'
:
# XXX must actually go to newindex st
what
=
(
s_buf_data
[:
2
]
==
"st"
)
and
"statement"
or
"keyword"
wh
=
ch
.
where
ch
.
chtype
=
chunk_type
[
CSLINE
]
ch
.
data
=
'cindex'
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
ingroupch
=
[
chunk
(
CSNAME
,
wh
,
'code'
),
chunk
(
GROUP
,
wh
,
pp
[
i
:
newi
])]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
t
=
ingroupch
[:]
t
.
append
(
chunk
(
PLAIN
,
wh
,
' '
+
what
))
pp
.
insert
(
i
,
chunk
(
GROUP
,
wh
,
t
))
i
,
length
=
i
+
1
,
length
+
1
pp
.
insert
(
i
,
chunk
(
CSLINE
,
wh
,
'cindex'
))
i
,
length
=
i
+
1
,
length
+
1
t
=
ingroupch
[:]
t
.
insert
(
0
,
chunk
(
PLAIN
,
wh
,
what
+
', '
))
pp
.
insert
(
i
,
chunk
(
GROUP
,
wh
,
t
))
i
,
length
=
i
+
1
,
length
+
1
elif
s_buf_data
==
'indexii'
:
#\indexii{A}{B} --->
# @cindex A B
# @cindex B, A
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
cp11
=
pp
[
i
:
newi
]
cp21
=
crcopy
(
pp
[
i
:
newi
])
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
cp12
=
pp
[
i
:
newi
]
cp22
=
crcopy
(
pp
[
i
:
newi
])
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
ch
.
chtype
=
chunk_type
[
CSLINE
]
ch
.
data
=
'cindex'
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
cp11
+
[
chunk
(
PLAIN
,
ch
.
where
,
' '
)]
+
cp12
))
i
,
length
=
i
+
1
,
length
+
1
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'cindex'
),
chunk
(
GROUP
,
ch
.
where
,
cp22
+
[
chunk
(
PLAIN
,
ch
.
where
,
', '
)]
+
cp21
)]
i
,
length
=
i
+
2
,
length
+
2
elif
s_buf_data
==
'indexiii'
:
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
cp11
=
pp
[
i
:
newi
]
cp21
=
crcopy
(
pp
[
i
:
newi
])
cp31
=
crcopy
(
pp
[
i
:
newi
])
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
cp12
=
pp
[
i
:
newi
]
cp22
=
crcopy
(
pp
[
i
:
newi
])
cp32
=
crcopy
(
pp
[
i
:
newi
])
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
cp13
=
pp
[
i
:
newi
]
cp23
=
crcopy
(
pp
[
i
:
newi
])
cp33
=
crcopy
(
pp
[
i
:
newi
])
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
ch
.
chtype
=
chunk_type
[
CSLINE
]
ch
.
data
=
'cindex'
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
cp11
+
[
chunk
(
PLAIN
,
ch
.
where
,
' '
)]
+
cp12
+
[
chunk
(
PLAIN
,
ch
.
where
,
' '
)]
+
cp13
))
i
,
length
=
i
+
1
,
length
+
1
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'cindex'
),
chunk
(
GROUP
,
ch
.
where
,
cp22
+
[
chunk
(
PLAIN
,
ch
.
where
,
' '
)]
+
cp23
+
[
chunk
(
PLAIN
,
ch
.
where
,
', '
)]
+
cp21
)]
i
,
length
=
i
+
2
,
length
+
2
pp
[
i
:
i
]
=
[
chunk
(
CSLINE
,
ch
.
where
,
'cindex'
),
chunk
(
GROUP
,
ch
.
where
,
cp33
+
[
chunk
(
PLAIN
,
ch
.
where
,
', '
)]
+
cp31
+
[
chunk
(
PLAIN
,
ch
.
where
,
' '
)]
+
cp32
)]
i
,
length
=
i
+
2
,
length
+
2
elif
s_buf_data
==
'indexiv'
:
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
cp11
=
pp
[
i
:
newi
]
cp21
=
crcopy
(
pp
[
i
:
newi
])
cp31
=
crcopy
(
pp
[
i
:
newi
])
cp41
=
crcopy
(
pp
[
i
:
newi
])
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
cp12
=
pp
[
i
:
newi
]
cp22
=
crcopy
(
pp
[
i
:
newi
])
cp32
=
crcopy
(
pp
[
i
:
newi
])
cp42
=
crcopy
(
pp
[
i
:
newi
])
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
cp13
=
pp
[
i
:
newi
]
cp23
=
crcopy
(
pp
[
i
:
newi
])
cp33
=
crcopy
(
pp
[
i
:
newi
])
cp43
=
crcopy
(
pp
[
i
:
newi
])
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
cp14
=
pp
[
i
:
newi
]
cp24
=
crcopy
(
pp
[
i
:
newi
])
cp34
=
crcopy
(
pp
[
i
:
newi
])
cp44
=
crcopy
(
pp
[
i
:
newi
])
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
ch
.
chtype
=
chunk_type
[
CSLINE
]
ch
.
data
=
'cindex'
ingroupch
=
cp11
+
\
spacech
+
cp12
+
\
spacech
+
cp13
+
\
spacech
+
cp14
pp
.
insert
(
i
,
chunk
(
GROUP
,
ch
.
where
,
ingroupch
))
i
,
length
=
i
+
1
,
length
+
1
ingroupch
=
cp22
+
\
spacech
+
cp23
+
\
spacech
+
cp24
+
\
commach
+
cp21
pp
[
i
:
i
]
=
cindexch
+
[
chunk
(
GROUP
,
ch
.
where
,
ingroupch
)]
i
,
length
=
i
+
2
,
length
+
2
ingroupch
=
cp33
+
\
spacech
+
cp34
+
\
commach
+
cp31
+
\
spacech
+
cp32
pp
[
i
:
i
]
=
cindexch
+
[
chunk
(
GROUP
,
ch
.
where
,
ingroupch
)]
i
,
length
=
i
+
2
,
length
+
2
ingroupch
=
cp44
+
\
commach
+
cp41
+
\
spacech
+
cp42
+
\
spacech
+
cp43
pp
[
i
:
i
]
=
cindexch
+
[
chunk
(
GROUP
,
ch
.
where
,
ingroupch
)]
i
,
length
=
i
+
2
,
length
+
2
elif
s_buf_data
==
'seemodule'
:
# discard optional arg first:
length
,
newi
=
getoptarg
(
length
,
buf
,
pp
,
i
)
ingroupch
=
pp
[
i
:
newi
]
del
pp
[
i
:
newi
]
length
=
length
-
(
newi
-
i
)
#
ch
.
data
=
"code"
data
=
pp
[
i
+
1
].
data
data
.
insert
(
0
,
chunk
(
PLAIN
,
ch
.
where
,
" ("
))
data
.
append
(
chunk
(
PLAIN
,
ch
.
where
,
")"
))
pp
[
i
+
1
:
i
+
2
]
=
data
length
=
length
+
len
(
data
)
-
1
elif
s_buf_data
==
'seetext'
:
data
=
pp
[
i
].
data
data
.
insert
(
0
,
chunk
(
ENDLINE
,
ch
.
where
,
"
\
n
"
))
pp
[
i
-
1
:
i
+
1
]
=
data
i
=
i
-
1
length
=
length
+
len
(
data
)
-
2
elif
s_buf_data
==
'deprecated'
:
length
,
newi
=
getnextarg
(
length
,
buf
,
pp
,
i
)
version
=
pp
[
i
:
newi
][
0
]
length
,
newi2
=
getnextarg
(
length
,
buf
,
pp
,
newi
)
action
=
pp
[
newi
:
newi2
]
del
pp
[
i
-
1
:
newi2
]
length
=
length
-
(
newi2
-
i
)
-
1
stuff
=
[
chunk
(
PLAIN
,
ch
.
where
,
'Deprecated since release '
),
version
,
chunk
(
PLAIN
,
ch
.
where
,
'.'
)]
chunks
=
[
chunk
(
CSNAME
,
ch
.
where
,
'strong'
),
chunk
(
GROUP
,
ch
.
where
,
stuff
),
chunk
(
PLAIN
,
ch
.
where
,
' '
)]
+
action
\
+
[
chunk
(
DENDLINE
,
ch
.
where
,
'
\
n
'
)]
stuff
=
None
i
=
i
-
1
pp
[
i
:
i
]
=
chunks
length
=
length
+
len
(
chunks
)
elif
s_buf_data
==
"quad"
:
ch
.
chtype
=
PLAIN
ch
.
data
=
" "
elif
s_buf_data
in
(
'usepackage'
,
'input'
):
del
pp
[
i
-
1
:
i
+
1
]
i
,
length
=
i
-
1
,
length
-
2
elif
s_buf_data
in
(
'noindent'
,
'indexsubitem'
,
'footnote'
):
pass
elif
s_buf_data
==
'label'
:
name
=
s
(
buf
,
pp
[
i
].
data
[
0
].
data
)
del
pp
[
i
-
1
:
i
+
1
]
length
=
length
-
2
i
=
i
-
1
label_nodes
[
name
]
=
hist
.
nodenames
[
-
1
]
elif
s_buf_data
==
'rfc'
:
ch
.
chtype
=
chunk_type
[
PLAIN
]
ch
.
data
=
"RFC "
+
s
(
buf
,
pp
[
i
].
data
[
0
].
data
)
del
pp
[
i
]
length
=
length
-
1
elif
s_buf_data
==
'ref'
:
name
=
s
(
buf
,
pp
[
i
].
data
[
0
].
data
)
if
label_nodes
.
has_key
(
name
):
pp
[
i
].
data
[
0
].
data
=
label_nodes
[
name
]
else
:
pp
[
i
-
1
:
i
+
1
]
=
[
chunk
(
PLAIN
,
ch
.
where
,
"(unknown node reference: %s)"
%
name
)]
length
=
length
-
1
print
"WARNING: unknown node label"
,
`name`
else
:
print
"don't know what to do with keyword "
+
s_buf_data
# Matches any of the characters '@', '{' and '}'.  dumpit() prefixes each
# match found in plain text with an extra '@' before writing it out.
re_atsign = regex.compile('[@{}]')
# Matches a newline.  dumpit() uses it to turn newlines inside the argument
# of a CSLINE command into spaces.
re_newline = regex.compile('\n')
def dumpit(buf, wm, pp):
    """Write the chunk list `pp` as texinfo text through `wm`.

    buf -- passed to s() together with a chunk's data to obtain the
           chunk's text (s() is defined elsewhere in this file)
    wm  -- callable taking one string; every piece of output goes through
           it (main() passes a file's write method, the recursive CSLINE
           call passes the write method of a Wobj buffer)
    pp  -- list of chunk objects; GROUP chunks carry a nested list in
           their data attribute and are dumped recursively

    Raises the string exception 'FATAL' if the length of pp changes while
    it is being walked, and `error` when a CSLINE chunk is not followed
    by a GROUP chunk holding list data.
    """
    # `out` is only read here (out.doublenodes).
    global out
    i, length = 0, len(pp)
    # Set after a command name that starts with a letter; it is copied to
    # `dospace` for exactly the next chunk and then cleared again.
    addspace = 0
    while 1:
        # Sanity check: nothing may resize pp during the walk.
        if len(pp) != length:
            raise 'FATAL', 'inconsistent length'
        if i == length:
            break
        ch = pp[i]
        # From here on pp[i] is the *next* chunk and pp[i-2] the previous one.
        i = i + 1
        dospace = addspace
        addspace = 0
        if ch.chtype == chunk_type[CSNAME]:
            # Command name: written as '@name'.
            s_buf_data = s(buf, ch.data)
##          if s_buf_data == 'e':
##              wm('\\')
##              continue
##          if s_buf_data == '$':
##              wm('$')
##              continue
            wm('@' + s_buf_data)
            # NOTE(review): pp[i] is read without checking i against
            # length, so a 'node' command as the very last chunk would
            # raise IndexError.
            if s_buf_data == 'node' and \
               pp[i].chtype == chunk_type[PLAIN] and \
               s(buf, pp[i].data) in out.doublenodes:
                ##XXX doesnt work yet??
                wm(' ZZZ-' + zfill(`i`, 4))
            if s_buf_data[0] in string.letters:
                addspace = 1
        elif ch.chtype == chunk_type[PLAIN]:
            # Separate a preceding alphabetic command name from this text,
            # unless the text is itself exactly one space or one tab.
            if dospace and s(buf, ch.data) not in (' ', '\t'):
                wm(' ')
            text = s(buf, ch.data)
            # Prefix every '@', '{' and '}' with '@'.  search() yields the
            # match position, or a negative value when nothing matches.
            while 1:
                pos = re_atsign.search(text)
                if pos < 0:
                    break
                wm(text[:pos] + '@' + text[pos])
                text = text[pos+1:]
            wm(text)
        elif ch.chtype == chunk_type[GROUP]:
            # Nested group: dump its chunks between braces.
            wm('{')
            dumpit(buf, wm, ch.data)
            wm('}')
        elif ch.chtype == chunk_type[DENDLINE]:
            # Write one blank line and skip any line-end chunks that
            # immediately follow it.
            wm('\n\n')
            while i != length and pp[i].chtype in \
                  (chunk_type[DENDLINE], chunk_type[ENDLINE]):
                i = i + 1
        elif ch.chtype == chunk_type[OTHER]:
            wm(s(buf, ch.data))
        elif ch.chtype == chunk_type[ACTIVE]:
            wm(s(buf, ch.data))
        elif ch.chtype == chunk_type[ENDLINE]:
            wm('\n')
        elif ch.chtype == chunk_type[CSLINE]:
            # Line command: '@name argument' on a line of its own.
            # Start a new line first unless the previous chunk already
            # ended one (a line-end chunk, or PLAIN text ending in '\n').
            if i >= 2 and pp[i-2].chtype not in \
               (chunk_type[ENDLINE], chunk_type[DENDLINE]) \
               and (pp[i-2].chtype != chunk_type[PLAIN]
                    or s(buf, pp[i-2].data)[-1] != '\n'):
                wm('\n')
            wm('@' + s(buf, ch.data))
            # The argument must be the next chunk: a GROUP holding a list.
            if i == length:
                raise error, 'CSLINE expected another chunk'
            if pp[i].chtype != chunk_type[GROUP]:
                raise error, 'CSLINE expected GROUP'
            if type(pp[i].data) != ListType:
                raise error, 'GROUP chould contain []-data'
            # Dump the group's contents (without braces) into a buffer so
            # that the newlines in it can be replaced before writing.
            wobj = Wobj()
            dumpit(buf, wobj.write, pp[i].data)
            i = i + 1
            text = wobj.data
            del wobj
            if text:
                wm(' ')
                # Replace each newline in the argument with a space.
                while 1:
                    pos = re_newline.search(text)
                    if pos < 0:
                        break
                    # these seem to be completely harmless, so don't warn:
##                  print 'WARNING: found newline in csline arg (%s)' \
##                        % s(buf, ch.data)
                    wm(text[:pos] + ' ')
                    text = text[pos+1:]
                wm(text)
            # End the command's line unless the next chunk does so itself
            # (another CSLINE, a line-end chunk, or PLAIN text starting
            # with '\n').  `and` binds tighter than `or` here.
            if i >= length or \
               pp[i].chtype not in (chunk_type[CSLINE],
                                    chunk_type[ENDLINE],
                                    chunk_type[DENDLINE]) \
               and (pp[i].chtype != chunk_type[PLAIN]
                    or s(buf, pp[i].data)[0] != '\n'):
                wm('\n')
        elif ch.chtype == chunk_type[COMMENT]:
            # Comment: written as '@c text'; an empty comment, or one that
            # matches the blank pattern below, is dropped.
            # NOTE(review): both bracket expressions below read '[\t]' in
            # this copy of the file; the original may have been '[ \t]'
            # (space lost in extraction) -- confirm against the repository.
            if s(buf, ch.data) and \
               regex.match('^[\t]*$', s(buf, ch.data)) < 0:
                # Start a new line unless the previous chunk ended one.
                if i >= 2 \
                   and pp[i-2].chtype not in (chunk_type[ENDLINE],
                                              chunk_type[DENDLINE]) \
                   and not (pp[i-2].chtype == chunk_type[PLAIN]
                            and regex.match('\\(.\\|\n\\)*[\t]*\n$',
                                            s(buf, pp[i-2].data)) >= 0):
                    wm('\n')
                wm('@c ' + s(buf, ch.data))
        elif ch.chtype == chunk_type[IGNORE]:
            pass
        else:
            # Unknown chunk type: report it with at most 400 characters of
            # the repr of its data.
            try:
                str = `s(buf, ch.data)`
            except TypeError:
                str = `ch.data`
            if len(str) > 400:
                str = str[:400] + '...'
            print 'warning:', ch.chtype, 'not handled, data ' + str
def
main
():
global
release_version
outfile
=
None
headerfile
=
'texipre.dat'
trailerfile
=
'texipost.dat'
try
:
opts
,
args
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
'o:h:t:v:'
)
except
getopt
.
error
:
args
=
[]
if
not
args
:
print
'usage: partparse [-o outfile] [-h headerfile]'
,
print
'[-t trailerfile] file ...'
sys
.
exit
(
2
)
for
opt
,
arg
in
opts
:
if
opt
==
'-o'
:
outfile
=
arg
if
opt
==
'-h'
:
headerfile
=
arg
if
opt
==
'-t'
:
trailerfile
=
arg
if
opt
==
'-v'
:
release_version
=
arg
if
not
outfile
:
root
,
ext
=
os
.
path
.
splitext
(
args
[
0
])
outfile
=
root
+
'.texi'
if
outfile
in
args
:
print
'will not overwrite input file'
,
outfile
sys
.
exit
(
2
)
outf
=
open
(
outfile
,
'w'
)
outf
.
write
(
open
(
headerfile
,
'r'
).
read
())
for
file
in
args
:
if
len
(
args
)
>
1
:
print
'='
*
20
,
file
,
'='
*
20
buf
=
open
(
file
,
'r'
).
read
()
chunk
.
buf
=
buf
w
,
pp
=
parseit
(
buf
)
startchange
()
changeit
(
buf
,
pp
)
dumpit
(
buf
,
outf
.
write
,
pp
)
outf
.
write
(
open
(
trailerfile
,
'r'
).
read
())
outf
.
close
()
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment