Commit 21e9e894 authored by Fred Drake

Make this do the right thing with entries which start with the percent sign,
in response to Skip's comments in SF bug #487165.

Make use of string methods instead of string module functions in most places.
Add (and make the default) a way to collapse symbol entries into a single
"Symbols" section in the generated index.  This is similar to what makeindex
does, but does not include entries beginning with an underscore.
parent 77298d4a
...@@ -8,6 +8,10 @@ import string ...@@ -8,6 +8,10 @@ import string
import sys import sys
bang_join = "!".join
null_join = "".join
class Node: class Node:
__rmjunk = re.compile("<#\d+#>") __rmjunk = re.compile("<#\d+#>")
...@@ -38,15 +42,15 @@ class Node: ...@@ -38,15 +42,15 @@ class Node:
return c or cmp(self.key, other.key) or cmp(self.text, other.text) return c or cmp(self.key, other.key) or cmp(self.text, other.text)
def __repr__(self): def __repr__(self):
return "<Node for %s (%s)>" % (string.join(self.text, '!'), self.seqno) return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno)
def __str__(self): def __str__(self):
return string.join(self.key, '!') return bang_join(self.key)
def dump(self): def dump(self):
return "%s\1%s###%s\n" \ return "%s\1%s###%s\n" \
% (string.join(self.links, "\1"), % (string.join(self.links, "\1"),
string.join(self.text, '!'), bang_join(self.text),
self.seqno) self.seqno)
...@@ -54,8 +58,8 @@ def cmp_part(s1, s2): ...@@ -54,8 +58,8 @@ def cmp_part(s1, s2):
result = cmp(s1, s2) result = cmp(s1, s2)
if result == 0: if result == 0:
return 0 return 0
l1 = string.lower(s1) l1 = s1.lower()
l2 = string.lower(s2) l2 = s2.lower()
minlen = min(len(s1), len(s2)) minlen = min(len(s1), len(s2))
if len(s1) < len(s2) and l1 == l2[:len(s1)]: if len(s1) < len(s2) and l1 == l2[:len(s1)]:
result = -1 result = -1
...@@ -68,8 +72,8 @@ def cmp_part(s1, s2): ...@@ -68,8 +72,8 @@ def cmp_part(s1, s2):
def split_entry(str, which): def split_entry(str, which):
stuff = [] stuff = []
parts = string.split(str, '!') parts = str.split('!')
parts = map(string.split, parts, ['@'] * len(parts)) parts = [part.split('@') for part in parts]
for entry in parts: for entry in parts:
if len(entry) != 1: if len(entry) != 1:
key = entry[which] key = entry[which]
...@@ -88,9 +92,9 @@ def split_entry_key(str): ...@@ -88,9 +92,9 @@ def split_entry_key(str):
for i in range(len(parts)): for i in range(len(parts)):
m = _rmtt.match(parts[i]) m = _rmtt.match(parts[i])
if m: if m:
parts[i] = string.join(m.group(1, 2, 3), '') parts[i] = null_join(m.group(1, 2, 3))
else: else:
parts[i] = string.lower(parts[i]) parts[i] = parts[i].lower()
# remove '()' from the key: # remove '()' from the key:
parts[i] = _rmparens.sub('', parts[i]) parts[i] = _rmparens.sub('', parts[i])
return map(trim_ignored_letters, parts) return map(trim_ignored_letters, parts)
...@@ -100,7 +104,7 @@ def split_entry_text(str): ...@@ -100,7 +104,7 @@ def split_entry_text(str):
if '<' in str: if '<' in str:
m = _rmtt.match(str) m = _rmtt.match(str)
if m: if m:
str = string.join(m.group(1, 2, 3), '') str = null_join(m.group(1, 2, 3))
return split_entry(str, 1) return split_entry(str, 1)
...@@ -121,14 +125,16 @@ def load(fp): ...@@ -121,14 +125,16 @@ def load(fp):
def trim_ignored_letters(s): def trim_ignored_letters(s):
# ignore $ to keep environment variables with the # ignore $ to keep environment variables with the
# leading letter from the name # leading letter from the name
s = string.lower(s) if s.startswith("$"):
if s[0] == "$": return s[1:].lower()
return s[1:]
else: else:
return s return s.lower()
def get_first_letter(s): def get_first_letter(s):
return string.lower(trim_ignored_letters(s)[0]) if s.startswith("<tex2html_percent_mark>"):
return "%"
else:
return trim_ignored_letters(s)[0]
def split_letters(nodes): def split_letters(nodes):
...@@ -149,14 +155,24 @@ def split_letters(nodes): ...@@ -149,14 +155,24 @@ def split_letters(nodes):
return letter_groups return letter_groups
def group_symbols(groups):
    """Collapse the leading non-identifier groups into one "Symbols" group.

    groups is a list of (letter, entries) pairs and is modified in place;
    nothing is returned.  Every pair at the front of the list whose letter
    is not an ASCII letter or an underscore is removed, and the entries of
    the removed pairs are concatenated, in order, into a single
    ("Symbols", entries) pair inserted at the front.  Scanning stops at the
    first pair whose letter is an ASCII letter or an underscore, so any
    later pairs are left alone.

    An empty list, or a list made up entirely of symbol groups, is handled
    without error; previously both cases raised IndexError because the
    loop indexed groups[0] after the list had been emptied.
    """
    ident_letters = string.ascii_letters + "_"
    # Find how many leading groups are symbol groups, never reading past
    # the end of the list.
    count = 0
    while count < len(groups) and groups[count][0] not in ident_letters:
        count += 1
    entries = []
    for _letter, nodes in groups[:count]:
        entries += nodes
    # Drop the whole symbol prefix at once instead of repeatedly deleting
    # the first element.
    del groups[:count]
    # Only add the section when it would be non-empty.
    if entries:
        groups.insert(0, ("Symbols", entries))
# need a function to separate the nodes into columns... # need a function to separate the nodes into columns...
def split_columns(nodes, columns=1): def split_columns(nodes, columns=1):
if columns <= 1: if columns <= 1:
return [nodes] return [nodes]
# This is a rough height; we may have to increase to avoid breaks before # This is a rough height; we may have to increase to avoid breaks before
# a subitem. # a subitem.
colheight = len(nodes) / columns colheight = int(len(nodes) / columns)
numlong = len(nodes) % columns numlong = int(len(nodes) % columns)
if numlong: if numlong:
colheight = colheight + 1 colheight = colheight + 1
else: else:
...@@ -169,7 +185,7 @@ def split_columns(nodes, columns=1): ...@@ -169,7 +185,7 @@ def split_columns(nodes, columns=1):
del nodes[:end] del nodes[:end]
colheight = colheight - 1 colheight = colheight - 1
try: try:
numshort = len(nodes) / colheight numshort = int(len(nodes) / colheight)
except ZeroDivisionError: except ZeroDivisionError:
cols = cols + (columns - len(cols)) * [[]] cols = cols + (columns - len(cols)) * [[]]
else: else:
...@@ -235,7 +251,7 @@ def format_column(nodes): ...@@ -235,7 +251,7 @@ def format_column(nodes):
previous = current previous = current
append("\n") append("\n")
append("</dl>" * (level + 1)) append("</dl>" * (level + 1))
return string.join(strings, '') return null_join(strings)
def format_nodes(nodes, columns=1): def format_nodes(nodes, columns=1):
...@@ -243,10 +259,10 @@ def format_nodes(nodes, columns=1): ...@@ -243,10 +259,10 @@ def format_nodes(nodes, columns=1):
append = strings.append append = strings.append
if columns > 1: if columns > 1:
colnos = range(columns) colnos = range(columns)
colheight = len(nodes) / columns colheight = int(len(nodes) / columns)
if len(nodes) % columns: if len(nodes) % columns:
colheight = colheight + 1 colheight = colheight + 1
colwidth = 100 / columns colwidth = int(100 / columns)
append('<table width="100%"><tr valign="top">') append('<table width="100%"><tr valign="top">')
for col in split_columns(nodes, columns): for col in split_columns(nodes, columns):
append('<td width="%d%%">\n' % colwidth) append('<td width="%d%%">\n' % colwidth)
...@@ -256,7 +272,7 @@ def format_nodes(nodes, columns=1): ...@@ -256,7 +272,7 @@ def format_nodes(nodes, columns=1):
else: else:
append(format_column(nodes)) append(format_column(nodes))
append("\n<p>\n") append("\n<p>\n")
return string.join(strings, '') return null_join(strings)
def format_letter(letter): def format_letter(letter):
...@@ -265,13 +281,15 @@ def format_letter(letter): ...@@ -265,13 +281,15 @@ def format_letter(letter):
elif letter == '_': elif letter == '_':
lettername = "_ (underscore)" lettername = "_ (underscore)"
else: else:
lettername = string.upper(letter) lettername = letter.capitalize()
return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \ return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
% (letter, lettername) % (letter, lettername)
def format_html_letters(nodes, columns=1): def format_html_letters(nodes, columns, group_symbol_nodes):
letter_groups = split_letters(nodes) letter_groups = split_letters(nodes)
if group_symbol_nodes:
group_symbols(letter_groups)
items = [] items = []
for letter, nodes in letter_groups: for letter, nodes in letter_groups:
s = "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter) s = "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
...@@ -280,7 +298,7 @@ def format_html_letters(nodes, columns=1): ...@@ -280,7 +298,7 @@ def format_html_letters(nodes, columns=1):
for letter, nodes in letter_groups: for letter, nodes in letter_groups:
s.append(format_letter(letter)) s.append(format_letter(letter))
s.append(format_nodes(nodes, columns)) s.append(format_nodes(nodes, columns))
return string.join(s, '') return null_join(s)
def format_html(nodes, columns): def format_html(nodes, columns):
return format_nodes(nodes, columns) return format_nodes(nodes, columns)
...@@ -308,11 +326,11 @@ def dump(nodes, fp): ...@@ -308,11 +326,11 @@ def dump(nodes, fp):
fp.write(node.dump()) fp.write(node.dump())
def process_nodes(nodes, columns, letters): def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0):
nodes.sort() nodes.sort()
collapse(nodes) collapse(nodes)
if letters: if letters:
return format_html_letters(nodes, columns) return format_html_letters(nodes, columns, group_symbol_nodes)
else: else:
return format_html(nodes, columns) return format_html(nodes, columns)
...@@ -323,22 +341,28 @@ def main(): ...@@ -323,22 +341,28 @@ def main():
ofn = "-" ofn = "-"
columns = 1 columns = 1
letters = 0 letters = 0
group_symbol_nodes = 1
opts, args = getopt.getopt(sys.argv[1:], "c:lo:", opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
["columns=", "letters", "output="]) ["columns=", "dont-group-symbols",
"group-symbols", "letters", "output="])
for opt, val in opts: for opt, val in opts:
if opt in ("-o", "--output"): if opt in ("-o", "--output"):
ofn = val ofn = val
elif opt in ("-c", "--columns"): elif opt in ("-c", "--columns"):
columns = string.atoi(val) columns = int(val, 10)
elif opt in ("-l", "--letters"): elif opt in ("-l", "--letters"):
letters = 1 letters = 1
elif opt == "--group-symbols":
group_symbol_nodes = 1
elif opt == "--dont-group-symbols":
group_symbol_nodes = 0
if not args: if not args:
args = [ifn] args = [ifn]
nodes = [] nodes = []
for fn in args: for fn in args:
nodes = nodes + load(open(fn)) nodes = nodes + load(open(fn))
num_nodes = len(nodes) num_nodes = len(nodes)
html = process_nodes(nodes, columns, letters) html = process_nodes(nodes, columns, letters, group_symbol_nodes)
program = os.path.basename(sys.argv[0]) program = os.path.basename(sys.argv[0])
if ofn == "-": if ofn == "-":
sys.stdout.write(html) sys.stdout.write(html)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment