Make this do the right thing with entries which start with the percent sign, in response to Skip's comments in SF bug #487165. Make use of string methods instead of string module functions in most places. Add (and make the default) a way to collapse symbol entries into a single "Symbols" section in the generated index. This is similar to what makeindex does, but does not include entries beginning with an underscore.

Make this do the right thing with entries which start with the percent sign,
in response to Skip's comments in SF bug #487165. Make use of string methods instead of string module functions in most places. Add (and make the default) a way to collapse symbol entries into a single "Symbols" section in the generated index. This is similar to what makeindex does, but does not include entries beginning with an underscore.
21e9e894 · Fred Drake · 77298d4a · 21e9e894
Commit 21e9e894 authored Dec 26, 2001 by Fred Drake
Hide whitespace changes
Inline Side-by-side

Showing with 54 additions and 30 deletions

Doc/tools/buildindex.py Doc/tools/buildindex.py +54 -30

No files found.
--- a/Doc/tools/buildindex.py
+++ b/Doc/tools/buildindex.py
@@ -8,6 +8,10 @@ import string
 import sys
+bang_join = "!".join
+null_join = "".join
 class Node:
    __rmjunk = re.compile("<#\d+#>")
@@ -38,15 +42,15 @@ class Node:
        return c or cmp(self.key, other.key) or cmp(self.text, other.text)
    def __repr__(self):
-        return "<Node for %s (%s)>" % (string.join(self.text, '!'), self.seqno)
+        return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno)
    def __str__(self):
-        return string.join(self.key, '!')
+        return bang_join(self.key)
    def dump(self):
        return "%s\1%s###%s\n" \
               % (string.join(self.links, "\1"),
-                  string.join(self.text, '!'),
+                  bang_join(self.text),
                  self.seqno)
@@ -54,8 +58,8 @@ def cmp_part(s1, s2):
    result = cmp(s1, s2)
    if result == 0:
        return 0
-    l1 = string.lower(s1)
+    l1 = s1.lower()
-    l2 = string.lower(s2)
+    l2 = s2.lower()
    minlen = min(len(s1), len(s2))
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        result = -1
@@ -68,8 +72,8 @@ def cmp_part(s1, s2):
 def split_entry(str, which):
    stuff = []
-    parts = string.split(str, '!')
+    parts = str.split('!')
-    parts = map(string.split, parts, ['@'] * len(parts))
+    parts = [part.split('@') for part in parts]
    for entry in parts:
        if len(entry) != 1:
            key = entry[which]
@@ -88,9 +92,9 @@ def split_entry_key(str):
    for i in range(len(parts)):
        m = _rmtt.match(parts[i])
        if m:
-            parts[i] = string.join(m.group(1, 2, 3), '')
+            parts[i] = null_join(m.group(1, 2, 3))
        else:
-            parts[i] = string.lower(parts[i])
+            parts[i] = parts[i].lower()
        # remove '()' from the key:
        parts[i] = _rmparens.sub('', parts[i])
    return map(trim_ignored_letters, parts)
@@ -100,7 +104,7 @@ def split_entry_text(str):
    if '<' in str:
        m = _rmtt.match(str)
        if m:
-            str = string.join(m.group(1, 2, 3), '')
+            str = null_join(m.group(1, 2, 3))
    return split_entry(str, 1)
@@ -121,14 +125,16 @@ def load(fp):
 def trim_ignored_letters(s):
    # ignore $ to keep environment variables with the
    # leading letter from the name
-    s = string.lower(s)
+    if s.startswith("$"):
-    if s[0] == "$":
+        return s[1:].lower()
-        return s[1:]
    else:
-        return s
+        return s.lower()
 def get_first_letter(s):
-    return string.lower(trim_ignored_letters(s)[0])
+    if s.startswith("<tex2html_percent_mark>"):
+        return "%"
+    else:
+        return trim_ignored_letters(s)[0]
 def split_letters(nodes):
@@ -149,14 +155,24 @@ def split_letters(nodes):
    return letter_groups
+def group_symbols(groups):
+    entries = []
+    ident_letters = string.ascii_letters + "_"
+    while groups[0][0] not in ident_letters:
+        entries += groups[0][1]
+        del groups[0]
+    if entries:
+        groups.insert(0, ("Symbols", entries))
 # need a function to separate the nodes into columns...
 def split_columns(nodes, columns=1):
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
-    colheight = len(nodes) / columns
+    colheight = int(len(nodes) / columns)
-    numlong = len(nodes) % columns
+    numlong = int(len(nodes) % columns)
    if numlong:
        colheight = colheight + 1
    else:
@@ -169,7 +185,7 @@ def split_columns(nodes, columns=1):
    del nodes[:end]
    colheight = colheight - 1
    try:
-        numshort = len(nodes) / colheight
+        numshort = int(len(nodes) / colheight)
    except ZeroDivisionError:
        cols = cols + (columns - len(cols)) * [[]]
    else:
@@ -235,7 +251,7 @@ def format_column(nodes):
        previous = current
    append("\n")
    append("</dl>" * (level + 1))
-    return string.join(strings, '')
+    return null_join(strings)
 def format_nodes(nodes, columns=1):
@@ -243,10 +259,10 @@ def format_nodes(nodes, columns=1):
    append = strings.append
    if columns > 1:
        colnos = range(columns)
-        colheight = len(nodes) / columns
+        colheight = int(len(nodes) / columns)
        if len(nodes) % columns:
            colheight = colheight + 1
-        colwidth = 100 / columns
+        colwidth = int(100 / columns)
        append('<table width="100%"><tr valign="top">')
        for col in split_columns(nodes, columns):
            append('<td width="%d%%">\n' % colwidth)
@@ -256,7 +272,7 @@ def format_nodes(nodes, columns=1):
    else:
        append(format_column(nodes))
    append("\n<p>\n")
-    return string.join(strings, '')
+    return null_join(strings)
 def format_letter(letter):
@@ -265,13 +281,15 @@ def format_letter(letter):
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
-        lettername = string.upper(letter)
+        lettername = letter.capitalize()
    return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)
-def format_html_letters(nodes, columns=1):
+def format_html_letters(nodes, columns, group_symbol_nodes):
    letter_groups = split_letters(nodes)
+    if group_symbol_nodes:
+        group_symbols(letter_groups)
    items = []
    for letter, nodes in letter_groups:
        s = "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
@@ -280,7 +298,7 @@ def format_html_letters(nodes, columns=1):
    for letter, nodes in letter_groups:
        s.append(format_letter(letter))
        s.append(format_nodes(nodes, columns))
-    return string.join(s, '')
+    return null_join(s)
 def format_html(nodes, columns):
    return format_nodes(nodes, columns)
@@ -308,11 +326,11 @@ def dump(nodes, fp):
        fp.write(node.dump())
-def process_nodes(nodes, columns, letters):
+def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0):
    nodes.sort()
    collapse(nodes)
    if letters:
-        return format_html_letters(nodes, columns)
+        return format_html_letters(nodes, columns, group_symbol_nodes)
    else:
        return format_html(nodes, columns)
@@ -323,22 +341,28 @@ def main():
    ofn = "-"
    columns = 1
    letters = 0
+    group_symbol_nodes = 1
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
-                               ["columns=", "letters", "output="])
+                               ["columns=", "dont-group-symbols",
+                                "group-symbols", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
-            columns = string.atoi(val)
+            columns = int(val, 10)
        elif opt in ("-l", "--letters"):
            letters = 1
+        elif opt == "--group-symbols":
+            group_symbol_nodes = 1
+        elif opt == "--dont-group-symbols":
+            group_symbol_nodes = 0
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        nodes = nodes + load(open(fn))
    num_nodes = len(nodes)
-    html = process_nodes(nodes, columns, letters)
+    html = process_nodes(nodes, columns, letters, group_symbol_nodes)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)