Rewritten by Ka-Ping Yee.

694f7010 · Guido van Rossum · ba885ffa · 694f7010
Commit 694f7010 authored Sep 10, 1996 by Guido van Rossum
Hide whitespace changes
Inline Side-by-side

Showing with 167 additions and 193 deletions

Misc/faq2html.py Misc/faq2html.py +167 -193

No files found.
--- a/Misc/faq2html.py
+++ b/Misc/faq2html.py
-#! /usr/local/bin/python
+#!/usr/local/bin/python
-# Convert the Python FAQ to HTML
+# A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96)
-import string
+# Reads a text file given on standard input or named as first argument, and
-import regex
+# generates HTML 2.0 on standard output.  Recognizes these constructions:
-import regsub
+#
-import sys
+#     HTML element               pattern at the beginning of a line
-import os
+#
+#     section heading            (<number><period>)+<space>
-FAQ = 'FAQ'
+#     numbered list element      <1-2 spaces>(<number><period>)+<space>
+#     unnumbered list element    <0-2 spaces><hyphen or asterisk><space>
-chapterprog = regex.compile('^\([1-9][0-9]*\)\. ')
+#     preformatted section       <more than two spaces>
-questionprog = regex.compile('^\([1-9][0-9]*\)\.\([1-9][0-9]*\)\. ')
+#
-newquestionprog = regex.compile('^Q\. ')
+# Heading level is determined by the number of (<number><period>) segments.
-blankprog = regex.compile('^[ \t]*$')
+# Blank lines force a separation of elements; if none of the above four
-indentedorblankprog = regex.compile('^\([ \t]+\|[ \t]*$\)')
+# types is indicated, a new paragraph begins.  A line beginning with many
-underlineprog = regex.compile('^==*$')
+# spaces is interpreted as a continuation (instead of preformatted) after
-eightblanksprog = regex.compile('^\(        \| *\t\)')
+# a list element.  Headings are anchored; paragraphs starting with "Q." are
-mailheaderprog = regex.compile('^\(Subject\|Newsgroups\|Followup-To\|From\|Reply-To\|Approved\|Archive-name\|Version\|Last-modified\): +')
+# emphasized, and those marked with "A." get their first sentence emphasized.
-urlprog = regex.compile('<URL:\([^>]*\)>')
+#
-ampprog = regex.compile('&')
+# Hyperlinks are created from references to:
-aprog = regex.compile('^A\. +')
+#     URLs, explicitly marked using <URL:scheme://host...> 
-qprog = regex.compile('>Q\. +')
+#     other questions, of the form "question <number>(<period><number>)*"
-qrefprog = regex.compile('question +\([0-9]\.[0-9]+\)')
+#     sections, of the form "section <number>".
-versionprog = regex.compile('^Version: ')
-emailprog = regex.compile('<\([^>@:]+@[^>@:]+\)>')
+import sys, string, regex, regsub, regex_syntax
+regex.set_syntax(regex_syntax.RE_SYNTAX_AWK)
-def main():
-    print 'Reading lines...'
+# --------------------------------------------------------- regular expressions
-    lines = open(FAQ, 'r').readlines()
+orditemprog = regex.compile('  ?([1-9][0-9]*\.)+ +')
-    print 'Renumbering in memory...'
+itemprog = regex.compile(' ? ?[-*] +')
-    oldlines = lines[:]
+headingprog = regex.compile('([1-9][0-9]*\.)+ +')
-    after_blank = 1
+prefmtprog = regex.compile('   ')
-    chapter = 0
+blankprog = regex.compile('^[ \t\r\n]$')
-    question = 0
+questionprog = regex.compile(' *Q\. +')
-    chapters = ['<OL>']
+answerprog = regex.compile(' *A\. +')
-    questions = ['<OL>']
+sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)')
-    for i in range(len(lines)):
-	line = lines[i]
+mailhdrprog = regex.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To'
-	if after_blank:
+    '|Approved|Archive-Name|Version|Last-Modified): +', regex.casefold)
-	    n = chapterprog.match(line)
+urlprog = regex.compile('&lt;URL:([^&]+)&gt;')
-	    if n >= 0:
+addrprog = regex.compile('&lt;([^>@:]+@[^&@:]+)&gt;')
-		chapter = chapter + 1
+qrefprog = regex.compile('question +([1-9](\.[0-9]+)*)')
-		if chapter != 1:
+srefprog = regex.compile('section +([1-9][0-9]*)')
-		    questions.append('</UL>\n')
+entityprog = regex.compile('[&<>]')
-		question = 0
-		lines[i] = '<H2>' + line[n:-1] + '</H2>\n'
+# ------------------------------------------------------------ global variables
-		chapters.append('<LI> ' + line[n:])
+body = []
-		questions.append('<LI> ' + line[n:])
+ollev = ullev = 0
-		questions.append('<UL>\n')
+element = content = secnum = version = ''
-		afterblank = 0
-		continue
+# ----------------------------------------------------- for making nested lists
-	    n = underlineprog.match(line)
+def dnol():
-	    if n >= 0:
+    global body, ollev
-		lines[i] = ''
+    ollev = ollev + 1
-		continue
+    if body[-1] == '</li>': del body[-1]
-	    n = questionprog.match(line)
+    body.append('<ol>')
-	    if n < 0: n = newquestionprog.match(line) - 3
-	    if n >= 0:
+def upol(): 
-		question = question + 1
+    global body, ollev
-		number = '%d.%d'%(chapter, question)
+    ollev = ollev - 1
-		lines[i] = '<A NAME="' + number + '"><H3>' + line[n:]
+    body.append(ollev and '</ol></li>' or '</ol>')
-		questions.append('<LI><A HREF="#' + \
-				 number + '">' + line[n:])
+# --------------------------------- output one element and convert its contents
-		# Add up to 4 continuations of the question
+def spew(clearol=0, clearul=0):
-		n = len(number)
+    global content, body, ollev, ullev
-		for j in range(i+1, i+5):
-		    if blankprog.match(lines[j]) >= 0:
+    if content:
-			lines[j-1] = lines[j-1] + '</H3></A>'
+        if entityprog.search(content) > -1:
-			questions[-1] = \
+            content = regsub.gsub('&', '&amp;', content)
-			      questions[-1][:-1] + '</A>\n'
+            content = regsub.gsub('<', '&lt;', content)
-			break
+            content = regsub.gsub('>', '&gt;', content)
-		    questions.append(' '*(n+2) + lines[j])
-		afterblank = 0
+        n = questionprog.match(content)
-		continue
+        if n > 0:
-	afterblank = (blankprog.match(line) >= 0)
+            content = '<em>' + content[n:] + '</em>'
-    print 'Inserting list of chapters...'
+            if ollev:                       # question reference in index
-    chapters.append('</OL>\n')
+                fragid = regsub.gsub('^ +|\.? +$', '', secnum)
-    for i in range(len(lines)):
+                content = '<a href="#%s">%s</a>' % (fragid, content)
-	line = lines[i]
-	if regex.match(
+        if element[0] == 'h':               # heading in the main text
-		  '^This FAQ is divided in the following chapters',
+            fragid = regsub.gsub('^ +|\.? +$', '', secnum)
-		  line) >= 0:
+            content = secnum + '<a name="%s">%s</a>' % (fragid, content)
-	    i = i+1
-	    while 1:
+        n = answerprog.match(content)
-		line = lines[i]
+        if n > 0:                           # answer paragraph
-		if indentedorblankprog.match(line) < 0:
+            content = regsub.sub(sentprog, '<strong>\\1</strong>', content[n:])
-		    break
-		del lines[i]
+        body.append('<' + element + '>' + content)
-	    lines[i:i] = chapters
+        body.append('</' + element + '>')
-	    break
+        content = ''
-    else:
-	print '*** Can\'t find header for list of chapters'
+    while clearol and ollev: upol()
-	print '*** Chapters found:'
+    if clearul and ullev: body.append('</ul>'); ullev = 0
-	for line in chapters: print line,
-    print 'Inserting list of questions...'
+# ---------------------------------------------------------------- main program
-    questions.append('</UL></OL>\n')
+faq = len(sys.argv)>1 and sys.argv[1] and open(sys.argv[1]) or sys.stdin
-    for i in range(len(lines)):
+lines = faq.readlines()
-	line = lines[i]
-	if regex.match('^Here.s an overview of the questions',
+for line in lines:
-		  line) >= 0:
+    if line[2:9] == '=======':              # <hr> will appear *before*
-	    i = i+1
+        body.append('<hr>')                 # the underlined heading
-	    while 1:
+        continue
-		line = lines[i]
-		if indentedorblankprog.match(line) < 0:
+    n = orditemprog.match(line)
-		    break
+    if n > 0:                               # make ordered list item
-		del lines[i]
+        spew(0, 'clear ul')
-	    lines[i:i] = questions
+        secnum = line[:n]
-	    break
+        level = string.count(secnum, '.')
-    else:
+        while level > ollev: dnol()
-	print '*** Can\'t find header for list of questions'
+        while level < ollev: upol()
-	print '*** Questions found:'
+        element, content = 'li', line[n:]
-	for line in questions: print line,
+        continue
-    # final cleanup
-    print "Final cleanup..."
+    n = itemprog.match(line)
-    doingpre = 0
+    if n > 0:                               # make unordered list item
-    for i in range(len(lines)):
+        spew('clear ol', 0)
-	# set lines indented by >= 8 spaces using PRE
+        if ullev == 0: body.append('<ul>'); ullev = 1
-	# blank lines either terminate PRE or separate paragraphs
+        element, content = 'li', line[n:]
-	n = eightblanksprog.match(lines[i])
+        continue
-	if n < 0: n = mailheaderprog.match(lines[i])
-	if n >= 0:
+    n = headingprog.match(line)
-	    if versionprog.match(lines[i]) > 0:
+    if n > 0:                               # make heading element
-		version = string.split(lines[i])[1]
+        spew('clear ol', 'clear ul')
-	    if doingpre == 0:
+        secnum = line[:n]
-		lines[i] = '<PRE>\n' + lines[i]
+        sys.stderr.write(line)
-		doingpre = 1
+        element, content = 'h%d' % string.count(secnum, '.'), line[n:]
-		continue
+        continue
-	n = blankprog.match(lines[i])
-	if n >= 0:
+    n = 0
-	    # print '*** ', lines[i-1], doingpre
+    if not secnum:                          # haven't hit body yet
-	    if doingpre == 1:
+        n = mailhdrprog.match(line) 
-		lines[i] = '</PRE><P>\n'
+        v = version and -1 or regex.match('Version: ', line)
-		doingpre = 0
+        if v > 0 and not version: version = line[v:]
-	    else:
+    if n <= 0 and element != 'li':          # not pre if after a list item
-		lines[i] = '<P>\n'
+        n = prefmtprog.match(line)
-	    continue
+    if n > 0:                               # make preformatted element
+        if element == 'pre':
-	# & -> &amp;
+            content = content + line
-	n = ampprog.search(lines[i])
+        else: 
-	if n >= 0:
+            spew('clear ol', 'clear ul')
-	    lines[i] = regsub.gsub(ampprog, '&amp;', lines[i])
+            element, content = 'pre', line
-	    # no continue - there might be other changes to the line...
+        continue
-	# zap all the 'Q.' and 'A.' leaders - what happened to the
+    if blankprog.match(line) > 0:           # force a new element
-	# last couple?
+        spew()
-	n = qprog.search(lines[i])
+        element = ''
-	if n >= 0:
+    elif element:                           # continue current element
-	    lines[i] = regsub.sub(qprog, '>', lines[i])
+        content = content + line
-	    # no continue - there might be other changes to the line...
+    else:                                   # no element; make paragraph
+        spew('clear ol', 'clear ul')
-	n = aprog.search(lines[i])
+        element, content = 'p', line
-	if n >= 0:
-	    lines[i] = regsub.sub(aprog, '', lines[i])
+spew()										# output last element
-	    # no continue - there might be other changes to the line...
+body = string.joinfields(body, '')
-	# patch up hard refs to questions
+body = regsub.gsub(urlprog, '<a href="\\1">\\1</a>', body)
-	n = qrefprog.search(lines[i])
+body = regsub.gsub(addrprog, '<a href="mailto:\\1">\\1</a>', body)
-	if n >= 0:
+body = regsub.gsub(qrefprog, '<a href="#\\1">question \\1</a>', body)
-	    lines[i] = regsub.sub(qrefprog,
+body = regsub.gsub(srefprog, '<a href="#\\1">section \\1</a>', body)
-				  '<A HREF="#\\1">question \\1</A>', lines[i])
-	    # no continue - there might be other changes to the line...
+print '<!doctype html public "-//IETF//DTD HTML 2.0//EN"><html>'
+print '<head><title>Python Frequently-Asked Questions v' + version
-	# make <URL:...> into actual links
+print "</title></head><body>(This file was generated using Ping's"
-	n = urlprog.search(lines[i])
+print '<a href="faq2html.py">faq2html.py</a>.)'
-	if n >= 0:
+print body + '</body></html>'
-	    lines[i] = regsub.gsub(urlprog, '<A HREF="\\1">\\1</A>', lines[i])
-	    # no continue - there might be other changes to the line...
-	# make <user@host.domain> into <mailto:...> links
-	n = emailprog.search(lines[i])
-	if n >= 0:
-	    lines[i] = regsub.gsub(emailprog,
-				   '<A HREF="mailto:\\1">\\1</A>', lines[i])
-	    # no continue - there might be other changes to the line...
-    lines[0:0] = ['<HTML><HEAD><TITLE>Python Frequently Asked Questions v',
-		  version,
-		  '</TITLE>\n',
-		  '</HEAD><body>\n',
-		  '(This file was generated using\n',
-		  '<A HREF="faq2html.py">faq2html.py</A>.)<P>\n']
-    lines.append('<P></BODY></HTML>\n')
-    print 'Writing html file...'
-    f = open(FAQ + '.html', 'w')
-    for line in lines:
-	f.write(line)
-    f.close()
-    print 'Done.'
-main()