Commit 694f7010 authored by Guido van Rossum's avatar Guido van Rossum

Rewritten by Ka-Ping Yee.

parent ba885ffa
#! /usr/local/bin/python #!/usr/local/bin/python
# Convert the Python FAQ to HTML # A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96)
import string # Reads a text file given on standard input or named as first argument, and
import regex # generates HTML 2.0 on standard output. Recognizes these constructions:
import regsub #
import sys # HTML element pattern at the beginning of a line
import os #
# section heading (<number><period>)+<space>
FAQ = 'FAQ' # numbered list element <1-2 spaces>(<number><period>)+<space>
# unnumbered list element <0-2 spaces><hyphen or asterisk><space>
chapterprog = regex.compile('^\([1-9][0-9]*\)\. ') # preformatted section <more than two spaces>
questionprog = regex.compile('^\([1-9][0-9]*\)\.\([1-9][0-9]*\)\. ') #
newquestionprog = regex.compile('^Q\. ') # Heading level is determined by the number of (<number><period>) segments.
blankprog = regex.compile('^[ \t]*$') # Blank lines force a separation of elements; if none of the above four
indentedorblankprog = regex.compile('^\([ \t]+\|[ \t]*$\)') # types is indicated, a new paragraph begins. A line beginning with many
underlineprog = regex.compile('^==*$') # spaces is interpreted as a continuation (instead of preformatted) after
eightblanksprog = regex.compile('^\( \| *\t\)') # a list element. Headings are anchored; paragraphs starting with "Q." are
mailheaderprog = regex.compile('^\(Subject\|Newsgroups\|Followup-To\|From\|Reply-To\|Approved\|Archive-name\|Version\|Last-modified\): +') # emphasized, and those marked with "A." get their first sentence emphasized.
urlprog = regex.compile('<URL:\([^>]*\)>') #
ampprog = regex.compile('&') # Hyperlinks are created from references to:
aprog = regex.compile('^A\. +') # URLs, explicitly marked using <URL:scheme://host...>
qprog = regex.compile('>Q\. +') # other questions, of the form "question <number>(<period><number>)*"
qrefprog = regex.compile('question +\([0-9]\.[0-9]+\)') # sections, of the form "section <number>".
versionprog = regex.compile('^Version: ')
emailprog = regex.compile('<\([^>@:]+@[^>@:]+\)>') import sys, string, regex, regsub, regex_syntax
regex.set_syntax(regex_syntax.RE_SYNTAX_AWK)
def main():
print 'Reading lines...' # --------------------------------------------------------- regular expressions
lines = open(FAQ, 'r').readlines() orditemprog = regex.compile(' ?([1-9][0-9]*\.)+ +')
print 'Renumbering in memory...' itemprog = regex.compile(' ? ?[-*] +')
oldlines = lines[:] headingprog = regex.compile('([1-9][0-9]*\.)+ +')
after_blank = 1 prefmtprog = regex.compile(' ')
chapter = 0 blankprog = regex.compile('^[ \t\r\n]$')
question = 0 questionprog = regex.compile(' *Q\. +')
chapters = ['<OL>'] answerprog = regex.compile(' *A\. +')
questions = ['<OL>'] sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)')
for i in range(len(lines)):
line = lines[i] mailhdrprog = regex.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To'
if after_blank: '|Approved|Archive-Name|Version|Last-Modified): +', regex.casefold)
n = chapterprog.match(line) urlprog = regex.compile('&lt;URL:([^&]+)&gt;')
if n >= 0: addrprog = regex.compile('&lt;([^>@:]+@[^&@:]+)&gt;')
chapter = chapter + 1 qrefprog = regex.compile('question +([1-9](\.[0-9]+)*)')
if chapter != 1: srefprog = regex.compile('section +([1-9][0-9]*)')
questions.append('</UL>\n') entityprog = regex.compile('[&<>]')
question = 0
lines[i] = '<H2>' + line[n:-1] + '</H2>\n' # ------------------------------------------------------------ global variables
chapters.append('<LI> ' + line[n:]) body = []
questions.append('<LI> ' + line[n:]) ollev = ullev = 0
questions.append('<UL>\n') element = content = secnum = version = ''
afterblank = 0
continue # ----------------------------------------------------- for making nested lists
n = underlineprog.match(line) def dnol():
if n >= 0: global body, ollev
lines[i] = '' ollev = ollev + 1
continue if body[-1] == '</li>': del body[-1]
n = questionprog.match(line) body.append('<ol>')
if n < 0: n = newquestionprog.match(line) - 3
if n >= 0: def upol():
question = question + 1 global body, ollev
number = '%d.%d'%(chapter, question) ollev = ollev - 1
lines[i] = '<A NAME="' + number + '"><H3>' + line[n:] body.append(ollev and '</ol></li>' or '</ol>')
questions.append('<LI><A HREF="#' + \
number + '">' + line[n:]) # --------------------------------- output one element and convert its contents
# Add up to 4 continuations of the question def spew(clearol=0, clearul=0):
n = len(number) global content, body, ollev, ullev
for j in range(i+1, i+5):
if blankprog.match(lines[j]) >= 0: if content:
lines[j-1] = lines[j-1] + '</H3></A>' if entityprog.search(content) > -1:
questions[-1] = \ content = regsub.gsub('&', '&amp;', content)
questions[-1][:-1] + '</A>\n' content = regsub.gsub('<', '&lt;', content)
break content = regsub.gsub('>', '&gt;', content)
questions.append(' '*(n+2) + lines[j])
afterblank = 0 n = questionprog.match(content)
continue if n > 0:
afterblank = (blankprog.match(line) >= 0) content = '<em>' + content[n:] + '</em>'
print 'Inserting list of chapters...' if ollev: # question reference in index
chapters.append('</OL>\n') fragid = regsub.gsub('^ +|\.? +$', '', secnum)
for i in range(len(lines)): content = '<a href="#%s">%s</a>' % (fragid, content)
line = lines[i]
if regex.match( if element[0] == 'h': # heading in the main text
'^This FAQ is divided in the following chapters', fragid = regsub.gsub('^ +|\.? +$', '', secnum)
line) >= 0: content = secnum + '<a name="%s">%s</a>' % (fragid, content)
i = i+1
while 1: n = answerprog.match(content)
line = lines[i] if n > 0: # answer paragraph
if indentedorblankprog.match(line) < 0: content = regsub.sub(sentprog, '<strong>\\1</strong>', content[n:])
break
del lines[i] body.append('<' + element + '>' + content)
lines[i:i] = chapters body.append('</' + element + '>')
break content = ''
else:
print '*** Can\'t find header for list of chapters' while clearol and ollev: upol()
print '*** Chapters found:' if clearul and ullev: body.append('</ul>'); ullev = 0
for line in chapters: print line,
print 'Inserting list of questions...' # ---------------------------------------------------------------- main program
questions.append('</UL></OL>\n') faq = len(sys.argv)>1 and sys.argv[1] and open(sys.argv[1]) or sys.stdin
for i in range(len(lines)): lines = faq.readlines()
line = lines[i]
if regex.match('^Here.s an overview of the questions', for line in lines:
line) >= 0: if line[2:9] == '=======': # <hr> will appear *before*
i = i+1 body.append('<hr>') # the underlined heading
while 1: continue
line = lines[i]
if indentedorblankprog.match(line) < 0: n = orditemprog.match(line)
break if n > 0: # make ordered list item
del lines[i] spew(0, 'clear ul')
lines[i:i] = questions secnum = line[:n]
break level = string.count(secnum, '.')
else: while level > ollev: dnol()
print '*** Can\'t find header for list of questions' while level < ollev: upol()
print '*** Questions found:' element, content = 'li', line[n:]
for line in questions: print line, continue
# final cleanup
print "Final cleanup..." n = itemprog.match(line)
doingpre = 0 if n > 0: # make unordered list item
for i in range(len(lines)): spew('clear ol', 0)
# set lines indented by >= 8 spaces using PRE if ullev == 0: body.append('<ul>'); ullev = 1
# blank lines either terminate PRE or separate paragraphs element, content = 'li', line[n:]
n = eightblanksprog.match(lines[i]) continue
if n < 0: n = mailheaderprog.match(lines[i])
if n >= 0: n = headingprog.match(line)
if versionprog.match(lines[i]) > 0: if n > 0: # make heading element
version = string.split(lines[i])[1] spew('clear ol', 'clear ul')
if doingpre == 0: secnum = line[:n]
lines[i] = '<PRE>\n' + lines[i] sys.stderr.write(line)
doingpre = 1 element, content = 'h%d' % string.count(secnum, '.'), line[n:]
continue continue
n = blankprog.match(lines[i])
if n >= 0: n = 0
# print '*** ', lines[i-1], doingpre if not secnum: # haven't hit body yet
if doingpre == 1: n = mailhdrprog.match(line)
lines[i] = '</PRE><P>\n' v = version and -1 or regex.match('Version: ', line)
doingpre = 0 if v > 0 and not version: version = line[v:]
else: if n <= 0 and element != 'li': # not pre if after a list item
lines[i] = '<P>\n' n = prefmtprog.match(line)
continue if n > 0: # make preformatted element
if element == 'pre':
# & -> &amp; content = content + line
n = ampprog.search(lines[i]) else:
if n >= 0: spew('clear ol', 'clear ul')
lines[i] = regsub.gsub(ampprog, '&amp;', lines[i]) element, content = 'pre', line
# no continue - there might be other changes to the line... continue
# zap all the 'Q.' and 'A.' leaders - what happened to the if blankprog.match(line) > 0: # force a new element
# last couple? spew()
n = qprog.search(lines[i]) element = ''
if n >= 0: elif element: # continue current element
lines[i] = regsub.sub(qprog, '>', lines[i]) content = content + line
# no continue - there might be other changes to the line... else: # no element; make paragraph
spew('clear ol', 'clear ul')
n = aprog.search(lines[i]) element, content = 'p', line
if n >= 0:
lines[i] = regsub.sub(aprog, '', lines[i]) spew() # output last element
# no continue - there might be other changes to the line...
body = string.joinfields(body, '')
# patch up hard refs to questions body = regsub.gsub(urlprog, '<a href="\\1">\\1</a>', body)
n = qrefprog.search(lines[i]) body = regsub.gsub(addrprog, '<a href="mailto:\\1">\\1</a>', body)
if n >= 0: body = regsub.gsub(qrefprog, '<a href="#\\1">question \\1</a>', body)
lines[i] = regsub.sub(qrefprog, body = regsub.gsub(srefprog, '<a href="#\\1">section \\1</a>', body)
'<A HREF="#\\1">question \\1</A>', lines[i])
# no continue - there might be other changes to the line... print '<!doctype html public "-//IETF//DTD HTML 2.0//EN"><html>'
print '<head><title>Python Frequently-Asked Questions v' + version
# make <URL:...> into actual links print "</title></head><body>(This file was generated using Ping's"
n = urlprog.search(lines[i]) print '<a href="faq2html.py">faq2html.py</a>.)'
if n >= 0: print body + '</body></html>'
lines[i] = regsub.gsub(urlprog, '<A HREF="\\1">\\1</A>', lines[i])
# no continue - there might be other changes to the line...
# make <user@host.domain> into <mailto:...> links
n = emailprog.search(lines[i])
if n >= 0:
lines[i] = regsub.gsub(emailprog,
'<A HREF="mailto:\\1">\\1</A>', lines[i])
# no continue - there might be other changes to the line...
lines[0:0] = ['<HTML><HEAD><TITLE>Python Frequently Asked Questions v',
version,
'</TITLE>\n',
'</HEAD><body>\n',
'(This file was generated using\n',
'<A HREF="faq2html.py">faq2html.py</A>.)<P>\n']
lines.append('<P></BODY></HTML>\n')
print 'Writing html file...'
f = open(FAQ + '.html', 'w')
for line in lines:
f.write(line)
f.close()
print 'Done.'
main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment