Commit e779d4f0 authored by Fred Drake

Lots of adjustments to deal with the document content now being stored
in a fragment rather than the main document object.
parent 54fb7fb9
...@@ -12,7 +12,10 @@ import re ...@@ -12,7 +12,10 @@ import re
import string import string
import sys import sys
import xml.dom.core import xml.dom.core
import xml.dom.esis_builder
from xml.dom.core import \
ELEMENT, \
TEXT
class ConversionError(Exception): class ConversionError(Exception):
...@@ -32,11 +35,11 @@ else: ...@@ -32,11 +35,11 @@ else:
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(doc):
    """Return the last ELEMENT-type child of `doc`, or None.

    Every direct child in `doc.childNodes` is scanned; when several
    element children exist (a multi-rooted document), the last one wins.
    None is returned when there is no element child at all.
    """
    docelem = None
    for n in doc.childNodes:
        if n.nodeType == ELEMENT:
            # keep overwriting so the final element child is the result
            docelem = n
    return docelem

# Install the function above as the Document accessor.
xml.dom.core.Document.get_documentElement = get_documentElement
...@@ -46,15 +49,15 @@ xml.dom.core.Document.get_documentElement = get_documentElement ...@@ -46,15 +49,15 @@ xml.dom.core.Document.get_documentElement = get_documentElement
# accessed from the Document object via .childNodes (no matter how many # accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None. # levels of access are used) will be given an ownerDocument of None.
# #
def get_childNodes(doc):
    """Return a NodeList over the children of the document's raw node.

    The list is built from `doc._node.children`, with `doc._node` passed
    as the second NodeList argument.
    """
    return xml.dom.core.NodeList(doc._node.children, doc._node)

# Install the function above as the Document accessor.
xml.dom.core.Document.get_childNodes = get_childNodes
def get_first_element(doc, gi):
    """Return the first direct child element of `doc` named `gi`.

    Only `doc.childNodes` is searched (no recursion).  Returns None when
    no ELEMENT child has a matching tagName.
    """
    for n in doc.childNodes:
        if n.nodeType == ELEMENT and n.tagName == gi:
            return n
    return None
def extract_first_element(doc, gi): def extract_first_element(doc, gi):
...@@ -66,10 +69,10 @@ def extract_first_element(doc, gi): ...@@ -66,10 +69,10 @@ def extract_first_element(doc, gi):
def find_all_elements(doc, gi): def find_all_elements(doc, gi):
nodes = [] nodes = []
if doc.nodeType == xml.dom.core.ELEMENT and doc.tagName == gi: if doc.nodeType == ELEMENT and doc.tagName == gi:
nodes.append(doc) nodes.append(doc)
for child in doc.childNodes: for child in doc.childNodes:
if child.nodeType == xml.dom.core.ELEMENT: if child.nodeType == ELEMENT:
if child.tagName == gi: if child.tagName == gi:
nodes.append(child) nodes.append(child)
for node in child.getElementsByTagName(gi): for node in child.getElementsByTagName(gi):
...@@ -77,36 +80,36 @@ def find_all_elements(doc, gi): ...@@ -77,36 +80,36 @@ def find_all_elements(doc, gi):
return nodes return nodes
def simplify(doc, fragment):
    """Rationalize the top level of `fragment` in place.

    `doc` is used only as a factory for new text nodes; all content is
    read from and rewritten in `fragment`.

    Steps:
      * pull out <documentclass> and remember its "classname" attribute;
      * pull out <title> and every top-level <input>;
      * rename the top-level <document> element to the document class;
      * re-insert the extracted nodes at the front of the root element,
        each followed by a newline text node;
      * drop any leading text nodes from the fragment.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(fragment, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(fragment, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(fragment, "document")
    if node is not None:
        node._node.name = documentclass
    # collect every remaining top-level <input>, in document order
    while 1:
        node = extract_first_element(fragment, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = get_documentElement(fragment)
        # Each node is inserted at the very front, so walk the list
        # backwards to end up with the original relative order.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Strip leading text; stop cleanly if the fragment runs out of children
    # instead of dereferencing a missing firstChild.
    while fragment.firstChild is not None \
          and fragment.firstChild.nodeType == TEXT:
        fragment.removeChild(fragment.firstChild)
def cleanup_root_text(doc): def cleanup_root_text(doc):
...@@ -115,9 +118,9 @@ def cleanup_root_text(doc): ...@@ -115,9 +118,9 @@ def cleanup_root_text(doc):
for n in doc.childNodes: for n in doc.childNodes:
prevskip = skip prevskip = skip
skip = 0 skip = 0
if n.nodeType == xml.dom.core.TEXT and not prevskip: if n.nodeType == TEXT and not prevskip:
discards.append(n) discards.append(n)
elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT": elif n.nodeType == ELEMENT and n.tagName == "COMMENT":
skip = 1 skip = 1
for node in discards: for node in discards:
doc.removeChild(node) doc.removeChild(node)
...@@ -130,8 +133,8 @@ DESCRIPTOR_ELEMENTS = ( ...@@ -130,8 +133,8 @@ DESCRIPTOR_ELEMENTS = (
"datadesc", "datadescni", "datadesc", "datadescni",
) )
def fixup_descriptors(doc, fragment):
    """Run find_and_fix_descriptors() on every <section> in `fragment`.

    `doc` is passed through to the helper unchanged.
    """
    sections = find_all_elements(fragment, "section")
    for section in sections:
        find_and_fix_descriptors(doc, section)
...@@ -139,7 +142,7 @@ def fixup_descriptors(doc): ...@@ -139,7 +142,7 @@ def fixup_descriptors(doc):
def find_and_fix_descriptors(doc, container): def find_and_fix_descriptors(doc, container):
children = container.childNodes children = container.childNodes
for child in children: for child in children:
if child.nodeType == xml.dom.core.ELEMENT: if child.nodeType == ELEMENT:
tagName = child.tagName tagName = child.tagName
if tagName in DESCRIPTOR_ELEMENTS: if tagName in DESCRIPTOR_ELEMENTS:
rewrite_descriptor(doc, child) rewrite_descriptor(doc, child)
...@@ -191,7 +194,7 @@ def rewrite_descriptor(doc, descriptor): ...@@ -191,7 +194,7 @@ def rewrite_descriptor(doc, descriptor):
pos = skip_leading_nodes(children, 0) pos = skip_leading_nodes(children, 0)
if pos < len(children): if pos < len(children):
child = children[pos] child = children[pos]
if child.nodeType == xml.dom.core.ELEMENT and child.tagName == "args": if child.nodeType == ELEMENT and child.tagName == "args":
# create an <args> in <signature>: # create an <args> in <signature>:
args = doc.createElement("args") args = doc.createElement("args")
argchildren = [] argchildren = []
...@@ -205,7 +208,7 @@ def rewrite_descriptor(doc, descriptor): ...@@ -205,7 +208,7 @@ def rewrite_descriptor(doc, descriptor):
# 3, 4. # 3, 4.
pos = skip_leading_nodes(children, pos + 1) pos = skip_leading_nodes(children, pos + 1)
while pos < len(children) \ while pos < len(children) \
and children[pos].nodeType == xml.dom.core.ELEMENT \ and children[pos].nodeType == ELEMENT \
and children[pos].tagName in (linename, "versionadded"): and children[pos].tagName in (linename, "versionadded"):
if children[pos].tagName == linename: if children[pos].tagName == linename:
# this is really a supplemental signature, create <signature> # this is really a supplemental signature, create <signature>
...@@ -222,7 +225,7 @@ def rewrite_descriptor(doc, descriptor): ...@@ -222,7 +225,7 @@ def rewrite_descriptor(doc, descriptor):
newchildren.append(description) newchildren.append(description)
move_children(descriptor, description, pos) move_children(descriptor, description, pos)
last = description.childNodes[-1] last = description.childNodes[-1]
if last.nodeType == xml.dom.core.TEXT: if last.nodeType == TEXT:
last.data = string.rstrip(last.data) + "\n " last.data = string.rstrip(last.data) + "\n "
# 6. # 6.
# should have nothing but whitespace and signature lines in <descriptor>; # should have nothing but whitespace and signature lines in <descriptor>;
...@@ -259,16 +262,16 @@ def move_children(origin, dest, start=0): ...@@ -259,16 +262,16 @@ def move_children(origin, dest, start=0):
dest.appendChild(node) dest.appendChild(node)
def handle_appendix(doc): def handle_appendix(doc, fragment):
# must be called after simplify() if document is multi-rooted to begin with # must be called after simplify() if document is multi-rooted to begin with
docelem = doc.documentElement docelem = get_documentElement(fragment)
toplevel = docelem.tagName == "manual" and "chapter" or "section" toplevel = docelem.tagName == "manual" and "chapter" or "section"
appendices = 0 appendices = 0
nodes = [] nodes = []
for node in docelem.childNodes: for node in docelem.childNodes:
if appendices: if appendices:
nodes.append(node) nodes.append(node)
elif node.nodeType == xml.dom.core.ELEMENT: elif node.nodeType == ELEMENT:
appnodes = node.getElementsByTagName("appendix") appnodes = node.getElementsByTagName("appendix")
if appnodes: if appnodes:
appendices = 1 appendices = 1
...@@ -281,7 +284,7 @@ def handle_appendix(doc): ...@@ -281,7 +284,7 @@ def handle_appendix(doc):
back = doc.createElement("back-matter") back = doc.createElement("back-matter")
docelem.appendChild(back) docelem.appendChild(back)
back.appendChild(doc.createTextNode("\n")) back.appendChild(doc.createTextNode("\n"))
while nodes and nodes[0].nodeType == xml.dom.core.TEXT \ while nodes and nodes[0].nodeType == TEXT \
and not string.strip(nodes[0].data): and not string.strip(nodes[0].data):
del nodes[0] del nodes[0]
map(back.appendChild, nodes) map(back.appendChild, nodes)
...@@ -307,28 +310,28 @@ def fixup_trailing_whitespace(doc, wsmap): ...@@ -307,28 +310,28 @@ def fixup_trailing_whitespace(doc, wsmap):
while queue: while queue:
node = queue[0] node = queue[0]
del queue[0] del queue[0]
if node.nodeType == xml.dom.core.ELEMENT \ if node.nodeType == ELEMENT \
and wsmap.has_key(node.tagName): and wsmap.has_key(node.tagName):
ws = wsmap[node.tagName] ws = wsmap[node.tagName]
children = node.childNodes children = node.childNodes
children.reverse() children.reverse()
if children[0].nodeType == xml.dom.core.TEXT: if children[0].nodeType == TEXT:
data = string.rstrip(children[0].data) + ws data = string.rstrip(children[0].data) + ws
children[0].data = data children[0].data = data
children.reverse() children.reverse()
# hack to get the title in place: # hack to get the title in place:
if node.tagName == "title" \ if node.tagName == "title" \
and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT: and node.parentNode.firstChild.nodeType == ELEMENT:
node.parentNode.insertBefore(doc.createText("\n "), node.parentNode.insertBefore(doc.createText("\n "),
node.parentNode.firstChild) node.parentNode.firstChild)
for child in node.childNodes: for child in node.childNodes:
if child.nodeType == xml.dom.core.ELEMENT: if child.nodeType == ELEMENT:
queue.append(child) queue.append(child)
def normalize(doc):
    """Call normalize() on each direct ELEMENT child of `doc`.

    Non-element children are left untouched.
    """
    for node in doc.childNodes:
        if node.nodeType == ELEMENT:
            node.normalize()
...@@ -339,7 +342,7 @@ def cleanup_trailing_parens(doc, element_names): ...@@ -339,7 +342,7 @@ def cleanup_trailing_parens(doc, element_names):
rewrite_element = d.has_key rewrite_element = d.has_key
queue = [] queue = []
for node in doc.childNodes: for node in doc.childNodes:
if node.nodeType == xml.dom.core.ELEMENT: if node.nodeType == ELEMENT:
queue.append(node) queue.append(node)
while queue: while queue:
node = queue[0] node = queue[0]
...@@ -347,13 +350,13 @@ def cleanup_trailing_parens(doc, element_names): ...@@ -347,13 +350,13 @@ def cleanup_trailing_parens(doc, element_names):
if rewrite_element(node.tagName): if rewrite_element(node.tagName):
children = node.childNodes children = node.childNodes
if len(children) == 1 \ if len(children) == 1 \
and children[0].nodeType == xml.dom.core.TEXT: and children[0].nodeType == TEXT:
data = children[0].data data = children[0].data
if data[-2:] == "()": if data[-2:] == "()":
children[0].data = data[:-2] children[0].data = data[:-2]
else: else:
for child in node.childNodes: for child in node.childNodes:
if child.nodeType == xml.dom.core.ELEMENT: if child.nodeType == ELEMENT:
queue.append(child) queue.append(child)
...@@ -366,13 +369,13 @@ def contents_match(left, right): ...@@ -366,13 +369,13 @@ def contents_match(left, right):
nodeType = l.nodeType nodeType = l.nodeType
if nodeType != r.nodeType: if nodeType != r.nodeType:
return 0 return 0
if nodeType == xml.dom.core.ELEMENT: if nodeType == ELEMENT:
if l.tagName != r.tagName: if l.tagName != r.tagName:
return 0 return 0
# should check attributes, but that's not a problem here # should check attributes, but that's not a problem here
if not contents_match(l, r): if not contents_match(l, r):
return 0 return 0
elif nodeType == xml.dom.core.TEXT: elif nodeType == TEXT:
if l.data != r.data: if l.data != r.data:
return 0 return 0
else: else:
...@@ -388,7 +391,7 @@ def create_module_info(doc, section): ...@@ -388,7 +391,7 @@ def create_module_info(doc, section):
return return
node._node.name = "synopsis" node._node.name = "synopsis"
lastchild = node.childNodes[-1] lastchild = node.childNodes[-1]
if lastchild.nodeType == xml.dom.core.TEXT \ if lastchild.nodeType == TEXT \
and lastchild.data[-1:] == ".": and lastchild.data[-1:] == ".":
lastchild.data = lastchild.data[:-1] lastchild.data = lastchild.data[:-1]
modauthor = extract_first_element(section, "moduleauthor") modauthor = extract_first_element(section, "moduleauthor")
...@@ -423,7 +426,7 @@ def create_module_info(doc, section): ...@@ -423,7 +426,7 @@ def create_module_info(doc, section):
if title: if title:
children = title.childNodes children = title.childNodes
if len(children) >= 2 \ if len(children) >= 2 \
and children[0].nodeType == xml.dom.core.ELEMENT \ and children[0].nodeType == ELEMENT \
and children[0].tagName == "module" \ and children[0].tagName == "module" \
and children[0].childNodes[0].data == name: and children[0].childNodes[0].data == name:
# this is it; morph the <title> into <short-synopsis> # this is it; morph the <title> into <short-synopsis>
...@@ -431,7 +434,7 @@ def create_module_info(doc, section): ...@@ -431,7 +434,7 @@ def create_module_info(doc, section):
if first_data.data[:4] == " ---": if first_data.data[:4] == " ---":
first_data.data = string.lstrip(first_data.data[4:]) first_data.data = string.lstrip(first_data.data[4:])
title._node.name = "short-synopsis" title._node.name = "short-synopsis"
if children[-1].nodeType == xml.dom.core.TEXT \ if children[-1].nodeType == TEXT \
and children[-1].data[-1:] == ".": and children[-1].data[-1:] == ".":
children[-1].data = children[-1].data[:-1] children[-1].data = children[-1].data[:-1]
section.removeChild(title) section.removeChild(title)
...@@ -470,10 +473,10 @@ def create_module_info(doc, section): ...@@ -470,10 +473,10 @@ def create_module_info(doc, section):
children = section.childNodes children = section.childNodes
for i in range(len(children)): for i in range(len(children)):
node = children[i] node = children[i]
if node.nodeType == xml.dom.core.ELEMENT \ if node.nodeType == ELEMENT \
and node.tagName == "moduleinfo": and node.tagName == "moduleinfo":
nextnode = children[i+1] nextnode = children[i+1]
if nextnode.nodeType == xml.dom.core.TEXT: if nextnode.nodeType == TEXT:
data = nextnode.data data = nextnode.data
if len(string.lstrip(data)) < (len(data) - 4): if len(string.lstrip(data)) < (len(data) - 4):
nextnode.data = "\n\n\n" + string.lstrip(data) nextnode.data = "\n\n\n" + string.lstrip(data)
...@@ -487,7 +490,7 @@ def cleanup_synopses(doc): ...@@ -487,7 +490,7 @@ def cleanup_synopses(doc):
def remap_element_names(root, name_map): def remap_element_names(root, name_map):
queue = [] queue = []
for child in root.childNodes: for child in root.childNodes:
if child.nodeType == xml.dom.core.ELEMENT: if child.nodeType == ELEMENT:
queue.append(child) queue.append(child)
while queue: while queue:
node = queue.pop() node = queue.pop()
...@@ -498,13 +501,13 @@ def remap_element_names(root, name_map): ...@@ -498,13 +501,13 @@ def remap_element_names(root, name_map):
for attr, value in attrs.items(): for attr, value in attrs.items():
node.setAttribute(attr, value) node.setAttribute(attr, value)
for child in node.childNodes: for child in node.childNodes:
if child.nodeType == xml.dom.core.ELEMENT: if child.nodeType == ELEMENT:
queue.append(child) queue.append(child)
def fixup_table_structures(doc, fragment):
    """Apply fixup_table() to every <table> element found in `fragment`.

    `doc` is passed through to fixup_table() unchanged.
    """
    # must be done after remap_element_names(), or the tables won't be found
    for table in find_all_elements(fragment, "table"):
        fixup_table(doc, table)
...@@ -522,7 +525,7 @@ def fixup_table(doc, table): ...@@ -522,7 +525,7 @@ def fixup_table(doc, table):
last_was_hline = 0 last_was_hline = 0
children = table.childNodes children = table.childNodes
for child in children: for child in children:
if child.nodeType == xml.dom.core.ELEMENT: if child.nodeType == ELEMENT:
tagName = child.tagName tagName = child.tagName
if tagName == "hline" and prev_row is not None: if tagName == "hline" and prev_row is not None:
prev_row.setAttribute("rowsep", "1") prev_row.setAttribute("rowsep", "1")
...@@ -535,12 +538,12 @@ def fixup_table(doc, table): ...@@ -535,12 +538,12 @@ def fixup_table(doc, table):
while children: while children:
child = children[0] child = children[0]
nodeType = child.nodeType nodeType = child.nodeType
if nodeType == xml.dom.core.TEXT: if nodeType == TEXT:
if string.strip(child.data): if string.strip(child.data):
raise ConversionError("unexpected free data in table") raise ConversionError("unexpected free data in table")
table.removeChild(child) table.removeChild(child)
continue continue
if nodeType == xml.dom.core.ELEMENT: if nodeType == ELEMENT:
if child.tagName != "hline": if child.tagName != "hline":
raise ConversionError( raise ConversionError(
"unexpected <%s> in table" % child.tagName) "unexpected <%s> in table" % child.tagName)
...@@ -572,7 +575,7 @@ def fixup_row(doc, row): ...@@ -572,7 +575,7 @@ def fixup_row(doc, row):
def move_elements_by_name(doc, source, dest, name, sep=None): def move_elements_by_name(doc, source, dest, name, sep=None):
nodes = [] nodes = []
for child in source.childNodes: for child in source.childNodes:
if child.nodeType == xml.dom.core.ELEMENT and child.tagName == name: if child.nodeType == ELEMENT and child.tagName == name:
nodes.append(child) nodes.append(child)
for node in nodes: for node in nodes:
source.removeChild(node) source.removeChild(node)
...@@ -606,13 +609,13 @@ PARA_LEVEL_PRECEEDERS = ( ...@@ -606,13 +609,13 @@ PARA_LEVEL_PRECEEDERS = (
) )
def fixup_paras(doc, fragment):
    """Run fixup_paras_helper() over the paragraph containers in `fragment`.

    Two passes are made: first over the direct element children of
    `fragment` whose tag is listed in RECURSE_INTO_PARA_CONTAINERS, then
    over every <description> element found anywhere in `fragment`.
    `doc` is passed through to the helper unchanged.
    """
    for child in fragment.childNodes:
        if child.nodeType == ELEMENT \
           and child.tagName in RECURSE_INTO_PARA_CONTAINERS:
            # top-level container: let the helper process its contents
            fixup_paras_helper(doc, child)
    descriptions = find_all_elements(fragment, "description")
    for description in descriptions:
        fixup_paras_helper(doc, description)
...@@ -628,7 +631,7 @@ def fixup_paras_helper(doc, container, depth=0): ...@@ -628,7 +631,7 @@ def fixup_paras_helper(doc, container, depth=0):
# #
# Either paragraph material or something to recurse into: # Either paragraph material or something to recurse into:
# #
if (children[start].nodeType == xml.dom.core.ELEMENT) \ if (children[start].nodeType == ELEMENT) \
and (children[start].tagName in RECURSE_INTO_PARA_CONTAINERS): and (children[start].tagName in RECURSE_INTO_PARA_CONTAINERS):
fixup_paras_helper(doc, children[start]) fixup_paras_helper(doc, children[start])
start = skip_leading_nodes(children, start + 1) start = skip_leading_nodes(children, start + 1)
...@@ -653,11 +656,11 @@ def build_para(doc, parent, start, i): ...@@ -653,11 +656,11 @@ def build_para(doc, parent, start, i):
after = j + 1 after = j + 1
child = children[j] child = children[j]
nodeType = child.nodeType nodeType = child.nodeType
if nodeType == xml.dom.core.ELEMENT: if nodeType == ELEMENT:
if child.tagName in BREAK_ELEMENTS: if child.tagName in BREAK_ELEMENTS:
after = j after = j
break break
elif nodeType == xml.dom.core.TEXT: elif nodeType == TEXT:
pos = string.find(child.data, "\n\n") pos = string.find(child.data, "\n\n")
if pos == 0: if pos == 0:
after = j after = j
...@@ -670,7 +673,7 @@ def build_para(doc, parent, start, i): ...@@ -670,7 +673,7 @@ def build_para(doc, parent, start, i):
if (start + 1) > after: if (start + 1) > after:
raise ConversionError( raise ConversionError(
"build_para() could not identify content to turn into a paragraph") "build_para() could not identify content to turn into a paragraph")
if children[after - 1].nodeType == xml.dom.core.TEXT: if children[after - 1].nodeType == TEXT:
# we may need to split off trailing white space: # we may need to split off trailing white space:
child = children[after - 1] child = children[after - 1]
data = child.data data = child.data
...@@ -707,7 +710,7 @@ def skip_leading_nodes(children, start): ...@@ -707,7 +710,7 @@ def skip_leading_nodes(children, start):
# skip over leading comments and whitespace: # skip over leading comments and whitespace:
child = children[start] child = children[start]
nodeType = child.nodeType nodeType = child.nodeType
if nodeType == xml.dom.core.TEXT: if nodeType == TEXT:
data = child.data data = child.data
shortened = string.lstrip(data) shortened = string.lstrip(data)
if shortened: if shortened:
...@@ -717,7 +720,7 @@ def skip_leading_nodes(children, start): ...@@ -717,7 +720,7 @@ def skip_leading_nodes(children, start):
return start + 1 return start + 1
return start return start
# all whitespace, just skip # all whitespace, just skip
elif nodeType == xml.dom.core.ELEMENT: elif nodeType == ELEMENT:
tagName = child.tagName tagName = child.tagName
if tagName in RECURSE_INTO_PARA_CONTAINERS: if tagName in RECURSE_INTO_PARA_CONTAINERS:
return start return start
...@@ -727,15 +730,15 @@ def skip_leading_nodes(children, start): ...@@ -727,15 +730,15 @@ def skip_leading_nodes(children, start):
return start return start
def fixup_rfc_references(doc, fragment):
    """Append the text "RFC <num>" to every <rfc> element in `fragment`.

    <num> is the value of the element's "num" attribute; the new text
    node is created through `doc`.
    """
    for rfcnode in find_all_elements(fragment, "rfc"):
        rfcnode.appendChild(doc.createTextNode(
            "RFC " + rfcnode.getAttribute("num")))
def fixup_signatures(doc): def fixup_signatures(doc, fragment):
for child in doc.childNodes: for child in fragment.childNodes:
if child.nodeType == xml.dom.core.ELEMENT: if child.nodeType == ELEMENT:
args = child.getElementsByTagName("args") args = child.getElementsByTagName("args")
for arg in args: for arg in args:
fixup_args(doc, arg) fixup_args(doc, arg)
...@@ -748,7 +751,7 @@ def fixup_signatures(doc): ...@@ -748,7 +751,7 @@ def fixup_signatures(doc):
def fixup_args(doc, arglist): def fixup_args(doc, arglist):
for child in arglist.childNodes: for child in arglist.childNodes:
if child.nodeType == xml.dom.core.ELEMENT \ if child.nodeType == ELEMENT \
and child.tagName == "optional": and child.tagName == "optional":
# found it; fix and return # found it; fix and return
arglist.insertBefore(doc.createTextNode("["), child) arglist.insertBefore(doc.createTextNode("["), child)
...@@ -762,8 +765,8 @@ def fixup_args(doc, arglist): ...@@ -762,8 +765,8 @@ def fixup_args(doc, arglist):
return fixup_args(doc, arglist) return fixup_args(doc, arglist)
def fixup_sectionauthors(doc): def fixup_sectionauthors(doc, fragment):
for sectauth in find_all_elements(doc, "sectionauthor"): for sectauth in find_all_elements(fragment, "sectionauthor"):
section = sectauth.parentNode section = sectauth.parentNode
section.removeChild(sectauth) section.removeChild(sectauth)
sectauth._node.name = "author" sectauth._node.name = "author"
...@@ -772,7 +775,7 @@ def fixup_sectionauthors(doc): ...@@ -772,7 +775,7 @@ def fixup_sectionauthors(doc):
sectauth.removeAttribute("name") sectauth.removeAttribute("name")
after = section.childNodes[2] after = section.childNodes[2]
title = section.childNodes[1] title = section.childNodes[1]
if title.nodeType == xml.dom.core.ELEMENT and title.tagName != "title": if title.nodeType == ELEMENT and title.tagName != "title":
after = section.childNodes[0] after = section.childNodes[0]
section.insertBefore(doc.createTextNode("\n "), after) section.insertBefore(doc.createTextNode("\n "), after)
section.insertBefore(sectauth, after) section.insertBefore(sectauth, after)
...@@ -781,10 +784,9 @@ def fixup_sectionauthors(doc): ...@@ -781,10 +784,9 @@ def fixup_sectionauthors(doc):
def fixup_verbatims(doc):
    """Rename <verbatim> elements that hold an interactive session.

    A <verbatim> whose first child is a text node starting (after leading
    whitespace) with ">>>" is renamed to "interactive-session".  Empty
    <verbatim> elements are skipped.
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        if not verbatim.hasChildNodes():
            # nothing to inspect; indexing childNodes[0] would fail
            continue
        child = verbatim.childNodes[0]
        if child.nodeType == TEXT \
           and string.lstrip(child.data)[:3] == ">>>":
            verbatim._node.name = "interactive-session"
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
...@@ -792,7 +794,7 @@ _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$") ...@@ -792,7 +794,7 @@ _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
def write_esis(doc, ofp, knownempty): def write_esis(doc, ofp, knownempty):
for node in doc.childNodes: for node in doc.childNodes:
nodeType = node.nodeType nodeType = node.nodeType
if nodeType == xml.dom.core.ELEMENT: if nodeType == ELEMENT:
gi = node.tagName gi = node.tagName
if knownempty(gi): if knownempty(gi):
if node.hasChildNodes(): if node.hasChildNodes():
...@@ -808,7 +810,7 @@ def write_esis(doc, ofp, knownempty): ...@@ -808,7 +810,7 @@ def write_esis(doc, ofp, knownempty):
ofp.write("(%s\n" % gi) ofp.write("(%s\n" % gi)
write_esis(node, ofp, knownempty) write_esis(node, ofp, knownempty)
ofp.write(")%s\n" % gi) ofp.write(")%s\n" % gi)
elif nodeType == xml.dom.core.TEXT: elif nodeType == TEXT:
ofp.write("-%s\n" % esistools.encode(node.data)) ofp.write("-%s\n" % esistools.encode(node.data))
else: else:
raise RuntimeError, "unsupported node type: %s" % nodeType raise RuntimeError, "unsupported node type: %s" % nodeType
...@@ -818,10 +820,11 @@ def convert(ifp, ofp): ...@@ -818,10 +820,11 @@ def convert(ifp, ofp):
p = esistools.ExtendedEsisBuilder() p = esistools.ExtendedEsisBuilder()
p.feed(ifp.read()) p.feed(ifp.read())
doc = p.document doc = p.document
normalize(doc) fragment = p.fragment
simplify(doc) normalize(fragment)
handle_labels(doc) simplify(doc, fragment)
handle_appendix(doc) handle_labels(fragment)
handle_appendix(doc, fragment)
fixup_trailing_whitespace(doc, { fixup_trailing_whitespace(doc, {
"abstract": "\n", "abstract": "\n",
"title": "", "title": "",
...@@ -835,12 +838,12 @@ def convert(ifp, ofp): ...@@ -835,12 +838,12 @@ def convert(ifp, ofp):
cleanup_root_text(doc) cleanup_root_text(doc)
cleanup_trailing_parens(doc, ["function", "method", "cfunction"]) cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
cleanup_synopses(doc) cleanup_synopses(doc)
fixup_descriptors(doc) fixup_descriptors(doc, fragment)
fixup_verbatims(doc) fixup_verbatims(fragment)
normalize(doc) normalize(fragment)
fixup_paras(doc) fixup_paras(doc, fragment)
fixup_sectionauthors(doc) fixup_sectionauthors(doc, fragment)
remap_element_names(doc, { remap_element_names(fragment, {
"tableii": ("table", {"cols": "2"}), "tableii": ("table", {"cols": "2"}),
"tableiii": ("table", {"cols": "3"}), "tableiii": ("table", {"cols": "3"}),
"tableiv": ("table", {"cols": "4"}), "tableiv": ("table", {"cols": "4"}),
...@@ -849,9 +852,9 @@ def convert(ifp, ofp): ...@@ -849,9 +852,9 @@ def convert(ifp, ofp):
"lineiv": ("row", {}), "lineiv": ("row", {}),
"refmodule": ("module", {"link": "link"}), "refmodule": ("module", {"link": "link"}),
}) })
fixup_table_structures(doc) fixup_table_structures(doc, fragment)
fixup_rfc_references(doc) fixup_rfc_references(doc, fragment)
fixup_signatures(doc) fixup_signatures(doc, fragment)
# #
d = {} d = {}
for gi in p.get_empties(): for gi in p.get_empties():
...@@ -861,7 +864,7 @@ def convert(ifp, ofp): ...@@ -861,7 +864,7 @@ def convert(ifp, ofp):
knownempty = d.has_key knownempty = d.has_key
# #
try: try:
write_esis(doc, ofp, knownempty) write_esis(fragment, ofp, knownempty)
except IOError, (err, msg): except IOError, (err, msg):
# Ignore EPIPE; it just means that whoever we're writing to stopped # Ignore EPIPE; it just means that whoever we're writing to stopped
# reading. The rest of the output would be ignored. All other errors # reading. The rest of the output would be ignored. All other errors
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment