Commit dfe289ad authored by Fred Drake

Lots of small changes to make this work with the Python DOM bindings
(minidom in particular); it was using PyDOM, which is now obsolete.

Only write the output file on success -- this avoids updating the timestamp
on the file on failure, which confuses "make".
parent 2be82461
...@@ -11,12 +11,12 @@ import esistools ...@@ -11,12 +11,12 @@ import esistools
import re import re
import string import string
import sys import sys
import xml.dom.core import xml.dom
import xml.dom.minidom
from xml.dom.core import \ ELEMENT = xml.dom.Node.ELEMENT_NODE
ELEMENT, \ ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE
ENTITY_REFERENCE, \ TEXT = xml.dom.Node.TEXT_NODE
TEXT
class ConversionError(Exception): class ConversionError(Exception):
...@@ -49,32 +49,9 @@ else: ...@@ -49,32 +49,9 @@ else:
pass pass
# Workaround to deal with invalid documents (multiple root elements). This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(doc):
docelem = None
for n in doc.childNodes:
if n.nodeType == ELEMENT:
docelem = n
return docelem
xml.dom.core.Document.get_documentElement = get_documentElement
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(doc):
return xml.dom.core.NodeList(doc._node.children, doc._node)
xml.dom.core.Document.get_childNodes = get_childNodes
def get_first_element(doc, gi): def get_first_element(doc, gi):
for n in doc.childNodes: for n in doc.childNodes:
if n.get_nodeName() == gi: if n.nodeName == gi:
return n return n
def extract_first_element(doc, gi): def extract_first_element(doc, gi):
...@@ -84,13 +61,25 @@ def extract_first_element(doc, gi): ...@@ -84,13 +61,25 @@ def extract_first_element(doc, gi):
return node return node
def get_documentElement(node):
    """Return the last element child of *node*, or None if it has none.

    Every entry of ``node.childNodes`` is examined and the most recent one
    whose ``nodeType`` equals ELEMENT is remembered, so when several element
    children are present the final one is the one returned.
    """
    result = None
    for child in node.childNodes:
        if child.nodeType == ELEMENT:
            # Keep scanning: a later element child replaces an earlier one.
            result = child
    return result
def set_tagName(elem, gi):
    """Rename *elem* by assigning *gi* to both its nodeName and tagName."""
    # Both attributes are assigned in one statement so they always receive
    # the same value.
    elem.nodeName = elem.tagName = gi
def find_all_elements(doc, gi): def find_all_elements(doc, gi):
nodes = [] nodes = []
if doc.get_nodeName() == gi: if doc.nodeName == gi:
nodes.append(doc) nodes.append(doc)
for child in doc.childNodes: for child in doc.childNodes:
if child.nodeType == ELEMENT: if child.nodeType == ELEMENT:
if child.get_tagName() == gi: if child.tagName == gi:
nodes.append(child) nodes.append(child)
for node in child.getElementsByTagName(gi): for node in child.getElementsByTagName(gi):
nodes.append(node) nodes.append(node)
...@@ -99,18 +88,19 @@ def find_all_elements(doc, gi): ...@@ -99,18 +88,19 @@ def find_all_elements(doc, gi):
def find_all_child_elements(doc, gi): def find_all_child_elements(doc, gi):
nodes = [] nodes = []
for child in doc.childNodes: for child in doc.childNodes:
if child.get_nodeName() == gi: if child.nodeName == gi:
nodes.append(child) nodes.append(child)
return nodes return nodes
def find_all_elements_from_set(doc, gi_set): def find_all_elements_from_set(doc, gi_set):
return __find_all_elements_from_set(doc, gi_set, []) return __find_all_elements_from_set(doc, gi_set, [])
def __find_all_elements_from_set(doc, gi_set, nodes): def __find_all_elements_from_set(doc, gi_set, nodes):
if doc.get_nodeName() in gi_set: if doc.nodeName in gi_set:
nodes.append(doc) nodes.append(doc)
for child in doc.childNodes: for child in doc.childNodes:
if child.get_nodeType() == ELEMENT: if child.nodeType == ELEMENT:
__find_all_elements_from_set(child, gi_set, nodes) __find_all_elements_from_set(child, gi_set, nodes)
return nodes return nodes
...@@ -129,7 +119,7 @@ def simplify(doc, fragment): ...@@ -129,7 +119,7 @@ def simplify(doc, fragment):
# update the name of the root element # update the name of the root element
node = get_first_element(fragment, "document") node = get_first_element(fragment, "document")
if node is not None: if node is not None:
node._node.name = documentclass set_tagName(node, documentclass)
while 1: while 1:
node = extract_first_element(fragment, "input") node = extract_first_element(fragment, "input")
if node is None: if node is None:
...@@ -143,7 +133,7 @@ def simplify(doc, fragment): ...@@ -143,7 +133,7 @@ def simplify(doc, fragment):
docelem.insertBefore(text, docelem.firstChild) docelem.insertBefore(text, docelem.firstChild)
docelem.insertBefore(node, text) docelem.insertBefore(node, text)
docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild) docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
while fragment.firstChild and fragment.firstChild.get_nodeType() == TEXT: while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
fragment.removeChild(fragment.firstChild) fragment.removeChild(fragment.firstChild)
...@@ -153,9 +143,9 @@ def cleanup_root_text(doc): ...@@ -153,9 +143,9 @@ def cleanup_root_text(doc):
for n in doc.childNodes: for n in doc.childNodes:
prevskip = skip prevskip = skip
skip = 0 skip = 0
if n.get_nodeType() == TEXT and not prevskip: if n.nodeType == TEXT and not prevskip:
discards.append(n) discards.append(n)
elif n.get_nodeName() == "COMMENT": elif n.nodeName == "COMMENT":
skip = 1 skip = 1
for node in discards: for node in discards:
doc.removeChild(node) doc.removeChild(node)
...@@ -177,8 +167,8 @@ def fixup_descriptors(doc, fragment): ...@@ -177,8 +167,8 @@ def fixup_descriptors(doc, fragment):
def find_and_fix_descriptors(doc, container): def find_and_fix_descriptors(doc, container):
children = container.childNodes children = container.childNodes
for child in children: for child in children:
if child.get_nodeType() == ELEMENT: if child.nodeType == ELEMENT:
tagName = child.get_tagName() tagName = child.tagName
if tagName in DESCRIPTOR_ELEMENTS: if tagName in DESCRIPTOR_ELEMENTS:
rewrite_descriptor(doc, child) rewrite_descriptor(doc, child)
elif tagName == "subsection": elif tagName == "subsection":
...@@ -200,12 +190,12 @@ def rewrite_descriptor(doc, descriptor): ...@@ -200,12 +190,12 @@ def rewrite_descriptor(doc, descriptor):
# 6. Put it back together. # 6. Put it back together.
# #
# 1. # 1.
descname = descriptor.get_tagName() descname = descriptor.tagName
index = 1 index = 1
if descname[-2:] == "ni": if descname[-2:] == "ni":
descname = descname[:-2] descname = descname[:-2]
descriptor.setAttribute("index", "no") descriptor.setAttribute("index", "no")
descriptor._node.name = descname set_tagName(descriptor, descname)
index = 0 index = 0
desctype = descname[:-4] # remove 'desc' desctype = descname[:-4] # remove 'desc'
linename = desctype + "line" linename = desctype + "line"
...@@ -219,7 +209,7 @@ def rewrite_descriptor(doc, descriptor): ...@@ -219,7 +209,7 @@ def rewrite_descriptor(doc, descriptor):
name.appendChild(doc.createTextNode(descriptor.getAttribute("name"))) name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
descriptor.removeAttribute("name") descriptor.removeAttribute("name")
# 2a. # 2a.
if descriptor.attributes.has_key("var"): if descriptor.hasAttribute("var"):
if descname != "opcodedesc": if descname != "opcodedesc":
raise RuntimeError, \ raise RuntimeError, \
"got 'var' attribute on descriptor other than opcodedesc" "got 'var' attribute on descriptor other than opcodedesc"
...@@ -245,10 +235,15 @@ def rewrite_descriptor(doc, descriptor): ...@@ -245,10 +235,15 @@ def rewrite_descriptor(doc, descriptor):
# 3, 4. # 3, 4.
pos = skip_leading_nodes(children, pos) pos = skip_leading_nodes(children, pos)
while pos < len(children) \ while pos < len(children) \
and children[pos].get_nodeName() in (linename, "versionadded"): and children[pos].nodeName in (linename, "versionadded"):
if children[pos].get_tagName() == linename: if children[pos].tagName == linename:
# this is really a supplemental signature, create <signature> # this is really a supplemental signature, create <signature>
oldchild = children[pos].cloneNode(1)
try:
sig = methodline_to_signature(doc, children[pos]) sig = methodline_to_signature(doc, children[pos])
except KeyError:
print oldchild.toxml()
raise
newchildren.append(sig) newchildren.append(sig)
else: else:
# <versionadded added=...> # <versionadded added=...>
...@@ -301,7 +296,7 @@ def move_children(origin, dest, start=0): ...@@ -301,7 +296,7 @@ def move_children(origin, dest, start=0):
def handle_appendix(doc, fragment): def handle_appendix(doc, fragment):
# must be called after simplfy() if document is multi-rooted to begin with # must be called after simplfy() if document is multi-rooted to begin with
docelem = get_documentElement(fragment) docelem = get_documentElement(fragment)
toplevel = docelem.get_tagName() == "manual" and "chapter" or "section" toplevel = docelem.tagName == "manual" and "chapter" or "section"
appendices = 0 appendices = 0
nodes = [] nodes = []
for node in docelem.childNodes: for node in docelem.childNodes:
...@@ -333,7 +328,7 @@ def handle_labels(doc, fragment): ...@@ -333,7 +328,7 @@ def handle_labels(doc, fragment):
if not id: if not id:
continue continue
parent = label.parentNode parent = label.parentNode
parentTagName = parent.get_tagName() parentTagName = parent.tagName
if parentTagName == "title": if parentTagName == "title":
parent.parentNode.setAttribute("id", id) parent.parentNode.setAttribute("id", id)
else: else:
...@@ -352,8 +347,8 @@ def fixup_trailing_whitespace(doc, wsmap): ...@@ -352,8 +347,8 @@ def fixup_trailing_whitespace(doc, wsmap):
while queue: while queue:
node = queue[0] node = queue[0]
del queue[0] del queue[0]
if wsmap.has_key(node.get_nodeName()): if wsmap.has_key(node.nodeName):
ws = wsmap[node.get_tagName()] ws = wsmap[node.tagName]
children = node.childNodes children = node.childNodes
children.reverse() children.reverse()
if children[0].nodeType == TEXT: if children[0].nodeType == TEXT:
...@@ -361,8 +356,8 @@ def fixup_trailing_whitespace(doc, wsmap): ...@@ -361,8 +356,8 @@ def fixup_trailing_whitespace(doc, wsmap):
children[0].data = data children[0].data = data
children.reverse() children.reverse()
# hack to get the title in place: # hack to get the title in place:
if node.get_tagName() == "title" \ if node.tagName == "title" \
and node.parentNode.firstChild.get_nodeType() == ELEMENT: and node.parentNode.firstChild.nodeType == ELEMENT:
node.parentNode.insertBefore(doc.createText("\n "), node.parentNode.insertBefore(doc.createText("\n "),
node.parentNode.firstChild) node.parentNode.firstChild)
for child in node.childNodes: for child in node.childNodes:
...@@ -388,7 +383,7 @@ def cleanup_trailing_parens(doc, element_names): ...@@ -388,7 +383,7 @@ def cleanup_trailing_parens(doc, element_names):
while queue: while queue:
node = queue[0] node = queue[0]
del queue[0] del queue[0]
if rewrite_element(node.get_tagName()): if rewrite_element(node.tagName):
children = node.childNodes children = node.childNodes
if len(children) == 1 \ if len(children) == 1 \
and children[0].nodeType == TEXT: and children[0].nodeType == TEXT:
...@@ -411,7 +406,7 @@ def contents_match(left, right): ...@@ -411,7 +406,7 @@ def contents_match(left, right):
if nodeType != r.nodeType: if nodeType != r.nodeType:
return 0 return 0
if nodeType == ELEMENT: if nodeType == ELEMENT:
if l.get_tagName() != r.get_tagName(): if l.tagName != r.tagName:
return 0 return 0
# should check attributes, but that's not a problem here # should check attributes, but that's not a problem here
if not contents_match(l, r): if not contents_match(l, r):
...@@ -430,19 +425,19 @@ def create_module_info(doc, section): ...@@ -430,19 +425,19 @@ def create_module_info(doc, section):
node = extract_first_element(section, "modulesynopsis") node = extract_first_element(section, "modulesynopsis")
if node is None: if node is None:
return return
node._node.name = "synopsis" set_tagName(node, "synopsis")
lastchild = node.childNodes[-1] lastchild = node.childNodes[-1]
if lastchild.nodeType == TEXT \ if lastchild.nodeType == TEXT \
and lastchild.data[-1:] == ".": and lastchild.data[-1:] == ".":
lastchild.data = lastchild.data[:-1] lastchild.data = lastchild.data[:-1]
modauthor = extract_first_element(section, "moduleauthor") modauthor = extract_first_element(section, "moduleauthor")
if modauthor: if modauthor:
modauthor._node.name = "author" set_tagName(modauthor, "author")
modauthor.appendChild(doc.createTextNode( modauthor.appendChild(doc.createTextNode(
modauthor.getAttribute("name"))) modauthor.getAttribute("name")))
modauthor.removeAttribute("name") modauthor.removeAttribute("name")
platform = extract_first_element(section, "platform") platform = extract_first_element(section, "platform")
if section.get_tagName() == "section": if section.tagName == "section":
modinfo_pos = 2 modinfo_pos = 2
modinfo = doc.createElement("moduleinfo") modinfo = doc.createElement("moduleinfo")
moddecl = extract_first_element(section, "declaremodule") moddecl = extract_first_element(section, "declaremodule")
...@@ -467,13 +462,13 @@ def create_module_info(doc, section): ...@@ -467,13 +462,13 @@ def create_module_info(doc, section):
if title: if title:
children = title.childNodes children = title.childNodes
if len(children) >= 2 \ if len(children) >= 2 \
and children[0].get_nodeName() == "module" \ and children[0].nodeName == "module" \
and children[0].childNodes[0].data == name: and children[0].childNodes[0].data == name:
# this is it; morph the <title> into <short-synopsis> # this is it; morph the <title> into <short-synopsis>
first_data = children[1] first_data = children[1]
if first_data.data[:4] == " ---": if first_data.data[:4] == " ---":
first_data.data = string.lstrip(first_data.data[4:]) first_data.data = string.lstrip(first_data.data[4:])
title._node.name = "short-synopsis" set_tagName(title, "short-synopsis")
if children[-1].nodeType == TEXT \ if children[-1].nodeType == TEXT \
and children[-1].data[-1:] == ".": and children[-1].data[-1:] == ".":
children[-1].data = children[-1].data[:-1] children[-1].data = children[-1].data[:-1]
...@@ -511,7 +506,7 @@ def create_module_info(doc, section): ...@@ -511,7 +506,7 @@ def create_module_info(doc, section):
children = section.childNodes children = section.childNodes
for i in range(len(children)): for i in range(len(children)):
node = children[i] node = children[i]
if node.get_nodeName() == "moduleinfo": if node.nodeName == "moduleinfo":
nextnode = children[i+1] nextnode = children[i+1]
if nextnode.nodeType == TEXT: if nextnode.nodeType == TEXT:
data = nextnode.data data = nextnode.data
...@@ -544,7 +539,7 @@ def fixup_table(doc, table): ...@@ -544,7 +539,7 @@ def fixup_table(doc, table):
children = table.childNodes children = table.childNodes
for child in children: for child in children:
if child.nodeType == ELEMENT: if child.nodeType == ELEMENT:
tagName = child.get_tagName() tagName = child.tagName
if tagName == "hline" and prev_row is not None: if tagName == "hline" and prev_row is not None:
prev_row.setAttribute("rowsep", "1") prev_row.setAttribute("rowsep", "1")
elif tagName == "row": elif tagName == "row":
...@@ -558,13 +553,14 @@ def fixup_table(doc, table): ...@@ -558,13 +553,14 @@ def fixup_table(doc, table):
nodeType = child.nodeType nodeType = child.nodeType
if nodeType == TEXT: if nodeType == TEXT:
if string.strip(child.data): if string.strip(child.data):
raise ConversionError("unexpected free data in table") raise ConversionError("unexpected free data in <%s>: %r"
% (table.tagName, child.data))
table.removeChild(child) table.removeChild(child)
continue continue
if nodeType == ELEMENT: if nodeType == ELEMENT:
if child.get_tagName() != "hline": if child.tagName != "hline":
raise ConversionError( raise ConversionError(
"unexpected <%s> in table" % child.get_tagName()) "unexpected <%s> in table" % child.tagName)
table.removeChild(child) table.removeChild(child)
continue continue
raise ConversionError( raise ConversionError(
...@@ -593,7 +589,7 @@ def fixup_row(doc, row): ...@@ -593,7 +589,7 @@ def fixup_row(doc, row):
def move_elements_by_name(doc, source, dest, name, sep=None): def move_elements_by_name(doc, source, dest, name, sep=None):
nodes = [] nodes = []
for child in source.childNodes: for child in source.childNodes:
if child.get_nodeName() == name: if child.nodeName == name:
nodes.append(child) nodes.append(child)
for node in nodes: for node in nodes:
source.removeChild(node) source.removeChild(node)
...@@ -633,7 +629,7 @@ PARA_LEVEL_PRECEEDERS = ( ...@@ -633,7 +629,7 @@ PARA_LEVEL_PRECEEDERS = (
def fixup_paras(doc, fragment): def fixup_paras(doc, fragment):
for child in fragment.childNodes: for child in fragment.childNodes:
if child.get_nodeName() in RECURSE_INTO_PARA_CONTAINERS: if child.nodeName in RECURSE_INTO_PARA_CONTAINERS:
fixup_paras_helper(doc, child) fixup_paras_helper(doc, child)
descriptions = find_all_elements(fragment, "description") descriptions = find_all_elements(fragment, "description")
for description in descriptions: for description in descriptions:
...@@ -645,7 +641,7 @@ def fixup_paras_helper(doc, container, depth=0): ...@@ -645,7 +641,7 @@ def fixup_paras_helper(doc, container, depth=0):
children = container.childNodes children = container.childNodes
start = skip_leading_nodes(children) start = skip_leading_nodes(children)
while len(children) > start: while len(children) > start:
if children[start].get_nodeName() in RECURSE_INTO_PARA_CONTAINERS: if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS:
# Something to recurse into: # Something to recurse into:
fixup_paras_helper(doc, children[start]) fixup_paras_helper(doc, children[start])
else: else:
...@@ -668,7 +664,7 @@ def build_para(doc, parent, start, i): ...@@ -668,7 +664,7 @@ def build_para(doc, parent, start, i):
child = children[j] child = children[j]
nodeType = child.nodeType nodeType = child.nodeType
if nodeType == ELEMENT: if nodeType == ELEMENT:
if child.get_tagName() in BREAK_ELEMENTS: if child.tagName in BREAK_ELEMENTS:
after = j after = j
break break
elif nodeType == TEXT: elif nodeType == TEXT:
...@@ -742,7 +738,7 @@ def skip_leading_nodes(children, start=0): ...@@ -742,7 +738,7 @@ def skip_leading_nodes(children, start=0):
return start return start
# all whitespace, just skip # all whitespace, just skip
elif nodeType == ELEMENT: elif nodeType == ELEMENT:
tagName = child.get_tagName() tagName = child.tagName
if tagName in RECURSE_INTO_PARA_CONTAINERS: if tagName in RECURSE_INTO_PARA_CONTAINERS:
return start return start
if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS: if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
...@@ -772,7 +768,7 @@ def fixup_signatures(doc, fragment): ...@@ -772,7 +768,7 @@ def fixup_signatures(doc, fragment):
def fixup_args(doc, arglist): def fixup_args(doc, arglist):
for child in arglist.childNodes: for child in arglist.childNodes:
if child.get_nodeName() == "optional": if child.nodeName == "optional":
# found it; fix and return # found it; fix and return
arglist.insertBefore(doc.createTextNode("["), child) arglist.insertBefore(doc.createTextNode("["), child)
optkids = child.childNodes optkids = child.childNodes
...@@ -789,13 +785,13 @@ def fixup_sectionauthors(doc, fragment): ...@@ -789,13 +785,13 @@ def fixup_sectionauthors(doc, fragment):
for sectauth in find_all_elements(fragment, "sectionauthor"): for sectauth in find_all_elements(fragment, "sectionauthor"):
section = sectauth.parentNode section = sectauth.parentNode
section.removeChild(sectauth) section.removeChild(sectauth)
sectauth._node.name = "author" set_tagName(sectauth, "author")
sectauth.appendChild(doc.createTextNode( sectauth.appendChild(doc.createTextNode(
sectauth.getAttribute("name"))) sectauth.getAttribute("name")))
sectauth.removeAttribute("name") sectauth.removeAttribute("name")
after = section.childNodes[2] after = section.childNodes[2]
title = section.childNodes[1] title = section.childNodes[1]
if title.get_nodeName() != "title": if title.nodeName != "title":
after = section.childNodes[0] after = section.childNodes[0]
section.insertBefore(doc.createTextNode("\n "), after) section.insertBefore(doc.createTextNode("\n "), after)
section.insertBefore(sectauth, after) section.insertBefore(sectauth, after)
...@@ -806,17 +802,17 @@ def fixup_verbatims(doc): ...@@ -806,17 +802,17 @@ def fixup_verbatims(doc):
child = verbatim.childNodes[0] child = verbatim.childNodes[0]
if child.nodeType == TEXT \ if child.nodeType == TEXT \
and string.lstrip(child.data)[:3] == ">>>": and string.lstrip(child.data)[:3] == ">>>":
verbatim._node.name = "interactive-session" set_tagName(verbatim, "interactive-session")
def add_node_ids(fragment, counter=0): def add_node_ids(fragment, counter=0):
fragment._node.node_id = counter fragment.node_id = counter
for node in fragment.childNodes: for node in fragment.childNodes:
counter = counter + 1 counter = counter + 1
if node.nodeType == ELEMENT: if node.nodeType == ELEMENT:
counter = add_node_ids(node, counter) counter = add_node_ids(node, counter)
else: else:
node._node.node_id = counter node.node_id = counter
return counter + 1 return counter + 1
...@@ -831,14 +827,14 @@ def fixup_refmodindexes(fragment): ...@@ -831,14 +827,14 @@ def fixup_refmodindexes(fragment):
d = {} d = {}
for node in nodes: for node in nodes:
parent = node.parentNode parent = node.parentNode
d[parent._node.node_id] = parent d[parent.node_id] = parent
del nodes del nodes
map(fixup_refmodindexes_chunk, d.values()) map(fixup_refmodindexes_chunk, d.values())
def fixup_refmodindexes_chunk(container): def fixup_refmodindexes_chunk(container):
# node is probably a <para>; let's see how often it isn't: # node is probably a <para>; let's see how often it isn't:
if container.get_tagName() != PARA_ELEMENT: if container.tagName != PARA_ELEMENT:
bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container) bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
module_entries = find_all_elements(container, "module") module_entries = find_all_elements(container, "module")
if not module_entries: if not module_entries:
...@@ -849,7 +845,7 @@ def fixup_refmodindexes_chunk(container): ...@@ -849,7 +845,7 @@ def fixup_refmodindexes_chunk(container):
children = entry.childNodes children = entry.childNodes
if len(children) != 0: if len(children) != 0:
bwrite("--- unexpected number of children for %s node:\n" bwrite("--- unexpected number of children for %s node:\n"
% entry.get_tagName()) % entry.tagName)
ewrite(entry.toxml() + "\n") ewrite(entry.toxml() + "\n")
continue continue
found = 0 found = 0
...@@ -873,7 +869,7 @@ def fixup_bifuncindexes(fragment): ...@@ -873,7 +869,7 @@ def fixup_bifuncindexes(fragment):
# make sure that each parent is only processed once: # make sure that each parent is only processed once:
for node in nodes: for node in nodes:
parent = node.parentNode parent = node.parentNode
d[parent._node.node_id] = parent d[parent.node_id] = parent
del nodes del nodes
map(fixup_bifuncindexes_chunk, d.values()) map(fixup_bifuncindexes_chunk, d.values())
...@@ -905,7 +901,7 @@ def join_adjacent_elements(container, gi): ...@@ -905,7 +901,7 @@ def join_adjacent_elements(container, gi):
while queue: while queue:
parent = queue.pop() parent = queue.pop()
i = 0 i = 0
children = parent.get_childNodes() children = parent.childNodes
nchildren = len(children) nchildren = len(children)
while i < (nchildren - 1): while i < (nchildren - 1):
child = children[i] child = children[i]
...@@ -914,7 +910,7 @@ def join_adjacent_elements(container, gi): ...@@ -914,7 +910,7 @@ def join_adjacent_elements(container, gi):
ewrite("--- merging two <%s/> elements\n" % gi) ewrite("--- merging two <%s/> elements\n" % gi)
child = children[i] child = children[i]
nextchild = children[i+1] nextchild = children[i+1]
nextchildren = nextchild.get_childNodes() nextchildren = nextchild.childNodes
while len(nextchildren): while len(nextchildren):
node = nextchildren[0] node = nextchildren[0]
nextchild.removeChild(node) nextchild.removeChild(node)
...@@ -932,14 +928,13 @@ def write_esis(doc, ofp, knownempty): ...@@ -932,14 +928,13 @@ def write_esis(doc, ofp, knownempty):
for node in doc.childNodes: for node in doc.childNodes:
nodeType = node.nodeType nodeType = node.nodeType
if nodeType == ELEMENT: if nodeType == ELEMENT:
gi = node.get_tagName() gi = node.tagName
if knownempty(gi): if knownempty(gi):
if node.hasChildNodes(): if node.hasChildNodes():
raise ValueError, \ raise ValueError, \
"declared-empty node <%s> has children" % gi "declared-empty node <%s> has children" % gi
ofp.write("e\n") ofp.write("e\n")
for k, v in node.attributes.items(): for k, value in node.attributes.items():
value = v.value
if _token_rx.match(value): if _token_rx.match(value):
dtype = "TOKEN" dtype = "TOKEN"
else: else:
...@@ -951,16 +946,17 @@ def write_esis(doc, ofp, knownempty): ...@@ -951,16 +946,17 @@ def write_esis(doc, ofp, knownempty):
elif nodeType == TEXT: elif nodeType == TEXT:
ofp.write("-%s\n" % esistools.encode(node.data)) ofp.write("-%s\n" % esistools.encode(node.data))
elif nodeType == ENTITY_REFERENCE: elif nodeType == ENTITY_REFERENCE:
ofp.write("&%s\n" % node.get_nodeName()) ofp.write("&%s\n" % node.nodeName)
else: else:
raise RuntimeError, "unsupported node type: %s" % nodeType raise RuntimeError, "unsupported node type: %s" % nodeType
def convert(ifp, ofp): def convert(ifp, ofp):
p = esistools.ExtendedEsisBuilder() events = esistools.parse(ifp)
p.feed(ifp.read()) toktype, doc = events.getEvent()
doc = p.document fragment = doc.createDocumentFragment()
fragment = p.fragment events.expandNode(fragment)
normalize(fragment) normalize(fragment)
simplify(doc, fragment) simplify(doc, fragment)
handle_labels(doc, fragment) handle_labels(doc, fragment)
...@@ -994,8 +990,10 @@ def convert(ifp, ofp): ...@@ -994,8 +990,10 @@ def convert(ifp, ofp):
join_adjacent_elements(fragment, "option") join_adjacent_elements(fragment, "option")
# #
d = {} d = {}
for gi in p.get_empties(): for gi in events.parser.get_empties():
d[gi] = gi d[gi] = gi
if d.has_key("author"):
del d["author"]
if d.has_key("rfc"): if d.has_key("rfc"):
del d["rfc"] del d["rfc"]
knownempty = d.has_key knownempty = d.has_key
...@@ -1019,11 +1017,17 @@ def main(): ...@@ -1019,11 +1017,17 @@ def main():
ofp = sys.stdout ofp = sys.stdout
elif len(sys.argv) == 3: elif len(sys.argv) == 3:
ifp = open(sys.argv[1]) ifp = open(sys.argv[1])
ofp = open(sys.argv[2], "w") import StringIO
ofp = StringIO.StringIO()
else: else:
usage() usage()
sys.exit(2) sys.exit(2)
convert(ifp, ofp) convert(ifp, ofp)
if len(sys.argv) == 3:
fp = open(sys.argv[2], "w")
fp.write(ofp.getvalue())
fp.close()
ofp.close()
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment