Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
e779d4f0
Commit
e779d4f0
authored
May 10, 1999
by
Fred Drake
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Lots of adjustments to deal with the document content now being stored
in a fragment rather than the main document object.
parent
54fb7fb9
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
94 additions
and
91 deletions
+94
-91
Doc/tools/sgmlconv/docfixer.py
Doc/tools/sgmlconv/docfixer.py
+94
-91
No files found.
Doc/tools/sgmlconv/docfixer.py
View file @
e779d4f0
...
@@ -12,7 +12,10 @@ import re
...
@@ -12,7 +12,10 @@ import re
import
string
import
string
import
sys
import
sys
import
xml.dom.core
import
xml.dom.core
import
xml.dom.esis_builder
from
xml.dom.core
import
\
ELEMENT
,
\
TEXT
class
ConversionError
(
Exception
):
class
ConversionError
(
Exception
):
...
@@ -32,11 +35,11 @@ else:
...
@@ -32,11 +35,11 @@ else:
# Workaround to deal with invalid documents (multiple root elements). This
# Workaround to deal with invalid documents (multiple root elements). This
# does not indicate a bug in the DOM implementation.
# does not indicate a bug in the DOM implementation.
#
#
def
get_documentElement
(
self
):
def
get_documentElement
(
doc
):
docelem
=
None
docelem
=
None
for
n
in
self
.
_node
.
children
:
for
n
in
doc
.
childNodes
:
if
n
.
type
==
xml
.
dom
.
core
.
ELEMENT
:
if
n
.
nodeType
==
ELEMENT
:
docelem
=
xml
.
dom
.
core
.
Element
(
n
,
self
,
self
)
docelem
=
n
return
docelem
return
docelem
xml
.
dom
.
core
.
Document
.
get_documentElement
=
get_documentElement
xml
.
dom
.
core
.
Document
.
get_documentElement
=
get_documentElement
...
@@ -46,15 +49,15 @@ xml.dom.core.Document.get_documentElement = get_documentElement
...
@@ -46,15 +49,15 @@ xml.dom.core.Document.get_documentElement = get_documentElement
# accessed from the Document object via .childNodes (no matter how many
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
# levels of access are used) will be given an ownerDocument of None.
#
#
def
get_childNodes
(
self
):
def
get_childNodes
(
doc
):
return
xml
.
dom
.
core
.
NodeList
(
self
.
_node
.
children
,
self
,
self
)
return
xml
.
dom
.
core
.
NodeList
(
doc
.
_node
.
children
,
doc
.
_node
)
xml
.
dom
.
core
.
Document
.
get_childNodes
=
get_childNodes
xml
.
dom
.
core
.
Document
.
get_childNodes
=
get_childNodes
def
get_first_element
(
doc
,
gi
):
def
get_first_element
(
doc
,
gi
):
for
n
in
doc
.
childNodes
:
for
n
in
doc
.
childNodes
:
if
n
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
and
n
.
tagName
==
gi
:
if
n
.
nodeType
==
ELEMENT
and
n
.
tagName
==
gi
:
return
n
return
n
def
extract_first_element
(
doc
,
gi
):
def
extract_first_element
(
doc
,
gi
):
...
@@ -66,10 +69,10 @@ def extract_first_element(doc, gi):
...
@@ -66,10 +69,10 @@ def extract_first_element(doc, gi):
def
find_all_elements
(
doc
,
gi
):
def
find_all_elements
(
doc
,
gi
):
nodes
=
[]
nodes
=
[]
if
doc
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
and
doc
.
tagName
==
gi
:
if
doc
.
nodeType
==
ELEMENT
and
doc
.
tagName
==
gi
:
nodes
.
append
(
doc
)
nodes
.
append
(
doc
)
for
child
in
doc
.
childNodes
:
for
child
in
doc
.
childNodes
:
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
child
.
nodeType
==
ELEMENT
:
if
child
.
tagName
==
gi
:
if
child
.
tagName
==
gi
:
nodes
.
append
(
child
)
nodes
.
append
(
child
)
for
node
in
child
.
getElementsByTagName
(
gi
):
for
node
in
child
.
getElementsByTagName
(
gi
):
...
@@ -77,36 +80,36 @@ def find_all_elements(doc, gi):
...
@@ -77,36 +80,36 @@ def find_all_elements(doc, gi):
return
nodes
return
nodes
def
simplify
(
doc
):
def
simplify
(
doc
,
fragment
):
# Try to rationalize the document a bit, since these things are simply
# Try to rationalize the document a bit, since these things are simply
# not valid SGML/XML documents as they stand, and need a little work.
# not valid SGML/XML documents as they stand, and need a little work.
documentclass
=
"document"
documentclass
=
"document"
inputs
=
[]
inputs
=
[]
node
=
extract_first_element
(
doc
,
"documentclass"
)
node
=
extract_first_element
(
fragment
,
"documentclass"
)
if
node
is
not
None
:
if
node
is
not
None
:
documentclass
=
node
.
getAttribute
(
"classname"
)
documentclass
=
node
.
getAttribute
(
"classname"
)
node
=
extract_first_element
(
doc
,
"title"
)
node
=
extract_first_element
(
fragment
,
"title"
)
if
node
is
not
None
:
if
node
is
not
None
:
inputs
.
append
(
node
)
inputs
.
append
(
node
)
# update the name of the root element
# update the name of the root element
node
=
get_first_element
(
doc
,
"document"
)
node
=
get_first_element
(
fragment
,
"document"
)
if
node
is
not
None
:
if
node
is
not
None
:
node
.
_node
.
name
=
documentclass
node
.
_node
.
name
=
documentclass
while
1
:
while
1
:
node
=
extract_first_element
(
doc
,
"input"
)
node
=
extract_first_element
(
fragment
,
"input"
)
if
node
is
None
:
if
node
is
None
:
break
break
inputs
.
append
(
node
)
inputs
.
append
(
node
)
if
inputs
:
if
inputs
:
docelem
=
doc
.
documentElement
docelem
=
get_documentElement
(
fragment
)
inputs
.
reverse
()
inputs
.
reverse
()
for
node
in
inputs
:
for
node
in
inputs
:
text
=
doc
.
createTextNode
(
"
\
n
"
)
text
=
doc
.
createTextNode
(
"
\
n
"
)
docelem
.
insertBefore
(
text
,
docelem
.
firstChild
)
docelem
.
insertBefore
(
text
,
docelem
.
firstChild
)
docelem
.
insertBefore
(
node
,
text
)
docelem
.
insertBefore
(
node
,
text
)
docelem
.
insertBefore
(
doc
.
createTextNode
(
"
\
n
"
),
docelem
.
firstChild
)
docelem
.
insertBefore
(
doc
.
createTextNode
(
"
\
n
"
),
docelem
.
firstChild
)
while
doc
.
firstChild
.
nodeType
==
xml
.
dom
.
core
.
TEXT
:
while
fragment
.
firstChild
.
nodeType
==
TEXT
:
doc
.
removeChild
(
doc
.
firstChild
)
fragment
.
removeChild
(
fragment
.
firstChild
)
def
cleanup_root_text
(
doc
):
def
cleanup_root_text
(
doc
):
...
@@ -115,9 +118,9 @@ def cleanup_root_text(doc):
...
@@ -115,9 +118,9 @@ def cleanup_root_text(doc):
for
n
in
doc
.
childNodes
:
for
n
in
doc
.
childNodes
:
prevskip
=
skip
prevskip
=
skip
skip
=
0
skip
=
0
if
n
.
nodeType
==
xml
.
dom
.
core
.
TEXT
and
not
prevskip
:
if
n
.
nodeType
==
TEXT
and
not
prevskip
:
discards
.
append
(
n
)
discards
.
append
(
n
)
elif
n
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
and
n
.
tagName
==
"COMMENT"
:
elif
n
.
nodeType
==
ELEMENT
and
n
.
tagName
==
"COMMENT"
:
skip
=
1
skip
=
1
for
node
in
discards
:
for
node
in
discards
:
doc
.
removeChild
(
node
)
doc
.
removeChild
(
node
)
...
@@ -130,8 +133,8 @@ DESCRIPTOR_ELEMENTS = (
...
@@ -130,8 +133,8 @@ DESCRIPTOR_ELEMENTS = (
"datadesc"
,
"datadescni"
,
"datadesc"
,
"datadescni"
,
)
)
def
fixup_descriptors
(
doc
):
def
fixup_descriptors
(
doc
,
fragment
):
sections
=
find_all_elements
(
doc
,
"section"
)
sections
=
find_all_elements
(
fragment
,
"section"
)
for
section
in
sections
:
for
section
in
sections
:
find_and_fix_descriptors
(
doc
,
section
)
find_and_fix_descriptors
(
doc
,
section
)
...
@@ -139,7 +142,7 @@ def fixup_descriptors(doc):
...
@@ -139,7 +142,7 @@ def fixup_descriptors(doc):
def
find_and_fix_descriptors
(
doc
,
container
):
def
find_and_fix_descriptors
(
doc
,
container
):
children
=
container
.
childNodes
children
=
container
.
childNodes
for
child
in
children
:
for
child
in
children
:
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
child
.
nodeType
==
ELEMENT
:
tagName
=
child
.
tagName
tagName
=
child
.
tagName
if
tagName
in
DESCRIPTOR_ELEMENTS
:
if
tagName
in
DESCRIPTOR_ELEMENTS
:
rewrite_descriptor
(
doc
,
child
)
rewrite_descriptor
(
doc
,
child
)
...
@@ -191,7 +194,7 @@ def rewrite_descriptor(doc, descriptor):
...
@@ -191,7 +194,7 @@ def rewrite_descriptor(doc, descriptor):
pos
=
skip_leading_nodes
(
children
,
0
)
pos
=
skip_leading_nodes
(
children
,
0
)
if
pos
<
len
(
children
):
if
pos
<
len
(
children
):
child
=
children
[
pos
]
child
=
children
[
pos
]
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
and
child
.
tagName
==
"args"
:
if
child
.
nodeType
==
ELEMENT
and
child
.
tagName
==
"args"
:
# create an <args> in <signature>:
# create an <args> in <signature>:
args
=
doc
.
createElement
(
"args"
)
args
=
doc
.
createElement
(
"args"
)
argchildren
=
[]
argchildren
=
[]
...
@@ -205,7 +208,7 @@ def rewrite_descriptor(doc, descriptor):
...
@@ -205,7 +208,7 @@ def rewrite_descriptor(doc, descriptor):
# 3, 4.
# 3, 4.
pos
=
skip_leading_nodes
(
children
,
pos
+
1
)
pos
=
skip_leading_nodes
(
children
,
pos
+
1
)
while
pos
<
len
(
children
)
\
while
pos
<
len
(
children
)
\
and
children
[
pos
].
nodeType
==
xml
.
dom
.
core
.
ELEMENT
\
and
children
[
pos
].
nodeType
==
ELEMENT
\
and
children
[
pos
].
tagName
in
(
linename
,
"versionadded"
):
and
children
[
pos
].
tagName
in
(
linename
,
"versionadded"
):
if
children
[
pos
].
tagName
==
linename
:
if
children
[
pos
].
tagName
==
linename
:
# this is really a supplemental signature, create <signature>
# this is really a supplemental signature, create <signature>
...
@@ -222,7 +225,7 @@ def rewrite_descriptor(doc, descriptor):
...
@@ -222,7 +225,7 @@ def rewrite_descriptor(doc, descriptor):
newchildren
.
append
(
description
)
newchildren
.
append
(
description
)
move_children
(
descriptor
,
description
,
pos
)
move_children
(
descriptor
,
description
,
pos
)
last
=
description
.
childNodes
[
-
1
]
last
=
description
.
childNodes
[
-
1
]
if
last
.
nodeType
==
xml
.
dom
.
core
.
TEXT
:
if
last
.
nodeType
==
TEXT
:
last
.
data
=
string
.
rstrip
(
last
.
data
)
+
"
\
n
"
last
.
data
=
string
.
rstrip
(
last
.
data
)
+
"
\
n
"
# 6.
# 6.
# should have nothing but whitespace and signature lines in <descriptor>;
# should have nothing but whitespace and signature lines in <descriptor>;
...
@@ -259,16 +262,16 @@ def move_children(origin, dest, start=0):
...
@@ -259,16 +262,16 @@ def move_children(origin, dest, start=0):
dest
.
appendChild
(
node
)
dest
.
appendChild
(
node
)
def
handle_appendix
(
doc
):
def
handle_appendix
(
doc
,
fragment
):
# must be called after simplfy() if document is multi-rooted to begin with
# must be called after simplfy() if document is multi-rooted to begin with
docelem
=
doc
.
documentElement
docelem
=
get_documentElement
(
fragment
)
toplevel
=
docelem
.
tagName
==
"manual"
and
"chapter"
or
"section"
toplevel
=
docelem
.
tagName
==
"manual"
and
"chapter"
or
"section"
appendices
=
0
appendices
=
0
nodes
=
[]
nodes
=
[]
for
node
in
docelem
.
childNodes
:
for
node
in
docelem
.
childNodes
:
if
appendices
:
if
appendices
:
nodes
.
append
(
node
)
nodes
.
append
(
node
)
elif
node
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
elif
node
.
nodeType
==
ELEMENT
:
appnodes
=
node
.
getElementsByTagName
(
"appendix"
)
appnodes
=
node
.
getElementsByTagName
(
"appendix"
)
if
appnodes
:
if
appnodes
:
appendices
=
1
appendices
=
1
...
@@ -281,7 +284,7 @@ def handle_appendix(doc):
...
@@ -281,7 +284,7 @@ def handle_appendix(doc):
back
=
doc
.
createElement
(
"back-matter"
)
back
=
doc
.
createElement
(
"back-matter"
)
docelem
.
appendChild
(
back
)
docelem
.
appendChild
(
back
)
back
.
appendChild
(
doc
.
createTextNode
(
"
\
n
"
))
back
.
appendChild
(
doc
.
createTextNode
(
"
\
n
"
))
while
nodes
and
nodes
[
0
].
nodeType
==
xml
.
dom
.
core
.
TEXT
\
while
nodes
and
nodes
[
0
].
nodeType
==
TEXT
\
and
not
string
.
strip
(
nodes
[
0
].
data
):
and
not
string
.
strip
(
nodes
[
0
].
data
):
del
nodes
[
0
]
del
nodes
[
0
]
map
(
back
.
appendChild
,
nodes
)
map
(
back
.
appendChild
,
nodes
)
...
@@ -307,28 +310,28 @@ def fixup_trailing_whitespace(doc, wsmap):
...
@@ -307,28 +310,28 @@ def fixup_trailing_whitespace(doc, wsmap):
while
queue
:
while
queue
:
node
=
queue
[
0
]
node
=
queue
[
0
]
del
queue
[
0
]
del
queue
[
0
]
if
node
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
\
if
node
.
nodeType
==
ELEMENT
\
and
wsmap
.
has_key
(
node
.
tagName
):
and
wsmap
.
has_key
(
node
.
tagName
):
ws
=
wsmap
[
node
.
tagName
]
ws
=
wsmap
[
node
.
tagName
]
children
=
node
.
childNodes
children
=
node
.
childNodes
children
.
reverse
()
children
.
reverse
()
if
children
[
0
].
nodeType
==
xml
.
dom
.
core
.
TEXT
:
if
children
[
0
].
nodeType
==
TEXT
:
data
=
string
.
rstrip
(
children
[
0
].
data
)
+
ws
data
=
string
.
rstrip
(
children
[
0
].
data
)
+
ws
children
[
0
].
data
=
data
children
[
0
].
data
=
data
children
.
reverse
()
children
.
reverse
()
# hack to get the title in place:
# hack to get the title in place:
if
node
.
tagName
==
"title"
\
if
node
.
tagName
==
"title"
\
and
node
.
parentNode
.
firstChild
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
and
node
.
parentNode
.
firstChild
.
nodeType
==
ELEMENT
:
node
.
parentNode
.
insertBefore
(
doc
.
createText
(
"
\
n
"
),
node
.
parentNode
.
insertBefore
(
doc
.
createText
(
"
\
n
"
),
node
.
parentNode
.
firstChild
)
node
.
parentNode
.
firstChild
)
for
child
in
node
.
childNodes
:
for
child
in
node
.
childNodes
:
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
child
.
nodeType
==
ELEMENT
:
queue
.
append
(
child
)
queue
.
append
(
child
)
def
normalize
(
doc
):
def
normalize
(
doc
):
for
node
in
doc
.
childNodes
:
for
node
in
doc
.
childNodes
:
if
node
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
node
.
nodeType
==
ELEMENT
:
node
.
normalize
()
node
.
normalize
()
...
@@ -339,7 +342,7 @@ def cleanup_trailing_parens(doc, element_names):
...
@@ -339,7 +342,7 @@ def cleanup_trailing_parens(doc, element_names):
rewrite_element
=
d
.
has_key
rewrite_element
=
d
.
has_key
queue
=
[]
queue
=
[]
for
node
in
doc
.
childNodes
:
for
node
in
doc
.
childNodes
:
if
node
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
node
.
nodeType
==
ELEMENT
:
queue
.
append
(
node
)
queue
.
append
(
node
)
while
queue
:
while
queue
:
node
=
queue
[
0
]
node
=
queue
[
0
]
...
@@ -347,13 +350,13 @@ def cleanup_trailing_parens(doc, element_names):
...
@@ -347,13 +350,13 @@ def cleanup_trailing_parens(doc, element_names):
if
rewrite_element
(
node
.
tagName
):
if
rewrite_element
(
node
.
tagName
):
children
=
node
.
childNodes
children
=
node
.
childNodes
if
len
(
children
)
==
1
\
if
len
(
children
)
==
1
\
and
children
[
0
].
nodeType
==
xml
.
dom
.
core
.
TEXT
:
and
children
[
0
].
nodeType
==
TEXT
:
data
=
children
[
0
].
data
data
=
children
[
0
].
data
if
data
[
-
2
:]
==
"()"
:
if
data
[
-
2
:]
==
"()"
:
children
[
0
].
data
=
data
[:
-
2
]
children
[
0
].
data
=
data
[:
-
2
]
else
:
else
:
for
child
in
node
.
childNodes
:
for
child
in
node
.
childNodes
:
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
child
.
nodeType
==
ELEMENT
:
queue
.
append
(
child
)
queue
.
append
(
child
)
...
@@ -366,13 +369,13 @@ def contents_match(left, right):
...
@@ -366,13 +369,13 @@ def contents_match(left, right):
nodeType
=
l
.
nodeType
nodeType
=
l
.
nodeType
if
nodeType
!=
r
.
nodeType
:
if
nodeType
!=
r
.
nodeType
:
return
0
return
0
if
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
nodeType
==
ELEMENT
:
if
l
.
tagName
!=
r
.
tagName
:
if
l
.
tagName
!=
r
.
tagName
:
return
0
return
0
# should check attributes, but that's not a problem here
# should check attributes, but that's not a problem here
if
not
contents_match
(
l
,
r
):
if
not
contents_match
(
l
,
r
):
return
0
return
0
elif
nodeType
==
xml
.
dom
.
core
.
TEXT
:
elif
nodeType
==
TEXT
:
if
l
.
data
!=
r
.
data
:
if
l
.
data
!=
r
.
data
:
return
0
return
0
else
:
else
:
...
@@ -388,7 +391,7 @@ def create_module_info(doc, section):
...
@@ -388,7 +391,7 @@ def create_module_info(doc, section):
return
return
node
.
_node
.
name
=
"synopsis"
node
.
_node
.
name
=
"synopsis"
lastchild
=
node
.
childNodes
[
-
1
]
lastchild
=
node
.
childNodes
[
-
1
]
if
lastchild
.
nodeType
==
xml
.
dom
.
core
.
TEXT
\
if
lastchild
.
nodeType
==
TEXT
\
and
lastchild
.
data
[
-
1
:]
==
"."
:
and
lastchild
.
data
[
-
1
:]
==
"."
:
lastchild
.
data
=
lastchild
.
data
[:
-
1
]
lastchild
.
data
=
lastchild
.
data
[:
-
1
]
modauthor
=
extract_first_element
(
section
,
"moduleauthor"
)
modauthor
=
extract_first_element
(
section
,
"moduleauthor"
)
...
@@ -423,7 +426,7 @@ def create_module_info(doc, section):
...
@@ -423,7 +426,7 @@ def create_module_info(doc, section):
if
title
:
if
title
:
children
=
title
.
childNodes
children
=
title
.
childNodes
if
len
(
children
)
>=
2
\
if
len
(
children
)
>=
2
\
and
children
[
0
].
nodeType
==
xml
.
dom
.
core
.
ELEMENT
\
and
children
[
0
].
nodeType
==
ELEMENT
\
and
children
[
0
].
tagName
==
"module"
\
and
children
[
0
].
tagName
==
"module"
\
and
children
[
0
].
childNodes
[
0
].
data
==
name
:
and
children
[
0
].
childNodes
[
0
].
data
==
name
:
# this is it; morph the <title> into <short-synopsis>
# this is it; morph the <title> into <short-synopsis>
...
@@ -431,7 +434,7 @@ def create_module_info(doc, section):
...
@@ -431,7 +434,7 @@ def create_module_info(doc, section):
if
first_data
.
data
[:
4
]
==
" ---"
:
if
first_data
.
data
[:
4
]
==
" ---"
:
first_data
.
data
=
string
.
lstrip
(
first_data
.
data
[
4
:])
first_data
.
data
=
string
.
lstrip
(
first_data
.
data
[
4
:])
title
.
_node
.
name
=
"short-synopsis"
title
.
_node
.
name
=
"short-synopsis"
if
children
[
-
1
].
nodeType
==
xml
.
dom
.
core
.
TEXT
\
if
children
[
-
1
].
nodeType
==
TEXT
\
and
children
[
-
1
].
data
[
-
1
:]
==
"."
:
and
children
[
-
1
].
data
[
-
1
:]
==
"."
:
children
[
-
1
].
data
=
children
[
-
1
].
data
[:
-
1
]
children
[
-
1
].
data
=
children
[
-
1
].
data
[:
-
1
]
section
.
removeChild
(
title
)
section
.
removeChild
(
title
)
...
@@ -470,10 +473,10 @@ def create_module_info(doc, section):
...
@@ -470,10 +473,10 @@ def create_module_info(doc, section):
children
=
section
.
childNodes
children
=
section
.
childNodes
for
i
in
range
(
len
(
children
)):
for
i
in
range
(
len
(
children
)):
node
=
children
[
i
]
node
=
children
[
i
]
if
node
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
\
if
node
.
nodeType
==
ELEMENT
\
and
node
.
tagName
==
"moduleinfo"
:
and
node
.
tagName
==
"moduleinfo"
:
nextnode
=
children
[
i
+
1
]
nextnode
=
children
[
i
+
1
]
if
nextnode
.
nodeType
==
xml
.
dom
.
core
.
TEXT
:
if
nextnode
.
nodeType
==
TEXT
:
data
=
nextnode
.
data
data
=
nextnode
.
data
if
len
(
string
.
lstrip
(
data
))
<
(
len
(
data
)
-
4
):
if
len
(
string
.
lstrip
(
data
))
<
(
len
(
data
)
-
4
):
nextnode
.
data
=
"
\
n
\
n
\
n
"
+
string
.
lstrip
(
data
)
nextnode
.
data
=
"
\
n
\
n
\
n
"
+
string
.
lstrip
(
data
)
...
@@ -487,7 +490,7 @@ def cleanup_synopses(doc):
...
@@ -487,7 +490,7 @@ def cleanup_synopses(doc):
def
remap_element_names
(
root
,
name_map
):
def
remap_element_names
(
root
,
name_map
):
queue
=
[]
queue
=
[]
for
child
in
root
.
childNodes
:
for
child
in
root
.
childNodes
:
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
child
.
nodeType
==
ELEMENT
:
queue
.
append
(
child
)
queue
.
append
(
child
)
while
queue
:
while
queue
:
node
=
queue
.
pop
()
node
=
queue
.
pop
()
...
@@ -498,13 +501,13 @@ def remap_element_names(root, name_map):
...
@@ -498,13 +501,13 @@ def remap_element_names(root, name_map):
for
attr
,
value
in
attrs
.
items
():
for
attr
,
value
in
attrs
.
items
():
node
.
setAttribute
(
attr
,
value
)
node
.
setAttribute
(
attr
,
value
)
for
child
in
node
.
childNodes
:
for
child
in
node
.
childNodes
:
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
child
.
nodeType
==
ELEMENT
:
queue
.
append
(
child
)
queue
.
append
(
child
)
def
fixup_table_structures
(
doc
):
def
fixup_table_structures
(
doc
,
fragment
):
# must be done after remap_element_names(), or the tables won't be found
# must be done after remap_element_names(), or the tables won't be found
for
table
in
find_all_elements
(
doc
,
"table"
):
for
table
in
find_all_elements
(
fragment
,
"table"
):
fixup_table
(
doc
,
table
)
fixup_table
(
doc
,
table
)
...
@@ -522,7 +525,7 @@ def fixup_table(doc, table):
...
@@ -522,7 +525,7 @@ def fixup_table(doc, table):
last_was_hline
=
0
last_was_hline
=
0
children
=
table
.
childNodes
children
=
table
.
childNodes
for
child
in
children
:
for
child
in
children
:
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
child
.
nodeType
==
ELEMENT
:
tagName
=
child
.
tagName
tagName
=
child
.
tagName
if
tagName
==
"hline"
and
prev_row
is
not
None
:
if
tagName
==
"hline"
and
prev_row
is
not
None
:
prev_row
.
setAttribute
(
"rowsep"
,
"1"
)
prev_row
.
setAttribute
(
"rowsep"
,
"1"
)
...
@@ -535,12 +538,12 @@ def fixup_table(doc, table):
...
@@ -535,12 +538,12 @@ def fixup_table(doc, table):
while
children
:
while
children
:
child
=
children
[
0
]
child
=
children
[
0
]
nodeType
=
child
.
nodeType
nodeType
=
child
.
nodeType
if
nodeType
==
xml
.
dom
.
core
.
TEXT
:
if
nodeType
==
TEXT
:
if
string
.
strip
(
child
.
data
):
if
string
.
strip
(
child
.
data
):
raise
ConversionError
(
"unexpected free data in table"
)
raise
ConversionError
(
"unexpected free data in table"
)
table
.
removeChild
(
child
)
table
.
removeChild
(
child
)
continue
continue
if
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
nodeType
==
ELEMENT
:
if
child
.
tagName
!=
"hline"
:
if
child
.
tagName
!=
"hline"
:
raise
ConversionError
(
raise
ConversionError
(
"unexpected <%s> in table"
%
child
.
tagName
)
"unexpected <%s> in table"
%
child
.
tagName
)
...
@@ -572,7 +575,7 @@ def fixup_row(doc, row):
...
@@ -572,7 +575,7 @@ def fixup_row(doc, row):
def
move_elements_by_name
(
doc
,
source
,
dest
,
name
,
sep
=
None
):
def
move_elements_by_name
(
doc
,
source
,
dest
,
name
,
sep
=
None
):
nodes
=
[]
nodes
=
[]
for
child
in
source
.
childNodes
:
for
child
in
source
.
childNodes
:
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
and
child
.
tagName
==
name
:
if
child
.
nodeType
==
ELEMENT
and
child
.
tagName
==
name
:
nodes
.
append
(
child
)
nodes
.
append
(
child
)
for
node
in
nodes
:
for
node
in
nodes
:
source
.
removeChild
(
node
)
source
.
removeChild
(
node
)
...
@@ -606,13 +609,13 @@ PARA_LEVEL_PRECEEDERS = (
...
@@ -606,13 +609,13 @@ PARA_LEVEL_PRECEEDERS = (
)
)
def
fixup_paras
(
doc
):
def
fixup_paras
(
doc
,
fragment
):
for
child
in
doc
.
childNodes
:
for
child
in
fragment
.
childNodes
:
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
\
if
child
.
nodeType
==
ELEMENT
\
and
child
.
tagName
in
RECURSE_INTO_PARA_CONTAINERS
:
and
child
.
tagName
in
RECURSE_INTO_PARA_CONTAINERS
:
#
#
fixup_paras_helper
(
doc
,
child
)
fixup_paras_helper
(
doc
,
child
)
descriptions
=
find_all_elements
(
doc
,
"description"
)
descriptions
=
find_all_elements
(
fragment
,
"description"
)
for
description
in
descriptions
:
for
description
in
descriptions
:
fixup_paras_helper
(
doc
,
description
)
fixup_paras_helper
(
doc
,
description
)
...
@@ -628,7 +631,7 @@ def fixup_paras_helper(doc, container, depth=0):
...
@@ -628,7 +631,7 @@ def fixup_paras_helper(doc, container, depth=0):
#
#
# Either paragraph material or something to recurse into:
# Either paragraph material or something to recurse into:
#
#
if
(
children
[
start
].
nodeType
==
xml
.
dom
.
core
.
ELEMENT
)
\
if
(
children
[
start
].
nodeType
==
ELEMENT
)
\
and
(
children
[
start
].
tagName
in
RECURSE_INTO_PARA_CONTAINERS
):
and
(
children
[
start
].
tagName
in
RECURSE_INTO_PARA_CONTAINERS
):
fixup_paras_helper
(
doc
,
children
[
start
])
fixup_paras_helper
(
doc
,
children
[
start
])
start
=
skip_leading_nodes
(
children
,
start
+
1
)
start
=
skip_leading_nodes
(
children
,
start
+
1
)
...
@@ -653,11 +656,11 @@ def build_para(doc, parent, start, i):
...
@@ -653,11 +656,11 @@ def build_para(doc, parent, start, i):
after
=
j
+
1
after
=
j
+
1
child
=
children
[
j
]
child
=
children
[
j
]
nodeType
=
child
.
nodeType
nodeType
=
child
.
nodeType
if
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
nodeType
==
ELEMENT
:
if
child
.
tagName
in
BREAK_ELEMENTS
:
if
child
.
tagName
in
BREAK_ELEMENTS
:
after
=
j
after
=
j
break
break
elif
nodeType
==
xml
.
dom
.
core
.
TEXT
:
elif
nodeType
==
TEXT
:
pos
=
string
.
find
(
child
.
data
,
"
\
n
\
n
"
)
pos
=
string
.
find
(
child
.
data
,
"
\
n
\
n
"
)
if
pos
==
0
:
if
pos
==
0
:
after
=
j
after
=
j
...
@@ -670,7 +673,7 @@ def build_para(doc, parent, start, i):
...
@@ -670,7 +673,7 @@ def build_para(doc, parent, start, i):
if
(
start
+
1
)
>
after
:
if
(
start
+
1
)
>
after
:
raise
ConversionError
(
raise
ConversionError
(
"build_para() could not identify content to turn into a paragraph"
)
"build_para() could not identify content to turn into a paragraph"
)
if
children
[
after
-
1
].
nodeType
==
xml
.
dom
.
core
.
TEXT
:
if
children
[
after
-
1
].
nodeType
==
TEXT
:
# we may need to split off trailing white space:
# we may need to split off trailing white space:
child
=
children
[
after
-
1
]
child
=
children
[
after
-
1
]
data
=
child
.
data
data
=
child
.
data
...
@@ -707,7 +710,7 @@ def skip_leading_nodes(children, start):
...
@@ -707,7 +710,7 @@ def skip_leading_nodes(children, start):
# skip over leading comments and whitespace:
# skip over leading comments and whitespace:
child
=
children
[
start
]
child
=
children
[
start
]
nodeType
=
child
.
nodeType
nodeType
=
child
.
nodeType
if
nodeType
==
xml
.
dom
.
core
.
TEXT
:
if
nodeType
==
TEXT
:
data
=
child
.
data
data
=
child
.
data
shortened
=
string
.
lstrip
(
data
)
shortened
=
string
.
lstrip
(
data
)
if
shortened
:
if
shortened
:
...
@@ -717,7 +720,7 @@ def skip_leading_nodes(children, start):
...
@@ -717,7 +720,7 @@ def skip_leading_nodes(children, start):
return
start
+
1
return
start
+
1
return
start
return
start
# all whitespace, just skip
# all whitespace, just skip
elif
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
elif
nodeType
==
ELEMENT
:
tagName
=
child
.
tagName
tagName
=
child
.
tagName
if
tagName
in
RECURSE_INTO_PARA_CONTAINERS
:
if
tagName
in
RECURSE_INTO_PARA_CONTAINERS
:
return
start
return
start
...
@@ -727,15 +730,15 @@ def skip_leading_nodes(children, start):
...
@@ -727,15 +730,15 @@ def skip_leading_nodes(children, start):
return
start
return
start
def
fixup_rfc_references
(
doc
):
def
fixup_rfc_references
(
doc
,
fragment
):
for
rfcnode
in
find_all_elements
(
doc
,
"rfc"
):
for
rfcnode
in
find_all_elements
(
fragment
,
"rfc"
):
rfcnode
.
appendChild
(
doc
.
createTextNode
(
rfcnode
.
appendChild
(
doc
.
createTextNode
(
"RFC "
+
rfcnode
.
getAttribute
(
"num"
)))
"RFC "
+
rfcnode
.
getAttribute
(
"num"
)))
def
fixup_signatures
(
doc
):
def
fixup_signatures
(
doc
,
fragment
):
for
child
in
doc
.
childNodes
:
for
child
in
fragment
.
childNodes
:
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
child
.
nodeType
==
ELEMENT
:
args
=
child
.
getElementsByTagName
(
"args"
)
args
=
child
.
getElementsByTagName
(
"args"
)
for
arg
in
args
:
for
arg
in
args
:
fixup_args
(
doc
,
arg
)
fixup_args
(
doc
,
arg
)
...
@@ -748,7 +751,7 @@ def fixup_signatures(doc):
...
@@ -748,7 +751,7 @@ def fixup_signatures(doc):
def
fixup_args
(
doc
,
arglist
):
def
fixup_args
(
doc
,
arglist
):
for
child
in
arglist
.
childNodes
:
for
child
in
arglist
.
childNodes
:
if
child
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
\
if
child
.
nodeType
==
ELEMENT
\
and
child
.
tagName
==
"optional"
:
and
child
.
tagName
==
"optional"
:
# found it; fix and return
# found it; fix and return
arglist
.
insertBefore
(
doc
.
createTextNode
(
"["
),
child
)
arglist
.
insertBefore
(
doc
.
createTextNode
(
"["
),
child
)
...
@@ -762,8 +765,8 @@ def fixup_args(doc, arglist):
...
@@ -762,8 +765,8 @@ def fixup_args(doc, arglist):
return
fixup_args
(
doc
,
arglist
)
return
fixup_args
(
doc
,
arglist
)
def
fixup_sectionauthors
(
doc
):
def
fixup_sectionauthors
(
doc
,
fragment
):
for
sectauth
in
find_all_elements
(
doc
,
"sectionauthor"
):
for
sectauth
in
find_all_elements
(
fragment
,
"sectionauthor"
):
section
=
sectauth
.
parentNode
section
=
sectauth
.
parentNode
section
.
removeChild
(
sectauth
)
section
.
removeChild
(
sectauth
)
sectauth
.
_node
.
name
=
"author"
sectauth
.
_node
.
name
=
"author"
...
@@ -772,7 +775,7 @@ def fixup_sectionauthors(doc):
...
@@ -772,7 +775,7 @@ def fixup_sectionauthors(doc):
sectauth
.
removeAttribute
(
"name"
)
sectauth
.
removeAttribute
(
"name"
)
after
=
section
.
childNodes
[
2
]
after
=
section
.
childNodes
[
2
]
title
=
section
.
childNodes
[
1
]
title
=
section
.
childNodes
[
1
]
if
title
.
nodeType
==
xml
.
dom
.
core
.
ELEMENT
and
title
.
tagName
!=
"title"
:
if
title
.
nodeType
==
ELEMENT
and
title
.
tagName
!=
"title"
:
after
=
section
.
childNodes
[
0
]
after
=
section
.
childNodes
[
0
]
section
.
insertBefore
(
doc
.
createTextNode
(
"
\
n
"
),
after
)
section
.
insertBefore
(
doc
.
createTextNode
(
"
\
n
"
),
after
)
section
.
insertBefore
(
sectauth
,
after
)
section
.
insertBefore
(
sectauth
,
after
)
...
@@ -781,10 +784,9 @@ def fixup_sectionauthors(doc):
...
@@ -781,10 +784,9 @@ def fixup_sectionauthors(doc):
def
fixup_verbatims
(
doc
):
def
fixup_verbatims
(
doc
):
for
verbatim
in
find_all_elements
(
doc
,
"verbatim"
):
for
verbatim
in
find_all_elements
(
doc
,
"verbatim"
):
child
=
verbatim
.
childNodes
[
0
]
child
=
verbatim
.
childNodes
[
0
]
if
child
.
nodeType
==
xml
.
dom
.
core
.
TEXT
\
if
child
.
nodeType
==
TEXT
\
and
string
.
lstrip
(
child
.
data
)[:
3
]
==
">>>"
:
and
string
.
lstrip
(
child
.
data
)[:
3
]
==
">>>"
:
verbatim
.
_node
.
name
=
"interpreter-session"
verbatim
.
_node
.
name
=
"interactive-session"
#verbatim.setAttribute("interactive", "interactive")
_token_rx
=
re
.
compile
(
r"[a-zA-Z][a-zA-Z0-9.-]*$"
)
_token_rx
=
re
.
compile
(
r"[a-zA-Z][a-zA-Z0-9.-]*$"
)
...
@@ -792,7 +794,7 @@ _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
...
@@ -792,7 +794,7 @@ _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
def
write_esis
(
doc
,
ofp
,
knownempty
):
def
write_esis
(
doc
,
ofp
,
knownempty
):
for
node
in
doc
.
childNodes
:
for
node
in
doc
.
childNodes
:
nodeType
=
node
.
nodeType
nodeType
=
node
.
nodeType
if
nodeType
==
xml
.
dom
.
core
.
ELEMENT
:
if
nodeType
==
ELEMENT
:
gi
=
node
.
tagName
gi
=
node
.
tagName
if
knownempty
(
gi
):
if
knownempty
(
gi
):
if
node
.
hasChildNodes
():
if
node
.
hasChildNodes
():
...
@@ -808,7 +810,7 @@ def write_esis(doc, ofp, knownempty):
...
@@ -808,7 +810,7 @@ def write_esis(doc, ofp, knownempty):
ofp
.
write
(
"(%s
\
n
"
%
gi
)
ofp
.
write
(
"(%s
\
n
"
%
gi
)
write_esis
(
node
,
ofp
,
knownempty
)
write_esis
(
node
,
ofp
,
knownempty
)
ofp
.
write
(
")%s
\
n
"
%
gi
)
ofp
.
write
(
")%s
\
n
"
%
gi
)
elif
nodeType
==
xml
.
dom
.
core
.
TEXT
:
elif
nodeType
==
TEXT
:
ofp
.
write
(
"-%s
\
n
"
%
esistools
.
encode
(
node
.
data
))
ofp
.
write
(
"-%s
\
n
"
%
esistools
.
encode
(
node
.
data
))
else
:
else
:
raise
RuntimeError
,
"unsupported node type: %s"
%
nodeType
raise
RuntimeError
,
"unsupported node type: %s"
%
nodeType
...
@@ -818,10 +820,11 @@ def convert(ifp, ofp):
...
@@ -818,10 +820,11 @@ def convert(ifp, ofp):
p
=
esistools
.
ExtendedEsisBuilder
()
p
=
esistools
.
ExtendedEsisBuilder
()
p
.
feed
(
ifp
.
read
())
p
.
feed
(
ifp
.
read
())
doc
=
p
.
document
doc
=
p
.
document
normalize
(
doc
)
fragment
=
p
.
fragment
simplify
(
doc
)
normalize
(
fragment
)
handle_labels
(
doc
)
simplify
(
doc
,
fragment
)
handle_appendix
(
doc
)
handle_labels
(
fragment
)
handle_appendix
(
doc
,
fragment
)
fixup_trailing_whitespace
(
doc
,
{
fixup_trailing_whitespace
(
doc
,
{
"abstract"
:
"
\
n
"
,
"abstract"
:
"
\
n
"
,
"title"
:
""
,
"title"
:
""
,
...
@@ -835,12 +838,12 @@ def convert(ifp, ofp):
...
@@ -835,12 +838,12 @@ def convert(ifp, ofp):
cleanup_root_text
(
doc
)
cleanup_root_text
(
doc
)
cleanup_trailing_parens
(
doc
,
[
"function"
,
"method"
,
"cfunction"
])
cleanup_trailing_parens
(
doc
,
[
"function"
,
"method"
,
"cfunction"
])
cleanup_synopses
(
doc
)
cleanup_synopses
(
doc
)
fixup_descriptors
(
doc
)
fixup_descriptors
(
doc
,
fragment
)
fixup_verbatims
(
doc
)
fixup_verbatims
(
fragment
)
normalize
(
doc
)
normalize
(
fragment
)
fixup_paras
(
doc
)
fixup_paras
(
doc
,
fragment
)
fixup_sectionauthors
(
doc
)
fixup_sectionauthors
(
doc
,
fragment
)
remap_element_names
(
doc
,
{
remap_element_names
(
fragment
,
{
"tableii"
:
(
"table"
,
{
"cols"
:
"2"
}),
"tableii"
:
(
"table"
,
{
"cols"
:
"2"
}),
"tableiii"
:
(
"table"
,
{
"cols"
:
"3"
}),
"tableiii"
:
(
"table"
,
{
"cols"
:
"3"
}),
"tableiv"
:
(
"table"
,
{
"cols"
:
"4"
}),
"tableiv"
:
(
"table"
,
{
"cols"
:
"4"
}),
...
@@ -849,9 +852,9 @@ def convert(ifp, ofp):
...
@@ -849,9 +852,9 @@ def convert(ifp, ofp):
"lineiv"
:
(
"row"
,
{}),
"lineiv"
:
(
"row"
,
{}),
"refmodule"
:
(
"module"
,
{
"link"
:
"link"
}),
"refmodule"
:
(
"module"
,
{
"link"
:
"link"
}),
})
})
fixup_table_structures
(
doc
)
fixup_table_structures
(
doc
,
fragment
)
fixup_rfc_references
(
doc
)
fixup_rfc_references
(
doc
,
fragment
)
fixup_signatures
(
doc
)
fixup_signatures
(
doc
,
fragment
)
#
#
d
=
{}
d
=
{}
for
gi
in
p
.
get_empties
():
for
gi
in
p
.
get_empties
():
...
@@ -861,7 +864,7 @@ def convert(ifp, ofp):
...
@@ -861,7 +864,7 @@ def convert(ifp, ofp):
knownempty
=
d
.
has_key
knownempty
=
d
.
has_key
#
#
try
:
try
:
write_esis
(
doc
,
ofp
,
knownempty
)
write_esis
(
fragment
,
ofp
,
knownempty
)
except
IOError
,
(
err
,
msg
):
except
IOError
,
(
err
,
msg
):
# Ignore EPIPE; it just means that whoever we're writing to stopped
# Ignore EPIPE; it just means that whoever we're writing to stopped
# reading. The rest of the output would be ignored. All other errors
# reading. The rest of the output would be ignored. All other errors
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment