Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
efa44b6f
Commit
efa44b6f
authored
Aug 04, 1995
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
major rewrite using different formatting paradigm
parent
aa876c40
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
330 additions
and
586 deletions
+330
-586
Lib/htmllib.py
Lib/htmllib.py
+330
-586
No files found.
Lib/htmllib.py
View file @
efa44b6f
#
A parser for HTML document
s
#
New HTML clas
s
# XXX Check against HTML 2.0 spec
# HTML: HyperText Markup Language; an SGML-like syntax used by WWW to
# describe hypertext documents
#
# SGML: Standard Generalized Markup Language
#
# WWW: World-Wide Web; a distributed hypertext system developed at CERN
#
# CERN: European Particle Physics Laboratory in Geneva, Switzerland
# XXX reorder methods according to hierarchy
# - html structure: head, body, title, isindex
# - headers
# - lists, items
# - paragraph styles
# - forms
# - character styles
# - images
# - bookkeeping
# - output generation
# This file is only concerned with parsing and formatting HTML
# documents, not with the other (hypertext and networking) aspects of
# the WWW project. (It does support highlighting of anchors.)
import
os
import
sys
import
reg
ex
import
reg
sub
import
string
import
sgmllib
class HTMLParser(sgmllib.SGMLParser):
    # HTML flavour of the SGML parser: it knows one extra entity and
    # the legacy tags whose content has to be taken literally.

    # Build our own entity table: every base class entity plus 'bullet'.
    entitydefs = {}
    for key in sgmllib.SGMLParser.entitydefs.keys():
        entitydefs[key] = sgmllib.SGMLParser.entitydefs[key]
    entitydefs['bullet'] = '*'

    # Provided -- handlers for tags introducing literal text

    def start_listing(self, attrs):
        # <LISTING>: switch the scanner to literal mode, then notify
        tag = 'listing'
        self.setliteral(tag)
        self.literal_bgn(tag, attrs)

    def end_listing(self):
        tag = 'listing'
        self.literal_end(tag)

    def start_xmp(self, attrs):
        # <XMP>: handled exactly like <LISTING>
        tag = 'xmp'
        self.setliteral(tag)
        self.literal_bgn(tag, attrs)

    def end_xmp(self):
        tag = 'xmp'
        self.literal_end(tag)

    def do_plaintext(self, attrs):
        # <PLAINTEXT>: no tag is recognized after this one, so there is
        # no matching end handler
        self.setnomoretags()
        self.literal_bgn('plaintext', attrs)

    # To be overridden -- begin/end literal mode

    def literal_bgn(self, tag, attrs):
        # Hook called when literal text for `tag' starts
        pass

    def literal_end(self, tag):
        # Hook called when literal text for `tag' ends
        pass
# Next level of sophistication -- collect anchors, title, nextid and isindex
class CollectingParser(HTMLParser):
    # Parser that collects anchors, title, nextid and isindex.
    #
    # Attributes filled in while parsing:
    #   title        -- text found between <TITLE> and </TITLE>
    #   isindex      -- set to 1 when an <ISINDEX> tag is seen
    #   nextid       -- attribute list of the last <NEXTID> tag
    #   anchors, anchornames, anchortypes
    #                -- parallel lists with the href, name and lowercased
    #                   type of every anchor that has an href or a name
    #   inanchor     -- 1-based index of the anchor being parsed, negated
    #                   when that anchor has no href; 0 outside anchors
    #   savetext     -- text being accumulated for the title, or None

    def __init__(self):
        HTMLParser.__init__(self)
        self.savetext = None
        self.nextid = []
        self.isindex = 0
        self.title = ''
        self.inanchor = 0
        self.anchors = []
        self.anchornames = []
        self.anchortypes = []

    def start_a(self, attrs):
        # Record the anchor.  Attribute names are compared without a
        # trailing '=' (the same spelling as 'href'); the former
        # 'name=' and 'type=' comparisons could never match.
        self.inanchor = 0
        href = ''
        name = ''
        anchortype = ''
        for attrname, value in attrs:
            if attrname == 'href':
                href = value
            if attrname == 'name':
                name = value
            if attrname == 'type':
                anchortype = string.lower(value)
        if not (href or name):
            return
        self.anchors.append(href)
        self.anchornames.append(name)
        self.anchortypes.append(anchortype)
        self.inanchor = len(self.anchors)
        if not href:
            # A pure name anchor is flagged by a negative index
            self.inanchor = -self.inanchor

    def end_a(self):
        if self.inanchor > 0:
            # Don't show anchors pointing into the current document
            if self.anchors[self.inanchor-1][:1] != '#':
                self.handle_data('[' + repr(self.inanchor) + ']')
        self.inanchor = 0

    def start_html(self, attrs): pass
    def end_html(self): pass

    def start_head(self, attrs): pass
    def end_head(self): pass

    def start_body(self, attrs): pass
    def end_body(self): pass

    def do_nextid(self, attrs):
        self.nextid = attrs

    def do_isindex(self, attrs):
        self.isindex = 1

    def start_title(self, attrs):
        # Start accumulating the title text
        self.savetext = ''

    def end_title(self):
        # Only store a title when a <TITLE> was actually open
        if self.savetext is not None:
            self.title = self.savetext
            self.savetext = None

    def handle_data(self, text):
        # Text is only kept while a title is being collected
        if self.savetext is not None:
            self.savetext = self.savetext + text
# Formatting parser -- takes a formatter and a style sheet as arguments
# XXX The use of style sheets should change: for each tag and end tag
# there should be a style definition, and a style definition should
# encompass many more parameters: font, justification, indentation,
# vspace before, vspace after, hanging tag...
# Matches a (possibly empty) run of characters other than blank, tab
# and newline
wordprog = regex.compile('[^ \t\n]*')
# Matches a (possibly empty) run of blanks, tabs and newlines
spaceprog = regex.compile('[ \t\n]*')
class
FormattingParser
(
CollectingParser
):
def __init__(self, formatter, stylesheet):
    # Initialize the collecting parser, remember the formatter and
    # the style sheet, then reset fonts and indentation.
    CollectingParser.__init__(self)
    self.fmt, self.stl = formatter, stylesheet
    self.savetext = None
    self.compact = self.nofill = 0
    self.resetfont()
    self.setindent(self.stl.stdindent)
def resetfont(self):
    # Drop all saved font sets and styles and go back to the roman
    # style of the style sheet's standard font set.
    self.fontstack, self.stylestack = [], []
    self.fontset = self.stl.stdfontset
    self.style = ROMAN
    self.passfont()

def passfont(self):
    # Hand the font for the current style to the formatter.
    self.fmt.setfont(self.fontset[self.style])
def pushstyle(self, style):
    # Save the current style and switch to `style', clamped to the
    # last style available in the current font set.
    self.stylestack.append(self.style)
    highest = len(self.fontset) - 1
    self.style = min(style, highest)
    self.passfont()

def popstyle(self):
    # Restore the style saved by the matching pushstyle().
    saved = self.stylestack
    self.style = saved[-1]
    del saved[-1]
    self.passfont()
def pushfontset(self, fontset, style):
    # Save the current font set, install `fontset' and push `style'.
    self.fontstack.append(self.fontset)
    self.fontset = fontset
    self.pushstyle(style)

def popfontset(self):
    # Reinstate the font set saved by pushfontset(), then pop the style.
    saved = self.fontstack
    self.fontset = saved[-1]
    del saved[-1]
    self.popstyle()
def flush(self):
    # Forward to the formatter's flush()
    out = self.fmt
    out.flush()

def setindent(self, n):
    # Set the formatter's left indent to n
    out = self.fmt
    out.setleftindent(n)

def needvspace(self, n):
    # Forward a vertical space request to the formatter
    out = self.fmt
    out.needvspace(n)

def close(self):
    # Finish parsing, then flush the formatter
    HTMLParser.close(self)
    self.fmt.flush()
def handle_literal(self, text):
    # Send literal text to the formatter line by line.  Every line but
    # the last is followed by a flush; the last one is left pending.
    lines = string.splitfields(text, '\n')
    # Tabs are expanded on every line except the first
    for i in range(1, len(lines)):
        lines[i] = string.expandtabs(lines[i], 8)
    last = len(lines) - 1
    for i in range(last):
        self.fmt.addword(lines[i], 0)
        self.fmt.flush()
        self.fmt.nospace = 0
    # splitfields() always yields at least one field
    self.fmt.addword(lines[last], 0)
def handle_data(self, text):
    # Dispatch a run of character data:
    # - while savetext is not None the text is appended to it;
    # - in literal mode it goes to handle_literal();
    # - otherwise it is split into words, each passed to the formatter
    #   together with the length of the whitespace that follows it.
    if self.savetext is not None:
        self.savetext = self.savetext + text
        return
    if self.literal:
        self.handle_literal(text)
        return
    i = 0
    n = len(text)
    while i < n:
        # text[i:j] is the next word, text[j:i] the whitespace after it
        j = i + wordprog.match(text, i)
        word = text[i:j]
        i = j + spaceprog.match(text, j)
        self.fmt.addword(word, i-j)
        if self.nofill and '\n' in text[j:i]:
            # No-fill mode: break the line, then resume scanning just
            # past the first newline of the whitespace run
            self.fmt.flush()
            self.fmt.nospace = 0
            i = j+1
            while text[i-1] <> '\n': i = i+1
def literal_bgn(self, tag, attrs):
    # Start literal text: fixed font from the standard font set and
    # the literal indent.  <PLAINTEXT> only flushes; other tags ask
    # for vertical space.
    if tag != 'plaintext':
        self.needvspace(1)
    else:
        self.flush()
    self.pushfontset(self.stl.stdfontset, FIXED)
    self.setindent(self.stl.literalindent)

def literal_end(self, tag):
    # End literal text: restore the font set and the standard indent.
    self.needvspace(1)
    self.popfontset()
    sheet = self.stl
    self.setindent(sheet.stdindent)
def start_title(self, attrs):
    # Flush pending output, then start collecting the title text
    self.flush()
    self.savetext = ''

# NB end_title is unchanged

def do_p(self, attrs):
    # New paragraph: just a line break in compact mode, otherwise
    # one line of vertical space
    if not self.compact:
        self.needvspace(1)
        return
    self.flush()
def start_h1(self, attrs):
    # Level 1 header: two lines of space, own indent and font set,
    # bold and centered
    sheet = self.stl
    self.needvspace(2)
    self.setindent(sheet.h1indent)
    self.pushfontset(sheet.h1fontset, BOLD)
    self.fmt.setjust('c')

def end_h1(self):
    # Back to the previous fonts, standard indent, left justified
    self.popfontset()
    self.needvspace(2)
    self.setindent(self.stl.stdindent)
    self.fmt.setjust('l')

def start_h2(self, attrs):
    # Level 2 header: own indent and font set, bold
    sheet = self.stl
    self.needvspace(1)
    self.setindent(sheet.h2indent)
    self.pushfontset(sheet.h2fontset, BOLD)

def end_h2(self):
    self.popfontset()
    self.needvspace(1)
    self.setindent(self.stl.stdindent)

def start_h3(self, attrs):
    # Level 3 header: standard indent, own font set, bold
    sheet = self.stl
    self.needvspace(1)
    self.setindent(sheet.stdindent)
    self.pushfontset(sheet.h3fontset, BOLD)

def end_h3(self):
    self.popfontset()
    self.needvspace(1)
    self.setindent(self.stl.stdindent)

def start_h4(self, attrs):
    # Level 4 header: standard indent, bold from the standard font set
    sheet = self.stl
    self.needvspace(1)
    self.setindent(sheet.stdindent)
    self.pushfontset(sheet.stdfontset, BOLD)

def end_h4(self):
    self.popfontset()
    self.needvspace(1)
    self.setindent(self.stl.stdindent)

# Levels 5 through 7 are formatted like level 4
start_h5 = start_h4
end_h5 = end_h4

start_h6 = start_h5
end_h6 = end_h5

start_h7 = start_h6
end_h7 = end_h6
def start_ul(self, attrs):
    # Start a list.  A COMPACT attribute switches on compact mode and
    # removes the indent; otherwise the list indent is used.
    self.needvspace(1)
    compact = 0
    for attrname, value in attrs:
        if attrname == 'compact':
            compact = 1
            break
    if compact:
        self.compact = 1
        self.setindent(0)
    else:
        self.setindent(self.stl.ulindent)

# All list flavours are formatted alike
start_dir = start_menu = start_ol = start_ul
do_li
=
do_p
def end_ul(self):
    # End of a list: leave compact mode and restore the standard indent
    self.compact = 0
    self.needvspace(1)
    sheet = self.stl
    self.setindent(sheet.stdindent)

# All list flavours end the same way
end_dir = end_menu = end_ol = end_ul
def start_dl(self, attrs):
    # Start a definition list; a COMPACT attribute turns on compact mode
    for pair in attrs:
        attrname, value = pair
        if attrname == 'compact':
            self.compact = 1
    self.needvspace(1)

def end_dl(self):
    # End of a definition list: leave compact mode, standard indent
    self.compact = 0
    self.needvspace(1)
    sheet = self.stl
    self.setindent(sheet.stdindent)
from
sgmllib
import
SGMLParser
def do_dt(self, attrs):
    # Definition term: new line (compact) or vertical space, then the
    # standard indent
    if not self.compact:
        self.needvspace(1)
    else:
        self.flush()
    self.setindent(self.stl.stdindent)

def do_dd(self, attrs):
    # Definition text: add an empty word followed by one space, then
    # switch to the definition indent
    out = self.fmt
    out.addword('', 1)
    self.setindent(self.stl.ddindent)
ROMAN
=
0
ITALIC
=
1
BOLD
=
2
FIXED
=
3
def start_address(self, attrs):
    # Addresses are set compact and right justified
    self.compact = 1
    self.needvspace(1)
    out = self.fmt
    out.setjust('r')

def end_address(self):
    # Back to normal: not compact, standard indent, left justified
    self.compact = 0
    self.needvspace(1)
    self.setindent(self.stl.stdindent)
    out = self.fmt
    out.setjust('l')
class
HTMLParser
(
SGMLParser
):
def __init__(self):
    # Initialize the SGML parser and the formatting state
    SGMLParser.__init__(self)
    self.savedata = self.para = None
    self.isindex = self.nofill = self.softspace = 0
    self.title = ''
    self.lists = []
    self.styles = []
    # Start out as if a paragraph has just begun
    self.nospace = 1
# --- Data
def handle_image(self, src, alt):
    # Render an image as its alternate text; src is not used here
    text = alt
    self.handle_data(text)
def handle_data(self, data):
    # Handle a run of character data.
    #
    # In no-fill mode the data is passed on unchanged.  Otherwise every
    # run of whitespace collapses to a single blank; a leading blank is
    # dropped while nospace is set, and a trailing blank is held back in
    # softspace so that it is only written if more text follows.
    if self.nofill:
        self.handle_literal(data)
        return
    data = regsub.gsub('[ \t\n\r]+', ' ', data)
    if self.nospace and data[:1] == ' ':
        data = data[1:]
    if not data:
        return
    self.nospace = 0
    if self.softspace and data[:1] != ' ':
        data = ' ' + data
    if data[-1:] == ' ':
        data = data[:-1]
        self.softspace = 1
    else:
        # The pending blank has been written (or none was pending).
        # Without this reset it would be inserted again before every
        # later chunk, e.g. "a <b>b</b>c" would come out as "a b c".
        self.softspace = 0
    self.output_data(data)
def start_pre(self, attrs):
    # Preformatted text: vertical space, one more no-fill level,
    # fixed-width style
    self.needvspace(1)
    depth = self.nofill
    self.nofill = depth + 1
    self.pushstyle(FIXED)
def handle_literal(self, data):
    # Output data unchanged, clearing both spacing flags first
    self.nospace = self.softspace = 0
    self.output_data(data)
def end_pre(self):
    # Undo start_pre(): previous style, one no-fill level less,
    # vertical space
    self.popstyle()
    depth = self.nofill
    self.nofill = depth - 1
    self.needvspace(1)
def output_data(self, data):
    # Write data, unless a save buffer is active: then append to it
    if self.savedata is None:
        self.write_data(data)
    else:
        self.savedata = self.savedata + data
start_typewriter
=
start_pre
end_typewriter
=
end_pre
def write_data(self, data):
    # Default output: write the data to standard output
    out = sys.stdout
    out.write(data)
def do_img(self, attrs):
    # Images are shown as a fixed placeholder word; attrs are ignored
    out = self.fmt
    out.addword('(image)', 0)
def save_bgn(self):
    # Start saving output in a buffer instead of writing it
    self.savedata = ''
    self.nospace, self.softspace = 1, 0
# Physical styles
def save_end(self):
    # Stop saving output and return what was collected
    saved, self.savedata = self.savedata, None
    self.nospace, self.softspace = 1, 0
    return saved
def new_para(self):
    # Hook called by para_bgn(); does nothing here
    pass

def new_style(self):
    # Hook called by push_style() and pop_style(); does nothing here
    pass
# --- Generic style changes
def para_bgn(self, tag):
    # Begin a paragraph.  A newline is written unless output is already
    # at a paragraph start.  A tag of None keeps the current paragraph
    # type; any other value replaces it.
    if not self.nospace:
        self.handle_literal('\n')
    self.nospace, self.softspace = 1, 0
    if tag is not None:
        self.para = tag
    self.new_para()

def para_end(self):
    # End a paragraph: begin a new one with an empty tag
    self.para_bgn('')
def push_list(self, tag):
    # Enter a list of kind `tag' and begin a paragraph of the
    # current type
    self.lists.append(tag)
    self.para_bgn(None)

def pop_list(self):
    # Leave the innermost list and end the paragraph.  A stray end
    # tag without an open list is tolerated rather than raising
    # IndexError.
    if self.lists:
        del self.lists[-1]
    self.para_end()
def literal_bgn(self, tag, attrs):
    # Literal text starts a paragraph named after its tag; attrs are
    # not used
    para = tag
    self.para_bgn(para)

def literal_end(self, tag):
    # Literal text ends the paragraph; the tag is not used
    self.para_end()
def start_tt(self, attrs):
    # Typewriter text uses the fixed-width style
    style = FIXED
    self.pushstyle(style)

def end_tt(self):
    # Return to the style in effect before <TT>
    self.popstyle()
def push_style(self, tag):
    # Enter a character style and report the change
    stack = self.styles
    stack.append(tag)
    self.new_style()
def start_b(self, attrs):
    # Bold text uses the bold style
    style = BOLD
    self.pushstyle(style)

def end_b(self):
    # Return to the style in effect before <B>
    self.popstyle()
def pop_style(self):
    # Leave the innermost character style and report the change.  A
    # stray end tag without an open style is tolerated rather than
    # raising IndexError.
    if self.styles:
        del self.styles[-1]
    self.new_style()
def anchor_bgn(self, href, name, type):
    # An anchor with an href is shown in style 'a'; one without
    # pushes None so that anchor_end() stays balanced
    if href:
        tag = 'a'
    else:
        tag = None
    self.push_style(tag)

def anchor_end(self):
    # Undo the style pushed by anchor_bgn()
    self.pop_style()
# --- Top level tags
def start_i(self, attrs):
    # Italic text uses the italic style
    style = ITALIC
    self.pushstyle(style)

def end_i(self):
    # Return to the style in effect before <I>
    self.popstyle()
def start_html(self, attrs):
    # <HTML> is accepted and ignored
    pass

def end_html(self):
    # </HTML> is accepted and ignored
    pass
def start_u(self, attrs):
    # Underlined text is shown in italics
    style = ITALIC # Underline???
    self.pushstyle(style)

def end_u(self):
    # Return to the style in effect before <U>
    self.popstyle()
def start_head(self, attrs):
    # <HEAD> is accepted and ignored
    pass

def end_head(self):
    # </HEAD> is accepted and ignored
    pass
def start_r(self, attrs):
    # Roman text
    style = ROMAN # Not official
    self.pushstyle(style)

def end_r(self):
    # Return to the style in effect before <R>
    self.popstyle()
def start_body(self, attrs):
    # <BODY> is accepted and ignored
    pass

def end_body(self):
    # </BODY> is accepted and ignored
    pass
# Logical styles
def do_isindex(self, attrs):
    # Remember that the document contains an <ISINDEX> tag
    seen = 1
    self.isindex = seen
start_em
=
start_i
end_em
=
end_i
def start_title(self, attrs):
    # Collect the title text in the save buffer; attrs are not used
    self.save_bgn()
start_strong
=
start_b
end_strong
=
end_b
def end_title(self):
    # The text saved since <TITLE> becomes the document title
    saved = self.save_end()
    self.title = saved
start_code
=
start_tt
end_code
=
end_tt
# --- Old HTML 'literal text' tags
start_samp
=
start_tt
end_samp
=
end_tt
def start_listing(self, attrs):
    # <LISTING>: switch the scanner to literal mode, then notify
    tag = 'listing'
    self.setliteral(tag)
    self.literal_bgn(tag, attrs)
start_kbd
=
start_tt
end_kbd
=
end_tt
def end_listing(self):
    # </LISTING>: notify that literal text has ended
    tag = 'listing'
    self.literal_end(tag)
start_file
=
start_tt
# unofficial
end_file
=
end_tt
def start_xmp(self, attrs):
    # <XMP>: switch the scanner to literal mode, then notify
    tag = 'xmp'
    self.setliteral(tag)
    self.literal_bgn(tag, attrs)
start_var
=
start_i
end_var
=
end_i
def end_xmp(self):
    # </XMP>: notify that literal text has ended
    tag = 'xmp'
    self.literal_end(tag)
start_dfn
=
start_i
end_dfn
=
end_i
def do_plaintext(self, attrs):
    # <PLAINTEXT>: tell the scanner to stop recognizing tags, then
    # notify that literal text starts
    self.setnomoretags()
    tag = 'plaintext'
    self.literal_bgn(tag, attrs)
start_cite
=
start_i
end_cite
=
end_i
# --- Anchors
# <HP1> is shown like <I>.  The end handler must be end_i: bound to
# start_i it would be called without its attrs argument and could
# never restore the style.
start_hp1 = start_i
end_hp1 = end_i
def start_a(self, attrs):
    # Pick href, name and (lowercased) type out of the attributes and
    # report the anchor through anchor_bgn().  An anchor with neither
    # href nor name is not reported.
    # NOTE(review): end_a() calls anchor_end() unconditionally, so an
    # unreported anchor is not balanced -- confirm this is intended.
    href = name = type = ''
    for attrname, value in attrs:
        if attrname == 'href':
            href = value
        elif attrname == 'name':
            name = value
        elif attrname == 'type':
            type = string.lower(value)
    if href or name:
        self.anchor_bgn(href, name, type)
start_hp2
=
start_b
end_hp2
=
end_b
def end_a(self):
    # </A>: report the end of the anchor
    self.anchor_end()
def unknown_starttag(self, tag, attrs):
    # Report a start tag for which no handler exists
    message = '*** unknown <' + tag + '>'
    print(message)
# --- Paragraph tags
def unknown_endtag(self, tag):
    # Report an end tag for which no handler exists
    message = '*** unknown </' + tag + '>'
    print(message)
def do_p(self, attrs):
    # <P>: new paragraph of the current type
    self.para_bgn(None)

def do_br(self, attrs):
    # <BR>: write a newline and treat what follows as a line start
    self.handle_literal('\n')
    self.nospace, self.softspace = 1, 0
# An extension of the formatting parser which formats anchors differently.
class
AnchoringParser
(
FormattingParser
):
def do_hr(self, attrs):
    # <HR>: a rule of 40 dashes in a paragraph of its own
    rule = '-' * 40
    self.para_bgn(None)
    self.handle_literal(rule)
    self.para_end()
def start_a(self, attrs):
    # Let the formatting parser record the anchor, then tell the
    # formatter that an anchor begins (if one was recorded)
    FormattingParser.start_a(self, attrs)
    number = self.inanchor
    if number:
        self.fmt.bgn_anchor(number)
def start_h1(self, attrs):
    # Level 1 header: paragraph of type 'h1'
    para = 'h1'
    self.para_bgn(para)
def end_a(self):
    # Tell the formatter that the current anchor (if any) ends
    number = self.inanchor
    if number:
        self.fmt.end_anchor(number)
        self.inanchor = 0
def start_h2(self, attrs):
    # Level 2 header: paragraph of type 'h2'
    para = 'h2'
    self.para_bgn(para)

def start_h3(self, attrs):
    # Level 3 header: paragraph of type 'h3'
    para = 'h3'
    self.para_bgn(para)
# Style sheet -- this is never instantiated, but the attributes
# of the class object itself are used to specify fonts to be used
# for various paragraph styles.
# A font set is a non-empty list of fonts, in the order:
# [roman, italic, bold, fixed].
# When a style is not available the nearest lower style is used
def start_h4(self, attrs):
    # Level 4 header: paragraph of type 'h4'
    para = 'h4'
    self.para_bgn(para)
ROMAN
=
0
ITALIC
=
1
BOLD
=
2
FIXED
=
3
def start_h5(self, attrs):
    # Level 5 header: paragraph of type 'h5'
    para = 'h5'
    self.para_bgn(para)

def start_h6(self, attrs):
    # Level 6 header: paragraph of type 'h6'
    para = 'h6'
    self.para_bgn(para)

def end_h1(self):
    # Every header level ends its paragraph in the same way
    self.para_end()

end_h2 = end_h1
end_h3 = end_h2
end_h4 = end_h3
end_h5 = end_h4
end_h6 = end_h5
def start_ul(self, attrs):
    # Unordered list: new paragraph, then enter a 'ul' list
    self.para_bgn(None)
    kind = 'ul'
    self.push_list(kind)

def start_ol(self, attrs):
    # Ordered list: new paragraph, then enter an 'ol' list
    self.para_bgn(None)
    kind = 'ol'
    self.push_list(kind)

def end_ul(self):
    # Leave the innermost list and end the paragraph
    self.pop_list()
    self.para_end()

def do_li(self, attrs):
    # List item: the paragraph type carries the nesting depth
    depth = len(self.lists)
    self.para_bgn('li%d' % depth)

# <DIR> and <MENU> are treated as unordered lists
start_dir = start_menu = start_ul
end_dir = end_menu = end_ol = end_ul
def start_dl(self, attrs):
    # Definition list: new paragraph, then enter a 'dl' list
    self.para_bgn(None)
    kind = 'dl'
    self.push_list(kind)

def end_dl(self):
    # Leave the innermost list and end the paragraph
    self.pop_list()
    self.para_end()

def do_dt(self, attrs):
    # Definition term: the paragraph type carries the nesting depth
    depth = len(self.lists)
    self.para_bgn('dt%d' % depth)

def do_dd(self, attrs):
    # Definition text: the paragraph type carries the nesting depth
    depth = len(self.lists)
    self.para_bgn('dd%d' % depth)
def start_address(self, attrs):
    # <ADDRESS>: paragraph of type 'address'
    para = 'address'
    self.para_bgn(para)

def end_address(self):
    # </ADDRESS>: end the paragraph
    self.para_end()
def start_pre(self, attrs):
    # Preformatted text: paragraph of type 'pre', one more no-fill level
    self.para_bgn('pre')
    depth = self.nofill
    self.nofill = depth + 1

def end_pre(self):
    # One no-fill level less, then end the paragraph
    depth = self.nofill
    self.nofill = depth - 1
    self.para_end()

# <TYPEWRITER> is treated like <PRE>
start_typewriter = start_pre
end_typewriter = end_pre
def do_img(self, attrs):
    # <IMG>: pass the SRC and ALT attributes to handle_image(); when
    # an attribute is absent src is '' and alt is ' (image) '
    src, alt = '', ' (image) '
    for attrname, value in attrs:
        if attrname == 'alt':
            alt = value
        elif attrname == 'src':
            src = value
    self.handle_image(src, alt)
# --- Character tags -- physical styles
def start_tt(self, attrs):
    # Typewriter text
    style = FIXED
    self.push_style(style)

def end_tt(self):
    self.pop_style()

def start_b(self, attrs):
    # Bold text
    style = BOLD
    self.push_style(style)

def end_b(self):
    self.pop_style()

def start_i(self, attrs):
    # Italic text
    style = ITALIC
    self.push_style(style)

def end_i(self):
    self.pop_style()

def start_u(self, attrs):
    # Underlined text is shown in italics
    style = ITALIC # Underline???
    self.push_style(style)

def end_u(self):
    self.pop_style()

def start_r(self, attrs):
    # Roman text
    style = ROMAN # Not official
    self.push_style(style)

def end_r(self):
    self.pop_style()
# --- Character tags -- logical styles
# Each logical style is rendered with the physical style it resembles
# most, so every handler is an alias for a physical style handler.

start_em = start_i
end_em = end_i

start_strong = start_b
end_strong = end_b

start_code = start_tt
end_code = end_tt

start_samp = start_tt
end_samp = end_tt

start_kbd = start_tt
end_kbd = end_tt

start_file = start_tt # unofficial
end_file = end_tt

start_var = start_i
end_var = end_i

start_dfn = start_i
end_dfn = end_i

start_cite = start_i
end_cite = end_i

# The end handler must be end_i: bound to start_i it would be called
# without its attrs argument and could never pop the style.
start_hp1 = start_i
end_hp1 = end_i

start_hp2 = start_b
end_hp2 = end_b
# --- Form tags
def start_form(self, attrs):
    # <FORM>: new paragraph of the current type; attrs are not used
    self.para_bgn(None)

def end_form(self):
    # </FORM>: end the paragraph
    self.para_end()

# --- Unhandled tags

def unknown_starttag(self, tag, attrs):
    # Start tags without a handler are ignored
    pass

def unknown_endtag(self, tag):
    # End tags without a handler are ignored
    pass
class NullStylesheet:
    # Style sheet without fonts: every font set is the one-element
    # list [None].

    # Fonts -- none
    stdfontset = [None]
    h1fontset = [None]
    h2fontset = [None]
    h3fontset = [None]

    # Indents
    stdindent = 2
    ddindent = 25
    ulindent = 4
    h1indent = h2indent = literalindent = 0
class X11Stylesheet(NullStylesheet):
    # Style sheet naming X11 fonts.  Each font set lists its fonts in
    # the order roman, italic, bold[, fixed]; the header font sets
    # have no fixed-width font.

    # Body text
    stdfontset = [
        '-*-helvetica-medium-r-normal-*-*-100-100-*-*-*-*-*',
        '-*-helvetica-medium-o-normal-*-*-100-100-*-*-*-*-*',
        '-*-helvetica-bold-r-normal-*-*-100-100-*-*-*-*-*',
        '-*-courier-medium-r-normal-*-*-100-100-*-*-*-*-*',
        ]

    # Level 1 headers
    h1fontset = [
        '-*-helvetica-medium-r-normal-*-*-180-100-*-*-*-*-*',
        '-*-helvetica-medium-o-normal-*-*-180-100-*-*-*-*-*',
        '-*-helvetica-bold-r-normal-*-*-180-100-*-*-*-*-*',
        ]

    # Level 2 headers
    h2fontset = [
        '-*-helvetica-medium-r-normal-*-*-140-100-*-*-*-*-*',
        '-*-helvetica-medium-o-normal-*-*-140-100-*-*-*-*-*',
        '-*-helvetica-bold-r-normal-*-*-140-100-*-*-*-*-*',
        ]

    # Level 3 headers
    h3fontset = [
        '-*-helvetica-medium-r-normal-*-*-120-100-*-*-*-*-*',
        '-*-helvetica-medium-o-normal-*-*-120-100-*-*-*-*-*',
        '-*-helvetica-bold-r-normal-*-*-120-100-*-*-*-*-*',
        ]

    # Definition text indent (overrides the inherited value)
    ddindent = 40
class MacStylesheet(NullStylesheet):
    # Style sheet naming Macintosh fonts as (family, style, size)
    # tuples.  Each font set lists its fonts in the order roman,
    # italic, bold, fixed.

    # Body text
    stdfontset = [
        ('Geneva', 'p', 10),
        ('Geneva', 'i', 10),
        ('Geneva', 'b', 10),
        ('Monaco', 'p', 10),
        ]

    # Level 1 headers
    h1fontset = [
        ('Geneva', 'p', 18),
        ('Geneva', 'i', 18),
        ('Geneva', 'b', 18),
        ('Monaco', 'p', 18),
        ]

    # Level 2 headers.  This 14-point set used to be assigned to
    # h3fontset as well, where the next assignment overwrote it, so
    # level 2 headers fell back to the inherited [None].
    h2fontset = [
        ('Geneva', 'p', 14),
        ('Geneva', 'i', 14),
        ('Geneva', 'b', 14),
        ('Monaco', 'p', 14),
        ]

    # Level 3 headers
    h3fontset = [
        ('Geneva', 'p', 12),
        ('Geneva', 'i', 12),
        ('Geneva', 'b', 12),
        ('Monaco', 'p', 12),
        ]
# StdwinStylesheet is the Mac style sheet when os.name is 'mac' and
# the X11 style sheet everywhere else
if os.name == 'mac':
    StdwinStylesheet = MacStylesheet
else:
    StdwinStylesheet = X11Stylesheet
class GLStylesheet(NullStylesheet):
    # Style sheet naming fonts as 'face size' strings.  Each font set
    # lists its fonts in the order roman, italic, bold, fixed.

    # Body text
    stdfontset = [
        'Helvetica 10',
        'Helvetica-Italic 10',
        'Helvetica-Bold 10',
        'Courier 10',
        ]

    # Level 1 headers
    h1fontset = [
        'Helvetica 18',
        'Helvetica-Italic 18',
        'Helvetica-Bold 18',
        'Courier 18',
        ]

    # Level 2 headers
    h2fontset = [
        'Helvetica 14',
        'Helvetica-Italic 14',
        'Helvetica-Bold 14',
        'Courier 14',
        ]

    # Level 3 headers
    h3fontset = [
        'Helvetica 12',
        'Helvetica-Italic 12',
        'Helvetica-Bold 12',
        'Courier 12',
        ]
# Test program -- produces no output but times how long it takes
# to send a document to a null formatter, exclusive of I/O
def
test
():
import
fmt
import
time
if
sys
.
argv
[
1
:]:
file
=
sys
.
argv
[
1
]
else
:
file
=
'test.html'
data
=
open
(
file
,
'r'
).
read
()
t0
=
time
.
time
()
fmtr
=
fmt
.
WritingFormatter
(
sys
.
stdout
,
79
)
p
=
FormattingParser
(
fmtr
,
NullStylesheet
)
p
.
feed
(
data
)
p
.
close
()
t1
=
time
.
time
()
print
print
'*** Formatting time:'
,
round
(
t1
-
t0
,
3
),
'seconds.'
# Test program using stdwin
def testStdwin():
    # Show the file named by the first command line argument (default
    # 'test.html') in a stdwin window until the window is closed.
    import stdwin, fmt
    # Imported as a module: the event codes are spelled out below
    # instead of being pulled in with a function-level 'import *'
    import stdwinevents
    if sys.argv[1:]:
        filename = sys.argv[1]
    else:
        filename = 'test.html'
    f = open(filename, 'r')
    data = f.read()
    f.close()
    window = stdwin.open('testStdwin')
    b = None
    while 1:
        etype, ewin, edetail = stdwin.getevent()
        if etype == stdwinevents.WE_CLOSE:
            break
        if etype == stdwinevents.WE_SIZE:
            window.setdocsize(0, 0)
            window.setorigin(0, 0)
            window.change((0, 0), (10000, 30000)) # XXX
        if etype == stdwinevents.WE_DRAW:
            if not b:
                # First draw event: format the document into a back end
                b = fmt.StdwinBackEnd(window, 1)
                formatter = fmt.BaseFormatter(b.d, b)
                # Use the style sheet selected for this platform rather
                # than always the Mac fonts
                p = FormattingParser(formatter, StdwinStylesheet)
                p.feed(data)
                p.close()
                b.finish()
            else:
                # Later draw events only redraw the exposed area
                b.redraw(edetail)
    window.close()
# Test program using GL
def testGL():
    # Show the file named by the first command line argument (default
    # 'test.html') in a 600x600 GL window for five seconds.
    import gl, GL, fmt
    if sys.argv[1:]:
        filename = sys.argv[1]
    else:
        filename = 'test.html'
    # Close the file explicitly instead of relying on it being
    # garbage collected
    f = open(filename, 'r')
    data = f.read()
    f.close()
    W, H = 600, 600
    gl.foreground()
    gl.prefsize(W, H)
    wid = gl.winopen('testGL')
    gl.ortho2(0, W, H, 0)
    # Black on a white background
    gl.color(GL.WHITE)
    gl.clear()
    gl.color(GL.BLACK)
    b = fmt.GLBackEnd(wid)
    formatter = fmt.BaseFormatter(b.d, b)
    p = FormattingParser(formatter, GLStylesheet)
    p.feed(data)
    p.close()
    b.finish()
    #
    # Keep the window up for a while before returning
    import time
    time.sleep(5)
# Read the whole of 'test.html' and close the file again
file = 'test.html'
f = open(file, 'r')
data = f.read()
f.close()
# Run the text through a plain HTMLParser
p = HTMLParser()
p.feed(data)
p.close()
# Run the test only when executed as a script.  The additional
# unguarded call that followed ran it a second time, and also on import.
if __name__ == '__main__':
    test()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment