Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
f54d967f
Commit
f54d967f
authored
Aug 07, 1995
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new formatter module; redid htmllib module to use it
parent
a0eab1d3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
356 additions
and
249 deletions
+356
-249
Lib/htmllib.py
Lib/htmllib.py
+356
-249
No files found.
Lib/htmllib.py
View file @
f54d967f
# New HTML class
"""HTML 2.0 parser.
# XXX Check against HTML 2.0 spec
See the HTML 2.0 specification:
http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_toc.html
# XXX reorder methods according to hierarchy
"""
# - html structure: head, body, title, isindex
# - headers
# - lists, items
# - paragraph styles
# - forms
# - character styles
# - images
# - bookkeeping
# - output generation
import
sys
import
sys
import
regsub
import
regsub
import
string
import
string
from
sgmllib
import
SGMLParser
from
sgmllib
import
SGMLParser
from
formatter
import
AS_IS
ROMAN
=
0
ITALIC
=
1
BOLD
=
2
FIXED
=
3
class
HTMLParser
(
SGMLParser
):
class
HTMLParser
(
SGMLParser
):
def
__init__
(
self
):
def
__init__
(
self
,
formatter
):
SGMLParser
.
__init__
(
self
)
SGMLParser
.
__init__
(
self
)
self
.
savedata
=
None
self
.
formatter
=
formatter
self
.
isindex
=
0
self
.
savedata
=
None
self
.
title
=
''
self
.
isindex
=
0
self
.
para
=
None
self
.
title
=
None
self
.
lists
=
[]
self
.
base
=
None
self
.
styles
=
[]
self
.
anchor
=
None
self
.
nofill
=
0
self
.
anchorlist
=
[]
self
.
nospace
=
1
self
.
nofill
=
0
self
.
softspace
=
0
self
.
list_stack
=
[]
# ---
Data
# ---
--- Methods used internally; some may be overridden
def
handle_image
(
self
,
src
,
alt
):
# --- Formatter interface, taking care of 'savedata' mode;
self
.
handle_data
(
alt
)
# shouldn't need to be overridden
def
handle_data
(
self
,
data
):
def
handle_data
(
self
,
data
):
if
self
.
nofill
:
if
self
.
savedata
is
not
None
:
self
.
handle_literal
(
data
)
return
data
=
regsub
.
gsub
(
'[
\
t
\
n
\
r
]+'
,
' '
,
data
)
if
self
.
nospace
and
data
[:
1
]
==
' '
:
data
=
data
[
1
:]
if
not
data
:
return
self
.
nospace
=
0
if
self
.
softspace
and
data
[:
1
]
!=
' '
:
data
=
' '
+
data
if
data
[
-
1
:]
==
' '
:
data
=
data
[:
-
1
]
self
.
softspace
=
1
self
.
output_data
(
data
)
def
handle_literal
(
self
,
data
):
self
.
nospace
=
0
self
.
softspace
=
0
self
.
output_data
(
data
)
def
output_data
(
self
,
data
):
if
self
.
savedata
is
not
None
:
self
.
savedata
=
self
.
savedata
+
data
self
.
savedata
=
self
.
savedata
+
data
else
:
else
:
self
.
write_data
(
data
)
if
self
.
nofill
:
self
.
formatter
.
add_literal_data
(
data
)
else
:
self
.
formatter
.
add_flowing_data
(
data
)
def
write_data
(
self
,
data
):
# --- Hooks to save data; shouldn't need to be overridden
sys
.
stdout
.
write
(
data
)
def
save_bgn
(
self
):
def
save_bgn
(
self
):
self
.
savedata
=
''
self
.
savedata
=
''
self
.
nospace
=
1
self
.
softspace
=
0
def
save_end
(
self
):
def
save_end
(
self
):
saved
=
self
.
savedata
data
=
self
.
savedata
self
.
savedata
=
None
self
.
savedata
=
None
self
.
nospace
=
1
return
string
.
join
(
string
.
split
(
data
))
self
.
softspace
=
0
return
saved
# --- Hooks for anchors; should probably be overridden
def
new_para
(
self
):
def
anchor_bgn
(
self
,
href
,
name
,
type
):
pass
self
.
anchor
=
href
if
self
.
anchor
:
self
.
anchorlist
.
append
(
href
)
def
new_style
(
self
):
def
anchor_end
(
self
):
pass
if
self
.
anchor
:
self
.
handle_data
(
"[%d]"
%
len
(
self
.
anchorlist
))
self
.
anchor
=
None
# ---
Generic style changes
# ---
Hook for images; should probably be overridden
def
para_bgn
(
self
,
tag
):
def
handle_image
(
self
,
src
,
alt
):
if
not
self
.
nospace
:
self
.
handle_data
(
alt
)
self
.
handle_literal
(
'
\
n
'
)
self
.
nospace
=
1
self
.
softspace
=
0
if
tag
is
not
None
:
self
.
para
=
tag
self
.
new_para
()
def
para_end
(
self
):
# --- Hooks for forms; should probably be overridden
self
.
para_bgn
(
''
)
def
push_list
(
self
,
tag
):
def
form_bgn
(
self
,
action
,
method
,
enctype
):
self
.
lists
.
append
(
tag
)
self
.
do_p
([]
)
self
.
para_bgn
(
None
)
self
.
handle_data
(
"<FORM>"
)
def
pop_list
(
self
):
def
form_end
(
self
):
del
self
.
lists
[
-
1
]
self
.
handle_data
(
"</FORM>"
)
self
.
para_end
(
)
self
.
do_p
([]
)
def
literal_bgn
(
self
,
tag
,
attr
s
):
def
handle_input
(
self
,
type
,
option
s
):
self
.
para_bgn
(
tag
)
self
.
handle_data
(
"<INPUT>"
)
def
literal_end
(
self
,
tag
):
def
select_bgn
(
self
,
name
,
size
,
multiple
):
self
.
para_end
(
)
self
.
handle_data
(
"<SELECT>"
)
def
push_style
(
self
,
tag
):
def
select_end
(
self
):
self
.
styles
.
append
(
tag
)
self
.
handle_data
(
"</SELECT>"
)
self
.
new_style
()
def
pop_style
(
self
):
def
handle_option
(
self
,
value
,
selected
):
del
self
.
styles
[
-
1
]
self
.
handle_data
(
"<OPTION>"
)
self
.
new_style
()
def
anchor_bgn
(
self
,
href
,
name
,
type
):
def
textarea_bgn
(
self
,
name
,
rows
,
cols
):
self
.
push_style
(
href
and
'a'
or
None
)
self
.
handle_data
(
"<TEXTAREA>"
)
self
.
start_pre
([])
def
anchor_end
(
self
):
def
textarea_end
(
self
):
self
.
pop_style
()
self
.
end_pre
()
self
.
handle_data
(
"</TEXTAREA>"
)
# ---
Top level tag
s
# ---
------ Top level elememt
s
def
start_html
(
self
,
attrs
):
pass
def
start_html
(
self
,
attrs
):
pass
def
end_html
(
self
):
pass
def
end_html
(
self
):
pass
...
@@ -144,231 +108,374 @@ class HTMLParser(SGMLParser):
...
@@ -144,231 +108,374 @@ class HTMLParser(SGMLParser):
def
start_body
(
self
,
attrs
):
pass
def
start_body
(
self
,
attrs
):
pass
def
end_body
(
self
):
pass
def
end_body
(
self
):
pass
def
do_isindex
(
self
,
attrs
):
# ------ Head elements
self
.
isindex
=
1
def
start_title
(
self
,
attrs
):
def
start_title
(
self
,
attrs
):
self
.
save_bgn
()
self
.
save_bgn
()
def
end_title
(
self
):
def
end_title
(
self
):
self
.
title
=
self
.
save_end
()
self
.
title
=
self
.
save_end
()
# --- Old HTML 'literal text' tags
def
do_base
(
self
,
attrs
):
for
a
,
v
in
attrs
:
if
a
==
'href'
:
self
.
base
=
v
def
start_listing
(
self
,
attrs
):
def
do_isindex
(
self
,
attrs
):
self
.
setliteral
(
'listing'
)
self
.
isindex
=
1
self
.
literal_bgn
(
'listing'
,
attrs
)
def
end_listing
(
self
):
def
do_link
(
self
,
attrs
):
self
.
literal_end
(
'listing'
)
pass
def
start_xmp
(
self
,
attrs
):
def
do_meta
(
self
,
attrs
):
self
.
setliteral
(
'xmp'
)
pass
self
.
literal_bgn
(
'xmp'
,
attrs
)
def
end_xmp
(
self
):
def
do_nextid
(
self
,
attrs
):
# Deprecated
self
.
literal_end
(
'xmp'
)
pass
def
do_plaintext
(
self
,
attrs
):
# ------ Body elements
self
.
setnomoretags
()
self
.
literal_bgn
(
'plaintext'
,
attrs
)
# ---
Anchor
s
# ---
Heading
s
def
start_a
(
self
,
attrs
):
def
start_h1
(
self
,
attrs
):
href
=
''
self
.
formatter
.
end_paragraph
(
1
)
name
=
''
self
.
formatter
.
push_font
((
'h1'
,
0
,
1
,
0
))
type
=
''
for
attrname
,
value
in
attrs
:
if
attrname
==
'href'
:
href
=
value
if
attrname
==
'name'
:
name
=
value
if
attrname
==
'type'
:
type
=
string
.
lower
(
value
)
if
not
(
href
or
name
):
return
self
.
anchor_bgn
(
href
,
name
,
type
)
def
end_a
(
self
):
def
end_h1
(
self
):
self
.
anchor_end
()
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
def
start_h2
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
'h2'
,
0
,
1
,
0
))
def
end_h2
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
def
start_h3
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
'h3'
,
0
,
1
,
0
))
def
end_h3
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
# --- Paragraph tags
def
start_h4
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
'h4'
,
0
,
1
,
0
))
def
end_h4
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
def
start_h5
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
'h5'
,
0
,
1
,
0
))
def
end_h5
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
def
start_h6
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
'h6'
,
0
,
1
,
0
))
def
end_h6
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
# --- Block Structuring Elements
def
do_p
(
self
,
attrs
):
def
do_p
(
self
,
attrs
):
self
.
para_bgn
(
None
)
self
.
formatter
.
end_paragraph
(
1
)
def
do_br
(
self
,
attrs
):
def
start_pre
(
self
,
attrs
):
self
.
handle_literal
(
'
\
n
'
)
self
.
formatter
.
end_paragraph
(
1
)
self
.
nospace
=
1
self
.
formatter
.
push_font
((
AS_IS
,
AS_IS
,
AS_IS
,
1
))
self
.
softspace
=
0
self
.
nofill
=
self
.
nofill
+
1
def
do_hr
(
self
,
attrs
):
def
end_pre
(
self
):
self
.
para_bgn
(
None
)
self
.
formatter
.
end_paragraph
(
1
)
self
.
handle_literal
(
'-'
*
40
)
self
.
formatter
.
pop_font
(
)
self
.
para_end
(
)
self
.
nofill
=
max
(
0
,
self
.
nofill
-
1
)
def
start_h1
(
self
,
attrs
):
def
start_xmp
(
self
,
attrs
):
self
.
para_bgn
(
'h1'
)
self
.
start_pre
(
attrs
)
self
.
setliteral
(
'xmp'
)
# Tell SGML parser
def
start_h2
(
self
,
attrs
):
def
end_xmp
(
self
):
self
.
para_bgn
(
'h2'
)
self
.
end_pre
(
)
def
start_h3
(
self
,
attrs
):
def
start_listing
(
self
,
attrs
):
self
.
para_bgn
(
'h3'
)
self
.
start_pre
(
attrs
)
self
.
setliteral
(
'listing'
)
# Tell SGML parser
def
start_h4
(
self
,
attrs
):
def
end_listing
(
self
):
self
.
para_bgn
(
'h4'
)
self
.
end_pre
(
)
def
start_h5
(
self
,
attrs
):
def
start_address
(
self
,
attrs
):
self
.
para_bgn
(
'h5'
)
self
.
formatter
.
end_paragraph
(
0
)
self
.
formatter
.
push_font
((
AS_IS
,
1
,
AS_IS
,
AS_IS
))
def
start_h6
(
self
,
attrs
):
def
end_address
(
self
):
self
.
para_bgn
(
'h6'
)
self
.
formatter
.
end_paragraph
(
0
)
self
.
formatter
.
pop_font
()
def
end_h1
(
self
):
def
start_blockquote
(
self
,
attrs
):
self
.
para_end
()
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_margin
(
'blockquote'
)
end_h2
=
end_h1
def
end_blockquote
(
self
):
end_h3
=
end_h2
self
.
formatter
.
end_paragraph
(
0
)
end_h4
=
end_h3
self
.
formatter
.
pop_margin
()
end_h5
=
end_h4
end_h6
=
end_h5
def
start_ul
(
self
,
attrs
):
# --- List Elements
self
.
para_bgn
(
None
)
self
.
push_list
(
'ul'
)
def
start_ol
(
self
,
attrs
):
def
start_ul
(
self
,
attrs
):
self
.
para_bgn
(
None
)
self
.
formatter
.
end_paragraph
(
not
self
.
list_stack
)
self
.
push_list
(
'ol'
)
self
.
formatter
.
push_margin
(
'ul'
)
self
.
list_stack
.
append
([
'ul'
,
'*'
,
0
])
def
end_ul
(
self
):
def
end_ul
(
self
):
self
.
pop_list
()
if
self
.
list_stack
:
del
self
.
list_stack
[
-
1
]
self
.
para_end
()
self
.
formatter
.
end_paragraph
(
not
self
.
list_stack
)
self
.
formatter
.
pop_margin
()
def
do_li
(
self
,
attrs
):
def
do_li
(
self
,
attrs
):
self
.
para_bgn
(
'li%d'
%
len
(
self
.
lists
))
self
.
formatter
.
end_paragraph
(
0
)
if
self
.
list_stack
:
[
dummy
,
label
,
counter
]
=
top
=
self
.
list_stack
[
-
1
]
top
[
2
]
=
counter
=
counter
+
1
else
:
label
,
counter
=
'*'
,
0
self
.
formatter
.
add_label_data
(
label
,
counter
)
def
start_ol
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
not
self
.
list_stack
)
self
.
formatter
.
push_margin
(
'ol'
)
label
=
'1.'
for
a
,
v
in
attrs
:
if
a
==
'type'
:
if
len
(
v
)
==
1
:
v
=
v
+
'.'
label
=
v
self
.
list_stack
.
append
([
'ol'
,
label
,
0
])
start_dir
=
start_menu
=
start_ul
def
end_ol
(
self
):
end_dir
=
end_menu
=
end_ol
=
end_ul
if
self
.
list_stack
:
del
self
.
list_stack
[
-
1
]
self
.
formatter
.
end_paragraph
(
not
self
.
list_stack
)
self
.
formatter
.
pop_margin
()
def
start_menu
(
self
,
attrs
):
self
.
start_ul
(
attrs
)
def
end_menu
(
self
):
self
.
end_ul
()
def
start_dir
(
self
,
attrs
):
self
.
start_ul
(
attrs
)
def
end_dir
(
self
):
self
.
end_ul
()
def
start_dl
(
self
,
attrs
):
def
start_dl
(
self
,
attrs
):
self
.
para_bgn
(
None
)
self
.
formatter
.
end_paragraph
(
0
)
self
.
push_list
(
'dl'
)
self
.
list_stack
.
append
([
'dl'
,
''
,
0
]
)
def
end_dl
(
self
):
def
end_dl
(
self
):
self
.
pop_list
()
self
.
ddpop
()
self
.
para_end
()
if
self
.
list_stack
:
del
self
.
list_stack
[
-
1
]
def
do_dt
(
self
,
attrs
):
def
do_dt
(
self
,
attrs
):
self
.
para_bgn
(
'dt%d'
%
len
(
self
.
lists
)
)
self
.
ddpop
(
)
def
do_dd
(
self
,
attrs
):
def
do_dd
(
self
,
attrs
):
self
.
para_bgn
(
'dd%d'
%
len
(
self
.
lists
))
self
.
ddpop
()
self
.
formatter
.
push_margin
(
'dd'
)
def
start_address
(
self
,
attrs
):
self
.
list_stack
.
append
([
'dd'
,
''
,
0
])
self
.
para_bgn
(
'address'
)
def
end_address
(
self
):
def
ddpop
(
self
):
self
.
para_end
()
self
.
formatter
.
end_paragraph
(
0
)
if
self
.
list_stack
:
if
self
.
list_stack
[
-
1
][
0
]
==
'dd'
:
del
self
.
list_stack
[
-
1
]
self
.
formatter
.
pop_margin
()
def
start_pre
(
self
,
attrs
):
# --- Phrase Markup
self
.
para_bgn
(
'pre'
)
self
.
nofill
=
self
.
nofill
+
1
def
end_pre
(
self
):
# Idiomatic Elements
self
.
nofill
=
self
.
nofill
-
1
self
.
para_end
()
start_typewriter
=
start_pre
def
start_cite
(
self
,
attrs
):
self
.
start_i
(
attrs
)
end_typewriter
=
end_pre
def
end_cite
(
self
):
self
.
end_i
()
def
do_img
(
self
,
attrs
):
def
start_code
(
self
,
attrs
):
self
.
start_tt
(
attrs
)
src
=
''
def
end_code
(
self
):
self
.
end_tt
()
alt
=
' (image) '
for
attrname
,
value
in
attrs
:
if
attrname
==
'alt'
:
alt
=
value
if
attrname
==
'src'
:
src
=
value
self
.
handle_image
(
src
,
alt
)
# --- Character tags -- physical styles
def
start_em
(
self
,
attrs
):
self
.
start_i
(
attrs
)
def
end_em
(
self
):
self
.
end_i
()
def
start_
tt
(
self
,
attrs
):
self
.
push_style
(
FIXED
)
def
start_
kbd
(
self
,
attrs
):
self
.
start_tt
(
attrs
)
def
end_
tt
(
self
):
self
.
pop_style
()
def
end_
kbd
(
self
):
self
.
end_tt
()
def
start_
b
(
self
,
attrs
):
self
.
push_style
(
BOLD
)
def
start_
samp
(
self
,
attrs
):
self
.
start_tt
(
attrs
)
def
end_
b
(
self
):
self
.
pop_style
()
def
end_
samp
(
self
):
self
.
end_tt
()
def
start_
i
(
self
,
attrs
):
self
.
push_style
(
ITALIC
)
def
start_
string
(
self
,
attrs
):
self
.
start_b
(
attrs
)
def
end_
i
(
self
):
self
.
pop_style
()
def
end_
b
(
self
):
self
.
end_b
()
def
start_
u
(
self
,
attrs
):
self
.
push_style
(
ITALIC
)
# Underline???
def
start_
var
(
self
,
attrs
):
self
.
start_i
(
attrs
)
def
end_
u
(
self
):
self
.
pop_style
()
def
end_
var
(
self
):
self
.
end_var
()
def
start_r
(
self
,
attrs
):
self
.
push_style
(
ROMAN
)
# Not official
# Typographic Elements
def
end_r
(
self
):
self
.
pop_style
()
# --- Charaacter tags -- logical styles
def
start_i
(
self
,
attrs
):
self
.
formatter
.
push_font
((
AS_IS
,
1
,
AS_IS
,
AS_IS
))
def
end_i
(
self
):
self
.
formatter
.
pop_font
()
start_em
=
start_i
def
start_b
(
self
,
attrs
):
end_em
=
end_i
self
.
formatter
.
push_font
((
AS_IS
,
AS_IS
,
1
,
AS_IS
))
def
end_b
(
self
):
self
.
formatter
.
pop_font
()
start_strong
=
start_b
def
start_tt
(
self
,
attrs
):
end_strong
=
end_b
self
.
formatter
.
push_font
((
AS_IS
,
AS_IS
,
AS_IS
,
1
))
def
end_tt
(
self
):
self
.
formatter
.
pop_font
()
start_code
=
start_tt
def
start_a
(
self
,
attrs
):
end_code
=
end_tt
href
=
''
name
=
''
start_samp
=
start_tt
type
=
''
end_samp
=
end_tt
for
attrname
,
value
in
attrs
:
if
attrname
==
'href'
:
start_kbd
=
start_tt
href
=
value
end_kbd
=
end_tt
if
attrname
==
'name'
:
name
=
value
if
attrname
==
'type'
:
type
=
string
.
lower
(
value
)
self
.
anchor_bgn
(
href
,
name
,
type
)
start_file
=
start_tt
# unofficial
def
end_a
(
self
):
end_file
=
end_tt
self
.
anchor_end
()
start_var
=
start_i
# --- Line Break
end_var
=
end_i
start_dfn
=
start_i
def
do_br
(
self
,
attrs
):
end_dfn
=
end_i
self
.
formatter
.
add_line_break
()
start_cite
=
start_i
# --- Horizontal Rule
end_cite
=
end_i
start_hp1
=
start_i
def
do_hr
(
self
,
attrs
):
end_hp1
=
start_i
self
.
formatter
.
add_hor_rule
()
start_hp2
=
start_b
# --- Image
end_hp2
=
end_b
# --- Form tags
def
do_img
(
self
,
attrs
):
align
=
''
alt
=
'(image)'
ismap
=
''
src
=
''
for
attrname
,
value
in
attrs
:
if
attrname
==
'align'
:
align
=
value
if
attrname
==
'alt'
:
alt
=
value
if
attrname
==
'ismap'
:
ismap
=
value
if
attrname
==
'src'
:
src
=
value
self
.
handle_image
(
src
,
alt
)
# ------ Forms
def
start_form
(
self
,
attrs
):
def
start_form
(
self
,
attrs
):
self
.
para_bgn
(
None
)
action
=
''
method
=
''
enctype
=
''
for
a
,
v
in
attrs
:
if
a
==
'action'
:
action
=
v
if
a
==
'method'
:
method
=
v
if
a
==
'enctype'
:
enctype
=
v
self
.
form_bgn
(
action
,
method
,
enctype
)
def
end_form
(
self
):
def
end_form
(
self
):
self
.
para_end
()
self
.
form_end
()
def
do_input
(
self
,
attrs
):
type
=
''
options
=
{}
for
a
,
v
in
attrs
:
if
a
==
'type'
:
type
=
string
.
lower
(
v
)
else
:
options
[
a
]
=
v
self
.
handle_input
(
type
,
options
)
def
start_select
(
self
,
attrs
):
name
=
''
size
=
0
multiple
=
0
for
a
,
v
in
attrs
:
if
a
==
'multiple'
:
multiple
=
1
if
a
==
'name'
:
name
=
v
if
a
==
'size'
:
try
:
size
=
string
.
atoi
(
size
)
except
:
pass
self
.
select_bgn
(
name
,
size
,
multiple
)
def
end_select
(
self
):
self
.
select_end
()
def
do_option
(
self
,
attrs
):
value
=
''
selected
=
1
for
a
,
v
in
attrs
:
if
a
==
'value'
:
value
=
v
if
a
==
'selected'
:
selected
=
1
self
.
handle_option
(
value
,
selected
)
def
start_textarea
(
self
,
attrs
):
name
=
''
rows
=
0
cols
=
0
for
a
,
v
in
attrs
:
if
a
==
'name'
:
name
=
v
if
a
==
'rows'
:
try
:
rows
=
string
.
atoi
(
v
)
except
:
pass
if
a
==
'cols'
:
try
:
cols
=
string
.
atoi
(
v
)
except
:
pass
self
.
textarea_bgn
(
name
,
rows
,
cols
)
def
end_textarea
(
self
):
self
.
textarea_end
()
# --- Really Old Unofficial Deprecated Stuff
def
do_plaintext
(
self
,
attrs
):
self
.
start_pre
(
attrs
)
self
.
setnomoretags
()
# Tell SGML parser
# --- Unhandled tags
# --- Unhandled tags
def
unknown_starttag
(
self
,
tag
,
attrs
):
def
unknown_starttag
(
self
,
tag
,
attrs
):
pass
pass
def
unknown_endtag
(
self
,
tag
):
def
unknown_endtag
(
self
,
tag
):
pass
pass
def
test
():
def
test
():
import
sys
file
=
'test.html'
file
=
'test.html'
f
=
open
(
file
,
'r'
)
if
sys
.
argv
[
1
:]:
file
=
sys
.
argv
[
1
]
data
=
f
.
read
()
fp
=
open
(
file
,
'r'
)
f
.
close
()
data
=
fp
.
read
()
p
=
HTMLParser
()
fp
.
close
()
from
formatter
import
DumbWriter
,
AbstractFormatter
w
=
DumbWriter
()
f
=
AbstractFormatter
(
w
)
p
=
HTMLParser
(
f
)
p
.
feed
(
data
)
p
.
feed
(
data
)
p
.
close
()
p
.
close
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment