Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
f54d967f
Commit
f54d967f
authored
Aug 07, 1995
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new formatter module; redid htmllib module to use it
parent
a0eab1d3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
356 additions
and
249 deletions
+356
-249
Lib/htmllib.py
Lib/htmllib.py
+356
-249
No files found.
Lib/htmllib.py
View file @
f54d967f
# New HTML class
"""HTML 2.0 parser.
# XXX Check against HTML 2.0 spec
# XXX reorder methods according to hierarchy
# - html structure: head, body, title, isindex
# - headers
# - lists, items
# - paragraph styles
# - forms
# - character styles
# - images
# - bookkeeping
# - output generation
See the HTML 2.0 specification:
http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_toc.html
"""
import
sys
import
regsub
import
string
from
sgmllib
import
SGMLParser
ROMAN
=
0
ITALIC
=
1
BOLD
=
2
FIXED
=
3
from
formatter
import
AS_IS
class
HTMLParser
(
SGMLParser
):
def
__init__
(
self
):
SGMLParser
.
__init__
(
self
)
self
.
savedata
=
None
self
.
isindex
=
0
self
.
title
=
''
self
.
para
=
None
self
.
lists
=
[]
self
.
styles
=
[]
self
.
nofill
=
0
self
.
nospace
=
1
self
.
softspace
=
0
def
__init__
(
self
,
formatter
):
SGMLParser
.
__init__
(
self
)
self
.
formatter
=
formatter
self
.
savedata
=
None
self
.
isindex
=
0
self
.
title
=
None
self
.
base
=
None
self
.
anchor
=
None
self
.
anchorlist
=
[]
self
.
nofill
=
0
self
.
list_stack
=
[]
# ---
Data
# ---
--- Methods used internally; some may be overridden
def
handle_image
(
self
,
src
,
alt
):
self
.
handle_data
(
alt
)
# --- Formatter interface, taking care of 'savedata' mode;
# shouldn't need to be overridden
def
handle_data
(
self
,
data
):
if
self
.
nofill
:
self
.
handle_literal
(
data
)
return
data
=
regsub
.
gsub
(
'[
\
t
\
n
\
r
]+'
,
' '
,
data
)
if
self
.
nospace
and
data
[:
1
]
==
' '
:
data
=
data
[
1
:]
if
not
data
:
return
self
.
nospace
=
0
if
self
.
softspace
and
data
[:
1
]
!=
' '
:
data
=
' '
+
data
if
data
[
-
1
:]
==
' '
:
data
=
data
[:
-
1
]
self
.
softspace
=
1
self
.
output_data
(
data
)
def
handle_literal
(
self
,
data
):
self
.
nospace
=
0
self
.
softspace
=
0
self
.
output_data
(
data
)
def
output_data
(
self
,
data
):
if
self
.
savedata
is
not
None
:
if
self
.
savedata
is
not
None
:
self
.
savedata
=
self
.
savedata
+
data
else
:
self
.
write_data
(
data
)
else
:
if
self
.
nofill
:
self
.
formatter
.
add_literal_data
(
data
)
else
:
self
.
formatter
.
add_flowing_data
(
data
)
def
write_data
(
self
,
data
):
sys
.
stdout
.
write
(
data
)
# --- Hooks to save data; shouldn't need to be overridden
def
save_bgn
(
self
):
self
.
savedata
=
''
self
.
nospace
=
1
self
.
softspace
=
0
self
.
savedata
=
''
def
save_end
(
self
):
saved
=
self
.
savedata
self
.
savedata
=
None
self
.
nospace
=
1
self
.
softspace
=
0
return
saved
data
=
self
.
savedata
self
.
savedata
=
None
return
string
.
join
(
string
.
split
(
data
))
# --- Hooks for anchors; should probably be overridden
def
new_para
(
self
):
pass
def
anchor_bgn
(
self
,
href
,
name
,
type
):
self
.
anchor
=
href
if
self
.
anchor
:
self
.
anchorlist
.
append
(
href
)
def
new_style
(
self
):
pass
def
anchor_end
(
self
):
if
self
.
anchor
:
self
.
handle_data
(
"[%d]"
%
len
(
self
.
anchorlist
))
self
.
anchor
=
None
# ---
Generic style changes
# ---
Hook for images; should probably be overridden
def
para_bgn
(
self
,
tag
):
if
not
self
.
nospace
:
self
.
handle_literal
(
'
\
n
'
)
self
.
nospace
=
1
self
.
softspace
=
0
if
tag
is
not
None
:
self
.
para
=
tag
self
.
new_para
()
def
handle_image
(
self
,
src
,
alt
):
self
.
handle_data
(
alt
)
def
para_end
(
self
):
self
.
para_bgn
(
''
)
# --- Hooks for forms; should probably be overridden
def
push_list
(
self
,
tag
):
self
.
lists
.
append
(
tag
)
self
.
para_bgn
(
None
)
def
form_bgn
(
self
,
action
,
method
,
enctype
):
self
.
do_p
([]
)
self
.
handle_data
(
"<FORM>"
)
def
pop_list
(
self
):
del
self
.
lists
[
-
1
]
self
.
para_end
(
)
def
form_end
(
self
):
self
.
handle_data
(
"</FORM>"
)
self
.
do_p
([]
)
def
literal_bgn
(
self
,
tag
,
attr
s
):
self
.
para_bgn
(
tag
)
def
handle_input
(
self
,
type
,
option
s
):
self
.
handle_data
(
"<INPUT>"
)
def
literal_end
(
self
,
tag
):
self
.
para_end
(
)
def
select_bgn
(
self
,
name
,
size
,
multiple
):
self
.
handle_data
(
"<SELECT>"
)
def
push_style
(
self
,
tag
):
self
.
styles
.
append
(
tag
)
self
.
new_style
()
def
select_end
(
self
):
self
.
handle_data
(
"</SELECT>"
)
def
pop_style
(
self
):
del
self
.
styles
[
-
1
]
self
.
new_style
()
def
handle_option
(
self
,
value
,
selected
):
self
.
handle_data
(
"<OPTION>"
)
def
anchor_bgn
(
self
,
href
,
name
,
type
):
self
.
push_style
(
href
and
'a'
or
None
)
def
textarea_bgn
(
self
,
name
,
rows
,
cols
):
self
.
handle_data
(
"<TEXTAREA>"
)
self
.
start_pre
([])
def
anchor_end
(
self
):
self
.
pop_style
()
def
textarea_end
(
self
):
self
.
end_pre
()
self
.
handle_data
(
"</TEXTAREA>"
)
# ---
Top level tag
s
# ---
------ Top level elememt
s
def
start_html
(
self
,
attrs
):
pass
def
end_html
(
self
):
pass
...
...
@@ -144,231 +108,374 @@ class HTMLParser(SGMLParser):
def
start_body
(
self
,
attrs
):
pass
def
end_body
(
self
):
pass
def
do_isindex
(
self
,
attrs
):
self
.
isindex
=
1
# ------ Head elements
def
start_title
(
self
,
attrs
):
self
.
save_bgn
()
self
.
save_bgn
()
def
end_title
(
self
):
self
.
title
=
self
.
save_end
()
self
.
title
=
self
.
save_end
()
# --- Old HTML 'literal text' tags
def
do_base
(
self
,
attrs
):
for
a
,
v
in
attrs
:
if
a
==
'href'
:
self
.
base
=
v
def
start_listing
(
self
,
attrs
):
self
.
setliteral
(
'listing'
)
self
.
literal_bgn
(
'listing'
,
attrs
)
def
do_isindex
(
self
,
attrs
):
self
.
isindex
=
1
def
end_listing
(
self
):
self
.
literal_end
(
'listing'
)
def
do_link
(
self
,
attrs
):
pass
def
start_xmp
(
self
,
attrs
):
self
.
setliteral
(
'xmp'
)
self
.
literal_bgn
(
'xmp'
,
attrs
)
def
do_meta
(
self
,
attrs
):
pass
def
end_xmp
(
self
):
self
.
literal_end
(
'xmp'
)
def
do_nextid
(
self
,
attrs
):
# Deprecated
pass
def
do_plaintext
(
self
,
attrs
):
self
.
setnomoretags
()
self
.
literal_bgn
(
'plaintext'
,
attrs
)
# ------ Body elements
# ---
Anchor
s
# ---
Heading
s
def
start_a
(
self
,
attrs
):
href
=
''
name
=
''
type
=
''
for
attrname
,
value
in
attrs
:
if
attrname
==
'href'
:
href
=
value
if
attrname
==
'name'
:
name
=
value
if
attrname
==
'type'
:
type
=
string
.
lower
(
value
)
if
not
(
href
or
name
):
return
self
.
anchor_bgn
(
href
,
name
,
type
)
def
start_h1
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
'h1'
,
0
,
1
,
0
))
def
end_a
(
self
):
self
.
anchor_end
()
def
end_h1
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
def
start_h2
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
'h2'
,
0
,
1
,
0
))
def
end_h2
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
def
start_h3
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
'h3'
,
0
,
1
,
0
))
def
end_h3
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
# --- Paragraph tags
def
start_h4
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
'h4'
,
0
,
1
,
0
))
def
end_h4
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
def
start_h5
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
'h5'
,
0
,
1
,
0
))
def
end_h5
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
def
start_h6
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
'h6'
,
0
,
1
,
0
))
def
end_h6
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
()
# --- Block Structuring Elements
def
do_p
(
self
,
attrs
):
self
.
para_bgn
(
None
)
self
.
formatter
.
end_paragraph
(
1
)
def
do_br
(
self
,
attrs
):
self
.
handle_literal
(
'
\
n
'
)
self
.
nospace
=
1
self
.
softspace
=
0
def
start_pre
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_font
((
AS_IS
,
AS_IS
,
AS_IS
,
1
))
self
.
nofill
=
self
.
nofill
+
1
def
do_hr
(
self
,
attrs
):
self
.
para_bgn
(
None
)
self
.
handle_literal
(
'-'
*
40
)
self
.
para_end
(
)
def
end_pre
(
self
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
pop_font
(
)
self
.
nofill
=
max
(
0
,
self
.
nofill
-
1
)
def
start_h1
(
self
,
attrs
):
self
.
para_bgn
(
'h1'
)
def
start_xmp
(
self
,
attrs
):
self
.
start_pre
(
attrs
)
self
.
setliteral
(
'xmp'
)
# Tell SGML parser
def
start_h2
(
self
,
attrs
):
self
.
para_bgn
(
'h2'
)
def
end_xmp
(
self
):
self
.
end_pre
(
)
def
start_h3
(
self
,
attrs
):
self
.
para_bgn
(
'h3'
)
def
start_listing
(
self
,
attrs
):
self
.
start_pre
(
attrs
)
self
.
setliteral
(
'listing'
)
# Tell SGML parser
def
start_h4
(
self
,
attrs
):
self
.
para_bgn
(
'h4'
)
def
end_listing
(
self
):
self
.
end_pre
(
)
def
start_h5
(
self
,
attrs
):
self
.
para_bgn
(
'h5'
)
def
start_address
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
0
)
self
.
formatter
.
push_font
((
AS_IS
,
1
,
AS_IS
,
AS_IS
))
def
start_h6
(
self
,
attrs
):
self
.
para_bgn
(
'h6'
)
def
end_address
(
self
):
self
.
formatter
.
end_paragraph
(
0
)
self
.
formatter
.
pop_font
()
def
end_h1
(
self
):
self
.
para_end
()
def
start_blockquote
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
1
)
self
.
formatter
.
push_margin
(
'blockquote'
)
end_h2
=
end_h1
end_h3
=
end_h2
end_h4
=
end_h3
end_h5
=
end_h4
end_h6
=
end_h5
def
end_blockquote
(
self
):
self
.
formatter
.
end_paragraph
(
0
)
self
.
formatter
.
pop_margin
()
def
start_ul
(
self
,
attrs
):
self
.
para_bgn
(
None
)
self
.
push_list
(
'ul'
)
# --- List Elements
def
start_ol
(
self
,
attrs
):
self
.
para_bgn
(
None
)
self
.
push_list
(
'ol'
)
def
start_ul
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
not
self
.
list_stack
)
self
.
formatter
.
push_margin
(
'ul'
)
self
.
list_stack
.
append
([
'ul'
,
'*'
,
0
])
def
end_ul
(
self
):
self
.
pop_list
()
self
.
para_end
()
if
self
.
list_stack
:
del
self
.
list_stack
[
-
1
]
self
.
formatter
.
end_paragraph
(
not
self
.
list_stack
)
self
.
formatter
.
pop_margin
()
def
do_li
(
self
,
attrs
):
self
.
para_bgn
(
'li%d'
%
len
(
self
.
lists
))
self
.
formatter
.
end_paragraph
(
0
)
if
self
.
list_stack
:
[
dummy
,
label
,
counter
]
=
top
=
self
.
list_stack
[
-
1
]
top
[
2
]
=
counter
=
counter
+
1
else
:
label
,
counter
=
'*'
,
0
self
.
formatter
.
add_label_data
(
label
,
counter
)
def
start_ol
(
self
,
attrs
):
self
.
formatter
.
end_paragraph
(
not
self
.
list_stack
)
self
.
formatter
.
push_margin
(
'ol'
)
label
=
'1.'
for
a
,
v
in
attrs
:
if
a
==
'type'
:
if
len
(
v
)
==
1
:
v
=
v
+
'.'
label
=
v
self
.
list_stack
.
append
([
'ol'
,
label
,
0
])
start_dir
=
start_menu
=
start_ul
end_dir
=
end_menu
=
end_ol
=
end_ul
def
end_ol
(
self
):
if
self
.
list_stack
:
del
self
.
list_stack
[
-
1
]
self
.
formatter
.
end_paragraph
(
not
self
.
list_stack
)
self
.
formatter
.
pop_margin
()
def
start_menu
(
self
,
attrs
):
self
.
start_ul
(
attrs
)
def
end_menu
(
self
):
self
.
end_ul
()
def
start_dir
(
self
,
attrs
):
self
.
start_ul
(
attrs
)
def
end_dir
(
self
):
self
.
end_ul
()
def
start_dl
(
self
,
attrs
):
self
.
para_bgn
(
None
)
self
.
push_list
(
'dl'
)
self
.
formatter
.
end_paragraph
(
0
)
self
.
list_stack
.
append
([
'dl'
,
''
,
0
]
)
def
end_dl
(
self
):
self
.
pop_list
()
self
.
para_end
()
self
.
ddpop
()
if
self
.
list_stack
:
del
self
.
list_stack
[
-
1
]
def
do_dt
(
self
,
attrs
):
self
.
para_bgn
(
'dt%d'
%
len
(
self
.
lists
)
)
self
.
ddpop
(
)
def
do_dd
(
self
,
attrs
):
self
.
para_bgn
(
'dd%d'
%
len
(
self
.
lists
))
def
start_address
(
self
,
attrs
):
self
.
para_bgn
(
'address'
)
self
.
ddpop
()
self
.
formatter
.
push_margin
(
'dd'
)
self
.
list_stack
.
append
([
'dd'
,
''
,
0
])
def
end_address
(
self
):
self
.
para_end
()
def
ddpop
(
self
):
self
.
formatter
.
end_paragraph
(
0
)
if
self
.
list_stack
:
if
self
.
list_stack
[
-
1
][
0
]
==
'dd'
:
del
self
.
list_stack
[
-
1
]
self
.
formatter
.
pop_margin
()
def
start_pre
(
self
,
attrs
):
self
.
para_bgn
(
'pre'
)
self
.
nofill
=
self
.
nofill
+
1
# --- Phrase Markup
def
end_pre
(
self
):
self
.
nofill
=
self
.
nofill
-
1
self
.
para_end
()
# Idiomatic Elements
start_typewriter
=
start_pre
end_typewriter
=
end_pre
def
start_cite
(
self
,
attrs
):
self
.
start_i
(
attrs
)
def
end_cite
(
self
):
self
.
end_i
()
def
do_img
(
self
,
attrs
):
src
=
''
alt
=
' (image) '
for
attrname
,
value
in
attrs
:
if
attrname
==
'alt'
:
alt
=
value
if
attrname
==
'src'
:
src
=
value
self
.
handle_image
(
src
,
alt
)
def
start_code
(
self
,
attrs
):
self
.
start_tt
(
attrs
)
def
end_code
(
self
):
self
.
end_tt
()
# --- Character tags -- physical styles
def
start_em
(
self
,
attrs
):
self
.
start_i
(
attrs
)
def
end_em
(
self
):
self
.
end_i
()
def
start_
tt
(
self
,
attrs
):
self
.
push_style
(
FIXED
)
def
end_
tt
(
self
):
self
.
pop_style
()
def
start_
kbd
(
self
,
attrs
):
self
.
start_tt
(
attrs
)
def
end_
kbd
(
self
):
self
.
end_tt
()
def
start_
b
(
self
,
attrs
):
self
.
push_style
(
BOLD
)
def
end_
b
(
self
):
self
.
pop_style
()
def
start_
samp
(
self
,
attrs
):
self
.
start_tt
(
attrs
)
def
end_
samp
(
self
):
self
.
end_tt
()
def
start_
i
(
self
,
attrs
):
self
.
push_style
(
ITALIC
)
def
end_
i
(
self
):
self
.
pop_style
()
def
start_
string
(
self
,
attrs
):
self
.
start_b
(
attrs
)
def
end_
b
(
self
):
self
.
end_b
()
def
start_
u
(
self
,
attrs
):
self
.
push_style
(
ITALIC
)
# Underline???
def
end_
u
(
self
):
self
.
pop_style
()
def
start_
var
(
self
,
attrs
):
self
.
start_i
(
attrs
)
def
end_
var
(
self
):
self
.
end_var
()
def
start_r
(
self
,
attrs
):
self
.
push_style
(
ROMAN
)
# Not official
def
end_r
(
self
):
self
.
pop_style
()
# Typographic Elements
# --- Charaacter tags -- logical styles
def
start_i
(
self
,
attrs
):
self
.
formatter
.
push_font
((
AS_IS
,
1
,
AS_IS
,
AS_IS
))
def
end_i
(
self
):
self
.
formatter
.
pop_font
()
start_em
=
start_i
end_em
=
end_i
def
start_b
(
self
,
attrs
):
self
.
formatter
.
push_font
((
AS_IS
,
AS_IS
,
1
,
AS_IS
))
def
end_b
(
self
):
self
.
formatter
.
pop_font
()
start_strong
=
start_b
end_strong
=
end_b
def
start_tt
(
self
,
attrs
):
self
.
formatter
.
push_font
((
AS_IS
,
AS_IS
,
AS_IS
,
1
))
def
end_tt
(
self
):
self
.
formatter
.
pop_font
()
start_code
=
start_tt
end_code
=
end_tt
start_samp
=
start_tt
end_samp
=
end_tt
start_kbd
=
start_tt
end_kbd
=
end_tt
def
start_a
(
self
,
attrs
):
href
=
''
name
=
''
type
=
''
for
attrname
,
value
in
attrs
:
if
attrname
==
'href'
:
href
=
value
if
attrname
==
'name'
:
name
=
value
if
attrname
==
'type'
:
type
=
string
.
lower
(
value
)
self
.
anchor_bgn
(
href
,
name
,
type
)
start_file
=
start_tt
# unofficial
end_file
=
end_tt
def
end_a
(
self
):
self
.
anchor_end
()
start_var
=
start_i
end_var
=
end_i
# --- Line Break
start_dfn
=
start_i
end_dfn
=
end_i
def
do_br
(
self
,
attrs
):
self
.
formatter
.
add_line_break
()
start_cite
=
start_i
end_cite
=
end_i
# --- Horizontal Rule
start_hp1
=
start_i
end_hp1
=
start_i
def
do_hr
(
self
,
attrs
):
self
.
formatter
.
add_hor_rule
()
start_hp2
=
start_b
end_hp2
=
end_b
# --- Image
# --- Form tags
def
do_img
(
self
,
attrs
):
align
=
''
alt
=
'(image)'
ismap
=
''
src
=
''
for
attrname
,
value
in
attrs
:
if
attrname
==
'align'
:
align
=
value
if
attrname
==
'alt'
:
alt
=
value
if
attrname
==
'ismap'
:
ismap
=
value
if
attrname
==
'src'
:
src
=
value
self
.
handle_image
(
src
,
alt
)
# ------ Forms
def
start_form
(
self
,
attrs
):
self
.
para_bgn
(
None
)
action
=
''
method
=
''
enctype
=
''
for
a
,
v
in
attrs
:
if
a
==
'action'
:
action
=
v
if
a
==
'method'
:
method
=
v
if
a
==
'enctype'
:
enctype
=
v
self
.
form_bgn
(
action
,
method
,
enctype
)
def
end_form
(
self
):
self
.
para_end
()
self
.
form_end
()
def
do_input
(
self
,
attrs
):
type
=
''
options
=
{}
for
a
,
v
in
attrs
:
if
a
==
'type'
:
type
=
string
.
lower
(
v
)
else
:
options
[
a
]
=
v
self
.
handle_input
(
type
,
options
)
def
start_select
(
self
,
attrs
):
name
=
''
size
=
0
multiple
=
0
for
a
,
v
in
attrs
:
if
a
==
'multiple'
:
multiple
=
1
if
a
==
'name'
:
name
=
v
if
a
==
'size'
:
try
:
size
=
string
.
atoi
(
size
)
except
:
pass
self
.
select_bgn
(
name
,
size
,
multiple
)
def
end_select
(
self
):
self
.
select_end
()
def
do_option
(
self
,
attrs
):
value
=
''
selected
=
1
for
a
,
v
in
attrs
:
if
a
==
'value'
:
value
=
v
if
a
==
'selected'
:
selected
=
1
self
.
handle_option
(
value
,
selected
)
def
start_textarea
(
self
,
attrs
):
name
=
''
rows
=
0
cols
=
0
for
a
,
v
in
attrs
:
if
a
==
'name'
:
name
=
v
if
a
==
'rows'
:
try
:
rows
=
string
.
atoi
(
v
)
except
:
pass
if
a
==
'cols'
:
try
:
cols
=
string
.
atoi
(
v
)
except
:
pass
self
.
textarea_bgn
(
name
,
rows
,
cols
)
def
end_textarea
(
self
):
self
.
textarea_end
()
# --- Really Old Unofficial Deprecated Stuff
def
do_plaintext
(
self
,
attrs
):
self
.
start_pre
(
attrs
)
self
.
setnomoretags
()
# Tell SGML parser
# --- Unhandled tags
def
unknown_starttag
(
self
,
tag
,
attrs
):
pass
pass
def
unknown_endtag
(
self
,
tag
):
pass
pass
def
test
():
import
sys
file
=
'test.html'
f
=
open
(
file
,
'r'
)
data
=
f
.
read
()
f
.
close
()
p
=
HTMLParser
()
if
sys
.
argv
[
1
:]:
file
=
sys
.
argv
[
1
]
fp
=
open
(
file
,
'r'
)
data
=
fp
.
read
()
fp
.
close
()
from
formatter
import
DumbWriter
,
AbstractFormatter
w
=
DumbWriter
()
f
=
AbstractFormatter
(
w
)
p
=
HTMLParser
(
f
)
p
.
feed
(
data
)
p
.
close
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment