Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
534fb1cc
Commit
534fb1cc
authored
Mar 22, 2001
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Another round of cleanup. Change finish_ to handle_.
parent
6f8682bc
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
29 additions
and
195 deletions
+29
-195
lib/python/TAL/HTMLParser.py
lib/python/TAL/HTMLParser.py
+20
-186
lib/python/TAL/HTMLTALParser.py
lib/python/TAL/HTMLTALParser.py
+3
-3
lib/python/TAL/test/test_htmlparser.py
lib/python/TAL/test/test_htmlparser.py
+3
-3
lib/python/TAL/tests/test_htmlparser.py
lib/python/TAL/tests/test_htmlparser.py
+3
-3
No files found.
lib/python/TAL/HTMLParser.py
View file @
534fb1cc
...
@@ -296,9 +296,9 @@ class HTMLParser:
...
@@ -296,9 +296,9 @@ class HTMLParser:
(lineno, offset))
(lineno, offset))
if end[-2:] == '/>':
if end[-2:] == '/>':
# XHTML-style empty tag: <span attr="value" />
# XHTML-style empty tag: <span attr="value" />
self.finish_startendtag(tag, attrs)
self.handle_startendtag(tag, attrs)
else:
else:
self.finish_starttag(tag, attrs)
self.handle_starttag(tag, attrs)
return endpos
return endpos
# Internal -- parse endtag, return end or -1 if incomplete
# Internal -- parse endtag, return end or -1 if incomplete
...
@@ -312,122 +312,47 @@ class HTMLParser:
...
@@ -312,122 +312,47 @@ class HTMLParser:
tag = string.lower(string.strip(rawdata[i+2:j-1]))
tag = string.lower(string.strip(rawdata[i+2:j-1]))
if not tag:
if not tag:
raise HTMLParseError("empty start tag", self.getpos())
raise HTMLParseError("empty start tag", self.getpos())
self.finish_endtag(tag)
self.handle_endtag(tag)
return j
return j
# Overridable -- finish processing of start+end tag: <tag.../>
# Overridable -- finish processing of start+end tag: <tag.../>
def finish_startendtag(self, tag, attrs):
def handle_startendtag(self, tag, attrs):
self.finish_starttag(tag, attrs)
self.handle_starttag(tag, attrs)
self.finish_endtag(tag)
self.handle_endtag(tag)
# Overridable -- finish processing of start tag
def finish_starttag(self, tag, attrs):
try:
method = getattr(self, 'start_' + tag)
except AttributeError:
try:
method = getattr(self, 'do_' + tag)
except AttributeError:
self.unknown_starttag(tag, attrs)
else:
self.handle_starttag(tag, method, attrs)
else:
self.stack.append(tag)
self.handle_starttag(tag, method, attrs)
# Overridable -- finish processing of end tag
def finish_endtag(self, tag):
if not tag:
found = len(self.stack) - 1
if found < 0:
self.unknown_endtag(tag)
return
else:
if tag not in self.stack:
try:
method = getattr(self, 'end_' + tag)
except AttributeError:
self.unknown_endtag(tag)
else:
self.report_unbalanced(tag)
return
found = len(self.stack)
for i in range(found):
if self.stack[i] == tag: found = i
while len(self.stack) > found:
tag = self.stack[-1]
try:
method = getattr(self, 'end_' + tag)
except AttributeError:
method = None
if method:
self.handle_endtag(tag, method)
else:
self.unknown_endtag(tag)
del self.stack[-1]
# Overridable -- handle start tag
# Overridable -- handle start tag
def handle_starttag(self, tag, method, attrs):
def handle_starttag(self, tag, attrs):
method(attrs)
pass
# Overridable -- handle end tag
# Overridable -- handle end tag
def handle_endtag(self, tag, method):
def handle_endtag(self, tag):
method()
pass
# Example -- report an unbalanced </...> tag.
def report_unbalanced(self, tag):
if self.verbose:
print '*** Unbalanced </' + tag + '>'
print '*** Stack:', self.stack
# Example -- handle character reference, no need to override
# Overridable -- handle character reference
def handle_charref(self, name):
def handle_charref(self, name):
try:
pass
n = int(name)
except ValueError:
# Overridable -- handle entity reference
self.unknown_charref(name)
return
if not 0 <= n <= 255:
self.unknown_charref(name)
return
self.handle_data(chr(n))
# Definition of entities -- derived classes may override
entitydefs = \
{'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
# Example -- handle entity reference, no need to override
def handle_entityref(self, name):
def handle_entityref(self, name):
table = self.entitydefs
pass
if table.has_key(name):
self.handle_data(table[name])
else:
self.unknown_entityref(name)
return
# Example -- handle data, should be overridden
# Overridable -- handle data
def handle_data(self, data):
def handle_data(self, data):
pass
pass
# Example -- handle comment, could be overridden
# Overridable -- handle comment
def handle_comment(self, data):
def handle_comment(self, data):
pass
pass
# Example -- handle declaration, could be overridden
# Overridable -- handle declaration
def handle_decl(self, decl):
def handle_decl(self, decl):
pass
pass
# Example -- handle processing instruction, could be overridden
# Overridable -- handle processing instruction
def handle_pi(self, data):
def handle_pi(self, data):
pass
pass
# To be overridden -- handlers for unknown objects
# Internal -- helper to remove special character quoting
def unknown_starttag(self, tag, attrs): pass
def unknown_endtag(self, tag): pass
def unknown_charref(self, ref): pass
def unknown_entityref(self, ref): pass
# Helper to remove special character quoting
def unescape(self, s):
def unescape(self, s):
if '&' not in s:
if '&' not in s:
return s
return s
...
@@ -437,94 +362,3 @@ class HTMLParser:
...
@@ -437,94 +362,3 @@ class HTMLParser:
s = string.replace(s, "&quot;", '"')
s = string.replace(s, "&quot;", '"')
s = string.replace(s, "&amp;", "&") # Must be last
s = string.replace(s, "&amp;", "&") # Must be last
return s
return s
class TestHTMLParser(HTMLParser):
def __init__(self, verbose=0):
self.testdata = ""
HTMLParser.__init__(self, verbose)
def handle_data(self, data):
self.testdata = self.testdata + data
if len(`self.testdata`) >= 70:
self.flush()
def flush(self):
data = self.testdata
if data:
self.testdata = ""
print 'data:', `data`
def handle_comment(self, data):
self.flush()
r = `data`
if len(r) > 68:
r = r[:32] + '...' + r[-32:]
print 'comment:', r
def unknown_starttag(self, tag, attrs):
self.flush()
if not attrs:
print 'start tag: <' + tag + '>'
else:
print 'start tag: <' + tag,
for name, value in attrs:
print name + '=' + '"' + value + '"',
print '>'
def unknown_endtag(self, tag):
self.flush()
print 'end tag: </' + tag + '>'
def unknown_entityref(self, ref):
self.flush()
print '*** unknown entity ref: &' + ref + ';'
def unknown_charref(self, ref):
self.flush()
print '*** unknown char ref: &#' + ref + ';'
def close(self):
HTMLParser.close(self)
self.flush()
def test(args = None):
import sys
if not args:
args = sys.argv[1:]
if args and args[0] == '-s':
args = args[1:]
klass = HTMLParser
else:
klass = TestHTMLParser
if args:
file = args[0]
else:
file = 'test.html'
if file == '-':
f = sys.stdin
else:
try:
f = open(file, 'r')
except IOError, msg:
print file, ":", msg
sys.exit(1)
data = f.read()
if f is not sys.stdin:
f.close()
x = klass()
for c in data:
x.feed(c)
x.close()
if __name__ == '__main__':
test()
lib/python/TAL/HTMLTALParser.py
View file @
534fb1cc
...
@@ -175,7 +175,7 @@ class HTMLTALParser(HTMLParser):
...
@@ -175,7 +175,7 @@ class HTMLTALParser(HTMLParser):
# Overriding HTMLParser methods
# Overriding HTMLParser methods
def finish_starttag(self, tag, attrs):
def handle_starttag(self, tag, attrs):
self.close_para_tags(tag)
self.close_para_tags(tag)
self.tagstack.append(tag)
self.tagstack.append(tag)
self.scan_xmlns(attrs)
self.scan_xmlns(attrs)
...
@@ -185,7 +185,7 @@ class HTMLTALParser(HTMLParser):
...
@@ -185,7 +185,7 @@ class HTMLTALParser(HTMLParser):
if tag in EMPTY_HTML_TAGS:
if tag in EMPTY_HTML_TAGS:
self.implied_endtag(tag, -1)
self.implied_endtag(tag, -1)
def finish_startendtag(self, tag, attrs):
def handle_startendtag(self, tag, attrs):
self.close_para_tags(tag)
self.close_para_tags(tag)
self.scan_xmlns(attrs)
self.scan_xmlns(attrs)
attrlist, taldict, metaldict = self.extract_attrs(attrs)
attrlist, taldict, metaldict = self.extract_attrs(attrs)
...
@@ -198,7 +198,7 @@ class HTMLTALParser(HTMLParser):
...
@@ -198,7 +198,7 @@ class HTMLTALParser(HTMLParser):
self.getpos(), isend=1)
self.getpos(), isend=1)
self.pop_xmlns()
self.pop_xmlns()
def finish_endtag(self, tag):
def handle_endtag(self, tag):
if tag in EMPTY_HTML_TAGS:
if tag in EMPTY_HTML_TAGS:
# </img> etc. in the source is an error
# </img> etc. in the source is an error
raise NestingError(tag, self.getpos())
raise NestingError(tag, self.getpos())
...
...
lib/python/TAL/test/test_htmlparser.py
View file @
534fb1cc
...
@@ -33,13 +33,13 @@ class EventCollector(HTMLParser.HTMLParser):
...
@@ -33,13 +33,13 @@ class EventCollector(HTMLParser.HTMLParser):
# structure markup
# structure markup
def finish_starttag(self, tag, attrs):
def handle_starttag(self, tag, attrs):
self.append(("starttag", tag, attrs))
self.append(("starttag", tag, attrs))
def finish_startendtag(self, tag, attrs):
def handle_startendtag(self, tag, attrs):
self.append(("startendtag", tag, attrs))
self.append(("startendtag", tag, attrs))
def finish_endtag(self, tag):
def handle_endtag(self, tag):
self.append(("endtag", tag))
self.append(("endtag", tag))
# all other markup
# all other markup
...
...
lib/python/TAL/tests/test_htmlparser.py
View file @
534fb1cc
...
@@ -33,13 +33,13 @@ class EventCollector(HTMLParser.HTMLParser):
...
@@ -33,13 +33,13 @@ class EventCollector(HTMLParser.HTMLParser):
# structure markup
# structure markup
def finish_starttag(self, tag, attrs):
def handle_starttag(self, tag, attrs):
self.append(("starttag", tag, attrs))
self.append(("starttag", tag, attrs))
def finish_startendtag(self, tag, attrs):
def handle_startendtag(self, tag, attrs):
self.append(("startendtag", tag, attrs))
self.append(("startendtag", tag, attrs))
def finish_endtag(self, tag):
def handle_endtag(self, tag):
self.append(("endtag", tag))
self.append(("endtag", tag))
# all other markup
# all other markup
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment