Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
erp5
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
138
Merge Requests
138
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
nexedi
erp5
Commits
ae8085d2
Commit
ae8085d2
authored
Aug 28, 2024
by
Arnaud Fontaine
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
py3: PortalTransforms: sgmllib removed from standard library in favor of html.parser.HTMLParser.
Use scrubHTML() from safe_html module instead.
parent
ccd0b4ee
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
1 addition
and
93 deletions
+1
-93
product/PortalTransforms/libtransforms/utils.py
product/PortalTransforms/libtransforms/utils.py
+1
-93
No files found.
product/PortalTransforms/libtransforms/utils.py
View file @
ae8085d2
import
re
import
os
import
sys
from
sgmllib
import
SGMLParser
,
SGMLParseError
try
:
# Need to be imported before win32api to avoid dll loading
...
...
@@ -142,95 +141,4 @@ NASTY_TAGS = { 'script' : 1
class IllegalHTML(ValueError):
    """ Raised when the scrubbed HTML contains a forbidden construct. """
class StrippingParser(SGMLParser):
    """ Pass only allowed tags; raise exception for known-bad.

    The scrubbed markup is accumulated as a string in ``self.result``.
    Tags listed in VALID_TAGS are re-emitted, tags listed in NASTY_TAGS
    raise IllegalHTML, and every other tag is dropped (its text content
    is kept).
    """

    # replace entitydefs from sgmllib
    from htmlentitydefs import entitydefs

    def __init__(self):
        SGMLParser.__init__(self)
        self.result = ""

    def _escape_text(self, text):
        """ Return text with '&', '<' and '>' replaced by entities. """
        # '&' must be replaced first so the entities added below are not
        # escaped a second time.
        return (text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;'))

    def handle_data(self, data):
        """ Append character data to the result unchanged. """
        if data:
            self.result += data

    def handle_charref(self, name):
        """ Re-emit a numeric character reference as ``&#name;``. """
        self.result = "%s&#%s;" % (self.result, name)

    def handle_entityref(self, name):
        """ Re-emit an entity reference.

        The closing ';' is only written for names found in entitydefs.
        """
        if name in self.entitydefs:
            terminator = ';'
        else:
            # this breaks unstandard entities that end with ';'
            terminator = ''
        self.result = "%s&%s%s" % (self.result, name, terminator)

    def unknown_starttag(self, tag, attrs):
        """ Delete all tags except for legal ones.

        Raises IllegalHTML for tags listed in NASTY_TAGS, for attributes
        whose name starts with "on" and for attribute values that are
        javascript: URIs.
        """
        if tag in VALID_TAGS:
            pieces = ['<', tag]
            for k, v in attrs:
                if k.lower().startswith('on'):
                    raise IllegalHTML(
                        'Javascipt event "%s" not allowed.' % k)
                # Whitespace (including tabs and newlines) is removed
                # before the scheme test so that values such as
                # " javascript:..." or "java\tscript:..." are caught too.
                if ''.join(v.lower().split()).startswith('javascript:'):
                    raise IllegalHTML(
                        'Javascipt URI "%s" not allowed.' % v)
                # The value is written between double quotes, so a literal
                # '"' must be escaped or it would end the attribute and let
                # the remainder be read as extra attributes.  '&' is left
                # alone so that references still present in the value are
                # not double-escaped.
                pieces.append(' %s="%s"' % (k, v.replace('"', '&quot;')))
            # A true value in VALID_TAGS means the tag takes a closing
            # tag; a false value means it is emitted self-closed.
            if VALID_TAGS.get(tag):
                pieces.append('>')
            else:
                pieces.append(' />')
            self.result += ''.join(pieces)
        elif NASTY_TAGS.get(tag):
            raise IllegalHTML('Dynamic tag "%s" not allowed.' % tag)
        # any other tag is omitted

    def unknown_endtag(self, tag):
        """ Emit the closing tag for valid tags that take one. """
        if VALID_TAGS.get(tag):
            self.result = "%s</%s>" % (self.result, tag)

    def parse_declaration(self, i):
        """ Fix handling of CDATA sections. Code borrowed from BeautifulSoup.

        Returns the index in ``self.rawdata`` at which parsing resumes.

        The content of a CDATA section, and the remaining input after a
        declaration the base parser cannot parse, are appended to the
        result as escaped text.  They have not been through the tag
        filter, so they must not be copied as live markup.
        """
        rawdata = self.rawdata
        if rawdata[i:i + 9] == '<![CDATA[':
            end = rawdata.find(']]>', i)
            if end == -1:
                # Unterminated section: everything up to the end of the
                # input belongs to it.
                end = resume_at = len(rawdata)
            else:
                resume_at = end + 3
            # self.result is a string, so it is extended by concatenation
            # (it has no append method).
            self.result += self._escape_text(rawdata[i + 9:end])
            return resume_at
        try:
            return SGMLParser.parse_declaration(self, i)
        except SGMLParseError:
            # Give up on markup parsing and keep the rest as plain text.
            remainder = rawdata[i:]
            self.result += self._escape_text(remainder)
            return i + len(remainder)
def scrubHTML(html):
    """ Return html with every tag that is not allowed stripped out.

    The text is run through a fresh StrippingParser; whatever that parser
    raises while feeding or closing propagates to the caller.
    """
    stripper = StrippingParser()
    stripper.feed(html)
    stripper.close()
    scrubbed = stripper.result
    return scrubbed
from
Products.PortalTransforms.transforms.safe_html
import
scrubHTML
Jérome Perrin
@jerome
mentioned in merge request
!1751
·
Aug 30, 2024
mentioned in merge request
!1751
mentioned in merge request !1751
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment