Commit d885f18a authored by Tristan Cavelier

erp5_web: allow extracting specific referred objects from a web page

Add allow_tag_list and deny_tag_list parameters to WebPage_extractReferredObjectDict
parent 87aedaf7
......@@ -2,12 +2,22 @@ from zExceptions import Unauthorized
# Root portal object reached through the script's context.
portal = context.getPortalObject()
# Result mapping; main() returns this dictionary.
href_object_dict = {}

# An allow_tag_list that is not a list or tuple becomes None; handleHtmlTag
# only applies the allow filter when the value is not None.
allow_tag_list = allow_tag_list if isinstance(allow_tag_list, (list, tuple)) else None
# A deny_tag_list that is not a list or tuple becomes an empty list, so the
# "tag in deny_tag_list" check in handleHtmlTag never matches.
deny_tag_list = deny_tag_list if isinstance(deny_tag_list, (list, tuple)) else []
def main():
  """Parse the context's text content and hand each parsed part to handleHtmlPart.

  The text content (defaulting to "") is decoded from UTF-8 before being
  passed to Base_parseHtml. Returns href_object_dict once every part has
  been handled.
  """
  html_text = context.getTextContent("").decode("utf-8")
  for html_part in context.Base_parseHtml(html_text):
    handleHtmlPart(html_part)
  return href_object_dict
def handleHtmlTag(tag, attrs):
if allow_tag_list is not None:
if tag not in allow_tag_list:
return
if tag in deny_tag_list:
return
#if tag == "base": and "href" in attrs: # should not exist in safe-html
# NotImplemented
if tag == "object":
......
......@@ -50,7 +50,7 @@
</item>
<item>
<key> <string>_params</string> </key>
<value> <string></string> </value>
<value> <string>allow_tag_list=None, deny_tag_list=None</string> </value>
</item>
<item>
<key> <string>id</string> </key>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment