Commit 7722657f authored by Tristan Cavelier

erp5_web: make html hreferenced objects implicit successors

parent e05fc555
"""
This parameters are default browser behavior with url normalization
- if keep_empty is True: `a//` -> `a//` else `a//` -> `a/`;
- if keep_single_dot is False: `./a` -> `a` else `./a` -> `./a`;
- if keep_double_dot is False: `/../a` -> `/a` else `/../a` -> `/../a`
- if keep_trailing_slash is True: `/a//` -> `/a//` else `/a//` -> `/a`
"""
outer_component_list = []
inner_component_list = []
suffix_list = []
dont_keep_empty = not keep_empty
dont_keep_single_dot = not keep_single_dot
dont_keep_double_dot = not keep_double_dot
starts_with_slash = False
if pathname[:1] == "/":
pathname = pathname[1:]
starts_with_slash = True
if pathname[-1:] == "/":
pathname = pathname[:-1]
if keep_trailing_slash:
suffix_list.append("")
component_list = pathname.split("/")
for component in component_list:
if component == ".." and dont_keep_double_dot:
if inner_component_list:
inner_component_list.pop()
else:
outer_component_list.append("..")
elif not (component == "" and dont_keep_empty or
component == "." and dont_keep_single_dot):
inner_component_list.append(component)
if starts_with_slash:
return "/" + "/".join(inner_component_list + suffix_list)
return "/".join(outer_component_list + inner_component_list + suffix_list)
<?xml version="1.0"?>
<!-- Serialized state of the Zope PythonScript "Base_normalizeUrlPathname": name bindings and parameter signature of the script body. -->
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<!-- Names under which the bound objects are visible inside the script body. -->
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<!-- Parameter list of the script, including the default value of each keep_* flag. -->
<key> <string>_params</string> </key>
<value> <string>pathname, keep_empty=True, keep_single_dot=False, keep_double_dot=False, keep_trailing_slash=True</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Base_normalizeUrlPathname</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
from zExceptions import Unauthorized
# Portal object; used as the traversal root when the context provides no web site.
portal = context.getPortalObject()
# Result of the script: maps each URL-like href found in the page to the
# object it resolves to, or to None when it could not be resolved.
href_object_dict = {}
def main():
  """Parse the text content of the context as HTML and collect its references.

  Every part yielded by Base_parseHtml is passed to handleHtmlPart; the
  filled href_object_dict is returned.
  """
  html = context.getTextContent("").decode("utf-8")
  for html_part in context.Base_parseHtml(html):
    handleHtmlPart(html_part)
  return href_object_dict
def handleHtmlTag(tag, attrs):
  """Register the URLs carried by the attributes of one opening HTML tag.

  `attrs` is a sequence of (name, value) like entries. For <object> tags the
  `data` attribute is looked at, for any other tag `src` and `href` are.
  A <style> tag has no attribute looked at: its text is handled as CSS.
  """
  # <base href="..."> is not implemented: it should not exist in safe-html.
  if tag == "style":
    # For style tags, the next data is always the entire text until </style>.
    on_next_data[0] = handleCss
    return
  if tag == "object":
    url_attribute_name_list = ("data",)
  else:
    url_attribute_name_list = ("src", "href")
  for attribute in attrs:
    if attribute[0] in url_attribute_name_list:
      handleHref(attribute[1])
# One-shot handler for the next "data" part, or None when the next data is
# plain text. Kept in a one-element list so that the functions of this script
# can replace the handler in place.
on_next_data = [None]
def handleHtmlPart(part):
  """Dispatch one parsed HTML part.

  `part` is a sequence whose first item is the part type:
  - "starttag" / "startendtag": part[1] is the tag name and part[2] its
    attributes; they are given to handleHtmlTag.
  - "data": part[1] is the text. If a handler is pending (set by a <style>
    start tag) the text is given to it once and None is returned; otherwise
    the text itself is returned.
  Any other part type is ignored and None is returned.
  """
  part_type = part[0]
  if part_type == "data":
    data_handler = on_next_data[0]
    if data_handler is None:
      return part[1]
    # The handler only applies to the data that directly follows its tag.
    on_next_data[0] = None
    data_handler(part[1])
    return None
  # A pending handler is only meant for the data directly following the tag
  # that set it. Seeing any other part first (e.g. the end tag of an empty
  # <style></style>) means that data never came: drop the handler so that
  # later ordinary text is not parsed as CSS.
  on_next_data[0] = None
  if part_type in ("starttag", "startendtag"):
    return handleHtmlTag(part[1], part[2])
  return None
def handleHref(href):
  """Resolve one href and record the result in href_object_dict.

  Examples of handled forms:
  - "base_url/document_module/id"
  - "base_url/R-Document.Reference"
  - "base_url/R-Document.Reference/view"

  An href that isHrefAUrl rejects is returned unchanged and nothing is
  recorded. Otherwise the href is mapped to the traversed object, or to None
  when the traversal raises KeyError or Unauthorized, and None is returned.
  """
  if isHrefAUrl(href):
    try:
      referred_object = traverseHref(href, allow_method=False)
    except (KeyError, Unauthorized):
      referred_object = None
    href_object_dict[href] = referred_object
    return None
  return href
def handleCss(data):
  """Register every URL that Base_parseCssForUrl finds in the CSS text `data`."""
  for css_part in context.Base_parseCssForUrl(data):
    # Only "url" parts carry a reference; the url is the third item.
    if css_part[0] != "url":
      continue
    handleHref(css_part[2])
def isHrefAUrl(href):
  """Tell whether `href` may point to a document reachable by traversal.

  True for http:// and https:// URLs and for any href without a scheme
  (relative paths, absolute paths, "//host/..." URLs). False for hrefs
  starting with another alphabetic scheme, such as "mailto:" or "data:".
  """
  if href.startswith("https://") or href.startswith("http://"):
    return True
  if ":" not in href:
    # No colon means no scheme: a plain relative reference such as "logo"
    # must not be mistaken for a scheme name.
    return True
  return not href.split(":", 1)[0].isalpha()
def traverseHref(url, allow_method=True, allow_hash=False):
  """Return the object reached by traversing `url`.

  The traversal root and path come from prepareHrefTraverse. With
  allow_method set to False, a traversed object that has no getUid
  attribute is replaced by the object found at the path without its last
  component (presumably the document a method such as "view" is called on).
  Exceptions raised by restrictedTraverse are not caught here.
  """
  root, path = prepareHrefTraverse(url, allow_hash=allow_hash)
  target = root.restrictedTraverse(path)
  if target is not None and not allow_method:
    try:
      target.getUid()
    except AttributeError:
      parent_path = "/".join(path.split("/")[:-1])
      target = root.restrictedTraverse(parent_path)
  return target
# Options given to Base_normalizeUrlPathname for every path to traverse:
# empty components and the trailing slash are dropped.
normalize_kw = {"keep_trailing_slash": False, "keep_empty": False}
# Looked up by the domain of absolute URLs to find the traversal root.
site_object_dict = context.ERP5Site_getWebSiteDomainDict()
base_url_object = context
# When the context has no getWebSiteValue, str() is called instead and its
# empty result makes the portal the root, as does a false getWebSiteValue().
base_url_root_object = getattr(context, "getWebSiteValue", str)() or portal
assert base_url_object.getRelativeUrl().startswith(base_url_root_object.getRelativeUrl())
# Path of the context relative to the root; always starts with "/".
base_url = base_url_object.getRelativeUrl()[len(base_url_root_object.getRelativeUrl()):]
if base_url[:1] != "/":
  base_url = "/" + base_url
def prepareHrefTraverse(url, allow_hash=False):
  """Return the (root object, relative path) pair to use for traversing `url`.

  Everything from the first "?" is removed, and everything from the first
  "#" too unless allow_hash is true. Three forms are then told apart:
  - "http://", "https://" or "//" URL: the root is looked up by domain in
    site_object_dict (KeyError for an unknown domain);
  - path starting with "/": relative to base_url_root_object;
  - anything else: relative to base_url under base_url_root_object.
  The path is normalized with Base_normalizeUrlPathname and returned as a
  str without its leading "/".
  """
  url = url.split("?")[0]
  if not allow_hash:
    url = url.split("#")[0]
  if url.startswith(("https://", "http://", "//")):
    # Absolute url, possibly on another site: [scheme, "", domain, path].
    piece_list = url.split("/", 3)
    site_object = site_object_dict[piece_list[2]]
    path = url[len("/".join(piece_list[:3])):]
    if path[:1] != "/":
      path = "/" + path
    normalized_path = context.Base_normalizeUrlPathname(path, **normalize_kw)
    return site_object, str(normalized_path[1:])
  if url.startswith("/"):
    # Absolute path, relative url.
    path = url
  else:
    # Relative path.
    path = base_url + "/" + url
  return base_url_root_object, str(context.Base_normalizeUrlPathname(path, **normalize_kw)[1:])
# Script result: the href -> object (or None) mapping built by main().
return main()
<?xml version="1.0"?>
<!-- Serialized state of the Zope PythonScript "WebPage_extractReferredObjectDict": name bindings and (empty) parameter signature of the script body. -->
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<!-- Names under which the bound objects are visible inside the script body. -->
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<!-- The script takes no parameter. -->
<key> <string>_params</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>WebPage_extractReferredObjectDict</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
"""
Called by WebPage.getImplicitSuccessorValueList
`reference_list` is list of dicts containing reference and/or version
and/or language and maybe some more things. But this implementation
just ignores it.
It extracts href and their according objects from this ERP5 instance,
and returns the list of uniq objects.
"""
uid_set = set()
for obj in context.WebPage_extractReferredObjectDict().values():
if obj is not None:
uid_set.add(obj.getUid())
if uid_set:
return context.portal_catalog(uid=list(uid_set))
return ()
<?xml version="1.0"?>
<!-- Serialized state of the Zope PythonScript "WebPage_getImplicitSuccessorValueList": name bindings, parameter signature and title of the script. -->
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<!-- Names under which the bound objects are visible inside the script body. -->
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<!-- Single optional parameter, defaulting to None. -->
<key> <string>_params</string> </key>
<value> <string>reference_list=None</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>WebPage_getImplicitSuccessorValueList</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Get referenced by us objects</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment