Commit e7fee233 authored by Tristan Cavelier's avatar Tristan Cavelier Committed by Sven Franck

erp5_web: add tools to export web page as single file (mhtml or embedded html)

parent 7fd7a9eb
##############################################################################
#
# Copyright (c) 2016 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from HTMLParser import HTMLParser
class HtmlParseHelper(HTMLParser):
  """
  HTMLParser subclass recording every parser event in `self.result`.

  Each event is stored as a tuple whose first item is the event name (the
  handler name without its `handle_` prefix) followed by the handler
  arguments, e.g. `handle_starttag` stores `('starttag', tag, attrs)`.
  See https://docs.python.org/2/library/htmlparser.html
  """

  def __init__(self, *args, **kw):
    HTMLParser.__init__(self, *args, **kw)
    # Events in document order, one tuple per handler call.
    self.result = []

  def _record(self, *event):
    # `event` is already the tuple to store: (name, arg1, ...).
    self.result.append(event)

  def handle_starttag(self, tag, attrs):
    self._record("starttag", tag, attrs)

  def handle_startendtag(self, tag, attrs):
    self._record("startendtag", tag, attrs)

  def handle_endtag(self, tag):
    self._record("endtag", tag)

  def handle_data(self, data):
    self._record("data", data)

  def handle_entityref(self, name):
    self._record("entityref", name)

  def handle_charref(self, name):
    self._record("charref", name)

  def handle_comment(self, data):
    self._record("comment", data)

  def handle_decl(self, decl):
    self._record("decl", decl)

  def handle_pi(self, data):
    self._record("pi", data)

  def unknown_decl(self, data):
    self._record("unknown_decl", data)
def parseHtml(text):
  """
  Parse `text` as HTML and return the list of parser events.

  Every event is a tuple as recorded by HtmlParseHelper.

  Example:

    input: 'Click <a href="destination">here</a> to see the documentation.'

    return: [
      ('data', 'Click '),
      ('starttag', 'a', [('href', 'destination')]),
      ('data', 'here'),
      ('endtag', 'a'),
      ('data', ' to see the documentation.'),
    ]
  """
  parser = HtmlParseHelper()
  parser.feed(text)
  # close() forces the parser to process any data still buffered.
  parser.close()
  return parser.result
import re
def partition(text, separatorRegexp):
  """
  Split `text` around every match of the compiled `separatorRegexp`.

  The returned list alternates between 1-tuples holding the text found
  between separators (possibly empty) and tuples describing a separator:
  the whole match followed by its groups. It always starts and ends with a
  non-separator item, so separators sit at odd indexes.

    partition("abcba", re.compile("(b)")) -> [
      ("a",),
      ("b", "b"),
      ("c",),
      ("b", "b"),
      ("a",),
    ]
  """
  pieces = []
  cursor = 0  # index of the first character not consumed yet
  for found in separatorRegexp.finditer(text):
    before = text[cursor:found.start()]
    pieces += [(before,), (found.group(0),) + found.groups()]
    cursor = found.end()
  pieces.append((text[cursor:],))
  return pieces
# A CSS comment runs from "/*" to the FIRST following "*/". The non-greedy
# DOTALL form also recognises comments whose closing "*/" is preceded by
# extra asterisks ("/** doc **/", "/****/"), which the previous pattern
# r"/\*((?:[^\*]|\*[^/])*)\*/" could not match.
css_comment_filter_re = re.compile(r"/\*(.*?)\*/", re.DOTALL)
# Groups: 1 = ':  url(' prefix, 2 = raw text between the parentheses,
# 3/4 = double quote and its content, 5/6 = single quote and its content,
# 7 = unquoted content.
css_url_re = re.compile(r"""(:[ \t]*url\()(\s*(")([^"]*)"\s*|\s*(')([^']*)'\s*|([^\)]*))\)""")
def parseCssForUrl(text):
  """
  Split a CSS source into comments, `url(...)` references and plain data.

  Only `url(` occurrences directly preceded by a colon (a property value)
  are detected; comments are never searched for urls.

  return tuple list like: [
    ("data", ""),
    ("comment", "/* set body background image */", " set body background image "),
    ("data", "\nbody {\n background-image: url("),
    ("url", " 'http://ima.ge/bg.png' ", "http://ima.ge/bg.png", "'"),
    ("data", ");\n}\n"),
  ]

  - ("data", text): css text to copy verbatim
  - ("comment", whole_comment, comment_content)
  - ("url", raw_text_between_parentheses, stripped_url, quote_character)
  """
  result = []
  # partition() puts separators (here: comments) at odd indexes.
  for index, chunk in enumerate(partition(text, css_comment_filter_re)):
    if index % 2:  # comment
      result.append(("comment", chunk[0], chunk[1]))
      continue
    # non comment: look for urls, accumulating the css text in between
    data = ""
    for url_index, piece in enumerate(partition(chunk[0], css_url_re)):
      if url_index % 2 == 0:  # css data
        data += piece[0]
        continue
      # url: flush pending data together with the ': url(' prefix
      result.append(("data", data + piece[1]))
      url = (piece[4] or piece[6] or piece[7] or "").strip()
      quote = piece[3] or piece[5] or ""
      result.append(("url", piece[2], url, quote))
      # the closing parenthesis belongs to the next data item
      data = ")"
    result.append(("data", data))
  return result
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Extension Component" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>default_reference</string> </key>
<value> <string>WebUtility</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>extension.erp5.WebUtility</string> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Extension Component</string> </value>
</item>
<item>
<key> <string>sid</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>text_content_error_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>version</string> </key>
<value> <string>erp5</string> </value>
</item>
<item>
<key> <string>workflow_history</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary>
<item>
<key> <string>component_validation_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAM=</string> </persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="3" aka="AAAAAAAAAAM=">
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.patches.WorkflowTool"/>
</pickle>
<pickle>
<tuple>
<none/>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>validate</string> </value>
</item>
<item>
<key> <string>validation_state</string> </key>
<value> <string>validated</string> </value>
</item>
</dictionary>
</list>
</tuple>
</pickle>
</record>
</ZopeData>
"""
Usage:
formatAttachmentListToMIMEMultipartString(
subtype="related",
header_dict={
"From": "<Saved by ERP5>",
"Subject": "Document Title",
},
param_list=[("type", "text/html")],
attachment_list=[
{
"mime_type": "text/html",
"charset": "utf-8",
"encode": "quoted-printable",
"header_dict": {"Content-Location": "https://www.erp5.com/My.Web.Page"}, # only add headers
"data": "<!DOCTYPE ...>.....................</...>",
},
{
"mime_type": "image/png",
"add_header_list": [("Content-Location", "https://www.erp5.com/My.Image")],
"data": "\x00............\x01",
}
]
);
Only the attachment_list parameter is mandatory.
Note: text/* content will not be automatically encoded to quoted-printable
because this encoding can lose some characters like "\r" and possibly others.
Default text/* is encoded in 7or8bit.
To send specific encoded data, please make your attachment dict look like:
{
"mime_type": "text/html",
"encode": "noop",
"add_header_list": [("Content-Transfer-Encoding", "my-encoding")],
"data": encodestring(html_data),
}
"""
from email.encoders import encode_noop, encode_7or8bit, \
encode_base64 as original_encode_base64
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.mime.audio import MIMEAudio
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
import quopri
def formatMultipartMessageToRFC2822String(msg):
  """
  Serialize the multipart message `msg` with CRLF ("\r\n") line endings.

  `msg.as_string()` does not exactly follow the RFC2822: its EOL are LF
  ("\n") by default. The outer headers, the boundary lines and the headers
  of each part are converted to CRLF here; the body of each part is kept
  untouched. If the first line already ends with "\r", the serialized
  message is returned as is.

  Note: The first space in each line of a multiline header is replaced by a
  tabulation to make some mhtml viewers able to parse it, even if a simple
  space follows the RFC2822.
  """
  def toCrlfHeader(header_text):
    # LF -> CRLF, then turn the folding space of continuation lines to a tab
    return header_text.replace("\n", "\r\n").replace("\r\n ", "\r\n\t")

  as_string = msg.as_string()  # it also forces the boundary generation
  first_line = as_string.split("\n", 1)[0]
  if first_line.endswith("\r"):
    return as_string
  separator = "\n--" + msg.get_boundary()
  chunks = as_string.split(separator)
  # chunks[0] holds the outer headers (and preamble), the others hold one
  # part each: headers, empty line, body.
  converted = [toCrlfHeader(chunks[0])]
  for chunk in chunks[1:]:
    head_and_body = chunk.split("\n\n", 1)
    head_and_body[0] = toCrlfHeader(head_and_body[0])
    converted.append("\r\n\r\n".join(head_and_body))
  return ("\r" + separator).join(converted)
def encode_quopri(msg):
"""Same as encoders.encode_quopri except that spaces are kept
when possible and end of lines are converted to CRLF ("\r\n")
when necessary.
"""
orig = msg.get_payload()
encdata = quopri.encodestring(orig).replace("=\n", "=\r\n")
msg.set_payload(encdata)
msg.add_header("Content-Transfer-Encoding", "quoted-printable")
def encode_base64(msg):
  """Extend encoders.encode_base64 to return CRLF at end of lines.

  The payload of `msg` is base64 encoded in place by the original encoder
  (which also sets the Content-Transfer-Encoding header), then every LF of
  the encoded payload is replaced by CRLF.
  """
  original_encode_base64(msg)
  encoded = msg.get_payload()
  msg.set_payload(encoded.replace("\n", "\r\n"))
outer = MIMEMultipart(subtype)
for key, value in param_list:
outer.set_param(key, value)
if boundary is not None:
outer.set_boundary(boundary)
if replace_header_list is not None:
for key, value in replace_header_list:
outer.replace_header(key, value)
if header_dict is not None: # adds headers, does not replace or set
for key, value in header_dict.items():
outer.add_header(key, value)
if add_header_list is not None:
for key, value in add_header_list:
outer.add_header(key, value)
for attachment in attachment_list:
mime_type = attachment.get("mime_type", "application/octet-stream")
data = attachment.get("data", "")
encoding = attachment.get("encode")
if encoding not in ("base64", "quoted-printable", "7or8bit", "noop", None):
raise ValueError("unknown attachment encoding %r" % encoding)
main_type, sub_type = mime_type.split("/")
if encoding is None:
if main_type == "image":
if sub_type == "svg+xml":
part = MIMEImage(data, sub_type, encode_quopri) # should we trust the mime_type ?
else:
part = MIMEImage(data, sub_type, encode_base64)
elif main_type == "text":
part = MIMEText(data, sub_type, attachment.get("charset", "us-ascii"))
elif main_type == "audio":
part = MIMEAudio(data, sub_type, encode_base64)
elif main_type == "application":
part = MIMEApplication(data, sub_type, encode_noop)
if sub_type == "javascript":
encode_quopri(part)
else:
encode_base64(part)
else:
part = MIMEBase(main_type, sub_type)
part.set_payload(data)
encode_base64(part)
else:
part = MIMEBase(main_type, sub_type)
part.set_payload(data)
if encoding == "base64":
encode_base64(part)
elif encoding == "quoted-printable":
encode_quopri(part)
elif encoding == "7or8bit":
encode_7or8bit(part)
else: # elif encoding == "noop":
encode_noop(part)
for key, value in attachment.get("replace_header_list", []):
part.replace_header(key, value)
for key, value in attachment.get("header_dict", {}).items(): # adds headers, does not replace or set
part.add_header(key, value)
for key, value in attachment.get("add_header_list", []):
part.add_header(key, value)
if attachment.get("filename", None) is not None:
part.add_header("Content-Disposition", "attachment", attachment["filename"])
outer.attach(part)
#return outer.as_string()
return formatMultipartMessageToRFC2822String(outer)
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>attachment_list, subtype="mixed", header_dict=None, param_list=(), replace_header_list=None, add_header_list=None, boundary=None</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Base_formatAttachmentListToMIMEMultipartString</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ExternalMethod" module="Products.ExternalMethod.ExternalMethod"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_function</string> </key>
<value> <string>parseCssForUrl</string> </value>
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>WebUtility</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Base_parseCssForUrl</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ExternalMethod" module="Products.ExternalMethod.ExternalMethod"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_function</string> </key>
<value> <string>parseHtml</string> </value>
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>WebUtility</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Base_parseHtml</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
# Return a dict mapping a domain name to the object published on that domain.
# WebPage_exportAsSingleFile looks up the domain of absolute URLs in this
# dict to find the object to traverse from.
#
# For now the mapping is always empty.
# TODO: domain names should be exported to a web site property.
# Intended implementation (presumably waiting for that property):
#   domain_dict = {}
#   for web_site in portal_catalog(portal_type="Web Site", validation_state="published"):
#     domain = web_site.getDomainName("")
#     if domain != "":
#       domain_dict[domain] = web_site
#   return domain_dict
return {}
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>ERP5Site_getWebSiteDomainDict</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
"""
Export the web page and its components to a single (m)html file.
`format` parameter could also be "mhtml".
TODO: export same components into one mhtml attachment if possible.
"""
from zExceptions import Unauthorized
from base64 import b64encode, b64decode
# Portal object; used further down as the base object for URLs starting
# with "/" (see base_url_root_object).
portal = context.getPortalObject()
# Keyword arguments for Base_formatAttachmentListToMIMEMultipartString, only
# used when format == "mhtml". "attachment_list" is filled while the page is
# processed (one entry per linked resource) and "Subject" is replaced by the
# page title in main().
mhtml_message = {
"subtype": "related",
"param_list": [("type", "text/html")],
"header_dict": {"From": "<Saved by ERP5>", "Subject": "Untitled"},
"attachment_list": [],
}
def main():
  """
  Export the context web page as a single file.

  The text content of the page is parsed, every html part is rendered back
  to a string (linked resources being embedded or attached on the way) and
  the result is returned either as an mhtml message (format == "mhtml") or
  as an html document. When REQUEST is given, the response headers are set
  so that the result is downloaded as "reference-version-language.(m)html".
  """
  def quoteForHeader(value):
    # escape double quotes, the file name is sent inside a quoted string
    return value.replace('"', '\\"')

  def setDownloadHeaders(content_type, disposition_template):
    if REQUEST is None:
      return
    REQUEST.RESPONSE.setHeader("Content-Type", content_type)
    REQUEST.RESPONSE.setHeader("Content-Disposition", disposition_template % (
      quoteForHeader(context.getReference("untitled")),
      quoteForHeader(context.getVersion("001")),
      quoteForHeader(context.getLanguage("en")),
    ))

  html = context.getTextContent("").decode("utf-8")
  # handleHtmlPartList returns (renderer, argument) couples
  rendered_list = [render(part) for render, part in handleHtmlPartList(parseHtml(html))]
  data = "".join(rendered_list)
  if format != "mhtml":
    setDownloadHeaders("text/html", 'attachment;filename="%s-%s-%s.html"')
    return data
  mhtml_message["header_dict"]["Subject"] = context.getTitle() or "Untitled"
  # the html document must be the first part of the message
  mhtml_message["attachment_list"].insert(0, {
    "mime_type": "text/html",
    "encode": "quoted-printable",
    "add_header_list": [("Content-Location", context.absolute_url())],
    "data": str(data.encode("utf-8")),
  })
  res = context.Base_formatAttachmentListToMIMEMultipartString(**mhtml_message)
  setDownloadHeaders("multipart/related", 'attachment;filename="%s-%s-%s.mhtml"')
  return res
def handleHtmlTag(tag, attrs):
  """
  Rewrite the url attributes of one html tag.

  `attrs` is the list of (name, value) couples of the tag; it is modified in
  place and returned together with `tag`.

  - object (data) and img (src) go through handleImageSource
  - link rel="stylesheet" (href) goes through handleCssHref, then the urls
    found in the resulting css data uri are replaced
  - script (src) goes through handleJsSource
  - on any other tag, href and src are made absolute
  """
  #if tag == "base": and "href" in attrs: # should not exist in safe-html
  #  NotImplemented
  def convertCssHref(value):
    return replaceFromDataUri(handleCssHref(value), replaceCssUrl)

  # pick the attribute names to rewrite and the converter to apply
  if tag == "object":
    name_list, convert = ("data",), handleImageSource
  elif tag == "img":
    name_list, convert = ("src",), handleImageSource
  elif tag == "link" and anny(attrs, key=lambda a: a[0] == "rel" and a[1] == "stylesheet"):
    name_list, convert = ("href",), convertCssHref
  elif tag == "script":
    name_list, convert = ("src",), handleJsSource
  else:
    name_list, convert = ("href", "src"), makeHrefAbsolute
  for index in range(len(attrs)):
    name, value = attrs[index]
    if name in name_list:
      attrs[index] = name, convert(value)
  return tag, attrs
def strHtmlPart(part):
  """
  Render one html part (a tuple as returned by parseHtml) back to a string.

  Start tags have their attributes processed by handleHtmlTag and escaped;
  the other known parts are written back with their original markup.
  Returns None when the part type is unknown.
  """
  part_type = part[0]
  if part_type == "starttag" or part_type == "startendtag":
    tag, attrs = handleHtmlTag(part[1], part[2])
    # attributes without value are rendered with an empty value
    attr_list = ["%s=\"%s\"" % (escapeHtml(k), escapeHtml(v or "")) for k, v in attrs]
    attrs_str = " ".join(attr_list)
    if attrs_str:
      attrs_str = " " + attrs_str
    closing = ""
    if part_type == "startendtag":
      closing = " /"
    return "<%s%s%s>" % (tag, attrs_str, closing)
  if part_type == "data":
    return part[1]
  template_dict = {
    "endtag": "</%s>",
    "entityref": "&%s;",
    "charref": "&#%s;",
    "comment": "<!--%s-->",
    "decl": "<!%s>",
    "unknown_decl": "<!%s>",
    "pi": "<?%s>",
  }
  template = template_dict.get(part_type)
  if template is None:
    return None
  return template % part[1]
disallow_script = not allow_script
def handleHtmlPartList(part_list):
  """
  Prepare the rendering of the html parts returned by parseHtml.

  Returns a list of (function, argument) couples; calling
  function(argument) gives the string to output for this item.

  - when scripts are not allowed, everything from a <script> start tag to
    its end tag (included) is dropped
  - the content of a <style> element is gathered in one string which is
    rendered by replaceCssUrl
  - any other part is rendered by strHtmlPart
  """
  res = []
  style_data = ""
  skipping_script = False
  inside_style = False
  for part in part_list:
    part_type = part[0]
    if skipping_script:
      # can only be data until </script> endtag
      if part_type == "endtag" and part[1] == "script":
        skipping_script = False
      continue
    if inside_style:
      if part_type == "endtag" and part[1] == "style":
        res.append((replaceCssUrl, style_data))
        res.append((strHtmlPart, part))
        style_data = ""
        inside_style = False
      else:
        # can only be data until </style> endtag
        style_data += strHtmlPart(part)
      continue
    if part_type == "starttag":
      # when you save a page from a browser, every script tag are removed
      if part[1] == "script" and disallow_script:
        skipping_script = True
        continue
      if part[1] == "style":
        inside_style = True
    res.append((strHtmlPart, part))
  return res
def handleCssHref(href):
  """Handle the href of a stylesheet link, currently like any other href."""
  css_href = handleHref(href)
  return css_href
def handleJsSource(href):
  """Handle the src of a script tag, currently like any other href."""
  js_source = handleHref(href)
  return js_source
def handleHref(href):
  """
  Embed or attach the document pointed to by `href`.

  Returns `href` unchanged when it is not a url, its absolute version when
  the target cannot be traversed (KeyError or Unauthorized), else the
  result of handleHrefObject for the traversed object.
  """
  if isHrefAUrl(href):
    try:
      target = traverseHref(href)
    except (KeyError, Unauthorized):
      return makeHrefAbsolute(href)
    return handleHrefObject(target, href)
  return href
def handleImageSource(src):
  """
  Embed or attach the image pointed to by `src`.

  Returns `src` unchanged when it is not a url, its absolute version when
  the target cannot be traversed (KeyError or Unauthorized), else the
  result of handleImageSourceObject for the traversed object.
  """
  if isHrefAUrl(src):
    try:
      target = traverseHref(src)
    except (KeyError, Unauthorized):
      return makeHrefAbsolute(src)
    return handleImageSourceObject(target, src)
  return src
def replaceCssUrl(data):
  """
  Return the css `data` with its `url(...)` references replaced.

  The css is split by Base_parseCssForUrl. A "url" part whose url passes
  isHrefAUrl is replaced by the result of handleImageSource; every other
  part is copied using its second item.
  """
  chunk_list = []
  for part in context.Base_parseCssForUrl(data):
    # a "url" part is ("url", raw_text, url, quote)
    if part[0] == "url" and isHrefAUrl(part[2]):
      chunk_list.append(handleImageSource(part[2]))
    else:
      chunk_list.append(part[1])
  return "".join(chunk_list)
def handleImageSourceObject(o, src):
  """
  Embed or attach the image object `o` reached through the url `src`.

  When `o` has a `convert` method and the query string of `src` gives a
  "format" and/or a "display" value, the converted data is used. In any
  other case the object is handled by handleHrefObject, with the "cannot
  load image" picture as default content.
  """
  if hasattr(o, "convert"):
    format_kw = {}
    for key, value in parseUrlSearch(extractUrlSearch(src)):
      if key in ("format", "display") and value is not None:
        format_kw[key] = value
    if format_kw:
      mime, data = o.convert(**format_kw)
      return handleLinkedData(mime, data, src)
  return handleHrefObject(o, src, default_mimetype=bad_image_mime_type, default_data=bad_image_data)
def handleHrefObject(o, src, default_mimetype="text/html", default_data="<p>Linked page not found</p>"):
  """
  Embed or attach the object `o` reached through the url `src`.

  - objects with a `getContentType` method are exported with their content
    type and their data (`getData()` when available, else `str(o)`); the
    default content is used when the content type is empty
  - other callable objects without `getPortalType` are called and the
    result is exported as text/html
  - the default content is used in any other case

  Unicode data is encoded in utf-8 before being exported.
  """
  def toByteString(value):
    if isinstance(value, unicode):
      return value.encode("utf-8")
    return value

  # handle File portal_skins/folder/file.png
  # XXX handle "?portal_skin=" parameter ?
  if hasattr(o, "getContentType"):
    mime = o.getContentType("")
    if not mime:
      return handleLinkedData(default_mimetype, default_data, src)
    getData = getattr(o, "getData", lambda: str(o))
    return handleLinkedData(mime, toByteString(getData() or ""), src)
  # handle Object.view
  # XXX handle url query parameters ? Not so easy because we need to
  # use the same behavior as when we call a script from browser URL bar.
  if not hasattr(o, "getPortalType") and callable(o):
    return handleLinkedData("text/html", toByteString(o()), src)
  return handleLinkedData(default_mimetype, default_data, src)
# Placeholder picture, as a data uri, used as default content for images by
# handleImageSourceObject.
bad_image_data_url = (
"data:image/png;base64," + # little image showing cannot load image
"iVBORw0KGgoAAAANSUhEUgAAABEAAAATCAIAAAD5x3GmAAACWklEQVQokZWSX0hTcRTHz/" +
"3TunMmMyxrQUzEPQSCFEI0fCi0HmSKdsUGg3q2h5I99dj7bE8Nw6cwLDb3kO7JP5m6h0TE" +
"CmYQjJYgpaPc7q67+93de04P0zvnQ+CP78Pvdzgfzjnf3+GICE55+NMCACACACKOj49rmv" +
"afvNHRUZ4/KkBEjLFQKJRTjXyRTqigUSwWI6JQKGSaJhEREQ8ApmkCgFrif+8bJ7RfMAGA" +
"MRYMBsPhMCLWzFPUUdVI1cjjEj0usXLXdLJ6sTCx2jIBAd1otVVe11vPbKT1iqeJRMLKKp" +
"fLVYaoChxGEAwDbt0ZsNs4ABAEbiLyoqYOEax/ZyfsYmX4q5iCAABQd1aoen3UGmDt/zod" +
"/EWnuJczcgcIABzHu91um81W9YCI8Jga6rirqUV41O9pQqeDR6J6iRvs7VUeDFQZJCKEih" +
"DxfINemIioq4ms7GtrwkaH4KovZ2WfujLL1/SGiIgZZSmavj2Veto0GYXO7vzawo7saztX" +
"3JF9+bUF6Oyu8YAAtnLvNrJBAOPb7lbkizQyPZuWfX8+LeTaG00NHDe7r8Rmju0oQaawVA" +
"Eqga+/Xkc+B1vexDSJzx+AJvEtk1FDEHjLAEXfXdt7ZgEA0H754UjH2GZgWFGR2UVFxc3A" +
"sIh4yDDGFjPPdfxhAdea/Y87xpJy//bqnN3b05XK2r0928n55P2+w3kMw9CXmy/AE4u5Fw" +
"h89A/tLM9d6urxTr9/G4/74zMfBvt+rsxzRKTruqIojNUsgSRJB+vrqVcv705Fc8ViqVSS" +
"JMnpcMz5h/4B1Qxz9NOjZCgAAAAASUVORK5CYII="
)
# Raw bytes and mime type of the placeholder picture above.
bad_image_data = b64decode(bad_image_data_url.split(",", 1)[1])
bad_image_mime_type = "image/png"
# Scheme of the current request followed by ":" (text before the first ":"
# of SERVER_URL); prepended to urls starting with "//".
request_protocol = context.REQUEST.SERVER_URL.split(":", 1)[0] + ":"
# domain name -> object to traverse from, used for absolute urls
site_object_dict = context.ERP5Site_getWebSiteDomainDict()
# Base object for urls starting with "/".
base_url_root_object = portal
# Base object for relative urls.
base_url_object = context
def handleLinkedData(mime, data, href):
  """
  Export the linked `data` and return the url to reference it with.

  In "mhtml" format, the data is appended to the attachment list of the
  mhtml message (text/* in quoted-printable, anything else with the default
  encoding) and the absolute version of `href` is returned. Otherwise the
  data is returned as a base64 data uri.
  """
  if format != "mhtml":
    return "data:%s;base64,%s" % (mime, b64encode(data))
  url = makeHrefAbsolute(href)
  encode = None
  if mime.startswith("text/"):
    encode = "quoted-printable"
  mhtml_message["attachment_list"].append({
    "mime_type": mime,
    "encode": encode,
    "add_header_list": [("Content-Location", url)],
    "data": str(data),
  })
  return url
def makeHrefAbsolute(href):
  """
  Return the absolute version of the url `href`.

  Absolute urls and values that are not urls are returned unchanged,
  "//host/..." gets the protocol of the current request, "/..." is based on
  the url of the root object and anything else on the url of the base
  object.
  """
  if isHrefAnAbsoluteUrl(href) or not isHrefAUrl(href):
    return href
  if href.startswith("//"):
    return request_protocol + href
  if href.startswith("/"):
    base_url = base_url_root_object.absolute_url()
    suffix = href
  else:
    base_url = base_url_object.absolute_url()
    suffix = "/" + href
  return base_url + suffix
def isHrefAnAbsoluteUrl(href):
  """Tell whether `href` starts with "https://" or "http://"."""
  absolute_prefix_list = ("https://", "http://")
  return href.startswith(absolute_prefix_list)
def isHrefAUrl(href):
  """
  Tell whether `href` is a url this script can handle.

  True for http(s) urls and for any reference without a scheme (relative
  paths, "/absolute/paths", "//host/paths", "#anchors"); False for values
  using another alphabetic scheme such as "mailto:", "data:" or
  "javascript:".
  """
  if href.startswith("https://") or href.startswith("http://"):
    return True
  # A scheme requires a ":"; without this check a purely alphabetic relative
  # reference such as "logo" would be mistaken for a scheme.
  has_scheme = ":" in href and href.split(":", 1)[0].isalpha()
  return not has_scheme
def traverseHref(url, allow_hash=False):
  """
  Return the object pointed to by `url`, using restricted traversal.

  The query string is always ignored, and so is the fragment unless
  `allow_hash` is true.

  - "http(s)://domain/path" and "//domain/path" are traversed from the
    object registered for this domain in site_object_dict; KeyError is
    raised when the domain is unknown
  - "/path" is traversed from the root object
  - any other url is traversed from the base object
  """
  url = url.split("?")[0]
  if not allow_hash:
    url = url.split("#")[0]
  if url.startswith(("https://", "http://", "//")):  # absolute url possibly on other sites
    piece_list = url.split("/", 3)  # [scheme or "", "", domain, path]
    domain = piece_list[2]
    site_url = "/".join(piece_list[:3])
    relative_path = url[len(site_url):]
    if relative_path[:1] == "/":
      relative_path = relative_path[1:]
    site_object = site_object_dict.get(domain)
    if site_object is None:
      raise KeyError(relative_path.split("/")[0])
    return site_object.restrictedTraverse(str(relative_path))
  if url.startswith("/"):  # absolute path, relative url
    base_object = base_url_root_object
    path = url[1:]
  else:  # relative url (just use a base url)
    base_object = base_url_object
    path = url
  return base_object.restrictedTraverse(str(path))
def replaceFromDataUri(data_uri, replacer):
  """
  Apply `replacer` to the content of a text/css data uri.

  `data_uri` is returned unchanged when it is not a data uri (plain url or
  path) or when its header does not mention text/css. Otherwise the payload
  is decoded (when base64), given to `replacer`, encoded back the same way
  and returned with its original header.
  """
  # Plain urls (mhtml Content-Location, untraversable or non-url hrefs) carry
  # no payload to rewrite; unpacking a split on "," would fail on them.
  if not data_uri.startswith("data:") or "," not in data_uri:
    return data_uri
  # Split on the first comma only: a non-base64 payload may contain commas.
  header, data = data_uri.split(",", 1)
  if "text/css" not in header:
    return data_uri
  is_base64 = ";base64" in header
  if is_base64:
    data = b64decode(data)
  data = replacer(data)
  if is_base64:
    data = b64encode(data)
  return "%s,%s" % (header, data)
def extractUrlSearch(url):
  """
  Return the query string of `url`, including its leading "?".

  The fragment ("#...") is dropped first. An empty string is returned when
  the url has no query string.
  """
  without_hash = url.split("#", 1)[0]
  if "?" not in without_hash:
    return ""
  return "?" + without_hash.split("?", 1)[1]
def parseUrlSearch(search):
  """
  Parse a query string (with or without its leading "?") into a list of
  (key, value) couples, in order of appearance.

  The value is everything after the first "=" of a parameter, or None when
  the parameter has no "=" at all. Nothing is url-decoded.

    parseUrlSearch("?format=png&pre&a=b=c") -> [
      ("format", "png"),
      ("pre", None),
      ("a", "b=c"),
    ]
  """
  if search[:1] == "?":
    search = search[1:]
  result = []
  for part in search.split("&"):
    key, separator, value = part.partition("=")
    # `separator` is empty when the parameter has no "=": no value at all,
    # which is different from an empty value ("key=").
    result.append((key, value if separator else None))
  return result
def parseHtml(text):
  """Return the html parts of `text`, as given by the Base_parseHtml script."""
  part_list = context.Base_parseHtml(text)
  return part_list
def escapeHtml(s):
  """Return `s` with the characters &, <, > and " replaced by html entities."""
  # "&" must come first so that the entities added afterwards are kept
  for character, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"), ("\"", "&quot;")):
    s = s.replace(character, entity)
  return s
def anny(iterable, key=None):
  """
  Tell whether at least one item of `iterable` is true.

  When `key` is given, it is called on each item and its result is tested
  instead of the item. The iteration stops at the first true value.
  """
  for item in iterable:
    value = key(item) if key else item
    if value:
      return True
  return False
# Script entry point: everything main() needs is defined above; run the
# export and return its result to the caller.
return main()
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>REQUEST=None, allow_script=False, format="embedded_html"</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>WebPage_exportAsSingleFile</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment