Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
erp5
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Richard
erp5
Commits
e7fee233
Commit
e7fee233
authored
May 17, 2016
by
Tristan Cavelier
Committed by
Sven Franck
Jun 02, 2016
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
erp5_web: add tools to export web page as single file (mhtml or embedded html)
parent
7fd7a9eb
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
952 additions
and
0 deletions
+952
-0
bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.py
...mplateItem/portal_components/extension.erp5.WebUtility.py
+133
-0
bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.xml
...plateItem/portal_components/extension.erp5.WebUtility.xml
+102
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_formatAttachmentListToMIMEMultipartString.py
...rp5_web/Base_formatAttachmentListToMIMEMultipartString.py
+155
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_formatAttachmentListToMIMEMultipartString.xml
...p5_web/Base_formatAttachmentListToMIMEMultipartString.xml
+62
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_parseCssForUrl.xml
...emplateItem/portal_skins/erp5_web/Base_parseCssForUrl.xml
+28
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_parseHtml.xml
...SkinTemplateItem/portal_skins/erp5_web/Base_parseHtml.xml
+28
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.py
...em/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.py
+8
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.xml
...m/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.xml
+62
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.py
...eItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.py
+312
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.xml
...Item/portal_skins/erp5_web/WebPage_exportAsSingleFile.xml
+62
-0
No files found.
bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.py
0 → 100644
View file @
e7fee233
##############################################################################
#
# Copyright (c) 2016 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from
HTMLParser
import
HTMLParser
class HtmlParseHelper(HTMLParser):
  """
  HTMLParser subclass that records every parsing event instead of acting on it.

  Each overridden callback appends one tuple to `self.result`: the event name
  followed by the callback arguments, e.g. `handle_starttag` appends
  `('starttag', tag, attrs)`.
  See https://docs.python.org/2/library/htmlparser.html
  """

  def __init__(self, *args, **kw):
    HTMLParser.__init__(self, *args, **kw)
    # events in the order the parser reported them
    self.result = []

  def _record(self, *event):
    # `event` is already the tuple to store: (event name, argument, ...)
    self.result.append(event)

  def handle_starttag(self, tag, attrs):
    self._record("starttag", tag, attrs)

  def handle_startendtag(self, tag, attrs):
    self._record("startendtag", tag, attrs)

  def handle_endtag(self, tag):
    self._record("endtag", tag)

  def handle_data(self, data):
    self._record("data", data)

  def handle_entityref(self, name):
    self._record("entityref", name)

  def handle_charref(self, name):
    self._record("charref", name)

  def handle_comment(self, data):
    self._record("comment", data)

  def handle_decl(self, decl):
    self._record("decl", decl)

  def handle_pi(self, data):
    self._record("pi", data)

  def unknown_decl(self, data):
    self._record("unknown_decl", data)
def parseHtml(text):
  """
  Parse an html string and return the list of events recorded by
  HtmlParseHelper, one tuple per html part.

  Example:
    input: 'Click <a href="destination">here</a> to see the documentation.'
    return: [
      ('data', 'Click '),
      ('starttag', 'a', [('href', 'destination')]),
      ('data', 'here'),
      ('endtag', 'a'),
      ('data', ' to see the documentation.'),
    ]
  """
  parser = HtmlParseHelper()
  parser.feed(text)
  # close() makes the parser process whatever is still buffered
  parser.close()
  return parser.result
import
re
def partition(text, separatorRegexp):
  """
  Cut `text` around every match of the compiled `separatorRegexp`.

  The returned list alternates between 1-tuples holding the text between
  matches (even indexes, always present, possibly empty) and tuples holding
  the whole match followed by its groups (odd indexes).

    partition("abcba", re.compile("(b)")) -> [
      ("a",),
      ("b", "b"),
      ("c",),
      ("b", "b"),
      ("a",),
    ]
  """
  pieces = []
  cursor = 0  # index of the first character not yet emitted
  for found in separatorRegexp.finditer(text):
    begin, end = found.span()
    pieces.append((text[cursor:begin],))
    pieces.append((found.group(0),) + found.groups())
    cursor = end
  pieces.append((text[cursor:],))
  return pieces
# Matches one css comment; group 1 is the text between "/*" and the first
# following "*/". The match is non-greedy so that comments ending with a run
# of stars ("/***/", "/** x **/") stop at their own terminator instead of
# extending to the end of a later comment.
css_comment_filter_re = re.compile(r"/\*(.*?)\*/", re.DOTALL)
# Matches ": url(...)" in a css declaration. Groups:
#   1: the prefix up to and including "url("
#   2: everything between the parentheses (quotes and spaces included)
#   3, 4: the double quote and the double quoted value
#   5, 6: the single quote and the single quoted value
#   7: the unquoted value
#css_url_re = re.compile(r"""(:[ \t]*url\()((")([^"]*)"|(')([^']*)'|([^\)]*))\)""")
css_url_re = re.compile(r"""(:[ \t]*url\()(\s*(")([^"]*)"\s*|\s*(')([^']*)'\s*|([^\)]*))\)""")
def parseCssForUrl(text):
  r"""
  Split a css text into "data", "comment" and "url" tuples.

  Concatenating the second item of every tuple gives the original text back.
  "url" tuples also carry the stripped url and the quote character used.

  return tuple list like: [
    ("data", ""),
    ("comment", "/* set body background image */", " set body background image "),
    ("data", "\nbody {\n  background-image: url("),
    ("url", " 'http://ima.ge/bg.png' ", "http://ima.ge/bg.png", "'"),
    ("data", ");\n}\n"),
  ]
  """
  result = []
  for chunk_index, chunk in enumerate(partition(text, css_comment_filter_re)):
    if chunk_index % 2:
      # odd entries are the comments matched by the regexp
      result.append(("comment", chunk[0], chunk[1]))
      continue
    # even entries are plain css, to be split again around each url(...)
    pending = ""
    for piece_index, piece in enumerate(partition(chunk[0], css_url_re)):
      if piece_index % 2 == 0:
        pending += piece[0]
        continue
      # piece[1] is the ": url(" prefix, it stays with the preceding data
      result.append(("data", pending + piece[1]))
      result.append(("url", piece[2], (piece[4] or piece[6] or piece[7] or "").strip(), piece[3] or piece[5] or ""))
      # the closing parenthesis opens the next data part
      pending = ")"
    result.append(("data", pending))
  return result
bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"Extension Component"
module=
"erp5.portal_type"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
default_reference
</string>
</key>
<value>
<string>
WebUtility
</string>
</value>
</item>
<item>
<key>
<string>
description
</string>
</key>
<value>
<none/>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
extension.erp5.WebUtility
</string>
</value>
</item>
<item>
<key>
<string>
portal_type
</string>
</key>
<value>
<string>
Extension Component
</string>
</value>
</item>
<item>
<key>
<string>
sid
</string>
</key>
<value>
<none/>
</value>
</item>
<item>
<key>
<string>
text_content_error_message
</string>
</key>
<value>
<tuple/>
</value>
</item>
<item>
<key>
<string>
text_content_warning_message
</string>
</key>
<value>
<tuple/>
</value>
</item>
<item>
<key>
<string>
version
</string>
</key>
<value>
<string>
erp5
</string>
</value>
</item>
<item>
<key>
<string>
workflow_history
</string>
</key>
<value>
<persistent>
<string
encoding=
"base64"
>
AAAAAAAAAAI=
</string>
</persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record
id=
"2"
aka=
"AAAAAAAAAAI="
>
<pickle>
<global
name=
"PersistentMapping"
module=
"Persistence.mapping"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
data
</string>
</key>
<value>
<dictionary>
<item>
<key>
<string>
component_validation_workflow
</string>
</key>
<value>
<persistent>
<string
encoding=
"base64"
>
AAAAAAAAAAM=
</string>
</persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record
id=
"3"
aka=
"AAAAAAAAAAM="
>
<pickle>
<global
name=
"WorkflowHistoryList"
module=
"Products.ERP5Type.patches.WorkflowTool"
/>
</pickle>
<pickle>
<tuple>
<none/>
<list>
<dictionary>
<item>
<key>
<string>
action
</string>
</key>
<value>
<string>
validate
</string>
</value>
</item>
<item>
<key>
<string>
validation_state
</string>
</key>
<value>
<string>
validated
</string>
</value>
</item>
</dictionary>
</list>
</tuple>
</pickle>
</record>
</ZopeData>
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_formatAttachmentListToMIMEMultipartString.py
0 → 100644
View file @
e7fee233
"""
Usage:
formatAttachmentListToMIMEMultipartString(
subtype="related",
header_dict={
"From": "<Saved by ERP5>",
"Subject": "Document Title",
},
param_list=[("type", "text/html")],
attachment_list=[
{
"mime_type": "text/html",
"charset": "utf-8",
"encode": "quoted-printable",
"header_dict": {"Content-Location": "https://www.erp5.com/My.Web.Page"}, # only add headers
"data": "<!DOCTYPE ...>.....................</...>",
},
{
"mime_type": "image/png",
"add_header_list": [("Content-Location", "https://www.erp5.com/My.Image")],
"data": "
\
x00
............
\
x01
",
}
]
);
Only attachment_list property is mandatory.
Note: text/* content will not be automatically encoded to quoted-printable
because this encoding can lose some characters like "
\
r
" and possibly others.
Default text/* is encoded in 7or8bit.
To send specific encoded data, please make your attachment dict look like:
{
"mime_type": "text/html",
"encode": "noop",
"add_header_list": [("Content-Transfer-Encoding", "my-encoding")],
"data": encodestring(html_data),
}
"""
from
email.encoders
import
encode_noop
,
encode_7or8bit
,
\
encode_base64
as
original_encode_base64
from
email.mime.base
import
MIMEBase
from
email.mime.text
import
MIMEText
from
email.mime.image
import
MIMEImage
from
email.mime.audio
import
MIMEAudio
from
email.mime.application
import
MIMEApplication
from
email.mime.multipart
import
MIMEMultipart
import
quopri
def formatMultipartMessageToRFC2822String(msg):
  """
  Return `msg.as_string()` with CRLF line endings in every header block.

  `msg.as_string()` does not exactly follow the RFC2822: its end of lines are
  not CRLF by default. If the first line already ends with CR the string is
  returned as is. Otherwise the top level headers and the headers of every
  part, as well as the boundary lines, get their LF replaced by CRLF. Part
  bodies are left untouched.

  Note: the first space of each continuation line of a multiline header is
  replaced by a tabulation to make some mhtml viewers able to parse it, even
  if a simple space follows the RFC2822.
  """
  def toCrlfHeader(header_text):
    # LF -> CRLF, then mark folded (continuation) lines with a tabulation
    return "\r\n".join(header_text.split("\n")).replace("\r\n ", "\r\n\t")

  as_string = msg.as_string()  # it also forces the boundary generation
  first_line = as_string.split("\n", 1)[0]
  if first_line.endswith("\r"):
    return as_string
  boundary = msg.get_boundary()
  chunk_list = as_string.split("\n--" + boundary)
  # the first chunk is what precedes the first boundary (top level headers)
  converted_list = [toCrlfHeader(chunk_list[0])]
  for chunk in chunk_list[1:]:
    # only what precedes the first blank line is a header block
    header_and_body = chunk.split("\n\n", 1)
    header_and_body[0] = toCrlfHeader(header_and_body[0])
    converted_list.append("\r\n\r\n".join(header_and_body))
  return ("\r\n--" + boundary).join(converted_list)
def encode_quopri(msg):
  """Encode the payload of `msg` as quoted-printable, in place.

  Counterpart of encoders.encode_quopri built on a plain
  `quopri.encodestring` call; the soft line breaks ("=" followed by LF)
  of the encoded payload are rewritten with CRLF. The
  Content-Transfer-Encoding header is added, not replaced.
  """
  payload = msg.get_payload()
  encoded = quopri.encodestring(payload)
  encoded = encoded.replace("=\n", "=\r\n")
  msg.set_payload(encoded)
  msg.add_header("Content-Transfer-Encoding", "quoted-printable")
def encode_base64(msg):
  """Apply encoders.encode_base64 to `msg`, then end every payload line with CRLF"""
  original_encode_base64(msg)
  line_list = msg.get_payload().split("\n")
  msg.set_payload("\r\n".join(line_list))
outer
=
MIMEMultipart
(
subtype
)
for
key
,
value
in
param_list
:
outer
.
set_param
(
key
,
value
)
if
boundary
is
not
None
:
outer
.
set_boundary
(
boundary
)
if
replace_header_list
is
not
None
:
for
key
,
value
in
replace_header_list
:
outer
.
replace_header
(
key
,
value
)
if
header_dict
is
not
None
:
# adds headers, does not replace or set
for
key
,
value
in
header_dict
.
items
():
outer
.
add_header
(
key
,
value
)
if
add_header_list
is
not
None
:
for
key
,
value
in
add_header_list
:
outer
.
add_header
(
key
,
value
)
for
attachment
in
attachment_list
:
mime_type
=
attachment
.
get
(
"mime_type"
,
"application/octet-stream"
)
data
=
attachment
.
get
(
"data"
,
""
)
encoding
=
attachment
.
get
(
"encode"
)
if
encoding
not
in
(
"base64"
,
"quoted-printable"
,
"7or8bit"
,
"noop"
,
None
):
raise
ValueError
(
"unknown attachment encoding %r"
%
encoding
)
main_type
,
sub_type
=
mime_type
.
split
(
"/"
)
if
encoding
is
None
:
if
main_type
==
"image"
:
if
sub_type
==
"svg+xml"
:
part
=
MIMEImage
(
data
,
sub_type
,
encode_quopri
)
# should we trust the mime_type ?
else
:
part
=
MIMEImage
(
data
,
sub_type
,
encode_base64
)
elif
main_type
==
"text"
:
part
=
MIMEText
(
data
,
sub_type
,
attachment
.
get
(
"charset"
,
"us-ascii"
))
elif
main_type
==
"audio"
:
part
=
MIMEAudio
(
data
,
sub_type
,
encode_base64
)
elif
main_type
==
"application"
:
part
=
MIMEApplication
(
data
,
sub_type
,
encode_noop
)
if
sub_type
==
"javascript"
:
encode_quopri
(
part
)
else
:
encode_base64
(
part
)
else
:
part
=
MIMEBase
(
main_type
,
sub_type
)
part
.
set_payload
(
data
)
encode_base64
(
part
)
else
:
part
=
MIMEBase
(
main_type
,
sub_type
)
part
.
set_payload
(
data
)
if
encoding
==
"base64"
:
encode_base64
(
part
)
elif
encoding
==
"quoted-printable"
:
encode_quopri
(
part
)
elif
encoding
==
"7or8bit"
:
encode_7or8bit
(
part
)
else
:
# elif encoding == "noop":
encode_noop
(
part
)
for
key
,
value
in
attachment
.
get
(
"replace_header_list"
,
[]):
part
.
replace_header
(
key
,
value
)
for
key
,
value
in
attachment
.
get
(
"header_dict"
,
{}).
items
():
# adds headers, does not replace or set
part
.
add_header
(
key
,
value
)
for
key
,
value
in
attachment
.
get
(
"add_header_list"
,
[]):
part
.
add_header
(
key
,
value
)
if
attachment
.
get
(
"filename"
,
None
)
is
not
None
:
part
.
add_header
(
"Content-Disposition"
,
"attachment"
,
attachment
[
"filename"
])
outer
.
attach
(
part
)
#return outer.as_string()
return
formatMultipartMessageToRFC2822String
(
outer
)
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_formatAttachmentListToMIMEMultipartString.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"PythonScript"
module=
"Products.PythonScripts.PythonScript"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
Script_magic
</string>
</key>
<value>
<int>
3
</int>
</value>
</item>
<item>
<key>
<string>
_bind_names
</string>
</key>
<value>
<object>
<klass>
<global
name=
"NameAssignments"
module=
"Shared.DC.Scripts.Bindings"
/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key>
<string>
_asgns
</string>
</key>
<value>
<dictionary>
<item>
<key>
<string>
name_container
</string>
</key>
<value>
<string>
container
</string>
</value>
</item>
<item>
<key>
<string>
name_context
</string>
</key>
<value>
<string>
context
</string>
</value>
</item>
<item>
<key>
<string>
name_m_self
</string>
</key>
<value>
<string>
script
</string>
</value>
</item>
<item>
<key>
<string>
name_subpath
</string>
</key>
<value>
<string>
traverse_subpath
</string>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key>
<string>
_params
</string>
</key>
<value>
<string>
attachment_list, subtype="mixed", header_dict=None, param_list=(), replace_header_list=None, add_header_list=None, boundary=None
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
Base_formatAttachmentListToMIMEMultipartString
</string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_parseCssForUrl.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"ExternalMethod"
module=
"Products.ExternalMethod.ExternalMethod"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
_function
</string>
</key>
<value>
<string>
parseCssForUrl
</string>
</value>
</item>
<item>
<key>
<string>
_module
</string>
</key>
<value>
<string>
WebUtility
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
Base_parseCssForUrl
</string>
</value>
</item>
<item>
<key>
<string>
title
</string>
</key>
<value>
<string></string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_parseHtml.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"ExternalMethod"
module=
"Products.ExternalMethod.ExternalMethod"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
_function
</string>
</key>
<value>
<string>
parseHtml
</string>
</value>
</item>
<item>
<key>
<string>
_module
</string>
</key>
<value>
<string>
WebUtility
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
Base_parseHtml
</string>
</value>
</item>
<item>
<key>
<string>
title
</string>
</key>
<value>
<string></string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.py
0 → 100644
View file @
e7fee233
# Return a dict mapping a domain name to the web site object serving it.
#
# TODO: domain names should be exported to a web site property.
# Sketch of the intended implementation, kept until the property exists:
# domain_dict = {}
# for web_site in portal_catalog(portal_type="Web Site", validation_state="published"):
#   domain = web_site.getDomainName("")
#   if domain != "":
#     domain_dict[domain] = web_site
# return domain_dict
#
# For now no domain is known, so the returned dict is always empty.
return {}
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"PythonScript"
module=
"Products.PythonScripts.PythonScript"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
Script_magic
</string>
</key>
<value>
<int>
3
</int>
</value>
</item>
<item>
<key>
<string>
_bind_names
</string>
</key>
<value>
<object>
<klass>
<global
name=
"NameAssignments"
module=
"Shared.DC.Scripts.Bindings"
/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key>
<string>
_asgns
</string>
</key>
<value>
<dictionary>
<item>
<key>
<string>
name_container
</string>
</key>
<value>
<string>
container
</string>
</value>
</item>
<item>
<key>
<string>
name_context
</string>
</key>
<value>
<string>
context
</string>
</value>
</item>
<item>
<key>
<string>
name_m_self
</string>
</key>
<value>
<string>
script
</string>
</value>
</item>
<item>
<key>
<string>
name_subpath
</string>
</key>
<value>
<string>
traverse_subpath
</string>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key>
<string>
_params
</string>
</key>
<value>
<string></string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
ERP5Site_getWebSiteDomainDict
</string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.py
0 → 100644
View file @
e7fee233
"""
Export the web page and its components to a single (m)html file.
`format` parameter could also be "mhtml".
TODO: export same components into one mhtml attachment if possible.
"""
from
zExceptions
import
Unauthorized
from
base64
import
b64encode
,
b64decode
portal = context.getPortalObject()
# Keyword arguments given to Base_formatAttachmentListToMIMEMultipartString
# when format == "mhtml". "attachment_list" is filled by handleLinkedData
# for each linked component; main() inserts the page itself at index 0 and
# sets the "Subject" header.
mhtml_message = {
  "subtype": "related",
  "param_list": [("type", "text/html")],
  "header_dict": {"From": "<Saved by ERP5>", "Subject": "Untitled"},
  "attachment_list": [],
}
def main():
  """
  Rewrite the text content of `context` and return the exported document.

  The html is parsed, every part is rendered again through the functions
  returned by handleHtmlPartList, then the result is returned either as a
  MIME multipart string (format == "mhtml") or as the html itself. When a
  REQUEST is given, the response headers are set for a file download.
  """
  def setDownloadHeaders(content_type, disposition_template):
    if REQUEST is None:
      return
    response = REQUEST.RESPONSE
    response.setHeader("Content-Type", content_type)
    name_part_list = [
      context.getReference("untitled"),
      context.getVersion("001"),
      context.getLanguage("en"),
    ]
    # double quotes are escaped because the file name is a quoted string
    response.setHeader("Content-Disposition", disposition_template % tuple(
      [name_part.replace('"', '\\"') for name_part in name_part_list]))

  html = context.getTextContent("").decode("utf-8")
  html = "".join([render(part) for render, part in handleHtmlPartList(parseHtml(html))])
  if format != "mhtml":
    setDownloadHeaders("text/html", 'attachment;filename="%s-%s-%s.html"')
    return html
  mhtml_message["header_dict"]["Subject"] = context.getTitle() or "Untitled"
  # the page itself must be the first attachment
  mhtml_message["attachment_list"].insert(0, {
    "mime_type": "text/html",
    "encode": "quoted-printable",
    "add_header_list": [("Content-Location", context.absolute_url())],
    "data": str(html.encode("utf-8")),
  })
  res = context.Base_formatAttachmentListToMIMEMultipartString(**mhtml_message)
  setDownloadHeaders("multipart/related", 'attachment;filename="%s-%s-%s.mhtml"')
  return res
def handleHtmlTag(tag, attrs):
  """
  Rewrite, in place, the url attributes of one tag and return `(tag, attrs)`.

  `attrs` is a list of (name, value) pairs. Depending on the tag:
  - object "data" and img "src" go through handleImageSource
  - link rel="stylesheet" "href" goes through handleCssHref, then the css
    of a resulting data uri is processed by replaceCssUrl
  - script "src" goes through handleJsSource
  - for any other tag, "href" and "src" are made absolute
  """
  #if tag == "base": and "href" in attrs:  # should not exist in safe-html
  #  NotImplemented
  def convertAttributes(name_list, converter):
    for index, attribute in enumerate(attrs):
      if attribute[0] in name_list:
        attrs[index] = attribute[0], converter(attribute[1])

  def convertStylesheetHref(href):
    return replaceFromDataUri(handleCssHref(href), replaceCssUrl)

  def isStylesheetRel(attribute):
    return attribute[0] == "rel" and attribute[1] == "stylesheet"

  if tag == "object":
    convertAttributes(("data",), handleImageSource)
  elif tag == "img":
    convertAttributes(("src",), handleImageSource)
  elif tag == "link" and anny(attrs, key=isStylesheetRel):
    convertAttributes(("href",), convertStylesheetHref)
  elif tag == "script":
    convertAttributes(("src",), handleJsSource)
  else:
    convertAttributes(("href", "src"), makeHrefAbsolute)
  return tag, attrs
def strHtmlPart(part):
  """
  Serialize one parsed html part (a tuple produced by parseHtml) to a string.

  Start tags go through handleHtmlTag first, so their url attributes are
  rewritten, and attribute names and values are html escaped. The other
  parts are written back with their original delimiters.

  Raises ValueError if the part type is unknown.
  """
  part_type = part[0]
  if part_type in ("starttag", "startendtag"):
    tag, attrs = handleHtmlTag(part[1], part[2])
    # a value-less attribute (v is None) is written with an empty value
    attrs_str = " ".join(["%s=\"%s\"" % (escapeHtml(k), escapeHtml(v or "")) for k, v in attrs])
    return "<%s%s%s>" % (tag, " " + attrs_str if attrs_str else "", " /" if part_type == "startendtag" else "")
  if part_type == "endtag":
    return "</%s>" % part[1]
  if part_type == "data":
    return part[1]
  if part_type == "entityref":
    return "&%s;" % part[1]
  if part_type == "charref":
    return "&#%s;" % part[1]
  if part_type == "comment":
    return "<!--%s-->" % part[1]
  if part_type == "decl":
    return "<!%s>" % part[1]
  if part_type == "unknown_decl":
    # Marked section: the parser gives what is between "<![" and the
    # terminator, which is "]>" for the if/else/endif conditional sections
    # and "]]>" for the others (CDATA...).
    section = part[1]
    section_name = ""
    for char in section:
      if not (char.isalnum() or char in "-_."):
        break
      section_name += char
    if section_name.lower() in ("if", "else", "endif"):
      return "<![%s]>" % section
    return "<![%s]]>" % section
  if part_type == "pi":
    return "<?%s>" % part[1]
  # returning None here would only fail later, when the parts are joined
  raise ValueError("unknown html part type %r" % (part_type,))
disallow_script = not allow_script
def handleHtmlPartList(part_list):
  """
  Turn the parsed html parts into a list of `(function, argument)` pairs.

  Calling `function(argument)` on each pair gives the strings to join to get
  the exported html:
  - `(strHtmlPart, part)` for regular parts
  - `(replaceCssUrl, css)` for the whole content of a <style> element

  When scripts are not allowed, script elements (content included) are left
  out.
  """
  res = []
  style_data = ""
  on_script = False
  on_style = False
  for part in part_list:
    if on_script:
      # can only be data until </script> endtag, everything is dropped
      if part[0] == "endtag" and part[1] == "script":
        on_script = False
    elif on_style:
      if part[0] == "endtag" and part[1] == "style":
        res.append((replaceCssUrl, style_data))
        res.append((strHtmlPart, part))
        style_data = ""
        on_style = False
      else:
        # can only be data until </style> endtag
        style_data += strHtmlPart(part)
    else:
      # when you save a page from a browser, every script tag is removed
      if part[0] in ("starttag", "startendtag") and part[1] == "script" and disallow_script:
        # a self-closing <script /> has no end tag to wait for
        on_script = part[0] == "starttag"
        continue
      if part[0] == "starttag" and part[1] == "style":
        on_style = True
      res.append((strHtmlPart, part))
  if on_style and style_data:
    # unterminated <style> element: do not lose its content
    res.append((replaceCssUrl, style_data))
  return res
def handleCssHref(href):
  """Handle the href of a stylesheet link; same as handleHref for now."""
  return handleHref(href)
def handleJsSource(href):
  """Handle the src of a script tag; same as handleHref for now."""
  return handleHref(href)
def handleHref(href):
  """
  Return the value to use in place of `href` in the exported document.

  Values that are not urls are returned unchanged. Otherwise the linked
  object is looked up and given to handleHrefObject; when it cannot be
  reached (KeyError or Unauthorized) the url is only made absolute.
  """
  if isHrefAUrl(href):
    try:
      target = traverseHref(href)
    except (KeyError, Unauthorized):
      return makeHrefAbsolute(href)
    else:
      return handleHrefObject(target, href)
  return href
def handleImageSource(src):
  """
  Return the value to use in place of the image url `src`.

  Values that are not urls are returned unchanged. Otherwise the linked
  object is looked up and given to handleImageSourceObject; when it cannot
  be reached (KeyError or Unauthorized) the url is only made absolute.
  """
  if isHrefAUrl(src):
    try:
      target = traverseHref(src)
    except (KeyError, Unauthorized):
      return makeHrefAbsolute(src)
    else:
      return handleImageSourceObject(target, src)
  return src
def replaceCssUrl(data):
  """
  Return the css text `data` with the content of every `url(...)` that is a
  url replaced by the result of handleImageSource. All the rest of the
  text, comments included, is kept as is.
  """
  chunk_list = []
  for part in context.Base_parseCssForUrl(data):
    # part[1] is the original text, part[2] the stripped url of "url" parts
    if part[0] == "url" and isHrefAUrl(part[2]):
      chunk_list.append(handleImageSource(part[2]))
    else:
      chunk_list.append(part[1])
  return "".join(chunk_list)
def handleImageSourceObject(o, src):
  """
  Return the replacement of the image url `src` whose target object is `o`.

  If `o` has a `convert` method and the query string of `src` gives a
  "format" and/or a "display" value, the converted data is linked.
  Otherwise `o` is handled by handleHrefObject, with the "bad image"
  placeholder as default content.
  """
  if hasattr(o, "convert"):
    format_kw = {}
    for key, value in parseUrlSearch(extractUrlSearch(src)):
      if key in ("format", "display") and value is not None:
        format_kw[key] = value
    if format_kw:
      mime, data = o.convert(**format_kw)
      return handleLinkedData(mime, data, src)
  return handleHrefObject(o, src, default_mimetype=bad_image_mime_type, default_data=bad_image_data)
def handleHrefObject(o, src, default_mimetype="text/html", default_data="<p>Linked page not found</p>"):
  """
  Link the content of the object `o`, target of the url `src`, and return
  the value to use in place of `src` (see handleLinkedData).

  - an object with `getContentType` is linked with its content type and its
    `getData()` (or `str(o)` without `getData`), if the content type is set
  - a callable without `getPortalType` is called and its result is linked
    as text/html
  - in the other cases `default_mimetype` and `default_data` are linked
  """
  def toByteString(value):
    if isinstance(value, unicode):
      return value.encode("utf-8")
    return value

  mime, data = default_mimetype, default_data
  if hasattr(o, "getContentType"):
    # handle File portal_skins/folder/file.png
    # XXX handle "?portal_skin=" parameter ?
    content_type = o.getContentType("")
    if content_type:
      mime = content_type
      data = toByteString(getattr(o, "getData", lambda: str(o))() or "")
  elif not hasattr(o, "getPortalType") and callable(o):
    # handle Object.view
    # XXX handle url query parameters ? Not so easy because we need to
    # use the same behavior as when we call a script from browser URL bar.
    mime, data = "text/html", toByteString(o())
  return handleLinkedData(mime, data, src)
# Placeholder image, used as default content by handleImageSourceObject.
bad_image_data_url = (
  "data:image/png;base64," +  # little image showing cannot load image
  "iVBORw0KGgoAAAANSUhEUgAAABEAAAATCAIAAAD5x3GmAAACWklEQVQokZWSX0hTcRTHz/" +
  "3TunMmMyxrQUzEPQSCFEI0fCi0HmSKdsUGg3q2h5I99dj7bE8Nw6cwLDb3kO7JP5m6h0TE" +
  "CmYQjJYgpaPc7q67+93de04P0zvnQ+CP78Pvdzgfzjnf3+GICE55+NMCACACACKOj49rmv" +
  "afvNHRUZ4/KkBEjLFQKJRTjXyRTqigUSwWI6JQKGSaJhEREQ8ApmkCgFrif+8bJ7RfMAGA" +
  "MRYMBsPhMCLWzFPUUdVI1cjjEj0usXLXdLJ6sTCx2jIBAd1otVVe11vPbKT1iqeJRMLKKp" +
  "fLVYaoChxGEAwDbt0ZsNs4ABAEbiLyoqYOEax/ZyfsYmX4q5iCAABQd1aoen3UGmDt/zod" +
  "/EWnuJczcgcIABzHu91um81W9YCI8Jga6rirqUV41O9pQqeDR6J6iRvs7VUeDFQZJCKEih" +
  "DxfINemIioq4ms7GtrwkaH4KovZ2WfujLL1/SGiIgZZSmavj2Veto0GYXO7vzawo7saztX" +
  "3JF9+bUF6Oyu8YAAtnLvNrJBAOPb7lbkizQyPZuWfX8+LeTaG00NHDe7r8Rmju0oQaawVA" +
  "Eqga+/Xkc+B1vexDSJzx+AJvEtk1FDEHjLAEXfXdt7ZgEA0H754UjH2GZgWFGR2UVFxc3A" +
  "sIh4yDDGFjPPdfxhAdea/Y87xpJy//bqnN3b05XK2r0928n55P2+w3kMw9CXmy/AE4u5Fw" +
  "h89A/tLM9d6urxTr9/G4/74zMfBvt+rsxzRKTruqIojNUsgSRJB+vrqVcv705Fc8ViqVSS" +
  "JMnpcMz5h/4B1Qxz9NOjZCgAAAAASUVORK5CYII="
)
# decoded content of the data url above, with its mime type
bad_image_data = b64decode(bad_image_data_url.split(",", 1)[1])
bad_image_mime_type = "image/png"

# scheme of the current request followed by ":", prepended to "//host/path" urls
request_protocol = context.REQUEST.SERVER_URL.split(":", 1)[0] + ":"
# domain name -> site object, used by traverseHref for absolute urls
site_object_dict = context.ERP5Site_getWebSiteDomainDict()
# objects used to resolve "/absolute/path" and "relative/path" urls
base_url_root_object = portal
base_url_object = context
def handleLinkedData(mime, data, href):
  """
  Register the content `data` (of type `mime`) of a linked component and
  return the url to write in the exported document in place of `href`.

  In "mhtml" format the content is appended to the attachment list of the
  message and the absolute url is returned; otherwise the content is
  returned inline as a base64 data url.
  """
  if format != "mhtml":
    return "data:%s;base64,%s" % (mime, b64encode(data))
  url = makeHrefAbsolute(href)
  if mime.startswith("text/"):
    transfer_encoding = "quoted-printable"
  else:
    # let the formatter choose from the mime type
    transfer_encoding = None
  mhtml_message["attachment_list"].append({
    "mime_type": mime,
    "encode": transfer_encoding,
    "add_header_list": [("Content-Location", url)],
    "data": str(data),
  })
  return url
def makeHrefAbsolute(href):
  """
  Return `href` as an absolute url.

  Absolute urls and values that are not urls are returned unchanged.
  "//host/path" gets the protocol of the current request, "/path" is
  appended to the url of the portal and any other value to the url of the
  context followed by "/".
  """
  if isHrefAnAbsoluteUrl(href) or not isHrefAUrl(href):
    return href
  if href.startswith("//"):
    prefix = request_protocol
  elif href.startswith("/"):
    prefix = base_url_root_object.absolute_url()
  else:
    prefix = base_url_object.absolute_url() + "/"
  return prefix + href
def isHrefAnAbsoluteUrl(href):
  """Tell whether `href` starts with "https://" or "http://"."""
  return href.startswith(("https://", "http://"))
def isHrefAUrl(href):
  """
  Tell whether `href` is a url this script can handle: an http(s) url or a
  relative reference (path, "//host/path", "#anchor"...).

  Values starting with another alphabetic scheme followed by a colon
  ("mailto:", "data:", "javascript:"...) are not.
  """
  if href.startswith("https://") or href.startswith("http://"):
    return True
  scheme, colon, rest = href.partition(":")
  # Without any colon there is no scheme at all: a letters-only value such
  # as "contact" is a relative reference.
  return not (colon and scheme.isalpha())
def traverseHref(url, allow_hash=False):
  """
  Return the object designated by `url`, using restrictedTraverse.

  The query string is always ignored, and so is the fragment unless
  `allow_hash` is true.
  - "http(s)://domain/path" and "//domain/path" are traversed from the site
    object registered for the domain; KeyError is raised when the domain is
    not registered
  - "/path" is traversed from the portal
  - any other value is traversed from the context
  """
  url = url.split("?")[0]
  if not allow_hash:
    url = url.split("#")[0]
  if url.startswith(("https://", "http://", "//")):
    # absolute url possibly on other sites
    url_part_list = url.split("/", 3)
    domain = url_part_list[2]
    site_url = "/".join(url_part_list[:3])
    relative_path = url[len(site_url):]
    if relative_path[:1] == "/":
      relative_path = relative_path[1:]
    site_object = site_object_dict.get(domain)
    if site_object is None:
      raise KeyError(relative_path.split("/")[0])
    return site_object.restrictedTraverse(str(relative_path))
  if url.startswith("/"):
    # absolute path, relative url
    return base_url_root_object.restrictedTraverse(str(url[1:]))
  # relative url (just use a base url)
  return base_url_object.restrictedTraverse(str(url))
def replaceFromDataUri(data_uri, replacer):
  """
  Apply `replacer` to the css embedded in a "text/css" data uri and return
  the rebuilt data uri.

  `replacer` receives the decoded content and returns the new one; the
  result is encoded in base64 again if the original was. Any other value
  (plain url, data uri of another type, data uri without content) is
  returned unchanged.
  """
  if not data_uri.startswith("data:"):
    # e.g. the absolute url used in place of the stylesheet in mhtml format
    return data_uri
  # only the first comma separates the header from the content
  header, separator, data = data_uri.partition(",")
  if not separator or "text/css" not in header:
    return data_uri
  is_base64 = ";base64" in header
  if is_base64:
    data = b64decode(data)
  data = replacer(data)
  return "%s,%s" % (header, b64encode(data) if is_base64 else data)
def extractUrlSearch(url):
  """
  Return the query string of `url`, leading "?" included and fragment
  excluded, or "" if the url has no "?".
  """
  without_fragment = url.split("#", 1)[0]
  path, question_mark, query = without_fragment.partition("?")
  return question_mark + query
def parseUrlSearch(search):
  """
  Parse a query string (with or without its leading "?") and return the
  list of its `(key, value)` pairs, in order.

  The value is what follows the first "=" of the parameter, or None when
  the parameter has no "=". Nothing is url decoded.

    parseUrlSearch("?format=png&a=b=c&flag") ->
      [("format", "png"), ("a", "b=c"), ("flag", None)]
  """
  if search[:1] == "?":
    search = search[1:]
  result = []
  for part in search.split("&"):
    key, equal_sign, value = part.partition("=")
    result.append((key, value if equal_sign else None))
  return result
def parseHtml(text):
  """Return the html parts of `text`, as given by the Base_parseHtml script of the context."""
  return context.Base_parseHtml(text)
def escapeHtml(s):
  """
  Return `s` with the characters &, <, > and " replaced by their html
  entities, so that it can be written in a double quoted attribute.
  """
  # "&" must be replaced first, not to escape the entities added afterwards
  return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")
def anny(iterable, key=None):
  """
  Tell whether at least one item of `iterable` is true.
  If `key` is given, the result of `key(item)` is tested instead of the item.
  """
  if not key:
    key = lambda item: item
  for item in iterable:
    if key(item):
      return True
  return False
# Script entry point: everything above only defines helpers and constants.
return main()
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"PythonScript"
module=
"Products.PythonScripts.PythonScript"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
Script_magic
</string>
</key>
<value>
<int>
3
</int>
</value>
</item>
<item>
<key>
<string>
_bind_names
</string>
</key>
<value>
<object>
<klass>
<global
name=
"NameAssignments"
module=
"Shared.DC.Scripts.Bindings"
/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key>
<string>
_asgns
</string>
</key>
<value>
<dictionary>
<item>
<key>
<string>
name_container
</string>
</key>
<value>
<string>
container
</string>
</value>
</item>
<item>
<key>
<string>
name_context
</string>
</key>
<value>
<string>
context
</string>
</value>
</item>
<item>
<key>
<string>
name_m_self
</string>
</key>
<value>
<string>
script
</string>
</value>
</item>
<item>
<key>
<string>
name_subpath
</string>
</key>
<value>
<string>
traverse_subpath
</string>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key>
<string>
_params
</string>
</key>
<value>
<string>
REQUEST=None, allow_script=False, format="embedded_html"
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
WebPage_exportAsSingleFile
</string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment