Commit 1dee50a7 authored by Jérome Perrin

web, PortalTransforms: py3

parent ea1d391a
......@@ -174,11 +174,7 @@ class WebSite(WebSection):
if sub_path in section_dict:
del section_dict[sub_path]
section_list = section_dict.values()
# Sort by Index
section_list.sort(key=lambda x: x.getIntIndex())
return section_list
return sorted(section_dict.values(), key=lambda x: x.getIntIndex())
else:
return []
\ No newline at end of file
......@@ -14,7 +14,8 @@ TODO: export same components into one mhtml attachment if possible.
"""
# ERP5 web uses format= argument, which is also a python builtin
# pylint: disable=redefined-builtin
import six
from Products.PythonScripts.standard import html_quote
from zExceptions import Unauthorized
from base64 import b64encode, b64decode
portal = context.getPortalObject()
......@@ -27,10 +28,11 @@ mhtml_message = {
}
def main(data):
if isinstance(data, str):
if isinstance(data, bytes):
data = data.decode("utf-8")
data = u"".join([fn(p) for fn, p in handleHtmlPartList(parseHtml(data))])
data = data.encode("utf-8")
if six.PY2:
data = data.encode("utf-8")
if format == "mhtml":
mhtml_message["attachment_list"].insert(0, {
"mime_type": "text/html",
......@@ -75,7 +77,7 @@ def strHtmlPart(part):
part_type = part[0]
if part_type in ("starttag", "startendtag"):
tag, attrs = handleHtmlTag(part[1], part[2])
attrs_str = " ".join(["%s=\"%s\"" % (escapeHtml(k), escapeHtml(v or "")) for k, v in attrs])
attrs_str = " ".join(["%s=\"%s\"" % (html_quote(k), html_quote(v or "")) for k, v in attrs])
return "<%s%s%s>" % (tag, " " + attrs_str if attrs_str else "", " /" if part_type == "startendtag" else "")
if part_type == "endtag":
return "</%s>" % part[1]
......@@ -191,7 +193,7 @@ def handleHrefObject(obj, src, default_mimetype="text/html", default_data="<p>Li
data = str(obj.data or "")
else:
data = getattr(obj, "getData", lambda: str(obj))() or ""
if isinstance(data, unicode):
if six.PY2 and isinstance(data, unicode):
data = data.encode("utf-8")
return handleLinkedData(mime, data, src)
return handleLinkedData(default_mimetype, default_data, src)
......@@ -201,7 +203,7 @@ def handleHrefObject(obj, src, default_mimetype="text/html", default_data="<p>Li
# use the same behavior as when we call a script from browser URL bar.
if not hasattr(obj, "getPortalType") and callable(obj):
mime, data = "text/html", obj()
if isinstance(data, unicode):
if six.PY2 and isinstance(data, unicode):
data = data.encode("utf-8")
return handleLinkedData(mime, data, src)
......@@ -270,7 +272,7 @@ def handleLinkedData(mime, data, href):
})
return url
else:
return "data:%s;base64,%s" % (mime, b64encode(data))
return "data:%s;base64,%s" % (mime, b64encode(data.encode()).decode())
def makeHrefAbsolute(href):
if isHrefAnAbsoluteUrl(href) or not isHrefAUrl(href):
......@@ -325,7 +327,8 @@ def replaceFromDataUri(data_uri, replacer):
if ";base64" in header:
is_base64 = True
data = b64decode(data)
data = replacer(data)
if not is_base64:
data = replacer(data)
return "%s,%s" % (header, b64encode(data) if is_base64 else data)
def extractUrlSearch(url):
......@@ -346,9 +349,6 @@ def parseUrlSearch(search):
def parseHtml(text):
return context.Base_parseHtml(text)
def escapeHtml(text):
return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")
def anny(iterable, key=None):
for i in iterable:
if key:
......
......@@ -268,6 +268,7 @@ class TestERP5Web(ERP5TypeTestCase):
page.edit(text_content='<p>Hé Hé Hé!</p>', content_type='text/html')
self.tic()
self.assertEqual('Hé Hé Hé!', page.asText().strip())
self.assertIn('Hé Hé Hé!', page.getSearchableText())
def test_WebPageAsTextHTMLEntities(self):
"""Check if Web Page's asText() converts html entities properly
......@@ -1032,12 +1033,10 @@ Hé Hé Hé!""", page.asText().strip())
web_section_portal_type = 'Web Section'
web_section = website.newContent(portal_type=web_section_portal_type)
content = '<p>initial text</p>'
new_content = '<p>modified text<p>'
document = portal.web_page_module.newContent(portal_type='Web Page',
id='document_cache',
reference='NXD-Document.Cache',
text_content=content)
text_content='<p>initial text</p>')
document.publish()
self.tic()
self.assertEqual(document.asText().strip(), 'initial text')
......@@ -1051,15 +1050,15 @@ Hé Hé Hé!""", page.asText().strip())
# Through the web_site.
path = website.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(content), -1)
self.assertIn(b'<p>initial text</p>', response.getBody())
# Through a web_section.
path = web_section.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(content), -1)
self.assertIn(b'<p>initial text</p>', response.getBody())
# modified the web_page content
document.edit(text_content=new_content)
document.edit(text_content='<p>modified text<p>')
self.assertEqual(document.asText().strip(), 'modified text')
self.tic()
......@@ -1067,12 +1066,12 @@ Hé Hé Hé!""", page.asText().strip())
# Through the web_site.
path = website.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(new_content), -1)
self.assertIn(b'<p>modified</p>', response.getBody())
# Through a web_section.
path = web_section.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(new_content), -1)
self.assertIn(b'<p>modified</p>', response.getBody())
def test_13a_DocumentMovedCache(self):
"""
......@@ -1123,12 +1122,10 @@ Hé Hé Hé!""", page.asText().strip())
web_section_portal_type = 'Web Section'
web_section = website.newContent(portal_type=web_section_portal_type)
content = '<p>initial text</p>'
new_content = '<p>modified text</p>'
document = portal.web_page_module.newContent(portal_type='Web Page',
id='document_cache',
reference='NXD-Document.Cache',
text_content=content)
text_content='<p>initial text</p>')
document.publish()
self.tic()
self.assertEqual(document.asText().strip(), 'initial text')
......@@ -1136,16 +1133,16 @@ Hé Hé Hé!""", page.asText().strip())
# Through the web_site.
path = website.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(content), -1)
self.assertIn(b'<p>initial text</p>', response.getBody())
# Through a web_section.
path = web_section.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(content), -1)
self.assertIn(b'<p>initial text</p>', response.getBody())
# Modify the web_page content
# Use unrestrictedTraverse (XXX-JPS reason unknown)
web_document = website.unrestrictedTraverse('web_page_module/%s' % document.getId())
web_document.edit(text_content=new_content)
web_document.edit(text_content='<p>modified text</p>')
# Make sure cached is emptied
self.assertFalse(web_document.hasConversion(format='txt'))
self.assertFalse(document.hasConversion(format='txt'))
......@@ -1170,14 +1167,14 @@ Hé Hé Hé!""", page.asText().strip())
self.assertEqual(web_document.asText().strip(), 'modified text')
path = web_section.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(new_content), -1)
self.assertIn(b'<p>modified text</p>', response.getBody())
# Through a web_site.
web_document = website.restrictedTraverse('NXD-Document.Cache')
self.assertEqual(web_document.asText().strip(), 'modified text')
path = website.absolute_url_path() + '/NXD-Document.Cache'
response = self.publish(path, self.credential)
self.assertNotEqual(response.getBody().find(new_content), -1)
self.assertIn(b'<p>modified text</p>', response.getBody())
def test_14_AccessWebSiteForWithDifferentUserPreferences(self):
"""Check that Ram Cache Manager do not mix websection
......@@ -1239,18 +1236,18 @@ Hé Hé Hé!""", page.asText().strip())
# connect as administrator and check that only developper_mode is enable
response = self.publish(websection_url, 'administrator:administrator')
self.assertIn('manage_main', response.getBody())
self.assertNotIn('manage_messages', response.getBody())
self.assertIn(b'manage_main', response.getBody())
self.assertNotIn(b'manage_messages', response.getBody())
# connect as webeditor and check that only translator_mode is enable
response = self.publish(websection_url, 'webeditor:webeditor')
self.assertNotIn('manage_main', response.getBody())
self.assertIn('manage_messages', response.getBody())
self.assertNotIn(b'manage_main', response.getBody())
self.assertIn(b'manage_messages', response.getBody())
# anonymous user doesn't exists, check anonymous access without preferences
response = self.publish(websection_url, 'anonymous:anonymous')
self.assertNotIn('manage_main', response.getBody())
self.assertNotIn('manage_messages', response.getBody())
self.assertNotIn(b'manage_main', response.getBody())
self.assertNotIn(b'manage_messages', response.getBody())
def test_15_Check_LastModified_Header(self):
"""Checks that Last-Modified header set by caching policy manager
......@@ -1416,7 +1413,7 @@ Hé Hé Hé!""", page.asText().strip())
self.assertEqual(HTTP_OK, response.getStatus())
self.assertEqual('text/html; charset=utf-8',
response.getHeader('content-type'))
self.assertIn("Data updated.", response.getBody())
self.assertIn(b"Data updated.", response.getBody())
self.tic()
......@@ -1472,7 +1469,7 @@ Hé Hé Hé!""", page.asText().strip())
self.assertEqual(HTTP_OK, response.getStatus())
self.assertEqual('text/html; charset=utf-8',
response.getHeader('content-type'))
self.assertIn("Data updated.", response.getBody())
self.assertIn(b"Data updated.", response.getBody())
self.tic()
......
......@@ -163,7 +163,11 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
if mime_type == 'text/html':
mime_type = 'text/x-html-safe'
if src_mimetype != "image/svg+xml":
result = portal_transforms.convertToData(mime_type, text_content,
if six.PY2:
data = text_content
else:
data = text_content.encode()
result = portal_transforms.convertToData(mime_type, data,
object=self, context=self,
filename=filename,
mimetype=src_mimetype,
......@@ -373,6 +377,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
text_content, content_type)
else:
message = 'Conversion to base format succeeds'
# TODO(zope4py3): rethink this, shouldn't we store bytes in base data ?
self._setBaseData(text_content)
self._setBaseContentType(content_type)
return message
......@@ -385,15 +390,17 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
self._checkConversionFormatPermission(None)
if default is _MARKER:
text_content = self._baseGetTextContent()
text_content = self._baseGetTextContent(default)
else:
text_content = self._baseGetTextContent(default)
if isinstance(text_content, bytes):
# XXX Zope4py3: should this return str ??
# TODO(Zope4py3): should this return str ??
# We probably have "legacy" documents where `text_content` is a python2
# str encoded as something else than utf-8.
# Maybe we should introduce a new text_content_encoding property and
# expose API to getRawTextContent (as bytes) and getTextContent would return
# the decoded string.
# XXX what about _convertToBaseFormat/guessCharsetAndConvert ???
LOG('TextDocument', WARNING, "getTextContent with bytes %s" % text_content)
try:
text_content = text_content.decode('utf-8')
except UnicodeDecodeError:
......
......@@ -32,6 +32,7 @@ from Products.ERP5Type.Globals import InitializeClass
from Products.ERP5Type import Permissions
from warnings import warn
class TextConvertableMixin:
"""
This class provides a generic implementation of ITextConvertable.
......@@ -46,9 +47,9 @@ class TextConvertableMixin:
"""
Converts the current document to plain text
"""
kw.pop('format', None)
_, data = self.convert(format='txt', **kw)
return str(data)
kw['format'] = 'txt'
_, data = self.convert(**kw)
return data
security.declareProtected(Permissions.AccessContentsInformation,
'asRawText')
......@@ -56,9 +57,9 @@ class TextConvertableMixin:
"""
Converts the current document to plain text without substitution
"""
kw.pop('format', None)
_, data = self.convert(format='txt', substitute=False, **kw)
return str(data)
kw['format'] = 'txt'
kw['substitute'] = False
return self.asText(**kw)
security.declareProtected(Permissions.AccessContentsInformation,
'asTextContent')
......
......@@ -20,8 +20,15 @@ from Products.PortalTransforms.transforms.broken import BrokenTransform
def import_from_name(module_name):
""" import and return a module by its name """
return __import__(module_name, {}, {}, module_name)
"""import and return a module by its name"""
__traceback_info__ = (module_name,)
m = __import__(module_name)
try:
for sub in module_name.split(".")[1:]:
m = getattr(m, sub)
except AttributeError as e:
raise ImportError(str(e))
return m
def make_config_persistent(kwargs):
""" iterates on the given dictionnary and replace list by persistent list,
......
......@@ -154,7 +154,7 @@ class subprocesstransform:
try:
if not self.useStdin:
stdin_file = tempfile.NamedTemporaryFile()
stdin_file.write( data)
stdin_file.write(data)
stdin_file.seek(0)
command = command % {'infile': stdin_file.name} # apply tmp name to command
data = None
......
......@@ -230,9 +230,4 @@ class IllegalHTML( ValueError ):
# j = i + len(toHandle)
# return j
# def scrubHTML( html ):
# """ Strip illegal HTML tags from string text. """
# parser = StrippingParser()
# parser.feed( html )
# parser.close()
# return parser.result
from Products.PortalTransforms.transforms.safe_html import scrubHTML
......@@ -21,14 +21,14 @@ def register():
return unichr(result).encode('utf-8')
return html_to_text("html_to_text",
('<script [^>]>.*</script>(?im)', ' '),
('<style [^>]>.*</style>(?im)', ' '),
('<head [^>]>.*</head>(?im)', ' '),
('(?im)<script [^>]>.*</script>', ' '),
('(?im)<style [^>]>.*</style>', ' '),
('(?im)<head [^>]>.*</head>', ' '),
# added for ERP5, we want to transform <br/> in newlines
('<br\s*/?>(?im)', '\n'),
('(?im)<br\s*/?>', '\n'),
('(?im)</?(font|em|i|strong|b)(?=\W)[^>]*>', ''),
('<[^>]*>(?i)(?m)', ' '),
('(?i)(?m)<[^>]*>', ' '),
(r'&([a-zA-Z0-9#]*?);', sub_func),
)
......@@ -226,7 +226,7 @@ class StrippingParser(HTMLParser):
def handle_data(self, data):
if self.suppress: return
data = html_quote(data)
if self.original_charset and isinstance(data, str):
if self.original_charset and isinstance(data, bytes):
data = data.decode(self.original_charset)
self.result.append(data)
......@@ -332,7 +332,7 @@ class StrippingParser(HTMLParser):
k = len(self.rawdata)
data = self.rawdata[i+9:k]
j = k+3
if self.original_charset and isinstance(data, str):
if self.original_charset and isinstance(data, bytes):
data = data.decode(self.original_charset)
self.result.append("<![CDATA[%s]]>" % data)
else:
......@@ -378,7 +378,7 @@ def scrubHTML(html, valid=VALID_TAGS, nasty=NASTY_TAGS,
parser.feed(html)
parser.close()
result = parser.getResult()
if parser.original_charset and isinstance(result, str):
if parser.original_charset and isinstance(result, bytes):
result = result.decode(parser.original_charset).encode(default_encoding)
return result
......
import six
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implementer
from DocumentTemplate.html_quote import html_quote
......@@ -30,6 +31,7 @@ class TextPreToHTML:
raise AttributeError(attr)
def convert(self, orig, data, **kwargs):
orig = six.ensure_text(orig, errors='replace')
data.setData('<pre class="data">%s</pre>' % html_quote(orig))
return data
......
import six
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implementer
from DocumentTemplate.html_quote import html_quote
......@@ -30,6 +31,7 @@ class TextToHTML:
raise AttributeError(attr)
def convert(self, orig, data, **kwargs):
orig = six.ensure_text(orig, errors='replace')
# Replaces all line breaks with a br tag, and wraps it in a p tag.
data.setData('<p>%s</p>' % html_quote(orig.strip()).replace('\n', '<br />'))
return data
......
......@@ -2,12 +2,13 @@
"""some common utilities
"""
import io
import six
if six.PY2:
from email import message_from_file as message_from_bytes
else:
from email import message_from_bytes
from six.moves import cStringIO as StringIO
class TransformException(Exception):
pass
......@@ -35,4 +36,6 @@ def safeToInt(value):
def parseContentType(content_type):
"""Parses `text/plain;charset="utf-8"` to a email.Message object"""
return message_from_bytes(StringIO("Content-Type:" + content_type.replace("\r\n", "\r\n\t")))
return message_from_bytes(
b"Content-Type:"
+ content_type.replace("\r\n", "\r\n\t").encode('utf-8'))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment