Commit 0601dfd6 authored by Andreas Jung's avatar Andreas Jung

Backported several encoding and webdav fixes for ZPT from the HEAD

parents 93fe1e0b 237c0626
......@@ -14,6 +14,16 @@ Zope Changes
- added Python 2.4.4 as optimal Python version to 'configure'
- the ZopePageTemplate implementation now uses unicode internally.
Non-unicode instances are migrated on-the-fly to unicode. However, this
will only work properly for ZPT instances formerly encoded as utf-8 or
ISO-8859-15. For other encodings you can set the environment variable
ZPT_PREFERRED_ENCODING to insert your preferred encoding in front of utf-8 and
ISO-8859-15 within the encoding sniffer code. In addition there is a new
'output_encoding' property that controls the conversion from/to unicode
for WebDAV/FTP operations.
Zope 2.10.1 (2006-11-22)
Bugs fixed
......
......@@ -32,6 +32,14 @@ from zope.pagetemplate.pagetemplatefile import sniff_type
LOG = getLogger('PageTemplateFile')
def guess_type(filename, text):
# check for XML ourself since guess_content_type can't
# detect text/xml if 'filename' won't end with .xml
# XXX: fix this in zope.contenttype
if text.startswith('<?xml'):
return 'text/xml'
content_type, dummy = guess_content_type(filename, text)
if content_type in ('text/html', 'text/xml'):
return content_type
......
......@@ -40,20 +40,14 @@ from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplateFile import guess_type
from Products.PageTemplates.Expressions import SecureModuleImporter
# regular expression to extract the encoding from the XML preamble
encoding_reg = re.compile('<\?xml.*?encoding="(.*?)".*?\?>', re.M)
from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv, convertToUnicode
# Encodings tried, in order, when a non-unicode template has to be decoded.
preferred_encodings = ['utf-8', 'iso-8859-15']
# dict.has_key() is deprecated; .get() also skips a variable that is set but
# empty, which would otherwise be inserted as an unusable codec name.
if os.environ.get('ZPT_PREFERRED_ENCODING'):
    preferred_encodings.insert(0, os.environ['ZPT_PREFERRED_ENCODING'])
def sniffEncoding(text, default_encoding='utf-8'):
    """Return the encoding named in the XML preamble of 'text'.

    'default_encoding' is returned when 'text' does not start with
    '<?xml' or when the preamble pattern finds no encoding.
    """
    if not text.startswith('<?xml'):
        return default_encoding
    found = encoding_reg.search(text)
    if found is None:
        return default_encoding
    return found.group(1)
class Src(Acquisition.Explicit):
""" I am scary code """
......@@ -76,10 +70,10 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
__implements__ = (WriteLockInterface,)
meta_type = 'Page Template'
output_encoding = 'iso-8859-15' # provide default for old instances
func_defaults = None
func_code = FuncCode((), 0)
strict = False
_default_bindings = {'name_subpath': 'traverse_subpath'}
_default_content_fn = os.path.join(package_home(globals()),
......@@ -97,6 +91,7 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
_properties=({'id':'title', 'type': 'ustring', 'mode': 'w'},
{'id':'content_type', 'type':'string', 'mode': 'w'},
{'id':'output_encoding', 'type':'string', 'mode': 'w'},
{'id':'expand', 'type':'boolean', 'mode': 'w'},
)
......@@ -108,22 +103,68 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
security.declareProtected(view_management_screens,
'read', 'ZScriptHTML_tryForm')
def __init__(self, id, text=None, content_type=None, encoding='utf-8',
strict=False):
def __init__(self, id, text=None, content_type=None, strict=True, output_encoding='utf-8'):
self.id = id
self.expand = 0
self.strict = strict
self.ZBindings_edit(self._default_bindings)
self.output_encoding = output_encoding
# default content
if not text:
text = open(self._default_content_fn).read()
encoding = 'utf-8'
content_type = 'text/html'
self.pt_edit(text, content_type, encoding)
self.pt_edit(text, content_type)
security.declareProtected(change_page_templates, 'pt_edit')
def pt_edit(self, text, content_type, encoding='utf-8'):
def pt_edit(self, text, content_type, keep_output_encoding=False):
text = text.strip()
if self.strict and not isinstance(text, unicode):
is_unicode = isinstance(text, unicode)
encoding = None
output_encoding = None
if content_type == 'text/xml':
if is_unicode:
encoding = None
output_encoding = 'utf-8'
else:
encoding = encodingFromXMLPreamble(text)
output_encoding = 'utf-8'
elif content_type == 'text/html':
charset = charsetFromMetaEquiv(text)
if is_unicode:
if charset:
encoding = None
output_encoding = charset
else:
encoding = None
output_encoding = 'iso-8859-15'
else:
if charset:
encoding = charset
output_encoding = charset
else:
encoding = 'iso-8859-15'
output_encoding = 'iso-8859-15'
else:
raise ValueError('Unsupported content-type %s' % content_type)
# for content updated through WebDAV, FTP
if not keep_output_encoding:
self.output_encoding = output_encoding
if not is_unicode:
text = unicode(text, encoding)
self.ZCacheable_invalidate()
......@@ -137,16 +178,21 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
source_dot_xml = Src()
security.declareProtected(change_page_templates, 'pt_editAction')
def pt_editAction(self, REQUEST, title, text, content_type, encoding, expand):
def pt_editAction(self, REQUEST, title, text, content_type, expand):
"""Change the title and document."""
if self.wl_isLocked():
raise ResourceLockedError("File is locked via WebDAV")
self.expand = expand
self.pt_setTitle(title, encoding)
self.pt_edit(text, content_type, encoding)
# The ZMI edit view uses utf-8! So we can safely assume
# that 'title' and 'text' are utf-8 encoded strings - hopefully
self.pt_setTitle(title, 'utf-8')
text = unicode(text, 'utf-8')
self.pt_edit(text, content_type, True)
REQUEST.set('text', self.read()) # May not equal 'text'!
REQUEST.set('title', self.title)
message = "Saved changes."
......@@ -155,9 +201,10 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
% '<br>'.join(self._v_warnings))
return self.pt_editForm(manage_tabs_message=message)
security.declareProtected(change_page_templates, 'pt_setTitle')
def pt_setTitle(self, title, encoding='utf-8'):
if self.strict and not isinstance(title, unicode):
if not isinstance(title, unicode):
title = unicode(title, encoding)
self._setPropValue('title', title)
......@@ -186,8 +233,7 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
if not content_type in ('text/html', 'text/xml'):
raise ValueError('Unsupported mimetype: %s' % content_type)
encoding = sniffEncoding(text, encoding)
self.pt_edit(text, content_type, encoding)
self.pt_edit(text, content_type)
return self.pt_editForm(manage_tabs_message='Saved changes')
security.declareProtected(change_page_templates, 'pt_changePrefs')
......@@ -240,6 +286,8 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
return c
def write(self, text):
    # Only unicode source is accepted; byte strings are rejected before the
    # cache is invalidated or the inherited write() is reached.
    if isinstance(text, unicode):
        self.ZCacheable_invalidate()
        ZopePageTemplate.inheritedAttribute('write')(self, text)
    else:
        raise TypeError("'text' parameter must be unicode")
......@@ -289,10 +337,12 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
security.declareProtected(change_page_templates, 'PUT')
def PUT(self, REQUEST, RESPONSE):
""" Handle HTTP PUT requests """
self.dav__init(REQUEST, RESPONSE)
self.dav__simpleifhandler(REQUEST, RESPONSE, refresh=1)
## XXX this should be unicode or we must pass an encoding
self.pt_edit(REQUEST.get('BODY', ''))
text = REQUEST.get('BODY', '')
content_type = guess_type('', text)
self.pt_edit(text, content_type)
RESPONSE.setStatus(204)
return RESPONSE
......@@ -303,8 +353,8 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
security.declareProtected(ftp_access, 'manage_FTPget')
def manage_FTPget(self):
"Get source for FTP download"
self.REQUEST.RESPONSE.setHeader('Content-Type', self.content_type)
return self.read()
result = self.pt_render()
return result.encode(self.output_encoding)
security.declareProtected(view_management_screens, 'html')
def html(self):
......@@ -353,34 +403,28 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
# acquisition context, so we don't know where it is. :-(
return None
def __setstate__(self, state):
    # On-the-fly migration: a pickled state whose '_text' is not unicode is
    # decoded here, and the detected source encoding is stored as the
    # instance's 'output_encoding'.
    # (Open question kept from the original: use the 'generation' module?)
    stored_text = state['_text']
    if not isinstance(stored_text, unicode):
        content_type = state.get('content_type', 'text/html')
        converted, source_encoding = convertToUnicode(stored_text,
                                                      content_type,
                                                      preferred_encodings)
        state['_text'] = converted
        state['output_encoding'] = source_encoding
    self.__dict__.update(state)
def pt_render(self, source=False, extra_context={}):
    # Delegate to the base PageTemplate renderer and insist that what comes
    # back is unicode.
    rendered = PageTemplate.pt_render(self, source, extra_context)
    assert isinstance(rendered, unicode)
    return rendered
def wl_isLocked(self):
    # Unconditionally reports "not locked" (a false value).
    not_locked = 0
    return not_locked
def manage_convertUnicode(self, preferred_encodings=preferred_encodings,
                          RESPONSE=None):
    """Convert non-unicode templates to unicode"""
    # Nothing to do when the source is already unicode.
    if isinstance(self._text, unicode):
        if RESPONSE:
            return RESPONSE.redirect(self.absolute_url() +
                                     '/pt_editForm?manage_tabs_message='
                                     'ZPT+already+converted')
        return

    # Try each candidate encoding in order; the first one that decodes
    # without error wins.
    for candidate in preferred_encodings:
        try:
            self._text = unicode(self._text, candidate)
            if RESPONSE:
                return RESPONSE.redirect(self.absolute_url() +
                                         '/pt_editForm?manage_tabs_message='
                                         'ZPT+successfully+converted')
            return
        except UnicodeDecodeError:
            continue

    raise RuntimeError('Pagetemplate could not be converted to unicode')
InitializeClass(ZopePageTemplate)
......@@ -407,7 +451,7 @@ def manage_addPageTemplate(self, id, title='', text='', encoding='utf-8',
content_type = headers['content_type']
else:
content_type = guess_type(filename, text)
encoding = sniffEncoding(text, encoding)
else:
if hasattr(text, 'read'):
......@@ -418,9 +462,14 @@ def manage_addPageTemplate(self, id, title='', text='', encoding='utf-8',
content_type = headers['content_type']
else:
content_type = guess_type(filename, text)
encoding = sniffEncoding(text, encoding)
zpt = ZopePageTemplate(id, text, content_type, encoding)
# ensure that we pass unicode to the constructor to
# avoid further hassles with pt_edit()
if not isinstance(text, unicode):
text = unicode(text, encoding)
zpt = ZopePageTemplate(id, text, content_type, output_encoding=encoding)
zpt.pt_setTitle(title, encoding)
self._setObject(id, zpt)
zpt = getattr(self, id)
......
# -*- encoding: iso-8859-15 -*-
"""ZopePageTemplate regression tests.
Ensures that adding a page template works correctly.
......@@ -6,13 +8,162 @@ Note: Tests require Zope >= 2.7
"""
import unittest
import Zope2
import transaction
import zope.component.testing
from zope.traversing.adapters import DefaultTraversable
from Testing.makerequest import makerequest
from Testing.ZopeTestCase import ZopeTestCase, installProduct
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
ascii_str = '<html><body>hello world</body></html>'
iso885915_str = '<html><body></body></html>'
utf8_str = unicode(iso885915_str, 'iso-8859-15').encode('utf-8')
# XML fixture; '%s' is replaced by the encoding name to declare.
# ('vesion' in the declaration was a typo for 'version'.)
xml_template = '''<?xml version="1.0" encoding="%s"?>
<foo>
</foo>
'''
xml_iso_8859_15 = xml_template % 'iso-8859-15'
xml_utf8 = unicode(xml_template, 'iso-8859-15').encode('utf-8') % 'utf-8'
# HTML fixture with a content-type META tag; '%s' is replaced by the charset.
# (The head element was closed with a mistyped '</hed>' tag.)
html_template_w_header = '''
<html>
<head>
<META http-equiv="content-type" content="text/html; charset=%s">
</head>
<body>
test
</body>
</html>
'''
html_iso_8859_15_w_header = html_template_w_header % 'iso-8859-15'
html_utf8_w_header = unicode(html_template_w_header, 'iso-8859-15').encode('utf-8') % 'utf-8'
html_template_wo_header = '''
<html>
<body>
test
</body>
</html>
'''
html_iso_8859_15_wo_header = html_template_wo_header
html_utf8_wo_header = unicode(html_template_wo_header, 'iso-8859-15').encode('utf-8')
installProduct('PageTemplates')
class ZPTUtilsTests(unittest.TestCase):
    # Checks for the two encoding-extraction helpers imported from
    # Products.PageTemplates.utils.

    def testExtractEncodingFromXMLPreamble(self):
        # (preamble, expected encoding) pairs, checked in order
        cases = (
            ('<?xml version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="utf-8" version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="UTF-8" version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="ISO-8859-15" version="1.0" ?>', 'iso-8859-15'),
            ('<?xml encoding="iso-8859-15" version="1.0" ?>', 'iso-8859-15'),
        )
        for preamble, expected in cases:
            self.assertEqual(encodingFromXMLPreamble(preamble), expected)

    def testExtractCharsetFromMetaHTTPEquivTag(self):
        # (document, expected charset) pairs, checked in order
        cases = (
            ('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>', 'utf-8'),
            ('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>', 'iso-8859-15'),
            ('<html><META http-equiv="content-type" content="text/html"></html>', None),
            ('<html>...<html>', None),
        )
        for document, expected in cases:
            self.assertEqual(charsetFromMetaEquiv(document), expected)
class ZopePageTemplateFileTests(ZopeTestCase):
    # Tests that add a page template to self.app and check rendering,
    # write() type checking and the attributes set by PUT.

    def _checkRender(self, source, encoding):
        # Add 'source' as a template, render it, and compare the output
        # (re-encoded with 'encoding') against the original source.
        manage_addPageTemplate(self.app, 'test', text=source,
                               encoding=encoding)
        template = self.app['test']
        rendered = template.pt_render()
        # use startswith() because the renderer appends a trailing \n
        self.assertEqual(rendered.encode(encoding).startswith(source), True)
        self.assertEqual(template.output_encoding, 'iso-8859-15')

    def _checkPut(self, body, output_encoding, content_type):
        # PUT 'body' and verify the attributes stored on the template.
        template = self._put(body)
        self.assertEqual(template.output_encoding, output_encoding)
        self.assertEqual(template.content_type, content_type)

    def testPT_RenderWithAscii(self):
        self._checkRender(ascii_str, 'ascii')

    def testPT_RenderWithISO885915(self):
        self._checkRender(iso885915_str, 'iso-8859-15')

    def testPT_RenderWithUTF8(self):
        self._checkRender(utf8_str, 'utf-8')

    def testWriteAcceptsUnicode(self):
        manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
        template = self.app['test']
        source = u'this is unicode'
        template.write(source)
        self.assertEqual(template.read(), source)
        self.assertEqual(isinstance(template.read(), unicode), True)

    def testWriteWontAcceptsNonUnicode(self):
        manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
        template = self.app['test']
        self.assertRaises(TypeError, template.write, 'this is not unicode')

    def _createZPT(self):
        manage_addPageTemplate(self.app, 'test', text=utf8_str,
                               encoding='utf-8')
        return self.app['test']

    def _makePUTRequest(self, body):
        return {'BODY': body}

    def _put(self, text):
        # Create the template, place 'text' in the request body, then PUT.
        template = self._createZPT()
        request = self.app.REQUEST
        request.set('BODY', text)
        template.PUT(request, request.RESPONSE)
        return template

    def testPutHTMLIso8859_15WithCharsetInfo(self):
        self._checkPut(html_iso_8859_15_w_header, 'iso-8859-15', 'text/html')

    def testPutHTMLUTF8_WithCharsetInfo(self):
        self._checkPut(html_utf8_w_header, 'utf-8', 'text/html')

    def testPutHTMLIso8859_15WithoutCharsetInfo(self):
        self._checkPut(html_iso_8859_15_wo_header, 'iso-8859-15', 'text/html')

    def testPutHTMLUTF8_WithoutCharsetInfo(self):
        self._checkPut(html_utf8_wo_header, 'iso-8859-15', 'text/html')

    def testPutXMLIso8859_15(self):
        """ XML: always use UTF-8 as output encoding """
        self._checkPut(xml_iso_8859_15, 'utf-8', 'text/xml')

    def testPutXMLUTF8(self):
        """ XML: always use UTF-8 as output encoding """
        self._checkPut(xml_utf8, 'utf-8', 'text/xml')
class ZPTRegressions(unittest.TestCase):
......@@ -58,13 +209,6 @@ class ZPTRegressions(unittest.TestCase):
pt = self.app.pt1
self.assertEqual(pt.document_src(), self.text)
def test_BBB_for_strict_attribute(self):
    # Collector 2213: templates created before 'strict' existed have no
    # instance attribute of that name; reading it must still give False.
    from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
    template = ZopePageTemplate('issue_2213')
    # simulate an old template by removing the instance attribute
    del template.strict
    self.assertEqual(template.strict, False)
class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase):
......@@ -132,7 +276,9 @@ class DummyFileUpload:
def test_suite():
    # Build one suite from all test case classes, in a fixed order.
    suite = unittest.makeSuite(ZPTRegressions)
    for case_class in (ZPTUtilsTests, ZPTMacros, ZopePageTemplateFileTests):
        suite.addTests(unittest.makeSuite(case_class))
    return suite
if __name__ == '__main__':
......
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
""" Some helper methods
$Id: ZopePageTemplate.py 71579 2006-12-17 20:26:10Z andreasjung $
"""
import re
# XML declaration at the start of the text; group 1 is the declared encoding.
# Accepts single- or double-quoted values and whitespace around '='.
xml_preamble_reg = re.compile(
    r'^<\?xml.*?encoding\s*=\s*["\']([^"\']*)["\'].*?\?>', re.M)

# A single <meta ... http-equiv ... content-type ...> tag (group 1).
# '[^>]' keeps the match inside one tag, so an unrelated earlier <meta> tag
# can no longer be swallowed into the captured text.
http_equiv_reg = re.compile(
    r'(<meta[^>]*?http\-equiv[^>]*?content-type[^>]*?>)', re.I|re.M|re.S)

# 'charset=<name>' inside the tag; group 1 / 'charset' is the name.
# Requiring at least one name character avoids the old empty-string capture
# when whitespace or a quote followed the '='.
http_equiv_reg2 = re.compile(
    r'charset\s*=\s*["\']?(?P<charset>[\w\-]+)', re.I|re.M|re.S)
def encodingFromXMLPreamble(xml):
    """ Extract the encoding from an XML preamble.

        Returns the lowercased encoding name captured by
        'xml_preamble_reg', or 'utf-8' if the pattern does not match.
    """
    found = xml_preamble_reg.match(xml)
    if found:
        return found.group(1).lower()
    return 'utf-8'
def charsetFromMetaEquiv(html):
    """ Return the lowercased 'charset' value of an HTML document containing
        <meta http-equiv="content-type" content="text/html; charset=utf8">.

        Returns None if there is no such tag or it carries no charset.
    """
    # step 1: locate the <meta ...> tag itself
    tag_match = http_equiv_reg.search(html)
    if not tag_match:
        return None

    # step 2: look for the charset value inside the extracted tag only
    charset_match = http_equiv_reg2.search(tag_match.group(1))
    if not charset_match:
        return None

    return charset_match.group(1).lower()
def convertToUnicode(source, content_type, preferred_encodings):
    """ Convert 'source' to unicode.

        text/xml:  decode with the encoding from the XML preamble.
        text/html: decode with the charset declared in the content-type
                   <meta> tag if there is one and it works; otherwise try
                   each entry of 'preferred_encodings' in order.

        Returns (unicode_str, source_encoding).
        Raises TypeError if no encoding for an HTML source could be found
        and ValueError for any other content type.
    """
    if content_type.startswith('text/xml'):
        encoding = encodingFromXMLPreamble(source)
        return unicode(source, encoding), encoding

    elif content_type.startswith('text/html'):
        declared = charsetFromMetaEquiv(source)
        if declared:
            # The declared charset used to be looked up but never used for
            # decoding; honour it first.
            try:
                return unicode(source, declared), declared
            except (UnicodeDecodeError, LookupError):
                # wrong or unknown charset declared: fall back to sniffing
                pass

        # Try to detect the encoding by converting it to unicode without
        # raising exceptions. There are some smarter Python-based sniffer
        # methods available, however we have to check their licenses first
        # before including them into the Zope 2 core.
        for enc in preferred_encodings:
            try:
                return unicode(source, enc), enc
            except UnicodeDecodeError:
                continue

        raise TypeError('Could not auto-detect encoding')

    else:
        raise ValueError('Unsupported content-type: %s' % content_type)
<html>
<head>
<title tal:content="template/title">The title</title>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
</head>
<body>
......
......@@ -33,7 +33,7 @@
tal:content="python:context.bobobase_modification_time().strftime('%Y-%m-%d %I:%M %p')">1/1/2000
</div>
</td>
<td align="left" valign="top" colspan="2">
<td align="left" valign="top" colspan="2" rowspan="2">
<a href="source.html" tal:condition="context/html">Browse HTML source</a>
<a href="source.xml" tal:condition="not:context/html">Browse XML source</a>
<br />
......@@ -45,7 +45,6 @@
</tr>
<tr tal:define="errors context/pt_errors" tal:condition="errors">
<tal:block define="global body python:context.document_src({'raw':1})" />
<td align="left" valign="middle" class="form-label">Errors</td>
<td align="left" valign="middle" style="background-color: #FFDDDD"
colspan="3">
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment