Commit 0601dfd6 authored by Andreas Jung's avatar Andreas Jung

Backported several encoding and webdav fixes for ZPT from the HEAD

parents 93fe1e0b 237c0626
...@@ -14,6 +14,16 @@ Zope Changes ...@@ -14,6 +14,16 @@ Zope Changes
- added Python 2.4.4 as optimal Python version to 'configure' - added Python 2.4.4 as optimal Python version to 'configure'
- the ZopePageTemplate implementation now uses unicode internally.
Non-unicode instances are migrated on-the-fly to unicode. However, this
will only work properly for ZPT instances formerly encoded as utf-8 or
ISO-8859-15. For other encodings you can set the environment variable
ZPT_PREFERRED_ENCODING to insert your preferred encoding in front of utf-8 and
ISO-8859-15 within the encoding sniffer code. In addition there is a new
'output_encoding' property that controls the conversion from/to unicode
for WebDAV/FTP operations.
Zope 2.10.1 (2006-11-22) Zope 2.10.1 (2006-11-22)
Bugs fixed Bugs fixed
......
...@@ -32,6 +32,14 @@ from zope.pagetemplate.pagetemplatefile import sniff_type ...@@ -32,6 +32,14 @@ from zope.pagetemplate.pagetemplatefile import sniff_type
LOG = getLogger('PageTemplateFile') LOG = getLogger('PageTemplateFile')
def guess_type(filename, text): def guess_type(filename, text):
# check for XML ourself since guess_content_type can't
# detect text/xml if 'filename' won't end with .xml
# XXX: fix this in zope.contenttype
if text.startswith('<?xml'):
return 'text/xml'
content_type, dummy = guess_content_type(filename, text) content_type, dummy = guess_content_type(filename, text)
if content_type in ('text/html', 'text/xml'): if content_type in ('text/html', 'text/xml'):
return content_type return content_type
......
# -*- encoding: iso-8859-15 -*-
"""ZopePageTemplate regression tests. """ZopePageTemplate regression tests.
Ensures that adding a page template works correctly. Ensures that adding a page template works correctly.
...@@ -6,13 +8,162 @@ Note: Tests require Zope >= 2.7 ...@@ -6,13 +8,162 @@ Note: Tests require Zope >= 2.7
""" """
import unittest import unittest
import Zope2 import Zope2
import transaction import transaction
import zope.component.testing import zope.component.testing
from zope.traversing.adapters import DefaultTraversable from zope.traversing.adapters import DefaultTraversable
from Testing.makerequest import makerequest from Testing.makerequest import makerequest
from Testing.ZopeTestCase import ZopeTestCase, installProduct
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
# ---------------------------------------------------------------------------
# Fixtures shared by the tests below.  All of them are (Python 2) byte
# strings; the *_utf8 variants are derived from the ISO-8859-15 ones by
# decoding and re-encoding.
# ---------------------------------------------------------------------------

ascii_str = '<html><body>hello world</body></html>'

# ISO-8859-15 encoded document with non-ASCII content.  Without non-ASCII
# bytes the ISO-8859-15 and UTF-8 fixtures would be byte-identical and the
# encoding tests would not exercise anything.  Hex escapes keep the literal
# independent of the encoding of this source file.
# NOTE(review): the non-ASCII payload was missing from this literal; it has
# been restored as German umlauts plus sharp s -- confirm against the
# upstream fixture.
iso885915_str = '<html><body>\xfc\xf6\xe4\xdc\xd6\xc4\xdf</body></html>'
utf8_str = unicode(iso885915_str, 'iso-8859-15').encode('utf-8')

# XML document whose declaration names the encoding given via '%s'.
xml_template = '''<?xml version="1.0" encoding="%s"?>
<foo>
</foo>
'''

xml_iso_8859_15 = xml_template % 'iso-8859-15'
xml_utf8 = unicode(xml_template, 'iso-8859-15').encode('utf-8') % 'utf-8'

# HTML document announcing its charset (given via '%s') in a meta tag.
html_template_w_header = '''
<html>
<head>
<META http-equiv="content-type" content="text/html; charset=%s">
</head>
<body>
test
</body>
</html>
'''

html_iso_8859_15_w_header = html_template_w_header % 'iso-8859-15'
html_utf8_w_header = unicode(html_template_w_header, 'iso-8859-15').encode('utf-8') % 'utf-8'

# HTML document without any charset information.
html_template_wo_header = '''
<html>
<body>
test
</body>
</html>
'''

html_iso_8859_15_wo_header = html_template_wo_header
html_utf8_wo_header = unicode(html_template_wo_header, 'iso-8859-15').encode('utf-8')

installProduct('PageTemplates')
class ZPTUtilsTests(unittest.TestCase):
    """Checks for the encoding sniffers encodingFromXMLPreamble and
    charsetFromMetaEquiv.
    """

    def testExtractEncodingFromXMLPreamble(self):
        # (XML declaration, expected encoding) pairs.  A declaration without
        # an encoding is expected to yield 'utf-8'; declared names are
        # expected back in lower case.
        expectations = (
            ('<?xml version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="utf-8" version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="UTF-8" version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="ISO-8859-15" version="1.0" ?>', 'iso-8859-15'),
            ('<?xml encoding="iso-8859-15" version="1.0" ?>', 'iso-8859-15'),
        )
        for preamble, encoding in expectations:
            self.assertEqual(encodingFromXMLPreamble(preamble), encoding)

    def testExtractCharsetFromMetaHTTPEquivTag(self):
        # (HTML snippet, expected charset) pairs.  None is expected when the
        # snippet has no meta tag or the meta tag carries no charset.
        expectations = (
            ('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>', 'utf-8'),
            ('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>', 'iso-8859-15'),
            ('<html><META http-equiv="content-type" content="text/html"></html>', None),
            ('<html>...<html>', None),
        )
        for snippet, charset in expectations:
            self.assertEqual(charsetFromMetaEquiv(snippet), charset)
class ZopePageTemplateFileTests(ZopeTestCase):
    """Encoding-related checks for ZopePageTemplate: rendering, write()
    and PUT requests.
    """

    # -- rendering ---------------------------------------------------------

    def _renderAndCheck(self, text, encoding):
        # Shared body of the testPT_RenderWith* tests: add a template from
        # 'text' (encoded in 'encoding'), render it and compare the output.
        manage_addPageTemplate(self.app, 'test', text=text, encoding=encoding)
        template = self.app['test']
        rendered = template.pt_render()
        # only the prefix is compared because the renderer appends a
        # trailing \n
        self.assertEqual(rendered.encode(encoding).startswith(text), True)
        self.assertEqual(template.output_encoding, 'iso-8859-15')

    def testPT_RenderWithAscii(self):
        self._renderAndCheck(ascii_str, 'ascii')

    def testPT_RenderWithISO885915(self):
        self._renderAndCheck(iso885915_str, 'iso-8859-15')

    def testPT_RenderWithUTF8(self):
        self._renderAndCheck(utf8_str, 'utf-8')

    # -- write() -----------------------------------------------------------

    def testWriteAcceptsUnicode(self):
        manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
        template = self.app['test']
        text = u'this is unicode'
        template.write(text)
        self.assertEqual(template.read(), text)
        self.assertEqual(isinstance(template.read(), unicode), True)

    def testWriteWontAcceptsNonUnicode(self):
        manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
        template = self.app['test']
        self.assertRaises(TypeError, template.write, 'this is not unicode')

    # -- PUT ---------------------------------------------------------------

    def _createZPT(self):
        # Add a template with UTF-8 content and hand it back.
        manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
        return self.app['test']

    def _makePUTRequest(self, body):
        return dict(BODY=body)

    def _put(self, text):
        # Create a template, PUT 'text' into it as the request body and
        # return the template.
        template = self._createZPT()
        request = self.app.REQUEST
        request.set('BODY', text)
        template.PUT(request, request.RESPONSE)
        return template

    def _checkPut(self, body, output_encoding, content_type):
        # Shared body of the testPut* tests.
        template = self._put(body)
        self.assertEqual(template.output_encoding, output_encoding)
        self.assertEqual(template.content_type, content_type)

    def testPutHTMLIso8859_15WithCharsetInfo(self):
        self._checkPut(html_iso_8859_15_w_header, 'iso-8859-15', 'text/html')

    def testPutHTMLUTF8_WithCharsetInfo(self):
        self._checkPut(html_utf8_w_header, 'utf-8', 'text/html')

    def testPutHTMLIso8859_15WithoutCharsetInfo(self):
        self._checkPut(html_iso_8859_15_wo_header, 'iso-8859-15', 'text/html')

    def testPutHTMLUTF8_WithoutCharsetInfo(self):
        self._checkPut(html_utf8_wo_header, 'iso-8859-15', 'text/html')

    def testPutXMLIso8859_15(self):
        """ XML: always use UTF-8 as output encoding """
        self._checkPut(xml_iso_8859_15, 'utf-8', 'text/xml')

    def testPutXMLUTF8(self):
        """ XML: always use UTF-8 as output encoding """
        self._checkPut(xml_utf8, 'utf-8', 'text/xml')
class ZPTRegressions(unittest.TestCase): class ZPTRegressions(unittest.TestCase):
...@@ -58,13 +209,6 @@ class ZPTRegressions(unittest.TestCase): ...@@ -58,13 +209,6 @@ class ZPTRegressions(unittest.TestCase):
pt = self.app.pt1 pt = self.app.pt1
self.assertEqual(pt.document_src(), self.text) self.assertEqual(pt.document_src(), self.text)
def test_BBB_for_strict_attribute(self):
    # Collector 2213: old templates don't have a 'strict' attribute of
    # their own; reading it must still give False.
    from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
    legacy = ZopePageTemplate('issue_2213')
    # simulate an old template by dropping the instance attribute
    delattr(legacy, 'strict')
    self.assertEqual(legacy.strict, False)
class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase): class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase):
...@@ -132,7 +276,9 @@ class DummyFileUpload: ...@@ -132,7 +276,9 @@ class DummyFileUpload:
def test_suite(): def test_suite():
suite = unittest.makeSuite(ZPTRegressions) suite = unittest.makeSuite(ZPTRegressions)
suite.addTests(unittest.makeSuite(ZPTUtilsTests))
suite.addTests(unittest.makeSuite(ZPTMacros)) suite.addTests(unittest.makeSuite(ZPTMacros))
suite.addTests(unittest.makeSuite(ZopePageTemplateFileTests))
return suite return suite
if __name__ == '__main__': if __name__ == '__main__':
......
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
""" Some helper methods
$Id: ZopePageTemplate.py 71579 2006-12-17 20:26:10Z andreasjung $
"""
import re
# XML declaration at the very start of a document; group 1 is the declared
# encoding.  The value may be quoted with either " or ' and '=' may be
# surrounded by whitespace.  '[^>]' keeps the match inside the declaration
# itself.
xml_preamble_reg = re.compile(
    r'''^<\?xml[^>]*?encoding\s*=\s*["'](.*?)["'][^>]*?\?>''', re.M)

# A single <meta ... http-equiv ... content-type ...> tag; group 1 is the
# whole tag.  '[^>]' prevents the match from starting in one tag and ending
# in a later one.
http_equiv_reg = re.compile(
    r'(<meta[^>]*?http\-equiv[^>]*?content-type[^>]*?>)', re.I | re.M | re.S)

# 'charset=<name>' inside such a tag; the name is available as group 1 and as
# the named group 'charset'.  Whitespace around '=' and an opening quote are
# skipped so that the captured name is never empty.
http_equiv_reg2 = re.compile(
    r'''charset\s*=\s*["']?(?P<charset>[\w\-]+)''', re.I | re.M | re.S)
def encodingFromXMLPreamble(xml):
    """ Extract the encoding from a xml preamble.
        The declared encoding is returned in lower case.
        Return 'utf-8' if not available
    """
    # match() anchors at the start of 'xml': only a leading preamble counts
    preamble = xml_preamble_reg.match(xml)
    if preamble is not None:
        return preamble.group(1).lower()
    # no preamble or no encoding declared in it
    return 'utf-8'
def charsetFromMetaEquiv(html):
    """ Return the value of the 'charset' from a html document
        containing <meta http-equiv="content-type" content="text/html; charset=utf8">.
        The charset is returned in lower case.
        Returns None, if not available.
    """
    # first check for the <meta...> tag
    meta_match = http_equiv_reg.search(html)
    if meta_match is None:
        return None

    # search for the charset value inside the extracted meta tag only
    charset_match = http_equiv_reg2.search(meta_match.group(1))
    if charset_match is None:
        return None

    # An empty capture means no usable charset was given; report it as
    # "not available" (None) instead of handing back an empty string.
    return charset_match.group('charset').lower() or None
def convertToUnicode(source, content_type, preferred_encodings):
    """ Convert 'source' to unicode.
        Returns (unicode_str, source_encoding).

        'source' is the encoded text, 'content_type' selects how the
        encoding is determined and 'preferred_encodings' is the sequence of
        encodings tried, in order, for HTML without a declared charset.

        text/xml:  the encoding is taken from the XML preamble
                   (encodingFromXMLPreamble).
        text/html: the charset of the <meta http-equiv="content-type" ...>
                   tag is used if present (charsetFromMetaEquiv); otherwise
                   the first entry of 'preferred_encodings' that decodes
                   'source' without error wins.

        Raises TypeError if no encoding could be detected for HTML and
        ValueError for any other content-type.  Decoding with a declared
        encoding is not guarded, so errors raised by unicode() for a wrong
        or unknown declared encoding propagate to the caller.
    """
    if content_type.startswith('text/xml'):
        encoding = encodingFromXMLPreamble(source)
        return unicode(source, encoding), encoding

    if content_type.startswith('text/html'):
        encoding = charsetFromMetaEquiv(source)
        if encoding:
            # The document declares its own charset: use it.  Previously the
            # detected value was dropped and no result was returned.
            return unicode(source, encoding), encoding

        # Try to detect the encoding by converting it unicode without raising
        # exceptions. There are some smarter Python-based sniffer methods
        # available however we have to check their licenses first before
        # including them into the Zope 2 core
        for enc in preferred_encodings:
            try:
                return unicode(source, enc), enc
            except UnicodeDecodeError:
                continue

        raise TypeError('Could not auto-detect encoding')

    raise ValueError('Unsupported content-type: %s' % content_type)
<html> <html>
<head> <head>
<title tal:content="template/title">The title</title> <title tal:content="template/title">The title</title>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
</head> </head>
<body> <body>
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
tal:content="python:context.bobobase_modification_time().strftime('%Y-%m-%d %I:%M %p')">1/1/2000 tal:content="python:context.bobobase_modification_time().strftime('%Y-%m-%d %I:%M %p')">1/1/2000
</div> </div>
</td> </td>
<td align="left" valign="top" colspan="2"> <td align="left" valign="top" colspan="2" rowspan="2">
<a href="source.html" tal:condition="context/html">Browse HTML source</a> <a href="source.html" tal:condition="context/html">Browse HTML source</a>
<a href="source.xml" tal:condition="not:context/html">Browse XML source</a> <a href="source.xml" tal:condition="not:context/html">Browse XML source</a>
<br /> <br />
...@@ -45,7 +45,6 @@ ...@@ -45,7 +45,6 @@
</tr> </tr>
<tr tal:define="errors context/pt_errors" tal:condition="errors"> <tr tal:define="errors context/pt_errors" tal:condition="errors">
<tal:block define="global body python:context.document_src({'raw':1})" />
<td align="left" valign="middle" class="form-label">Errors</td> <td align="left" valign="middle" class="form-label">Errors</td>
<td align="left" valign="middle" style="background-color: #FFDDDD" <td align="left" valign="middle" style="background-color: #FFDDDD"
colspan="3"> colspan="3">
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment