Commit 0601dfd6 authored by Andreas Jung's avatar Andreas Jung

Backported several encoding and webdav fixes for ZPT from the HEAD

parents 93fe1e0b 237c0626
......@@ -14,6 +14,16 @@ Zope Changes
- added Python 2.4.4 as optimal Python version to 'configure'
- the ZopePageTemplate implementation now uses unicode internally.
Non-unicode instances are migrated on-the-fly to unicode. However, this
only works properly for ZPT instances formerly encoded as utf-8 or
ISO-8859-15. For other encodings you can set the environment variable
ZPT_PREFERRED_ENCODING to insert your preferred encoding in front of utf-8 and
ISO-8859-15 within the encoding sniffer code. In addition there is a new
'output_encoding' property that controls the conversion from/to unicode
for WebDAV/FTP operations.
Zope 2.10.1 (2006-11-22)
Bugs fixed
......
......@@ -32,6 +32,14 @@ from zope.pagetemplate.pagetemplatefile import sniff_type
LOG = getLogger('PageTemplateFile')
def guess_type(filename, text):
# check for XML ourself since guess_content_type can't
# detect text/xml if 'filename' won't end with .xml
# XXX: fix this in zope.contenttype
if text.startswith('<?xml'):
return 'text/xml'
content_type, dummy = guess_content_type(filename, text)
if content_type in ('text/html', 'text/xml'):
return content_type
......
# -*- encoding: iso-8859-15 -*-
"""ZopePageTemplate regression tests.
Ensures that adding a page template works correctly.
......@@ -6,13 +8,162 @@ Note: Tests require Zope >= 2.7
"""
import unittest
import Zope2
import transaction
import zope.component.testing
from zope.traversing.adapters import DefaultTraversable
from Testing.makerequest import makerequest
from Testing.ZopeTestCase import ZopeTestCase, installProduct
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
# Fixture strings shared by the test classes below.

# Plain 7-bit markup.
ascii_str = '<html><body>hello world</body></html>'
# NOTE(review): the body is empty in this view; presumably it originally held
# non-ASCII ISO-8859-15 characters -- confirm against the repository copy.
iso885915_str = '<html><body></body></html>'
# The same markup, decoded from ISO-8859-15 and re-encoded as UTF-8.
utf8_str = unicode(iso885915_str, 'iso-8859-15').encode('utf-8')
# XML document; the declared encoding is filled in through the '%s' slot.
# NOTE(review): 'vesion' looks like a typo for 'version' -- confirm whether
# it is intentional before touching this fixture.
xml_template = '''<?xml vesion="1.0" encoding="%s"?>
<foo>
</foo>
'''
xml_iso_8859_15 = xml_template % 'iso-8859-15'
# Re-encode the template as UTF-8 first, then fill in the declared encoding.
xml_utf8 = unicode(xml_template, 'iso-8859-15').encode('utf-8') % 'utf-8'
# HTML document carrying a <META http-equiv> charset declaration; the charset
# is filled in through the '%s' slot.
# NOTE(review): the closing tag reads '</hed>' rather than '</head>' --
# confirm whether the malformed markup is intentional.
html_template_w_header = '''
<html>
<head>
<META http-equiv="content-type" content="text/html; charset=%s">
</hed>
<body>
test
</body>
</html>
'''
html_iso_8859_15_w_header = html_template_w_header % 'iso-8859-15'
html_utf8_w_header = unicode(html_template_w_header, 'iso-8859-15').encode('utf-8') % 'utf-8'
# HTML document without any charset declaration.
html_template_wo_header = '''
<html>
<body>
test
</body>
</html>
'''
html_iso_8859_15_wo_header = html_template_wo_header
html_utf8_wo_header = unicode(html_template_wo_header, 'iso-8859-15').encode('utf-8')
# NOTE(review): presumably makes the PageTemplates product available to the
# ZopeTestCase-based tests below -- confirm against Testing.ZopeTestCase.
installProduct('PageTemplates')
class ZPTUtilsTests(unittest.TestCase):
    """Tests for the encoding-detection helpers imported from
    Products.PageTemplates.utils."""

    def testExtractEncodingFromXMLPreamble(self):
        # Each pair is (XML preamble, expected result). The expected values
        # are lower-case, and 'utf-8' is expected when no encoding is given.
        expectations = (
            ('<?xml version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="utf-8" version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="UTF-8" version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="ISO-8859-15" version="1.0" ?>', 'iso-8859-15'),
            ('<?xml encoding="iso-8859-15" version="1.0" ?>', 'iso-8859-15'),
        )
        for preamble, expected in expectations:
            self.assertEqual(encodingFromXMLPreamble(preamble), expected)

    def testExtractCharsetFromMetaHTTPEquivTag(self):
        # Each pair is (HTML source, expected result). None is expected when
        # there is no charset in the tag or no <meta> tag at all.
        expectations = (
            ('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>', 'utf-8'),
            ('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>', 'iso-8859-15'),
            ('<html><META http-equiv="content-type" content="text/html"></html>', None),
            ('<html>...<html>', None),
        )
        for markup, expected in expectations:
            self.assertEqual(charsetFromMetaEquiv(markup), expected)
class ZopePageTemplateFileTests(ZopeTestCase):
    """Rendering, write() and PUT tests for ZopePageTemplate instances
    created from differently encoded sources."""

    # -- shared checks -----------------------------------------------------

    def _checkRender(self, source, encoding):
        # Add a template built from 'source', render it and compare the
        # re-encoded output with the source.
        manage_addPageTemplate(self.app, 'test', text=source, encoding=encoding)
        template = self.app['test']
        rendered = template.pt_render()
        # Prefix comparison only, because the renderer appends a trailing \n.
        self.assertEqual(rendered.encode(encoding).startswith(source), True)
        self.assertEqual(template.output_encoding, 'iso-8859-15')

    def _checkPut(self, body, encoding, content_type):
        # PUT 'body' and verify the resulting output encoding / content type.
        template = self._put(body)
        self.assertEqual(template.output_encoding, encoding)
        self.assertEqual(template.content_type, content_type)

    # -- rendering ---------------------------------------------------------

    def testPT_RenderWithAscii(self):
        self._checkRender(ascii_str, 'ascii')

    def testPT_RenderWithISO885915(self):
        self._checkRender(iso885915_str, 'iso-8859-15')

    def testPT_RenderWithUTF8(self):
        self._checkRender(utf8_str, 'utf-8')

    # -- write() -----------------------------------------------------------

    def testWriteAcceptsUnicode(self):
        manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
        template = self.app['test']
        text = u'this is unicode'
        template.write(text)
        self.assertEqual(template.read(), text)
        self.assertEqual(isinstance(template.read(), unicode), True)

    def testWriteWontAcceptsNonUnicode(self):
        manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
        template = self.app['test']
        self.assertRaises(TypeError, template.write, 'this is not unicode')

    # -- PUT ---------------------------------------------------------------

    def _createZPT(self):
        # Template seeded with the UTF-8 fixture; used as the PUT target.
        manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
        return self.app['test']

    def _makePUTRequest(self, body):
        # Minimal mapping carrying only the request body.
        return {'BODY' : body}

    def _put(self, text):
        # Create the template, PUT 'text' into it and hand it back.
        template = self._createZPT()
        request = self.app.REQUEST
        request.set('BODY', text)
        template.PUT(request, request.RESPONSE)
        return template

    def testPutHTMLIso8859_15WithCharsetInfo(self):
        self._checkPut(html_iso_8859_15_w_header, 'iso-8859-15', 'text/html')

    def testPutHTMLUTF8_WithCharsetInfo(self):
        self._checkPut(html_utf8_w_header, 'utf-8', 'text/html')

    def testPutHTMLIso8859_15WithoutCharsetInfo(self):
        self._checkPut(html_iso_8859_15_wo_header, 'iso-8859-15', 'text/html')

    def testPutHTMLUTF8_WithoutCharsetInfo(self):
        self._checkPut(html_utf8_wo_header, 'iso-8859-15', 'text/html')

    def testPutXMLIso8859_15(self):
        """ XML: always use UTF-8 as output encoding """
        self._checkPut(xml_iso_8859_15, 'utf-8', 'text/xml')

    def testPutXMLUTF8(self):
        """ XML: always use UTF-8 as output encoding """
        self._checkPut(xml_utf8, 'utf-8', 'text/xml')
class ZPTRegressions(unittest.TestCase):
......@@ -58,13 +209,6 @@ class ZPTRegressions(unittest.TestCase):
pt = self.app.pt1
self.assertEqual(pt.document_src(), self.text)
def test_BBB_for_strict_attribute(self):
# Collector 2213: old templates don't have 'strict' attribute.
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
zpt = ZopePageTemplate('issue_2213')
del zpt.strict # simulate old templates
self.assertEqual(zpt.strict, False)
class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase):
......@@ -132,7 +276,9 @@ class DummyFileUpload:
def test_suite():
    """Build one suite containing every test case class of this module."""
    case_classes = (
        ZPTRegressions,
        ZPTUtilsTests,
        ZPTMacros,
        ZopePageTemplateFileTests,
    )
    # The first class seeds the suite; the remaining ones are appended in
    # the listed order.
    suite = unittest.makeSuite(case_classes[0])
    for case_class in case_classes[1:]:
        suite.addTests(unittest.makeSuite(case_class))
    return suite
if __name__ == '__main__':
......
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
""" Some helper methods
$Id: ZopePageTemplate.py 71579 2006-12-17 20:26:10Z andreasjung $
"""
import re
# XML declaration at the very start of the text; group 1 is the declared
# encoding. Both quote styles are accepted, since an XML declaration may use
# single or double quotes.
xml_preamble_reg = re.compile(
    r'^<\?xml.*?encoding\s*=\s*["\']([^"\']*)["\'].*?\?>', re.M)

# A single <meta ... http-equiv ... content-type ...> tag. '[^>]*?' keeps the
# match inside one tag, so an unrelated <meta> tag that happens to precede
# the content-type one is not swallowed into the match.
http_equiv_reg = re.compile(
    r'(<meta[^>]*?http\-equiv[^>]*?content-type[^>]*?>)', re.I|re.M|re.S)

# 'charset=<value>' inside the tag found by http_equiv_reg. The value must be
# non-empty; optional whitespace around '=' and an optional opening quote are
# skipped so that 'charset = utf-8' no longer yields an empty capture.
http_equiv_reg2 = re.compile(
    r'charset\s*=\s*["\']?(?P<charset>[\w\-]+)', re.I|re.M|re.S)
def encodingFromXMLPreamble(xml):
    """ Return the lower-cased encoding named in the XML preamble of 'xml'.

        Falls back to 'utf-8' when the text does not start with a preamble
        that carries an encoding.
    """
    found = xml_preamble_reg.match(xml)
    if found is None:
        return 'utf-8'
    return found.group(1).lower()
def charsetFromMetaEquiv(html):
    """ Return the lower-cased 'charset' value of a html document containing
        <meta http-equiv="content-type" content="text/html; charset=utf8">.

        Returns None if there is no such tag, if the tag carries no charset,
        or if the captured charset value is empty.
    """
    # first look for the <meta ...> tag itself
    meta_match = http_equiv_reg.search(html)
    if meta_match is None:
        return None

    # then look for the charset value inside that tag only
    charset_match = http_equiv_reg2.search(meta_match.group(1))
    if charset_match is None:
        return None

    # An empty capture means "no usable charset": report it as None, as the
    # contract promises, rather than as ''.
    return charset_match.group(1).lower() or None
def convertToUnicode(source, content_type, preferred_encodings):
    """ Convert 'source' to unicode.
        Returns (unicode_str, source_encoding).

        text/xml:  decoded with the encoding from the XML preamble
                   ('utf-8' if none is declared); decoding errors propagate.
        text/html: decoded with the charset declared in the
                   <meta http-equiv="content-type"> tag if there is one and
                   it works, otherwise with the first entry of
                   'preferred_encodings' that decodes without error.

        Raises TypeError if no encoding could be determined for a text/html
        source and ValueError for any other content type.
    """
    if content_type.startswith('text/xml'):
        encoding = encodingFromXMLPreamble(source)
        return unicode(source, encoding), encoding

    if content_type.startswith('text/html'):
        encoding = charsetFromMetaEquiv(source)

        # A charset declared by the document itself takes precedence.
        # Previously a detected charset was never used for decoding.
        if encoding:
            try:
                return unicode(source, encoding), encoding
            except (UnicodeDecodeError, LookupError):
                # Declared charset is unknown or does not match the actual
                # bytes: fall through to the sniffing below.
                pass

        # Try to detect the encoding by converting it to unicode without
        # raising exceptions. There are some smarter Python-based sniffer
        # methods available, however we have to check their licenses first
        # before including them into the Zope 2 core.
        for enc in preferred_encodings:
            try:
                return unicode(source, enc), enc
            except UnicodeDecodeError:
                continue

        raise TypeError('Could not auto-detect encoding')

    raise ValueError('Unsupported content-type: %s' % content_type)
<html>
<head>
<title tal:content="template/title">The title</title>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
</head>
<body>
......
......@@ -33,7 +33,7 @@
tal:content="python:context.bobobase_modification_time().strftime('%Y-%m-%d %I:%M %p')">1/1/2000
</div>
</td>
<td align="left" valign="top" colspan="2">
<td align="left" valign="top" colspan="2" rowspan="2">
<a href="source.html" tal:condition="context/html">Browse HTML source</a>
<a href="source.xml" tal:condition="not:context/html">Browse XML source</a>
<br />
......@@ -45,7 +45,6 @@
</tr>
<tr tal:define="errors context/pt_errors" tal:condition="errors">
<tal:block define="global body python:context.document_src({'raw':1})" />
<td align="left" valign="middle" class="form-label">Errors</td>
<td align="left" valign="middle" style="background-color: #FFDDDD"
colspan="3">
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment