Backported several encoding and webdav fixes for ZPT from the HEAD

0601dfd6 · Andreas Jung · 93fe1e0b · 237c0626 · 0601dfd6 · 0601dfd6
Commit 0601dfd6 authored Dec 28, 2006 by Andreas Jung
7 changed files
--- a/doc/CHANGES.txt
+++ b/doc/CHANGES.txt
@@ -14,6 +14,16 @@ Zope Changes

      - added Python 2.4.4 as optimal Python version to 'configure'

+      - the ZopePageTemplate implementation now uses unicode internally.
+        Non-unicode instances are migrated on-the-fly to unicode. However,
+        this will only work properly for ZPT instances formerly encoded as
+        utf-8 or ISO-8859-15. For other encodings you can set the environment
+        variable ZPT_PREFERRED_ENCODING to insert your preferred encoding in
+        front of utf-8 and ISO-8859-15 within the encoding sniffer code. In
+        addition there is a new 'output_encoding' property that controls the
+        conversion from/to unicode for WebDAV/FTP operations.
+
+
  Zope 2.10.1 (2006-11-22)

    Bugs fixed

--- a/lib/python/Products/PageTemplates/PageTemplateFile.py
+++ b/lib/python/Products/PageTemplates/PageTemplateFile.py
@@ -32,6 +32,14 @@ from zope.pagetemplate.pagetemplatefile import sniff_type
 LOG = getLogger('PageTemplateFile')

 def guess_type(filename, text):
+
+    # Check for XML ourselves, since guess_content_type() can't
+    # detect text/xml if 'filename' doesn't end with .xml.
+    # XXX: fix this in zope.contenttype
+
+    if text.startswith('<?xml'):
+        return 'text/xml'
+
    content_type, dummy = guess_content_type(filename, text)
    if content_type in ('text/html', 'text/xml'):
        return content_type

--- a/lib/python/Products/PageTemplates/ZopePageTemplate.py
+++ b/lib/python/Products/PageTemplates/ZopePageTemplate.py
--- a/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
+++ b/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py
+# -*- encoding: iso-8859-15 -*-
+
 """ZopePageTemplate regression tests.

 Ensures that adding a page template works correctly.
@@ -6,13 +8,162 @@ Note: Tests require Zope >= 2.7

 """

-
 import unittest
 import Zope2
 import transaction
 import zope.component.testing
 from zope.traversing.adapters import DefaultTraversable
 from Testing.makerequest import makerequest
+from Testing.ZopeTestCase import ZopeTestCase, installProduct
+from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
+from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
+
+
+# Fixture documents for the tests below.  The 8-bit literals are decoded as
+# iso-8859-15 (the source encoding declared at the top of this module) in
+# order to derive their utf-8 encoded counterparts.
+ascii_str = '<html><body>hello world</body></html>'
+iso885915_str = '<html><body></body></html>'
+utf8_str = unicode(iso885915_str, 'iso-8859-15').encode('utf-8')
+
+# XML document with an encoding declaration; '%s' takes the encoding name.
+xml_template = '''<?xml version="1.0" encoding="%s"?>
+<foo>
+
+</foo>
+'''
+
+xml_iso_8859_15 = xml_template % 'iso-8859-15'
+xml_utf8 = unicode(xml_template, 'iso-8859-15').encode('utf-8') % 'utf-8'
+
+# HTML document that announces its charset through a <meta http-equiv> tag;
+# '%s' takes the charset name.
+html_template_w_header = '''
+<html>
+    <head>
+        <META http-equiv="content-type" content="text/html; charset=%s">
+    </head>
+    <body>
+    test 
+    </body>
+</html>
+'''
+
+html_iso_8859_15_w_header = html_template_w_header % 'iso-8859-15'
+html_utf8_w_header = unicode(html_template_w_header, 'iso-8859-15').encode('utf-8') % 'utf-8'
+
+# HTML document without any charset information.
+html_template_wo_header = '''
+<html>
+    <body>
+    test 
+    </body>
+</html>
+'''
+
+html_iso_8859_15_wo_header = html_template_wo_header
+html_utf8_wo_header = unicode(html_template_wo_header, 'iso-8859-15').encode('utf-8')
+
+
+# The ZopeTestCase-based tests below need the PageTemplates product.
+installProduct('PageTemplates')
+
+class ZPTUtilsTests(unittest.TestCase):
+    """ Exercise the encoding helpers imported from Products.PageTemplates.utils """
+
+    def testExtractEncodingFromXMLPreamble(self):
+        # (XML preamble, expected encoding) pairs
+        expectations = (
+            ('<?xml version="1.0" ?>', 'utf-8'),
+            ('<?xml encoding="utf-8" version="1.0" ?>', 'utf-8'),
+            ('<?xml encoding="UTF-8" version="1.0" ?>', 'utf-8'),
+            ('<?xml encoding="ISO-8859-15" version="1.0" ?>', 'iso-8859-15'),
+            ('<?xml encoding="iso-8859-15" version="1.0" ?>', 'iso-8859-15'),
+        )
+        for preamble, expected in expectations:
+            self.assertEqual(encodingFromXMLPreamble(preamble), expected)
+
+    def testExtractCharsetFromMetaHTTPEquivTag(self):
+        # (HTML document, expected charset or None) pairs
+        expectations = (
+            ('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>', 'utf-8'),
+            ('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>', 'iso-8859-15'),
+            ('<html><META http-equiv="content-type" content="text/html"></html>', None),
+            ('<html>...<html>', None),
+        )
+        for document, expected in expectations:
+            self.assertEqual(charsetFromMetaEquiv(document), expected)
+        
+
+class ZopePageTemplateFileTests(ZopeTestCase):
+    """ Tests for rendering, write() and PUT handling of ZopePageTemplate
+        instances created through manage_addPageTemplate().
+    """
+
+    # The three render tests create a template from an encoded byte string,
+    # re-encode the rendered result with the same encoding and compare it
+    # with the input.  In all three cases 'output_encoding' is expected to
+    # be 'iso-8859-15', whatever encoding was passed in.
+
+    def testPT_RenderWithAscii(self):
+        manage_addPageTemplate(self.app, 'test', text=ascii_str, encoding='ascii')
+        zpt = self.app['test']
+        result = zpt.pt_render()
+        # use startswith() because the renderer appends a trailing \n
+        self.assertEqual(result.encode('ascii').startswith(ascii_str), True)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+
+    def testPT_RenderWithISO885915(self):
+        manage_addPageTemplate(self.app, 'test', text=iso885915_str, encoding='iso-8859-15')
+        zpt = self.app['test']
+        result = zpt.pt_render()
+        # use startswith() because the renderer appends a trailing \n
+        self.assertEqual(result.encode('iso-8859-15').startswith(iso885915_str), True)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+
+    def testPT_RenderWithUTF8(self):
+        manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
+        zpt = self.app['test']
+        result = zpt.pt_render()
+        # use startswith() because the renderer appends a trailing \n
+        self.assertEqual(result.encode('utf-8').startswith(utf8_str), True)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+
+    def testWriteAcceptsUnicode(self):
+        # write() must take a unicode string and read() must hand the same
+        # text back as unicode
+        manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
+        zpt = self.app['test']
+        s = u'this is unicode'
+        zpt.write(s)
+        self.assertEqual(zpt.read(), s)
+        self.assertEqual(isinstance(zpt.read(), unicode), True)
+
+    def testWriteWontAcceptsNonUnicode(self):
+        # a plain (non-unicode) string passed to write() must raise TypeError
+        manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
+        zpt = self.app['test']
+        self.assertRaises(TypeError, zpt.write, 'this is not unicode')
+
+
+    def _createZPT(self):
+        # helper: add a utf-8 template with id 'test' to the app and return it
+        manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
+        zpt = self.app['test']
+        return zpt
+
+    def _makePUTRequest(self, body):
+        # NOTE(review): not called by any test in this class
+        return {'BODY' : body}
+
+    def _put(self, text):
+        # helper: create the template, set 'text' as the BODY of the app's
+        # request, PUT it and return the template for inspection
+        zpt = self._createZPT()
+        REQUEST = self.app.REQUEST
+        REQUEST.set('BODY', text)
+        zpt.PUT(REQUEST, REQUEST.RESPONSE)
+        return zpt
+
+    # The PUT tests check the 'output_encoding' and 'content_type' of the
+    # template after uploading one of the fixture documents.
+
+    def testPutHTMLIso8859_15WithCharsetInfo(self):
+        zpt = self._put(html_iso_8859_15_w_header)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+        self.assertEqual(zpt.content_type, 'text/html')
+
+    def testPutHTMLUTF8_WithCharsetInfo(self):
+        zpt = self._put(html_utf8_w_header)
+        self.assertEqual(zpt.output_encoding, 'utf-8')
+        self.assertEqual(zpt.content_type, 'text/html')
+
+    def testPutHTMLIso8859_15WithoutCharsetInfo(self):
+        zpt = self._put(html_iso_8859_15_wo_header)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+        self.assertEqual(zpt.content_type, 'text/html')
+
+    def testPutHTMLUTF8_WithoutCharsetInfo(self):
+        # without charset information 'iso-8859-15' is expected even for
+        # the utf-8 encoded fixture
+        zpt = self._put(html_utf8_wo_header)
+        self.assertEqual(zpt.output_encoding, 'iso-8859-15')
+        self.assertEqual(zpt.content_type, 'text/html')
+
+    def testPutXMLIso8859_15(self):
+        """ XML: always use UTF-8 as the output encoding """
+        zpt = self._put(xml_iso_8859_15)
+        self.assertEqual(zpt.output_encoding, 'utf-8')
+        self.assertEqual(zpt.content_type, 'text/xml')
+
+    def testPutXMLUTF8(self):
+        """ XML: always use UTF-8 as the output encoding """
+        zpt = self._put(xml_utf8)
+        self.assertEqual(zpt.output_encoding, 'utf-8')
+        self.assertEqual(zpt.content_type, 'text/xml')

 class ZPTRegressions(unittest.TestCase):

@@ -58,13 +209,6 @@ class ZPTRegressions(unittest.TestCase):
        pt = self.app.pt1
        self.assertEqual(pt.document_src(), self.text)

-    def test_BBB_for_strict_attribute(self):
-        # Collector 2213:  old templates don't have 'strict' attribute.
-        from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
-        zpt = ZopePageTemplate('issue_2213')
-        del zpt.strict  # simulate old templates
-        self.assertEqual(zpt.strict, False)
-

 class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase):

@@ -132,7 +276,9 @@ class DummyFileUpload:
       
 def test_suite():
+    # Build one suite from ZPTRegressions, ZPTUtilsTests, ZPTMacros and
+    # ZopePageTemplateFileTests.
    suite = unittest.makeSuite(ZPTRegressions)
+    suite.addTests(unittest.makeSuite(ZPTUtilsTests))
    suite.addTests(unittest.makeSuite(ZPTMacros))
+    suite.addTests(unittest.makeSuite(ZopePageTemplateFileTests))
    return suite

 if __name__ == '__main__':

--- a/lib/python/Products/PageTemplates/utils.py
+++ b/lib/python/Products/PageTemplates/utils.py
+##############################################################################
+#
+# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE
+#
+##############################################################################
+
+""" Some helper methods 
+
+$Id: ZopePageTemplate.py 71579 2006-12-17 20:26:10Z andreasjung $
+"""
+
+import re 
+
+
+# XML declaration at the start of the document; group 1 is the value of its
+# encoding attribute.  Both quote styles and optional whitespace around '='
+# are accepted, as permitted for an XML encoding declaration.
+xml_preamble_reg = re.compile(
+    r'''^<\?xml.*?encoding\s*=\s*["']([^"']*)["'].*?\?>''', re.M)
+
+# Complete <meta ... http-equiv ... content-type ...> tag (group 1).
+http_equiv_reg = re.compile(r'(<meta.*?http\-equiv.*?content-type.*?>)', re.I|re.M|re.S)
+
+# charset value inside such a tag (group 1, also named 'charset').  At least
+# one character is required so that 'charset = utf-8' yields 'utf-8' rather
+# than an empty match.
+http_equiv_reg2 = re.compile(r'''charset\s*=\s*["']?(?P<charset>[\w\-]+)''', re.I|re.M|re.S)
+
+def encodingFromXMLPreamble(xml):
+    """ Return the lower-cased encoding named in the XML preamble at the
+        start of 'xml', or 'utf-8' if the preamble pattern does not match.
+    """
+
+    match = xml_preamble_reg.match(xml)
+    if match is None:
+        return 'utf-8'
+    return match.group(1).lower()
+
+
+def charsetFromMetaEquiv(html):
+    """ Return the lower-cased 'charset' value from a html document
+        containing <meta http-equiv="content-type" content="text/html; charset=utf8">.
+        Returns None if there is no such tag or it carries no charset value.
+    """
+
+    # first check for the <meta...> tag
+    meta_match = http_equiv_reg.search(html)
+    if meta_match is None:
+        return None
+
+    # search for the charset value inside the extracted tag only
+    charset_match = http_equiv_reg2.search(meta_match.group(1))
+    if charset_match is None:
+        return None
+
+    # an empty capture means "not available": return None as documented
+    # instead of an empty string
+    return charset_match.group(1).lower() or None
+
+                                                                          
+
+def convertToUnicode(source, content_type, preferred_encodings):
+    """ Convert 'source' to unicode.
+        Returns (unicode_str, source_encoding).
+
+        text/xml:  'source' is decoded with the encoding returned by
+                   encodingFromXMLPreamble().
+        text/html: 'source' is decoded with the charset returned by
+                   charsetFromMetaEquiv() if there is one; otherwise the
+                   entries of 'preferred_encodings' are tried in order.
+
+        Raises TypeError if a text/html source declares no charset and none
+        of the preferred encodings can decode it, and ValueError for any
+        other content type.
+    """
+
+    if content_type.startswith('text/xml'):
+        encoding = encodingFromXMLPreamble(source)
+        return unicode(source, encoding), encoding
+
+    elif content_type.startswith('text/html'):
+
+        encoding = charsetFromMetaEquiv(source)
+
+        # A charset declared in the document takes precedence over sniffing.
+        # (Previously a declared charset was detected but never used and the
+        # function fell through to the TypeError below.)
+        if encoding:
+            return unicode(source, encoding), encoding
+
+        # Try to detect the encoding by converting it unicode without raising
+        # exceptions. There are some smarter Python-based sniffer methods
+        # available however we have to check their licenses first before
+        # including them into the Zope 2 core
+
+        for enc in preferred_encodings:
+            try:
+                return unicode(source, enc), enc
+            except UnicodeDecodeError:
+                continue
+
+        raise TypeError('Could not auto-detect encoding')
+
+    else:
+        raise ValueError('Unsupported content-type: %s' % content_type)
--- a/lib/python/Products/PageTemplates/www/default.html
+++ b/lib/python/Products/PageTemplates/www/default.html
 <html>
  <head>
    <title tal:content="template/title">The title</title>
+    <meta http-equiv="content-type" content="text/html;charset=utf-8">
  </head>
  <body>
    

--- a/lib/python/Products/PageTemplates/www/ptEdit.zpt
+++ b/lib/python/Products/PageTemplates/www/ptEdit.zpt
@@ -33,7 +33,7 @@
       tal:content="python:context.bobobase_modification_time().strftime('%Y-%m-%d %I:%M %p')">1/1/2000
      </div>
    </td>
-    <td align="left" valign="top" colspan="2">
+    <td align="left" valign="top" colspan="2" rowspan="2">
      <a href="source.html" tal:condition="context/html">Browse HTML source</a>
      <a href="source.xml" tal:condition="not:context/html">Browse XML source</a>
      <br />
@@ -45,7 +45,6 @@
  </tr>

  <tr tal:define="errors context/pt_errors" tal:condition="errors">
-    <tal:block define="global body python:context.document_src({'raw':1})" />
    <td align="left" valign="middle" class="form-label">Errors</td>
    <td align="left" valign="middle" style="background-color: #FFDDDD"
        colspan="3">