Commit 0601dfd6 authored by Andreas Jung's avatar Andreas Jung

Backported several encoding and webdav fixes for ZPT from the HEAD

parents 93fe1e0b 237c0626
...@@ -14,6 +14,16 @@ Zope Changes ...@@ -14,6 +14,16 @@ Zope Changes
- added Python 2.4.4 as optimal Python version to 'configure' - added Python 2.4.4 as optimal Python version to 'configure'
- the ZopePageTemplate implementation now uses unicode internally.
Non-unicode instances are migrated on-the-fly to unicode. However this
will work only properly for ZPT instances formerly encoded as utf-8 or
ISO-8859-15. For other encodings you might set the environment variable
ZPT_REFERRED_ENCODING to insert your preferred encoding in front of utf-8 and
ISO-8859-15 within the encoding sniffer code. In addition there is a new
'output_encodings' property that controls the conversion from/to unicode
for WebDAV/FTP operations.
Zope 2.10.1 (2006-11-22) Zope 2.10.1 (2006-11-22)
Bugs fixed Bugs fixed
......
...@@ -32,6 +32,14 @@ from zope.pagetemplate.pagetemplatefile import sniff_type ...@@ -32,6 +32,14 @@ from zope.pagetemplate.pagetemplatefile import sniff_type
LOG = getLogger('PageTemplateFile') LOG = getLogger('PageTemplateFile')
def guess_type(filename, text): def guess_type(filename, text):
# check for XML ourself since guess_content_type can't
# detect text/xml if 'filename' won't end with .xml
# XXX: fix this in zope.contenttype
if text.startswith('<?xml'):
return 'text/xml'
content_type, dummy = guess_content_type(filename, text) content_type, dummy = guess_content_type(filename, text)
if content_type in ('text/html', 'text/xml'): if content_type in ('text/html', 'text/xml'):
return content_type return content_type
......
...@@ -40,20 +40,14 @@ from Products.PageTemplates.PageTemplateFile import PageTemplateFile ...@@ -40,20 +40,14 @@ from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplateFile import guess_type from Products.PageTemplates.PageTemplateFile import guess_type
from Products.PageTemplates.Expressions import SecureModuleImporter from Products.PageTemplates.Expressions import SecureModuleImporter
# regular expression to extract the encoding from the XML preamble from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv, convertToUnicode
encoding_reg = re.compile('<\?xml.*?encoding="(.*?)".*?\?>', re.M)
preferred_encodings = ['utf-8', 'iso-8859-15'] preferred_encodings = ['utf-8', 'iso-8859-15']
if os.environ.has_key('ZPT_PREFERRED_ENCODING'): if os.environ.has_key('ZPT_PREFERRED_ENCODING'):
preferred_encodings.insert(0, os.environ['ZPT_PREFERRED_ENCODING']) preferred_encodings.insert(0, os.environ['ZPT_PREFERRED_ENCODING'])
def sniffEncoding(text, default_encoding='utf-8'):
"""Try to determine the encoding from html or xml"""
if text.startswith('<?xml'):
mo = encoding_reg.search(text)
if mo:
return mo.group(1)
return default_encoding
class Src(Acquisition.Explicit): class Src(Acquisition.Explicit):
""" I am scary code """ """ I am scary code """
...@@ -76,10 +70,10 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable, ...@@ -76,10 +70,10 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
__implements__ = (WriteLockInterface,) __implements__ = (WriteLockInterface,)
meta_type = 'Page Template' meta_type = 'Page Template'
output_encoding = 'iso-8859-15' # provide default for old instances
func_defaults = None func_defaults = None
func_code = FuncCode((), 0) func_code = FuncCode((), 0)
strict = False
_default_bindings = {'name_subpath': 'traverse_subpath'} _default_bindings = {'name_subpath': 'traverse_subpath'}
_default_content_fn = os.path.join(package_home(globals()), _default_content_fn = os.path.join(package_home(globals()),
...@@ -97,6 +91,7 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable, ...@@ -97,6 +91,7 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
_properties=({'id':'title', 'type': 'ustring', 'mode': 'w'}, _properties=({'id':'title', 'type': 'ustring', 'mode': 'w'},
{'id':'content_type', 'type':'string', 'mode': 'w'}, {'id':'content_type', 'type':'string', 'mode': 'w'},
{'id':'output_encoding', 'type':'string', 'mode': 'w'},
{'id':'expand', 'type':'boolean', 'mode': 'w'}, {'id':'expand', 'type':'boolean', 'mode': 'w'},
) )
...@@ -108,22 +103,68 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable, ...@@ -108,22 +103,68 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
security.declareProtected(view_management_screens, security.declareProtected(view_management_screens,
'read', 'ZScriptHTML_tryForm') 'read', 'ZScriptHTML_tryForm')
def __init__(self, id, text=None, content_type=None, encoding='utf-8', def __init__(self, id, text=None, content_type=None, strict=True, output_encoding='utf-8'):
strict=False):
self.id = id self.id = id
self.expand = 0 self.expand = 0
self.strict = strict
self.ZBindings_edit(self._default_bindings) self.ZBindings_edit(self._default_bindings)
self.output_encoding = output_encoding
# default content
if not text: if not text:
text = open(self._default_content_fn).read() text = open(self._default_content_fn).read()
encoding = 'utf-8'
content_type = 'text/html' content_type = 'text/html'
self.pt_edit(text, content_type, encoding) self.pt_edit(text, content_type)
security.declareProtected(change_page_templates, 'pt_edit') security.declareProtected(change_page_templates, 'pt_edit')
def pt_edit(self, text, content_type, encoding='utf-8'): def pt_edit(self, text, content_type, keep_output_encoding=False):
text = text.strip() text = text.strip()
if self.strict and not isinstance(text, unicode):
is_unicode = isinstance(text, unicode)
encoding = None
output_encoding = None
if content_type == 'text/xml':
if is_unicode:
encoding = None
output_encoding = 'utf-8'
else:
encoding = encodingFromXMLPreamble(text)
output_encoding = 'utf-8'
elif content_type == 'text/html':
charset = charsetFromMetaEquiv(text)
if is_unicode:
if charset:
encoding = None
output_encoding = charset
else:
encoding = None
output_encoding = 'iso-8859-15'
else:
if charset:
encoding = charset
output_encoding = charset
else:
encoding = 'iso-8859-15'
output_encoding = 'iso-8859-15'
else:
raise ValueError('Unsupported content-type %s' % content_type)
# for content updated through WebDAV, FTP
if not keep_output_encoding:
self.output_encoding = output_encoding
if not is_unicode:
text = unicode(text, encoding) text = unicode(text, encoding)
self.ZCacheable_invalidate() self.ZCacheable_invalidate()
...@@ -137,16 +178,21 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable, ...@@ -137,16 +178,21 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
source_dot_xml = Src() source_dot_xml = Src()
security.declareProtected(change_page_templates, 'pt_editAction') security.declareProtected(change_page_templates, 'pt_editAction')
def pt_editAction(self, REQUEST, title, text, content_type, encoding, expand): def pt_editAction(self, REQUEST, title, text, content_type, expand):
"""Change the title and document.""" """Change the title and document."""
if self.wl_isLocked(): if self.wl_isLocked():
raise ResourceLockedError("File is locked via WebDAV") raise ResourceLockedError("File is locked via WebDAV")
self.expand = expand self.expand = expand
self.pt_setTitle(title, encoding)
self.pt_edit(text, content_type, encoding) # The ZMI edit view uses utf-8! So we can safely assume
# that 'title' and 'text' are utf-8 encoded strings - hopefully
self.pt_setTitle(title, 'utf-8')
text = unicode(text, 'utf-8')
self.pt_edit(text, content_type, True)
REQUEST.set('text', self.read()) # May not equal 'text'! REQUEST.set('text', self.read()) # May not equal 'text'!
REQUEST.set('title', self.title) REQUEST.set('title', self.title)
message = "Saved changes." message = "Saved changes."
...@@ -155,9 +201,10 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable, ...@@ -155,9 +201,10 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
% '<br>'.join(self._v_warnings)) % '<br>'.join(self._v_warnings))
return self.pt_editForm(manage_tabs_message=message) return self.pt_editForm(manage_tabs_message=message)
security.declareProtected(change_page_templates, 'pt_setTitle') security.declareProtected(change_page_templates, 'pt_setTitle')
def pt_setTitle(self, title, encoding='utf-8'): def pt_setTitle(self, title, encoding='utf-8'):
if self.strict and not isinstance(title, unicode): if not isinstance(title, unicode):
title = unicode(title, encoding) title = unicode(title, encoding)
self._setPropValue('title', title) self._setPropValue('title', title)
...@@ -186,8 +233,7 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable, ...@@ -186,8 +233,7 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
if not content_type in ('text/html', 'text/xml'): if not content_type in ('text/html', 'text/xml'):
raise ValueError('Unsupported mimetype: %s' % content_type) raise ValueError('Unsupported mimetype: %s' % content_type)
encoding = sniffEncoding(text, encoding) self.pt_edit(text, content_type)
self.pt_edit(text, content_type, encoding)
return self.pt_editForm(manage_tabs_message='Saved changes') return self.pt_editForm(manage_tabs_message='Saved changes')
security.declareProtected(change_page_templates, 'pt_changePrefs') security.declareProtected(change_page_templates, 'pt_changePrefs')
...@@ -240,6 +286,8 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable, ...@@ -240,6 +286,8 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
return c return c
def write(self, text): def write(self, text):
if not isinstance(text, unicode):
raise TypeError("'text' parameter must be unicode")
self.ZCacheable_invalidate() self.ZCacheable_invalidate()
ZopePageTemplate.inheritedAttribute('write')(self, text) ZopePageTemplate.inheritedAttribute('write')(self, text)
...@@ -289,10 +337,12 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable, ...@@ -289,10 +337,12 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
security.declareProtected(change_page_templates, 'PUT') security.declareProtected(change_page_templates, 'PUT')
def PUT(self, REQUEST, RESPONSE): def PUT(self, REQUEST, RESPONSE):
""" Handle HTTP PUT requests """ """ Handle HTTP PUT requests """
self.dav__init(REQUEST, RESPONSE) self.dav__init(REQUEST, RESPONSE)
self.dav__simpleifhandler(REQUEST, RESPONSE, refresh=1) self.dav__simpleifhandler(REQUEST, RESPONSE, refresh=1)
## XXX this should be unicode or we must pass an encoding text = REQUEST.get('BODY', '')
self.pt_edit(REQUEST.get('BODY', '')) content_type = guess_type('', text)
self.pt_edit(text, content_type)
RESPONSE.setStatus(204) RESPONSE.setStatus(204)
return RESPONSE return RESPONSE
...@@ -303,8 +353,8 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable, ...@@ -303,8 +353,8 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
security.declareProtected(ftp_access, 'manage_FTPget') security.declareProtected(ftp_access, 'manage_FTPget')
def manage_FTPget(self): def manage_FTPget(self):
"Get source for FTP download" "Get source for FTP download"
self.REQUEST.RESPONSE.setHeader('Content-Type', self.content_type) result = self.pt_render()
return self.read() return result.encode(self.output_encoding)
security.declareProtected(view_management_screens, 'html') security.declareProtected(view_management_screens, 'html')
def html(self): def html(self):
...@@ -353,34 +403,28 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable, ...@@ -353,34 +403,28 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
# acquisition context, so we don't know where it is. :-( # acquisition context, so we don't know where it is. :-(
return None return None
def __setstate__(self, state):
# Perform on-the-fly migration to unicode.
# Perhaps it might be work with the 'generation' module here?
if not isinstance(state['_text'], unicode):
text, encoding = convertToUnicode(state['_text'],
state.get('content_type', 'text/html'),
preferred_encodings)
state['_text'] = text
state['output_encoding'] = encoding
self.__dict__.update(state)
def pt_render(self, source=False, extra_context={}):
result = PageTemplate.pt_render(self, source, extra_context)
assert isinstance(result, unicode)
return result
def wl_isLocked(self): def wl_isLocked(self):
return 0 return 0
def manage_convertUnicode(self, preferred_encodings=preferred_encodings,
RESPONSE=None):
"""Convert non-unicode templates to unicode"""
if not isinstance(self._text, unicode):
for encoding in preferred_encodings:
try:
self._text = unicode(self._text, encoding)
if RESPONSE:
return RESPONSE.redirect(self.absolute_url() +
'/pt_editForm?manage_tabs_message='
'ZPT+successfully+converted')
else:
return
except UnicodeDecodeError:
pass
raise RuntimeError('Pagetemplate could not be converted to unicode')
else:
if RESPONSE:
return RESPONSE.redirect(self.absolute_url() +
'/pt_editForm?manage_tabs_message='
'ZPT+already+converted')
else:
return
InitializeClass(ZopePageTemplate) InitializeClass(ZopePageTemplate)
...@@ -407,7 +451,7 @@ def manage_addPageTemplate(self, id, title='', text='', encoding='utf-8', ...@@ -407,7 +451,7 @@ def manage_addPageTemplate(self, id, title='', text='', encoding='utf-8',
content_type = headers['content_type'] content_type = headers['content_type']
else: else:
content_type = guess_type(filename, text) content_type = guess_type(filename, text)
encoding = sniffEncoding(text, encoding)
else: else:
if hasattr(text, 'read'): if hasattr(text, 'read'):
...@@ -418,9 +462,14 @@ def manage_addPageTemplate(self, id, title='', text='', encoding='utf-8', ...@@ -418,9 +462,14 @@ def manage_addPageTemplate(self, id, title='', text='', encoding='utf-8',
content_type = headers['content_type'] content_type = headers['content_type']
else: else:
content_type = guess_type(filename, text) content_type = guess_type(filename, text)
encoding = sniffEncoding(text, encoding)
zpt = ZopePageTemplate(id, text, content_type, encoding) # ensure that we pass unicode to the constructor to
# avoid further hassles with pt_edit()
if not isinstance(text, unicode):
text = unicode(text, encoding)
zpt = ZopePageTemplate(id, text, content_type, output_encoding=encoding)
zpt.pt_setTitle(title, encoding) zpt.pt_setTitle(title, encoding)
self._setObject(id, zpt) self._setObject(id, zpt)
zpt = getattr(self, id) zpt = getattr(self, id)
......
# -*- encoding: iso-8859-15 -*-
"""ZopePageTemplate regression tests. """ZopePageTemplate regression tests.
Ensures that adding a page template works correctly. Ensures that adding a page template works correctly.
...@@ -6,13 +8,162 @@ Note: Tests require Zope >= 2.7 ...@@ -6,13 +8,162 @@ Note: Tests require Zope >= 2.7
""" """
import unittest import unittest
import Zope2 import Zope2
import transaction import transaction
import zope.component.testing import zope.component.testing
from zope.traversing.adapters import DefaultTraversable from zope.traversing.adapters import DefaultTraversable
from Testing.makerequest import makerequest from Testing.makerequest import makerequest
from Testing.ZopeTestCase import ZopeTestCase, installProduct
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
ascii_str = '<html><body>hello world</body></html>'
iso885915_str = '<html><body></body></html>'
utf8_str = unicode(iso885915_str, 'iso-8859-15').encode('utf-8')
xml_template = '''<?xml vesion="1.0" encoding="%s"?>
<foo>
</foo>
'''
xml_iso_8859_15 = xml_template % 'iso-8859-15'
xml_utf8 = unicode(xml_template, 'iso-8859-15').encode('utf-8') % 'utf-8'
html_template_w_header = '''
<html>
<head>
<META http-equiv="content-type" content="text/html; charset=%s">
</hed>
<body>
test
</body>
</html>
'''
html_iso_8859_15_w_header = html_template_w_header % 'iso-8859-15'
html_utf8_w_header = unicode(html_template_w_header, 'iso-8859-15').encode('utf-8') % 'utf-8'
html_template_wo_header = '''
<html>
<body>
test
</body>
</html>
'''
html_iso_8859_15_wo_header = html_template_wo_header
html_utf8_wo_header = unicode(html_template_wo_header, 'iso-8859-15').encode('utf-8')
installProduct('PageTemplates')
class ZPTUtilsTests(unittest.TestCase):
def testExtractEncodingFromXMLPreamble(self):
extract = encodingFromXMLPreamble
self.assertEqual(extract('<?xml version="1.0" ?>'), 'utf-8')
self.assertEqual(extract('<?xml encoding="utf-8" version="1.0" ?>'), 'utf-8')
self.assertEqual(extract('<?xml encoding="UTF-8" version="1.0" ?>'), 'utf-8')
self.assertEqual(extract('<?xml encoding="ISO-8859-15" version="1.0" ?>'), 'iso-8859-15')
self.assertEqual(extract('<?xml encoding="iso-8859-15" version="1.0" ?>'), 'iso-8859-15')
def testExtractCharsetFromMetaHTTPEquivTag(self):
extract = charsetFromMetaEquiv
self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>'), 'utf-8')
self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>'), 'iso-8859-15')
self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html"></html>'), None)
self.assertEqual(extract('<html>...<html>'), None)
class ZopePageTemplateFileTests(ZopeTestCase):
def testPT_RenderWithAscii(self):
manage_addPageTemplate(self.app, 'test', text=ascii_str, encoding='ascii')
zpt = self.app['test']
result = zpt.pt_render()
# use startswith() because the renderer appends a trailing \n
self.assertEqual(result.encode('ascii').startswith(ascii_str), True)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def testPT_RenderWithISO885915(self):
manage_addPageTemplate(self.app, 'test', text=iso885915_str, encoding='iso-8859-15')
zpt = self.app['test']
result = zpt.pt_render()
# use startswith() because the renderer appends a trailing \n
self.assertEqual(result.encode('iso-8859-15').startswith(iso885915_str), True)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def testPT_RenderWithUTF8(self):
manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
zpt = self.app['test']
result = zpt.pt_render()
# use startswith() because the renderer appends a trailing \n
self.assertEqual(result.encode('utf-8').startswith(utf8_str), True)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def testWriteAcceptsUnicode(self):
manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
zpt = self.app['test']
s = u'this is unicode'
zpt.write(s)
self.assertEqual(zpt.read(), s)
self.assertEqual(isinstance(zpt.read(), unicode), True)
def testWriteWontAcceptsNonUnicode(self):
manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
zpt = self.app['test']
self.assertRaises(TypeError, zpt.write, 'this is not unicode')
def _createZPT(self):
manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
zpt = self.app['test']
return zpt
def _makePUTRequest(self, body):
return {'BODY' : body}
def _put(self, text):
zpt = self._createZPT()
REQUEST = self.app.REQUEST
REQUEST.set('BODY', text)
zpt.PUT(REQUEST, REQUEST.RESPONSE)
return zpt
def testPutHTMLIso8859_15WithCharsetInfo(self):
zpt = self._put(html_iso_8859_15_w_header)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
self.assertEqual(zpt.content_type, 'text/html')
def testPutHTMLUTF8_WithCharsetInfo(self):
zpt = self._put(html_utf8_w_header)
self.assertEqual(zpt.output_encoding, 'utf-8')
self.assertEqual(zpt.content_type, 'text/html')
def testPutHTMLIso8859_15WithoutCharsetInfo(self):
zpt = self._put(html_iso_8859_15_wo_header)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
self.assertEqual(zpt.content_type, 'text/html')
def testPutHTMLUTF8_WithoutCharsetInfo(self):
zpt = self._put(html_utf8_wo_header)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
self.assertEqual(zpt.content_type, 'text/html')
def testPutXMLIso8859_15(self):
""" XML: use always UTF-8 als output encoding """
zpt = self._put(xml_iso_8859_15)
self.assertEqual(zpt.output_encoding, 'utf-8')
self.assertEqual(zpt.content_type, 'text/xml')
def testPutXMLUTF8(self):
""" XML: use always UTF-8 als output encoding """
zpt = self._put(xml_utf8)
self.assertEqual(zpt.output_encoding, 'utf-8')
self.assertEqual(zpt.content_type, 'text/xml')
class ZPTRegressions(unittest.TestCase): class ZPTRegressions(unittest.TestCase):
...@@ -58,13 +209,6 @@ class ZPTRegressions(unittest.TestCase): ...@@ -58,13 +209,6 @@ class ZPTRegressions(unittest.TestCase):
pt = self.app.pt1 pt = self.app.pt1
self.assertEqual(pt.document_src(), self.text) self.assertEqual(pt.document_src(), self.text)
def test_BBB_for_strict_attribute(self):
# Collector 2213: old templates don't have 'strict' attribute.
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
zpt = ZopePageTemplate('issue_2213')
del zpt.strict # simulate old templates
self.assertEqual(zpt.strict, False)
class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase): class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase):
...@@ -132,7 +276,9 @@ class DummyFileUpload: ...@@ -132,7 +276,9 @@ class DummyFileUpload:
def test_suite(): def test_suite():
suite = unittest.makeSuite(ZPTRegressions) suite = unittest.makeSuite(ZPTRegressions)
suite.addTests(unittest.makeSuite(ZPTUtilsTests))
suite.addTests(unittest.makeSuite(ZPTMacros)) suite.addTests(unittest.makeSuite(ZPTMacros))
suite.addTests(unittest.makeSuite(ZopePageTemplateFileTests))
return suite return suite
if __name__ == '__main__': if __name__ == '__main__':
......
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
""" Some helper methods
$Id: ZopePageTemplate.py 71579 2006-12-17 20:26:10Z andreasjung $
"""
import re
xml_preamble_reg = re.compile(r'^<\?xml.*?encoding="(.*?)".*?\?>', re.M)
http_equiv_reg = re.compile(r'(<meta.*?http\-equiv.*?content-type.*?>)', re.I|re.M|re.S)
http_equiv_reg2 = re.compile(r'charset.*?=.*?(?P<charset>[\w\-]*)', re.I|re.M|re.S)
def encodingFromXMLPreamble(xml):
""" Extract the encoding from a xml preamble.
Return 'utf-8' if not available
"""
mo = xml_preamble_reg.match(xml)
if not mo:
return 'utf-8'
else:
return mo.group(1).lower()
def charsetFromMetaEquiv(html):
""" Return the value of the 'charset' from a html document
containing <meta http-equiv="content-type" content="text/html; charset=utf8>.
Returns None, if not available.
"""
# first check for the <meta...> tag
mo = http_equiv_reg.search(html)
if mo:
# extract the meta tag
meta = mo.group(1)
# search for the charset value
mo = http_equiv_reg2.search(meta)
if mo:
# return charset
return mo.group(1).lower()
return None
def convertToUnicode(source, content_type, preferred_encodings):
""" Convert 'source' to unicode.
Returns (unicode_str, source_encoding).
"""
if content_type.startswith('text/xml'):
encoding = encodingFromXMLPreamble(source)
return unicode(source, encoding), encoding
elif content_type.startswith('text/html'):
encoding = charsetFromMetaEquiv(source)
# Try to detect the encoding by converting it unicode without raising
# exceptions. There are some smarter Python-based sniffer methods
# available however we have to check their licenses first before
# including them into the Zope 2 core
if not encoding:
for enc in preferred_encodings:
try:
return unicode(source, enc), enc
except UnicodeDecodeError:
continue
raise TypeError('Could not auto-detect encoding')
else:
raise ValueError('Unsupported content-type: %s' % content_type)
<html> <html>
<head> <head>
<title tal:content="template/title">The title</title> <title tal:content="template/title">The title</title>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
</head> </head>
<body> <body>
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
tal:content="python:context.bobobase_modification_time().strftime('%Y-%m-%d %I:%M %p')">1/1/2000 tal:content="python:context.bobobase_modification_time().strftime('%Y-%m-%d %I:%M %p')">1/1/2000
</div> </div>
</td> </td>
<td align="left" valign="top" colspan="2"> <td align="left" valign="top" colspan="2" rowspan="2">
<a href="source.html" tal:condition="context/html">Browse HTML source</a> <a href="source.html" tal:condition="context/html">Browse HTML source</a>
<a href="source.xml" tal:condition="not:context/html">Browse XML source</a> <a href="source.xml" tal:condition="not:context/html">Browse XML source</a>
<br /> <br />
...@@ -45,7 +45,6 @@ ...@@ -45,7 +45,6 @@
</tr> </tr>
<tr tal:define="errors context/pt_errors" tal:condition="errors"> <tr tal:define="errors context/pt_errors" tal:condition="errors">
<tal:block define="global body python:context.document_src({'raw':1})" />
<td align="left" valign="middle" class="form-label">Errors</td> <td align="left" valign="middle" class="form-label">Errors</td>
<td align="left" valign="middle" style="background-color: #FFDDDD" <td align="left" valign="middle" style="background-color: #FFDDDD"
colspan="3"> colspan="3">
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment