diff --git a/lib/python/Products/PageTemplates/PageTemplateFile.py b/lib/python/Products/PageTemplates/PageTemplateFile.py index 2f1c7a6d053c5388b3268fee64c6811f886df488..2ba0e6b5662a704a411920792c1577f5e9af811a 100755 --- a/lib/python/Products/PageTemplates/PageTemplateFile.py +++ b/lib/python/Products/PageTemplates/PageTemplateFile.py @@ -15,7 +15,7 @@ Zope object encapsulating a Page Template from the filesystem. """ -__version__='$Revision: 1.23 $'[11:-2] +__version__='$Revision: 1.24 $'[11:-2] import os, AccessControl, Acquisition, sys from Globals import package_home, DevelopmentMode @@ -117,7 +117,12 @@ class PageTemplateFile(Script, PageTemplate, Traversable): mtime = 0 if self._v_program is not None and mtime == self._v_last_read: return - self.pt_edit(open(self.filename), None) + f = open(self.filename, "rb") + try: + text = f.read() + finally: + f.close() + self.pt_edit(text, sniff_type(text)) self._cook() if self._v_errors: LOG('PageTemplateFile', ERROR, 'Error in template', @@ -154,3 +159,19 @@ class PageTemplateFile(Script, PageTemplate, Traversable): from ZODB.POSException import StorageError raise StorageError, ("Instance of AntiPersistent class %s " "cannot be stored." % self.__class__.__name__) + + +XML_PREFIXES = [ + "<?xml", # ascii, utf-8 + "\xef\xbb\xbf<?xml", # utf-8 w/ byte order mark + "\0<\0?\0x\0m\0l", # utf-16 big endian + "<\0?\0x\0m\0l\0", # utf-16 little endian + "\xfe\xff\0<\0?\0x\0m\0l", # utf-16 big endian w/ byte order mark + "\xff\xfe<\0?\0x\0m\0l\0", # utf-16 little endian w/ byte order mark + ] + +def sniff_type(text): + for prefix in XML_PREFIXES: + if text.startswith(prefix): + return "text/xml" + return None diff --git a/lib/python/Products/PageTemplates/tests/test_ptfile.py b/lib/python/Products/PageTemplates/tests/test_ptfile.py new file mode 100644 index 0000000000000000000000000000000000000000..c78bd2a7612a0c0756f1713d867a09858f50e24c --- /dev/null +++ b/lib/python/Products/PageTemplates/tests/test_ptfile.py @@ -0,0 +1,126 @@ +"""Tests of PageTemplateFile.""" + +import os +import tempfile +import unittest + +from Products.PageTemplates.PageTemplateFile import PageTemplateFile + + +class TypeSniffingTestCase(unittest.TestCase): + + TEMPFILENAME = tempfile.mktemp() + + def tearDown(self): + if os.path.exists(self.TEMPFILENAME): + os.unlink(self.TEMPFILENAME) + + def check_content_type(self, text, expected_type): + f = open(self.TEMPFILENAME, "wb") + f.write(text) + f.close() + pt = PageTemplateFile(self.TEMPFILENAME) + pt.read() + self.assertEqual(pt.content_type, expected_type) + + def test_sniffer_xml_ascii(self): + self.check_content_type( + "<?xml version='1.0' encoding='ascii'?><doc/>", + "text/xml") + self.check_content_type( + "<?xml\tversion='1.0' encoding='ascii'?><doc/>", + "text/xml") + + def test_sniffer_xml_utf8(self): + # w/out byte order mark + self.check_content_type( + "<?xml version='1.0' encoding='utf-8'?><doc/>", + "text/xml") + self.check_content_type( + "<?xml\tversion='1.0' encoding='utf-8'?><doc/>", + "text/xml") + # with byte order mark + self.check_content_type( + "\xef\xbb\xbf<?xml version='1.0' encoding='utf-8'?><doc/>", + "text/xml") + self.check_content_type( + "\xef\xbb\xbf<?xml\tversion='1.0' encoding='utf-8'?><doc/>", + "text/xml") + + def test_sniffer_xml_utf16_be(self): + # w/out byte order mark + self.check_content_type( + "\0<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'" + "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>" + "\0<\0d\0o\0c\0/\0>", + "text/xml") + self.check_content_type( + "\0<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'" + "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>" + "\0<\0d\0o\0c\0/\0>", + "text/xml") + # with byte order mark + self.check_content_type( + "\xfe\xff" + "\0<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'" + "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>" + "\0<\0d\0o\0c\0/\0>", + "text/xml") + self.check_content_type( + "\xfe\xff" + "\0<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'" + "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>" + "\0<\0d\0o\0c\0/\0>", + "text/xml") + + def test_sniffer_xml_utf16_le(self): + # w/out byte order mark + self.check_content_type( + "<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0" + "<\0d\0o\0c\0/\0>\n", + "text/xml") + self.check_content_type( + "<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0" + "<\0d\0o\0c\0/\0>\0", + "text/xml") + # with byte order mark + self.check_content_type( + "\xff\xfe" + "<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0" + "<\0d\0o\0c\0/\0>\0", + "text/xml") + self.check_content_type( + "\xff\xfe" + "<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\01\0.\0000\0'\0" + " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0" + "<\0d\0o\0c\0/\0>\0", + "text/xml") + + HTML_PUBLIC_ID = "-//W3C//DTD HTML 4.01 Transitional//EN" + HTML_SYSTEM_ID = "http://www.w3.org/TR/html4/loose.dtd" + + def test_sniffer_html_ascii(self): + self.check_content_type( + "<!DOCTYPE html [ SYSTEM '%s' ]><html></html>" + % self.HTML_SYSTEM_ID, + "text/html") + self.check_content_type( + "<html><head><title>sample document</title></head></html>", + "text/html") + + # XXX This reflects a case that simply isn't handled by the + # sniffer; there are many, but it gets it right more often than + # before. + def donttest_sniffer_xml_simple(self): + self.check_content_type("<doc><element/></doc>", + "text/xml") + + +def test_suite(): + return unittest.makeSuite(TypeSniffingTestCase) + +if __name__ == "__main__": + unittest.main(defaultTest="test_suite")