Commit 09892b36 authored by Andreas Jung's avatar Andreas Jung

LP/#324876: tightened regex for detecting the charset

from a meta-equiv header
parent a30cc136
...@@ -19,6 +19,9 @@ Zope Changes ...@@ -19,6 +19,9 @@ Zope Changes
Bugs Fixed Bugs Fixed
- LP/#324876: tightened regex for detecting the charset
from a meta-equiv header
- configure script: setting ZOPE_VERS to '2.11' - configure script: setting ZOPE_VERS to '2.11'
- Acquisition wrappers now correctly proxy __iter__. - Acquisition wrappers now correctly proxy __iter__.
......
...@@ -20,7 +20,15 @@ import re ...@@ -20,7 +20,15 @@ import re
xml_preamble_reg = re.compile(r'^<\?xml.*?encoding="(.*?)".*?\?>', re.M) xml_preamble_reg = re.compile(r'^<\?xml.*?encoding="(.*?)".*?\?>', re.M)
http_equiv_reg = re.compile(r'(<meta.*?http\-equiv.*?content-type.*?>)', re.I|re.M|re.S) # This regular expression is defined extremely carelessly. It starts
# with a tag beginning with 'meta' and extends until an arbitrary
# 'content-type' (maybe in a completely unrelated element).
# Tighten the expression a bit.
# Note that using a regular expression at all is unreliable as it does
# not know about e.g. HTML comments. A robust solution would need to
# use an HTML parser to locate the 'meta' tag.
#http_equiv_reg = re.compile(r'(<meta.*?http\-equiv.*?content-type.*?>)', re.I|re.M|re.S)
http_equiv_reg = re.compile(r'(<meta\s+[^>]*?http\-equiv[^>]*?content-type.*?>)', re.I|re.M|re.S)
http_equiv_reg2 = re.compile(r'charset.*?=.*?(?P<charset>[\w\-]*)', re.I|re.M|re.S) http_equiv_reg2 = re.compile(r'charset.*?=.*?(?P<charset>[\w\-]*)', re.I|re.M|re.S)
def encodingFromXMLPreamble(xml): def encodingFromXMLPreamble(xml):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment