Commit bed9f48e authored by Barry Warsaw

__init__(): Coerce the input_charset to unicode (with ascii encoding) before
calling .lower() on it.  This fixes the problem described in SF patch #866982
where, in the tr_TR.ISO-8859-9 locale, 'I'.lower() isn't 'i'.  Unicode strings
are locale insensitive.
parent 94aa9a4a
...@@ -185,8 +185,9 @@ class Charset: ...@@ -185,8 +185,9 @@ class Charset:
this attribute will have the same value as the input_codec. this attribute will have the same value as the input_codec.
""" """
def __init__(self, input_charset=DEFAULT_CHARSET): def __init__(self, input_charset=DEFAULT_CHARSET):
# RFC 2046, $4.1.2 says charsets are not case sensitive # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to
input_charset = input_charset.lower() # unicode because its .lower() is locale insensitive.
input_charset = unicode(input_charset, 'ascii').lower()
# Set the input charset after filtering through the aliases # Set the input charset after filtering through the aliases
self.input_charset = ALIASES.get(input_charset, input_charset) self.input_charset = ALIASES.get(input_charset, input_charset)
# We can try to guess which encoding and conversion to use by the # We can try to guess which encoding and conversion to use by the
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment