Commit ad5b9de2 authored by Guido van Rossum

Change normalize_encoding() to avoid using .translate() or depending on

the string type.  It will always return a Unicode string.  The algorithm's
specification is unchanged.
parent c3b6ac79
...@@ -34,12 +34,6 @@ from . import aliases ...@@ -34,12 +34,6 @@ from . import aliases
_cache = {} _cache = {}
_unknown = '--unknown--' _unknown = '--unknown--'
_import_tail = ['*'] _import_tail = ['*']
_norm_encoding_map = (' . '
'0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '
' abcdefghijklmnopqrstuvwxyz '
' '
' '
' ')
_aliases = aliases.aliases _aliases = aliases.aliases
class CodecRegistryError(LookupError, SystemError): class CodecRegistryError(LookupError, SystemError):
...@@ -58,14 +52,17 @@ def normalize_encoding(encoding): ...@@ -58,14 +52,17 @@ def normalize_encoding(encoding):
non-ASCII characters, these must be Latin-1 compatible. non-ASCII characters, these must be Latin-1 compatible.
""" """
# Make sure we have an 8-bit string, because .translate() works chars = []
# differently for Unicode strings. punct = False
if isinstance(encoding, str): for c in encoding:
# Note that .encode('latin-1') does *not* use the codec if c.isalnum() or c == '.':
# registry, so this call doesn't recurse. (See unicodeobject.c if punct and chars:
# PyUnicode_AsEncodedString() for details) chars.append('_')
encoding = encoding.encode('latin-1') chars.append(c)
return '_'.join(encoding.translate(_norm_encoding_map).split()) punct = False
else:
punct = True
return ''.join(chars)
def search_function(encoding): def search_function(encoding):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment