Commit ef87d6ed authored by Guido van Rossum

Rip out all the u"..." literals and calls to unicode().

parent 572dbf8f
......@@ -376,7 +376,7 @@ class HTMLParser(markupbase.ParserBase):
# which is not part of HTML 4
import htmlentitydefs
if HTMLParser.entitydefs is None:
entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
entitydefs = HTMLParser.entitydefs = {'apos':"'"}
for k, v in htmlentitydefs.name2codepoint.items():
entitydefs[k] = unichr(v)
try:
......
......@@ -589,7 +589,7 @@ class StreamReader(Codec):
"""
self.bytebuffer = ""
self.charbuffer = u""
self.charbuffer = ""
self.linebuffer = None
def seek(self, offset, whence=0):
......
......@@ -740,7 +740,7 @@ class Transformer:
# hack... changes in compile.c:parsestr and
# tokenizer.c must be reflected here.
if self.encoding not in ['utf-8', 'iso-8859-1']:
lit = unicode(lit, 'utf-8').encode(self.encoding)
lit = str(lit, 'utf-8').encode(self.encoding)
return eval("# coding: %s\n%s" % (self.encoding, lit))
else:
return eval(lit)
......
......@@ -644,7 +644,7 @@ def escape_path(path):
# And here, kind of: draft-fielding-uri-rfc2396bis-03
# (And in draft IRI specification: draft-duerst-iri-05)
# (And here, for new URI schemes: RFC 2718)
if isinstance(path, unicode):
if isinstance(path, str):
path = path.encode("utf-8")
path = urllib.quote(path, HTTP_PATH_SAFE)
path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
......
......@@ -186,7 +186,7 @@ except NameError:
pass
d[str] = _deepcopy_atomic
try:
d[unicode] = _deepcopy_atomic
d[str] = _deepcopy_atomic
except NameError:
pass
try:
......
......@@ -59,7 +59,7 @@ def create_string_buffer(init, size=None):
create_string_buffer(anInteger) -> character array
create_string_buffer(aString, anInteger) -> character array
"""
if isinstance(init, (str, unicode)):
if isinstance(init, (str, str)):
if size is None:
size = len(init)+1
buftype = c_char * size
......@@ -281,7 +281,7 @@ else:
create_unicode_buffer(anInteger) -> character array
create_unicode_buffer(aString, anInteger) -> character array
"""
if isinstance(init, (str, unicode)):
if isinstance(init, (str, str)):
if size is None:
size = len(init)+1
buftype = c_wchar * size
......
......@@ -33,7 +33,7 @@ DEFAULT_LIBRARY_FALLBACK = [
def ensure_utf8(s):
"""Not all of PyObjC and Python understand unicode paths very well yet"""
if isinstance(s, unicode):
if isinstance(s, str):
return s.encode('utf8')
return s
......
......@@ -24,7 +24,7 @@ class BasicWrapTestCase(unittest.TestCase):
return
f = dll._testfunc_i_bhilfd
f.argtypes = [c_byte, c_wchar, c_int, c_long, c_float, c_double]
result = f(self.wrap(1), self.wrap(u"x"), self.wrap(3), self.wrap(4), self.wrap(5.0), self.wrap(6.0))
result = f(self.wrap(1), self.wrap("x"), self.wrap(3), self.wrap(4), self.wrap(5.0), self.wrap(6.0))
self.failUnlessEqual(result, 139)
self.failUnless(type(result), int)
......
......@@ -17,7 +17,7 @@ class StringBufferTestCase(unittest.TestCase):
self.failUnlessEqual(b[:], "abc\0")
def test_string_conversion(self):
b = create_string_buffer(u"abc")
b = create_string_buffer("abc")
self.failUnlessEqual(len(b), 4) # trailing nul char
self.failUnlessEqual(sizeof(b), 4 * sizeof(c_char))
self.failUnless(type(b[0]) is str)
......@@ -33,21 +33,21 @@ class StringBufferTestCase(unittest.TestCase):
b = create_unicode_buffer(32)
self.failUnlessEqual(len(b), 32)
self.failUnlessEqual(sizeof(b), 32 * sizeof(c_wchar))
self.failUnless(type(b[0]) is unicode)
self.failUnless(type(b[0]) is str)
b = create_unicode_buffer(u"abc")
b = create_unicode_buffer("abc")
self.failUnlessEqual(len(b), 4) # trailing nul char
self.failUnlessEqual(sizeof(b), 4 * sizeof(c_wchar))
self.failUnless(type(b[0]) is unicode)
self.failUnlessEqual(b[0], u"a")
self.failUnless(type(b[0]) is str)
self.failUnlessEqual(b[0], "a")
self.failUnlessEqual(b[:], "abc\0")
def test_unicode_conversion(self):
b = create_unicode_buffer("abc")
self.failUnlessEqual(len(b), 4) # trailing nul char
self.failUnlessEqual(sizeof(b), 4 * sizeof(c_wchar))
self.failUnless(type(b[0]) is unicode)
self.failUnlessEqual(b[0], u"a")
self.failUnless(type(b[0]) is str)
self.failUnlessEqual(b[0], "a")
self.failUnlessEqual(b[:], "abc\0")
if __name__ == "__main__":
......
......@@ -70,7 +70,7 @@ class FunctionTestCase(unittest.TestCase):
return
f = dll._testfunc_i_bhilfd
f.argtypes = [c_byte, c_wchar, c_int, c_long, c_float, c_double]
result = f(1, u"x", 3, 4, 5.0, 6.0)
result = f(1, "x", 3, 4, 5.0, 6.0)
self.failUnlessEqual(result, 139)
self.failUnlessEqual(type(result), int)
......@@ -83,7 +83,7 @@ class FunctionTestCase(unittest.TestCase):
f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double]
f.restype = c_wchar
result = f(0, 0, 0, 0, 0, 0)
self.failUnlessEqual(result, u'\x00')
self.failUnlessEqual(result, '\x00')
def test_voidresult(self):
f = dll._testfunc_v
......
......@@ -58,8 +58,8 @@ class SimpleTypesTestCase(unittest.TestCase):
self.failUnless(c_char_p.from_param(s)._obj is s)
# new in 0.9.1: convert (encode) unicode to ascii
self.failUnlessEqual(c_char_p.from_param(u"123")._obj, "123")
self.assertRaises(UnicodeEncodeError, c_char_p.from_param, u"123\377")
self.failUnlessEqual(c_char_p.from_param("123")._obj, "123")
self.assertRaises(UnicodeEncodeError, c_char_p.from_param, "123\377")
self.assertRaises(TypeError, c_char_p.from_param, 42)
......@@ -75,16 +75,16 @@ class SimpleTypesTestCase(unittest.TestCase):
except ImportError:
## print "(No c_wchar_p)"
return
s = u"123"
s = "123"
if sys.platform == "win32":
self.failUnless(c_wchar_p.from_param(s)._obj is s)
self.assertRaises(TypeError, c_wchar_p.from_param, 42)
# new in 0.9.1: convert (decode) ascii to unicode
self.failUnlessEqual(c_wchar_p.from_param("123")._obj, u"123")
self.failUnlessEqual(c_wchar_p.from_param("123")._obj, "123")
self.assertRaises(UnicodeDecodeError, c_wchar_p.from_param, "123\377")
pa = c_wchar_p.from_param(c_wchar_p(u"123"))
pa = c_wchar_p.from_param(c_wchar_p("123"))
self.failUnlessEqual(type(pa), c_wchar_p)
def test_int_pointers(self):
......
......@@ -123,7 +123,7 @@ class CharPointersTestCase(unittest.TestCase):
pass
else:
self.failUnlessEqual(None, func(c_wchar_p(None)))
self.failUnlessEqual(u"123", func(c_wchar_p(u"123")))
self.failUnlessEqual("123", func(c_wchar_p("123")))
def test_instance(self):
func = testdll._testfunc_p_p
......@@ -157,24 +157,24 @@ else:
func.argtypes = POINTER(c_wchar),
self.failUnlessEqual(None, func(None))
self.failUnlessEqual(u"123", func(u"123"))
self.failUnlessEqual("123", func("123"))
self.failUnlessEqual(None, func(c_wchar_p(None)))
self.failUnlessEqual(u"123", func(c_wchar_p(u"123")))
self.failUnlessEqual("123", func(c_wchar_p("123")))
self.failUnlessEqual(u"123", func(c_wbuffer(u"123")))
self.failUnlessEqual("123", func(c_wbuffer("123")))
ca = c_wchar("a")
self.failUnlessEqual(u"a", func(pointer(ca))[0])
self.failUnlessEqual(u"a", func(byref(ca))[0])
self.failUnlessEqual("a", func(pointer(ca))[0])
self.failUnlessEqual("a", func(byref(ca))[0])
def test_c_wchar_p_arg(self):
func = testdll._testfunc_p_p
func.restype = c_wchar_p
func.argtypes = c_wchar_p,
c_wchar_p.from_param(u"123")
c_wchar_p.from_param("123")
self.failUnlessEqual(None, func(None))
self.failUnlessEqual("123", func(u"123"))
self.failUnlessEqual("123", func("123"))
self.failUnlessEqual(None, func(c_wchar_p(None)))
self.failUnlessEqual("123", func(c_wchar_p("123")))
......
......@@ -45,7 +45,7 @@ class SlicesTestCase(unittest.TestCase):
import operator
self.assertRaises(TypeError, operator.setslice,
res, 0, 5, u"abcde")
res, 0, 5, "abcde")
dll.my_free(res)
dll.my_strdup.restype = POINTER(c_byte)
......@@ -88,7 +88,7 @@ class SlicesTestCase(unittest.TestCase):
pass
else:
def test_wchar_ptr(self):
s = u"abcdefghijklmnopqrstuvwxyz\0"
s = "abcdefghijklmnopqrstuvwxyz\0"
dll = CDLL(_ctypes_test.__file__)
dll.my_wcsdup.restype = POINTER(c_wchar)
......@@ -99,7 +99,7 @@ class SlicesTestCase(unittest.TestCase):
import operator
self.assertRaises(TypeError, operator.setslice,
res, 0, 5, u"abcde")
res, 0, 5, "abcde")
dll.my_free(res)
if sizeof(c_wchar) == sizeof(c_short):
......
......@@ -62,17 +62,17 @@ else:
def test(self):
BUF = c_wchar * 4
buf = BUF(u"a", u"b", u"c")
self.failUnlessEqual(buf.value, u"abc")
buf = BUF("a", "b", "c")
self.failUnlessEqual(buf.value, "abc")
buf.value = u"ABCD"
self.failUnlessEqual(buf.value, u"ABCD")
buf.value = "ABCD"
self.failUnlessEqual(buf.value, "ABCD")
buf.value = u"x"
self.failUnlessEqual(buf.value, u"x")
buf.value = "x"
self.failUnlessEqual(buf.value, "x")
buf[1] = u"Z"
self.failUnlessEqual(buf.value, u"xZCD")
buf[1] = "Z"
self.failUnlessEqual(buf.value, "xZCD")
class StringTestCase(unittest.TestCase):
def XX_test_basic_strings(self):
......@@ -99,7 +99,7 @@ class StringTestCase(unittest.TestCase):
self.failUnlessEqual(cs.value, "XY")
self.failUnlessEqual(cs.raw, "XY\000\000\000\000\000")
self.assertRaises(TypeError, c_string, u"123")
self.assertRaises(TypeError, c_string, "123")
def XX_test_sized_strings(self):
......@@ -142,13 +142,13 @@ except NameError:
else:
class WStringTestCase(unittest.TestCase):
def test_wchar(self):
c_wchar(u"x")
repr(byref(c_wchar(u"x")))
c_wchar("x")
repr(byref(c_wchar("x")))
c_wchar("x")
def X_test_basic_wstrings(self):
cs = c_wstring(u"abcdef")
cs = c_wstring("abcdef")
# XXX This behaviour is about to change:
# len returns the size of the internal buffer in bytes.
......@@ -156,30 +156,30 @@ else:
self.failUnless(sizeof(cs) == 14)
# The value property is the string up to the first terminating NUL.
self.failUnless(cs.value == u"abcdef")
self.failUnless(c_wstring(u"abc\000def").value == u"abc")
self.failUnless(cs.value == "abcdef")
self.failUnless(c_wstring("abc\000def").value == "abc")
self.failUnless(c_wstring(u"abc\000def").value == u"abc")
self.failUnless(c_wstring("abc\000def").value == "abc")
# The raw property is the total buffer contents:
self.failUnless(cs.raw == u"abcdef\000")
self.failUnless(c_wstring(u"abc\000def").raw == u"abc\000def\000")
self.failUnless(cs.raw == "abcdef\000")
self.failUnless(c_wstring("abc\000def").raw == "abc\000def\000")
# We can change the value:
cs.value = u"ab"
self.failUnless(cs.value == u"ab")
self.failUnless(cs.raw == u"ab\000\000\000\000\000")
cs.value = "ab"
self.failUnless(cs.value == "ab")
self.failUnless(cs.raw == "ab\000\000\000\000\000")
self.assertRaises(TypeError, c_wstring, "123")
self.assertRaises(ValueError, c_wstring, 0)
def X_test_toolong(self):
cs = c_wstring(u"abcdef")
cs = c_wstring("abcdef")
# Much too long string:
self.assertRaises(ValueError, setattr, cs, "value", u"123456789012345")
self.assertRaises(ValueError, setattr, cs, "value", "123456789012345")
# One char too long values:
self.assertRaises(ValueError, setattr, cs, "value", u"1234567")
self.assertRaises(ValueError, setattr, cs, "value", "1234567")
def run_test(rep, msg, func, arg):
......
......@@ -269,15 +269,15 @@ class StructureTestCase(unittest.TestCase):
_fields_ = [("name", c_wchar * 12),
("age", c_int)]
p = PersonW(u"Someone")
p = PersonW("Someone")
self.failUnlessEqual(p.name, "Someone")
self.failUnlessEqual(PersonW(u"1234567890").name, u"1234567890")
self.failUnlessEqual(PersonW(u"12345678901").name, u"12345678901")
self.failUnlessEqual(PersonW("1234567890").name, "1234567890")
self.failUnlessEqual(PersonW("12345678901").name, "12345678901")
# exact fit
self.failUnlessEqual(PersonW(u"123456789012").name, u"123456789012")
self.failUnlessEqual(PersonW("123456789012").name, "123456789012")
#too long
self.assertRaises(ValueError, PersonW, u"1234567890123")
self.assertRaises(ValueError, PersonW, "1234567890123")
def test_init_errors(self):
class Phone(Structure):
......
......@@ -23,31 +23,31 @@ else:
def test_ascii_strict(self):
ctypes.set_conversion_mode("ascii", "strict")
# no conversions take place with unicode arguments
self.failUnlessEqual(wcslen(u"abc"), 3)
self.failUnlessEqual(wcslen(u"ab\u2070"), 3)
self.failUnlessEqual(wcslen("abc"), 3)
self.failUnlessEqual(wcslen("ab\u2070"), 3)
# string args are converted
self.failUnlessEqual(wcslen("abc"), 3)
self.failUnlessRaises(ctypes.ArgumentError, wcslen, "ab")
def test_ascii_replace(self):
ctypes.set_conversion_mode("ascii", "replace")
self.failUnlessEqual(wcslen(u"abc"), 3)
self.failUnlessEqual(wcslen(u"ab\u2070"), 3)
self.failUnlessEqual(wcslen("abc"), 3)
self.failUnlessEqual(wcslen("ab\u2070"), 3)
self.failUnlessEqual(wcslen("abc"), 3)
self.failUnlessEqual(wcslen("ab"), 3)
def test_ascii_ignore(self):
ctypes.set_conversion_mode("ascii", "ignore")
self.failUnlessEqual(wcslen(u"abc"), 3)
self.failUnlessEqual(wcslen(u"ab\u2070"), 3)
self.failUnlessEqual(wcslen("abc"), 3)
self.failUnlessEqual(wcslen("ab\u2070"), 3)
# ignore error mode skips non-ascii characters
self.failUnlessEqual(wcslen("abc"), 3)
self.failUnlessEqual(wcslen(""), 0)
def test_latin1_strict(self):
ctypes.set_conversion_mode("latin-1", "strict")
self.failUnlessEqual(wcslen(u"abc"), 3)
self.failUnlessEqual(wcslen(u"ab\u2070"), 3)
self.failUnlessEqual(wcslen("abc"), 3)
self.failUnlessEqual(wcslen("ab\u2070"), 3)
self.failUnlessEqual(wcslen("abc"), 3)
self.failUnlessEqual(wcslen(""), 4)
......@@ -58,12 +58,12 @@ else:
ctypes.set_conversion_mode("ascii", "replace")
buf = ctypes.create_unicode_buffer("ab")
self.failUnlessEqual(buf[:], u"ab\uFFFD\uFFFD\uFFFD\0")
self.failUnlessEqual(buf[:], "ab\uFFFD\uFFFD\uFFFD\0")
ctypes.set_conversion_mode("ascii", "ignore")
buf = ctypes.create_unicode_buffer("ab")
# is that correct? not sure. But with 'ignore', you get what you pay for..
self.failUnlessEqual(buf[:], u"ab\0\0\0\0")
self.failUnlessEqual(buf[:], "ab\0\0\0\0")
import _ctypes_test
func = ctypes.CDLL(_ctypes_test.__file__)._testfunc_p_p
......@@ -82,32 +82,32 @@ else:
def test_ascii_replace(self):
ctypes.set_conversion_mode("ascii", "strict")
self.failUnlessEqual(func("abc"), "abc")
self.failUnlessEqual(func(u"abc"), "abc")
self.assertRaises(ctypes.ArgumentError, func, u"ab")
self.failUnlessEqual(func("abc"), "abc")
self.assertRaises(ctypes.ArgumentError, func, "ab")
def test_ascii_ignore(self):
ctypes.set_conversion_mode("ascii", "ignore")
self.failUnlessEqual(func("abc"), "abc")
self.failUnlessEqual(func(u"abc"), "abc")
self.failUnlessEqual(func(u""), "")
self.failUnlessEqual(func("abc"), "abc")
self.failUnlessEqual(func(""), "")
def test_ascii_replace(self):
ctypes.set_conversion_mode("ascii", "replace")
self.failUnlessEqual(func("abc"), "abc")
self.failUnlessEqual(func(u"abc"), "abc")
self.failUnlessEqual(func(u""), "????")
self.failUnlessEqual(func("abc"), "abc")
self.failUnlessEqual(func(""), "????")
def test_buffers(self):
ctypes.set_conversion_mode("ascii", "strict")
buf = ctypes.create_string_buffer(u"abc")
buf = ctypes.create_string_buffer("abc")
self.failUnlessEqual(len(buf), 3+1)
ctypes.set_conversion_mode("ascii", "replace")
buf = ctypes.create_string_buffer(u"ab")
buf = ctypes.create_string_buffer("ab")
self.failUnlessEqual(buf[:], "ab???\0")
ctypes.set_conversion_mode("ascii", "ignore")
buf = ctypes.create_string_buffer(u"ab")
buf = ctypes.create_string_buffer("ab")
# is that correct? not sure. But with 'ignore', you get what you pay for..
self.failUnlessEqual(buf[:], "ab\0\0\0\0")
......
......@@ -247,11 +247,11 @@ class bdist_wininst (Command):
# Convert cfgdata from unicode to ascii, mbcs encoded
try:
unicode
str
except NameError:
pass
else:
if isinstance(cfgdata, unicode):
if isinstance(cfgdata, str):
cfgdata = cfgdata.encode("mbcs")
# Append the pre-install script
......
......@@ -147,7 +147,7 @@ class build_clib (Command):
raise DistutilsSetupError, \
"each element of 'libraries' must a 2-tuple"
if isinstance(lib[0], basestring) StringType:
if isinstance(lib[0], basestring):
raise DistutilsSetupError, \
"first element of each tuple in 'libraries' " + \
"must be a string (the library name)"
......
......@@ -259,7 +259,7 @@ Your selection [default 1]: ''', end=' ')
if type(value) not in (type([]), type( () )):
value = [value]
for value in value:
value = unicode(value).encode("utf-8")
value = str(value).encode("utf-8")
body.write(sep_boundary)
body.write('\nContent-Disposition: form-data; name="%s"'%key)
body.write("\n\n")
......
......@@ -196,7 +196,7 @@ def _normalize_module(module, depth=2):
"""
if inspect.ismodule(module):
return module
elif isinstance(module, (str, unicode)):
elif isinstance(module, (str, str)):
return __import__(module, globals(), locals(), ["*"])
elif module is None:
return sys.modules[sys._getframe(depth).f_globals['__name__']]
......
......@@ -202,10 +202,10 @@ class Charset:
# is already a unicode, we leave it at that, but ensure that the
# charset is ASCII, as the standard (RFC XXX) requires.
try:
if isinstance(input_charset, unicode):
if isinstance(input_charset, str):
input_charset.encode('ascii')
else:
input_charset = unicode(input_charset, 'ascii')
input_charset = str(input_charset, 'ascii')
except UnicodeError:
raise errors.CharsetError(input_charset)
input_charset = input_charset.lower()
......@@ -264,7 +264,7 @@ class Charset:
def convert(self, s):
"""Convert a string from the input_codec to the output_codec."""
if self.input_codec != self.output_codec:
return unicode(s, self.input_codec).encode(self.output_codec)
return str(s, self.input_codec).encode(self.output_codec)
else:
return s
......@@ -281,10 +281,10 @@ class Charset:
Characters that could not be converted to Unicode will be replaced
with the Unicode replacement character U+FFFD.
"""
if isinstance(s, unicode) or self.input_codec is None:
if isinstance(s, str) or self.input_codec is None:
return s
try:
return unicode(s, self.input_codec, 'replace')
return str(s, self.input_codec, 'replace')
except LookupError:
# Input codec not installed on system, so return the original
# string unchanged.
......@@ -307,7 +307,7 @@ class Charset:
codec = self.output_codec
else:
codec = self.input_codec
if not isinstance(ustr, unicode) or codec is None:
if not isinstance(ustr, str) or codec is None:
return ustr
try:
return ustr.encode(codec, 'replace')
......
......@@ -23,7 +23,7 @@ fcre = re.compile(r'^From ', re.MULTILINE)
def _is8bitstring(s):
if isinstance(s, str):
try:
unicode(s, 'us-ascii')
str(s, 'us-ascii')
except UnicodeError:
return True
return False
......
......@@ -21,9 +21,9 @@ from email.charset import Charset
NL = '\n'
SPACE = ' '
USPACE = u' '
USPACE = ' '
SPACE8 = ' ' * 8
UEMPTYSTRING = u''
UEMPTYSTRING = ''
MAXLINELEN = 76
......@@ -210,7 +210,7 @@ class Header:
elif nextcs not in (None, 'us-ascii'):
uchunks.append(USPACE)
lastcs = nextcs
uchunks.append(unicode(s, str(charset)))
uchunks.append(str(s, str(charset)))
return UEMPTYSTRING.join(uchunks)
# Rich comparison operators for equality only. BAW: does it make sense to
......@@ -257,13 +257,13 @@ class Header:
# Possibly raise UnicodeError if the byte string can't be
# converted to a unicode with the input codec of the charset.
incodec = charset.input_codec or 'us-ascii'
ustr = unicode(s, incodec, errors)
ustr = str(s, incodec, errors)
# Now make sure that the unicode could be converted back to a
# byte string with the output codec, which may be different
# than the input codec. Still, use the original byte string.
outcodec = charset.output_codec or 'us-ascii'
ustr.encode(outcodec, errors)
elif isinstance(s, unicode):
elif isinstance(s, str):
# Now we have to be sure the unicode string can be converted
# to a byte string with a reasonable output codec. We want to
# use the byte string in the chunk.
......
......@@ -751,13 +751,13 @@ class Message:
# LookupError will be raised if the charset isn't known to
# Python. UnicodeError will be raised if the encoded text
# contains a character not in the charset.
charset = unicode(charset[2], pcharset).encode('us-ascii')
charset = str(charset[2], pcharset).encode('us-ascii')
except (LookupError, UnicodeError):
charset = charset[2]
# charset character must be in us-ascii range
try:
if isinstance(charset, str):
charset = unicode(charset, 'us-ascii')
charset = str(charset, 'us-ascii')
charset = charset.encode('us-ascii')
except UnicodeError:
return failobj
......
......@@ -505,7 +505,7 @@ class TestMessageAPI(TestEmailBase):
msg = Message()
msg.set_charset('us-ascii')
self.assertEqual('us-ascii', msg.get_content_charset())
msg.set_charset(u'us-ascii')
msg.set_charset('us-ascii')
self.assertEqual('us-ascii', msg.get_content_charset())
......@@ -583,7 +583,7 @@ bug demonstration
utf8 = Charset("utf-8")
g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
utf8_head = "\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
h = Header(g_head, g, header_name='Subject')
h.append(cz_head, cz)
h.append(utf8_head, utf8)
......@@ -1514,7 +1514,7 @@ class TestRFC2047(unittest.TestCase):
s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
dh = decode_header(s)
eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard <pirard@dom.ain>', None)])
hu = unicode(make_header(dh)).encode('latin-1')
hu = str(make_header(dh)).encode('latin-1')
eq(hu, 'Andr\xe9 Pirard <pirard@dom.ain>')
def test_whitespace_eater_unicode_2(self):
......@@ -1524,7 +1524,7 @@ class TestRFC2047(unittest.TestCase):
eq(dh, [('The', None), ('quick brown fox', 'iso-8859-1'),
('jumped over the', None), ('lazy dog', 'iso-8859-1')])
hu = make_header(dh).__unicode__()
eq(hu, u'The quick brown fox jumped over the lazy dog')
eq(hu, 'The quick brown fox jumped over the lazy dog')
def test_rfc2047_without_whitespace(self):
s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
......@@ -2770,7 +2770,7 @@ class TestCharset(unittest.TestCase):
eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
def test_unicode_charset_name(self):
charset = Charset(u'us-ascii')
charset = Charset('us-ascii')
self.assertEqual(str(charset), 'us-ascii')
self.assertRaises(Errors.CharsetError, Charset, 'asc\xffii')
......@@ -2809,7 +2809,7 @@ class TestHeader(TestEmailBase):
utf8 = Charset("utf-8")
g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
utf8_head = "\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
h = Header(g_head, g)
h.append(cz_head, cz)
h.append(utf8_head, utf8)
......@@ -2829,7 +2829,7 @@ class TestHeader(TestEmailBase):
eq(decode_header(enc),
[(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"),
(utf8_head, "utf-8")])
ustr = unicode(h)
ustr = str(h)
eq(ustr.encode('utf-8'),
'Die Mieter treten hier ein werden mit einem Foerderband '
'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
......@@ -2897,9 +2897,9 @@ A very long line that must get split to something other than at the
def test_utf8_shortest(self):
eq = self.assertEqual
h = Header(u'p\xf6stal', 'utf-8')
h = Header('p\xf6stal', 'utf-8')
eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
h = Header(u'\u83ca\u5730\u6642\u592b', 'utf-8')
h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
def test_bad_8bit_header(self):
......@@ -3152,7 +3152,7 @@ Content-Disposition: inline;
'''
msg = email.message_from_string(m)
self.assertEqual(msg.get_filename(),
u'This is even more ***fun*** is it not.pdf\ufffd')
'This is even more ***fun*** is it not.pdf\ufffd')
def test_rfc2231_unknown_encoding(self):
m = """\
......
......@@ -13,7 +13,7 @@ from email.Message import Message
# We're compatible with Python 2.3, but it doesn't have the built-in Asian
# codecs, so we have to skip all these tests.
try:
unicode('foo', 'euc-jp')
str('foo', 'euc-jp')
except LookupError:
raise TestSkipped
......@@ -57,7 +57,7 @@ Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
jcode = 'euc-jp'
msg = Message()
msg.set_payload(jhello, jcode)
ustr = unicode(msg.get_payload(), msg.get_content_charset())
ustr = str(msg.get_payload(), msg.get_content_charset())
self.assertEqual(jhello, ustr.encode(jcode))
......
......@@ -13,7 +13,7 @@ from email.message import Message
# We're compatible with Python 2.3, but it doesn't have the built-in Asian
# codecs, so we have to skip all these tests.
try:
unicode('foo', 'euc-jp')
str('foo', 'euc-jp')
except LookupError:
raise TestSkipped
......@@ -57,7 +57,7 @@ Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
jcode = 'euc-jp'
msg = Message()
msg.set_payload(jhello, jcode)
ustr = unicode(msg.get_payload(), msg.get_content_charset())
ustr = str(msg.get_payload(), msg.get_content_charset())
self.assertEqual(jhello, ustr.encode(jcode))
......
......@@ -564,7 +564,7 @@ bug demonstration
utf8 = Charset("utf-8")
g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
utf8_head = "\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
h = Header(g_head, g, header_name='Subject')
h.append(cz_head, cz)
h.append(utf8_head, utf8)
......@@ -1512,7 +1512,7 @@ class TestRFC2047(unittest.TestCase):
s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
dh = decode_header(s)
eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard <pirard@dom.ain>', None)])
hu = unicode(make_header(dh)).encode('latin-1')
hu = str(make_header(dh)).encode('latin-1')
eq(hu, 'Andr\xe9 Pirard <pirard@dom.ain>')
def test_whitespace_eater_unicode_2(self):
......@@ -1522,7 +1522,7 @@ class TestRFC2047(unittest.TestCase):
eq(dh, [('The', None), ('quick brown fox', 'iso-8859-1'),
('jumped over the', None), ('lazy dog', 'iso-8859-1')])
hu = make_header(dh).__unicode__()
eq(hu, u'The quick brown fox jumped over the lazy dog')
eq(hu, 'The quick brown fox jumped over the lazy dog')
def test_rfc2047_missing_whitespace(self):
s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
......@@ -2769,7 +2769,7 @@ class TestCharset(unittest.TestCase):
eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
def test_unicode_charset_name(self):
charset = Charset(u'us-ascii')
charset = Charset('us-ascii')
self.assertEqual(str(charset), 'us-ascii')
self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
......@@ -2808,7 +2808,7 @@ class TestHeader(TestEmailBase):
utf8 = Charset("utf-8")
g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
utf8_head = "\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
h = Header(g_head, g)
h.append(cz_head, cz)
h.append(utf8_head, utf8)
......@@ -2828,7 +2828,7 @@ class TestHeader(TestEmailBase):
eq(decode_header(enc),
[(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"),
(utf8_head, "utf-8")])
ustr = unicode(h)
ustr = str(h)
eq(ustr.encode('utf-8'),
'Die Mieter treten hier ein werden mit einem Foerderband '
'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
......@@ -2896,9 +2896,9 @@ A very long line that must get split to something other than at the
def test_utf8_shortest(self):
eq = self.assertEqual
h = Header(u'p\xf6stal', 'utf-8')
h = Header('p\xf6stal', 'utf-8')
eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
h = Header(u'\u83ca\u5730\u6642\u592b', 'utf-8')
h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
def test_bad_8bit_header(self):
......@@ -3151,7 +3151,7 @@ Content-Disposition: inline;
'''
msg = email.message_from_string(m)
self.assertEqual(msg.get_filename(),
u'This is even more ***fun*** is it not.pdf\ufffd')
'This is even more ***fun*** is it not.pdf\ufffd')
def test_rfc2231_unknown_encoding(self):
m = """\
......
......@@ -44,7 +44,7 @@ from email.encoders import _bencode, _qencode
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
UEMPTYSTRING = ''
CRLF = '\r\n'
TICK = "'"
......@@ -315,9 +315,9 @@ def collapse_rfc2231_value(value, errors='replace',
rawval = unquote(value[2])
charset = value[0] or 'us-ascii'
try:
return unicode(rawval, charset, errors)
return str(rawval, charset, errors)
except LookupError:
# XXX charset is unknown to Python.
return unicode(rawval, fallback_charset, errors)
return str(rawval, fallback_charset, errors)
else:
return unquote(value)
......@@ -60,7 +60,7 @@ def normalize_encoding(encoding):
"""
# Make sure we have an 8-bit string, because .translate() works
# differently for Unicode strings.
if isinstance(encoding, unicode):
if isinstance(encoding, str):
# Note that .encode('latin-1') does *not* use the codec
# registry, so this call doesn't recurse. (See unicodeobject.c
# PyUnicode_AsEncodedString() for details)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -189,7 +189,7 @@ def punycode_decode(text, errors):
else:
base = text[:pos]
extended = text[pos+1:]
base = unicode(base, "ascii", errors)
base = str(base, "ascii", errors)
extended = extended.upper()
return insertion_sort(base, extended, errors)
......
This diff is collapsed.
......@@ -57,7 +57,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
if codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this really is a BOM
# => try again on the next call
return (u"", 0)
return ("", 0)
else:
self.first = 0
else:
......@@ -106,7 +106,7 @@ class StreamReader(codecs.StreamReader):
if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
# not enough data to decide if this is a BOM
# => try again on the next call
return (u"", 0)
return ("", 0)
self.decode = codecs.utf_8_decode
return decode(input, errors)
......
This diff is collapsed.
......@@ -49,8 +49,8 @@ def iglob(pathname):
def glob1(dirname, pattern):
if not dirname:
dirname = os.curdir
if isinstance(pattern, unicode) and not isinstance(dirname, unicode):
dirname = unicode(dirname, sys.getfilesystemencoding() or
if isinstance(pattern, str) and not isinstance(dirname, str):
dirname = str(dirname, sys.getfilesystemencoding() or
sys.getdefaultencoding())
try:
names = os.listdir(dirname)
......
......@@ -276,7 +276,7 @@ class EditorWindow(object):
def _filename_to_unicode(self, filename):
"""convert filename to unicode in order to display it in Tk"""
if isinstance(filename, unicode) or not filename:
if isinstance(filename, str) or not filename:
return filename
else:
try:
......
......@@ -255,7 +255,7 @@ class IOBinding:
firsteol = self.eol_re.search(chars)
if firsteol:
self.eol_convention = firsteol.group(0)
if isinstance(self.eol_convention, unicode):
if isinstance(self.eol_convention, str):
# Make sure it is an ASCII string
self.eol_convention = self.eol_convention.encode("ascii")
chars = self.eol_re.sub(r"\n", chars)
......@@ -298,18 +298,18 @@ class IOBinding:
enc = None
if enc:
try:
return unicode(chars, enc)
return str(chars, enc)
except UnicodeError:
pass
# If it is ASCII, we need not to record anything
try:
return unicode(chars, 'ascii')
return str(chars, 'ascii')
except UnicodeError:
pass
# Finally, try the locale's encoding. This is deprecated;
# the user should declare a non-ASCII encoding
try:
chars = unicode(chars, encoding)
chars = str(chars, encoding)
self.fileencoding = encoding
except UnicodeError:
pass
......@@ -522,7 +522,7 @@ class IOBinding:
self.opendialog = tkFileDialog.Open(master=self.text,
filetypes=self.filetypes)
filename = self.opendialog.show(initialdir=dir, initialfile=base)
if isinstance(filename, unicode):
if isinstance(filename, str):
filename = filename.encode(filesystemencoding)
return filename
......@@ -544,7 +544,7 @@ class IOBinding:
self.savedialog = tkFileDialog.SaveAs(master=self.text,
filetypes=self.filetypes)
filename = self.savedialog.show(initialdir=dir, initialfile=base)
if isinstance(filename, unicode):
if isinstance(filename, str):
filename = filename.encode(filesystemencoding)
return filename
......
......@@ -39,7 +39,7 @@ class OutputWindow(EditorWindow):
# we assume that they are in the locale's encoding
if isinstance(s, str):
try:
s = unicode(s, IOBinding.encoding)
s = str(s, IOBinding.encoding)
except UnicodeError:
# some other encoding; let Tcl deal with it
pass
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment