Commit 572dbf8f authored by Guido van Rossum

Checkpoint. Manipulated things so that string literals are always
unicode, and a few other compensating changes, e.g. str <- unicode,
chr <- unichr, and repr() of a unicode string no longer starts
with 'u'.  Lots of unit tests are broken, but some basic things
work, in particular distutils works so the extensions can be built,
and test_builtin.py works.
parent d4617f24
......@@ -14,7 +14,6 @@ PyAPI_DATA(int) Py_NoSiteFlag;
PyAPI_DATA(int) Py_UseClassExceptionsFlag;
PyAPI_DATA(int) Py_FrozenFlag;
PyAPI_DATA(int) Py_TabcheckFlag;
PyAPI_DATA(int) Py_UnicodeFlag;
PyAPI_DATA(int) Py_IgnoreEnvironmentFlag;
PyAPI_DATA(int) Py_DivisionWarningFlag;
......
......@@ -168,7 +168,7 @@ class CCompiler:
# set_executables ()
def set_executable(self, key, value):
if type(value) is StringType:
if isinstance(value, basestring):
setattr(self, key, split_quoted(value))
else:
setattr(self, key, value)
......@@ -193,8 +193,8 @@ class CCompiler:
if not (type (defn) is TupleType and
(len (defn) == 1 or
(len (defn) == 2 and
(type (defn[1]) is StringType or defn[1] is None))) and
type (defn[0]) is StringType):
(isinstance (defn[1], basestring) or defn[1] is None))) and
isinstance (defn[0], basestring)):
raise TypeError, \
("invalid macro definition '%s': " % defn) + \
"must be tuple (string,), (string, string), or " + \
......@@ -344,7 +344,7 @@ class CCompiler:
"""
if outdir is None:
outdir = self.output_dir
elif type(outdir) is not StringType:
elif not isinstance(outdir, basestring):
raise TypeError, "'output_dir' must be a string or None"
if macros is None:
......@@ -442,7 +442,7 @@ class CCompiler:
"""
if output_dir is None:
output_dir = self.output_dir
elif type (output_dir) is not StringType:
elif not isinstance(output_dir, basestring):
raise TypeError, "'output_dir' must be a string or None"
if macros is None:
......@@ -527,7 +527,7 @@ class CCompiler:
if output_dir is None:
output_dir = self.output_dir
elif type (output_dir) is not StringType:
elif not isinstance(output_dir, basestring):
raise TypeError, "'output_dir' must be a string or None"
return (objects, output_dir)
......
......@@ -222,7 +222,7 @@ class Command:
if val is None:
setattr(self, option, default)
return default
elif type(val) is not StringType:
elif not isinstance(val, basestring):
raise DistutilsOptionError, \
"'%s' must be a %s (got `%s`)" % (option, what, val)
return val
......@@ -242,12 +242,11 @@ class Command:
val = getattr(self, option)
if val is None:
return
elif type(val) is StringType:
elif isinstance(val, basestring):
setattr(self, option, re.split(r',\s*|\s+', val))
else:
if type(val) is ListType:
types = map(type, val)
ok = (types == [StringType] * len(val))
ok = all(isinstance(v, basestring) for v in val)
else:
ok = 0
......@@ -421,7 +420,7 @@ class Command:
# Allow 'infiles' to be a single string
if type(infiles) is StringType:
if isinstance(infiles, basestring):
infiles = (infiles,)
elif type(infiles) not in (ListType, TupleType):
raise TypeError, \
......
......@@ -92,7 +92,7 @@ class build_clib (Command):
if self.include_dirs is None:
self.include_dirs = self.distribution.include_dirs or []
if type(self.include_dirs) is StringType:
if isinstance(self.include_dirs, basestring):
self.include_dirs = self.include_dirs.split(os.pathsep)
# XXX same as for build_ext -- what about 'self.define' and
......@@ -147,7 +147,7 @@ class build_clib (Command):
raise DistutilsSetupError, \
"each element of 'libraries' must a 2-tuple"
if type(lib[0]) is not StringType:
if isinstance(lib[0], basestring) StringType:
raise DistutilsSetupError, \
"first element of each tuple in 'libraries' " + \
"must be a string (the library name)"
......
......@@ -137,7 +137,7 @@ class build_ext (Command):
plat_py_include = sysconfig.get_python_inc(plat_specific=1)
if self.include_dirs is None:
self.include_dirs = self.distribution.include_dirs or []
if type(self.include_dirs) is StringType:
if isinstance(self.include_dirs, basestring):
self.include_dirs = self.include_dirs.split(os.pathsep)
# Put the Python "system" include dir at the end, so that
......@@ -146,7 +146,7 @@ class build_ext (Command):
if plat_py_include != py_include:
self.include_dirs.append(plat_py_include)
if type(self.libraries) is StringType:
if isinstance(self.libraries, basestring):
self.libraries = [self.libraries]
# Life is easier if we're not forever checking for None, so
......@@ -155,12 +155,12 @@ class build_ext (Command):
self.libraries = []
if self.library_dirs is None:
self.library_dirs = []
elif type(self.library_dirs) is StringType:
elif isinstance(self.library_dirs, basestring):
self.library_dirs = self.library_dirs.split(os.pathsep)
if self.rpath is None:
self.rpath = []
elif type(self.rpath) is StringType:
elif isinstance(self.rpath, basestring):
self.rpath = self.rpath.split(os.pathsep)
# for extensions under windows use different directories
......@@ -321,7 +321,7 @@ class build_ext (Command):
("each element of 'ext_modules' option must be an "
"Extension instance or 2-tuple")
if not (type(ext_name) is StringType and
if not (isinstance(ext_name, basestring) and
extension_name_re.match(ext_name)):
raise DistutilsSetupError, \
("first element of each tuple in 'ext_modules' "
......
......@@ -361,7 +361,7 @@ class build_py (Command):
def build_module (self, module, module_file, package):
if type(package) is StringType:
if isinstance(package, basestring):
package = package.split('.')
elif type(package) not in (ListType, TupleType):
raise TypeError, \
......
......@@ -73,17 +73,17 @@ class config (Command):
def finalize_options (self):
if self.include_dirs is None:
self.include_dirs = self.distribution.include_dirs or []
elif type(self.include_dirs) is StringType:
elif isinstance(self.include_dirs, basestring):
self.include_dirs = self.include_dirs.split(os.pathsep)
if self.libraries is None:
self.libraries = []
elif type(self.libraries) is StringType:
elif isinstance(self.libraries, basestring):
self.libraries = [self.libraries]
if self.library_dirs is None:
self.library_dirs = []
elif type(self.library_dirs) is StringType:
elif isinstance(self.library_dirs, basestring):
self.library_dirs = self.library_dirs.split(os.pathsep)
......@@ -212,7 +212,7 @@ class config (Command):
self._check_compiler()
(src, out) = self._preprocess(body, headers, include_dirs, lang)
if type(pattern) is StringType:
if isinstance(pattern, basestring):
pattern = re.compile(pattern)
file = open(out)
......
......@@ -463,7 +463,7 @@ class install (Command):
self.extra_path = self.distribution.extra_path
if self.extra_path is not None:
if type(self.extra_path) is StringType:
if isinstance(self.extra_path, basestring):
self.extra_path = self.extra_path.split(',')
if len(self.extra_path) == 1:
......
......@@ -10,7 +10,6 @@ platform-independent data files."""
__revision__ = "$Id$"
import os
from types import StringType
from distutils.core import Command
from distutils.util import change_root, convert_path
......@@ -48,7 +47,7 @@ class install_data (Command):
def run (self):
self.mkpath(self.install_dir)
for f in self.data_files:
if type(f) is StringType:
if isinstance(f, basestring):
# it's a simple file, so copy it
f = convert_path(f)
if self.warn_dir:
......
......@@ -31,7 +31,7 @@ def mkpath (name, mode=0777, verbose=0, dry_run=0):
global _path_created
# Detect a common bug -- name is None
if not isinstance(name, StringTypes):
if not isinstance(name, basestring):
raise DistutilsInternalError, \
"mkpath: 'name' must be a string (got %r)" % (name,)
......
......@@ -598,13 +598,13 @@ Common commands: (see '--help-commands' for more)
keywords = self.metadata.keywords
if keywords is not None:
if type(keywords) is StringType:
if isinstance(keywords, basestring):
keywordlist = keywords.split(',')
self.metadata.keywords = [x.strip() for x in keywordlist]
platforms = self.metadata.platforms
if platforms is not None:
if type(platforms) is StringType:
if isinstance(platforms, basestring):
platformlist = platforms.split(',')
self.metadata.platforms = [x.strip() for x in platformlist]
......@@ -906,7 +906,7 @@ Common commands: (see '--help-commands' for more)
neg_opt = {}
try:
is_string = type(value) is StringType
is_string = isinstance(value, basestring)
if option in neg_opt and is_string:
setattr(command_obj, neg_opt[option], not strtobool(value))
elif option in bool_opts and is_string:
......
......@@ -103,9 +103,9 @@ class Extension:
language=None,
**kw # To catch unknown keywords
):
assert type(name) is StringType, "'name' must be a string"
assert isinstance(name, basestring), "'name' must be a string"
assert (type(sources) is ListType and
map(type, sources) == [StringType]*len(sources)), \
all(isinstance(v, basestring) for v in sources)), \
"'sources' must be a list of strings"
self.name = name
......
......@@ -166,13 +166,13 @@ class FancyGetopt:
raise ValueError, "invalid option tuple: %r" % (option,)
# Type- and value-check the option names
if type(long) is not StringType or len(long) < 2:
if not isinstance(long, basestring) or len(long) < 2:
raise DistutilsGetoptError, \
("invalid long option '%s': "
"must be a string of length >= 2") % long
if (not ((short is None) or
(type(short) is StringType and len(short) == 1))):
(isinstance(short, basestring) and len(short) == 1))):
raise DistutilsGetoptError, \
("invalid short option '%s': "
"must a single character or None") % short
......
......@@ -333,7 +333,7 @@ def translate_pattern (pattern, anchor=1, prefix=None, is_regex=0):
or just returned as-is (assumes it's a regex object).
"""
if is_regex:
if type(pattern) is StringType:
if isinstance(pattern, basestring):
return re.compile(pattern)
else:
return pattern
......
......@@ -16,7 +16,7 @@ the "typical" Unix-style command-line C compiler:
__revision__ = "$Id$"
import os, sys
from types import StringType, NoneType
from types import NoneType
from copy import copy
from distutils import sysconfig
......@@ -212,7 +212,7 @@ class UnixCCompiler(CCompiler):
lib_opts = gen_lib_options(self, library_dirs, runtime_library_dirs,
libraries)
if type(output_dir) not in (StringType, NoneType):
if not isinstance(output_dir, (basestring, NoneType)):
raise TypeError, "'output_dir' must be a string or None"
if output_dir is not None:
output_filename = os.path.join(output_dir, output_filename)
......
......@@ -470,7 +470,7 @@ def setlocale(category, locale=None):
category may be given as one of the LC_* values.
"""
if locale and type(locale) is not type(""):
if locale and not isinstance(locale, basestring):
# convert to string
locale = normalize(_build_localename(locale))
return _setlocale(category, locale)
......
......@@ -733,8 +733,8 @@ if not _exists("urandom"):
_urandomfd = open("/dev/urandom", O_RDONLY)
except (OSError, IOError):
raise NotImplementedError("/dev/urandom (or equivalent) not found")
bytes = ""
while len(bytes) < n:
bytes += read(_urandomfd, n - len(bytes))
bs = b""
while len(bs) < n:
bs += read(_urandomfd, n - len(bs))
close(_urandomfd)
return bytes
return bs
......@@ -470,18 +470,8 @@ def _compile_info(code, pattern, flags):
_compile_charset(charset, flags, code)
code[skip] = len(code) - skip
try:
unicode
except NameError:
STRING_TYPES = (type(""),)
else:
STRING_TYPES = (type(""), type(unicode("")))
def isstring(obj):
for tp in STRING_TYPES:
if isinstance(obj, tp):
return 1
return 0
return isinstance(obj, basestring)
def _code(p, flags):
......
......@@ -82,7 +82,7 @@ if have_unicode:
(unicode('100'), 100),
(unicode('314'), 314),
(unicode(' 314'), 314),
(unicode('\u0663\u0661\u0664 ','raw-unicode-escape'), 314),
(unicode(b'\u0663\u0661\u0664 ','raw-unicode-escape'), 314),
(unicode(' \t\t 314 \t\t '), 314),
(unicode(' 1x'), ValueError),
(unicode(' 1 '), 1),
......@@ -185,7 +185,7 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(chr(65), 'A')
self.assertEqual(chr(97), 'a')
self.assertEqual(chr(0xff), '\xff')
self.assertRaises(ValueError, chr, 256)
self.assertRaises(ValueError, chr, 1<<24)
self.assertRaises(TypeError, chr)
def XXX_test_cmp(self):
......@@ -209,7 +209,7 @@ class BuiltinTest(unittest.TestCase):
def test_compile(self):
compile('print(1)\n', '', 'exec')
bom = '\xef\xbb\xbf'
compile(bom + 'print(1)\n', '', 'exec')
compile((bom + 'print(1)\n').encode("latin-1"), '', 'exec')
compile(source='pass', filename='?', mode='exec')
compile(dont_inherit=0, filename='tmp', source='0', mode='eval')
compile('pass', '?', dont_inherit=1, mode='exec')
......@@ -220,7 +220,7 @@ class BuiltinTest(unittest.TestCase):
self.assertRaises(TypeError, compile, 'pass', '?', 'exec',
mode='eval', source='0', filename='tmp')
if have_unicode:
compile(unicode('print(u"\xc3\xa5")\n', 'utf8'), '', 'exec')
compile(unicode(b'print(u"\xc3\xa5")\n', 'utf8'), '', 'exec')
self.assertRaises(TypeError, compile, unichr(0), 'f', 'exec')
self.assertRaises(ValueError, compile, unicode('a = 1'), 'f', 'bad')
......@@ -339,9 +339,9 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(eval(unicode('b'), globals, locals), 200)
self.assertEqual(eval(unicode('c'), globals, locals), 300)
bom = '\xef\xbb\xbf'
self.assertEqual(eval(bom + 'a', globals, locals), 1)
self.assertEqual(eval(unicode('u"\xc3\xa5"', 'utf8'), globals),
unicode('\xc3\xa5', 'utf8'))
self.assertEqual(eval((bom + 'a').encode("latin-1"), globals, locals), 1)
self.assertEqual(eval(unicode(b'u"\xc3\xa5"', 'utf8'), globals),
unicode(b'\xc3\xa5', 'utf8'))
self.assertRaises(TypeError, eval)
self.assertRaises(TypeError, eval, ())
......@@ -608,7 +608,7 @@ class BuiltinTest(unittest.TestCase):
self.assertRaises(ValueError, float, " -0x3.p-1 ")
if have_unicode:
self.assertEqual(float(unicode(" 3.14 ")), 3.14)
self.assertEqual(float(unicode(" \u0663.\u0661\u0664 ",'raw-unicode-escape')), 3.14)
self.assertEqual(float(unicode(b" \u0663.\u0661\u0664 ",'raw-unicode-escape')), 3.14)
# Implementation limitation in PyFloat_FromString()
self.assertRaises(ValueError, float, unicode("1"*10000))
......@@ -1673,7 +1673,7 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(unichr(97), unicode('a'))
self.assertEqual(
unichr(sys.maxunicode),
unicode('\\U%08x' % (sys.maxunicode), 'unicode-escape')
unicode(('\\U%08x' % (sys.maxunicode)).encode("ascii"), 'unicode-escape')
)
self.assertRaises(ValueError, unichr, sys.maxunicode+1)
self.assertRaises(TypeError, unichr)
......
......@@ -6163,7 +6163,8 @@ Read a file descriptor.");
static PyObject *
posix_read(PyObject *self, PyObject *args)
{
int fd, size, n;
int fd, size;
Py_ssize_t n;
PyObject *buffer;
if (!PyArg_ParseTuple(args, "ii:read", &fd, &size))
return NULL;
......@@ -6171,18 +6172,18 @@ posix_read(PyObject *self, PyObject *args)
errno = EINVAL;
return posix_error();
}
buffer = PyString_FromStringAndSize((char *)NULL, size);
buffer = PyBytes_FromStringAndSize((char *)NULL, size);
if (buffer == NULL)
return NULL;
Py_BEGIN_ALLOW_THREADS
n = read(fd, PyString_AsString(buffer), size);
n = read(fd, PyBytes_AsString(buffer), size);
Py_END_ALLOW_THREADS
if (n < 0) {
Py_DECREF(buffer);
return posix_error();
}
if (n != size)
_PyString_Resize(&buffer, n);
PyBytes_Resize(buffer, n);
return buffer;
}
......@@ -8841,5 +8842,3 @@ INITFUNC(void)
#ifdef __cplusplus
}
#endif
......@@ -59,6 +59,7 @@ PyModule_GetName(PyObject *m)
{
PyObject *d;
PyObject *nameobj;
char *s;
if (!PyModule_Check(m)) {
PyErr_BadArgument();
return NULL;
......@@ -66,11 +67,13 @@ PyModule_GetName(PyObject *m)
d = ((PyModuleObject *)m)->md_dict;
if (d == NULL ||
(nameobj = PyDict_GetItemString(d, "__name__")) == NULL ||
!PyString_Check(nameobj))
!(PyString_Check(nameobj) || PyUnicode_Check(nameobj)))
{
PyErr_SetString(PyExc_SystemError, "nameless module");
return NULL;
}
if (PyUnicode_Check(nameobj))
nameobj = _PyUnicode_AsDefaultEncodedString(nameobj, "replace");
return PyString_AsString(nameobj);
}
......
......@@ -2072,7 +2072,6 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
p = PyString_AS_STRING(repr);
if (quotes) {
*p++ = 'u';
*p++ = (findchar(s, size, '\'') &&
!findchar(s, size, '"')) ? '"' : '\'';
}
......@@ -2081,7 +2080,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
/* Escape quotes and backslashes */
if ((quotes &&
ch == (Py_UNICODE) PyString_AS_STRING(repr)[1]) || ch == '\\') {
ch == (Py_UNICODE) PyString_AS_STRING(repr)[0]) || ch == '\\') {
*p++ = '\\';
*p++ = (char) ch;
continue;
......@@ -2167,7 +2166,7 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
*p++ = (char) ch;
}
if (quotes)
*p++ = PyString_AS_STRING(repr)[1];
*p++ = PyString_AS_STRING(repr)[0];
*p = '\0';
_PyString_Resize(&repr, p - PyString_AS_STRING(repr));
......
......@@ -3187,7 +3187,7 @@ parsestr(const node *n, const char *encoding, int *bytesmode)
}
}
#ifdef Py_USING_UNICODE
if (unicode || Py_UnicodeFlag) {
if (!*bytesmode) {
return decode_unicode(s, len, rawmode, encoding);
}
#endif
......
......@@ -2270,7 +2270,7 @@ static PyMethodDef builtin_methods[] = {
{"all", builtin_all, METH_O, all_doc},
{"any", builtin_any, METH_O, any_doc},
{"callable", builtin_callable, METH_O, callable_doc},
{"chr", builtin_chr, METH_VARARGS, chr_doc},
{"chr", builtin_unichr, METH_VARARGS, chr_doc},
{"cmp", builtin_cmp, METH_VARARGS, cmp_doc},
{"compile", (PyCFunction)builtin_compile, METH_VARARGS | METH_KEYWORDS, compile_doc},
{"delattr", builtin_delattr, METH_VARARGS, delattr_doc},
......@@ -2375,7 +2375,7 @@ _PyBuiltin_Init(void)
SETBUILTIN("set", &PySet_Type);
SETBUILTIN("slice", &PySlice_Type);
SETBUILTIN("staticmethod", &PyStaticMethod_Type);
SETBUILTIN("str", &PyString_Type);
SETBUILTIN("str", &PyUnicode_Type);
SETBUILTIN("super", &PySuper_Type);
SETBUILTIN("tuple", &PyTuple_Type);
SETBUILTIN("type", &PyType_Type);
......
......@@ -2633,7 +2633,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
PyObject *keyword = kws[2*i];
PyObject *value = kws[2*i + 1];
int j;
if (keyword == NULL || !PyString_Check(keyword)) {
if (keyword == NULL || !(PyString_Check(keyword) || PyUnicode_Check(keyword))) {
PyErr_Format(PyExc_TypeError,
"%.200s() keywords must be strings",
PyString_AsString(co->co_name));
......
......@@ -1081,7 +1081,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
case 'S': { /* string object */
PyObject **p = va_arg(*p_va, PyObject **);
if (PyString_Check(arg))
if (PyString_Check(arg) || PyUnicode_Check(arg))
*p = arg;
else
return converterr("string", arg, msgbuf, bufsize);
......@@ -1531,7 +1531,7 @@ vgetargskeywords(PyObject *args, PyObject *keywords, const char *format,
while (PyDict_Next(keywords, &pos, &key, &value)) {
int match = 0;
char *ks;
if (!PyString_Check(key)) {
if (!PyString_Check(key) && !PyUnicode_Check(key)) {
PyErr_SetString(PyExc_TypeError,
"keywords must be strings");
return cleanreturn(0, freelist);
......
......@@ -154,7 +154,7 @@ _PyImport_Init(void)
}
}
if (Py_UnicodeFlag) {
{
/* Fix the pyc_magic so that byte compiled code created
using the all-Unicode method doesn't interfere with
code created in normal operation mode. */
......
......@@ -76,7 +76,6 @@ int Py_InspectFlag; /* Needed to determine whether to exit at SystemError */
int Py_NoSiteFlag; /* Suppress 'import site' */
int Py_UseClassExceptionsFlag = 1; /* Needed by bltinmodule.c: deprecated */
int Py_FrozenFlag; /* Needed by getpath.c */
int Py_UnicodeFlag = 0; /* Needed by compile.c */
int Py_IgnoreEnvironmentFlag; /* e.g. PYTHONPATH, PYTHONHOME */
/* Reference to 'warnings' module, to avoid importing it
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment