More coding by random modification.

Encoding now returns bytes instead of str8. eval(), exec(), and compile() now accept unicode or bytes.

More coding by random modification.
Encoding now returns bytes instead of str8. eval(), exec(), and compile() now accept unicode or bytes.
f15a29f9 · Guido van Rossum · bae5cedb · f15a29f9 · f15a29f9 · f15a29f9
Commit f15a29f9 authored May 04, 2007 by Guido van Rossum
12 changed files
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -208,8 +208,8 @@ class BuiltinTest(unittest.TestCase):

    def test_compile(self):
        compile('print(1)\n', '', 'exec')
-        bom = '\xef\xbb\xbf'
-        compile((bom + 'print(1)\n').encode("latin-1"), '', 'exec')
+##         bom = b'\xef\xbb\xbf'
+##         compile(bom + b'print(1)\n', '', 'exec')
        compile(source='pass', filename='?', mode='exec')
        compile(dont_inherit=0, filename='tmp', source='0', mode='eval')
        compile('pass', '?', dont_inherit=1, mode='exec')
@@ -220,7 +220,7 @@ class BuiltinTest(unittest.TestCase):
        self.assertRaises(TypeError, compile, 'pass', '?', 'exec',
                          mode='eval', source='0', filename='tmp')
        if have_unicode:
-            compile(str(b'print(u"\xc3\xa5")\n', 'utf8'), '', 'exec')
+            compile('print(u"\xe5")\n', '', 'exec')
            self.assertRaises(TypeError, compile, chr(0), 'f', 'exec')
            self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad')

@@ -338,10 +338,9 @@ class BuiltinTest(unittest.TestCase):
            self.assertEqual(eval(str('a'), globals, locals), 1)
            self.assertEqual(eval(str('b'), globals, locals), 200)
            self.assertEqual(eval(str('c'), globals, locals), 300)
-            bom = '\xef\xbb\xbf'
-            self.assertEqual(eval((bom + 'a').encode("latin-1"), globals, locals), 1)
-            self.assertEqual(eval(str(b'u"\xc3\xa5"', 'utf8'), globals),
-                             str(b'\xc3\xa5', 'utf8'))
+##             bom = b'\xef\xbb\xbf'
+##             self.assertEqual(eval(bom + b'a', globals, locals), 1)
+            self.assertEqual(eval('u"\xe5"', globals), u"\xe5")
        self.assertRaises(TypeError, eval)
        self.assertRaises(TypeError, eval, ())

@@ -675,16 +674,14 @@ class BuiltinTest(unittest.TestCase):
        self.assertRaises(TypeError, getattr, sys, 1)
        self.assertRaises(TypeError, getattr, sys, 1, "foo")
        self.assertRaises(TypeError, getattr)
-        if have_unicode:
-            self.assertRaises(UnicodeError, getattr, sys, chr(sys.maxunicode))
+        self.assertRaises(AttributeError, getattr, sys, chr(sys.maxunicode))

    def test_hasattr(self):
        import sys
        self.assert_(hasattr(sys, 'stdout'))
        self.assertRaises(TypeError, hasattr, sys, 1)
        self.assertRaises(TypeError, hasattr)
-        if have_unicode:
-            self.assertRaises(UnicodeError, hasattr, sys, chr(sys.maxunicode))
+        self.assertEqual(False, hasattr(sys, chr(sys.maxunicode)))

    def test_hash(self):
        hash(None)

--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -79,6 +79,7 @@ PyObject *
 PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
 {
    PyBytesObject *new;
+    int alloc;

    assert(size >= 0);

@@ -86,18 +87,23 @@ PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
    if (new == NULL)
        return NULL;

-    if (size == 0)
+    if (size == 0) {
        new->ob_bytes = NULL;
+        alloc = 0;
+    }
    else {
-        new->ob_bytes = PyMem_Malloc(size);
+        alloc = size + 1;
+        new->ob_bytes = PyMem_Malloc(alloc);
        if (new->ob_bytes == NULL) {
            Py_DECREF(new);
            return NULL;
        }
        if (bytes != NULL)
            memcpy(new->ob_bytes, bytes, size);
+        new->ob_bytes[size] = '\0';  /* Trailing null byte */
    }
-    new->ob_size = new->ob_alloc = size;
+    new->ob_size = size;
+    new->ob_alloc = alloc;

    return (PyObject *)new;
 }
@@ -134,7 +140,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
        /* Major downsize; resize down to exact size */
        alloc = size;
    }
-    else if (size <= alloc) {
+    else if (size < alloc) {
        /* Within allocated size; quick exit */
        ((PyBytesObject *)self)->ob_size = size;
        return 0;
@@ -147,6 +153,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
        /* Major upsize; resize up to exact size */
        alloc = size;
    }
+    if (alloc <= size)
+        alloc = size + 1;

    sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
    if (sval == NULL) {
@@ -158,6 +166,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
    ((PyBytesObject *)self)->ob_size = size;
    ((PyBytesObject *)self)->ob_alloc = alloc;

+    ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
+
    return 0;
 }

@@ -221,7 +231,7 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
    size = mysize + osize;
    if (size < 0)
        return PyErr_NoMemory();
-    if (size <= self->ob_alloc)
+    if (size < self->ob_alloc)
        self->ob_size = size;
    else if (PyBytes_Resize((PyObject *)self, size) < 0)
        return NULL;
@@ -243,7 +253,7 @@ bytes_repeat(PyBytesObject *self, Py_ssize_t count)
    size = mysize * count;
    if (count != 0 && size / count != mysize)
        return PyErr_NoMemory();
-    result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL,  size);
+    result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
    if (result != NULL && size != 0) {
        if (mysize == 1)
            memset(result->ob_bytes, self->ob_bytes[0], size);
@@ -268,7 +278,7 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
    size = mysize * count;
    if (count != 0 && size / count != mysize)
        return PyErr_NoMemory();
-    if (size <= self->ob_alloc)
+    if (size < self->ob_alloc)
        self->ob_size = size;
    else if (PyBytes_Resize((PyObject *)self, size) < 0)
        return NULL;
@@ -703,7 +713,7 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
        }
        bytes = PyString_AS_STRING(encoded);
        size = PyString_GET_SIZE(encoded);
-        if (size <= self->ob_alloc)
+        if (size < self->ob_alloc)
            self->ob_size = size;
        else if (PyBytes_Resize((PyObject *)self, size) < 0) {
            Py_DECREF(encoded);

--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -72,8 +72,11 @@ PyModule_GetName(PyObject *m)
 		PyErr_SetString(PyExc_SystemError, "nameless module");
 		return NULL;
 	}
-        if (PyUnicode_Check(nameobj))
-		nameobj = _PyUnicode_AsDefaultEncodedString(nameobj, "replace");
+        if (PyUnicode_Check(nameobj)) {
+		nameobj = _PyUnicode_AsDefaultEncodedString(nameobj, NULL);
+		if (nameobj == NULL)
+			return NULL;
+	}
 	return PyString_AsString(nameobj);
 }


--- a/Objects/object.c
+++ b/Objects/object.c
@@ -422,7 +422,8 @@ PyObject_Str(PyObject *v)
 		return NULL;
 	if (PyUnicode_Check(res)) {
 		PyObject* str;
-		str = PyUnicode_AsEncodedString(res, NULL, NULL);
+		str = _PyUnicode_AsDefaultEncodedString(res, NULL);
+		Py_XINCREF(str);
 		Py_DECREF(res);
 		if (str)
 			res = str;
@@ -929,12 +930,12 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value)
 	PyTypeObject *tp = v->ob_type;
 	int err;

-	if (!PyString_Check(name)){
+	if (!PyString_Check(name)) {
 		/* The Unicode to string conversion is done here because the
 		   existing tp_setattro slots expect a string object as name
 		   and we wouldn't want to break those. */
 		if (PyUnicode_Check(name)) {
-			name = PyUnicode_AsEncodedString(name, NULL, NULL);
+			name = _PyUnicode_AsDefaultEncodedString(name, NULL);
 			if (name == NULL)
 				return -1;
 		}
@@ -946,8 +947,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value)
 			return -1;
 		}
 	}
-	else
-		Py_INCREF(name);
+	Py_INCREF(name);

 	PyString_InternInPlace(&name);
 	if (tp->tp_setattro != NULL) {
@@ -961,6 +961,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value)
 		return err;
 	}
 	Py_DECREF(name);
+	assert(name->ob_refcnt >= 1);
 	if (tp->tp_getattr == NULL && tp->tp_getattro == NULL)
 		PyErr_Format(PyExc_TypeError,
 			     "'%.100s' object has no attributes "

--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -3181,9 +3181,9 @@ string_encode(PyStringObject *self, PyObject *args)
    v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
    if (v == NULL)
        goto onError;
-    if (!PyString_Check(v) && !PyUnicode_Check(v)) {
+    if (!PyBytes_Check(v)) {
        PyErr_Format(PyExc_TypeError,
-                     "encoder did not return a string/unicode object "
+                     "[str8] encoder did not return a bytes object "
                     "(type=%.400s)",
                     v->ob_type->tp_name);
        Py_DECREF(v);

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -104,13 +104,9 @@ static PyUnicodeObject *unicode_empty;
 static PyUnicodeObject *unicode_latin1[256];

 /* Default encoding to use and assume when NULL is passed as encoding
-   parameter; it is initialized by _PyUnicode_Init().
-
-   Always use the PyUnicode_SetDefaultEncoding() and
-   PyUnicode_GetDefaultEncoding() APIs to access this global.
-
-*/
-static char unicode_default_encoding[100];
+   parameter; it is fixed to "utf-8".  Always use the
+   PyUnicode_GetDefaultEncoding() API to access this global. */
+static const char unicode_default_encoding[] = "utf-8";

 Py_UNICODE
 PyUnicode_GetMax(void)
@@ -711,10 +707,19 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
    v = PyCodec_Encode(unicode, encoding, errors);
    if (v == NULL)
        goto onError;
-    if (!PyString_Check(v)) {
+    if (!PyBytes_Check(v)) {
+        if (PyString_Check(v)) {
+            /* Old codec, turn it into bytes */
+            PyObject *b = PyBytes_FromObject(v);
+            Py_DECREF(v);
+            return b;
+        }
        PyErr_Format(PyExc_TypeError,
-                     "encoder did not return a string object (type=%.400s)",
-                     v->ob_type->tp_name);
+                     "encoder did not return a bytes object "
+                     "(type=%.400s, encoding=%.20s, errors=%.20s)",
+                     v->ob_type->tp_name,
+                     encoding ? encoding : "NULL",
+                     errors ? errors : "NULL");
        Py_DECREF(v);
        goto onError;
    }
@@ -728,12 +733,28 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
 					    const char *errors)
 {
    PyObject *v = ((PyUnicodeObject *)unicode)->defenc;
-
+    PyObject *b;
    if (v)
        return v;
-    v = PyUnicode_AsEncodedString(unicode, NULL, errors);
-    if (v && errors == NULL)
+    if (errors != NULL)
+        Py_FatalError("non-NULL encoding in _PyUnicode_AsDefaultEncodedString");
+    if (errors == NULL) {
+        b = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+                                 PyUnicode_GET_SIZE(unicode),
+                                 NULL);
+    }
+    else {
+        b = PyUnicode_AsEncodedString(unicode, NULL, errors);
+    }
+    if (!b)
+        return NULL;
+    v = PyString_FromStringAndSize(PyBytes_AsString(b),
+                                   PyBytes_Size(b));
+    Py_DECREF(b);
+    if (!errors) {
+        Py_XINCREF(v);
        ((PyUnicodeObject *)unicode)->defenc = v;
+    }
    return v;
 }

@@ -768,21 +789,13 @@ const char *PyUnicode_GetDefaultEncoding(void)

 int PyUnicode_SetDefaultEncoding(const char *encoding)
 {
-    PyObject *v;
-
-    /* Make sure the encoding is valid. As side effect, this also
-       loads the encoding into the codec registry cache. */
-    v = _PyCodec_Lookup(encoding);
-    if (v == NULL)
-	goto onError;
-    Py_DECREF(v);
-    strncpy(unicode_default_encoding,
-	    encoding,
-	    sizeof(unicode_default_encoding));
+    if (strcmp(encoding, unicode_default_encoding) != 0) {
+        PyErr_Format(PyExc_ValueError,
+                     "Can only set default encoding to %s",
+                     unicode_default_encoding);
+        return -1;
+    }
    return 0;
-
- onError:
-    return -1;
 }

 /* error handling callback helper:
@@ -1429,10 +1442,10 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
        nallocated = size * 4;
        if (nallocated / 4 != size)  /* overflow! */
            return PyErr_NoMemory();
-        v = PyString_FromStringAndSize(NULL, nallocated);
+        v = PyBytes_FromStringAndSize(NULL, nallocated);
        if (v == NULL)
            return NULL;
-        p = PyString_AS_STRING(v);
+        p = PyBytes_AS_STRING(v);
    }

    for (i = 0; i < size;) {
@@ -1480,13 +1493,13 @@ encodeUCS4:
        /* This was stack allocated. */
        nneeded = p - stackbuf;
        assert(nneeded <= nallocated);
-        v = PyString_FromStringAndSize(stackbuf, nneeded);
+        v = PyBytes_FromStringAndSize(stackbuf, nneeded);
    }
    else {
    	/* Cut back to size actually needed. */
-        nneeded = p - PyString_AS_STRING(v);
+        nneeded = p - PyBytes_AS_STRING(v);
        assert(nneeded <= nallocated);
-        _PyString_Resize(&v, nneeded);
+        PyBytes_Resize(v, nneeded);
    }
    return v;

@@ -2588,12 +2601,12 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,

    /* allocate enough for a simple encoding without
       replacements, if we need more, we'll resize */
-    res = PyString_FromStringAndSize(NULL, size);
+    res = PyBytes_FromStringAndSize(NULL, size);
    if (res == NULL)
        goto onError;
    if (size == 0)
 	return res;
-    str = PyString_AS_STRING(res);
+    str = PyBytes_AS_STRING(res);
    ressize = size;

    while (p<endp) {
@@ -2643,7 +2656,7 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
 		    p = collend;
 		    break;
 		case 4: /* xmlcharrefreplace */
-		    respos = str-PyString_AS_STRING(res);
+		    respos = str - PyBytes_AS_STRING(res);
 		    /* determine replacement size (temporarily (mis)uses p) */
 		    for (p = collstart, repsize = 0; p < collend; ++p) {
 			if (*p<10)
@@ -2670,9 +2683,9 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
 		    if (requiredsize > ressize) {
 			if (requiredsize<2*ressize)
 			    requiredsize = 2*ressize;
-			if (_PyString_Resize(&res, requiredsize))
+			if (PyBytes_Resize(res, requiredsize))
 			    goto onError;
-			str = PyString_AS_STRING(res) + respos;
+			str = PyBytes_AS_STRING(res) + respos;
 			ressize = requiredsize;
 		    }
 		    /* generate replacement (temporarily (mis)uses p) */
@@ -2690,17 +2703,17 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
 		    /* need more space? (at least enough for what we
 		       have+the replacement+the rest of the string, so
 		       we won't have to check space for encodable characters) */
-		    respos = str-PyString_AS_STRING(res);
+		    respos = str - PyBytes_AS_STRING(res);
 		    repsize = PyUnicode_GET_SIZE(repunicode);
 		    requiredsize = respos+repsize+(endp-collend);
 		    if (requiredsize > ressize) {
 			if (requiredsize<2*ressize)
 			    requiredsize = 2*ressize;
-			if (_PyString_Resize(&res, requiredsize)) {
+			if (PyBytes_Resize(res, requiredsize)) {
 			    Py_DECREF(repunicode);
 			    goto onError;
 			}
-			str = PyString_AS_STRING(res) + respos;
+			str = PyBytes_AS_STRING(res) + respos;
 			ressize = requiredsize;
 		    }
 		    /* check if there is anything unencodable in the replacement
@@ -2721,10 +2734,10 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
 	}
    }
    /* Resize if we allocated to much */
-    respos = str-PyString_AS_STRING(res);
+    respos = str - PyBytes_AS_STRING(res);
    if (respos<ressize)
       /* If this falls res will be NULL */
-	_PyString_Resize(&res, respos);
+	PyBytes_Resize(res, respos);
    Py_XDECREF(errorHandler);
    Py_XDECREF(exc);
    return res;
@@ -2979,20 +2992,20 @@ static int encode_mbcs(PyObject **repr,

    if (*repr == NULL) {
 	/* Create string object */
-	*repr = PyString_FromStringAndSize(NULL, mbcssize);
+	*repr = PyBytes_FromStringAndSize(NULL, mbcssize);
 	if (*repr == NULL)
 	    return -1;
    }
    else {
 	/* Extend string object */
-	n = PyString_Size(*repr);
-	if (_PyString_Resize(repr, n + mbcssize) < 0)
+	n = PyBytes_Size(*repr);
+	if (PyBytes_Resize(*repr, n + mbcssize) < 0)
 	    return -1;
    }

    /* Do the conversion */
    if (size > 0) {
-	char *s = PyString_AS_STRING(*repr) + n;
+	char *s = PyBytes_AS_STRING(*repr) + n;
 	if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) {
 	    PyErr_SetFromWindowsErrWithFilename(0, NULL);
 	    return -1;
@@ -5630,9 +5643,9 @@ unicode_encode(PyUnicodeObject *self, PyObject *args)
    v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors);
    if (v == NULL)
        goto onError;
-    if (!PyString_Check(v) && !PyUnicode_Check(v)) {
+    if (!PyBytes_Check(v)) {
        PyErr_Format(PyExc_TypeError,
-                     "encoder did not return a string/unicode object "
+                     "encoder did not return a bytes object "
                     "(type=%.400s)",
                     v->ob_type->tp_name);
        Py_DECREF(v);
@@ -6797,9 +6810,11 @@ unicode_splitlines(PyUnicodeObject *self, PyObject *args)
 }

 static
-PyObject *unicode_str(PyUnicodeObject *self)
+PyObject *unicode_str(PyObject *self)
 {
-    return PyUnicode_AsEncodedString((PyObject *)self, NULL, NULL);
+    PyObject *res = _PyUnicode_AsDefaultEncodedString(self, NULL);
+    Py_XINCREF(res);
+    return res;
 }

 PyDoc_STRVAR(swapcase__doc__,
@@ -8021,7 +8036,6 @@ void _PyUnicode_Init(void)
    if (!unicode_empty)
 	return;

-    strcpy(unicode_default_encoding, "ascii");
    for (i = 0; i < 256; i++)
 	unicode_latin1[i] = NULL;
    if (PyType_Ready(&PyUnicode_Type) < 0)

--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -696,9 +696,9 @@ tok_stdin_decode(struct tok_state *tok, char **inp)
 	if (utf8 == NULL)
 		goto error_clear;

-	assert(PyString_Check(utf8));
-	converted = new_string(PyString_AS_STRING(utf8),
-			       PyString_GET_SIZE(utf8));
+	assert(PyBytes_Check(utf8));
+	converted = new_string(PyBytes_AS_STRING(utf8),
+			       PyBytes_GET_SIZE(utf8));
 	Py_DECREF(utf8);
 	if (converted == NULL)
 		goto error_nomem;

--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3101,8 +3101,9 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding)
                    Py_DECREF(u);
                    return NULL;
                }
-                r = PyString_AsString(w);
-                rn = PyString_Size(w);
+                assert(PyBytes_Check(w));
+                r = PyBytes_AsString(w);
+                rn = PyBytes_Size(w);
                assert(rn % 2 == 0);
                for (i = 0; i < rn; i += 2) {
                    sprintf(p, "\\u%02x%02x",

--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -412,6 +412,36 @@ PyDoc_STRVAR(cmp_doc,
 \n\
 Return negative if x<y, zero if x==y, positive if x>y.");

+
+static char *
+source_as_string(PyObject *cmd)
+{
+	char *str;
+	Py_ssize_t size;
+
+	if (!PyObject_CheckReadBuffer(cmd) &&
+	    !PyUnicode_Check(cmd)) {
+		PyErr_SetString(PyExc_TypeError,
+			   "eval()/exec() arg 1 must be a string, bytes or code object");
+		return NULL;
+	}
+
+	if (PyUnicode_Check(cmd)) {
+		cmd = _PyUnicode_AsDefaultEncodedString(cmd, NULL);
+		if (cmd == NULL)
+			return NULL;
+	}
+	if (PyObject_AsReadBuffer(cmd, (const void **)&str, &size) < 0) {
+		return NULL;
+	}
+	if (strlen(str) != size) {
+		PyErr_SetString(PyExc_TypeError,
+				"source code string cannot contain null bytes");
+		return NULL;
+	}
+	return str;
+}
+
 static PyObject *
 builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
 {
@@ -422,8 +452,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
 	int dont_inherit = 0;
 	int supplied_flags = 0;
 	PyCompilerFlags cf;
-	PyObject *result = NULL, *cmd, *tmp = NULL;
-	Py_ssize_t length;
+	PyObject *cmd;
 	static char *kwlist[] = {"source", "filename", "mode", "flags",
 				 "dont_inherit", NULL};

@@ -432,22 +461,11 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
 					 &supplied_flags, &dont_inherit))
 		return NULL;

-	cf.cf_flags = supplied_flags;
+	cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8;

-	if (PyUnicode_Check(cmd)) {
-		tmp = PyUnicode_AsUTF8String(cmd);
-		if (tmp == NULL)
-			return NULL;
-		cmd = tmp;
-		cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
-	}
-	if (PyObject_AsReadBuffer(cmd, (const void **)&str, &length))
+	str = source_as_string(cmd);
+	if (str == NULL)
 		return NULL;
-	if ((size_t)length != strlen(str)) {
-		PyErr_SetString(PyExc_TypeError,
-				"compile() expected string without null bytes");
-		goto cleanup;
-	}

 	if (strcmp(startstr, "exec") == 0)
 		start = Py_file_input;
@@ -458,7 +476,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
 	else {
 		PyErr_SetString(PyExc_ValueError,
 		   "compile() arg 3 must be 'exec' or 'eval' or 'single'");
-		goto cleanup;
+		return NULL;
 	}

 	if (supplied_flags &
@@ -466,17 +484,14 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
 	{
 		PyErr_SetString(PyExc_ValueError,
 				"compile(): unrecognised flags");
-		goto cleanup;
+		return NULL;
 	}
 	/* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */

 	if (!dont_inherit) {
 		PyEval_MergeCompilerFlags(&cf);
 	}
-	result = Py_CompileStringFlags(str, filename, start, &cf);
-cleanup:
-	Py_XDECREF(tmp);
-	return result;
+	return Py_CompileStringFlags(str, filename, start, &cf);
 }

 PyDoc_STRVAR(compile_doc,
@@ -584,28 +599,14 @@ builtin_eval(PyObject *self, PyObject *args)
 		return PyEval_EvalCode((PyCodeObject *) cmd, globals, locals);
 	}

-	if (!PyString_Check(cmd) &&
-	    !PyUnicode_Check(cmd)) {
-		PyErr_SetString(PyExc_TypeError,
-			   "eval() arg 1 must be a string or code object");
+	str = source_as_string(cmd);
+	if (str == NULL)
 		return NULL;
-	}
-	cf.cf_flags = 0;

-	if (PyUnicode_Check(cmd)) {
-		tmp = PyUnicode_AsUTF8String(cmd);
-		if (tmp == NULL)
-			return NULL;
-		cmd = tmp;
-		cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
-	}
-	if (PyString_AsStringAndSize(cmd, &str, NULL)) {
-		Py_XDECREF(tmp);
-		return NULL;
-	}
 	while (*str == ' ' || *str == '\t')
 		str++;

+	cf.cf_flags = PyCF_SOURCE_IS_UTF8;
 	(void)PyEval_MergeCompilerFlags(&cf);
 	result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
 	Py_XDECREF(tmp);
@@ -694,25 +695,16 @@ builtin_exec(PyObject *self, PyObject *args)
 				       locals);
 	}
 	else {
-		PyObject *tmp = NULL;
-		char *str;
+		char *str = source_as_string(prog);
 		PyCompilerFlags cf;
-		cf.cf_flags = 0;
-		if (PyUnicode_Check(prog)) {
-			tmp = PyUnicode_AsUTF8String(prog);
-			if (tmp == NULL)
-				return NULL;
-			prog = tmp;
-			cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
-		}
-		if (PyString_AsStringAndSize(prog, &str, NULL))
+		if (str == NULL)
 			return NULL;
+		cf.cf_flags = PyCF_SOURCE_IS_UTF8;
 		if (PyEval_MergeCompilerFlags(&cf))
 			v = PyRun_StringFlags(str, Py_file_input, globals,
 					      locals, &cf);
 		else
 			v = PyRun_String(str, Py_file_input, globals, locals);
-		Py_XDECREF(tmp);
 	}
 	if (v == NULL)
 		return NULL;

--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -883,7 +883,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 		char **buffer;
 		const char *encoding;
 		PyObject *s;
-		int size, recode_strings;
+		int recode_strings;
+		Py_ssize_t size;
+		char *ptr;

 		/* Get 'e' parameter: the encoding name */
 		encoding = (const char *)va_arg(*p_va, const char *);
@@ -912,6 +914,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 		if (!recode_strings && PyString_Check(arg)) {
 			s = arg;
 			Py_INCREF(s);
+			size = PyString_GET_SIZE(s);
+			ptr = PyString_AS_STRING(s);
 		}
 		else {
 		    	PyObject *u;
@@ -931,14 +935,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 			if (s == NULL)
 				return converterr("(encoding failed)",
 						  arg, msgbuf, bufsize);
-			if (!PyString_Check(s)) {
+			if (!PyBytes_Check(s)) {
 				Py_DECREF(s);
 				return converterr(
-					"(encoder failed to return a string)",
+					"(encoder failed to return bytes)",
 					arg, msgbuf, bufsize);
 			}
+			size = PyBytes_GET_SIZE(s);
+			ptr = PyBytes_AS_STRING(s);
 		}
-		size = PyString_GET_SIZE(s);

 		/* Write output; output is guaranteed to be 0-terminated */
 		if (*format == '#') { 
@@ -994,9 +999,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 						arg, msgbuf, bufsize);
 				}
 			}
-			memcpy(*buffer,
-			       PyString_AS_STRING(s),
-			       size + 1);
+			memcpy(*buffer, ptr, size+1);
 			STORE_SIZE(size);
 		} else {
 			/* Using a 0-terminated buffer:
@@ -1012,8 +1015,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 			   PyMem_Free()ing it after usage

 			*/
-			if ((Py_ssize_t)strlen(PyString_AS_STRING(s))
-								!= size) {
+			if ((Py_ssize_t)strlen(ptr) != size) {
 				Py_DECREF(s);
 				return converterr(
 					"(encoded string without NULL bytes)",
@@ -1030,9 +1032,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 				return converterr("(cleanup problem)",
 						arg, msgbuf, bufsize);
 			}
-			memcpy(*buffer,
-			       PyString_AS_STRING(s),
-			       size + 1);
+			memcpy(*buffer, ptr, size+1);
 		}
 		Py_DECREF(s);
 		break;

--- a/Python/import.c
+++ b/Python/import.c
@@ -1254,6 +1254,9 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf,
 	for (i = 0; i < npath; i++) {
 		PyObject *copy = NULL;
 		PyObject *v = PyList_GetItem(path, i);
+		PyObject *origv = v;
+		char *base;
+		Py_ssize_t size;
 		if (!v)
 			return NULL;
 		if (PyUnicode_Check(v)) {
@@ -1263,15 +1266,24 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf,
 				return NULL;
 			v = copy;
 		}
-		else
-		if (!PyString_Check(v))
+		if (PyString_Check(v)) {
+			base = PyString_AS_STRING(v);
+			size = PyString_GET_SIZE(v);
+		}
+		else if (PyBytes_Check(v)) {
+			base = PyBytes_AS_STRING(v);
+			size = PyBytes_GET_SIZE(v);
+		}
+		else {
+			Py_XDECREF(copy);
 			continue;
-		len = PyString_GET_SIZE(v);
+		}
+		len = size;
 		if (len + 2 + namelen + MAXSUFFIXSIZE >= buflen) {
 			Py_XDECREF(copy);
 			continue; /* Too long */
 		}
-		strcpy(buf, PyString_AS_STRING(v));
+		strcpy(buf, base);
 		if (strlen(buf) != len) {
 			Py_XDECREF(copy);
 			continue; /* v contains '\0' */
@@ -1282,7 +1294,7 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf,
 			PyObject *importer;

 			importer = get_path_importer(path_importer_cache,
-						     path_hooks, v);
+						     path_hooks, origv);
 			if (importer == NULL) {
 				Py_XDECREF(copy);
 				return NULL;

--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -263,14 +263,14 @@ w_object(PyObject *v, WFILE *p)
 			return;
 		}
 		w_byte(TYPE_UNICODE, p);
-		n = PyString_GET_SIZE(utf8);
+		n = PyBytes_GET_SIZE(utf8);
 		if (n > INT_MAX) {
 			p->depth--;
 			p->error = 1;
 			return;
 		}
 		w_long((long)n, p);
-		w_string(PyString_AS_STRING(utf8), (int)n, p);
+		w_string(PyBytes_AS_STRING(utf8), (int)n, p);
 		Py_DECREF(utf8);
 	}
 	else if (PyTuple_Check(v)) {
@@ -1031,7 +1031,7 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
 		if (wf.ptr - base > PY_SSIZE_T_MAX) {
 			Py_DECREF(wf.str);
 			PyErr_SetString(PyExc_OverflowError,
-					"too much marshall data for a string");
+					"too much marshal data for a string");
 			return NULL;
 		}
 		_PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base));