Commit 68498a9a authored by Guido van Rossum

More coding by random modification.

Encoding now returns bytes instead of str8.
eval(), exec(), compile() now accept unicode or bytes.
parent 5673df5b
...@@ -208,8 +208,8 @@ class BuiltinTest(unittest.TestCase): ...@@ -208,8 +208,8 @@ class BuiltinTest(unittest.TestCase):
def test_compile(self): def test_compile(self):
compile('print(1)\n', '', 'exec') compile('print(1)\n', '', 'exec')
bom = '\xef\xbb\xbf' ## bom = b'\xef\xbb\xbf'
compile((bom + 'print(1)\n').encode("latin-1"), '', 'exec') ## compile(bom + b'print(1)\n', '', 'exec')
compile(source='pass', filename='?', mode='exec') compile(source='pass', filename='?', mode='exec')
compile(dont_inherit=0, filename='tmp', source='0', mode='eval') compile(dont_inherit=0, filename='tmp', source='0', mode='eval')
compile('pass', '?', dont_inherit=1, mode='exec') compile('pass', '?', dont_inherit=1, mode='exec')
...@@ -220,7 +220,7 @@ class BuiltinTest(unittest.TestCase): ...@@ -220,7 +220,7 @@ class BuiltinTest(unittest.TestCase):
self.assertRaises(TypeError, compile, 'pass', '?', 'exec', self.assertRaises(TypeError, compile, 'pass', '?', 'exec',
mode='eval', source='0', filename='tmp') mode='eval', source='0', filename='tmp')
if have_unicode: if have_unicode:
compile(str(b'print(u"\xc3\xa5")\n', 'utf8'), '', 'exec') compile('print(u"\xe5")\n', '', 'exec')
self.assertRaises(TypeError, compile, chr(0), 'f', 'exec') self.assertRaises(TypeError, compile, chr(0), 'f', 'exec')
self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad') self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad')
...@@ -338,10 +338,9 @@ class BuiltinTest(unittest.TestCase): ...@@ -338,10 +338,9 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(eval(str('a'), globals, locals), 1) self.assertEqual(eval(str('a'), globals, locals), 1)
self.assertEqual(eval(str('b'), globals, locals), 200) self.assertEqual(eval(str('b'), globals, locals), 200)
self.assertEqual(eval(str('c'), globals, locals), 300) self.assertEqual(eval(str('c'), globals, locals), 300)
bom = '\xef\xbb\xbf' ## bom = b'\xef\xbb\xbf'
self.assertEqual(eval((bom + 'a').encode("latin-1"), globals, locals), 1) ## self.assertEqual(eval(bom + b'a', globals, locals), 1)
self.assertEqual(eval(str(b'u"\xc3\xa5"', 'utf8'), globals), self.assertEqual(eval('u"\xe5"', globals), u"\xe5")
str(b'\xc3\xa5', 'utf8'))
self.assertRaises(TypeError, eval) self.assertRaises(TypeError, eval)
self.assertRaises(TypeError, eval, ()) self.assertRaises(TypeError, eval, ())
...@@ -675,16 +674,14 @@ class BuiltinTest(unittest.TestCase): ...@@ -675,16 +674,14 @@ class BuiltinTest(unittest.TestCase):
self.assertRaises(TypeError, getattr, sys, 1) self.assertRaises(TypeError, getattr, sys, 1)
self.assertRaises(TypeError, getattr, sys, 1, "foo") self.assertRaises(TypeError, getattr, sys, 1, "foo")
self.assertRaises(TypeError, getattr) self.assertRaises(TypeError, getattr)
if have_unicode: self.assertRaises(AttributeError, getattr, sys, chr(sys.maxunicode))
self.assertRaises(UnicodeError, getattr, sys, chr(sys.maxunicode))
def test_hasattr(self): def test_hasattr(self):
import sys import sys
self.assert_(hasattr(sys, 'stdout')) self.assert_(hasattr(sys, 'stdout'))
self.assertRaises(TypeError, hasattr, sys, 1) self.assertRaises(TypeError, hasattr, sys, 1)
self.assertRaises(TypeError, hasattr) self.assertRaises(TypeError, hasattr)
if have_unicode: self.assertEqual(False, hasattr(sys, chr(sys.maxunicode)))
self.assertRaises(UnicodeError, hasattr, sys, chr(sys.maxunicode))
def test_hash(self): def test_hash(self):
hash(None) hash(None)
......
...@@ -79,6 +79,7 @@ PyObject * ...@@ -79,6 +79,7 @@ PyObject *
PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size) PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
{ {
PyBytesObject *new; PyBytesObject *new;
int alloc;
assert(size >= 0); assert(size >= 0);
...@@ -86,18 +87,23 @@ PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size) ...@@ -86,18 +87,23 @@ PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
if (new == NULL) if (new == NULL)
return NULL; return NULL;
if (size == 0) if (size == 0) {
new->ob_bytes = NULL; new->ob_bytes = NULL;
alloc = 0;
}
else { else {
new->ob_bytes = PyMem_Malloc(size); alloc = size + 1;
new->ob_bytes = PyMem_Malloc(alloc);
if (new->ob_bytes == NULL) { if (new->ob_bytes == NULL) {
Py_DECREF(new); Py_DECREF(new);
return NULL; return NULL;
} }
if (bytes != NULL) if (bytes != NULL)
memcpy(new->ob_bytes, bytes, size); memcpy(new->ob_bytes, bytes, size);
new->ob_bytes[size] = '\0'; /* Trailing null byte */
} }
new->ob_size = new->ob_alloc = size; new->ob_size = size;
new->ob_alloc = alloc;
return (PyObject *)new; return (PyObject *)new;
} }
...@@ -134,7 +140,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size) ...@@ -134,7 +140,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
/* Major downsize; resize down to exact size */ /* Major downsize; resize down to exact size */
alloc = size; alloc = size;
} }
else if (size <= alloc) { else if (size < alloc) {
/* Within allocated size; quick exit */ /* Within allocated size; quick exit */
((PyBytesObject *)self)->ob_size = size; ((PyBytesObject *)self)->ob_size = size;
return 0; return 0;
...@@ -147,6 +153,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size) ...@@ -147,6 +153,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
/* Major upsize; resize up to exact size */ /* Major upsize; resize up to exact size */
alloc = size; alloc = size;
} }
if (alloc <= size)
alloc = size + 1;
sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc); sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
if (sval == NULL) { if (sval == NULL) {
...@@ -158,6 +166,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size) ...@@ -158,6 +166,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
((PyBytesObject *)self)->ob_size = size; ((PyBytesObject *)self)->ob_size = size;
((PyBytesObject *)self)->ob_alloc = alloc; ((PyBytesObject *)self)->ob_alloc = alloc;
((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
return 0; return 0;
} }
...@@ -221,7 +231,7 @@ bytes_iconcat(PyBytesObject *self, PyObject *other) ...@@ -221,7 +231,7 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
size = mysize + osize; size = mysize + osize;
if (size < 0) if (size < 0)
return PyErr_NoMemory(); return PyErr_NoMemory();
if (size <= self->ob_alloc) if (size < self->ob_alloc)
self->ob_size = size; self->ob_size = size;
else if (PyBytes_Resize((PyObject *)self, size) < 0) else if (PyBytes_Resize((PyObject *)self, size) < 0)
return NULL; return NULL;
...@@ -243,7 +253,7 @@ bytes_repeat(PyBytesObject *self, Py_ssize_t count) ...@@ -243,7 +253,7 @@ bytes_repeat(PyBytesObject *self, Py_ssize_t count)
size = mysize * count; size = mysize * count;
if (count != 0 && size / count != mysize) if (count != 0 && size / count != mysize)
return PyErr_NoMemory(); return PyErr_NoMemory();
result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size); result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
if (result != NULL && size != 0) { if (result != NULL && size != 0) {
if (mysize == 1) if (mysize == 1)
memset(result->ob_bytes, self->ob_bytes[0], size); memset(result->ob_bytes, self->ob_bytes[0], size);
...@@ -268,7 +278,7 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count) ...@@ -268,7 +278,7 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
size = mysize * count; size = mysize * count;
if (count != 0 && size / count != mysize) if (count != 0 && size / count != mysize)
return PyErr_NoMemory(); return PyErr_NoMemory();
if (size <= self->ob_alloc) if (size < self->ob_alloc)
self->ob_size = size; self->ob_size = size;
else if (PyBytes_Resize((PyObject *)self, size) < 0) else if (PyBytes_Resize((PyObject *)self, size) < 0)
return NULL; return NULL;
...@@ -703,7 +713,7 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds) ...@@ -703,7 +713,7 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
} }
bytes = PyString_AS_STRING(encoded); bytes = PyString_AS_STRING(encoded);
size = PyString_GET_SIZE(encoded); size = PyString_GET_SIZE(encoded);
if (size <= self->ob_alloc) if (size < self->ob_alloc)
self->ob_size = size; self->ob_size = size;
else if (PyBytes_Resize((PyObject *)self, size) < 0) { else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Py_DECREF(encoded); Py_DECREF(encoded);
......
...@@ -72,8 +72,11 @@ PyModule_GetName(PyObject *m) ...@@ -72,8 +72,11 @@ PyModule_GetName(PyObject *m)
PyErr_SetString(PyExc_SystemError, "nameless module"); PyErr_SetString(PyExc_SystemError, "nameless module");
return NULL; return NULL;
} }
if (PyUnicode_Check(nameobj)) if (PyUnicode_Check(nameobj)) {
nameobj = _PyUnicode_AsDefaultEncodedString(nameobj, "replace"); nameobj = _PyUnicode_AsDefaultEncodedString(nameobj, NULL);
if (nameobj == NULL)
return NULL;
}
return PyString_AsString(nameobj); return PyString_AsString(nameobj);
} }
......
...@@ -422,7 +422,8 @@ PyObject_Str(PyObject *v) ...@@ -422,7 +422,8 @@ PyObject_Str(PyObject *v)
return NULL; return NULL;
if (PyUnicode_Check(res)) { if (PyUnicode_Check(res)) {
PyObject* str; PyObject* str;
str = PyUnicode_AsEncodedString(res, NULL, NULL); str = _PyUnicode_AsDefaultEncodedString(res, NULL);
Py_XINCREF(str);
Py_DECREF(res); Py_DECREF(res);
if (str) if (str)
res = str; res = str;
...@@ -929,12 +930,12 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) ...@@ -929,12 +930,12 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value)
PyTypeObject *tp = v->ob_type; PyTypeObject *tp = v->ob_type;
int err; int err;
if (!PyString_Check(name)){ if (!PyString_Check(name)) {
/* The Unicode to string conversion is done here because the /* The Unicode to string conversion is done here because the
existing tp_setattro slots expect a string object as name existing tp_setattro slots expect a string object as name
and we wouldn't want to break those. */ and we wouldn't want to break those. */
if (PyUnicode_Check(name)) { if (PyUnicode_Check(name)) {
name = PyUnicode_AsEncodedString(name, NULL, NULL); name = _PyUnicode_AsDefaultEncodedString(name, NULL);
if (name == NULL) if (name == NULL)
return -1; return -1;
} }
...@@ -946,8 +947,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) ...@@ -946,8 +947,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value)
return -1; return -1;
} }
} }
else Py_INCREF(name);
Py_INCREF(name);
PyString_InternInPlace(&name); PyString_InternInPlace(&name);
if (tp->tp_setattro != NULL) { if (tp->tp_setattro != NULL) {
...@@ -961,6 +961,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) ...@@ -961,6 +961,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value)
return err; return err;
} }
Py_DECREF(name); Py_DECREF(name);
assert(name->ob_refcnt >= 1);
if (tp->tp_getattr == NULL && tp->tp_getattro == NULL) if (tp->tp_getattr == NULL && tp->tp_getattro == NULL)
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"'%.100s' object has no attributes " "'%.100s' object has no attributes "
......
...@@ -3181,9 +3181,9 @@ string_encode(PyStringObject *self, PyObject *args) ...@@ -3181,9 +3181,9 @@ string_encode(PyStringObject *self, PyObject *args)
v = PyString_AsEncodedObject((PyObject *)self, encoding, errors); v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
if (v == NULL) if (v == NULL)
goto onError; goto onError;
if (!PyString_Check(v) && !PyUnicode_Check(v)) { if (!PyBytes_Check(v)) {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"encoder did not return a string/unicode object " "[str8] encoder did not return a bytes object "
"(type=%.400s)", "(type=%.400s)",
v->ob_type->tp_name); v->ob_type->tp_name);
Py_DECREF(v); Py_DECREF(v);
......
...@@ -104,13 +104,9 @@ static PyUnicodeObject *unicode_empty; ...@@ -104,13 +104,9 @@ static PyUnicodeObject *unicode_empty;
static PyUnicodeObject *unicode_latin1[256]; static PyUnicodeObject *unicode_latin1[256];
/* Default encoding to use and assume when NULL is passed as encoding /* Default encoding to use and assume when NULL is passed as encoding
parameter; it is initialized by _PyUnicode_Init(). parameter; it is fixed to "utf-8". Always use the
PyUnicode_GetDefaultEncoding() API to access this global. */
Always use the PyUnicode_SetDefaultEncoding() and static const char unicode_default_encoding[] = "utf-8";
PyUnicode_GetDefaultEncoding() APIs to access this global.
*/
static char unicode_default_encoding[100];
Py_UNICODE Py_UNICODE
PyUnicode_GetMax(void) PyUnicode_GetMax(void)
...@@ -711,10 +707,19 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode, ...@@ -711,10 +707,19 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
v = PyCodec_Encode(unicode, encoding, errors); v = PyCodec_Encode(unicode, encoding, errors);
if (v == NULL) if (v == NULL)
goto onError; goto onError;
if (!PyString_Check(v)) { if (!PyBytes_Check(v)) {
if (PyString_Check(v)) {
/* Old codec, turn it into bytes */
PyObject *b = PyBytes_FromObject(v);
Py_DECREF(v);
return b;
}
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"encoder did not return a string object (type=%.400s)", "encoder did not return a bytes object "
v->ob_type->tp_name); "(type=%.400s, encoding=%.20s, errors=%.20s)",
v->ob_type->tp_name,
encoding ? encoding : "NULL",
errors ? errors : "NULL");
Py_DECREF(v); Py_DECREF(v);
goto onError; goto onError;
} }
...@@ -728,12 +733,28 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode, ...@@ -728,12 +733,28 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
const char *errors) const char *errors)
{ {
PyObject *v = ((PyUnicodeObject *)unicode)->defenc; PyObject *v = ((PyUnicodeObject *)unicode)->defenc;
PyObject *b;
if (v) if (v)
return v; return v;
v = PyUnicode_AsEncodedString(unicode, NULL, errors); if (errors != NULL)
if (v && errors == NULL) Py_FatalError("non-NULL encoding in _PyUnicode_AsDefaultEncodedString");
if (errors == NULL) {
b = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
NULL);
}
else {
b = PyUnicode_AsEncodedString(unicode, NULL, errors);
}
if (!b)
return NULL;
v = PyString_FromStringAndSize(PyBytes_AsString(b),
PyBytes_Size(b));
Py_DECREF(b);
if (!errors) {
Py_XINCREF(v);
((PyUnicodeObject *)unicode)->defenc = v; ((PyUnicodeObject *)unicode)->defenc = v;
}
return v; return v;
} }
...@@ -768,21 +789,13 @@ const char *PyUnicode_GetDefaultEncoding(void) ...@@ -768,21 +789,13 @@ const char *PyUnicode_GetDefaultEncoding(void)
int PyUnicode_SetDefaultEncoding(const char *encoding) int PyUnicode_SetDefaultEncoding(const char *encoding)
{ {
PyObject *v; if (strcmp(encoding, unicode_default_encoding) != 0) {
PyErr_Format(PyExc_ValueError,
/* Make sure the encoding is valid. As side effect, this also "Can only set default encoding to %s",
loads the encoding into the codec registry cache. */ unicode_default_encoding);
v = _PyCodec_Lookup(encoding); return -1;
if (v == NULL) }
goto onError;
Py_DECREF(v);
strncpy(unicode_default_encoding,
encoding,
sizeof(unicode_default_encoding));
return 0; return 0;
onError:
return -1;
} }
/* error handling callback helper: /* error handling callback helper:
...@@ -1429,10 +1442,10 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s, ...@@ -1429,10 +1442,10 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
nallocated = size * 4; nallocated = size * 4;
if (nallocated / 4 != size) /* overflow! */ if (nallocated / 4 != size) /* overflow! */
return PyErr_NoMemory(); return PyErr_NoMemory();
v = PyString_FromStringAndSize(NULL, nallocated); v = PyBytes_FromStringAndSize(NULL, nallocated);
if (v == NULL) if (v == NULL)
return NULL; return NULL;
p = PyString_AS_STRING(v); p = PyBytes_AS_STRING(v);
} }
for (i = 0; i < size;) { for (i = 0; i < size;) {
...@@ -1480,13 +1493,13 @@ encodeUCS4: ...@@ -1480,13 +1493,13 @@ encodeUCS4:
/* This was stack allocated. */ /* This was stack allocated. */
nneeded = p - stackbuf; nneeded = p - stackbuf;
assert(nneeded <= nallocated); assert(nneeded <= nallocated);
v = PyString_FromStringAndSize(stackbuf, nneeded); v = PyBytes_FromStringAndSize(stackbuf, nneeded);
} }
else { else {
/* Cut back to size actually needed. */ /* Cut back to size actually needed. */
nneeded = p - PyString_AS_STRING(v); nneeded = p - PyBytes_AS_STRING(v);
assert(nneeded <= nallocated); assert(nneeded <= nallocated);
_PyString_Resize(&v, nneeded); PyBytes_Resize(v, nneeded);
} }
return v; return v;
...@@ -2588,12 +2601,12 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, ...@@ -2588,12 +2601,12 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
/* allocate enough for a simple encoding without /* allocate enough for a simple encoding without
replacements, if we need more, we'll resize */ replacements, if we need more, we'll resize */
res = PyString_FromStringAndSize(NULL, size); res = PyBytes_FromStringAndSize(NULL, size);
if (res == NULL) if (res == NULL)
goto onError; goto onError;
if (size == 0) if (size == 0)
return res; return res;
str = PyString_AS_STRING(res); str = PyBytes_AS_STRING(res);
ressize = size; ressize = size;
while (p<endp) { while (p<endp) {
...@@ -2643,7 +2656,7 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, ...@@ -2643,7 +2656,7 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
p = collend; p = collend;
break; break;
case 4: /* xmlcharrefreplace */ case 4: /* xmlcharrefreplace */
respos = str-PyString_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
/* determine replacement size (temporarily (mis)uses p) */ /* determine replacement size (temporarily (mis)uses p) */
for (p = collstart, repsize = 0; p < collend; ++p) { for (p = collstart, repsize = 0; p < collend; ++p) {
if (*p<10) if (*p<10)
...@@ -2670,9 +2683,9 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, ...@@ -2670,9 +2683,9 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
if (requiredsize > ressize) { if (requiredsize > ressize) {
if (requiredsize<2*ressize) if (requiredsize<2*ressize)
requiredsize = 2*ressize; requiredsize = 2*ressize;
if (_PyString_Resize(&res, requiredsize)) if (PyBytes_Resize(res, requiredsize))
goto onError; goto onError;
str = PyString_AS_STRING(res) + respos; str = PyBytes_AS_STRING(res) + respos;
ressize = requiredsize; ressize = requiredsize;
} }
/* generate replacement (temporarily (mis)uses p) */ /* generate replacement (temporarily (mis)uses p) */
...@@ -2690,17 +2703,17 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, ...@@ -2690,17 +2703,17 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
/* need more space? (at least enough for what we /* need more space? (at least enough for what we
have+the replacement+the rest of the string, so have+the replacement+the rest of the string, so
we won't have to check space for encodable characters) */ we won't have to check space for encodable characters) */
respos = str-PyString_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
repsize = PyUnicode_GET_SIZE(repunicode); repsize = PyUnicode_GET_SIZE(repunicode);
requiredsize = respos+repsize+(endp-collend); requiredsize = respos+repsize+(endp-collend);
if (requiredsize > ressize) { if (requiredsize > ressize) {
if (requiredsize<2*ressize) if (requiredsize<2*ressize)
requiredsize = 2*ressize; requiredsize = 2*ressize;
if (_PyString_Resize(&res, requiredsize)) { if (PyBytes_Resize(res, requiredsize)) {
Py_DECREF(repunicode); Py_DECREF(repunicode);
goto onError; goto onError;
} }
str = PyString_AS_STRING(res) + respos; str = PyBytes_AS_STRING(res) + respos;
ressize = requiredsize; ressize = requiredsize;
} }
/* check if there is anything unencodable in the replacement /* check if there is anything unencodable in the replacement
...@@ -2721,10 +2734,10 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, ...@@ -2721,10 +2734,10 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
} }
} }
/* Resize if we allocated to much */ /* Resize if we allocated to much */
respos = str-PyString_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
if (respos<ressize) if (respos<ressize)
/* If this falls res will be NULL */ /* If this falls res will be NULL */
_PyString_Resize(&res, respos); PyBytes_Resize(res, respos);
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);
Py_XDECREF(exc); Py_XDECREF(exc);
return res; return res;
...@@ -2979,20 +2992,20 @@ static int encode_mbcs(PyObject **repr, ...@@ -2979,20 +2992,20 @@ static int encode_mbcs(PyObject **repr,
if (*repr == NULL) { if (*repr == NULL) {
/* Create string object */ /* Create string object */
*repr = PyString_FromStringAndSize(NULL, mbcssize); *repr = PyBytes_FromStringAndSize(NULL, mbcssize);
if (*repr == NULL) if (*repr == NULL)
return -1; return -1;
} }
else { else {
/* Extend string object */ /* Extend string object */
n = PyString_Size(*repr); n = PyBytes_Size(*repr);
if (_PyString_Resize(repr, n + mbcssize) < 0) if (PyBytes_Resize(*repr, n + mbcssize) < 0)
return -1; return -1;
} }
/* Do the conversion */ /* Do the conversion */
if (size > 0) { if (size > 0) {
char *s = PyString_AS_STRING(*repr) + n; char *s = PyBytes_AS_STRING(*repr) + n;
if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) { if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) {
PyErr_SetFromWindowsErrWithFilename(0, NULL); PyErr_SetFromWindowsErrWithFilename(0, NULL);
return -1; return -1;
...@@ -5630,9 +5643,9 @@ unicode_encode(PyUnicodeObject *self, PyObject *args) ...@@ -5630,9 +5643,9 @@ unicode_encode(PyUnicodeObject *self, PyObject *args)
v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors); v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors);
if (v == NULL) if (v == NULL)
goto onError; goto onError;
if (!PyString_Check(v) && !PyUnicode_Check(v)) { if (!PyBytes_Check(v)) {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"encoder did not return a string/unicode object " "encoder did not return a bytes object "
"(type=%.400s)", "(type=%.400s)",
v->ob_type->tp_name); v->ob_type->tp_name);
Py_DECREF(v); Py_DECREF(v);
...@@ -6797,9 +6810,11 @@ unicode_splitlines(PyUnicodeObject *self, PyObject *args) ...@@ -6797,9 +6810,11 @@ unicode_splitlines(PyUnicodeObject *self, PyObject *args)
} }
static static
PyObject *unicode_str(PyUnicodeObject *self) PyObject *unicode_str(PyObject *self)
{ {
return PyUnicode_AsEncodedString((PyObject *)self, NULL, NULL); PyObject *res = _PyUnicode_AsDefaultEncodedString(self, NULL);
Py_XINCREF(res);
return res;
} }
PyDoc_STRVAR(swapcase__doc__, PyDoc_STRVAR(swapcase__doc__,
...@@ -8021,7 +8036,6 @@ void _PyUnicode_Init(void) ...@@ -8021,7 +8036,6 @@ void _PyUnicode_Init(void)
if (!unicode_empty) if (!unicode_empty)
return; return;
strcpy(unicode_default_encoding, "ascii");
for (i = 0; i < 256; i++) for (i = 0; i < 256; i++)
unicode_latin1[i] = NULL; unicode_latin1[i] = NULL;
if (PyType_Ready(&PyUnicode_Type) < 0) if (PyType_Ready(&PyUnicode_Type) < 0)
......
...@@ -696,9 +696,9 @@ tok_stdin_decode(struct tok_state *tok, char **inp) ...@@ -696,9 +696,9 @@ tok_stdin_decode(struct tok_state *tok, char **inp)
if (utf8 == NULL) if (utf8 == NULL)
goto error_clear; goto error_clear;
assert(PyString_Check(utf8)); assert(PyBytes_Check(utf8));
converted = new_string(PyString_AS_STRING(utf8), converted = new_string(PyBytes_AS_STRING(utf8),
PyString_GET_SIZE(utf8)); PyBytes_GET_SIZE(utf8));
Py_DECREF(utf8); Py_DECREF(utf8);
if (converted == NULL) if (converted == NULL)
goto error_nomem; goto error_nomem;
......
...@@ -3101,8 +3101,9 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding) ...@@ -3101,8 +3101,9 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding)
Py_DECREF(u); Py_DECREF(u);
return NULL; return NULL;
} }
r = PyString_AsString(w); assert(PyBytes_Check(w));
rn = PyString_Size(w); r = PyBytes_AsString(w);
rn = PyBytes_Size(w);
assert(rn % 2 == 0); assert(rn % 2 == 0);
for (i = 0; i < rn; i += 2) { for (i = 0; i < rn; i += 2) {
sprintf(p, "\\u%02x%02x", sprintf(p, "\\u%02x%02x",
......
...@@ -412,6 +412,36 @@ PyDoc_STRVAR(cmp_doc, ...@@ -412,6 +412,36 @@ PyDoc_STRVAR(cmp_doc,
\n\ \n\
Return negative if x<y, zero if x==y, positive if x>y."); Return negative if x<y, zero if x==y, positive if x>y.");
/* Return the source text of `cmd` as a C string for eval()/exec()/compile().

   `cmd` must either be a unicode object or support the read-buffer
   interface.  A unicode object is first converted with
   _PyUnicode_AsDefaultEncodedString(); the read buffer of the resulting
   object (or of `cmd` itself) is then returned.

   Returns NULL with an exception set when:
     - `cmd` is neither unicode nor a readable buffer (TypeError),
     - the default encoding step fails,
     - the read buffer cannot be obtained,
     - the data contains an embedded null byte (TypeError).

   The returned pointer is not a copy: it points into the buffer exposed
   by PyObject_AsReadBuffer(), so the caller must not free it.
   NOTE(review): presumably the pointer stays valid only as long as the
   object that exposes the buffer is alive -- confirm against callers. */
static char *
source_as_string(PyObject *cmd)
{
    char *str;
    Py_ssize_t size;

    if (!PyObject_CheckReadBuffer(cmd) &&
        !PyUnicode_Check(cmd)) {
        PyErr_SetString(PyExc_TypeError,
            "eval()/exec() arg 1 must be a string, bytes or code object");
        return NULL;
    }

    if (PyUnicode_Check(cmd)) {
        /* Continue with the encoded form of the unicode source. */
        cmd = _PyUnicode_AsDefaultEncodedString(cmd, NULL);
        if (cmd == NULL)
            return NULL;
    }

    if (PyObject_AsReadBuffer(cmd, (const void **)&str, &size) < 0) {
        return NULL;
    }

    /* Reject embedded null bytes: the parser treats the source as a
       C string and would silently stop at the first '\0'.
       The cast makes the signed/unsigned comparison explicit instead of
       relying on implicit conversion of the Py_ssize_t operand.
       NOTE(review): strlen() assumes the buffer is NUL-terminated at
       str[size]; verify this holds for every buffer provider. */
    if (strlen(str) != (size_t)size) {
        PyErr_SetString(PyExc_TypeError,
            "source code string cannot contain null bytes");
        return NULL;
    }
    return str;
}
static PyObject * static PyObject *
builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
{ {
...@@ -422,8 +452,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) ...@@ -422,8 +452,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
int dont_inherit = 0; int dont_inherit = 0;
int supplied_flags = 0; int supplied_flags = 0;
PyCompilerFlags cf; PyCompilerFlags cf;
PyObject *result = NULL, *cmd, *tmp = NULL; PyObject *cmd;
Py_ssize_t length;
static char *kwlist[] = {"source", "filename", "mode", "flags", static char *kwlist[] = {"source", "filename", "mode", "flags",
"dont_inherit", NULL}; "dont_inherit", NULL};
...@@ -432,22 +461,11 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) ...@@ -432,22 +461,11 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
&supplied_flags, &dont_inherit)) &supplied_flags, &dont_inherit))
return NULL; return NULL;
cf.cf_flags = supplied_flags; cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8;
if (PyUnicode_Check(cmd)) { str = source_as_string(cmd);
tmp = PyUnicode_AsUTF8String(cmd); if (str == NULL)
if (tmp == NULL)
return NULL;
cmd = tmp;
cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
}
if (PyObject_AsReadBuffer(cmd, (const void **)&str, &length))
return NULL; return NULL;
if ((size_t)length != strlen(str)) {
PyErr_SetString(PyExc_TypeError,
"compile() expected string without null bytes");
goto cleanup;
}
if (strcmp(startstr, "exec") == 0) if (strcmp(startstr, "exec") == 0)
start = Py_file_input; start = Py_file_input;
...@@ -458,7 +476,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) ...@@ -458,7 +476,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
else { else {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"compile() arg 3 must be 'exec' or 'eval' or 'single'"); "compile() arg 3 must be 'exec' or 'eval' or 'single'");
goto cleanup; return NULL;
} }
if (supplied_flags & if (supplied_flags &
...@@ -466,17 +484,14 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) ...@@ -466,17 +484,14 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
{ {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"compile(): unrecognised flags"); "compile(): unrecognised flags");
goto cleanup; return NULL;
} }
/* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */ /* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */
if (!dont_inherit) { if (!dont_inherit) {
PyEval_MergeCompilerFlags(&cf); PyEval_MergeCompilerFlags(&cf);
} }
result = Py_CompileStringFlags(str, filename, start, &cf); return Py_CompileStringFlags(str, filename, start, &cf);
cleanup:
Py_XDECREF(tmp);
return result;
} }
PyDoc_STRVAR(compile_doc, PyDoc_STRVAR(compile_doc,
...@@ -584,28 +599,14 @@ builtin_eval(PyObject *self, PyObject *args) ...@@ -584,28 +599,14 @@ builtin_eval(PyObject *self, PyObject *args)
return PyEval_EvalCode((PyCodeObject *) cmd, globals, locals); return PyEval_EvalCode((PyCodeObject *) cmd, globals, locals);
} }
if (!PyString_Check(cmd) && str = source_as_string(cmd);
!PyUnicode_Check(cmd)) { if (str == NULL)
PyErr_SetString(PyExc_TypeError,
"eval() arg 1 must be a string or code object");
return NULL; return NULL;
}
cf.cf_flags = 0;
if (PyUnicode_Check(cmd)) {
tmp = PyUnicode_AsUTF8String(cmd);
if (tmp == NULL)
return NULL;
cmd = tmp;
cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
}
if (PyString_AsStringAndSize(cmd, &str, NULL)) {
Py_XDECREF(tmp);
return NULL;
}
while (*str == ' ' || *str == '\t') while (*str == ' ' || *str == '\t')
str++; str++;
cf.cf_flags = PyCF_SOURCE_IS_UTF8;
(void)PyEval_MergeCompilerFlags(&cf); (void)PyEval_MergeCompilerFlags(&cf);
result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf); result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
Py_XDECREF(tmp); Py_XDECREF(tmp);
...@@ -694,25 +695,16 @@ builtin_exec(PyObject *self, PyObject *args) ...@@ -694,25 +695,16 @@ builtin_exec(PyObject *self, PyObject *args)
locals); locals);
} }
else { else {
PyObject *tmp = NULL; char *str = source_as_string(prog);
char *str;
PyCompilerFlags cf; PyCompilerFlags cf;
cf.cf_flags = 0; if (str == NULL)
if (PyUnicode_Check(prog)) {
tmp = PyUnicode_AsUTF8String(prog);
if (tmp == NULL)
return NULL;
prog = tmp;
cf.cf_flags |= PyCF_SOURCE_IS_UTF8;
}
if (PyString_AsStringAndSize(prog, &str, NULL))
return NULL; return NULL;
cf.cf_flags = PyCF_SOURCE_IS_UTF8;
if (PyEval_MergeCompilerFlags(&cf)) if (PyEval_MergeCompilerFlags(&cf))
v = PyRun_StringFlags(str, Py_file_input, globals, v = PyRun_StringFlags(str, Py_file_input, globals,
locals, &cf); locals, &cf);
else else
v = PyRun_String(str, Py_file_input, globals, locals); v = PyRun_String(str, Py_file_input, globals, locals);
Py_XDECREF(tmp);
} }
if (v == NULL) if (v == NULL)
return NULL; return NULL;
......
...@@ -883,7 +883,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, ...@@ -883,7 +883,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
char **buffer; char **buffer;
const char *encoding; const char *encoding;
PyObject *s; PyObject *s;
int size, recode_strings; int recode_strings;
Py_ssize_t size;
char *ptr;
/* Get 'e' parameter: the encoding name */ /* Get 'e' parameter: the encoding name */
encoding = (const char *)va_arg(*p_va, const char *); encoding = (const char *)va_arg(*p_va, const char *);
...@@ -912,6 +914,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, ...@@ -912,6 +914,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
if (!recode_strings && PyString_Check(arg)) { if (!recode_strings && PyString_Check(arg)) {
s = arg; s = arg;
Py_INCREF(s); Py_INCREF(s);
size = PyString_GET_SIZE(s);
ptr = PyString_AS_STRING(s);
} }
else { else {
PyObject *u; PyObject *u;
...@@ -931,14 +935,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, ...@@ -931,14 +935,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
if (s == NULL) if (s == NULL)
return converterr("(encoding failed)", return converterr("(encoding failed)",
arg, msgbuf, bufsize); arg, msgbuf, bufsize);
if (!PyString_Check(s)) { if (!PyBytes_Check(s)) {
Py_DECREF(s); Py_DECREF(s);
return converterr( return converterr(
"(encoder failed to return a string)", "(encoder failed to return bytes)",
arg, msgbuf, bufsize); arg, msgbuf, bufsize);
} }
size = PyBytes_GET_SIZE(s);
ptr = PyBytes_AS_STRING(s);
} }
size = PyString_GET_SIZE(s);
/* Write output; output is guaranteed to be 0-terminated */ /* Write output; output is guaranteed to be 0-terminated */
if (*format == '#') { if (*format == '#') {
...@@ -994,9 +999,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, ...@@ -994,9 +999,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
arg, msgbuf, bufsize); arg, msgbuf, bufsize);
} }
} }
memcpy(*buffer, memcpy(*buffer, ptr, size+1);
PyString_AS_STRING(s),
size + 1);
STORE_SIZE(size); STORE_SIZE(size);
} else { } else {
/* Using a 0-terminated buffer: /* Using a 0-terminated buffer:
...@@ -1012,8 +1015,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, ...@@ -1012,8 +1015,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
PyMem_Free()ing it after usage PyMem_Free()ing it after usage
*/ */
if ((Py_ssize_t)strlen(PyString_AS_STRING(s)) if ((Py_ssize_t)strlen(ptr) != size) {
!= size) {
Py_DECREF(s); Py_DECREF(s);
return converterr( return converterr(
"(encoded string without NULL bytes)", "(encoded string without NULL bytes)",
...@@ -1030,9 +1032,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, ...@@ -1030,9 +1032,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
return converterr("(cleanup problem)", return converterr("(cleanup problem)",
arg, msgbuf, bufsize); arg, msgbuf, bufsize);
} }
memcpy(*buffer, memcpy(*buffer, ptr, size+1);
PyString_AS_STRING(s),
size + 1);
} }
Py_DECREF(s); Py_DECREF(s);
break; break;
......
...@@ -1254,6 +1254,9 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf, ...@@ -1254,6 +1254,9 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf,
for (i = 0; i < npath; i++) { for (i = 0; i < npath; i++) {
PyObject *copy = NULL; PyObject *copy = NULL;
PyObject *v = PyList_GetItem(path, i); PyObject *v = PyList_GetItem(path, i);
PyObject *origv = v;
char *base;
Py_ssize_t size;
if (!v) if (!v)
return NULL; return NULL;
if (PyUnicode_Check(v)) { if (PyUnicode_Check(v)) {
...@@ -1263,15 +1266,24 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf, ...@@ -1263,15 +1266,24 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf,
return NULL; return NULL;
v = copy; v = copy;
} }
else if (PyString_Check(v)) {
if (!PyString_Check(v)) base = PyString_AS_STRING(v);
size = PyString_GET_SIZE(v);
}
else if (PyBytes_Check(v)) {
base = PyBytes_AS_STRING(v);
size = PyBytes_GET_SIZE(v);
}
else {
Py_XDECREF(copy);
continue; continue;
len = PyString_GET_SIZE(v); }
len = size;
if (len + 2 + namelen + MAXSUFFIXSIZE >= buflen) { if (len + 2 + namelen + MAXSUFFIXSIZE >= buflen) {
Py_XDECREF(copy); Py_XDECREF(copy);
continue; /* Too long */ continue; /* Too long */
} }
strcpy(buf, PyString_AS_STRING(v)); strcpy(buf, base);
if (strlen(buf) != len) { if (strlen(buf) != len) {
Py_XDECREF(copy); Py_XDECREF(copy);
continue; /* v contains '\0' */ continue; /* v contains '\0' */
...@@ -1282,7 +1294,7 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf, ...@@ -1282,7 +1294,7 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf,
PyObject *importer; PyObject *importer;
importer = get_path_importer(path_importer_cache, importer = get_path_importer(path_importer_cache,
path_hooks, v); path_hooks, origv);
if (importer == NULL) { if (importer == NULL) {
Py_XDECREF(copy); Py_XDECREF(copy);
return NULL; return NULL;
......
...@@ -263,14 +263,14 @@ w_object(PyObject *v, WFILE *p) ...@@ -263,14 +263,14 @@ w_object(PyObject *v, WFILE *p)
return; return;
} }
w_byte(TYPE_UNICODE, p); w_byte(TYPE_UNICODE, p);
n = PyString_GET_SIZE(utf8); n = PyBytes_GET_SIZE(utf8);
if (n > INT_MAX) { if (n > INT_MAX) {
p->depth--; p->depth--;
p->error = 1; p->error = 1;
return; return;
} }
w_long((long)n, p); w_long((long)n, p);
w_string(PyString_AS_STRING(utf8), (int)n, p); w_string(PyBytes_AS_STRING(utf8), (int)n, p);
Py_DECREF(utf8); Py_DECREF(utf8);
} }
else if (PyTuple_Check(v)) { else if (PyTuple_Check(v)) {
...@@ -1031,7 +1031,7 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) ...@@ -1031,7 +1031,7 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
if (wf.ptr - base > PY_SSIZE_T_MAX) { if (wf.ptr - base > PY_SSIZE_T_MAX) {
Py_DECREF(wf.str); Py_DECREF(wf.str);
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"too much marshall data for a string"); "too much marshal data for a string");
return NULL; return NULL;
} }
_PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)); _PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment