Commit f1006618 authored by Martin v. Löwis's avatar Martin v. Löwis

Change Py_BuildValue to generate Unicode objects for
's' and 'c' codes.
Change pickle to dump bytes objects using the 'S'
code, and to load the 'S' code as byte objects.
Change datetime and array to generate and expect
bytes objects in reduce/unreduce.
parent 093633ba
...@@ -506,6 +506,20 @@ class Pickler: ...@@ -506,6 +506,20 @@ class Pickler:
self.memoize(obj) self.memoize(obj)
dispatch[str8] = save_string dispatch[str8] = save_string
def save_bytes(self, obj, pack=struct.pack):
    """Pickle a bytes object using the string opcodes (like save_string).

    Binary mode (self.bin true):
      * fewer than 256 bytes: SHORT_BINSTRING, one length byte, payload;
      * otherwise: BINSTRING, the length packed as "<i", payload.
    Text mode: STRING, repr(obj) without its leading 'b', and a newline.

    The object is memoized after it has been written.

    `pack` is bound as a default argument, in the same way save_unicode
    binds it, so the BINSTRING branch does not depend on a global name
    `pack` being defined.
    """
    if self.bin:
        n = len(obj)
        if n < 256:
            # Length fits in a single byte.
            self.write(SHORT_BINSTRING + bytes([n]) + bytes(obj))
        else:
            self.write(BINSTRING + pack("<i", n) + bytes(obj))
    else:
        # Strip leading 'b'
        self.write(STRING + bytes(repr(obj).lstrip("b")) + b'\n')
    self.memoize(obj)
dispatch[bytes] = save_bytes
def save_unicode(self, obj, pack=struct.pack): def save_unicode(self, obj, pack=struct.pack):
if self.bin: if self.bin:
encoded = obj.encode('utf-8') encoded = obj.encode('utf-8')
...@@ -931,12 +945,12 @@ class Unpickler: ...@@ -931,12 +945,12 @@ class Unpickler:
break break
else: else:
raise ValueError, "insecure string pickle" raise ValueError, "insecure string pickle"
self.append(str8(codecs.escape_decode(rep)[0])) self.append(bytes(codecs.escape_decode(rep)[0]))
dispatch[STRING[0]] = load_string dispatch[STRING[0]] = load_string
def load_binstring(self): def load_binstring(self):
len = mloads(b'i' + self.read(4)) len = mloads(b'i' + self.read(4))
self.append(str8(self.read(len))) self.append(self.read(len))
dispatch[BINSTRING[0]] = load_binstring dispatch[BINSTRING[0]] = load_binstring
def load_unicode(self): def load_unicode(self):
...@@ -950,7 +964,7 @@ class Unpickler: ...@@ -950,7 +964,7 @@ class Unpickler:
def load_short_binstring(self): def load_short_binstring(self):
len = ord(self.read(1)) len = ord(self.read(1))
self.append(str8(self.read(len))) self.append(self.read(len))
dispatch[SHORT_BINSTRING[0]] = load_short_binstring dispatch[SHORT_BINSTRING[0]] = load_short_binstring
def load_tuple(self): def load_tuple(self):
......
...@@ -1097,8 +1097,7 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase): ...@@ -1097,8 +1097,7 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase):
# This shouldn't blow up because of the month byte alone. If # This shouldn't blow up because of the month byte alone. If
# the implementation changes to do more-careful checking, it may # the implementation changes to do more-careful checking, it may
# blow up because other fields are insane. # blow up because other fields are insane.
# XXX Maybe this will have to become bytes? self.theclass(bytes(base[:2] + chr(ord_byte) + base[3:]))
self.theclass(str8(base[:2] + chr(ord_byte) + base[3:]))
############################################################################# #############################################################################
# datetime tests # datetime tests
......
...@@ -1126,7 +1126,7 @@ array_reduce(arrayobject *array) ...@@ -1126,7 +1126,7 @@ array_reduce(arrayobject *array)
Py_INCREF(dict); Py_INCREF(dict);
} }
if (array->ob_size > 0) { if (array->ob_size > 0) {
result = Py_BuildValue("O(cs#)O", result = Py_BuildValue("O(cy#)O",
array->ob_type, array->ob_type,
array->ob_descr->typecode, array->ob_descr->typecode,
array->ob_item, array->ob_item,
......
...@@ -1151,6 +1151,92 @@ save_string(Picklerobject *self, PyObject *args, int doput) ...@@ -1151,6 +1151,92 @@ save_string(Picklerobject *self, PyObject *args, int doput)
} }
/* Pickle a bytes object using the STRING / SHORT_BINSTRING / BINSTRING
 * opcodes.
 *
 *   self   pickler whose write_func receives the output
 *   args   the bytes object to save
 *   doput  when non-zero, put(self, args) is called after the data has
 *          been written
 *
 * Returns 0 on success and -1 on failure.
 */
static int
save_bytes(Picklerobject *self, PyObject *args, int doput)
{
    int size, len;
    PyObject *repr = NULL;

    /* NOTE(review): the result of PyBytes_Size() is stored in an int;
       if that function returns a wider type, very large objects would be
       truncated here and the INT_MAX test below could never fail.
       Confirm against the PyBytes_Size() prototype. */
    if ((size = PyBytes_Size(args)) < 0)
        return -1;

    if (!self->bin) {
        /* Text mode: opcode, repr of the object, newline. */
        char *repr_str;
        static char string = STRING;

        if (!(repr = PyObject_ReprStr8(args)))
            return -1;

        if ((len = PyString_Size(repr)) < 0)
            goto err;
        repr_str = PyString_AS_STRING((PyStringObject *)repr);

        /* Strip the leading 'b' of the repr so that only the quoted
           literal follows the opcode. */
        if (repr_str[0] == 'b') {
            repr_str++;
            len--;
        }

        if (self->write_func(self, &string, 1) < 0)
            goto err;

        if (self->write_func(self, repr_str, len) < 0)
            goto err;

        if (self->write_func(self, "\n", 1) < 0)
            goto err;

        Py_XDECREF(repr);
    }
    else {
        /* Binary mode: opcode, length header, raw payload.  The size was
           already fetched and validated at function entry. */
        int i;
        char c_str[5];

        if (size < 256) {
            /* One-byte length. */
            c_str[0] = SHORT_BINSTRING;
            c_str[1] = size;
            len = 2;
        }
        else if (size <= INT_MAX) {
            /* Four-byte length, least significant byte first. */
            c_str[0] = BINSTRING;
            for (i = 1; i < 5; i++)
                c_str[i] = (int)(size >> ((i - 1) * 8));
            len = 5;
        }
        else
            return -1;    /* string too large */

        if (self->write_func(self, c_str, len) < 0)
            return -1;

        if (size > 128 && Pdata_Check(self->file)) {
            /* Larger payloads going to a Pdata target are appended as
               the object itself rather than copied through write_func. */
            if (write_other(self, NULL, 0) < 0) return -1;
            PDATA_APPEND(self->file, args, -1);
        }
        else {
            if (self->write_func(self,
                                 PyBytes_AsString(args),
                                 size) < 0)
                return -1;
        }
    }

    if (doput)
        if (put(self, args) < 0)
            return -1;

    return 0;

  err:
    Py_XDECREF(repr);
    return -1;
}
/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
backslash and newline characters to \uXXXX escapes. */ backslash and newline characters to \uXXXX escapes. */
static PyObject * static PyObject *
...@@ -2086,11 +2172,11 @@ save(Picklerobject *self, PyObject *args, int pers_save) ...@@ -2086,11 +2172,11 @@ save(Picklerobject *self, PyObject *args, int pers_save)
type = args->ob_type; type = args->ob_type;
switch (type->tp_name[0]) { switch (type->tp_name[0]) {
case 'b': case 'b': /* XXX may want to save short byte strings here. */
if (args == Py_False || args == Py_True) { if (args == Py_False || args == Py_True) {
res = save_bool(self, args); res = save_bool(self, args);
goto finally; goto finally;
} }
break; break;
case 'i': case 'i':
if (type == &PyLong_Type) { if (type == &PyLong_Type) {
...@@ -2197,6 +2283,11 @@ save(Picklerobject *self, PyObject *args, int pers_save) ...@@ -2197,6 +2283,11 @@ save(Picklerobject *self, PyObject *args, int pers_save)
res = save_global(self, args, NULL); res = save_global(self, args, NULL);
goto finally; goto finally;
} }
else if (type == &PyBytes_Type) {
res = save_bytes(self, args, 1);
goto finally;
}
break;
} }
if (!pers_save && self->inst_pers_func) { if (!pers_save && self->inst_pers_func) {
...@@ -3131,11 +3222,17 @@ load_string(Unpicklerobject *self) ...@@ -3131,11 +3222,17 @@ load_string(Unpicklerobject *self)
goto insecure; goto insecure;
/********************************************/ /********************************************/
/* XXX avoid going through str8 here. */
str = PyString_DecodeEscape(p, len, NULL, 0, NULL); str = PyString_DecodeEscape(p, len, NULL, 0, NULL);
free(s); free(s);
if (str) { if (str) {
PDATA_PUSH(self->stack, str, -1); PyObject *str2 = PyBytes_FromStringAndSize(
res = 0; PyString_AsString(str), PyString_Size(str));
Py_DECREF(str);
if (str2) {
PDATA_PUSH(self->stack, str2, -1);
res = 0;
}
} }
return res; return res;
...@@ -3160,7 +3257,7 @@ load_binstring(Unpicklerobject *self) ...@@ -3160,7 +3257,7 @@ load_binstring(Unpicklerobject *self)
if (self->read_func(self, &s, l) < 0) if (self->read_func(self, &s, l) < 0)
return -1; return -1;
if (!( py_string = PyString_FromStringAndSize(s, l))) if (!( py_string = PyBytes_FromStringAndSize(s, l)))
return -1; return -1;
PDATA_PUSH(self->stack, py_string, -1); PDATA_PUSH(self->stack, py_string, -1);
...@@ -3182,7 +3279,7 @@ load_short_binstring(Unpicklerobject *self) ...@@ -3182,7 +3279,7 @@ load_short_binstring(Unpicklerobject *self)
if (self->read_func(self, &s, l) < 0) return -1; if (self->read_func(self, &s, l) < 0) return -1;
if (!( py_string = PyString_FromStringAndSize(s, l))) return -1; if (!( py_string = PyBytes_FromStringAndSize(s, l))) return -1;
PDATA_PUSH(self->stack, py_string, -1); PDATA_PUSH(self->stack, py_string, -1);
return 0; return 0;
......
...@@ -2183,15 +2183,15 @@ date_new(PyTypeObject *type, PyObject *args, PyObject *kw) ...@@ -2183,15 +2183,15 @@ date_new(PyTypeObject *type, PyObject *args, PyObject *kw)
/* Check for invocation from pickle with __getstate__ state */ /* Check for invocation from pickle with __getstate__ state */
if (PyTuple_GET_SIZE(args) == 1 && if (PyTuple_GET_SIZE(args) == 1 &&
PyString_Check(state = PyTuple_GET_ITEM(args, 0)) && PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) &&
PyString_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE && PyBytes_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE &&
MONTH_IS_SANE(PyString_AS_STRING(state)[2])) MONTH_IS_SANE(PyBytes_AS_STRING(state)[2]))
{ {
PyDateTime_Date *me; PyDateTime_Date *me;
me = (PyDateTime_Date *) (type->tp_alloc(type, 0)); me = (PyDateTime_Date *) (type->tp_alloc(type, 0));
if (me != NULL) { if (me != NULL) {
char *pdata = PyString_AS_STRING(state); char *pdata = PyBytes_AS_STRING(state);
memcpy(me->data, pdata, _PyDateTime_DATE_DATASIZE); memcpy(me->data, pdata, _PyDateTime_DATE_DATASIZE);
me->hashcode = -1; me->hashcode = -1;
} }
...@@ -2509,13 +2509,13 @@ date_replace(PyDateTime_Date *self, PyObject *args, PyObject *kw) ...@@ -2509,13 +2509,13 @@ date_replace(PyDateTime_Date *self, PyObject *args, PyObject *kw)
return clone; return clone;
} }
static PyObject *date_getstate(PyDateTime_Date *self); static PyObject *date_getstate(PyDateTime_Date *self, int hashable);
static long static long
date_hash(PyDateTime_Date *self) date_hash(PyDateTime_Date *self)
{ {
if (self->hashcode == -1) { if (self->hashcode == -1) {
PyObject *temp = date_getstate(self); PyObject *temp = date_getstate(self, 1);
if (temp != NULL) { if (temp != NULL) {
self->hashcode = PyObject_Hash(temp); self->hashcode = PyObject_Hash(temp);
Py_DECREF(temp); Py_DECREF(temp);
...@@ -2543,18 +2543,22 @@ date_weekday(PyDateTime_Date *self) ...@@ -2543,18 +2543,22 @@ date_weekday(PyDateTime_Date *self)
/* __getstate__ isn't exposed */ /* __getstate__ isn't exposed */
static PyObject * static PyObject *
date_getstate(PyDateTime_Date *self) date_getstate(PyDateTime_Date *self, int hashable)
{ {
return Py_BuildValue( PyObject* field;
"(N)", if (hashable)
PyString_FromStringAndSize((char *)self->data, field = PyString_FromStringAndSize(
_PyDateTime_DATE_DATASIZE)); (char*)self->data, _PyDateTime_DATE_DATASIZE);
else
field = PyBytes_FromStringAndSize(
(char*)self->data, _PyDateTime_DATE_DATASIZE);
return Py_BuildValue("(N)", field);
} }
static PyObject * static PyObject *
date_reduce(PyDateTime_Date *self, PyObject *arg) date_reduce(PyDateTime_Date *self, PyObject *arg)
{ {
return Py_BuildValue("(ON)", self->ob_type, date_getstate(self)); return Py_BuildValue("(ON)", self->ob_type, date_getstate(self, 0));
} }
static PyMethodDef date_methods[] = { static PyMethodDef date_methods[] = {
...@@ -2998,9 +3002,9 @@ time_new(PyTypeObject *type, PyObject *args, PyObject *kw) ...@@ -2998,9 +3002,9 @@ time_new(PyTypeObject *type, PyObject *args, PyObject *kw)
/* Check for invocation from pickle with __getstate__ state */ /* Check for invocation from pickle with __getstate__ state */
if (PyTuple_GET_SIZE(args) >= 1 && if (PyTuple_GET_SIZE(args) >= 1 &&
PyTuple_GET_SIZE(args) <= 2 && PyTuple_GET_SIZE(args) <= 2 &&
PyString_Check(state = PyTuple_GET_ITEM(args, 0)) && PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) &&
PyString_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE && PyBytes_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE &&
((unsigned char) (PyString_AS_STRING(state)[0])) < 24) ((unsigned char) (PyBytes_AS_STRING(state)[0])) < 24)
{ {
PyDateTime_Time *me; PyDateTime_Time *me;
char aware; char aware;
...@@ -3016,7 +3020,7 @@ time_new(PyTypeObject *type, PyObject *args, PyObject *kw) ...@@ -3016,7 +3020,7 @@ time_new(PyTypeObject *type, PyObject *args, PyObject *kw)
aware = (char)(tzinfo != Py_None); aware = (char)(tzinfo != Py_None);
me = (PyDateTime_Time *) (type->tp_alloc(type, aware)); me = (PyDateTime_Time *) (type->tp_alloc(type, aware));
if (me != NULL) { if (me != NULL) {
char *pdata = PyString_AS_STRING(state); char *pdata = PyBytes_AS_STRING(state);
memcpy(me->data, pdata, _PyDateTime_TIME_DATASIZE); memcpy(me->data, pdata, _PyDateTime_TIME_DATASIZE);
me->hashcode = -1; me->hashcode = -1;
...@@ -3331,7 +3335,7 @@ time_getstate(PyDateTime_Time *self) ...@@ -3331,7 +3335,7 @@ time_getstate(PyDateTime_Time *self)
PyObject *basestate; PyObject *basestate;
PyObject *result = NULL; PyObject *result = NULL;
basestate = PyString_FromStringAndSize((char *)self->data, basestate = PyBytes_FromStringAndSize((char *)self->data,
_PyDateTime_TIME_DATASIZE); _PyDateTime_TIME_DATASIZE);
if (basestate != NULL) { if (basestate != NULL) {
if (! HASTZINFO(self) || self->tzinfo == Py_None) if (! HASTZINFO(self) || self->tzinfo == Py_None)
...@@ -3513,9 +3517,9 @@ datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw) ...@@ -3513,9 +3517,9 @@ datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw)
/* Check for invocation from pickle with __getstate__ state */ /* Check for invocation from pickle with __getstate__ state */
if (PyTuple_GET_SIZE(args) >= 1 && if (PyTuple_GET_SIZE(args) >= 1 &&
PyTuple_GET_SIZE(args) <= 2 && PyTuple_GET_SIZE(args) <= 2 &&
PyString_Check(state = PyTuple_GET_ITEM(args, 0)) && PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) &&
PyString_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE && PyBytes_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE &&
MONTH_IS_SANE(PyString_AS_STRING(state)[2])) MONTH_IS_SANE(PyBytes_AS_STRING(state)[2]))
{ {
PyDateTime_DateTime *me; PyDateTime_DateTime *me;
char aware; char aware;
...@@ -3531,7 +3535,7 @@ datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw) ...@@ -3531,7 +3535,7 @@ datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw)
aware = (char)(tzinfo != Py_None); aware = (char)(tzinfo != Py_None);
me = (PyDateTime_DateTime *) (type->tp_alloc(type , aware)); me = (PyDateTime_DateTime *) (type->tp_alloc(type , aware));
if (me != NULL) { if (me != NULL) {
char *pdata = PyString_AS_STRING(state); char *pdata = PyBytes_AS_STRING(state);
memcpy(me->data, pdata, _PyDateTime_DATETIME_DATASIZE); memcpy(me->data, pdata, _PyDateTime_DATETIME_DATASIZE);
me->hashcode = -1; me->hashcode = -1;
...@@ -4375,8 +4379,8 @@ datetime_getstate(PyDateTime_DateTime *self) ...@@ -4375,8 +4379,8 @@ datetime_getstate(PyDateTime_DateTime *self)
PyObject *basestate; PyObject *basestate;
PyObject *result = NULL; PyObject *result = NULL;
basestate = PyString_FromStringAndSize((char *)self->data, basestate = PyBytes_FromStringAndSize((char *)self->data,
_PyDateTime_DATETIME_DATASIZE); _PyDateTime_DATETIME_DATASIZE);
if (basestate != NULL) { if (basestate != NULL) {
if (! HASTZINFO(self) || self->tzinfo == Py_None) if (! HASTZINFO(self) || self->tzinfo == Py_None)
result = PyTuple_Pack(1, basestate); result = PyTuple_Pack(1, basestate);
......
...@@ -2724,6 +2724,9 @@ PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); ...@@ -2724,6 +2724,9 @@ PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
static PyObject * static PyObject *
bytes_reduce(PyBytesObject *self) bytes_reduce(PyBytesObject *self)
{ {
/* XXX: This currently returns a Py_UNICODE-widened string
in the tuple which is completely useless. Pickle stopped
using it for that reason. */
return Py_BuildValue("(O(s#))", return Py_BuildValue("(O(s#))",
self->ob_type, self->ob_type,
self->ob_bytes == NULL ? "" : self->ob_bytes, self->ob_bytes == NULL ? "" : self->ob_bytes,
......
...@@ -831,28 +831,32 @@ my_basename(char *name) ...@@ -831,28 +831,32 @@ my_basename(char *name)
static PyObject * static PyObject *
SyntaxError_str(PySyntaxErrorObject *self) SyntaxError_str(PySyntaxErrorObject *self)
{ {
int have_filename = 0;
int have_lineno = 0; int have_lineno = 0;
char *filename = 0;
/* XXX -- do all the additional formatting with filename and /* XXX -- do all the additional formatting with filename and
lineno here */ lineno here */
have_filename = (self->filename != NULL) && if (self->filename) {
PyString_Check(self->filename); if (PyString_Check(self->filename))
filename = PyString_AsString(self->filename);
else if (PyUnicode_Check(self->filename))
filename = PyUnicode_AsString(self->filename);
}
have_lineno = (self->lineno != NULL) && PyInt_CheckExact(self->lineno); have_lineno = (self->lineno != NULL) && PyInt_CheckExact(self->lineno);
if (!have_filename && !have_lineno) if (!filename && !have_lineno)
return PyObject_Unicode(self->msg ? self->msg : Py_None); return PyObject_Unicode(self->msg ? self->msg : Py_None);
if (have_filename && have_lineno) if (filename && have_lineno)
return PyUnicode_FromFormat("%S (%s, line %ld)", return PyUnicode_FromFormat("%S (%s, line %ld)",
self->msg ? self->msg : Py_None, self->msg ? self->msg : Py_None,
my_basename(PyString_AS_STRING(self->filename)), my_basename(filename),
PyInt_AsLong(self->lineno)); PyInt_AsLong(self->lineno));
else if (have_filename) else if (filename)
return PyUnicode_FromFormat("%S (%s)", return PyUnicode_FromFormat("%S (%s)",
self->msg ? self->msg : Py_None, self->msg ? self->msg : Py_None,
my_basename(PyString_AS_STRING(self->filename))); my_basename(filename));
else /* only have_lineno */ else /* only have_lineno */
return PyUnicode_FromFormat("%S (line %ld)", return PyUnicode_FromFormat("%S (line %ld)",
self->msg ? self->msg : Py_None, self->msg ? self->msg : Py_None,
......
...@@ -387,7 +387,7 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags) ...@@ -387,7 +387,7 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
{ {
char p[1]; char p[1];
p[0] = (char)va_arg(*p_va, int); p[0] = (char)va_arg(*p_va, int);
return PyString_FromStringAndSize(p, 1); return PyUnicode_FromStringAndSize(p, 1);
} }
case 'C': case 'C':
{ {
...@@ -438,7 +438,7 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags) ...@@ -438,7 +438,7 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
} }
n = (Py_ssize_t)m; n = (Py_ssize_t)m;
} }
v = PyString_FromStringAndSize(str, n); v = PyUnicode_FromStringAndSize(str, n);
} }
return v; return v;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment