Commit ad7d8d10 authored by Guido van Rossum

Rough and dirty job -- allow concatenation of bytes and arbitrary
buffer-supporting objects (Unicode always excluded), and also of
str and bytes.
(For some reason u"" + b"" doesn't fail, I'll investigate later.)
parent dc0b1a10
...@@ -34,6 +34,7 @@ PyAPI_DATA(PyTypeObject) PyBytes_Type; ...@@ -34,6 +34,7 @@ PyAPI_DATA(PyTypeObject) PyBytes_Type;
/* Direct API functions */ /* Direct API functions */
PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *); PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *);
PyAPI_FUNC(PyObject *) PyBytes_Concat(PyObject *, PyObject *);
PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t); PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *); PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
PyAPI_FUNC(char *) PyBytes_AsString(PyObject *); PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);
......
...@@ -371,8 +371,10 @@ class BytesTest(unittest.TestCase): ...@@ -371,8 +371,10 @@ class BytesTest(unittest.TestCase):
b1 = bytes("abc") b1 = bytes("abc")
b2 = bytes("def") b2 = bytes("def")
self.assertEqual(b1 + b2, bytes("abcdef")) self.assertEqual(b1 + b2, bytes("abcdef"))
self.assertRaises(TypeError, lambda: b1 + "def") self.assertEqual(b1 + "def", bytes("abcdef"))
self.assertRaises(TypeError, lambda: "abc" + b2) self.assertEqual("def" + b1, bytes("defabc"))
self.assertRaises(TypeError, lambda: b1 + u"def")
##self.assertRaises(TypeError, lambda: u"abc" + b2) # XXX FIXME
def test_repeat(self): def test_repeat(self):
b = bytes("abc") b = bytes("abc")
...@@ -393,6 +395,14 @@ class BytesTest(unittest.TestCase): ...@@ -393,6 +395,14 @@ class BytesTest(unittest.TestCase):
self.assertEqual(b, bytes("abcdef")) self.assertEqual(b, bytes("abcdef"))
self.assertEqual(b, b1) self.assertEqual(b, b1)
self.failUnless(b is b1) self.failUnless(b is b1)
b += "xyz"
self.assertEqual(b, b"abcdefxyz")
try:
b += u""
except TypeError:
pass
else:
self.fail("bytes += unicode didn't raise TypeError")
def test_irepeat(self): def test_irepeat(self):
b = bytes("abc") b = bytes("abc")
...@@ -643,13 +653,35 @@ class BytesTest(unittest.TestCase): ...@@ -643,13 +653,35 @@ class BytesTest(unittest.TestCase):
q = pm.loads(ps) q = pm.loads(ps)
self.assertEqual(b, q) self.assertEqual(b, q)
def test_strip(self):
b = b'mississippi'
self.assertEqual(b.strip(b'i'), b'mississipp')
self.assertEqual(b.strip(b'm'), b'ississippi')
self.assertEqual(b.strip(b'pi'), b'mississ')
self.assertEqual(b.strip(b'im'), b'ssissipp')
self.assertEqual(b.strip(b'pim'), b'ssiss')
def test_lstrip(self):
b = b'mississippi'
self.assertEqual(b.lstrip(b'i'), b'mississippi')
self.assertEqual(b.lstrip(b'm'), b'ississippi')
self.assertEqual(b.lstrip(b'pi'), b'mississippi')
self.assertEqual(b.lstrip(b'im'), b'ssissippi')
self.assertEqual(b.lstrip(b'pim'), b'ssissippi')
def test_rstrip(self):
b = b'mississippi'
self.assertEqual(b.rstrip(b'i'), b'mississipp')
self.assertEqual(b.rstrip(b'm'), b'mississippi')
self.assertEqual(b.rstrip(b'pi'), b'mississ')
self.assertEqual(b.rstrip(b'im'), b'mississipp')
self.assertEqual(b.rstrip(b'pim'), b'mississ')
# Optimizations: # Optimizations:
# __iter__? (optimization) # __iter__? (optimization)
# __reversed__? (optimization) # __reversed__? (optimization)
# XXX Some string methods? (Those that don't use character properties) # XXX More string methods? (Those that don't use character properties)
# lstrip, rstrip, strip?? (currently un-pepped)
# join
# There are tests in string_tests.py that are more # There are tests in string_tests.py that are more
# comprehensive for things like split, partition, etc. # comprehensive for things like split, partition, etc.
......
...@@ -31,7 +31,10 @@ PyBytes_Init(void) ...@@ -31,7 +31,10 @@ PyBytes_Init(void)
/* end nullbytes support */ /* end nullbytes support */
static int _getbytevalue(PyObject* arg, int *value) /* Helpers */
static int
_getbytevalue(PyObject* arg, int *value)
{ {
PyObject *intarg = PyNumber_Int(arg); PyObject *intarg = PyNumber_Int(arg);
if (! intarg) if (! intarg)
...@@ -45,6 +48,24 @@ static int _getbytevalue(PyObject* arg, int *value) ...@@ -45,6 +48,24 @@ static int _getbytevalue(PyObject* arg, int *value)
return 1; return 1;
} }
/* Obtain a read view of obj's data through its type's buffer procs.

   On success, *ptr receives the pointer filled in by bf_getreadbuffer for
   segment 0 and that call's return value is passed straight back; callers
   in this file use it as the byte count and treat a negative value as
   failure.

   If obj is a Unicode object, or its type has no buffer procs, or lacks
   bf_getreadbuffer / bf_getsegcount, or does not report exactly one
   segment, *ptr is set to NULL and -1 is returned.  This function itself
   raises no exception on that path; reporting is left to the caller.

   File-local helper, hence static. */
static Py_ssize_t
_getbuffer(PyObject *obj, void **ptr)
{
    PyBufferProcs *buffer = obj->ob_type->tp_as_buffer;

    if (buffer == NULL ||
        PyUnicode_Check(obj) ||
        buffer->bf_getreadbuffer == NULL ||
        buffer->bf_getsegcount == NULL ||
        buffer->bf_getsegcount(obj, NULL) != 1)
    {
        *ptr = NULL;
        return -1;
    }

    return buffer->bf_getreadbuffer(obj, 0, ptr);
}
/* Direct API functions */ /* Direct API functions */
PyObject * PyObject *
...@@ -140,56 +161,63 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size) ...@@ -140,56 +161,63 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
return 0; return 0;
} }
/* Functions stuffed into the type object */ PyObject *
PyBytes_Concat(PyObject *a, PyObject *b)
static Py_ssize_t
bytes_length(PyBytesObject *self)
{
return self->ob_size;
}
static PyObject *
bytes_concat(PyBytesObject *self, PyObject *other)
{ {
Py_ssize_t asize, bsize, size;
void *aptr, *bptr;
PyBytesObject *result; PyBytesObject *result;
Py_ssize_t mysize;
Py_ssize_t size;
if (!PyBytes_Check(other)) { asize = _getbuffer(a, &aptr);
PyErr_Format(PyExc_TypeError, bsize = _getbuffer(b, &bptr);
"can't concat bytes to %.100s", other->ob_type->tp_name); if (asize < 0 || bsize < 0) {
PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
a->ob_type->tp_name, b->ob_type->tp_name);
return NULL; return NULL;
} }
mysize = self->ob_size; size = asize + bsize;
size = mysize + ((PyBytesObject *)other)->ob_size;
if (size < 0) if (size < 0)
return PyErr_NoMemory(); return PyErr_NoMemory();
result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size); result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, size);
if (result != NULL) { if (result != NULL) {
memcpy(result->ob_bytes, self->ob_bytes, self->ob_size); memcpy(result->ob_bytes, aptr, asize);
memcpy(result->ob_bytes + self->ob_size, memcpy(result->ob_bytes + asize, bptr, bsize);
((PyBytesObject *)other)->ob_bytes,
((PyBytesObject *)other)->ob_size);
} }
return (PyObject *)result; return (PyObject *)result;
} }
/* Functions stuffed into the type object */
static Py_ssize_t
bytes_length(PyBytesObject *self)
{
return self->ob_size;
}
static PyObject *
bytes_concat(PyBytesObject *self, PyObject *other)
{
return PyBytes_Concat((PyObject *)self, other);
}
static PyObject * static PyObject *
bytes_iconcat(PyBytesObject *self, PyObject *other) bytes_iconcat(PyBytesObject *self, PyObject *other)
{ {
Py_ssize_t mysize; void *optr;
Py_ssize_t osize; Py_ssize_t osize;
Py_ssize_t mysize;
Py_ssize_t size; Py_ssize_t size;
if (!PyBytes_Check(other)) { osize = _getbuffer(other, &optr);
if (osize < 0) {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"can't concat bytes to %.100s", other->ob_type->tp_name); "can't concat bytes to %.100s", other->ob_type->tp_name);
return NULL; return NULL;
} }
mysize = self->ob_size; mysize = self->ob_size;
osize = ((PyBytesObject *)other)->ob_size;
size = mysize + osize; size = mysize + osize;
if (size < 0) if (size < 0)
return PyErr_NoMemory(); return PyErr_NoMemory();
...@@ -197,7 +225,7 @@ bytes_iconcat(PyBytesObject *self, PyObject *other) ...@@ -197,7 +225,7 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
self->ob_size = size; self->ob_size = size;
else if (PyBytes_Resize((PyObject *)self, size) < 0) else if (PyBytes_Resize((PyObject *)self, size) < 0)
return NULL; return NULL;
memcpy(self->ob_bytes + mysize, ((PyBytesObject *)other)->ob_bytes, osize); memcpy(self->ob_bytes + mysize, optr, osize);
Py_INCREF(self); Py_INCREF(self);
return (PyObject *)self; return (PyObject *)self;
} }
...@@ -366,15 +394,10 @@ static int ...@@ -366,15 +394,10 @@ static int
bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi, bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
PyObject *values) PyObject *values)
{ {
int avail; Py_ssize_t avail, needed;
int needed; void *bytes;
char *bytes;
if (values == NULL) { if (values == (PyObject *)self) {
bytes = NULL;
needed = 0;
}
else if (values == (PyObject *)self || !PyBytes_Check(values)) {
/* Make a copy an call this function recursively */ /* Make a copy an call this function recursively */
int err; int err;
values = PyBytes_FromObject(values); values = PyBytes_FromObject(values);
...@@ -384,10 +407,19 @@ bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi, ...@@ -384,10 +407,19 @@ bytes_setslice(PyBytesObject *self, Py_ssize_t lo, Py_ssize_t hi,
Py_DECREF(values); Py_DECREF(values);
return err; return err;
} }
if (values == NULL) {
/* del b[lo:hi] */
bytes = NULL;
needed = 0;
}
else { else {
assert(PyBytes_Check(values)); needed = _getbuffer(values, &bytes);
bytes = ((PyBytesObject *)values)->ob_bytes; if (needed < 0) {
needed = ((PyBytesObject *)values)->ob_size; PyErr_Format(PyExc_TypeError,
"can't set bytes slice from %.100s",
values->ob_type->tp_name);
return -1;
}
} }
if (lo < 0) if (lo < 0)
...@@ -840,42 +872,26 @@ bytes_str(PyBytesObject *self) ...@@ -840,42 +872,26 @@ bytes_str(PyBytesObject *self)
static PyObject * static PyObject *
bytes_richcompare(PyObject *self, PyObject *other, int op) bytes_richcompare(PyObject *self, PyObject *other, int op)
{ {
PyBufferProcs *self_buffer, *other_buffer;
Py_ssize_t self_size, other_size; Py_ssize_t self_size, other_size;
void *self_bytes, *other_bytes; void *self_bytes, *other_bytes;
PyObject *res; PyObject *res;
Py_ssize_t minsize; Py_ssize_t minsize;
int cmp; int cmp;
/* For backwards compatibility, bytes can be compared to anything that /* Bytes can be compared to anything that supports the (binary) buffer
supports the (binary) buffer API. Except Unicode. */ API. Except Unicode. */
if (PyUnicode_Check(self) || PyUnicode_Check(other)) { self_size = _getbuffer(self, &self_bytes);
if (self_size < 0) {
Py_INCREF(Py_NotImplemented); Py_INCREF(Py_NotImplemented);
return Py_NotImplemented; return Py_NotImplemented;
} }
self_buffer = self->ob_type->tp_as_buffer; other_size = _getbuffer(other, &other_bytes);
if (self_buffer == NULL || if (other_size < 0) {
self_buffer->bf_getreadbuffer == NULL ||
self_buffer->bf_getsegcount == NULL ||
self_buffer->bf_getsegcount(self, NULL) != 1)
{
Py_INCREF(Py_NotImplemented); Py_INCREF(Py_NotImplemented);
return Py_NotImplemented; return Py_NotImplemented;
} }
self_size = self_buffer->bf_getreadbuffer(self, 0, &self_bytes);
other_buffer = other->ob_type->tp_as_buffer;
if (other_buffer == NULL ||
other_buffer->bf_getreadbuffer == NULL ||
other_buffer->bf_getsegcount == NULL ||
other_buffer->bf_getsegcount(self, NULL) != 1)
{
Py_INCREF(Py_NotImplemented);
return Py_NotImplemented;
}
other_size = other_buffer->bf_getreadbuffer(other, 0, &other_bytes);
if (self_size != other_size && (op == Py_EQ || op == Py_NE)) { if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
/* Shortcut: if the lengths differ, the objects differ */ /* Shortcut: if the lengths differ, the objects differ */
...@@ -2435,6 +2451,93 @@ bytes_remove(PyBytesObject *self, PyObject *arg) ...@@ -2435,6 +2451,93 @@ bytes_remove(PyBytesObject *self, PyObject *arg)
Py_RETURN_NONE; Py_RETURN_NONE;
} }
/* XXX These two helpers could be optimized if argsize == 1 */
Py_ssize_t
lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
void *argptr, Py_ssize_t argsize)
{
Py_ssize_t i = 0;
while (i < mysize && memchr(argptr, myptr[i], argsize))
i++;
return i;
}
Py_ssize_t
rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
void *argptr, Py_ssize_t argsize)
{
Py_ssize_t i = mysize - 1;
while (i >= 0 && memchr(argptr, myptr[i], argsize))
i--;
return i + 1;
}
PyDoc_STRVAR(strip__doc__,
"B.strip(bytes) -> bytes\n\
\n\
Strip leading and trailing bytes contained in the argument.");
/* B.strip(arg): return a new bytes object holding self's contents with
   every leading and trailing byte that occurs in arg removed.

   arg must be a bytes object; otherwise TypeError is set and NULL is
   returned.  self is not modified. */
static PyObject *
bytes_strip(PyBytesObject *self, PyObject *arg)
{
    Py_ssize_t left, right, mysize, argsize;
    void *myptr, *argptr;

    if (arg == NULL || !PyBytes_Check(arg)) {
        PyErr_SetString(PyExc_TypeError, "strip() requires a bytes argument");
        return NULL;
    }
    myptr = self->ob_bytes;
    mysize = self->ob_size;
    argptr = ((PyBytesObject *)arg)->ob_bytes;
    argsize = ((PyBytesObject *)arg)->ob_size;

    left = lstrip_helper(myptr, mysize, argptr, argsize);
    /* If the left scan consumed everything, the right scan would run all
       the way back to 0, making right - left negative.  The result is
       simply empty in that case, so pin right to left. */
    if (left == mysize)
        right = left;
    else
        right = rstrip_helper(myptr, mysize, argptr, argsize);
    return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
}
PyDoc_STRVAR(lstrip__doc__,
"B.lstrip(bytes) -> bytes\n\
\n\
Strip leading bytes contained in the argument.");
/* B.lstrip(arg): return a new bytes object holding self's contents with
   every leading byte that occurs in arg removed.

   arg must be a bytes object; otherwise TypeError is set and NULL is
   returned.  self is not modified. */
static PyObject *
bytes_lstrip(PyBytesObject *self, PyObject *arg)
{
    Py_ssize_t left, right, mysize, argsize;
    void *myptr, *argptr;

    if (arg == NULL || !PyBytes_Check(arg)) {
        /* Name the method actually called, not strip(). */
        PyErr_SetString(PyExc_TypeError, "lstrip() requires a bytes argument");
        return NULL;
    }
    myptr = self->ob_bytes;
    mysize = self->ob_size;
    argptr = ((PyBytesObject *)arg)->ob_bytes;
    argsize = ((PyBytesObject *)arg)->ob_size;

    left = lstrip_helper(myptr, mysize, argptr, argsize);
    right = mysize;     /* nothing is removed from the right-hand end */
    return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
}
PyDoc_STRVAR(rstrip__doc__,
"B.rstrip(bytes) -> bytes\n\
\n\
Strip trailing bytes contained in the argument.");
/* B.rstrip(arg): return a new bytes object holding self's contents with
   every trailing byte that occurs in arg removed.

   arg must be a bytes object; otherwise TypeError is set and NULL is
   returned.  self is not modified. */
static PyObject *
bytes_rstrip(PyBytesObject *self, PyObject *arg)
{
    Py_ssize_t left, right, mysize, argsize;
    void *myptr, *argptr;

    if (arg == NULL || !PyBytes_Check(arg)) {
        /* Name the method actually called, not strip(). */
        PyErr_SetString(PyExc_TypeError, "rstrip() requires a bytes argument");
        return NULL;
    }
    myptr = self->ob_bytes;
    mysize = self->ob_size;
    argptr = ((PyBytesObject *)arg)->ob_bytes;
    argsize = ((PyBytesObject *)arg)->ob_size;

    left = 0;           /* nothing is removed from the left-hand end */
    right = rstrip_helper(myptr, mysize, argptr, argsize);
    return PyBytes_FromStringAndSize(self->ob_bytes + left, right - left);
}
PyDoc_STRVAR(decode_doc, PyDoc_STRVAR(decode_doc,
"B.decode([encoding[,errors]]) -> unicode obect.\n\ "B.decode([encoding[,errors]]) -> unicode obect.\n\
...@@ -2659,6 +2762,9 @@ bytes_methods[] = { ...@@ -2659,6 +2762,9 @@ bytes_methods[] = {
{"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__}, {"reverse", (PyCFunction)bytes_reverse, METH_NOARGS, reverse__doc__},
{"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__}, {"pop", (PyCFunction)bytes_pop, METH_VARARGS, pop__doc__},
{"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__}, {"remove", (PyCFunction)bytes_remove, METH_O, remove__doc__},
{"strip", (PyCFunction)bytes_strip, METH_O, strip__doc__},
{"lstrip", (PyCFunction)bytes_lstrip, METH_O, lstrip__doc__},
{"rstrip", (PyCFunction)bytes_rstrip, METH_O, rstrip__doc__},
{"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc}, {"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
{"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc}, {"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
{"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS, {"fromhex", (PyCFunction)bytes_fromhex, METH_VARARGS|METH_CLASS,
......
...@@ -1347,7 +1347,7 @@ merge_class_dict(PyObject* dict, PyObject* aclass) ...@@ -1347,7 +1347,7 @@ merge_class_dict(PyObject* dict, PyObject* aclass)
/* Helper for PyObject_Dir without arguments: returns the local scope. */ /* Helper for PyObject_Dir without arguments: returns the local scope. */
static PyObject * static PyObject *
_dir_locals() _dir_locals(void)
{ {
PyObject *names; PyObject *names;
PyObject *locals = PyEval_GetLocals(); PyObject *locals = PyEval_GetLocals();
...@@ -1892,4 +1892,3 @@ _PyTrash_destroy_chain(void) ...@@ -1892,4 +1892,3 @@ _PyTrash_destroy_chain(void)
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
...@@ -948,6 +948,8 @@ string_concat(register PyStringObject *a, register PyObject *bb) ...@@ -948,6 +948,8 @@ string_concat(register PyStringObject *a, register PyObject *bb)
if (PyUnicode_Check(bb)) if (PyUnicode_Check(bb))
return PyUnicode_Concat((PyObject *)a, bb); return PyUnicode_Concat((PyObject *)a, bb);
#endif #endif
if (PyBytes_Check(bb))
return PyBytes_Concat((PyObject *)a, bb);
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"cannot concatenate 'str' and '%.200s' objects", "cannot concatenate 'str' and '%.200s' objects",
bb->ob_type->tp_name); bb->ob_type->tp_name);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment