Commit 164fe558 authored by Marc-André Lemburg

This patch changes the way the string .encode() method works slightly

and introduces a new method .decode().

The major change is that strg.encode() will no longer try to convert
Unicode returns from the codec into a string, but instead pass along
the Unicode object as-is. The same is now true for all other codec
return types. The underlying C APIs were changed accordingly.

Note that even though this does have the potential of breaking
existing code, the chances are low, since conversion from Unicode
previously took place using the default encoding, which is normally
set to ASCII, rendering this auto-conversion mechanism useless for
most Unicode encodings.

The good news is that you can now use .encode() and .decode() with
much greater ease and that the door was opened for better accessibility
of the builtin codecs.

As a demonstration of the new feature, the patch includes a few new
codecs which allow string-to-string encoding and decoding (rot13,
hex, zip, uu, base64).

Written by Marc-Andre Lemburg. Copyright assigned to the PSF.
parent bb9a908a
...@@ -2326,30 +2326,44 @@ interned string object with the same value. ...@@ -2326,30 +2326,44 @@ interned string object with the same value.
int size, int size,
const char *encoding, const char *encoding,
const char *errors} const char *errors}
Create a string object by decoding \var{size} bytes of the encoded Creates an object by decoding \var{size} bytes of the encoded
buffer \var{s}. \var{encoding} and \var{errors} have the same meaning buffer \var{s} using the codec registered
for \var{encoding}. \var{encoding} and \var{errors} have the same meaning
as the parameters of the same name in the unicode() builtin as the parameters of the same name in the unicode() builtin
function. The codec to be used is looked up using the Python codec function. The codec to be used is looked up using the Python codec
registry. Returns \NULL{} in case an exception was raised by the registry. Returns \NULL{} in case an exception was raised by the
codec. codec.
\end{cfuncdesc} \end{cfuncdesc}
\begin{cfuncdesc}{PyObject*}{PyString_Encode}{const Py_UNICODE *s, \begin{cfuncdesc}{PyObject*}{PyString_AsDecodedObject}{PyObject *str,
const char *encoding,
const char *errors}
Decodes a string object by passing it to the codec registered
for \var{encoding} and returns the result as a Python
object. \var{encoding} and \var{errors} have the same meaning as the
parameters of the same name in the string .encode() method. The codec
to be used is looked up using the Python codec registry. Returns
\NULL{} in case an exception was raised by the codec.
\end{cfuncdesc}
\begin{cfuncdesc}{PyObject*}{PyString_Encode}{const char *s,
int size, int size,
const char *encoding, const char *encoding,
const char *errors} const char *errors}
Encodes the \ctype{Py_UNICODE} buffer of the given size and returns a Encodes the \ctype{char} buffer of the given size by passing it to
Python string object. \var{encoding} and \var{errors} have the same the codec registered for \var{encoding} and returns a Python object.
\var{encoding} and \var{errors} have the same
meaning as the parameters of the same name in the string .encode() meaning as the parameters of the same name in the string .encode()
method. The codec to be used is looked up using the Python codec method. The codec to be used is looked up using the Python codec
registry. Returns \NULL{} in case an exception was raised by the registry. Returns \NULL{} in case an exception was raised by the
codec. codec.
\end{cfuncdesc} \end{cfuncdesc}
\begin{cfuncdesc}{PyObject*}{PyString_AsEncodedString}{PyObject *unicode, \begin{cfuncdesc}{PyObject*}{PyString_AsEncodedObject}{PyObject *str,
const char *encoding, const char *encoding,
const char *errors} const char *errors}
Encodes a string object and returns the result as Python string Encodes a string object using the codec registered
for \var{encoding} and returns the result as Python
object. \var{encoding} and \var{errors} have the same meaning as the object. \var{encoding} and \var{errors} have the same meaning as the
parameters of the same name in the string .encode() method. The codec parameters of the same name in the string .encode() method. The codec
to be used is looked up using the Python codec registry. Returns to be used is looked up using the Python codec registry. Returns
......
...@@ -78,7 +78,7 @@ extern DL_IMPORT(void) _Py_ReleaseInternedStrings(void); ...@@ -78,7 +78,7 @@ extern DL_IMPORT(void) _Py_ReleaseInternedStrings(void);
/* --- Generic Codecs ----------------------------------------------------- */ /* --- Generic Codecs ----------------------------------------------------- */
/* Create a string object by decoding the encoded string s of the /* Create an object by decoding the encoded string s of the
given size. */ given size. */
extern DL_IMPORT(PyObject*) PyString_Decode( extern DL_IMPORT(PyObject*) PyString_Decode(
...@@ -89,7 +89,7 @@ extern DL_IMPORT(PyObject*) PyString_Decode( ...@@ -89,7 +89,7 @@ extern DL_IMPORT(PyObject*) PyString_Decode(
); );
/* Encodes a char buffer of the given size and returns a /* Encodes a char buffer of the given size and returns a
Python string object. */ Python object. */
extern DL_IMPORT(PyObject*) PyString_Encode( extern DL_IMPORT(PyObject*) PyString_Encode(
const char *s, /* string char buffer */ const char *s, /* string char buffer */
...@@ -98,15 +98,52 @@ extern DL_IMPORT(PyObject*) PyString_Encode( ...@@ -98,15 +98,52 @@ extern DL_IMPORT(PyObject*) PyString_Encode(
const char *errors /* error handling */ const char *errors /* error handling */
); );
/* Encodes a string object and returns the result as Python string /* Encodes a string object and returns the result as Python
object. */ object. */
/* The result is whatever object the codec produced; no conversion back
   to a string is attempted.  Returns NULL if the codec call failed. */
extern DL_IMPORT(PyObject*) PyString_AsEncodedObject(
PyObject *str, /* string object */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
/* Encodes a string object and returns the result as a Python string
object.
If the codec returns a Unicode object, the object is converted
back to a string using the default encoding.
DEPRECATED - use PyString_AsEncodedObject() instead. */
extern DL_IMPORT(PyObject*) PyString_AsEncodedString( extern DL_IMPORT(PyObject*) PyString_AsEncodedString(
PyObject *str, /* string object */ PyObject *str, /* string object */
const char *encoding, /* encoding */ const char *encoding, /* encoding */
const char *errors /* error handling */ const char *errors /* error handling */
); );
/* Decodes a string object by handing it to the codec registered for
   encoding and returns the codec's result unchanged as a Python
   object.
   str must be a string object, otherwise a bad-argument error is set.
   A NULL encoding selects the default encoding.
   Returns NULL with an exception set on failure. */
extern DL_IMPORT(PyObject*) PyString_AsDecodedObject(
PyObject *str, /* string object */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
/* Decodes a string object and returns the result as a Python string
   object.
   If the codec returns a Unicode object, the object is converted
   back to a string using the default encoding.  Any other non-string
   result is rejected with a TypeError.
   Returns NULL with an exception set on failure.
   DEPRECATED - use PyString_AsDecodedObject() instead. */
extern DL_IMPORT(PyObject*) PyString_AsDecodedString(
PyObject *str, /* string object */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
/* Provides access to the internal data buffer and size of a string /* Provides access to the internal data buffer and size of a string
object or the default encoded version of an Unicode object. Passing object or the default encoded version of an Unicode object. Passing
NULL as *len parameter will force the string buffer to be NULL as *len parameter will force the string buffer to be
......
...@@ -72,6 +72,14 @@ class UserString: ...@@ -72,6 +72,14 @@ class UserString:
def center(self, width): return self.__class__(self.data.center(width)) def center(self, width): return self.__class__(self.data.center(width))
def count(self, sub, start=0, end=sys.maxint): def count(self, sub, start=0, end=sys.maxint):
return self.data.count(sub, start, end) return self.data.count(sub, start, end)
def decode(self, encoding=None, errors=None): # XXX improve this?
    # Forward only the arguments the caller actually supplied, so the
    # wrapped string's own defaults apply; errors is only honoured
    # when an encoding is given as well.  The decoded value is
    # re-wrapped in the caller's class.
    if not encoding:
        return self.__class__(self.data.decode())
    if not errors:
        return self.__class__(self.data.decode(encoding))
    return self.__class__(self.data.decode(encoding, errors))
def encode(self, encoding=None, errors=None): # XXX improve this? def encode(self, encoding=None, errors=None): # XXX improve this?
if encoding: if encoding:
if errors: if errors:
......
...@@ -79,4 +79,13 @@ aliases = { ...@@ -79,4 +79,13 @@ aliases = {
'tis260': 'tactis', 'tis260': 'tactis',
'sjis': 'shift_jis', 'sjis': 'shift_jis',
# Content transfer/compression encodings
'rot13': 'rot_13',
'base64': 'base64_codec',
'base_64': 'base64_codec',
'zlib': 'zlib_codec',
'zip': 'zlib_codec',
'hex': 'hex_codec',
'uu': 'uu_codec',
} }
""" Python 'base64_codec' Codec - base64 content transfer encoding
Unlike most of the other codecs which target Unicode, this codec
will return Python string objects for both encode and decode.
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs, base64
### Codec APIs
def base64_encode(input,errors='strict'):
""" Encodes the object input and returns a tuple (output
object, length consumed).
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = base64.encodestring(input)
return (output, len(input))
def base64_decode(input,errors='strict'):
""" Decodes the object input and returns a tuple (output
object, length consumed).
input must be an object which provides the bf_getreadbuf
buffer slot. Python strings, buffer objects and memory
mapped files are examples of objects providing this slot.
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = base64.decodestring(input)
return (output, len(input))
class Codec(codecs.Codec):

    """ Stateless base64 codec built on base64_encode() and
        base64_decode().
    """

    # These have to be real methods.  Binding the module-level functions
    # directly as class attributes turns them into methods, so an
    # instance call would pass the instance itself as 'input' and shift
    # every other argument by one.  The stream classes below go through
    # self.encode / self.decode and depend on this.
    def encode(self, input, errors='strict'):
        """ Encode input; returns (output object, length consumed). """
        return base64_encode(input, errors)

    def decode(self, input, errors='strict'):
        """ Decode input; returns (output object, length consumed). """
        return base64_decode(input, errors)

# The stream classes add nothing of their own; they combine the codec
# methods above with the generic codecs stream machinery.
class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass
### encodings module API
def getregentry():
    """ Return this codec's registry entry as the 4-tuple
        (encoder, decoder, stream reader class, stream writer class).
    """
    entry = (base64_encode, base64_decode, StreamReader, StreamWriter)
    return entry
""" Python 'hex_codec' Codec - 2-digit hex content transfer encoding
Unlike most of the other codecs which target Unicode, this codec
will return Python string objects for both encode and decode.
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs, binascii
### Codec APIs
def hex_encode(input,errors='strict'):

    """ Encode input as hexadecimal digits using binascii.b2a_hex().

        Returns a 2-tuple (encoded object, number of input items
        consumed); the whole input is always consumed.

        Only 'strict' error handling is supported, which is also the
        default for errors.

    """
    assert errors == 'strict'
    encoded = binascii.b2a_hex(input)
    consumed = len(input)
    return (encoded, consumed)
def hex_decode(input,errors='strict'):

    """ Decode hexadecimal digits held in input using
        binascii.a2b_hex().

        Returns a 2-tuple (decoded object, number of input items
        consumed); the whole input is always consumed.

        input must provide the bf_getreadbuf buffer slot, as Python
        strings, buffer objects and memory mapped files do.

        Only 'strict' error handling is supported, which is also the
        default for errors.

    """
    assert errors == 'strict'
    decoded = binascii.a2b_hex(input)
    consumed = len(input)
    return (decoded, consumed)
class Codec(codecs.Codec):

    """ Stateless hex codec built on hex_encode() and hex_decode().
    """

    # These have to be real methods.  Binding the module-level functions
    # directly as class attributes turns them into methods, so an
    # instance call would pass the instance itself as 'input' and shift
    # every other argument by one.  The stream classes below go through
    # self.encode / self.decode and depend on this.
    def encode(self, input, errors='strict'):
        """ Encode input; returns (output object, length consumed). """
        return hex_encode(input, errors)

    def decode(self, input, errors='strict'):
        """ Decode input; returns (output object, length consumed). """
        return hex_decode(input, errors)

# The stream classes add nothing of their own; they combine the codec
# methods above with the generic codecs stream machinery.
class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass
### encodings module API
def getregentry():
    """ Return this codec's registry entry as the 4-tuple
        (encoder, decoder, stream reader class, stream writer class).
    """
    entry = (hex_encode, hex_decode, StreamReader, StreamWriter)
    return entry
#!/usr/local/bin/python2.1
""" Python Character Mapping Codec for ROT13.
See http://ucsub.colorado.edu/~kominek/rot13/ for details.
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):

    # Both directions are plain character-map translations driven by
    # the module-level encoding_map / decoding_map tables.

    def encode(self,input,errors='strict'):
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self,input,errors='strict'):
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded

# The stream classes add nothing of their own; they combine the codec
# methods above with the generic codecs stream machinery.
class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass
### encodings module API
def getregentry():
    # Registry entry: bound encode/decode methods (each taken from its
    # own Codec instance) followed by the stream reader/writer classes.
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
### Decoding Map

def _rot13_table():
    # Start from the identity mapping over all 256 byte values, then
    # rotate each of the 26 upper case (0x41..0x5a) and lower case
    # (0x61..0x7a) letters by 13 places within its own alphabet.
    table = codecs.make_identity_dict(range(256))
    for first in (0x0041, 0x0061):
        for offset in range(26):
            table[first + offset] = first + (offset + 13) % 26
    return table

decoding_map = _rot13_table()

### Encoding Map

# The encoding map is the inverse of the decoding map.
encoding_map = {}
for k,v in decoding_map.items():
    encoding_map[v] = k
### Filter API
def rot13(infile, outfile):
outfile.write(infile.read().encode('rot-13'))
if __name__ == '__main__':
import sys
rot13(sys.stdin, sys.stdout)
""" Python 'uu_codec' Codec - UU content transfer encoding
Unlike most of the other codecs which target Unicode, this codec
will return Python string objects for both encode and decode.
Written by Marc-Andre Lemburg (mal@lemburg.com). Some details were
adapted from uu.py which was written by Lance Ellinghouse and
modified by Jack Jansen and Fredrik Lundh.
"""
import codecs, binascii
### Codec APIs
def uu_encode(input,errors='strict',filename='<data>',mode=0666):
""" Encodes the object input and returns a tuple (output
object, length consumed).
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
from cStringIO import StringIO
from binascii import b2a_uu
infile = StringIO(input)
outfile = StringIO()
read = infile.read
write = outfile.write
# Encode
write('begin %o %s\n' % (mode & 0777, filename))
chunk = read(45)
while chunk:
write(b2a_uu(chunk))
chunk = read(45)
write(' \nend\n')
return (outfile.getvalue(), len(input))
def uu_decode(input,errors='strict'):
""" Decodes the object input and returns a tuple (output
object, length consumed).
input must be an object which provides the bf_getreadbuf
buffer slot. Python strings, buffer objects and memory
mapped files are examples of objects providing this slot.
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
Note: filename and file mode information in the input data is
ignored.
"""
assert errors == 'strict'
from cStringIO import StringIO
from binascii import a2b_uu
infile = StringIO(input)
outfile = StringIO()
readline = infile.readline
write = outfile.write
# Find start of encoded data
while 1:
s = readline()
if not s:
raise ValueError, 'Missing "begin" line in input data'
if s[:5] == 'begin':
break
# Decode
while 1:
s = readline()
if not s or \
s == 'end\n':
break
try:
data = a2b_uu(s)
except binascii.Error, v:
# Workaround for broken uuencoders by /Fredrik Lundh
nbytes = (((ord(s[0])-32) & 63) * 4 + 5) / 3
data = a2b_uu(s[:nbytes])
#sys.stderr.write("Warning: %s\n" % str(v))
write(data)
if not s:
raise ValueError, 'Truncated input data'
return (outfile.getvalue(), len(input))
class Codec(codecs.Codec):

    """ Stateless UU codec built on uu_encode() and uu_decode().
    """

    # These have to be real methods.  Binding the module-level functions
    # directly as class attributes turns them into methods, so an
    # instance call would pass the instance itself as 'input' (and, for
    # uu_encode, the errors value as 'filename').  The stream classes
    # below go through self.encode / self.decode and depend on this.
    def encode(self, input, errors='strict'):
        """ Encode input with uu_encode()'s default file name and mode;
            returns (output object, length consumed). """
        return uu_encode(input, errors)

    def decode(self, input, errors='strict'):
        """ Decode input; returns (output object, length consumed). """
        return uu_decode(input, errors)

# The stream classes add nothing of their own; they combine the codec
# methods above with the generic codecs stream machinery.
class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass
### encodings module API
def getregentry():
    """ Return this codec's registry entry as the 4-tuple
        (encoder, decoder, stream reader class, stream writer class).
    """
    entry = (uu_encode, uu_decode, StreamReader, StreamWriter)
    return entry
""" Python 'zlib_codec' Codec - zlib compression encoding
Unlike most of the other codecs which target Unicode, this codec
will return Python string objects for both encode and decode.
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs
import zlib # this codec needs the optional zlib module !
### Codec APIs
def zlib_encode(input,errors='strict'):

    """ Compress input using zlib.compress().

        Returns a 2-tuple (compressed object, number of input items
        consumed); the whole input is always consumed.

        Only 'strict' error handling is supported, which is also the
        default for errors.

    """
    assert errors == 'strict'
    compressed = zlib.compress(input)
    consumed = len(input)
    return (compressed, consumed)
def zlib_decode(input,errors='strict'):

    """ Decompress zlib data held in input using zlib.decompress().

        Returns a 2-tuple (decompressed object, number of input items
        consumed); the whole input is always consumed.

        input must provide the bf_getreadbuf buffer slot, as Python
        strings, buffer objects and memory mapped files do.

        Only 'strict' error handling is supported, which is also the
        default for errors.

    """
    assert errors == 'strict'
    expanded = zlib.decompress(input)
    consumed = len(input)
    return (expanded, consumed)
class Codec(codecs.Codec):

    """ Stateless zlib codec built on zlib_encode() and zlib_decode().
    """

    # These have to be real methods.  Binding the module-level functions
    # directly as class attributes turns them into methods, so an
    # instance call would pass the instance itself as 'input' and shift
    # every other argument by one.  The stream classes below go through
    # self.encode / self.decode and depend on this.
    def encode(self, input, errors='strict'):
        """ Encode input; returns (output object, length consumed). """
        return zlib_encode(input, errors)

    def decode(self, input, errors='strict'):
        """ Decode input; returns (output object, length consumed). """
        return zlib_decode(input, errors)

# The stream classes add nothing of their own; they combine the codec
# methods above with the generic codecs stream machinery.
class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass
### encodings module API
def getregentry():
    """ Return this codec's registry entry as the 4-tuple
        (encoder, decoder, stream reader class, stream writer class).
    """
    entry = (zlib_encode, zlib_decode, StreamReader, StreamWriter)
    return entry
"""Common tests shared by test_string and test_userstring""" """Common tests shared by test_string and test_userstring"""
import string import string
from test_support import verify, verbose, TestFailed
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
...@@ -212,3 +213,21 @@ def run_method_tests(test): ...@@ -212,3 +213,21 @@ def run_method_tests(test):
test('endswith', 'helloworld', 0, 'lowo', 3, 8) test('endswith', 'helloworld', 0, 'lowo', 3, 8)
test('endswith', 'ab', 0, 'ab', 0, 1) test('endswith', 'ab', 0, 'ab', 0, 1)
test('endswith', 'ab', 0, 'ab', 0, 0) test('endswith', 'ab', 0, 'ab', 0, 0)
# Encoding/decoding
codecs = [('rot13', 'uryyb jbeyq'),
('base64', 'aGVsbG8gd29ybGQ=\n'),
('hex', '68656c6c6f20776f726c64'),
('uu', 'begin 666 <data>\n+:&5L;&\\@=V]R;&0 \n \nend\n')]
for encoding, data in codecs:
test('encode', 'hello world', data, encoding)
test('decode', data, 'hello world', encoding)
# zlib is optional, so we make the test optional too...
try:
import zlib
except ImportError:
pass
else:
data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]'
verify('hello world'.encode('zlib') == data)
verify(data.decode('zlib') == 'hello world')
...@@ -152,38 +152,70 @@ PyObject *PyString_Decode(const char *s, ...@@ -152,38 +152,70 @@ PyObject *PyString_Decode(const char *s,
const char *encoding, const char *encoding,
const char *errors) const char *errors)
{ {
PyObject *buffer = NULL, *str; PyObject *v, *str;
str = PyString_FromStringAndSize(s, size);
if (str == NULL)
return NULL;
v = PyString_AsDecodedString(str, encoding, errors);
Py_DECREF(str);
return v;
}
PyObject *PyString_AsDecodedObject(PyObject *str,
const char *encoding,
const char *errors)
{
PyObject *v;
if (!PyString_Check(str)) {
PyErr_BadArgument();
goto onError;
}
if (encoding == NULL) if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding(); encoding = PyUnicode_GetDefaultEncoding();
/* Decode via the codec registry */ /* Decode via the codec registry */
buffer = PyBuffer_FromMemory((void *)s, size); v = PyCodec_Decode(str, encoding, errors);
if (buffer == NULL) if (v == NULL)
goto onError; goto onError;
str = PyCodec_Decode(buffer, encoding, errors);
if (str == NULL) return v;
onError:
return NULL;
}
PyObject *PyString_AsDecodedString(PyObject *str,
const char *encoding,
const char *errors)
{
PyObject *v;
v = PyString_AsDecodedObject(str, encoding, errors);
if (v == NULL)
goto onError; goto onError;
/* Convert Unicode to a string using the default encoding */ /* Convert Unicode to a string using the default encoding */
if (PyUnicode_Check(str)) { if (PyUnicode_Check(v)) {
PyObject *temp = str; PyObject *temp = v;
str = PyUnicode_AsEncodedString(str, NULL, NULL); v = PyUnicode_AsEncodedString(v, NULL, NULL);
Py_DECREF(temp); Py_DECREF(temp);
if (str == NULL) if (v == NULL)
goto onError; goto onError;
} }
if (!PyString_Check(str)) { if (!PyString_Check(v)) {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"decoder did not return a string object (type=%.400s)", "decoder did not return a string object (type=%.400s)",
str->ob_type->tp_name); v->ob_type->tp_name);
Py_DECREF(str); Py_DECREF(v);
goto onError; goto onError;
} }
Py_DECREF(buffer);
return str; return v;
onError: onError:
Py_XDECREF(buffer);
return NULL; return NULL;
} }
...@@ -202,7 +234,7 @@ PyObject *PyString_Encode(const char *s, ...@@ -202,7 +234,7 @@ PyObject *PyString_Encode(const char *s,
return v; return v;
} }
PyObject *PyString_AsEncodedString(PyObject *str, PyObject *PyString_AsEncodedObject(PyObject *str,
const char *encoding, const char *encoding,
const char *errors) const char *errors)
{ {
...@@ -220,6 +252,23 @@ PyObject *PyString_AsEncodedString(PyObject *str, ...@@ -220,6 +252,23 @@ PyObject *PyString_AsEncodedString(PyObject *str,
v = PyCodec_Encode(str, encoding, errors); v = PyCodec_Encode(str, encoding, errors);
if (v == NULL) if (v == NULL)
goto onError; goto onError;
return v;
onError:
return NULL;
}
/* Deprecated wrapper: encodes str via PyString_AsEncodedObject() and
   then coerces the codec's result to a string object (see the
   conversion code that follows).  Returns NULL on error. */
PyObject *PyString_AsEncodedString(PyObject *str,
				   const char *encoding,
				   const char *errors)
{
    PyObject *v;

    /* Delegate to the object-returning API.  This must not call
       PyString_AsEncodedString() itself: that would recurse without
       bound and never reach the conversion code below. */
    v = PyString_AsEncodedObject(str, encoding, errors);
    if (v == NULL)
	goto onError;
/* Convert Unicode to a string using the default encoding */ /* Convert Unicode to a string using the default encoding */
if (PyUnicode_Check(v)) { if (PyUnicode_Check(v)) {
PyObject *temp = v; PyObject *temp = v;
...@@ -235,6 +284,7 @@ PyObject *PyString_AsEncodedString(PyObject *str, ...@@ -235,6 +284,7 @@ PyObject *PyString_AsEncodedString(PyObject *str,
Py_DECREF(v); Py_DECREF(v);
goto onError; goto onError;
} }
return v; return v;
onError: onError:
...@@ -1779,10 +1829,10 @@ string_endswith(PyStringObject *self, PyObject *args) ...@@ -1779,10 +1829,10 @@ string_endswith(PyStringObject *self, PyObject *args)
static char encode__doc__[] = static char encode__doc__[] =
"S.encode([encoding[,errors]]) -> string\n\ "S.encode([encoding[,errors]]) -> object\n\
\n\ \n\
Return an encoded string version of S. Default encoding is the current\n\ Encodes S using the codec registered for encoding. encoding defaults\n\
default string encoding. errors may be given to set a different error\n\ to the default encoding. errors may be given to set a different error\n\
handling scheme. Default is 'strict' meaning that encoding errors raise\n\ handling scheme. Default is 'strict' meaning that encoding errors raise\n\
a ValueError. Other possible values are 'ignore' and 'replace'."; a ValueError. Other possible values are 'ignore' and 'replace'.";
...@@ -1793,7 +1843,26 @@ string_encode(PyStringObject *self, PyObject *args) ...@@ -1793,7 +1843,26 @@ string_encode(PyStringObject *self, PyObject *args)
char *errors = NULL; char *errors = NULL;
if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
return NULL; return NULL;
return PyString_AsEncodedString((PyObject *)self, encoding, errors); return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
}
/* Docstring for the string .decode() method.  It speaks of decoding
   errors: this is the decode counterpart of encode__doc__. */
static char decode__doc__[] =
"S.decode([encoding[,errors]]) -> object\n\
\n\
Decodes S using the codec registered for encoding. encoding defaults\n\
to the default encoding. errors may be given to set a different error\n\
handling scheme. Default is 'strict' meaning that decoding errors raise\n\
a ValueError. Other possible values are 'ignore' and 'replace'.";
/* Implements S.decode([encoding[,errors]]).
   Parses up to two optional C string arguments from args and passes
   them, together with self, to PyString_AsDecodedObject(), whose
   result is returned unchanged.  Returns NULL if argument parsing
   fails. */
static PyObject *
string_decode(PyStringObject *self, PyObject *args)
{
/* Both arguments are optional and stay NULL when omitted. */
char *encoding = NULL;
char *errors = NULL;
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
return NULL;
return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
} }
...@@ -2371,6 +2440,7 @@ string_methods[] = { ...@@ -2371,6 +2440,7 @@ string_methods[] = {
{"rjust", (PyCFunction)string_rjust, 1, rjust__doc__}, {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
{"center", (PyCFunction)string_center, 1, center__doc__}, {"center", (PyCFunction)string_center, 1, center__doc__},
{"encode", (PyCFunction)string_encode, 1, encode__doc__}, {"encode", (PyCFunction)string_encode, 1, encode__doc__},
{"decode", (PyCFunction)string_decode, 1, decode__doc__},
{"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__}, {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
{"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__}, {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
#if 0 #if 0
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment