Commit 244651aa authored by Antoine Pitrou

Merged revisions 72283-72284 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r72283 | antoine.pitrou | 2009-05-04 20:32:32 +0200 (lun., 04 mai 2009) | 4 lines

  Issue #4426: The UTF-7 decoder was too strict and didn't accept some legal sequences.
  Patch by Nick Barnes and Victor Stinner.
........
  r72284 | antoine.pitrou | 2009-05-04 20:32:50 +0200 (lun., 04 mai 2009) | 3 lines

  Add Nick Barnes to ACKS.
........
parent 375c0197
...@@ -858,10 +858,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( ...@@ -858,10 +858,8 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
const Py_UNICODE *data, /* Unicode char buffer */ const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* number of Py_UNICODE chars to encode */ Py_ssize_t length, /* number of Py_UNICODE chars to encode */
int encodeSetO, /* force the encoder to encode characters in int base64SetO, /* Encode RFC2152 Set O characters in base64 */
Set O, as described in RFC2152 */ int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
int encodeWhiteSpace, /* force the encoder to encode space, tab,
carriage return and linefeed characters */
const char *errors /* error handling */ const char *errors /* error handling */
); );
......
...@@ -867,19 +867,31 @@ class UnicodeTest( ...@@ -867,19 +867,31 @@ class UnicodeTest(
('+?', b'+-?'), ('+?', b'+-?'),
(r'\\?', b'+AFwAXA?'), (r'\\?', b'+AFwAXA?'),
(r'\\\?', b'+AFwAXABc?'), (r'\\\?', b'+AFwAXABc?'),
(r'++--', b'+-+---') (r'++--', b'+-+---'),
('\U000abcde', b'+2m/c3g-'), # surrogate pairs
('/', b'/'),
] ]
for (x, y) in utfTests: for (x, y) in utfTests:
self.assertEqual(x.encode('utf-7'), y) self.assertEqual(x.encode('utf-7'), y)
# surrogates not supported # Unpaired surrogates not supported
self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7') self.assertRaises(UnicodeError, str, b'+3ADYAA-', 'utf-7')
self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd') self.assertEqual(str(b'+3ADYAA-', 'utf-7', 'replace'), '\ufffd\ufffd')
# Issue #2242: crash on some Windows/MSVC versions # Issue #2242: crash on some Windows/MSVC versions
self.assertRaises(UnicodeDecodeError, b'+\xc1'.decode, 'utf-7') self.assertEqual(b'+\xc1'.decode('utf-7'), '\xc1')
# Direct encoded characters
set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
# Optional direct characters
set_o = '!"#$%&*;<=>@[]^_`{|}'
for c in set_d:
self.assertEqual(c.encode('utf7'), c.encode('ascii'))
self.assertEqual(c.encode('ascii').decode('utf7'), c)
for c in set_o:
self.assertEqual(c.encode('ascii').decode('utf7'), c)
def test_codecs_utf8(self): def test_codecs_utf8(self):
self.assertEqual(''.encode('utf-8'), b'') self.assertEqual(''.encode('utf-8'), b'')
......
...@@ -35,6 +35,7 @@ Luigi Ballabio ...@@ -35,6 +35,7 @@ Luigi Ballabio
Jeff Balogh Jeff Balogh
Michael J. Barber Michael J. Barber
Chris Barker Chris Barker
Nick Barnes
Quentin Barnes Quentin Barnes
Richard Barran Richard Barran
Cesar Eduardo Barros Cesar Eduardo Barros
......
...@@ -12,6 +12,9 @@ What's New in Python 3.1 beta 1? ...@@ -12,6 +12,9 @@ What's New in Python 3.1 beta 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #4426: The UTF-7 decoder was too strict and didn't accept some legal
sequences. Patch by Nick Barnes and Victor Stinner.
- Issue #3672: Reject surrogates in utf-8 codec; add surrogates error handler. - Issue #3672: Reject surrogates in utf-8 codec; add surrogates error handler.
- Issue #5883: In the io module, the BufferedIOBase and TextIOBase ABCs have - Issue #5883: In the io module, the BufferedIOBase and TextIOBase ABCs have
......
...@@ -1702,69 +1702,84 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler ...@@ -1702,69 +1702,84 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
/* --- UTF-7 Codec -------------------------------------------------------- */ /* --- UTF-7 Codec -------------------------------------------------------- */
/* see RFC2152 for details */ /* See RFC2152 for details. We encode conservatively and decode liberally. */
static /* Three simple macros defining base-64. */
char utf7_special[128] = {
/* indicate whether a UTF-7 character is special i.e. cannot be directly
encoded:
0 - not special
1 - special
2 - whitespace (optional)
3 - RFC2152 Set O (optional) */
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 3, 3, 3, 3, 3, 3, 0, 0, 0, 3, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3,
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 1, 1,
}; /* Is c a base-64 character? */
#define IS_BASE64(c) \
(((c) >= 'A' && (c) <= 'Z') || \
((c) >= 'a' && (c) <= 'z') || \
((c) >= '0' && (c) <= '9') || \
(c) == '+' || (c) == '/')
/* Note: The comparison (c) <= 0 is a trick to work-around gcc /* given that c is a base-64 character, what is its base-64 value? */
warnings about the comparison always being false; since
utf7_special[0] is 1, we can safely make that one comparison
true */
#define SPECIAL(c, encodeO, encodeWS) \ #define FROM_BASE64(c) \
((c) > 127 || (c) <= 0 || utf7_special[(c)] == 1 || \ (((c) >= 'A' && (c) <= 'Z') ? (c) - 'A' : \
(encodeWS && (utf7_special[(c)] == 2)) || \ ((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 26 : \
(encodeO && (utf7_special[(c)] == 3))) ((c) >= '0' && (c) <= '9') ? (c) - '0' + 52 : \
(c) == '+' ? 62 : 63)
#define B64(n) \ /* What is the base-64 character of the bottom 6 bits of n? */
#define TO_BASE64(n) \
("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f]) ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f])
#define B64CHAR(c) \
(ISALNUM(c) || (c) == '+' || (c) == '/') /* DECODE_DIRECT: this byte encountered in a UTF-7 string should be
#define UB64(c) \ * decoded as itself. We are permissive on decoding; the only ASCII
((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ? \ * byte not decoding to itself is the + which begins a base64
(c) - 71 : (c) >= 'A' ? (c) - 65 : (c) + 4 ) * string. */
#define ENCODE(out, ch, bits) \ #define DECODE_DIRECT(c) \
while (bits >= 6) { \ ((c) <= 127 && (c) != '+')
*out++ = B64(ch >> (bits-6)); \
bits -= 6; \ /* The UTF-7 encoder treats ASCII characters differently according to
} * whether they are Set D, Set O, Whitespace, or special (i.e. none of
* the above). See RFC2152. This array identifies these different
#define DECODE(out, ch, bits, surrogate) \ * sets:
while (bits >= 16) { \ * 0 : "Set D"
Py_UNICODE outCh = (Py_UNICODE) ((ch >> (bits-16)) & 0xffff); \ * alphanumeric and '(),-./:?
bits -= 16; \ * 1 : "Set O"
if (surrogate) { \ * !"#$%&*;<=>@[]^_`{|}
/* We have already generated an error for the high surrogate \ * 2 : "whitespace"
so let's not bother seeing if the low surrogate is correct or not */ \ * ht nl cr sp
surrogate = 0; \ * 3 : special (must be base64 encoded)
} else if (0xDC00 <= outCh && outCh <= 0xDFFF) { \ * everything else (i.e. +\~ and non-printing codes 0-8 11-12 14-31 127)
/* This is a surrogate pair. Unfortunately we can't represent \ */
it in a 16-bit character */ \
surrogate = 1; \ static
errmsg = "code pairs are not supported"; \ char utf7_category[128] = {
goto utf7Error; \ /* nul soh stx etx eot enq ack bel bs ht nl vt np cr so si */
} else { \ 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
*out++ = outCh; \ /* dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us */
} \ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
} /* sp ! " # $ % & ' ( ) * + , - . / */
2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0,
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
/* @ A B C D E F G H I J K L M N O */
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* P Q R S T U V W X Y Z [ \ ] ^ _ */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
/* ` a b c d e f g h i j k l m n o */
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* p q r s t u v w x y z { | } ~ del */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3,
};
/* ENCODE_DIRECT: this character should be encoded as itself. The
* answer depends on whether we are encoding set O as itself, and also
* on whether we are encoding whitespace as itself. RFC2152 makes it
* clear that the answers to these questions vary between
* applications, so this code needs to be flexible. */
#define ENCODE_DIRECT(c, directO, directWS) \
((c) < 128 && (c) > 0 && \
((utf7_category[(c)] == 0) || \
(directWS && (utf7_category[(c)] == 2)) || \
(directO && (utf7_category[(c)] == 1))))
PyObject *PyUnicode_DecodeUTF7(const char *s, PyObject *PyUnicode_DecodeUTF7(const char *s,
Py_ssize_t size, Py_ssize_t size,
...@@ -1773,6 +1788,13 @@ PyObject *PyUnicode_DecodeUTF7(const char *s, ...@@ -1773,6 +1788,13 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL); return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL);
} }
/* The decoder. The only state we preserve is our read position,
* i.e. how many characters we have consumed. So if we end in the
* middle of a shift sequence we have to back off the read position
* and the output to the beginning of the sequence, otherwise we lose
* all the shift state (seen bits, number of bits seen, high
* surrogate). */
PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
Py_ssize_t size, Py_ssize_t size,
const char *errors, const char *errors,
...@@ -1787,9 +1809,10 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, ...@@ -1787,9 +1809,10 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
Py_UNICODE *p; Py_UNICODE *p;
const char *errmsg = ""; const char *errmsg = "";
int inShift = 0; int inShift = 0;
unsigned int bitsleft = 0; Py_UNICODE *shiftOutStart;
unsigned long charsleft = 0; unsigned int base64bits = 0;
int surrogate = 0; unsigned long base64buffer = 0;
Py_UNICODE surrogate = 0;
PyObject *errorHandler = NULL; PyObject *errorHandler = NULL;
PyObject *exc = NULL; PyObject *exc = NULL;
...@@ -1803,6 +1826,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, ...@@ -1803,6 +1826,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
} }
p = unicode->str; p = unicode->str;
shiftOutStart = p;
e = s + size; e = s + size;
while (s < e) { while (s < e) {
...@@ -1810,72 +1834,101 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, ...@@ -1810,72 +1834,101 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
restart: restart:
ch = (unsigned char) *s; ch = (unsigned char) *s;
if (inShift) { if (inShift) { /* in a base-64 section */
if ((ch == '-') || !B64CHAR(ch)) { if (IS_BASE64(ch)) { /* consume a base-64 character */
inShift = 0; base64buffer = (base64buffer << 6) | FROM_BASE64(ch);
base64bits += 6;
s++; s++;
if (base64bits >= 16) {
/* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate); /* we have enough bits for a UTF-16 value */
if (bitsleft >= 6) { Py_UNICODE outCh = (Py_UNICODE)
/* The shift sequence has a partial character in it. If (base64buffer >> (base64bits-16));
bitsleft < 6 then we could just classify it as padding base64bits -= 16;
but that is not the case here */ base64buffer &= (1 << base64bits) - 1; /* clear high bits */
if (surrogate) {
errmsg = "partial character in shift sequence"; /* expecting a second surrogate */
goto utf7Error; if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
#ifdef Py_UNICODE_WIDE
*p++ = (((surrogate & 0x3FF)<<10)
| (outCh & 0x3FF)) + 0x10000;
#else
*p++ = surrogate;
*p++ = outCh;
#endif
surrogate = 0;
}
else {
surrogate = 0;
errmsg = "second surrogate missing";
goto utf7Error;
}
}
else if (outCh >= 0xD800 && outCh <= 0xDBFF) {
/* first surrogate */
surrogate = outCh;
}
else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
errmsg = "unexpected second surrogate";
goto utf7Error;
}
else {
*p++ = outCh;
}
} }
/* According to RFC2152 the remaining bits should be zero. We }
choose to signal an error/insert a replacement character else { /* now leaving a base-64 section */
here so indicate the potential of a misencoded character. */ inShift = 0;
s++;
/* On x86, a << b == a << (b%32) so make sure that bitsleft != 0 */ if (surrogate) {
if (bitsleft && charsleft << (sizeof(charsleft) * 8 - bitsleft)) { errmsg = "second surrogate missing at end of shift sequence";
errmsg = "non-zero padding bits in shift sequence";
goto utf7Error; goto utf7Error;
} }
if (base64bits > 0) { /* left-over bits */
if (ch == '-') { if (base64bits >= 6) {
if ((s < e) && (*(s) == '-')) { /* We've seen at least one base-64 character */
*p++ = '-'; errmsg = "partial character in shift sequence";
inShift = 1; goto utf7Error;
} }
} else if (SPECIAL(ch,0,0)) { else {
errmsg = "unexpected special character"; /* Some bits remain; they should be zero */
goto utf7Error; if (base64buffer != 0) {
} else { errmsg = "non-zero padding bits in shift sequence";
goto utf7Error;
}
}
}
if (ch != '-') {
/* '-' is absorbed; other terminating
characters are preserved */
*p++ = ch; *p++ = ch;
} }
} else {
charsleft = (charsleft << 6) | UB64(ch);
bitsleft += 6;
s++;
/* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate);
} }
} }
else if ( ch == '+' ) { else if ( ch == '+' ) {
startinpos = s-starts; startinpos = s-starts;
s++; s++; /* consume '+' */
if (s < e && *s == '-') { if (s < e && *s == '-') { /* '+-' encodes '+' */
s++; s++;
*p++ = '+'; *p++ = '+';
} else }
{ else { /* begin base64-encoded section */
inShift = 1; inShift = 1;
bitsleft = 0; shiftOutStart = p;
base64bits = 0;
} }
} }
else if (SPECIAL(ch,0,0)) { else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
startinpos = s-starts; *p++ = ch;
errmsg = "unexpected special character";
s++; s++;
goto utf7Error;
} }
else { else {
*p++ = ch; startinpos = s-starts;
s++; s++;
errmsg = "unexpected special character";
goto utf7Error;
} }
continue; continue;
utf7Error: utf7Error:
outpos = p-PyUnicode_AS_UNICODE(unicode); outpos = p-PyUnicode_AS_UNICODE(unicode);
endinpos = s-starts; endinpos = s-starts;
if (unicode_decode_call_errorhandler( if (unicode_decode_call_errorhandler(
...@@ -1886,23 +1939,35 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, ...@@ -1886,23 +1939,35 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
goto onError; goto onError;
} }
if (inShift && !consumed) { /* end of string */
outpos = p-PyUnicode_AS_UNICODE(unicode);
endinpos = size; if (inShift && !consumed) { /* in shift sequence, no more to follow */
if (unicode_decode_call_errorhandler( /* if we're in an inconsistent state, that's an error */
errors, &errorHandler, if (surrogate ||
"utf7", "unterminated shift sequence", (base64bits >= 6) ||
&starts, &e, &startinpos, &endinpos, &exc, &s, (base64bits > 0 && base64buffer != 0)) {
&unicode, &outpos, &p)) outpos = p-PyUnicode_AS_UNICODE(unicode);
goto onError; endinpos = size;
if (s < e) if (unicode_decode_call_errorhandler(
goto restart; errors, &errorHandler,
"utf7", "unterminated shift sequence",
&starts, &e, &startinpos, &endinpos, &exc, &s,
&unicode, &outpos, &p))
goto onError;
if (s < e)
goto restart;
}
} }
/* return state */
if (consumed) { if (consumed) {
if(inShift) if (inShift) {
p = shiftOutStart; /* back off output */
*consumed = startinpos; *consumed = startinpos;
else }
else {
*consumed = s-starts; *consumed = s-starts;
}
} }
if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0) if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0)
...@@ -1922,27 +1987,27 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, ...@@ -1922,27 +1987,27 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
Py_ssize_t size, Py_ssize_t size,
int encodeSetO, int base64SetO,
int encodeWhiteSpace, int base64WhiteSpace,
const char *errors) const char *errors)
{ {
PyObject *v; PyObject *v;
/* It might be possible to tighten this worst case */ /* It might be possible to tighten this worst case */
Py_ssize_t cbAllocated = 5 * size; Py_ssize_t allocated = 5 * size;
int inShift = 0; int inShift = 0;
Py_ssize_t i = 0; Py_ssize_t i = 0;
unsigned int bitsleft = 0; unsigned int base64bits = 0;
unsigned long charsleft = 0; unsigned long base64buffer = 0;
char * out; char * out;
char * start; char * start;
if (size == 0) if (size == 0)
return PyBytes_FromStringAndSize(NULL, 0); return PyBytes_FromStringAndSize(NULL, 0);
if (cbAllocated / 5 != size) if (allocated / 5 != size)
return PyErr_NoMemory(); return PyErr_NoMemory();
v = PyBytes_FromStringAndSize(NULL, cbAllocated); v = PyBytes_FromStringAndSize(NULL, allocated);
if (v == NULL) if (v == NULL)
return NULL; return NULL;
...@@ -1950,78 +2015,76 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, ...@@ -1950,78 +2015,76 @@ PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
for (;i < size; ++i) { for (;i < size; ++i) {
Py_UNICODE ch = s[i]; Py_UNICODE ch = s[i];
if (!inShift) { if (inShift) {
if (ch == '+') { if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
*out++ = '+'; /* shifting out */
*out++ = '-'; if (base64bits) { /* output remaining bits */
} else if (SPECIAL(ch, encodeSetO, encodeWhiteSpace)) { *out++ = TO_BASE64(base64buffer << (6-base64bits));
charsleft = ch; base64buffer = 0;
bitsleft = 16; base64bits = 0;
*out++ = '+'; }
/* out, charsleft, bitsleft = */ ENCODE(out, charsleft, bitsleft); inShift = 0;
inShift = bitsleft > 0;
} else {
*out++ = (char) ch;
}
} else {
if (!SPECIAL(ch, encodeSetO, encodeWhiteSpace)) {
*out++ = B64(charsleft << (6-bitsleft));
charsleft = 0;
bitsleft = 0;
/* Characters not in the BASE64 set implicitly unshift the sequence /* Characters not in the BASE64 set implicitly unshift the sequence
so no '-' is required, except if the character is itself a '-' */ so no '-' is required, except if the character is itself a '-' */
if (B64CHAR(ch) || ch == '-') { if (IS_BASE64(ch) || ch == '-') {
*out++ = '-'; *out++ = '-';
} }
inShift = 0;
*out++ = (char) ch; *out++ = (char) ch;
} else { }
bitsleft += 16; else {
charsleft = (charsleft << 16) | ch; goto encode_char;
/* out, charsleft, bitsleft = */ ENCODE(out, charsleft, bitsleft); }
}
/* If the next character is special then we don't need to terminate else { /* not in a shift sequence */
the shift sequence. If the next character is not a BASE64 character if (ch == '+') {
or '-' then the shift sequence will be terminated implicitly and we *out++ = '+';
don't have to insert a '-'. */
if (bitsleft == 0) {
if (i + 1 < size) {
Py_UNICODE ch2 = s[i+1];
if (SPECIAL(ch2, encodeSetO, encodeWhiteSpace)) {
} else if (B64CHAR(ch2) || ch2 == '-') {
*out++ = '-';
inShift = 0;
} else {
inShift = 0;
}
}
else {
*out++ = '-'; *out++ = '-';
inShift = 0; }
} else if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
} *out++ = (char) ch;
}
else {
*out++ = '+';
inShift = 1;
goto encode_char;
} }
} }
continue;
encode_char:
#ifdef Py_UNICODE_WIDE
if (ch >= 0x10000) {
/* code first surrogate */
base64bits += 16;
base64buffer = (base64buffer << 16) | 0xd800 | ((ch-0x10000) >> 10);
while (base64bits >= 6) {
*out++ = TO_BASE64(base64buffer >> (base64bits-6));
base64bits -= 6;
}
/* prepare second surrogate */
ch = 0xDC00 | ((ch-0x10000) & 0x3FF);
}
#endif
base64bits += 16;
base64buffer = (base64buffer << 16) | ch;
while (base64bits >= 6) {
*out++ = TO_BASE64(base64buffer >> (base64bits-6));
base64bits -= 6;
}
} }
if (bitsleft) { if (base64bits)
*out++= B64(charsleft << (6-bitsleft) ); *out++= TO_BASE64(base64buffer << (6-base64bits) );
if (inShift)
*out++ = '-'; *out++ = '-';
}
if (_PyBytes_Resize(&v, out - start) < 0) if (_PyBytes_Resize(&v, out - start) < 0)
return NULL; return NULL;
return v; return v;
} }
#undef SPECIAL #undef IS_BASE64
#undef B64 #undef FROM_BASE64
#undef B64CHAR #undef TO_BASE64
#undef UB64 #undef DECODE_DIRECT
#undef ENCODE #undef ENCODE_DIRECT
#undef DECODE
/* --- UTF-8 Codec -------------------------------------------------------- */ /* --- UTF-8 Codec -------------------------------------------------------- */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment