Commit 4894c306 authored by Walter Dörwald

Fix a bug in the memory reallocation code of PyUnicode_TranslateCharmap().

charmaptranslate_makespace() allocated more memory than required for the
next replacement but didn't remember that fact, so memory size was growing
exponentially every time a replacement string was longer than one character.
This fixes SF bug #828737.
parent 6a5b0277
...@@ -690,6 +690,18 @@ class CodecCallbackTest(unittest.TestCase): ...@@ -690,6 +690,18 @@ class CodecCallbackTest(unittest.TestCase):
self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1}) self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
self.assertRaises(TypeError, u"\xff".translate, {0xff: ()}) self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
def test_bug828737(self):
charmap = {
ord("&"): u"&",
ord("<"): u"&lt;",
ord(">"): u"&gt;",
ord('"'): u"&quot;",
}
for n in (1, 10, 100, 1000):
text = u'abc<def>ghi'*n
text.translate(charmap)
def test_main(): def test_main():
test.test_support.run_unittest(CodecCallbackTest) test.test_support.run_unittest(CodecCallbackTest)
......
...@@ -3222,19 +3222,19 @@ int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result) ...@@ -3222,19 +3222,19 @@ int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
if not reallocate and adjust various state variables. if not reallocate and adjust various state variables.
Return 0 on success, -1 on error */ Return 0 on success, -1 on error */
static static
int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsize, int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp,
int requiredsize) int requiredsize)
{ {
if (requiredsize > *outsize) { int oldsize = PyUnicode_GET_SIZE(*outobj);
if (requiredsize > oldsize) {
/* remember old output position */ /* remember old output position */
int outpos = *outp-PyUnicode_AS_UNICODE(*outobj); int outpos = *outp-PyUnicode_AS_UNICODE(*outobj);
/* exponentially overallocate to minimize reallocations */ /* exponentially overallocate to minimize reallocations */
if (requiredsize < 2 * *outsize) if (requiredsize < 2 * oldsize)
requiredsize = 2 * *outsize; requiredsize = 2 * oldsize;
if (_PyUnicode_Resize(outobj, requiredsize) < 0) if (_PyUnicode_Resize(outobj, requiredsize) < 0)
return -1; return -1;
*outp = PyUnicode_AS_UNICODE(*outobj) + outpos; *outp = PyUnicode_AS_UNICODE(*outobj) + outpos;
*outsize = requiredsize;
} }
return 0; return 0;
} }
...@@ -3245,14 +3245,15 @@ int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsiz ...@@ -3245,14 +3245,15 @@ int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsiz
The called must decref result. The called must decref result.
Return 0 on success, -1 on error. */ Return 0 on success, -1 on error. */
static static
int charmaptranslate_output(Py_UNICODE c, PyObject *mapping, int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp,
PyObject **outobj, int *outsize, Py_UNICODE **outp, PyObject **res) int insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp,
PyObject **res)
{ {
if (charmaptranslate_lookup(c, mapping, res)) if (charmaptranslate_lookup(*curinp, mapping, res))
return -1; return -1;
if (*res==NULL) { if (*res==NULL) {
/* not found => default to 1:1 mapping */ /* not found => default to 1:1 mapping */
*(*outp)++ = (Py_UNICODE)c; *(*outp)++ = *curinp;
} }
else if (*res==Py_None) else if (*res==Py_None)
; ;
...@@ -3268,8 +3269,10 @@ int charmaptranslate_output(Py_UNICODE c, PyObject *mapping, ...@@ -3268,8 +3269,10 @@ int charmaptranslate_output(Py_UNICODE c, PyObject *mapping,
} }
else if (repsize!=0) { else if (repsize!=0) {
/* more than one character */ /* more than one character */
int requiredsize = *outsize + repsize - 1; int requiredsize = (*outp-PyUnicode_AS_UNICODE(*outobj)) +
if (charmaptranslate_makespace(outobj, outp, outsize, requiredsize)) (insize - (*curinp-*startinp)) +
repsize - 1;
if (charmaptranslate_makespace(outobj, outp, requiredsize))
return -1; return -1;
memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize); memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize);
*outp += repsize; *outp += repsize;
...@@ -3294,7 +3297,6 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p, ...@@ -3294,7 +3297,6 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
Py_UNICODE *str; Py_UNICODE *str;
/* current output position */ /* current output position */
int respos = 0; int respos = 0;
int ressize;
char *reason = "character maps to <undefined>"; char *reason = "character maps to <undefined>";
PyObject *errorHandler = NULL; PyObject *errorHandler = NULL;
PyObject *exc = NULL; PyObject *exc = NULL;
...@@ -3312,16 +3314,15 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p, ...@@ -3312,16 +3314,15 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
replacements, if we need more, we'll resize */ replacements, if we need more, we'll resize */
res = PyUnicode_FromUnicode(NULL, size); res = PyUnicode_FromUnicode(NULL, size);
if (res == NULL) if (res == NULL)
goto onError; goto onError;
if (size == 0) if (size == 0)
return res; return res;
str = PyUnicode_AS_UNICODE(res); str = PyUnicode_AS_UNICODE(res);
ressize = size;
while (p<endp) { while (p<endp) {
/* try to encode it */ /* try to encode it */
PyObject *x = NULL; PyObject *x = NULL;
if (charmaptranslate_output(*p, mapping, &res, &ressize, &str, &x)) { if (charmaptranslate_output(startp, p, size, mapping, &res, &str, &x)) {
Py_XDECREF(x); Py_XDECREF(x);
goto onError; goto onError;
} }
...@@ -3340,7 +3341,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p, ...@@ -3340,7 +3341,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
/* find all untranslatable characters */ /* find all untranslatable characters */
while (collend < endp) { while (collend < endp) {
if (charmaptranslate_lookup(*collend, mapping, &x)) if (charmaptranslate_lookup(*collend, mapping, &x))
goto onError; goto onError;
Py_XDECREF(x); Py_XDECREF(x);
if (x!=Py_None) if (x!=Py_None)
...@@ -3379,7 +3380,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p, ...@@ -3379,7 +3380,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
char buffer[2+29+1+1]; char buffer[2+29+1+1];
char *cp; char *cp;
sprintf(buffer, "&#%d;", (int)*p); sprintf(buffer, "&#%d;", (int)*p);
if (charmaptranslate_makespace(&res, &str, &ressize, if (charmaptranslate_makespace(&res, &str,
(str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend))) (str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend)))
goto onError; goto onError;
for (cp = buffer; *cp; ++cp) for (cp = buffer; *cp; ++cp)
...@@ -3395,7 +3396,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p, ...@@ -3395,7 +3396,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
goto onError; goto onError;
/* generate replacement */ /* generate replacement */
repsize = PyUnicode_GET_SIZE(repunicode); repsize = PyUnicode_GET_SIZE(repunicode);
if (charmaptranslate_makespace(&res, &str, &ressize, if (charmaptranslate_makespace(&res, &str,
(str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) { (str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) {
Py_DECREF(repunicode); Py_DECREF(repunicode);
goto onError; goto onError;
...@@ -3409,7 +3410,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p, ...@@ -3409,7 +3410,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
} }
/* Resize if we allocated to much */ /* Resize if we allocated to much */
respos = str-PyUnicode_AS_UNICODE(res); respos = str-PyUnicode_AS_UNICODE(res);
if (respos<ressize) { if (respos<PyUnicode_GET_SIZE(res)) {
if (_PyUnicode_Resize(&res, respos) < 0) if (_PyUnicode_Resize(&res, respos) < 0)
goto onError; goto onError;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment