PEP 293 implementation (from SF patch http://www.python.org/sf/432401)

3aeb632c · Walter Dörwald · 94fab762 · 3aeb632c · 3aeb632c · 3aeb632c
Commit 3aeb632c authored Sep 02, 2002 by Walter Dörwald
12 changed files
--- a/Doc/lib/libcodecs.tex
+++ b/Doc/lib/libcodecs.tex
@@ -17,7 +17,7 @@

 This module defines base classes for standard Python codecs (encoders
 and decoders) and provides access to the internal Python codec
-registry which manages the codec lookup process.
+registry which manages the codec and error handling lookup process.

 It defines the following functions:

@@ -98,6 +98,43 @@ Raises a \exception{LookupError} in case the encoding cannot be found.
 To simplify working with encoded files or stream, the module
 also defines these utility functions:

+\begin{funcdesc}{register_error}{name, error_handler}
+Register the error handling function \var{error_handler} under the
+name \var{name}. \var{error_handler} will be called during encoding
+and decoding in case of an error, when \var{name} is specified as the
+errors parameter. \var{error_handler} will be called with an
+\exception{UnicodeEncodeError}, \exception{UnicodeDecodeError} or
+\exception{UnicodeTranslateError} instance and must return a tuple
+with a replacement for the unencodable/undecodable part of the input
+and a position where encoding/decoding should continue.
+\end{funcdesc}
+
+\begin{funcdesc}{lookup_error}{name}
+Return the error handler previously registered under the name \var{name}.
+
+Raises a \exception{LookupError} in case the handler cannot be found.
+\end{funcdesc}
+
+\begin{funcdesc}{strict_errors}{exception}
+Implements the \code{strict} error handling.
+\end{funcdesc}
+
+\begin{funcdesc}{replace_errors}{exception}
+Implements the \code{replace} error handling.
+\end{funcdesc}
+
+\begin{funcdesc}{ignore_errors}{exception}
+Implements the \code{ignore} error handling.
+\end{funcdesc}
+
+\begin{funcdesc}{xmlcharrefreplace_errors}{exception}
+Implements the \code{xmlcharrefreplace} error handling.
+\end{funcdesc}
+
+\begin{funcdesc}{backslashreplace_errors}{exception}
+Implements the \code{backslashreplace} error handling.
+\end{funcdesc}
+
 \begin{funcdesc}{open}{filename, mode\optional{, encoding\optional{,
                       errors\optional{, buffering}}}}
 Open an encoded file using the given \var{mode} and return

--- a/Doc/lib/libexcs.tex
+++ b/Doc/lib/libexcs.tex
@@ -335,6 +335,24 @@ Raised when an \keyword{assert} statement fails.
 \versionadded{2.0}
 \end{excdesc}

+\begin{excdesc}{UnicodeEncodeError}
+  Raised when a Unicode-related error occurs during encoding.  It
+  is a subclass of \exception{UnicodeError}.
+\versionadded{2.3}
+\end{excdesc}
+
+\begin{excdesc}{UnicodeDecodeError}
+  Raised when a Unicode-related error occurs during decoding.  It
+  is a subclass of \exception{UnicodeError}.
+\versionadded{2.3}
+\end{excdesc}
+
+\begin{excdesc}{UnicodeTranslateError}
+  Raised when a Unicode-related error occurs during translating.  It
+  is a subclass of \exception{UnicodeError}.
+\versionadded{2.3}
+\end{excdesc}
+
 \begin{excdesc}{ValueError}
  Raised when a built-in operation or function receives an argument
  that has the right type but an inappropriate value, and the
@@ -426,6 +444,9 @@ The class hierarchy for built-in exceptions is:
     |    |    +-- FloatingPointError
     |    +-- ValueError
     |    |    +-- UnicodeError
+     |    |        +-- UnicodeEncodeError
+     |    |        +-- UnicodeDecodeError
+     |    |        +-- UnicodeTranslateError
     |    +-- ReferenceError
     |    +-- SystemError
     |    +-- MemoryError

--- a/Include/codecs.h
+++ b/Include/codecs.h
@@ -117,6 +117,36 @@ PyAPI_FUNC(PyObject *) PyCodec_StreamWriter(
       const char *errors
       );

+/* Unicode encoding error handling callback registry API */
+
+/* Register the error handling callback function error under the name
+   name. This function will be called by the codec when it encounters
+   unencodable characters/undecodable bytes and doesn't know the
+   callback name, when name is specified as the error parameter
+   in the call to the encode/decode function.
+   Return 0 on success, -1 on error */
+PyAPI_FUNC(int) PyCodec_RegisterError(const char *name, PyObject *error);
+
+/* Look up the error handling callback function registered under the
+   name name. As a special case NULL can be passed, in which case
+   the error handling callback for "strict" will be returned. */
+PyAPI_FUNC(PyObject *) PyCodec_LookupError(const char *name);
+
+/* raise exc as an exception */
+PyAPI_FUNC(PyObject *) PyCodec_StrictErrors(PyObject *exc);
+
+/* ignore the unicode error, skipping the faulty input */
+PyAPI_FUNC(PyObject *) PyCodec_IgnoreErrors(PyObject *exc);
+
+/* replace the unicode error with ? or U+FFFD */
+PyAPI_FUNC(PyObject *) PyCodec_ReplaceErrors(PyObject *exc);
+
+/* replace the unicode encode error with XML character references */
+PyAPI_FUNC(PyObject *) PyCodec_XMLCharRefReplaceErrors(PyObject *exc);
+
+/* replace the unicode encode error with backslash escapes (\x, \u and \U) */
+PyAPI_FUNC(PyObject *) PyCodec_BackslashReplaceErrors(PyObject *exc);
+
 #ifdef __cplusplus
 }
 #endif

--- a/Include/pyerrors.h
+++ b/Include/pyerrors.h
@@ -54,6 +54,9 @@ PyAPI_DATA(PyObject *) PyExc_SystemExit;
 PyAPI_DATA(PyObject *) PyExc_TypeError;
 PyAPI_DATA(PyObject *) PyExc_UnboundLocalError;
 PyAPI_DATA(PyObject *) PyExc_UnicodeError;
+PyAPI_DATA(PyObject *) PyExc_UnicodeEncodeError;
+PyAPI_DATA(PyObject *) PyExc_UnicodeDecodeError;
+PyAPI_DATA(PyObject *) PyExc_UnicodeTranslateError;
 PyAPI_DATA(PyObject *) PyExc_ValueError;
 PyAPI_DATA(PyObject *) PyExc_ZeroDivisionError;
 #ifdef MS_WINDOWS
@@ -114,6 +117,69 @@ PyAPI_FUNC(void) PyErr_SetInterrupt(void);
 PyAPI_FUNC(void) PyErr_SyntaxLocation(char *, int);
 PyAPI_FUNC(PyObject *) PyErr_ProgramText(char *, int);

+/* The following functions are used to create and modify unicode
+   exceptions from C */
+/* create a UnicodeDecodeError object */
+PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_Create(
+	const char *, const char *, int, int, int, const char *);
+
+/* create a UnicodeEncodeError object */
+PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_Create(
+	const char *, const Py_UNICODE *, int, int, int, const char *);
+
+/* create a UnicodeTranslateError object */
+PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create(
+	const Py_UNICODE *, int, int, int, const char *);
+
+/* get the encoding attribute */
+PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetEncoding(PyObject *);
+PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetEncoding(PyObject *);
+PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetEncoding(PyObject *);
+
+/* get the object attribute */
+PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetObject(PyObject *);
+PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetObject(PyObject *);
+PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetObject(PyObject *);
+
+/* get the value of the start attribute (the int * may not be NULL)
+   return 0 on success, -1 on failure */
+PyAPI_FUNC(int) PyUnicodeEncodeError_GetStart(PyObject *, int *);
+PyAPI_FUNC(int) PyUnicodeDecodeError_GetStart(PyObject *, int *);
+PyAPI_FUNC(int) PyUnicodeTranslateError_GetStart(PyObject *, int *);
+
+/* assign a new value to the start attribute
+   return 0 on success, -1 on failure */
+PyAPI_FUNC(int) PyUnicodeEncodeError_SetStart(PyObject *, int);
+PyAPI_FUNC(int) PyUnicodeDecodeError_SetStart(PyObject *, int);
+PyAPI_FUNC(int) PyUnicodeTranslateError_SetStart(PyObject *, int);
+
+/* get the value of the end attribute (the int * may not be NULL)
+   return 0 on success, -1 on failure */
+PyAPI_FUNC(int) PyUnicodeEncodeError_GetEnd(PyObject *, int *);
+PyAPI_FUNC(int) PyUnicodeDecodeError_GetEnd(PyObject *, int *);
+PyAPI_FUNC(int) PyUnicodeTranslateError_GetEnd(PyObject *, int *);
+
+/* assign a new value to the end attribute
+   return 0 on success, -1 on failure */
+PyAPI_FUNC(int) PyUnicodeEncodeError_SetEnd(PyObject *, int);
+PyAPI_FUNC(int) PyUnicodeDecodeError_SetEnd(PyObject *, int);
+PyAPI_FUNC(int) PyUnicodeTranslateError_SetEnd(PyObject *, int);
+
+/* get the value of the reason attribute */
+PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetReason(PyObject *);
+PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetReason(PyObject *);
+PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetReason(PyObject *);
+
+/* assign a new value to the reason attribute
+   return 0 on success, -1 on failure */
+PyAPI_FUNC(int) PyUnicodeEncodeError_SetReason(
+	PyObject *, const char *);
+PyAPI_FUNC(int) PyUnicodeDecodeError_SetReason(
+	PyObject *, const char *);
+PyAPI_FUNC(int) PyUnicodeTranslateError_SetReason(
+	PyObject *, const char *);
+
+
 /* These APIs aren't really part of the error implementation, but
   often needed to format error messages; the native C lib APIs are
   not available on all platforms, which is why we provide emulations

--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -20,7 +20,10 @@ except ImportError, why:
 __all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
           "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
-           "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE"]
+           "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
+           "strict_errors", "ignore_errors", "replace_errors",
+           "xmlcharrefreplace_errors",
+           "register_error", "lookup_error"]

 ### Constants

@@ -632,6 +635,14 @@ def make_encoding_map(decoding_map):
            m[v] = None
    return m

+### error handlers
+
+strict_errors = lookup_error("strict")
+ignore_errors = lookup_error("ignore")
+replace_errors = lookup_error("replace")
+xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace")
+backslashreplace_errors = lookup_error("backslashreplace")
+
 # Tell modulefinder that using codecs probably needs the encodings
 # package
 _false = 0

--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
+import test.test_support, unittest
+import sys, codecs, htmlentitydefs, unicodedata
+
+class CodecCallbackTest(unittest.TestCase):
+
+    def test_xmlcharrefreplace(self):
+        # replace unencodable characters with numeric character entities.
+        # For ascii, latin-1 and charmaps this is completely implemented
+        # in C and should be reasonably fast.
+        s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
+        self.assertEqual(
+            s.encode("ascii", "xmlcharrefreplace"),
+            "&#12473;&#12497;&#12514; &#228;nd eggs"
+        )
+        self.assertEqual(
+            s.encode("latin-1", "xmlcharrefreplace"),
+            "&#12473;&#12497;&#12514; \xe4nd eggs"
+        )
+
+    def test_xmlcharnamereplace(self):
+        # This time use a named character entity for unencodable
+        # characters, if one is available.
+        names = {}
+        for (key, value) in htmlentitydefs.entitydefs.items():
+            if len(value)==1:
+                names[unicode(value, "latin-1")] = unicode(key, "latin-1")
+            else:
+                names[unichr(int(value[2:-1]))] = unicode(key, "latin-1")
+
+        def xmlcharnamereplace(exc):
+            if not isinstance(exc, UnicodeEncodeError):
+                raise TypeError("don't know how to handle %r" % exc)
+            l = []
+            for c in exc.object[exc.start:exc.end]:
+                try:
+                    l.append(u"&%s;" % names[c])
+                except KeyError:
+                    l.append(u"&#%d;" % ord(c))
+            return (u"".join(l), exc.end)
+
+        codecs.register_error(
+            "test.xmlcharnamereplace", xmlcharnamereplace)
+
+        sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
+        sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
+        self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
+        sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
+        self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
+        sout = "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
+        self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
+
+    def test_uninamereplace(self):
+        # We're using the names from the unicode database this time,
+        # and we're doing "syntax highlighting" here, i.e. we include
+        # the replaced text in ANSI escape sequences. For this it is
+        # useful that the error handler is not called for every single
+        # unencodable character, but for a complete sequence of
+        # unencodable characters, otherwise we would output many
+        # unnecessary escape sequences.
+
+        def uninamereplace(exc):
+            if not isinstance(exc, UnicodeEncodeError):
+                raise TypeError("don't know how to handle %r" % exc)
+            l = []
+            for c in exc.object[exc.start:exc.end]:
+                l.append(unicodedata.name(c, u"0x%x" % ord(c)))
+            return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
+
+        codecs.register_error(
+            "test.uninamereplace", uninamereplace)
+
+        sin = u"\xac\u1234\u20ac\u8000"
+        sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, 0x8000\033[0m"
+        self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
+
+        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, 0x8000\033[0m"
+        self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
+
+        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1m0x8000\033[0m"
+        self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
+
+    def test_backslashescape(self):
+        # Does the same as the "unicode-escape" encoding, but with different
+        # base encodings.
+        sin = u"a\xac\u1234\u20ac\u8000"
+        if sys.maxunicode > 0xffff:
+            sin += unichr(sys.maxunicode)
+        sout = "a\\xac\\u1234\\u20ac\\u8000"
+        if sys.maxunicode > 0xffff:
+            sout += "\\U%08x" % sys.maxunicode
+        self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
+
+        sout = "a\xac\\u1234\\u20ac\\u8000"
+        if sys.maxunicode > 0xffff:
+            sout += "\\U%08x" % sys.maxunicode
+        self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
+
+        sout = "a\xac\\u1234\xa4\\u8000"
+        if sys.maxunicode > 0xffff:
+            sout += "\\U%08x" % sys.maxunicode
+        self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
+
+    def test_relaxedutf8(self):
+        # This is the test for a decoding callback handler,
+        # that relaxes the UTF-8 minimal encoding restriction.
+        # A null byte that is encoded as "\xc0\x80" will be
+        # decoded as a null byte. All other illegal sequences
+        # will be handled strictly.
+        def relaxedutf8(exc):
+            if not isinstance(exc, UnicodeDecodeError):
+                raise TypeError("don't know how to handle %r" % exc)
+            if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
+                return (u"\x00", exc.start+2) # retry after two bytes
+            else:
+                raise exc
+
+        codecs.register_error(
+            "test.relaxedutf8", relaxedutf8)
+
+        sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
+        sout = u"a\x00b\x00c\xfc\x00\x00"
+        self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
+        sin = "\xc0\x80\xc0\x81"
+        self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
+
+    def test_charmapencode(self):
+        # For charmap encodings the replacement string will be
+        # mapped through the encoding again. This means, that
+        # to be able to use e.g. the "replace" handler, the
+        # charmap has to have a mapping for "?".
+        charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
+        sin = u"abc"
+        sout = "AABBCC"
+        self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
+
+        sin = u"abcA"
+        self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
+
+        charmap[ord("?")] = "XYZ"
+        sin = u"abcDEF"
+        sout = "AABBCCXYZXYZXYZ"
+        self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
+
+        charmap[ord("?")] = u"XYZ"
+        self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
+
+        charmap[ord("?")] = u"XYZ"
+        self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
+
+    def test_callbacks(self):
+        def handler1(exc):
+            if not isinstance(exc, UnicodeEncodeError) \
+               and not isinstance(exc, UnicodeDecodeError):
+                raise TypeError("don't know how to handle %r" % exc)
+            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
+            return (u"[%s]" % u"".join(l), exc.end)
+
+        codecs.register_error("test.handler1", handler1)
+
+        def handler2(exc):
+            if not isinstance(exc, UnicodeDecodeError):
+                raise TypeError("don't know how to handle %r" % exc)
+            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
+            return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
+
+        codecs.register_error("test.handler2", handler2)
+
+        s = "\x00\x81\x7f\x80\xff"
+
+        self.assertEqual(
+            s.decode("ascii", "test.handler1"),
+            u"\x00[<129>]\x7f[<128>][<255>]"
+        )
+        self.assertEqual(
+            s.decode("ascii", "test.handler2"),
+            u"\x00[<129>][<128>]"
+        )
+
+        self.assertEqual(
+            "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
+            u"\u3042[<92><117><51><120>]xx"
+        )
+
+        self.assertEqual(
+            "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
+            u"\u3042[<92><117><51><120><120>]"
+        )
+
+        self.assertEqual(
+            codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
+            u"z[<98>][<99>]"
+        )
+
+        self.assertEqual(
+            u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
+            u"g[<252><223>]rk"
+        )
+
+        self.assertEqual(
+            u"g\xfc\xdf".encode("ascii", "test.handler1"),
+            u"g[<252><223>]"
+        )
+
+    def test_longstrings(self):
+        # test long strings to check for memory overflow problems
+        errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"]
+        # register the handlers under different names,
+        # to prevent the codec from recognizing the name
+        for err in errors:
+            codecs.register_error("test." + err, codecs.lookup_error(err))
+        l = 1000
+        errors += [ "test." + err for err in errors ]
+        for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
+            for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"):
+                for err in errors:
+                   try:
+                       uni.encode(enc, err)
+                   except UnicodeError:
+                       pass
+
+    def check_exceptionobjectargs(self, exctype, args, msg):
+        # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
+        # check with one missing argument
+        self.assertRaises(TypeError, exctype, *args[:-1])
+        # check with one argument too many
+        self.assertRaises(TypeError, exctype, *(args + ["too much"]))
+        # check with one argument of the wrong type
+        wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
+        for i in xrange(len(args)):
+            for wrongarg in wrongargs:
+                if type(wrongarg) is type(args[i]):
+                   continue
+                # build argument array
+                callargs = []
+                for j in xrange(len(args)):
+                    if i==j:
+                        callargs.append(wrongarg)
+                    else:
+                        callargs.append(args[i])
+                self.assertRaises(TypeError, exctype, *callargs)
+        exc = exctype(*args)
+        self.assertEquals(str(exc), msg)
+
+    def test_unicodeencodeerror(self):
+        self.check_exceptionobjectargs(
+            UnicodeEncodeError,
+            ["ascii", u"g\xfcrk", 1, 2, "ouch"],
+            "'ascii' codec can't encode character '\ufc' in position 1: ouch"
+        )
+        self.check_exceptionobjectargs(
+            UnicodeEncodeError,
+            ["ascii", u"g\xfcrk", 1, 4, "ouch"],
+            "'ascii' codec can't encode characters in position 1-3: ouch"
+        )
+        self.check_exceptionobjectargs(
+            UnicodeEncodeError,
+            ["ascii", u"\xfcx", 0, 1, "ouch"],
+            "'ascii' codec can't encode character '\ufc' in position 0: ouch"
+        )
+
+    def test_unicodedecodeerror(self):
+        self.check_exceptionobjectargs(
+            UnicodeDecodeError,
+            ["ascii", "g\xfcrk", 1, 2, "ouch"],
+            "'ascii' codec can't decode byte 0xfc in position 1: ouch"
+        )
+        self.check_exceptionobjectargs(
+            UnicodeDecodeError,
+            ["ascii", "g\xfcrk", 1, 3, "ouch"],
+            "'ascii' codec can't decode bytes in position 1-2: ouch"
+        )
+
+    def test_unicodetranslateerror(self):
+        self.check_exceptionobjectargs(
+            UnicodeTranslateError,
+            [u"g\xfcrk", 1, 2, "ouch"],
+            "can't translate character '\\ufc' in position 1: ouch"
+        )
+        self.check_exceptionobjectargs(
+            UnicodeTranslateError,
+            [u"g\xfcrk", 1, 3, "ouch"],
+            "can't translate characters in position 1-2: ouch"
+        )
+
+    def test_badandgoodstrictexceptions(self):
+        self.assertRaises(
+            TypeError,
+            codecs.strict_errors,
+            42
+        )
+        self.assertRaises(
+            Exception,
+            codecs.strict_errors,
+            Exception("ouch")
+        )
+
+        self.assertRaises(
+            UnicodeEncodeError,
+            codecs.strict_errors,
+            UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
+        )
+
+    def test_badandgoodignoreexceptions(self):
+        self.assertRaises(
+           TypeError,
+           codecs.ignore_errors,
+           42
+        )
+        self.assertRaises(
+           TypeError,
+           codecs.ignore_errors,
+           UnicodeError("ouch")
+        )
+        self.assertEquals(
+            codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
+            (u"", 1)
+        )
+        self.assertEquals(
+            codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
+            (u"", 1)
+        )
+        self.assertEquals(
+            codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
+            (u"", 1)
+        )
+
+    def test_badandgoodreplaceexceptions(self):
+        self.assertRaises(
+           TypeError,
+           codecs.replace_errors,
+           42
+        )
+        self.assertRaises(
+           TypeError,
+           codecs.replace_errors,
+           UnicodeError("ouch")
+        )
+        self.assertEquals(
+            codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
+            (u"?", 1)
+        )
+        self.assertEquals(
+            codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
+            (u"\ufffd", 1)
+        )
+        self.assertEquals(
+            codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
+            (u"\ufffd", 1)
+        )
+
+    def test_badandgoodxmlcharrefreplaceexceptions(self):
+        self.assertRaises(
+           TypeError,
+           codecs.xmlcharrefreplace_errors,
+           42
+        )
+        self.assertRaises(
+           TypeError,
+           codecs.xmlcharrefreplace_errors,
+           UnicodeError("ouch")
+        )
+        self.assertEquals(
+            codecs.xmlcharrefreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
+            (u"&#%d;" % 0x3042, 1)
+        )
+        self.assertRaises(
+            TypeError,
+            codecs.xmlcharrefreplace_errors,
+            UnicodeError("ouch")
+        )
+        self.assertRaises(
+            TypeError,
+            codecs.xmlcharrefreplace_errors,
+            UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
+        )
+        self.assertRaises(
+            TypeError,
+            codecs.xmlcharrefreplace_errors,
+            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
+        )
+
+    def test_badandgoodbackslashreplaceexceptions(self):
+        self.assertRaises(
+           TypeError,
+           codecs.backslashreplace_errors,
+           42
+        )
+        self.assertRaises(
+           TypeError,
+           codecs.backslashreplace_errors,
+           UnicodeError("ouch")
+        )
+        self.assertEquals(
+            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
+            (u"\\u3042", 1)
+        )
+        self.assertEquals(
+            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
+            (u"\\x00", 1)
+        )
+        self.assertEquals(
+            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
+            (u"\\xff", 1)
+        )
+        self.assertEquals(
+            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
+            (u"\\u0100", 1)
+        )
+        self.assertEquals(
+            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
+            (u"\\uffff", 1)
+        )
+        if sys.maxunicode>0xffff:
+            self.assertEquals(
+                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
+                (u"\\U00010000", 1)
+            )
+            self.assertEquals(
+                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
+                (u"\\U0010ffff", 1)
+            )
+
+        self.assertRaises(
+            TypeError,
+            codecs.backslashreplace_errors,
+            UnicodeError("ouch")
+        )
+        self.assertRaises(
+            TypeError,
+            codecs.backslashreplace_errors,
+            UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
+        )
+        self.assertRaises(
+            TypeError,
+            codecs.backslashreplace_errors,
+            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
+        )
+
+    def test_badhandlerresults(self):
+        results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
+        encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
+
+        for res in results:
+            codecs.register_error("test.badhandler", lambda: res)
+            for enc in encs:
+                self.assertRaises(
+                    TypeError,
+                    u"\u3042".encode,
+                    enc,
+                    "test.badhandler"
+                )
+            for (enc, bytes) in (
+                ("ascii", "\xff"),
+                ("utf-8", "\xff"),
+                ("utf-7", "+x-")
+            ):
+                self.assertRaises(
+                    TypeError,
+                    bytes.decode,
+                    enc,
+                    "test.badhandler"
+                )
+
+    def test_lookup(self):
+        self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
+        self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
+        self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
+        self.assertEquals(
+            codecs.xmlcharrefreplace_errors,
+            codecs.lookup_error("xmlcharrefreplace")
+        )
+        self.assertEquals(
+            codecs.backslashreplace_errors,
+            codecs.lookup_error("backslashreplace")
+        )
+
+def test_main():
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(CodecCallbackTest))
+    test.test_support.run_suite(suite)
+
+if __name__ == "__main__":
+    test_main()
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -57,6 +57,9 @@ Type/class unification and new-style classes

 Core and builtins

+- Codec error handling callbacks (PEP 293) are implemented.
+  Error handling in unicode.encode or str.decode can now be customized.
+
 - A subtle change to the semantics of the built-in function intern():
  interned strings are no longer immortal.  You must keep a reference
  to the return value intern() around to get the benefit.

--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -706,6 +706,32 @@ mbcs_encode(PyObject *self,
 #endif /* MS_WINDOWS */
 #endif /* Py_USING_UNICODE */

+/* --- Error handler registry --------------------------------------------- */
+
+static PyObject *register_error(PyObject *self, PyObject *args)
+{
+    const char *name;
+    PyObject *handler;
+
+    if (!PyArg_ParseTuple(args, "sO:register_error",
+			  &name, &handler))
+	return NULL;
+    if (PyCodec_RegisterError(name, handler))
+        return NULL;
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+static PyObject *lookup_error(PyObject *self, PyObject *args)
+{
+    const char *name;
+
+    if (!PyArg_ParseTuple(args, "s:lookup_error",
+			  &name))
+	return NULL;
+    return PyCodec_LookupError(name);
+}
+
 /* --- Module API --------------------------------------------------------- */

 static PyMethodDef _codecs_functions[] = {
@@ -744,6 +770,8 @@ static PyMethodDef _codecs_functions[] = {
    {"mbcs_decode", 		mbcs_decode,			METH_VARARGS},
 #endif
 #endif /* Py_USING_UNICODE */
+    {"register_error", 		register_error,			METH_VARARGS},
+    {"lookup_error", 		lookup_error,			METH_VARARGS},
    {NULL, NULL}		/* sentinel */
 };


--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -2468,7 +2468,9 @@ PyDoc_STRVAR(encode__doc__,
 Encodes S using the codec registered for encoding. encoding defaults\n\
 to the default encoding. errors may be given to set a different error\n\
 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a ValueError. Other possible values are 'ignore' and 'replace'.");
+a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
+'xmlcharrefreplace' as well as any other name registered with\n\
+codecs.register_error that is able to handle UnicodeEncodeErrors.");

 static PyObject *
 string_encode(PyStringObject *self, PyObject *args)
@@ -2487,7 +2489,9 @@ PyDoc_STRVAR(decode__doc__,
 Decodes S using the codec registered for encoding. encoding defaults\n\
 to the default encoding. errors may be given to set a different error\n\
 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a ValueError. Other possible values are 'ignore' and 'replace'.");
+a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
+as well as any other name registerd with codecs.register_error that is\n\
+able to handle UnicodeDecodeErrors.");

 static PyObject *
 string_decode(PyStringObject *self, PyObject *args)

--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -528,8 +528,8 @@ PyObject *PyUnicode_Decode(const char *s,
 			   const char *errors)
 {
    PyObject *buffer = NULL, *unicode;
-    
-    if (encoding == NULL) 
+
+    if (encoding == NULL)
 	encoding = PyUnicode_GetDefaultEncoding();

    /* Shortcuts for common default encodings */
@@ -680,6 +680,92 @@ int PyUnicode_SetDefaultEncoding(const char *encoding)
    return -1;
 }

+/* error handling callback helper:
+   build arguments, call the callback and check the result,
+   if no exception occurred, copy the replacement to the output
+   and adjust *endinpos, *inptr, *outpos and *outptr (resizing *output
+   when the replacement does not fit). return 0 on success, -1 on error
+*/
+
+static
+int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
+                 const char *encoding, const char *reason,
+                 const char *input, int insize, int *startinpos, int *endinpos, PyObject **exceptionObject, const char **inptr,
+                 PyObject **output, int *outpos, Py_UNICODE **outptr)
+{
+    static char *argparse = "O!i;decoding error handler must return (unicode, int) tuple";
+
+    PyObject *restuple = NULL;
+    PyObject *repunicode = NULL;
+    int outsize = PyUnicode_GET_SIZE(*output);
+    int requiredsize;
+    int newpos;
+    Py_UNICODE *repptr;
+    int repsize;
+    int res = -1;
+
+    if (*errorHandler == NULL) {
+	*errorHandler = PyCodec_LookupError(errors);
+	if (*errorHandler == NULL)
+	   goto onError;
+    }
+
+    if (*exceptionObject == NULL) {
+    	*exceptionObject = PyUnicodeDecodeError_Create(
+	    encoding, input, insize, *startinpos, *endinpos, reason);
+	if (*exceptionObject == NULL)
+	   goto onError;
+    }
+    else {
+	if (PyUnicodeDecodeError_SetStart(*exceptionObject, *startinpos))
+	    goto onError;
+	if (PyUnicodeDecodeError_SetEnd(*exceptionObject, *endinpos))
+	    goto onError;
+	if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason))
+	    goto onError;
+    }
+
+    restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL);
+    if (restuple == NULL)
+	goto onError;
+    if (!PyTuple_Check(restuple)) {
+	PyErr_SetString(PyExc_TypeError, &argparse[4]); /* message part after "O!i;", not a format */
+	goto onError;
+    }
+    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
+	goto onError;
+    if (newpos<0)
+	newpos = 0;
+    else if (newpos>insize)
+	newpos = insize;
+
+    /* need more space? (at least enough for what we
+       have+the replacement+the rest of the string (starting
+       at the new input position), so we won't have to check space
+       when there are no errors in the rest of the string) */
+    repptr = PyUnicode_AS_UNICODE(repunicode);
+    repsize = PyUnicode_GET_SIZE(repunicode);
+    requiredsize = *outpos + repsize + insize-newpos;
+    if (requiredsize > outsize) {
+	if (requiredsize<2*outsize)
+	    requiredsize = 2*outsize;
+	if (PyUnicode_Resize(output, requiredsize))
+	    goto onError;
+	*outptr = PyUnicode_AS_UNICODE(*output) + *outpos;
+    }
+    *endinpos = newpos;
+    *inptr = input + newpos;
+    Py_UNICODE_COPY(*outptr, repptr, repsize);
+    *outptr += repsize;
+    *outpos += repsize;
+    /* we made it! */
+    res = 0;
+
+    onError:
+    Py_XDECREF(restuple);
+    return res;
+}
+
 /* --- UTF-7 Codec -------------------------------------------------------- */

 /* see RFC2152 for details */
@@ -738,40 +824,14 @@ char utf7_special[128] = {
 		} \
    } \

-static
-int utf7_decoding_error(Py_UNICODE **dest,
-                        const char *errors,
-                        const char *details) 
-{
-    if ((errors == NULL) ||
-        (strcmp(errors,"strict") == 0)) {
-        PyErr_Format(PyExc_UnicodeError,
-                     "UTF-7 decoding error: %.400s",
-                     details);
-        return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-        return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-        if (dest != NULL) {
-            **dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-            (*dest)++;
-        }
-        return 0;
-    }
-    else {
-        PyErr_Format(PyExc_ValueError,
-                     "UTF-7 decoding error; unknown error handling code: %.400s",
-                     errors);
-        return -1;
-    }
-}
-
 PyObject *PyUnicode_DecodeUTF7(const char *s,
 			       int size,
 			       const char *errors)
 {
+    const char *starts = s;
+    int startinpos;
+    int endinpos;
+    int outpos;
    const char *e;
    PyUnicodeObject *unicode;
    Py_UNICODE *p;
@@ -779,7 +839,9 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
    int inShift = 0;
    unsigned int bitsleft = 0;
    unsigned long charsleft = 0;
-	int surrogate = 0;
+    int surrogate = 0;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;

    unicode = _PyUnicode_New(size);
    if (!unicode)
@@ -791,7 +853,9 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
    e = s + size;

    while (s < e) {
-        Py_UNICODE ch = *s;
+        Py_UNICODE ch;
+        restart:
+        ch = *s;

        if (inShift) {
            if ((ch == '-') || !B64CHAR(ch)) {
@@ -836,6 +900,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
            }
        }
        else if ( ch == '+' ) {
+            startinpos = s-starts;
            s++;
            if (s < e && *s == '-') {
                s++;
@@ -857,21 +922,39 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
        }
        continue;
    utf7Error:
-      if (utf7_decoding_error(&p, errors, errmsg))
-          goto onError;
+        outpos = p-PyUnicode_AS_UNICODE(unicode);
+        endinpos = s-starts;
+        if (unicode_decode_call_errorhandler(
+             errors, &errorHandler,
+             "utf7", errmsg,
+             starts, size, &startinpos, &endinpos, &exc, &s,
+             (PyObject **)&unicode, &outpos, &p))
+        goto onError;
    }

    if (inShift) {
-        if (utf7_decoding_error(&p, errors, "unterminated shift sequence"))
+        outpos = p-PyUnicode_AS_UNICODE(unicode);
+        endinpos = size;
+        if (unicode_decode_call_errorhandler(
+             errors, &errorHandler,
+             "utf7", "unterminated shift sequence",
+             starts, size, &startinpos, &endinpos, &exc, &s,
+             (PyObject **)&unicode, &outpos, &p))
            goto onError;
+        if (s < e)
+           goto restart;
    }

-    if (_PyUnicode_Resize(&unicode, p - unicode->str))
+    if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)))
        goto onError;

+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return (PyObject *)unicode;

 onError:
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    Py_DECREF(unicode);
    return NULL;
 }
@@ -1001,46 +1084,21 @@ char utf8_code_length[256] = {
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
 };

-static
-int utf8_decoding_error(const char **source,
-                        Py_UNICODE **dest,
-                        const char *errors,
-                        const char *details) 
-{
-    if ((errors == NULL) ||
-        (strcmp(errors,"strict") == 0)) {
-        PyErr_Format(PyExc_UnicodeError,
-                     "UTF-8 decoding error: %.400s",
-                     details);
-        return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-        (*source)++;
-        return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-        (*source)++;
-        **dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-        (*dest)++;
-        return 0;
-    }
-    else {
-        PyErr_Format(PyExc_ValueError,
-                     "UTF-8 decoding error; unknown error handling code: %.400s",
-                     errors);
-        return -1;
-    }
-}
-
 PyObject *PyUnicode_DecodeUTF8(const char *s,
 			       int size,
 			       const char *errors)
 {
+    const char *starts = s;
    int n;
+    int startinpos;
+    int endinpos;
+    int outpos;
    const char *e;
    PyUnicodeObject *unicode;
    Py_UNICODE *p;
    const char *errmsg = "";
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;

    /* Note: size will always be longer than the resulting Unicode
       character count */
@@ -1067,6 +1125,8 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,

        if (s + n > e) {
 	    errmsg = "unexpected end of data";
+	    startinpos = s-starts;
+	    endinpos = size;
 	    goto utf8Error;
 	}

@@ -1074,19 +1134,27 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,

        case 0:
            errmsg = "unexpected code byte";
+	    startinpos = s-starts;
+	    endinpos = startinpos+1;
 	    goto utf8Error;

        case 1:
            errmsg = "internal error";
+	    startinpos = s-starts;
+	    endinpos = startinpos+1;
 	    goto utf8Error;

        case 2:
            if ((s[1] & 0xc0) != 0x80) {
                errmsg = "invalid data";
+		startinpos = s-starts;
+		endinpos = startinpos+2;
 		goto utf8Error;
 	    }
            ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
            if (ch < 0x80) {
+		startinpos = s-starts;
+		endinpos = startinpos+2;
                errmsg = "illegal encoding";
 		goto utf8Error;
 	    }
@@ -1098,6 +1166,8 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
            if ((s[1] & 0xc0) != 0x80 || 
                (s[2] & 0xc0) != 0x80) {
                errmsg = "invalid data";
+		startinpos = s-starts;
+		endinpos = startinpos+3;
 		goto utf8Error;
 	    }
            ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
@@ -1110,6 +1180,8 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
 		       unit.
 		*/
                errmsg = "illegal encoding";
+		startinpos = s-starts;
+		endinpos = startinpos+3;
 		goto utf8Error;
 	    }
 	    else
@@ -1121,6 +1193,8 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
                (s[2] & 0xc0) != 0x80 ||
                (s[3] & 0xc0) != 0x80) {
                errmsg = "invalid data";
+		startinpos = s-starts;
+		endinpos = startinpos+4;
 		goto utf8Error;
 	    }
            ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
@@ -1132,6 +1206,8 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
 					 UTF-16 */
 	    {
                errmsg = "illegal encoding";
+		startinpos = s-starts;
+		endinpos = startinpos+4;
 		goto utf8Error;
 	    }
 #ifdef Py_UNICODE_WIDE
@@ -1153,23 +1229,34 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
        default:
            /* Other sizes are only needed for UCS-4 */
            errmsg = "unsupported Unicode code range";
+	    startinpos = s-starts;
+	    endinpos = startinpos+n;
 	    goto utf8Error;
        }
        s += n;
 	continue;
 	
    utf8Error:
-      if (utf8_decoding_error(&s, &p, errors, errmsg))
-          goto onError;
+    outpos = p-PyUnicode_AS_UNICODE(unicode);
+    if (unicode_decode_call_errorhandler(
+	     errors, &errorHandler,
+	     "utf8", errmsg,
+	     starts, size, &startinpos, &endinpos, &exc, &s,
+	     (PyObject **)&unicode, &outpos, &p))
+	goto onError;
    }

    /* Adjust length */
    if (_PyUnicode_Resize(&unicode, p - unicode->str))
        goto onError;

+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return (PyObject *)unicode;

 onError:
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    Py_DECREF(unicode);
    return NULL;
 }
@@ -1287,43 +1374,16 @@ PyObject *PyUnicode_AsUTF8String(PyObject *unicode)

 /* --- UTF-16 Codec ------------------------------------------------------- */

-static
-int utf16_decoding_error(Py_UNICODE **dest,
-			 const char *errors,
-			 const char *details) 
-{
-    if ((errors == NULL) ||
-        (strcmp(errors,"strict") == 0)) {
-        PyErr_Format(PyExc_UnicodeError,
-                     "UTF-16 decoding error: %.400s",
-                     details);
-        return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-        return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	if (dest) {
-	    **dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-	    (*dest)++;
-	}
-        return 0;
-    }
-    else {
-        PyErr_Format(PyExc_ValueError,
-                     "UTF-16 decoding error; "
-		     "unknown error handling code: %.400s",
-                     errors);
-        return -1;
-    }
-}
-
 PyObject *
 PyUnicode_DecodeUTF16(const char *s,
 		      int size,
 		      const char *errors,
 		      int *byteorder)
 {
+    const char *starts = s;
+    int startinpos;
+    int endinpos;
+    int outpos;
    PyUnicodeObject *unicode;
    Py_UNICODE *p;
    const unsigned char *q, *e;
@@ -1335,13 +1395,8 @@ PyUnicode_DecodeUTF16(const char *s,
 #else
    int ihi = 0, ilo = 1;
 #endif
-
-    /* size should be an even number */
-    if (size & 1) {
-        if (utf16_decoding_error(NULL, errors, "truncated data"))
-            return NULL;
-        --size;  /* else ignore the oddball byte */
-    }
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;

    /* Note: size will always be longer than the resulting Unicode
       character count */
@@ -1398,7 +1453,18 @@ PyUnicode_DecodeUTF16(const char *s,
    }

    while (q < e) {
-	Py_UNICODE ch = (q[ihi] << 8) | q[ilo];
+	Py_UNICODE ch;
+	/* remaining bytes at the end? (size should be even) */
+	if (e-q<2) {
+	    errmsg = "truncated data";
+	    startinpos = ((const char *)q)-starts;
+	    endinpos = ((const char *)e)-starts;
+	    goto utf16Error;
+	    /* The remaining input chars are ignored if the callback
+	       chooses to skip the input */
+	}
+	ch = (q[ihi] << 8) | q[ilo];
+
 	q += 2;

 	if (ch < 0xD800 || ch > 0xDFFF) {
@@ -1409,6 +1475,8 @@ PyUnicode_DecodeUTF16(const char *s,
 	/* UTF-16 code pair: */
 	if (q >= e) {
 	    errmsg = "unexpected end of data";
+	    startinpos = (((const char *)q)-2)-starts;
+	    endinpos = ((const char *)e)-starts;
 	    goto utf16Error;
 	}
 	if (0xD800 <= ch && ch <= 0xDBFF) {
@@ -1425,15 +1493,24 @@ PyUnicode_DecodeUTF16(const char *s,
 	    }
 	    else {
                errmsg = "illegal UTF-16 surrogate";
+		startinpos = (((const char *)q)-4)-starts;
+		endinpos = startinpos+2;
 		goto utf16Error;
 	    }

 	}
 	errmsg = "illegal encoding";
+	startinpos = (((const char *)q)-2)-starts;
+	endinpos = startinpos+2;
 	/* Fall through to report the error */

    utf16Error:
-	if (utf16_decoding_error(&p, errors, errmsg))
+	outpos = p-PyUnicode_AS_UNICODE(unicode);
+	if (unicode_decode_call_errorhandler(
+	         errors, &errorHandler,
+	         "utf16", errmsg,
+	         starts, size, &startinpos, &endinpos, &exc, (const char **)&q,
+	         (PyObject **)&unicode, &outpos, &p))
 	    goto onError;
    }

@@ -1444,10 +1521,14 @@ PyUnicode_DecodeUTF16(const char *s,
    if (_PyUnicode_Resize(&unicode, p - unicode->str))
        goto onError;

+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return (PyObject *)unicode;

 onError:
    Py_DECREF(unicode);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return NULL;
 }

@@ -1528,63 +1609,43 @@ PyObject *PyUnicode_AsUTF16String(PyObject *unicode)

 /* --- Unicode Escape Codec ----------------------------------------------- */

-static
-int unicodeescape_decoding_error(Py_UNICODE **x,
-                                 const char *errors,
-                                 const char *details) 
-{
-    if ((errors == NULL) ||
-        (strcmp(errors,"strict") == 0)) {
-        PyErr_Format(PyExc_UnicodeError,
-                     "Unicode-Escape decoding error: %.400s",
-                     details);
-        return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-        return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-        **x = Py_UNICODE_REPLACEMENT_CHARACTER;
-	(*x)++;
-        return 0;
-    }
-    else {
-        PyErr_Format(PyExc_ValueError,
-                     "Unicode-Escape decoding error; "
-                     "unknown error handling code: %.400s",
-                     errors);
-        return -1;
-    }
-}
-
 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;

 PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
 					int size,
 					const char *errors)
 {
+    const char *starts = s;
+    int startinpos;
+    int endinpos;
+    int outpos;
+    int i;
    PyUnicodeObject *v;
-    Py_UNICODE *p, *buf;
+    Py_UNICODE *p;
    const char *end;
    char* message;
    Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;

    /* Escaped strings will always be longer than the resulting
       Unicode string, so we start with size here and then reduce the
-       length after conversion to the true value. */
+       length after conversion to the true value.
+       (but if the error callback returns a long replacement string
+       we'll have to allocate more space) */
    v = _PyUnicode_New(size);
    if (v == NULL)
        goto onError;
    if (size == 0)
        return (PyObject *)v;

-    p = buf = PyUnicode_AS_UNICODE(v);
+    p = PyUnicode_AS_UNICODE(v);
    end = s + size;

    while (s < end) {
        unsigned char c;
        Py_UNICODE x;
-        int i, digits;
+        int digits;

        /* Non-escape characters are interpreted as Unicode ordinals */
        if (*s != '\\') {
@@ -1592,6 +1653,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
            continue;
        }

+        startinpos = s-starts;
        /* \ - Escapes */
        s++;
        switch (*s++) {
@@ -1640,14 +1702,28 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
            message = "truncated \\UXXXXXXXX escape";
        hexescape:
            chr = 0;
-            for (i = 0; i < digits; i++) {
+            outpos = p-PyUnicode_AS_UNICODE(v);
+            if (s+digits>end) {
+                endinpos = size;
+                if (unicode_decode_call_errorhandler(
+                    errors, &errorHandler,
+                    "unicodeescape", "end of string in escape sequence",
+                    starts, size, &startinpos, &endinpos, &exc, &s,
+                    (PyObject **)&v, &outpos, &p))
+                    goto onError;
+                goto nextByte;
+            }
+            for (i = 0; i < digits; ++i) {
                c = (unsigned char) s[i];
                if (!isxdigit(c)) {
-                    if (unicodeescape_decoding_error(&p, errors, message))
+                    endinpos = (s+i+1)-starts;
+                    if (unicode_decode_call_errorhandler(
+                        errors, &errorHandler,
+                        "unicodeescape", message,
+                        starts, size, &startinpos, &endinpos, &exc, &s,
+                        (PyObject **)&v, &outpos, &p))
                        goto onError;
-                    chr = 0xffffffff;
-                    i++;
-                    break;
+                    goto nextByte;
                }
                chr = (chr<<4) & ~0xF;
                if (c >= '0' && c <= '9')
@@ -1659,9 +1735,9 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
            }
            s += i;
            if (chr == 0xffffffff)
-                    /* _decoding_error will have already written into the
-                       target buffer. */
-                    break;
+                /* _decoding_error will have already written into the
+                   target buffer. */
+                break;
        store:
            /* when we get here, chr is a 32-bit unicode character */
            if (chr <= 0xffff)
@@ -1678,10 +1754,13 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
                *p++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF);
 #endif
            } else {
-                if (unicodeescape_decoding_error(
-                    &p, errors,
-                    "illegal Unicode character")
-                    )
+                endinpos = s-starts;
+                outpos = p-PyUnicode_AS_UNICODE(v);
+                if (unicode_decode_call_errorhandler(
+                    errors, &errorHandler,
+                    "unicodeescape", "illegal Unicode character",
+                    starts, size, &startinpos, &endinpos, &exc, &s,
+                    (PyObject **)&v, &outpos, &p))
                    goto onError;
            }
            break;
@@ -1717,13 +1796,27 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
                        goto store;
                }
            }
-            if (unicodeescape_decoding_error(&p, errors, message))
+            endinpos = s-starts;
+            outpos = p-PyUnicode_AS_UNICODE(v);
+            if (unicode_decode_call_errorhandler(
+                errors, &errorHandler,
+                "unicodeescape", message,
+                starts, size, &startinpos, &endinpos, &exc, &s,
+                (PyObject **)&v, &outpos, &p))
                goto onError;
            break;

        default:
            if (s > end) {
-                if (unicodeescape_decoding_error(&p, errors, "\\ at end of string"))
+                message = "\\ at end of string";
+                s--;
+                endinpos = s-starts;
+                outpos = p-PyUnicode_AS_UNICODE(v);
+                if (unicode_decode_call_errorhandler(
+                    errors, &errorHandler,
+                    "unicodeescape", message,
+                    starts, size, &startinpos, &endinpos, &exc, &s,
+                    (PyObject **)&v, &outpos, &p))
                    goto onError;
            }
            else {
@@ -1732,9 +1825,14 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
            }
            break;
        }
+        nextByte:
+        ;
    }
-    if (_PyUnicode_Resize(&v, (int)(p - buf)))
-                goto onError;
+    if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
+        goto onError;
+    /* success: release the handler and exception cached while decoding */
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return (PyObject *)v;

 ucnhashError:
@@ -1742,10 +1837,14 @@ ucnhashError:
        PyExc_UnicodeError,
        "\\N escapes not supported (can't load unicodedata module)"
        );
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return NULL;

 onError:
    Py_XDECREF(v);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return NULL;
 }

@@ -1909,20 +2008,27 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
 					   int size,
 					   const char *errors)
 {
+    const char *starts = s;
+    int startinpos;
+    int endinpos;
+    int outpos;
    PyUnicodeObject *v;
-    Py_UNICODE *p, *buf;
+    Py_UNICODE *p;
    const char *end;
    const char *bs;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
    
    /* Escaped strings will always be longer than the resulting
       Unicode string, so we start with size here and then reduce the
-       length after conversion to the true value. */
+       length after conversion to the true value. (But decoding error
+       handler might have to resize the string) */
    v = _PyUnicode_New(size);
    if (v == NULL)
 	goto onError;
    if (size == 0)
 	return (PyObject *)v;
-    p = buf = PyUnicode_AS_UNICODE(v);
+    p = PyUnicode_AS_UNICODE(v);
    end = s + size;
    while (s < end) {
 	unsigned char c;
@@ -1934,6 +2040,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
 	    *p++ = (unsigned char)*s++;
 	    continue;
 	}
+	startinpos = s-starts;

 	/* \u-escapes are only interpreted iff the number of leading
 	   backslashes if odd */
@@ -1952,15 +2059,18 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
 	s++;

 	/* \uXXXX with 4 hex digits */
-	for (x = 0, i = 0; i < 4; i++) {
-	    c = (unsigned char)s[i];
+	outpos = p-PyUnicode_AS_UNICODE(v);
+	for (x = 0, i = 0; i < 4; ++i, ++s) {
+	    c = (unsigned char)*s;
 	    if (!isxdigit(c)) {
-		if (unicodeescape_decoding_error(&p, errors,
-						 "truncated \\uXXXX"))
+		endinpos = s-starts;
+		if (unicode_decode_call_errorhandler(
+		    errors, &errorHandler,
+		    "rawunicodeescape", "truncated \\uXXXX",
+		    starts, size, &startinpos, &endinpos, &exc, &s,
+		    (PyObject **)&v, &outpos, &p))
 		    goto onError;
-		x = 0xffffffff;
-		i++;
-		break;
+		goto nextByte;
 	    }
 	    x = (x<<4) & ~0xF;
 	    if (c >= '0' && c <= '9')
@@ -1970,16 +2080,20 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
 	    else
 		x += 10 + c - 'A';
 	}
-	s += i;
-	if (x != 0xffffffff)
-		*p++ = x;
+	*p++ = x;
+	nextByte:
+	;
    }
-    if (_PyUnicode_Resize(&v, (int)(p - buf)))
+    if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
 	goto onError;
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return (PyObject *)v;
    
 onError:
    Py_XDECREF(v);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return NULL;
 }

@@ -2059,71 +2173,271 @@ PyObject *PyUnicode_DecodeLatin1(const char *s,
    return NULL;
 }

-static
-int latin1_encoding_error(const Py_UNICODE **source,
-			  char **dest,
-			  const char *errors,
-			  const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "Latin-1 encoding error: %.400s",
-		     details);
-	return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
-	return 0;
+/* create a UnicodeEncodeError in *exceptionObject, or adjust start/end/reason of the one already there */
+static void make_encode_exception(PyObject **exceptionObject,
+    const char *encoding,
+    const Py_UNICODE *unicode, int size,
+    int startpos, int endpos,
+    const char *reason)
+{
+    if (*exceptionObject == NULL) {
+	*exceptionObject = PyUnicodeEncodeError_Create(
+	    encoding, unicode, size, startpos, endpos, reason);
    }
    else {
-	PyErr_Format(PyExc_ValueError,
-		     "Latin-1 encoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
+	if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos))
+	    goto onError;
+	if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos))
+	    goto onError;
+	if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason))
+	    goto onError;
+	return;
+	onError: /* a setter failed: drop the object so callers see NULL */
+	Py_DECREF(*exceptionObject);
+	*exceptionObject = NULL;
    }
 }

-PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
-				 int size,
-				 const char *errors)
+/* raises a UnicodeEncodeError: builds/updates *exceptionObject, then passes it to PyCodec_StrictErrors */
+static void raise_encode_exception(PyObject **exceptionObject,
+    const char *encoding,
+    const Py_UNICODE *unicode, int size,
+    int startpos, int endpos,
+    const char *reason)
 {
-    PyObject *repr;
-    char *s, *start;
+    make_encode_exception(exceptionObject,
+	encoding, unicode, size, startpos, endpos, reason);
+    if (*exceptionObject != NULL) /* NULL means make_encode_exception failed */
+	PyCodec_StrictErrors(*exceptionObject);
+}

-    repr = PyString_FromStringAndSize(NULL, size);
-    if (repr == NULL)
-        return NULL;
-    if (size == 0)
-	return repr;
+/* error handling callback helper:
+   build arguments, call the callback and check the result,
+   put the new input position (clamped to 0..size) into *newpos and return
+   the replacement string as a new reference, or NULL on error */
+static PyObject *unicode_encode_call_errorhandler(const char *errors,
+    PyObject **errorHandler,
+    const char *encoding, const char *reason,
+    const Py_UNICODE *unicode, int size, PyObject **exceptionObject,
+    int startpos, int endpos,
+    int *newpos)
+{
+    static char *argparse = "O!i;encoding error handler must return (unicode, int) tuple";

-    s = PyString_AS_STRING(repr);
-    start = s;
-    while (size-- > 0) {
-        Py_UNICODE ch = *p++;
-	if (ch >= 256) {
-	    if (latin1_encoding_error(&p, &s, errors, 
-				      "ordinal not in range(256)"))
-		goto onError;
+    PyObject *restuple;
+    PyObject *resunicode;
+
+    if (*errorHandler == NULL) {
+	*errorHandler = PyCodec_LookupError(errors);
+        if (*errorHandler == NULL)
+	    return NULL;
+    }
+
+    make_encode_exception(exceptionObject,
+	encoding, unicode, size, startpos, endpos, reason);
+    if (*exceptionObject == NULL)
+	return NULL;
+
+    restuple = PyObject_CallFunctionObjArgs(
+	*errorHandler, *exceptionObject, NULL);
+    if (restuple == NULL)
+	return NULL;
+    if (!PyTuple_Check(restuple)) {
+	PyErr_SetString(PyExc_TypeError, &argparse[4]); /* message part after "O!i;", not a format */
+	Py_DECREF(restuple);
+	return NULL;
+    }
+    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type,
+	&resunicode, newpos)) {
+	Py_DECREF(restuple);
+	return NULL;
+    }
+    if (*newpos<0)
+	*newpos = 0;
+    else if (*newpos>size)
+	*newpos = size;
+    Py_INCREF(resunicode); /* keep the replacement alive after the tuple is released */
+    Py_DECREF(restuple);
+    return resunicode;
+}
+
+static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
+				 int size,
+				 const char *errors,
+				 int limit)
+{
+    /* output object */
+    PyObject *res;
+    /* pointers to the beginning and end+1 of input */
+    const Py_UNICODE *startp = p;
+    const Py_UNICODE *endp = p + size;
+    /* chars with ordinal >= limit are unencodable and are passed to the
+       error handling selected by errors (limit 256: latin-1, else ascii) */
+    /* pointer into the output */
+    char *str;
+    /* current output position */
+    int respos = 0;
+    int ressize;
+    char *encoding = (limit == 256) ? "latin-1" : "ascii";
+    char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+    /* the following variable is used for caching string comparisons
+     * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
+    int known_errorHandler = -1;
+
+    /* allocate enough for a simple encoding without
+       replacements, if we need more, we'll resize */
+    res = PyString_FromStringAndSize(NULL, size);
+    if (res == NULL)
+        goto onError;
+    if (size == 0)
+	return res;
+    str = PyString_AS_STRING(res);
+    ressize = size;
+
+    while (p<endp) {
+	Py_UNICODE c = *p;
+
+	/* can we encode this? */
+	if (c<limit) {
+	    /* no overflow check, because we know that the space is enough */
+	    *str++ = (char)c;
+	    ++p;
+	}
+	else {
+	    int unicodepos = p-startp;
+	    int requiredsize;
+	    PyObject *repunicode;
+	    int repsize;
+	    int newpos;
+	    int respos;
+	    Py_UNICODE *uni2;
+	    /* startpos for collecting unencodable chars */
+	    const Py_UNICODE *collstart = p;
+	    const Py_UNICODE *collend = p;
+	    /* find all unencodable characters */
+	    while ((collend < endp) && ((*collend)>=limit))
+		++collend;
+	    /* cache callback name lookup (if not done yet, i.e. it's the first error) */
+	    if (known_errorHandler==-1) {
+		if ((errors==NULL) || (!strcmp(errors, "strict")))
+		    known_errorHandler = 1;
+		else if (!strcmp(errors, "replace"))
+		    known_errorHandler = 2;
+		else if (!strcmp(errors, "ignore"))
+		    known_errorHandler = 3;
+		else if (!strcmp(errors, "xmlcharrefreplace"))
+		    known_errorHandler = 4;
+		else
+		    known_errorHandler = 0;
+	    }
+	    switch (known_errorHandler) {
+		case 1: /* strict */
+		    raise_encode_exception(&exc, encoding, startp, size, collstart-startp, collend-startp, reason);
+		    goto onError;
+		case 2: /* replace */
+		    while (collstart++<collend)
+			*str++ = '?'; /* fall through */
+		case 3: /* ignore */
+		    p = collend;
+		    break;
+		case 4: /* xmlcharrefreplace */
+		    respos = str-PyString_AS_STRING(res);
+		    /* determine replacement size (temporarily (mis)uses p) */
+		    for (p = collstart, repsize = 0; p < collend; ++p) {
+			if (*p<10)
+			    repsize += 2+1+1;
+			else if (*p<100)
+			    repsize += 2+2+1;
+			else if (*p<1000)
+			    repsize += 2+3+1;
+			else if (*p<10000)
+			    repsize += 2+4+1;
+			else if (*p<100000)
+			    repsize += 2+5+1;
+			else if (*p<1000000)
+			    repsize += 2+6+1;
+			else
+			    repsize += 2+7+1;
+		    }
+		    requiredsize = respos+repsize+(endp-collend);
+		    if (requiredsize > ressize) {
+			if (requiredsize<2*ressize)
+			    requiredsize = 2*ressize;
+			if (_PyString_Resize(&res, requiredsize))
+			    goto onError;
+			str = PyString_AS_STRING(res) + respos;
+			ressize = requiredsize;
+		    }
+		    /* generate replacement (temporarily (mis)uses p) */
+		    for (p = collstart; p < collend; ++p) {
+			str += sprintf(str, "&#%d;", (int)*p);
+		    }
+		    p = collend;
+		    break;
+		default:
+		    repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
+			encoding, reason, startp, size, &exc,
+			collstart-startp, collend-startp, &newpos);
+		    if (repunicode == NULL)
+			goto onError;
+		    /* need more space? (at least enough for what we
+		       have+the replacement+the rest of the string starting
+		       at newpos, which the callback may place before collend) */
+		    respos = str-PyString_AS_STRING(res);
+		    repsize = PyUnicode_GET_SIZE(repunicode);
+		    requiredsize = respos+repsize+(size-newpos);
+		    if (requiredsize > ressize) {
+			if (requiredsize<2*ressize)
+			    requiredsize = 2*ressize;
+			if (_PyString_Resize(&res, requiredsize)) {
+			    Py_DECREF(repunicode);
+			    goto onError;
+			}
+			str = PyString_AS_STRING(res) + respos;
+			ressize = requiredsize;
+		    }
+		    /* check if there is anything unencodable in the replacement
+		       and copy it to the output */
+		    for (uni2 = PyUnicode_AS_UNICODE(repunicode);repsize-->0; ++uni2, ++str) {
+			c = *uni2;
+			if (c >= limit) {
+			    raise_encode_exception(&exc, encoding, startp, size,
+				unicodepos, unicodepos+1, reason);
+			    Py_DECREF(repunicode);
+			    goto onError;
+			}
+			*str = (char)c;
+		    }
+		    p = startp + newpos;
+		    Py_DECREF(repunicode);
+	    }
 	}
-	else
-            *s++ = (char)ch;
    }
-    /* Resize if error handling skipped some characters */
-    if (s - start < PyString_GET_SIZE(repr))
-	_PyString_Resize(&repr, s - start);
-    return repr;
+    /* Resize if we allocated too much */
+    respos = str-PyString_AS_STRING(res);
+    if (respos<ressize)
+       /* If this fails res will be NULL */
+	_PyString_Resize(&res, respos);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    return res;

- onError:
-    Py_DECREF(repr);
+    onError:
+    Py_XDECREF(res);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return NULL;
 }

+PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
+				 int size,
+				 const char *errors)
+{
+    return unicode_encode_ucs1(p, size, errors, 256); /* limit 256 selects the "latin-1" variant */
+}
+
 PyObject *PyUnicode_AsLatin1String(PyObject *unicode)
 {
    if (!PyUnicode_Check(unicode)) {
@@ -2137,42 +2451,19 @@ PyObject *PyUnicode_AsLatin1String(PyObject *unicode)

 /* --- 7-bit ASCII Codec -------------------------------------------------- */

-static
-int ascii_decoding_error(const char **source,
-			 Py_UNICODE **dest,
-			 const char *errors,
-			 const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "ASCII decoding error: %.400s",
-		     details);
-	return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-	(*dest)++;
-	return 0;
-    }
-    else {
-	PyErr_Format(PyExc_ValueError,
-		     "ASCII decoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
-    }
-}
-
 PyObject *PyUnicode_DecodeASCII(const char *s,
 				int size,
 				const char *errors)
 {
+    const char *starts = s;
    PyUnicodeObject *v;
    Py_UNICODE *p;
+    int startinpos;
+    int endinpos;
+    int outpos;
+    const char *e;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
    
    /* ASCII is equivalent to the first 128 ordinals in Unicode. */
    if (size == 1 && *(unsigned char*)s < 128) {
@@ -2186,89 +2477,44 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
    if (size == 0)
 	return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
-    while (size-- > 0) {
-	register unsigned char c;
-
-	c = (unsigned char)*s++;
-	if (c < 128)
+    e = s + size;
+    while (s < e) {
+	register unsigned char c = (unsigned char)*s;
+	if (c < 128) {
 	    *p++ = c;
-	else if (ascii_decoding_error(&s, &p, errors, 
-				      "ordinal not in range(128)"))
+	    ++s;
+	}
+	else {
+	    startinpos = s-starts;
+	    endinpos = startinpos + 1;
+	    outpos = p-PyUnicode_AS_UNICODE(v);
+	    if (unicode_decode_call_errorhandler(
+		 errors, &errorHandler,
+		 "ascii", "ordinal not in range(128)",
+		 starts, size, &startinpos, &endinpos, &exc, &s,
+		 (PyObject **)&v, &outpos, &p))
 		goto onError;
+	}
    }
    if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
 	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
 	    goto onError;
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return (PyObject *)v;
    
 onError:
    Py_XDECREF(v);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return NULL;
 }

-static
-int ascii_encoding_error(const Py_UNICODE **source,
-			 char **dest,
-			 const char *errors,
-			 const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "ASCII encoding error: %.400s",
-		     details);
-	return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
-	return 0;
-    }
-    else {
-	PyErr_Format(PyExc_ValueError,
-		     "ASCII encoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
-    }
-}
-
 PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
 				int size,
 				const char *errors)
 {
-    PyObject *repr;
-    char *s, *start;
-
-    repr = PyString_FromStringAndSize(NULL, size);
-    if (repr == NULL)
-        return NULL;
-    if (size == 0)
-	return repr;
-
-    s = PyString_AS_STRING(repr);
-    start = s;
-    while (size-- > 0) {
-        Py_UNICODE ch = *p++;
-	if (ch >= 128) {
-	    if (ascii_encoding_error(&p, &s, errors, 
-				      "ordinal not in range(128)"))
-		goto onError;
-	}
-	else
-            *s++ = (char)ch;
-    }
-    /* Resize if error handling skipped some characters */
-    if (s - start < PyString_GET_SIZE(repr))
-	_PyString_Resize(&repr, s - start);
-    return repr;
-
- onError:
-    Py_DECREF(repr);
-    return NULL;
+    return unicode_encode_ucs1(p, size, errors, 128); /* 128: presumably the first ordinal treated as unencodable (ASCII is 0..127) -- confirm in unicode_encode_ucs1 */
 }

 PyObject *PyUnicode_AsASCIIString(PyObject *unicode)
@@ -2348,44 +2594,21 @@ PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p,

 /* --- Character Mapping Codec -------------------------------------------- */

-static
-int charmap_decoding_error(const char **source,
-			 Py_UNICODE **dest,
-			 const char *errors,
-			 const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "charmap decoding error: %.400s",
-		     details);
-	return -1;
-    }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
-    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = Py_UNICODE_REPLACEMENT_CHARACTER;
-	(*dest)++;
-	return 0;
-    }
-    else {
-	PyErr_Format(PyExc_ValueError,
-		     "charmap decoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
-    }
-}
-
 PyObject *PyUnicode_DecodeCharmap(const char *s,
 				  int size,
 				  PyObject *mapping,
 				  const char *errors)
 {
+    const char *starts = s;
+    int startinpos;
+    int endinpos;
+    int outpos;
+    const char *e;
    PyUnicodeObject *v;
    Py_UNICODE *p;
    int extrachars = 0;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
    
    /* Default to Latin-1 */
    if (mapping == NULL)
@@ -2397,8 +2620,9 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
    if (size == 0)
 	return (PyObject *)v;
    p = PyUnicode_AS_UNICODE(v);
-    while (size-- > 0) {
-	unsigned char ch = *s++;
+    e = s + size;
+    while (s < e) {
+	unsigned char ch = *s;
 	PyObject *w, *x;

 	/* Get mapping (char ordinal -> integer, Unicode char or None) */
@@ -2430,11 +2654,18 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
 	}
 	else if (x == Py_None) {
 	    /* undefined mapping */
-	    if (charmap_decoding_error(&s, &p, errors, 
-				       "character maps to <undefined>")) {
+	    outpos = p-PyUnicode_AS_UNICODE(v);
+	    startinpos = s-starts;
+	    endinpos = startinpos+1;
+	    if (unicode_decode_call_errorhandler(
+		 errors, &errorHandler,
+		 "charmap", "character maps to <undefined>",
+		 starts, size, &startinpos, &endinpos, &exc, &s,
+		 (PyObject **)&v, &outpos, &p)) {
 		Py_DECREF(x);
 		goto onError;
 	    }
+	    continue;
 	}
 	else if (PyUnicode_Check(x)) {
 	    int targetsize = PyUnicode_GET_SIZE(x);
@@ -2474,45 +2705,233 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
 	    goto onError;
 	}
 	Py_DECREF(x);
+	++s;
    }
    if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
 	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
 	    goto onError;
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    return (PyObject *)v;
    
 onError:
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
    Py_XDECREF(v);
    return NULL;
 }

-static
-int charmap_encoding_error(const Py_UNICODE **source,
-			   char **dest,
-			   const char *errors,
-			   const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "charmap encoding error: %.400s",
-		     details);
-	return -1;
+/* Look up the character c in the mapping. If the character
+   can't be found, Py_None is returned (or NULL, if another
+   error occurred). */
+static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping)
+{
+    PyObject *w = PyInt_FromLong((long)c);
+    PyObject *x;
+
+    if (w == NULL)
+	 return NULL;
+    x = PyObject_GetItem(mapping, w);
+    Py_DECREF(w);
+    if (x == NULL) {
+	if (PyErr_ExceptionMatches(PyExc_LookupError)) {
+	    /* No mapping found means: mapping is undefined. */
+	    PyErr_Clear();
+	    x = Py_None;
+	    Py_INCREF(x);
+	    return x;
+	} else
+	    return NULL;
    }
-    else if (strcmp(errors,"ignore") == 0) {
-	return 0;
+    else if (PyInt_Check(x)) {
+	long value = PyInt_AS_LONG(x);
+	if (value < 0 || value > 255) {
+	    PyErr_SetString(PyExc_TypeError,
+			     "character mapping must be in range(256)");
+	    Py_DECREF(x);
+	    return NULL;
+	}
+	return x;
    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
-	return 0;
+    else if (PyString_Check(x))
+	return x;
+    else {
+	/* wrong return value */
+	PyErr_SetString(PyExc_TypeError,
+	      "character mapping must return integer, None or str");
+	Py_DECREF(x);
+	return NULL;
    }
+}
+
+/* look up the character, put the result in the output string and adjust
+   *outpos. Reallocate the output string if not enough
+   space is available. Return a new reference to the object that
+   was put in the output buffer, or Py_None, if the mapping was undefined
+   (in which case no character was written) or NULL, if the lookup or a
+   reallocation failed. The caller must decref the result */
+static
+PyObject *charmapencode_output(Py_UNICODE c, PyObject *mapping,
+    PyObject **outobj, int *outpos)
+{
+    PyObject *rep = charmapencode_lookup(c, mapping);
+
+    if (rep==NULL)
+	return NULL;
+    else if (rep==Py_None)
+	return rep;
    else {
-	PyErr_Format(PyExc_ValueError,
-		     "charmap encoding error; "
-		     "unknown error handling code: %.400s",
-		     errors);
-	return -1;
+	char *outstart = PyString_AS_STRING(*outobj);
+	int outsize = PyString_GET_SIZE(*outobj);
+	if (PyInt_Check(rep)) {
+	    int requiredsize = *outpos+1;
+	    if (outsize<requiredsize) {
+		/* exponentially overallocate to minimize reallocations */
+		if (requiredsize < 2*outsize)
+		    requiredsize = 2*outsize;
+		if (_PyString_Resize(outobj, requiredsize)) {
+		    Py_DECREF(rep);
+		    return NULL;
+		}
+		outstart = PyString_AS_STRING(*outobj);
+	    }
+	    outstart[(*outpos)++] = (char)PyInt_AS_LONG(rep);
+	}
+	else {
+	    const char *repchars = PyString_AS_STRING(rep);
+	    int repsize = PyString_GET_SIZE(rep);
+	    int requiredsize = *outpos+repsize;
+	    if (outsize<requiredsize) {
+		/* exponentially overallocate to minimize reallocations */
+		if (requiredsize < 2*outsize)
+		    requiredsize = 2*outsize;
+		if (_PyString_Resize(outobj, requiredsize)) {
+		    Py_DECREF(rep);
+		    return NULL;
+		}
+		outstart = PyString_AS_STRING(*outobj);
+	    }
+	    memcpy(outstart + *outpos, repchars, repsize);
+	    *outpos += repsize;
+	}
+    }
+    return rep;
+}
+
+/* handle an error in PyUnicode_EncodeCharmap
+   Return 0 on success, -1 on error */
+static
+int charmap_encoding_error(
+    const Py_UNICODE *p, int size, int *inpos, PyObject *mapping,
+    PyObject **exceptionObject,
+    int *known_errorHandler, PyObject *errorHandler, const char *errors,
+    PyObject **res, int *respos)
+{
+    PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
+    int repsize;
+    int newpos;
+    Py_UNICODE *uni2;
+    /* startpos for collecting unencodable chars */
+    int collstartpos = *inpos;
+    int collendpos = *inpos+1;
+    int collpos;
+    char *encoding = "charmap";
+    char *reason = "character maps to <undefined>";
+
+    PyObject *x;
+    /* find all unencodable characters */
+    while (collendpos < size) {
+	x = charmapencode_lookup(p[collendpos], mapping);
+	if (x==NULL)
+	    return -1;
+	else if (x!=Py_None) {
+	    Py_DECREF(x);
+	    break;
+	}
+	Py_DECREF(x);
+	++collendpos;
+    }
+    /* cache callback name lookup
+     * (if not done yet, i.e. it's the first error) */
+    if (*known_errorHandler==-1) {
+	if ((errors==NULL) || (!strcmp(errors, "strict")))
+	    *known_errorHandler = 1;
+	else if (!strcmp(errors, "replace"))
+	    *known_errorHandler = 2;
+	else if (!strcmp(errors, "ignore"))
+	    *known_errorHandler = 3;
+	else if (!strcmp(errors, "xmlcharrefreplace"))
+	    *known_errorHandler = 4;
+	else
+	    *known_errorHandler = 0;
+    }
+    switch (*known_errorHandler) {
+	case 1: /* strict */
+	    raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+	    return -1;
+	case 2: /* replace */
+	    for (collpos = collstartpos; collpos<collendpos; ++collpos) {
+		x = charmapencode_output('?', mapping, res, respos);
+		if (x==NULL) {
+		    return -1;
+		}
+		else if (x==Py_None) {
+		    Py_DECREF(x);
+		    raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+		    return -1;
+		}
+		Py_DECREF(x);
+	    }
+	    /* fall through */
+	case 3: /* ignore */
+	    *inpos = collendpos;
+	    break;
+	case 4: /* xmlcharrefreplace */
+	    /* generate replacement (temporarily (mis)uses p) */
+	    for (collpos = collstartpos; collpos < collendpos; ++collpos) {
+		char buffer[2+29+1+1];
+		char *cp;
+		sprintf(buffer, "&#%d;", (int)p[collpos]);
+		for (cp = buffer; *cp; ++cp) {
+		    x = charmapencode_output(*cp, mapping, res, respos);
+		    if (x==NULL)
+			return -1;
+		    else if (x==Py_None) {
+			Py_DECREF(x);
+			raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+			return -1;
+		    }
+		    Py_DECREF(x);
+		}
+	    }
+	    *inpos = collendpos;
+	    break;
+	default:
+	    repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
+		encoding, reason, p, size, exceptionObject,
+		collstartpos, collendpos, &newpos);
+	    if (repunicode == NULL)
+		return -1;
+	    /* generate replacement  */
+	    repsize = PyUnicode_GET_SIZE(repunicode);
+	    for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
+		x = charmapencode_output(*uni2, mapping, res, respos);
+		if (x==NULL) {
+		    Py_DECREF(repunicode);
+		    return -1;
+		}
+		else if (x==Py_None) {
+		    Py_DECREF(repunicode);
+		    Py_DECREF(x);
+		    raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+		    return -1;
+		}
+		Py_DECREF(x);
+	    }
+	    *inpos = newpos;
+	    Py_DECREF(repunicode);
    }
+    return 0;
 }

 PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
@@ -2520,101 +2939,62 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
 				  PyObject *mapping,
 				  const char *errors)
 {
-    PyObject *v;
-    char *s;
-    int extrachars = 0;
+    /* output object */
+    PyObject *res = NULL;
+    /* current input position */
+    int inpos = 0;
+    /* current output position */
+    int respos = 0;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+    /* the following variable is used for caching string comparisons
+     * -1=not initialized, 0=unknown, 1=strict, 2=replace,
+     * 3=ignore, 4=xmlcharrefreplace */
+    int known_errorHandler = -1;

    /* Default to Latin-1 */
    if (mapping == NULL)
 	return PyUnicode_EncodeLatin1(p, size, errors);

-    v = PyString_FromStringAndSize(NULL, size);
-    if (v == NULL)
-        return NULL;
+    /* allocate enough for a simple encoding without
+       replacements, if we need more, we'll resize */
+    res = PyString_FromStringAndSize(NULL, size);
+    if (res == NULL)
+        goto onError;
    if (size == 0)
-	return v;
-    s = PyString_AS_STRING(v);
-    while (size-- > 0) {
-	Py_UNICODE ch = *p++;
-	PyObject *w, *x;
+	return res;

-	/* Get mapping (Unicode ordinal -> string char, integer or None) */
-	w = PyInt_FromLong((long)ch);
-	if (w == NULL)
+    while (inpos<size) {
+	/* try to encode it */
+	PyObject *x = charmapencode_output(p[inpos], mapping, &res, &respos);
+	if (x==NULL) /* error */
 	    goto onError;
-	x = PyObject_GetItem(mapping, w);
-	Py_DECREF(w);
-	if (x == NULL) {
-	    if (PyErr_ExceptionMatches(PyExc_LookupError)) {
-		/* No mapping found means: mapping is undefined. */
-		PyErr_Clear();
-		x = Py_None;
-		Py_INCREF(x);
-	    } else
+	if (x==Py_None) { /* unencodable character */
+	    if (charmap_encoding_error(p, size, &inpos, mapping,
+		&exc,
+		&known_errorHandler, errorHandler, errors,
+		&res, &respos))
 		goto onError;
 	}
+	else
+	    /* done with this character => adjust input position */
+	    ++inpos;
+	Py_DECREF(x);
+    }

-	/* Apply mapping */
-	if (PyInt_Check(x)) {
-	    long value = PyInt_AS_LONG(x);
-	    if (value < 0 || value > 255) {
-		PyErr_SetString(PyExc_TypeError,
-				"character mapping must be in range(256)");
-		Py_DECREF(x);
-		goto onError;
-	    }
-	    *s++ = (char)value;
-	}
-	else if (x == Py_None) {
-	    /* undefined mapping */
-	    if (charmap_encoding_error(&p, &s, errors, 
-				       "character maps to <undefined>")) {
-		Py_DECREF(x);
-		goto onError;
-	    }
-	}
-	else if (PyString_Check(x)) {
-	    int targetsize = PyString_GET_SIZE(x);
-
-	    if (targetsize == 1)
-		/* 1-1 mapping */
-		*s++ = *PyString_AS_STRING(x);
-
-	    else if (targetsize > 1) {
-		/* 1-n mapping */
-		if (targetsize > extrachars) {
-		    /* resize first */
-		    int oldpos = (int)(s - PyString_AS_STRING(v));
-		    int needed = (targetsize - extrachars) + \
-			         (targetsize << 2);
-		    extrachars += needed;
-		    if (_PyString_Resize(&v, PyString_GET_SIZE(v) + needed)) {
-			Py_DECREF(x);
-			goto onError;
-		    }
-		    s = PyString_AS_STRING(v) + oldpos;
-		}
-		memcpy(s, PyString_AS_STRING(x), targetsize);
-		s += targetsize;
-		extrachars -= targetsize;
-	    }
-	    /* 1-0 mapping: skip the character */
-	}
-	else {
-	    /* wrong return value */
-	    PyErr_SetString(PyExc_TypeError,
-		  "character mapping must return integer, None or unicode");
-	    Py_DECREF(x);
+    /* Resize if we allocated to much */
+    if (respos<PyString_GET_SIZE(res)) {
+	if (_PyString_Resize(&res, respos))
 	    goto onError;
-	}
-	Py_DECREF(x);
    }
-    if (s - PyString_AS_STRING(v) < PyString_GET_SIZE(v))
-	_PyString_Resize(&v, (int)(s - PyString_AS_STRING(v)));
-    return v;
+    Py_XDECREF(exc);
+    Py_XDECREF(errorHandler);
+    return res;

- onError:
-    Py_XDECREF(v);
+    onError:
+    Py_XDECREF(res);
+    Py_XDECREF(exc);
+    Py_XDECREF(errorHandler);
    return NULL;
 }

@@ -2631,115 +3011,344 @@ PyObject *PyUnicode_AsCharmapString(PyObject *unicode,
 				   NULL);
 }

+/* create a UnicodeTranslateError in *exceptionObject, or update start, end and reason of the existing one (which is released and reset to NULL if an update fails) */
+static void make_translate_exception(PyObject **exceptionObject,
+    const Py_UNICODE *unicode, int size,
+    int startpos, int endpos,
+    const char *reason)
+{
+    if (*exceptionObject == NULL) {
+    	*exceptionObject = PyUnicodeTranslateError_Create(
+	    unicode, size, startpos, endpos, reason);
+    }
+    else {
+	if (PyUnicodeTranslateError_SetStart(*exceptionObject, startpos))
+	    goto onError;
+	if (PyUnicodeTranslateError_SetEnd(*exceptionObject, endpos))
+	    goto onError;
+	if (PyUnicodeTranslateError_SetReason(*exceptionObject, reason))
+	    goto onError;
+	return;
+	onError:
+	Py_DECREF(*exceptionObject);
+	*exceptionObject = NULL;
+    }
+}
+
+/* create or adjust the UnicodeTranslateError and, if that left a valid exception object, raise it through PyCodec_StrictErrors() */
+static void raise_translate_exception(PyObject **exceptionObject,
+    const Py_UNICODE *unicode, int size,
+    int startpos, int endpos,
+    const char *reason)
+{
+    make_translate_exception(exceptionObject,
+	unicode, size, startpos, endpos, reason);
+    if (*exceptionObject != NULL)
+	PyCodec_StrictErrors(*exceptionObject);
+}
+
+/* error handling callback helper:
+   build the exception, call the callback and check its result tuple,
+   put the new position (clipped to 0..size) into newpos and return the
+   replacement string, which has to be decrefed by the caller */
+static PyObject *unicode_translate_call_errorhandler(const char *errors,
+    PyObject **errorHandler,
+    const char *reason,
+    const Py_UNICODE *unicode, int size, PyObject **exceptionObject,
+    int startpos, int endpos,
+    int *newpos)
+{
+    static char *argparse = "O!i;translating error handler must return (unicode, int) tuple";
+
+    PyObject *restuple;
+    PyObject *resunicode;
+
+    if (*errorHandler == NULL) {
+	*errorHandler = PyCodec_LookupError(errors);
+        if (*errorHandler == NULL)
+	    return NULL;
+    }
+
+    make_translate_exception(exceptionObject,
+	unicode, size, startpos, endpos, reason);
+    if (*exceptionObject == NULL)
+	return NULL;
+
+    restuple = PyObject_CallFunctionObjArgs(
+	*errorHandler, *exceptionObject, NULL);
+    if (restuple == NULL)
+	return NULL;
+    if (!PyTuple_Check(restuple)) {
+	PyErr_Format(PyExc_TypeError, &argparse[4]);
+	Py_DECREF(restuple);
+	return NULL;
+    }
+    if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type,
+	&resunicode, newpos)) {
+	Py_DECREF(restuple);
+	return NULL;
+    }
+    if (*newpos<0)
+	*newpos = 0;
+    else if (*newpos>size)
+	*newpos = size;
+    Py_INCREF(resunicode);
+    Py_DECREF(restuple);
+    return resunicode;
+}
+
+/* Look up the character c in the mapping and store the result in *result:
+   NULL if the mapping has no entry (the caller should map 1:1), otherwise
+   the object found (None, an int in range or a unicode object), which
+   must be decrefed by the caller.
+   Return 0 on success, -1 on error */
 static
-int translate_error(const Py_UNICODE **source,
-		    Py_UNICODE **dest,
-		    const char *errors,
-		    const char *details) 
-{
-    if ((errors == NULL) ||
-	(strcmp(errors,"strict") == 0)) {
-	PyErr_Format(PyExc_UnicodeError,
-		     "translate error: %.400s",
-		     details);
-	return -1;
+int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
+{
+    PyObject *w = PyInt_FromLong((long)c);
+    PyObject *x;
+
+    if (w == NULL)
+	 return -1;
+    x = PyObject_GetItem(mapping, w);
+    Py_DECREF(w);
+    if (x == NULL) {
+	if (PyErr_ExceptionMatches(PyExc_LookupError)) {
+	    /* No mapping found means: use 1:1 mapping. */
+	    PyErr_Clear();
+	    *result = NULL;
+	    return 0;
+	} else
+	    return -1;
    }
-    else if (strcmp(errors,"ignore") == 0) {
+    else if (x == Py_None) {
+	*result = x;
 	return 0;
    }
-    else if (strcmp(errors,"replace") == 0) {
-	**dest = '?';
-	(*dest)++;
+    else if (PyInt_Check(x)) {
+	long value = PyInt_AS_LONG(x);
+	long max = PyUnicode_GetMax();
+	if (value < 0 || value > max) {
+	    PyErr_Format(PyExc_TypeError,
+			     "character mapping must be in range(0x%lx)", max+1);
+	    Py_DECREF(x);
+	    return -1;
+	}
+	*result = x;
+	return 0;
+    }
+    else if (PyUnicode_Check(x)) {
+	*result = x;
 	return 0;
    }
    else {
-	PyErr_Format(PyExc_ValueError,
-		     "translate error; "
-		     "unknown error handling code: %.400s",
-		     errors);
+	/* wrong return value */
+	PyErr_SetString(PyExc_TypeError,
+	      "character mapping must return integer, None or unicode");
+	/* x is not handed to the caller on this path, so release it here */
+	Py_DECREF(x);
+	return -1;
+    }
+}
+/* ensure that *outobj is at least requiredsize characters long,
+if not, reallocate it and adjust *outp and *outsize accordingly.
+Return 0 on success, -1 on error */
+static
+int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsize,
+    int requiredsize)
+{
+    if (requiredsize > *outsize) {
+	/* remember old output position */
+	int outpos = *outp-PyUnicode_AS_UNICODE(*outobj);
+	/* exponentially overallocate to minimize reallocations */
+	if (requiredsize < 2 * *outsize)
+	    requiredsize = 2 * *outsize;
+	if (_PyUnicode_Resize(outobj, requiredsize))
+	    return -1;
+	*outp = PyUnicode_AS_UNICODE(*outobj) + outpos;
+	*outsize = requiredsize;
+    }
+    return 0;
+}
+/* look up the character, put the result in the output string and adjust
+   various state variables. Store a reference to the object that
+   was put in the output buffer in *res: NULL for the 1:1 default, or
+   Py_None if the mapping was undefined (in which case nothing was written).
+   The caller must decref *res.
+   Return 0 on success, -1 on error. */
+static
+int charmaptranslate_output(Py_UNICODE c, PyObject *mapping,
+    PyObject **outobj, int *outsize, Py_UNICODE **outp, PyObject **res)
+{
+    if (charmaptranslate_lookup(c, mapping, res))
 	return -1;
+    if (*res==NULL) {
+	/* not found => default to 1:1 mapping */
+	*(*outp)++ = (Py_UNICODE)c;
+    }
+    else if (*res==Py_None)
+	;
+    else if (PyInt_Check(*res)) {
+	/* no overflow check, because we know that the space is enough */
+	*(*outp)++ = (Py_UNICODE)PyInt_AS_LONG(*res);
+    }
+    else if (PyUnicode_Check(*res)) {
+	int repsize = PyUnicode_GET_SIZE(*res);
+	if (repsize==1) {
+	    /* no overflow check, because we know that the space is enough */
+	    *(*outp)++ = *PyUnicode_AS_UNICODE(*res);
+	}
+	else if (repsize!=0) {
+	    /* more than one character */
+	    int requiredsize = *outsize + repsize - 1;
+	    if (charmaptranslate_makespace(outobj, outp, outsize, requiredsize))
+		return -1;
+	    memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize);
+	    *outp += repsize;
+	}
    }
+    else
+	return -1;
+    return 0;
 }

-PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *s,
+PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
 				     int size,
 				     PyObject *mapping,
 				     const char *errors)
 {
-    PyUnicodeObject *v;
-    Py_UNICODE *p;
-    
+    /* output object */
+    PyObject *res = NULL;
+    /* pointers to the beginning and end+1 of input */
+    const Py_UNICODE *startp = p;
+    const Py_UNICODE *endp = p + size;
+    /* pointer into the output */
+    Py_UNICODE *str;
+    /* current output position */
+    int respos = 0;
+    int ressize;
+    char *reason = "character maps to <undefined>";
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+    /* the following variable is used for caching string comparisons
+     * -1=not initialized, 0=unknown, 1=strict, 2=replace,
+     * 3=ignore, 4=xmlcharrefreplace */
+    int known_errorHandler = -1;
+
    if (mapping == NULL) {
 	PyErr_BadArgument();
 	return NULL;
    }
-    
-    /* Output will never be longer than input */
-    v = _PyUnicode_New(size);
-    if (v == NULL)
-	goto onError;
-    if (size == 0)
-	goto done;
-    p = PyUnicode_AS_UNICODE(v);
-    while (size-- > 0) {
-	Py_UNICODE ch = *s++;
-	PyObject *w, *x;

-	/* Get mapping */
-	w = PyInt_FromLong(ch);
-	if (w == NULL)
-	    goto onError;
-	x = PyObject_GetItem(mapping, w);
-	Py_DECREF(w);
-	if (x == NULL) {
-	    if (PyErr_ExceptionMatches(PyExc_LookupError)) {
-		/* No mapping found: default to 1-1 mapping */
-		PyErr_Clear();
-		*p++ = ch;
-		continue;
-	    }
+    /* allocate enough for a simple 1:1 translation without
+       replacements, if we need more, we'll resize */
+    res = PyUnicode_FromUnicode(NULL, size);
+    if (res == NULL)
+        goto onError;
+    if (size == 0)
+	return res;
+    str = PyUnicode_AS_UNICODE(res);
+    ressize = size;
+
+    while (p<endp) {
+	/* try to encode it */
+	PyObject *x = NULL;
+	if (charmaptranslate_output(*p, mapping, &res, &ressize, &str, &x)) {
+	    Py_XDECREF(x);
 	    goto onError;
 	}
-
-	/* Apply mapping */
-	if (PyInt_Check(x))
-	    *p++ = (Py_UNICODE)PyInt_AS_LONG(x);
-	else if (x == Py_None) {
-	    /* undefined mapping */
-	    if (translate_error(&s, &p, errors, 
-				"character maps to <undefined>")) {
-		Py_DECREF(x);
-		goto onError;
+	if (x!=Py_None) /* it worked => adjust input pointer */
+	    ++p;
+	else { /* untranslatable character */
+	    PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
+	    int repsize;
+	    int newpos;
+	    Py_UNICODE *uni2;
+	    /* startpos for collecting untranslatable chars */
+	    const Py_UNICODE *collstart = p;
+	    const Py_UNICODE *collend = p+1;
+	    const Py_UNICODE *coll;
+
+	    Py_XDECREF(x);
+	    /* find all untranslatable characters */
+	    while (collend < endp) {
+	    	if (charmaptranslate_lookup(*collend, mapping, &x))
+		    goto onError;
+		Py_XDECREF(x);
+		if (x!=Py_None)
+		    break;
+		++collend;
 	    }
-	}
-	else if (PyUnicode_Check(x)) {
-	    if (PyUnicode_GET_SIZE(x) != 1) {
-		/* 1-n mapping */
-		PyErr_SetString(PyExc_NotImplementedError,
-				"1-n mappings are currently not implemented");
-		Py_DECREF(x);
-		goto onError;
+	    /* cache callback name lookup
+	     * (if not done yet, i.e. it's the first error) */
+	    if (known_errorHandler==-1) {
+		if ((errors==NULL) || (!strcmp(errors, "strict")))
+		    known_errorHandler = 1;
+		else if (!strcmp(errors, "replace"))
+		    known_errorHandler = 2;
+		else if (!strcmp(errors, "ignore"))
+		    known_errorHandler = 3;
+		else if (!strcmp(errors, "xmlcharrefreplace"))
+		    known_errorHandler = 4;
+		else
+		    known_errorHandler = 0;
+	    }
+	    switch (known_errorHandler) {
+		case 1: /* strict */
+		    raise_translate_exception(&exc, startp, size, collstart-startp, collend-startp, reason);
+		    goto onError;
+		case 2: /* replace */
+		    /* No need to check for space, this is a 1:1 replacement */
+		    for (coll = collstart; coll<collend; ++coll)
+			*str++ = '?';
+		    /* fall through */
+		case 3: /* ignore */
+		    p = collend;
+		    break;
+		case 4: /* xmlcharrefreplace */
+		    /* generate replacement (temporarily (mis)uses p) */
+		    for (p = collstart; p < collend; ++p) {
+			char buffer[2+29+1+1];
+			char *cp;
+			sprintf(buffer, "&#%d;", (int)*p);
+			if (charmaptranslate_makespace(&res, &str, &ressize,
+			    (str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend)))
+			    goto onError;
+			for (cp = buffer; *cp; ++cp)
+			    *str++ = *cp;
+		    }
+		    p = collend;
+		    break;
+		default:
+		    repunicode = unicode_translate_call_errorhandler(errors, &errorHandler,
+			reason, startp, size, &exc,
+			collstart-startp, collend-startp, &newpos);
+		    if (repunicode == NULL)
+			goto onError;
+		    /* generate replacement  */
+		    repsize = PyUnicode_GET_SIZE(repunicode);
+		    if (charmaptranslate_makespace(&res, &str, &ressize,
+			(str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) {
+			Py_DECREF(repunicode);
+			goto onError;
+		    }
+		    for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2)
+			*str++ = *uni2;
+		    p = startp + newpos;
+		    Py_DECREF(repunicode);
 	    }
-	    *p++ = *PyUnicode_AS_UNICODE(x);
-	}
-	else {
-	    /* wrong return value */
-	    PyErr_SetString(PyExc_TypeError,
-		  "translate mapping must return integer, None or unicode");
-	    Py_DECREF(x);
-	    goto onError;
 	}
-	Py_DECREF(x);
    }
-    if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
-	if (_PyUnicode_Resize(&v, (int)(p - PyUnicode_AS_UNICODE(v))))
+    /* Resize if we allocated to much */
+    respos = str-PyUnicode_AS_UNICODE(res);
+    if (respos<ressize) {
+	if (_PyUnicode_Resize(&res, respos))
 	    goto onError;
+    }
+    Py_XDECREF(exc);
+    Py_XDECREF(errorHandler);
+    return res;

- done:
-    return (PyObject *)v;
-    
- onError:
-    Py_XDECREF(v);
+    onError:
+    Py_XDECREF(res);
+    Py_XDECREF(exc);
+    Py_XDECREF(errorHandler);
    return NULL;
 }

@@ -2772,6 +3381,13 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
 			    const char *errors)
 {
    Py_UNICODE *p, *end;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+    const char *encoding = "decimal";
+    const char *reason = "invalid decimal Unicode string";
+    /* the following variable is used for caching string comparisons
+     * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
+    int known_errorHandler = -1;

    if (output == NULL) {
 	PyErr_BadArgument();
@@ -2781,40 +3397,110 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
    p = s;
    end = s + length;
    while (p < end) {
-	register Py_UNICODE ch = *p++;
+	register Py_UNICODE ch = *p;
 	int decimal;
+	PyObject *repunicode;
+	int repsize;
+	int newpos;
+	Py_UNICODE *uni2;
+	Py_UNICODE *collstart;
+	Py_UNICODE *collend;
 	
 	if (Py_UNICODE_ISSPACE(ch)) {
 	    *output++ = ' ';
+	    ++p;
 	    continue;
 	}
 	decimal = Py_UNICODE_TODECIMAL(ch);
 	if (decimal >= 0) {
 	    *output++ = '0' + decimal;
+	    ++p;
 	    continue;
 	}
 	if (0 < ch && ch < 256) {
 	    *output++ = (char)ch;
+	    ++p;
 	    continue;
 	}
-	/* All other characters are considered invalid */
-	if (errors == NULL || strcmp(errors, "strict") == 0) {
-	    PyErr_SetString(PyExc_ValueError,
-			    "invalid decimal Unicode string");
-	    goto onError;
+	/* All other characters are considered unencodable */
+	collstart = p;
+	collend = p+1;
+	while (collend < end) {
+	    if ((0 < *collend && *collend < 256) ||
+	        !Py_UNICODE_ISSPACE(*collend) ||
+	        Py_UNICODE_TODECIMAL(*collend))
+		break;
 	}
-	else if (strcmp(errors, "ignore") == 0)
-	    continue;
-	else if (strcmp(errors, "replace") == 0) {
-	    *output++ = '?';
-	    continue;
+	/* cache callback name lookup
+	 * (if not done yet, i.e. it's the first error) */
+	if (known_errorHandler==-1) {
+	    if ((errors==NULL) || (!strcmp(errors, "strict")))
+		known_errorHandler = 1;
+	    else if (!strcmp(errors, "replace"))
+		known_errorHandler = 2;
+	    else if (!strcmp(errors, "ignore"))
+		known_errorHandler = 3;
+	    else if (!strcmp(errors, "xmlcharrefreplace"))
+		known_errorHandler = 4;
+	    else
+		known_errorHandler = 0;
+	}
+	switch (known_errorHandler) {
+	    case 1: /* strict */
+		raise_encode_exception(&exc, encoding, s, length, collstart-s, collend-s, reason);
+		goto onError;
+	    case 2: /* replace */
+		for (p = collstart; p < collend; ++p)
+		    *output++ = '?';
+		/* fall through */
+	    case 3: /* ignore */
+		p = collend;
+		break;
+	    case 4: /* xmlcharrefreplace */
+		/* generate replacement (temporarily (mis)uses p) */
+		for (p = collstart; p < collend; ++p)
+		    output += sprintf(output, "&#%d;", (int)*p);
+		p = collend;
+		break;
+	    default:
+		repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
+		    encoding, reason, s, length, &exc,
+		    collstart-s, collend-s, &newpos);
+		if (repunicode == NULL)
+		    goto onError;
+		/* generate replacement  */
+		repsize = PyUnicode_GET_SIZE(repunicode);
+		for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
+		    Py_UNICODE ch = *uni2;
+		    if (Py_UNICODE_ISSPACE(ch))
+			*output++ = ' ';
+		    else {
+			decimal = Py_UNICODE_TODECIMAL(ch);
+			if (decimal >= 0)
+			    *output++ = '0' + decimal;
+			else if (0 < ch && ch < 256)
+			    *output++ = (char)ch;
+			else {
+			    Py_DECREF(repunicode);
+			    raise_encode_exception(&exc, encoding,
+				s, length, collstart-s, collend-s, reason);
+			    goto onError;
+			}
+		    }
+		}
+		p = s + newpos;
+		Py_DECREF(repunicode);
 	}
    }
    /* 0-terminate the output string */
    *output++ = '\0';
+    Py_XDECREF(exc);
+    Py_XDECREF(errorHandler);
    return 0;

 onError:
+    Py_XDECREF(exc);
+    Py_XDECREF(errorHandler);
    return -1;
 }

@@ -3927,7 +4613,9 @@ PyDoc_STRVAR(encode__doc__,
 Return an encoded string version of S. Default encoding is the current\n\
 default string encoding. errors may be given to set a different error\n\
 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
-a ValueError. Other possible values are 'ignore' and 'replace'.");
+a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
+'xmlcharrefreplace' as well as any other name registered with\n\
+codecs.register_error that can handle UnicodeEncodeErrors.");

 static PyObject *
 unicode_encode(PyUnicodeObject *self, PyObject *args)

--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -422,12 +422,409 @@ PyObject *PyCodec_Decode(PyObject *object,
    return NULL;
 }

+static PyObject *_PyCodec_ErrorRegistry;
+
+/* Register the error handling callback function error under the name
+   name. This function will be called by the codec when it encounters
+   an unencodable characters/undecodable bytes and doesn't know the
+   callback name, when name is specified as the error parameter
+   in the call to the encode/decode function.
+   Return 0 on success, -1 on error */
+int PyCodec_RegisterError(const char *name, PyObject *error)
+{
+    if (!PyCallable_Check(error)) {
+	PyErr_SetString(PyExc_TypeError, "handler must be callable");
+	return -1;
+    }
+    return PyDict_SetItemString( _PyCodec_ErrorRegistry, (char *)name, error);
+}
+
+/* Lookup the error handling callback function registered under the
+   name error. As a special case NULL can be passed, in which case
+   the error handling callback for strict encoding will be returned. */
+PyObject *PyCodec_LookupError(const char *name)
+{
+    PyObject *handler = NULL;
+
+    if (name==NULL)
+	name = "strict";
+    handler = PyDict_GetItemString(_PyCodec_ErrorRegistry, (char *)name);
+    if (!handler)
+	PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
+    else
+	Py_INCREF(handler);
+    return handler;
+}
+
+static void wrong_exception_type(PyObject *exc)
+{
+    PyObject *type = PyObject_GetAttrString(exc, "__class__");
+    if (type != NULL) {
+	PyObject *name = PyObject_GetAttrString(type, "__name__");
+	Py_DECREF(type);
+	if (name != NULL) {
+	    PyObject *string = PyObject_Str(name);
+	    Py_DECREF(name);
+	    PyErr_Format(PyExc_TypeError, "don't know how to handle %.400s in error callback",
+		PyString_AS_STRING(string));
+	    Py_DECREF(string);
+	}
+    }
+}
+
+PyObject *PyCodec_StrictErrors(PyObject *exc)
+{
+    if (PyInstance_Check(exc))
+	PyErr_SetObject((PyObject*)((PyInstanceObject*)exc)->in_class,
+	    exc);
+    else
+	PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
+    return NULL;
+}
+
+
+PyObject *PyCodec_IgnoreErrors(PyObject *exc)
+{
+    int end;
+    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+	if (PyUnicodeEncodeError_GetEnd(exc, &end))
+	    return NULL;
+    }
+    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+	if (PyUnicodeDecodeError_GetEnd(exc, &end))
+	    return NULL;
+    }
+    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
+	if (PyUnicodeTranslateError_GetEnd(exc, &end))
+	    return NULL;
+    }
+    else {
+	wrong_exception_type(exc);
+	return NULL;
+    }
+    /* ouch: passing NULL, 0, pos gives None instead of u'' */
+    return Py_BuildValue("(u#i)", &end, 0, end);
+}
+
+
+PyObject *PyCodec_ReplaceErrors(PyObject *exc)
+{
+    PyObject *restuple;
+    int start;
+    int end;
+    int i;
+
+    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+	PyObject *res;
+	Py_UNICODE *p;
+	if (PyUnicodeEncodeError_GetStart(exc, &start))
+	    return NULL;
+	if (PyUnicodeEncodeError_GetEnd(exc, &end))
+	    return NULL;
+	res = PyUnicode_FromUnicode(NULL, end-start);
+	if (res == NULL)
+	    return NULL;
+	for (p = PyUnicode_AS_UNICODE(res), i = start;
+	    i<end; ++p, ++i)
+	    *p = '?';
+	restuple = Py_BuildValue("(Oi)", res, end);
+	Py_DECREF(res);
+	return restuple;
+    }
+    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+	Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
+	if (PyUnicodeDecodeError_GetEnd(exc, &end))
+	    return NULL;
+	return Py_BuildValue("(u#i)", &res, 1, end);
+    }
+    else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
+	PyObject *res;
+	Py_UNICODE *p;
+	if (PyUnicodeTranslateError_GetStart(exc, &start))
+	    return NULL;
+	if (PyUnicodeTranslateError_GetEnd(exc, &end))
+	    return NULL;
+	res = PyUnicode_FromUnicode(NULL, end-start);
+	if (res == NULL)
+	    return NULL;
+	for (p = PyUnicode_AS_UNICODE(res), i = start;
+	    i<end; ++p, ++i)
+	    *p = Py_UNICODE_REPLACEMENT_CHARACTER;
+	restuple = Py_BuildValue("(Oi)", res, end);
+	Py_DECREF(res);
+	return restuple;
+    }
+    else {
+	wrong_exception_type(exc);
+	return NULL;
+    }
+}
+
+PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
+{
+    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+	PyObject *restuple;
+	PyObject *object;
+	int start;
+	int end;
+	PyObject *res;
+	Py_UNICODE *p;
+	Py_UNICODE *startp;
+	Py_UNICODE *outp;
+	int ressize;
+	if (PyUnicodeEncodeError_GetStart(exc, &start))
+	    return NULL;
+	if (PyUnicodeEncodeError_GetEnd(exc, &end))
+	    return NULL;
+	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+	    return NULL;
+	startp = PyUnicode_AS_UNICODE(object);
+	for (p = startp+start, ressize = 0; p < startp+end; ++p) {
+	    if (*p<10)
+		ressize += 2+1+1;
+	    else if (*p<100)
+		ressize += 2+2+1;
+	    else if (*p<1000)
+		ressize += 2+3+1;
+	    else if (*p<10000)
+		ressize += 2+4+1;
+	    else if (*p<100000)
+		ressize += 2+5+1;
+	    else if (*p<1000000)
+		ressize += 2+6+1;
+	    else
+		ressize += 2+7+1;
+	}
+	/* allocate replacement */
+	res = PyUnicode_FromUnicode(NULL, ressize);
+	if (res == NULL) {
+	    Py_DECREF(object);
+	    return NULL;
+	}
+	/* generate replacement */
+	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
+	    p < startp+end; ++p) {
+	    Py_UNICODE c = *p;
+	    int digits;
+	    int base;
+	    *outp++ = '&';
+	    *outp++ = '#';
+	    if (*p<10) {
+		digits = 1;
+		base = 1;
+	    }
+	    else if (*p<100) {
+		digits = 2;
+		base = 10;
+	    }
+	    else if (*p<1000) {
+		digits = 3;
+		base = 100;
+	    }
+	    else if (*p<10000) {
+		digits = 4;
+		base = 1000;
+	    }
+	    else if (*p<100000) {
+		digits = 5;
+		base = 10000;
+	    }
+	    else if (*p<1000000) {
+		digits = 6;
+		base = 100000;
+	    }
+	    else {
+		digits = 7;
+		base = 1000000;
+	    }
+	    while (digits-->0) {
+		*outp++ = '0' + c/base;
+		c %= base;
+		base /= 10;
+	    }
+	    *outp++ = ';';
+	}
+	restuple = Py_BuildValue("(Oi)", res, end);
+	Py_DECREF(res);
+	Py_DECREF(object);
+	return restuple;
+    }
+    else {
+	wrong_exception_type(exc);
+	return NULL;
+    }
+}
+
+/* Lowercase hexadecimal digits, indexed by nibble value (0..15);
+   used by PyCodec_BackslashReplaceErrors(). */
+static Py_UNICODE hexdigits[] = {
+    '0', '1', '2', '3',
+    '4', '5', '6', '7',
+    '8', '9', 'a', 'b',
+    'c', 'd', 'e', 'f'
+};
+
+PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
+{
+    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+	PyObject *restuple;
+	PyObject *object;
+	int start;
+	int end;
+	PyObject *res;
+	Py_UNICODE *p;
+	Py_UNICODE *startp;
+	Py_UNICODE *outp;
+	int ressize;
+	if (PyUnicodeEncodeError_GetStart(exc, &start))
+	    return NULL;
+	if (PyUnicodeEncodeError_GetEnd(exc, &end))
+	    return NULL;
+	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+	    return NULL;
+	startp = PyUnicode_AS_UNICODE(object);
+	for (p = startp+start, ressize = 0; p < startp+end; ++p) {
+	    if (*p >= 0x00010000)
+		ressize += 1+1+8;
+	    else if (*p >= 0x100) {
+		ressize += 1+1+4;
+	    }
+	    else
+		ressize += 1+1+2;
+	}
+	res = PyUnicode_FromUnicode(NULL, ressize);
+	if (res==NULL)
+	    return NULL;
+	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
+	    p < startp+end; ++p) {
+	    Py_UNICODE c = *p;
+	    *outp++ = '\\';
+	    if (c >= 0x00010000) {
+		*outp++ = 'U';
+		*outp++ = hexdigits[(c>>28)&0xf];
+		*outp++ = hexdigits[(c>>24)&0xf];
+		*outp++ = hexdigits[(c>>20)&0xf];
+		*outp++ = hexdigits[(c>>16)&0xf];
+		*outp++ = hexdigits[(c>>12)&0xf];
+		*outp++ = hexdigits[(c>>8)&0xf];
+	    }
+	    else if (c >= 0x100) {
+		*outp++ = 'u';
+		*outp++ = hexdigits[(c>>12)&0xf];
+		*outp++ = hexdigits[(c>>8)&0xf];
+	    }
+	    else
+		*outp++ = 'x';
+	    *outp++ = hexdigits[(c>>4)&0xf];
+	    *outp++ = hexdigits[c&0xf];
+	}
+
+	restuple = Py_BuildValue("(Oi)", res, end);
+	Py_DECREF(res);
+	Py_DECREF(object);
+	return restuple;
+    }
+    else {
+	wrong_exception_type(exc);
+	return NULL;
+    }
+}
+
+static PyObject *strict_errors(PyObject *self, PyObject *exc)
+{
+    return PyCodec_StrictErrors(exc);
+}
+
+
+static PyObject *ignore_errors(PyObject *self, PyObject *exc)
+{
+    return PyCodec_IgnoreErrors(exc);
+}
+
+
+static PyObject *replace_errors(PyObject *self, PyObject *exc)
+{
+    return PyCodec_ReplaceErrors(exc);
+}
+
+
+static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
+{
+    return PyCodec_XMLCharRefReplaceErrors(exc);
+}
+
+
+static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
+{
+    return PyCodec_BackslashReplaceErrors(exc);
+}
+
+
 void _PyCodecRegistry_Init(void)
 {
+    static struct {
+	char *name;
+	PyMethodDef def;
+    } methods[] =
+    {
+	{
+	    "strict",
+	    {
+		"strict_errors",
+		strict_errors,
+		METH_O
+	    }
+	},
+	{
+	    "ignore",
+	    {
+		"ignore_errors",
+		ignore_errors,
+		METH_O
+	    }
+	},
+	{
+	    "replace",
+	    {
+		"replace_errors",
+		replace_errors,
+		METH_O
+	    }
+	},
+	{
+	    "xmlcharrefreplace",
+	    {
+		"xmlcharrefreplace_errors",
+		xmlcharrefreplace_errors,
+		METH_O
+	    }
+	},
+	{
+	    "backslashreplace",
+	    {
+		"backslashreplace_errors",
+		backslashreplace_errors,
+		METH_O
+	    }
+	}
+    };
    if (_PyCodec_SearchPath == NULL)
 	_PyCodec_SearchPath = PyList_New(0);
    if (_PyCodec_SearchCache == NULL)
 	_PyCodec_SearchCache = PyDict_New();
+    if (_PyCodec_ErrorRegistry == NULL) {
+	int i;
+	_PyCodec_ErrorRegistry = PyDict_New();
+
+	if (_PyCodec_ErrorRegistry) {
+	    for (i = 0; i < 5; ++i) {
+		PyObject *func = PyCFunction_New(&methods[i].def, NULL);
+		int res;
+		if (!func)
+		    Py_FatalError("can't initialize codec error registry");
+		res = PyCodec_RegisterError(methods[i].name, func);
+		Py_DECREF(func);
+		if (res)
+		    Py_FatalError("can't initialize codec error registry");
+	    }
+	}
+    }
    if (_PyCodec_SearchPath == NULL || 
 	_PyCodec_SearchCache == NULL)
 	Py_FatalError("can't initialize codec registry");
@@ -439,4 +836,6 @@ void _PyCodecRegistry_Fini(void)
    _PyCodec_SearchPath = NULL;
    Py_XDECREF(_PyCodec_SearchCache);
    _PyCodec_SearchCache = NULL;
+    Py_XDECREF(_PyCodec_ErrorRegistry);
+    _PyCodec_ErrorRegistry = NULL;
 }
--- a/Python/exceptions.c
+++ b/Python/exceptions.c
@@ -100,6 +100,10 @@ Exception\n\
 |    +-- ValueError\n\
 |    |    |\n\
 |    |    +-- UnicodeError\n\
+ |    |        |\n\
+ |    |        +-- UnicodeEncodeError\n\
+ |    |        +-- UnicodeDecodeError\n\
+ |    |        +-- UnicodeTranslateError\n\
 |    |\n\
 |    +-- ReferenceError\n\
 |    +-- SystemError\n\
@@ -840,6 +844,590 @@ static PyMethodDef SyntaxError_methods[] = {
 };


+/* Read the attribute `name` of exc as a C int into *value.
+   Returns 0 on success; -1 with an exception set if the attribute is
+   missing or (TypeError) is not an int object. */
+static
+int get_int(PyObject *exc, const char *name, int *value)
+{
+    PyObject *attr = PyObject_GetAttrString(exc, (char *)name);
+    int status = -1;
+
+    if (attr == NULL)
+	return status;
+    if (PyInt_Check(attr)) {
+	*value = PyInt_AS_LONG(attr);
+	status = 0;
+    }
+    else
+	PyErr_Format(PyExc_TypeError, "%s attribute must be int", name);
+    Py_DECREF(attr);
+    return status;
+}
+
+
+/* Store `value` as an int object in the attribute `name` of exc.
+   Returns the result of PyObject_SetAttrString(), or -1 if the int
+   object could not be created. */
+static
+int set_int(PyObject *exc, const char *name, int value)
+{
+    PyObject *number = PyInt_FromLong(value);
+    int status = -1;
+
+    if (number != NULL) {
+	status = PyObject_SetAttrString(exc, (char *)name, number);
+	Py_DECREF(number);
+    }
+    return status;
+}
+
+
+/* Fetch the attribute `name` of exc, which must be a str object.
+   Returns the attribute (reference owned by the caller), or NULL with
+   an exception set if it is missing or (TypeError) not a str. */
+static
+PyObject *get_string(PyObject *exc, const char *name)
+{
+    PyObject *attr = PyObject_GetAttrString(exc, (char *)name);
+
+    if (attr != NULL && !PyString_Check(attr)) {
+	PyErr_Format(PyExc_TypeError, "%s attribute must be str", name);
+	Py_DECREF(attr);
+	attr = NULL;
+    }
+    return attr;
+}
+
+
+/* Store the C string `value` as a str object in the attribute `name`
+   of exc.  Returns the result of PyObject_SetAttrString(), or -1 if the
+   str object could not be created. */
+static
+int set_string(PyObject *exc, const char *name, const char *value)
+{
+    PyObject *text = PyString_FromString(value);
+    int status = -1;
+
+    if (text != NULL) {
+	status = PyObject_SetAttrString(exc, (char *)name, text);
+	Py_DECREF(text);
+    }
+    return status;
+}
+
+
+/* Fetch the attribute `name` of exc, which must be a unicode object.
+   Returns the attribute (reference owned by the caller), or NULL with
+   an exception set if it is missing or (TypeError) not unicode. */
+static
+PyObject *get_unicode(PyObject *exc, const char *name)
+{
+    PyObject *attr = PyObject_GetAttrString(exc, (char *)name);
+
+    if (attr != NULL && !PyUnicode_Check(attr)) {
+	PyErr_Format(PyExc_TypeError, "%s attribute must be unicode", name);
+	Py_DECREF(attr);
+	attr = NULL;
+    }
+    return attr;
+}
+
+/* Accessors for the "encoding" attribute of the Unicode exceptions.
+   Each returns the attribute as a str object (reference owned by the
+   caller), or NULL with an exception set; see get_string(). */
+PyObject *
+PyUnicodeEncodeError_GetEncoding(PyObject *exc)
+{
+    return get_string(exc, "encoding");
+}
+
+PyObject *
+PyUnicodeDecodeError_GetEncoding(PyObject *exc)
+{
+    return get_string(exc, "encoding");
+}
+
+PyObject *
+PyUnicodeTranslateError_GetEncoding(PyObject *exc)
+{
+    return get_string(exc, "encoding");
+}
+
+/* Accessors for the "object" attribute (the input being processed).
+   The attribute must be unicode for encode/translate errors and str for
+   decode errors.  Each returns the attribute (reference owned by the
+   caller), or NULL with an exception set. */
+PyObject *
+PyUnicodeEncodeError_GetObject(PyObject *exc)
+{
+    return get_unicode(exc, "object");
+}
+
+PyObject *
+PyUnicodeDecodeError_GetObject(PyObject *exc)
+{
+    return get_string(exc, "object");
+}
+
+PyObject *
+PyUnicodeTranslateError_GetObject(PyObject *exc)
+{
+    return get_unicode(exc, "object");
+}
+
+/* Read the "start" attribute of a UnicodeEncodeError into *start,
+   clamped so that 0 <= *start <= len(object)-1.
+   NOTE: for an empty object the upper clamp wins and *start becomes -1.
+   Returns 0 on success, -1 with an exception set on failure. */
+int PyUnicodeEncodeError_GetStart(PyObject *exc, int *start)
+{
+    PyObject *object;
+    int size;
+
+    if (get_int(exc, "start", start))
+	return -1;
+    object = PyUnicodeEncodeError_GetObject(exc);
+    if (object == NULL)
+	return -1;
+    size = PyUnicode_GET_SIZE(object);
+    Py_DECREF(object);
+    if (*start < 0)
+	*start = 0;
+    if (*start >= size)
+	*start = size-1;
+    return 0;
+}
+
+
+/* Read the "start" attribute of a UnicodeDecodeError into *start,
+   clamped so that 0 <= *start <= len(object)-1.
+   NOTE: for an empty object the upper clamp wins and *start becomes -1.
+   Returns 0 on success, -1 with an exception set on failure. */
+int PyUnicodeDecodeError_GetStart(PyObject *exc, int *start)
+{
+    PyObject *object;
+    int size;
+
+    if (get_int(exc, "start", start))
+	return -1;
+    object = PyUnicodeDecodeError_GetObject(exc);
+    if (object == NULL)
+	return -1;
+    size = PyString_GET_SIZE(object);
+    Py_DECREF(object);
+    if (*start < 0)
+	*start = 0;
+    if (*start >= size)
+	*start = size-1;
+    return 0;
+}
+
+
+/* Same as PyUnicodeEncodeError_GetStart(): both exception types keep a
+   unicode "object" and an int "start" attribute. */
+int
+PyUnicodeTranslateError_GetStart(PyObject *exc, int *start)
+{
+    return PyUnicodeEncodeError_GetStart(exc, start);
+}
+
+
+/* Setters for the "start" attribute of the Unicode exceptions.
+   Each returns the result of set_int(): 0 on success, -1 on failure. */
+int
+PyUnicodeEncodeError_SetStart(PyObject *exc, int start)
+{
+    return set_int(exc, "start", start);
+}
+
+
+int
+PyUnicodeDecodeError_SetStart(PyObject *exc, int start)
+{
+    return set_int(exc, "start", start);
+}
+
+
+int
+PyUnicodeTranslateError_SetStart(PyObject *exc, int start)
+{
+    return set_int(exc, "start", start);
+}
+
+
+/* Read the "end" attribute of a UnicodeEncodeError into *end,
+   clamped so that 1 <= *end <= len(object).
+   NOTE: for an empty object the upper clamp wins and *end becomes 0.
+   Returns 0 on success, -1 with an exception set on failure. */
+int PyUnicodeEncodeError_GetEnd(PyObject *exc, int *end)
+{
+    PyObject *object;
+    int size;
+
+    if (get_int(exc, "end", end))
+	return -1;
+    object = PyUnicodeEncodeError_GetObject(exc);
+    if (object == NULL)
+	return -1;
+    size = PyUnicode_GET_SIZE(object);
+    Py_DECREF(object);
+    if (*end < 1)
+	*end = 1;
+    if (*end > size)
+	*end = size;
+    return 0;
+}
+
+
+/* Read the "end" attribute of a UnicodeDecodeError into *end,
+   clamped so that 1 <= *end <= len(object).
+   NOTE: for an empty object the upper clamp wins and *end becomes 0.
+   Returns 0 on success, -1 with an exception set on failure. */
+int PyUnicodeDecodeError_GetEnd(PyObject *exc, int *end)
+{
+    PyObject *object;
+    int size;
+
+    if (get_int(exc, "end", end))
+	return -1;
+    object = PyUnicodeDecodeError_GetObject(exc);
+    if (object == NULL)
+	return -1;
+    size = PyString_GET_SIZE(object);
+    Py_DECREF(object);
+    if (*end < 1)
+	*end = 1;
+    if (*end > size)
+	*end = size;
+    return 0;
+}
+
+
+/* Same as PyUnicodeEncodeError_GetEnd(); note that the output parameter
+   is named `start` here although it receives the end position. */
+int
+PyUnicodeTranslateError_GetEnd(PyObject *exc, int *start)
+{
+    return PyUnicodeEncodeError_GetEnd(exc, start);
+}
+
+
+/* Setters for the "end" attribute of the Unicode exceptions.
+   Each returns the result of set_int(): 0 on success, -1 on failure. */
+int
+PyUnicodeEncodeError_SetEnd(PyObject *exc, int end)
+{
+    return set_int(exc, "end", end);
+}
+
+
+int
+PyUnicodeDecodeError_SetEnd(PyObject *exc, int end)
+{
+    return set_int(exc, "end", end);
+}
+
+
+int
+PyUnicodeTranslateError_SetEnd(PyObject *exc, int end)
+{
+    return set_int(exc, "end", end);
+}
+
+
+/* Accessors for the "reason" attribute of the Unicode exceptions.
+   Each returns the attribute as a str object (reference owned by the
+   caller), or NULL with an exception set; see get_string(). */
+PyObject *
+PyUnicodeEncodeError_GetReason(PyObject *exc)
+{
+    return get_string(exc, "reason");
+}
+
+
+PyObject *
+PyUnicodeDecodeError_GetReason(PyObject *exc)
+{
+    return get_string(exc, "reason");
+}
+
+
+PyObject *
+PyUnicodeTranslateError_GetReason(PyObject *exc)
+{
+    return get_string(exc, "reason");
+}
+
+
+/* Setters for the "reason" attribute of the Unicode exceptions.
+   Each returns the result of set_string(): 0 on success, -1 on
+   failure. */
+int
+PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason)
+{
+    return set_string(exc, "reason", reason);
+}
+
+
+int
+PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason)
+{
+    return set_string(exc, "reason", reason);
+}
+
+
+int
+PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason)
+{
+    return set_string(exc, "reason", reason);
+}
+
+
+/* Shared __init__ implementation for UnicodeEncodeError and
+   UnicodeDecodeError.
+   `args` is the full argument tuple including the instance; the
+   instance is taken from it via get_self() and the remaining items must
+   be (encoding: str, object: objecttype, start: int, end: int,
+   reason: str).  They are stored on the instance as the attributes
+   "args", "encoding", "object", "start", "end" and "reason".
+   Returns None on success, NULL with an exception set on failure. */
+static PyObject *
+UnicodeError__init__(PyObject *self, PyObject *args, PyTypeObject *objecttype)
+{
+    PyObject *rtnval = NULL;
+    PyObject *encoding;
+    PyObject *object;
+    PyObject *start;
+    PyObject *end;
+    PyObject *reason;
+
+    if (!(self = get_self(args)))
+	return NULL;
+
+    /* strip the instance; from here on `args` is a new reference that
+       must be released on every exit path */
+    if (!(args = PySequence_GetSlice(args, 1, PySequence_Size(args))))
+	return NULL;
+
+    if (!PyArg_ParseTuple(args, "O!O!O!O!O!",
+	&PyString_Type, &encoding,
+	objecttype, &object,
+	&PyInt_Type, &start,
+	&PyInt_Type, &end,
+	&PyString_Type, &reason))
+	goto finally;
+
+    if (PyObject_SetAttrString(self, "args", args))
+	goto finally;
+
+    if (PyObject_SetAttrString(self, "encoding", encoding))
+	goto finally;
+    if (PyObject_SetAttrString(self, "object", object))
+	goto finally;
+    if (PyObject_SetAttrString(self, "start", start))
+	goto finally;
+    if (PyObject_SetAttrString(self, "end", end))
+	goto finally;
+    if (PyObject_SetAttrString(self, "reason", reason))
+	goto finally;
+
+    Py_INCREF(Py_None);
+    rtnval = Py_None;
+
+  finally:
+    Py_DECREF(args);
+    return rtnval;
+}
+
+
+/* __init__ of UnicodeEncodeError: UnicodeError__init__() with the
+   "object" argument required to be a unicode object. */
+static PyObject *
+UnicodeEncodeError__init__(PyObject *self, PyObject *args)
+{
+    PyTypeObject *objecttype = &PyUnicode_Type;
+
+    return UnicodeError__init__(self, args, objecttype);
+}
+
+/* __str__ of UnicodeEncodeError (METH_O: `arg` is the instance).
+   Builds a message naming the codec, the offending position(s) and the
+   reason; when exactly one character is affected the character itself
+   is included.  Returns a new str object or NULL with an exception
+   set. */
+static PyObject *
+UnicodeEncodeError__str__(PyObject *self, PyObject *arg)
+{
+    PyObject *encodingObj = NULL;
+    PyObject *objectObj = NULL;
+    int length;
+    int start;
+    int end;
+    PyObject *reasonObj = NULL;
+    char buffer[1000];
+    PyObject *result = NULL;
+
+    self = arg;
+
+    if (!(encodingObj = PyUnicodeEncodeError_GetEncoding(self)))
+	goto error;
+
+    if (!(objectObj = PyUnicodeEncodeError_GetObject(self)))
+	goto error;
+
+    length = PyUnicode_GET_SIZE(objectObj);
+
+    if (PyUnicodeEncodeError_GetStart(self, &start))
+	goto error;
+
+    if (PyUnicodeEncodeError_GetEnd(self, &end))
+	goto error;
+
+    if (!(reasonObj = PyUnicodeEncodeError_GetReason(self)))
+	goto error;
+
+    /* For an empty object GetStart() yields -1 and GetEnd() yields 0,
+       so end==start+1 alone is not enough to make the index valid. */
+    if (start >= 0 && start < length && end==start+1) {
+	PyOS_snprintf(buffer, sizeof(buffer),
+	    "'%.400s' codec can't encode character '\\u%x' in position %d: %.400s",
+	    PyString_AS_STRING(encodingObj),
+	    (int)PyUnicode_AS_UNICODE(objectObj)[start],
+	    start,
+	    PyString_AS_STRING(reasonObj)
+	);
+    }
+    else {
+	PyOS_snprintf(buffer, sizeof(buffer),
+	    "'%.400s' codec can't encode characters in position %d-%d: %.400s",
+	    PyString_AS_STRING(encodingObj),
+	    start,
+	    end-1,
+	    PyString_AS_STRING(reasonObj)
+	);
+    }
+    result = PyString_FromString(buffer);
+
+error:
+    Py_XDECREF(reasonObj);
+    Py_XDECREF(objectObj);
+    Py_XDECREF(encodingObj);
+    return result;
+}
+
+/* Method table for UnicodeEncodeError (referenced from the exception
+   table entry of that class). */
+static PyMethodDef UnicodeEncodeError_methods[] = {
+    {"__init__", UnicodeEncodeError__init__, METH_VARARGS, NULL},
+    {"__str__",  UnicodeEncodeError__str__,  METH_O,       NULL},
+    {NULL, NULL, 0, NULL}	/* sentinel */
+};
+
+
+/* Create a UnicodeEncodeError by calling the exception class with
+   (encoding, object[:length], start, end, reason).
+   Returns the result of the call (NULL on failure). */
+PyObject *
+PyUnicodeEncodeError_Create(const char *encoding, const Py_UNICODE *object,
+			    int length, int start, int end,
+			    const char *reason)
+{
+    PyObject *exc;
+
+    exc = PyObject_CallFunction(PyExc_UnicodeEncodeError, "su#iis",
+				encoding, object, length, start, end, reason);
+    return exc;
+}
+
+
+/* __init__ of UnicodeDecodeError: UnicodeError__init__() with the
+   "object" argument required to be a str object. */
+static PyObject *
+UnicodeDecodeError__init__(PyObject *self, PyObject *args)
+{
+    PyTypeObject *objecttype = &PyString_Type;
+
+    return UnicodeError__init__(self, args, objecttype);
+}
+
+/* __str__ of UnicodeDecodeError (METH_O: `arg` is the instance).
+   Builds a message naming the codec, the offending position(s) and the
+   reason; when exactly one byte is affected its value is included.
+   Returns a new str object or NULL with an exception set. */
+static PyObject *
+UnicodeDecodeError__str__(PyObject *self, PyObject *arg)
+{
+    PyObject *encodingObj = NULL;
+    PyObject *objectObj = NULL;
+    int length;
+    int start;
+    int end;
+    PyObject *reasonObj = NULL;
+    char buffer[1000];
+    PyObject *result = NULL;
+
+    self = arg;
+
+    if (!(encodingObj = PyUnicodeDecodeError_GetEncoding(self)))
+	goto error;
+
+    if (!(objectObj = PyUnicodeDecodeError_GetObject(self)))
+	goto error;
+
+    length = PyString_GET_SIZE(objectObj);
+
+    if (PyUnicodeDecodeError_GetStart(self, &start))
+	goto error;
+
+    if (PyUnicodeDecodeError_GetEnd(self, &end))
+	goto error;
+
+    if (!(reasonObj = PyUnicodeDecodeError_GetReason(self)))
+	goto error;
+
+    /* For an empty object GetStart() yields -1 and GetEnd() yields 0,
+       so end==start+1 alone is not enough to make the index valid. */
+    if (start >= 0 && start < length && end==start+1) {
+	PyOS_snprintf(buffer, sizeof(buffer),
+	    "'%.400s' codec can't decode byte 0x%x in position %d: %.400s",
+	    PyString_AS_STRING(encodingObj),
+	    ((int)PyString_AS_STRING(objectObj)[start])&0xff,
+	    start,
+	    PyString_AS_STRING(reasonObj)
+	);
+    }
+    else {
+	PyOS_snprintf(buffer, sizeof(buffer),
+	    "'%.400s' codec can't decode bytes in position %d-%d: %.400s",
+	    PyString_AS_STRING(encodingObj),
+	    start,
+	    end-1,
+	    PyString_AS_STRING(reasonObj)
+	);
+    }
+    result = PyString_FromString(buffer);
+
+error:
+    Py_XDECREF(reasonObj);
+    Py_XDECREF(objectObj);
+    Py_XDECREF(encodingObj);
+    return result;
+}
+
+/* Method table for UnicodeDecodeError (referenced from the exception
+   table entry of that class). */
+static PyMethodDef UnicodeDecodeError_methods[] = {
+    {"__init__", UnicodeDecodeError__init__, METH_VARARGS, NULL},
+    {"__str__",  UnicodeDecodeError__str__,  METH_O,       NULL},
+    {NULL, NULL, 0, NULL}	/* sentinel */
+};
+
+
+/* Create a UnicodeDecodeError by calling the exception class with
+   (encoding, object[:length], start, end, reason).
+   Returns the result of the call (NULL on failure). */
+PyObject *
+PyUnicodeDecodeError_Create(const char *encoding, const char *object,
+			    int length, int start, int end,
+			    const char *reason)
+{
+    PyObject *exc;
+
+    exc = PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#iis",
+				encoding, object, length, start, end, reason);
+    return exc;
+}
+
+
+/* __init__ of UnicodeTranslateError.
+   `args` is the full argument tuple including the instance; the
+   instance is taken from it via get_self() and the remaining items must
+   be (object: unicode, start: int, end: int, reason: str).  They are
+   stored on the instance as the attributes "args", "object", "start",
+   "end" and "reason".
+   Returns None on success, NULL with an exception set on failure. */
+static PyObject *
+UnicodeTranslateError__init__(PyObject *self, PyObject *args)
+{
+    PyObject *object;
+    PyObject *start;
+    PyObject *end;
+    PyObject *reason;
+    PyObject *rtnval = NULL;
+
+    self = get_self(args);
+    if (self == NULL)
+	return NULL;
+
+    /* strip the instance; `args` is now a new reference owned here */
+    args = PySequence_GetSlice(args, 1, PySequence_Size(args));
+    if (args == NULL)
+	return NULL;
+
+    /* the && chain stops at the first failing step, leaving rtnval NULL */
+    if (PyArg_ParseTuple(args, "O!O!O!O!",
+	    &PyUnicode_Type, &object,
+	    &PyInt_Type, &start,
+	    &PyInt_Type, &end,
+	    &PyString_Type, &reason) &&
+	!PyObject_SetAttrString(self, "args", args) &&
+	!PyObject_SetAttrString(self, "object", object) &&
+	!PyObject_SetAttrString(self, "start", start) &&
+	!PyObject_SetAttrString(self, "end", end) &&
+	!PyObject_SetAttrString(self, "reason", reason)) {
+	Py_INCREF(Py_None);
+	rtnval = Py_None;
+    }
+
+    Py_DECREF(args);
+    return rtnval;
+}
+
+
+/* __str__ of UnicodeTranslateError (METH_O: `arg` is the instance).
+   Builds a message naming the offending position(s) and the reason;
+   when exactly one character is affected the character itself is
+   included.  Returns a new str object or NULL with an exception set. */
+static PyObject *
+UnicodeTranslateError__str__(PyObject *self, PyObject *arg)
+{
+    PyObject *objectObj = NULL;
+    int length;
+    int start;
+    int end;
+    PyObject *reasonObj = NULL;
+    char buffer[1000];
+    PyObject *result = NULL;
+
+    self = arg;
+
+    if (!(objectObj = PyUnicodeTranslateError_GetObject(self)))
+	goto error;
+
+    length = PyUnicode_GET_SIZE(objectObj);
+
+    if (PyUnicodeTranslateError_GetStart(self, &start))
+	goto error;
+
+    if (PyUnicodeTranslateError_GetEnd(self, &end))
+	goto error;
+
+    if (!(reasonObj = PyUnicodeTranslateError_GetReason(self)))
+	goto error;
+
+    /* For an empty object GetStart() yields -1 and GetEnd() yields 0,
+       so end==start+1 alone is not enough to make the index valid. */
+    if (start >= 0 && start < length && end==start+1) {
+	PyOS_snprintf(buffer, sizeof(buffer),
+	    "can't translate character '\\u%x' in position %d: %.400s",
+	    (int)PyUnicode_AS_UNICODE(objectObj)[start],
+	    start,
+	    PyString_AS_STRING(reasonObj)
+	);
+    }
+    else {
+	PyOS_snprintf(buffer, sizeof(buffer),
+	    "can't translate characters in position %d-%d: %.400s",
+	    start,
+	    end-1,
+	    PyString_AS_STRING(reasonObj)
+	);
+    }
+    result = PyString_FromString(buffer);
+
+error:
+    Py_XDECREF(reasonObj);
+    Py_XDECREF(objectObj);
+    return result;
+}
+
+/* Method table for UnicodeTranslateError (referenced from the exception
+   table entry of that class). */
+static PyMethodDef UnicodeTranslateError_methods[] = {
+    {"__init__", UnicodeTranslateError__init__, METH_VARARGS, NULL},
+    {"__str__",  UnicodeTranslateError__str__,  METH_O,       NULL},
+    {NULL, NULL, 0, NULL}	/* sentinel */
+};
+
+
+/* Create a UnicodeTranslateError by calling the exception class with
+   (object[:length], start, end, reason).
+   Returns the result of the call (NULL on failure). */
+PyObject *
+PyUnicodeTranslateError_Create(const Py_UNICODE *object, int length,
+			       int start, int end, const char *reason)
+{
+    PyObject *exc;
+
+    exc = PyObject_CallFunction(PyExc_UnicodeTranslateError, "u#iis",
+				object, length, start, end, reason);
+    return exc;
+}
+
+

 /* Exception doc strings */

@@ -865,6 +1453,12 @@ PyDoc_STRVAR(ValueError__doc__,

 PyDoc_STRVAR(UnicodeError__doc__, "Unicode related error.");

+PyDoc_STRVAR(UnicodeEncodeError__doc__, "Unicode encoding error.");
+
+PyDoc_STRVAR(UnicodeDecodeError__doc__, "Unicode decoding error.");
+
+PyDoc_STRVAR(UnicodeTranslateError__doc__, "Unicode translation error.");
+
 PyDoc_STRVAR(SystemError__doc__,
 "Internal error in the Python interpreter.\n\
 \n\
@@ -949,6 +1543,9 @@ PyObject *PyExc_SystemError;
 PyObject *PyExc_SystemExit;
 PyObject *PyExc_UnboundLocalError;
 PyObject *PyExc_UnicodeError;
+PyObject *PyExc_UnicodeEncodeError;
+PyObject *PyExc_UnicodeDecodeError;
+PyObject *PyExc_UnicodeTranslateError;
 PyObject *PyExc_TypeError;
 PyObject *PyExc_ValueError;
 PyObject *PyExc_ZeroDivisionError;
@@ -1035,6 +1632,12 @@ static struct {
  FloatingPointError__doc__},
 {"ValueError",   &PyExc_ValueError,  0, ValueError__doc__},
 {"UnicodeError", &PyExc_UnicodeError, &PyExc_ValueError, UnicodeError__doc__},
+ {"UnicodeEncodeError", &PyExc_UnicodeEncodeError, &PyExc_UnicodeError,
+  UnicodeEncodeError__doc__, UnicodeEncodeError_methods},
+ {"UnicodeDecodeError", &PyExc_UnicodeDecodeError, &PyExc_UnicodeError,
+  UnicodeDecodeError__doc__, UnicodeDecodeError_methods},
+ {"UnicodeTranslateError", &PyExc_UnicodeTranslateError, &PyExc_UnicodeError,
+  UnicodeTranslateError__doc__, UnicodeTranslateError_methods},
 {"ReferenceError",  &PyExc_ReferenceError, 0, ReferenceError__doc__},
 {"SystemError",  &PyExc_SystemError, 0, SystemError__doc__},
 {"MemoryError",  &PyExc_MemoryError, 0, MemoryError__doc__},