Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
6a16b182
Commit
6a16b182
authored
Mar 18, 2019
by
Inada Naoki
Committed by
GitHub
Mar 18, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bpo-36297: remove "unicode_internal" codec (GH-12342)
parent
6fb544d8
Changes
12
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
41 additions
and
530 deletions
+41
-530
Doc/library/codecs.rst
Doc/library/codecs.rst
+4
-10
Doc/whatsnew/3.8.rst
Doc/whatsnew/3.8.rst
+3
-0
Include/cpython/unicodeobject.h
Include/cpython/unicodeobject.h
+0
-9
Lib/encodings/unicode_internal.py
Lib/encodings/unicode_internal.py
+0
-45
Lib/test/test_codeccallbacks.py
Lib/test/test_codeccallbacks.py
+11
-55
Lib/test/test_codecs.py
Lib/test/test_codecs.py
+5
-102
Lib/test/test_unicode.py
Lib/test/test_unicode.py
+14
-22
Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst
...S.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst
+2
-0
Modules/_codecsmodule.c
Modules/_codecsmodule.c
+1
-81
Modules/clinic/_codecsmodule.c.h
Modules/clinic/_codecsmodule.c.h
+1
-103
Objects/unicodeobject.c
Objects/unicodeobject.c
+0
-102
PCbuild/lib.pyproj
PCbuild/lib.pyproj
+0
-1
No files found.
Doc/library/codecs.rst
View file @
6a16b182
...
...
@@ -1316,16 +1316,10 @@ encodings.
|
| | code actually uses UTF-8 |
|
| | by default. |
+--------------------+---------+---------------------------+
|
unicode_internal | | Return the internal |
|
| | representation of the |
|
| | operand. Stateful codecs |
|
| | are not supported. |
|
| | |
|
| | .. deprecated:: 3.3 |
|
| | This representation is |
|
| | obsoleted by |
|
| | :pep:`393`. |
+--------------------+---------+---------------------------+
.. versionchanged:: 3.8
"unicode_internal" codec is removed.
..
_binary-transforms:
...
...
Doc/whatsnew/3.8.rst
View file @
6a16b182
...
...
@@ -573,6 +573,9 @@ The following features and APIs have been removed from Python 3.8:
* Removed the ``doctype()`` method of :class:`~xml.etree.ElementTree.XMLParser`.
(Contributed by Serhiy Storchaka in :issue:`29209`.)
* "unicode_internal" codec is removed.
(Contributed by Inada Naoki in :issue:`36297`.)
Porting to Python 3.8
=====================
...
...
Include/cpython/unicodeobject.h
View file @
6a16b182
...
...
@@ -896,15 +896,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Py_ssize_t
length
/* Number of Py_UNICODE chars to encode */
)
Py_DEPRECATED
(
3.3
);
/* --- Unicode Internal Codec --------------------------------------------- */
/* Only for internal use in _codecsmodule.c */
PyObject
*
_PyUnicode_DecodeUnicodeInternal
(
const
char
*
string
,
Py_ssize_t
length
,
const
char
*
errors
);
/* --- Latin-1 Codecs ----------------------------------------------------- */
PyAPI_FUNC
(
PyObject
*
)
_PyUnicode_AsLatin1String
(
...
...
Lib/encodings/unicode_internal.py
deleted
100644 → 0
View file @
6fb544d8
""" Python 'unicode-internal' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import
codecs
### Codec APIs
class
Codec
(
codecs
.
Codec
):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode
=
codecs
.
unicode_internal_encode
decode
=
codecs
.
unicode_internal_decode
class
IncrementalEncoder
(
codecs
.
IncrementalEncoder
):
def
encode
(
self
,
input
,
final
=
False
):
return
codecs
.
unicode_internal_encode
(
input
,
self
.
errors
)[
0
]
class
IncrementalDecoder
(
codecs
.
IncrementalDecoder
):
def
decode
(
self
,
input
,
final
=
False
):
return
codecs
.
unicode_internal_decode
(
input
,
self
.
errors
)[
0
]
class
StreamWriter
(
Codec
,
codecs
.
StreamWriter
):
pass
class
StreamReader
(
Codec
,
codecs
.
StreamReader
):
pass
### encodings module API
def
getregentry
():
return
codecs
.
CodecInfo
(
name
=
'unicode-internal'
,
encode
=
Codec
.
encode
,
decode
=
Codec
.
decode
,
incrementalencoder
=
IncrementalEncoder
,
incrementaldecoder
=
IncrementalDecoder
,
streamwriter
=
StreamWriter
,
streamreader
=
StreamReader
,
)
Lib/test/test_codeccallbacks.py
View file @
6a16b182
...
...
@@ -211,42 +211,6 @@ class CodecCallbackTest(unittest.TestCase):
charmap
[
ord
(
"?"
)]
=
"XYZ"
# wrong type in mapping
self
.
assertRaises
(
TypeError
,
codecs
.
charmap_encode
,
sin
,
"replace"
,
charmap
)
def
test_decodeunicodeinternal
(
self
):
with
test
.
support
.
check_warnings
((
'unicode_internal codec has been '
'deprecated'
,
DeprecationWarning
)):
self
.
assertRaises
(
UnicodeDecodeError
,
b"
\
x00
\
x00
\
x00
\
x00
\
x00
"
.
decode
,
"unicode-internal"
,
)
if
len
(
'
\
0
'
.
encode
(
'unicode-internal'
))
==
4
:
def
handler_unicodeinternal
(
exc
):
if
not
isinstance
(
exc
,
UnicodeDecodeError
):
raise
TypeError
(
"don't know how to handle %r"
%
exc
)
return
(
"
\
x01
"
,
1
)
self
.
assertEqual
(
b"
\
x00
\
x00
\
x00
\
x00
\
x00
"
.
decode
(
"unicode-internal"
,
"ignore"
),
"
\
u0000
"
)
self
.
assertEqual
(
b"
\
x00
\
x00
\
x00
\
x00
\
x00
"
.
decode
(
"unicode-internal"
,
"replace"
),
"
\
u0000
\
ufffd
"
)
self
.
assertEqual
(
b"
\
x00
\
x00
\
x00
\
x00
\
x00
"
.
decode
(
"unicode-internal"
,
"backslashreplace"
),
"
\
u0000
\
\
x00"
)
codecs
.
register_error
(
"test.hui"
,
handler_unicodeinternal
)
self
.
assertEqual
(
b"
\
x00
\
x00
\
x00
\
x00
\
x00
"
.
decode
(
"unicode-internal"
,
"test.hui"
),
"
\
u0000
\
u0001
\
u0000
"
)
def
test_callbacks
(
self
):
def
handler1
(
exc
):
r
=
range
(
exc
.
start
,
exc
.
end
)
...
...
@@ -794,10 +758,7 @@ class CodecCallbackTest(unittest.TestCase):
(
"ascii"
,
b"
\
xff
"
),
(
"utf-8"
,
b"
\
xff
"
),
(
"utf-7"
,
b"+x-"
),
(
"unicode-internal"
,
b"
\
x00
"
),
):
with
test
.
support
.
check_warnings
():
# unicode-internal has been deprecated
self
.
assertRaises
(
TypeError
,
bytes
.
decode
,
...
...
@@ -1013,7 +974,6 @@ class CodecCallbackTest(unittest.TestCase):
(
"utf-32"
,
b"
\
xff
"
),
(
"unicode-escape"
,
b"
\
\
u123g"
),
(
"raw-unicode-escape"
,
b"
\
\
u123g"
),
(
"unicode-internal"
,
b"
\
xff
"
),
]
def
replacing
(
exc
):
...
...
@@ -1024,8 +984,6 @@ class CodecCallbackTest(unittest.TestCase):
raise
TypeError
(
"don't know how to handle %r"
%
exc
)
codecs
.
register_error
(
"test.replacing"
,
replacing
)
with
test
.
support
.
check_warnings
():
# unicode-internal has been deprecated
for
(
encoding
,
data
)
in
baddata
:
with
self
.
assertRaises
(
TypeError
):
data
.
decode
(
encoding
,
"test.replacing"
)
...
...
@@ -1039,8 +997,6 @@ class CodecCallbackTest(unittest.TestCase):
codecs
.
register_error
(
"test.mutating"
,
mutating
)
# If the decoder doesn't pick up the modified input the following
# will lead to an endless loop
with
test
.
support
.
check_warnings
():
# unicode-internal has been deprecated
for
(
encoding
,
data
)
in
baddata
:
self
.
assertEqual
(
data
.
decode
(
encoding
,
"test.mutating"
),
"
\
u4242
"
)
...
...
Lib/test/test_codecs.py
View file @
6a16b182
...
...
@@ -1239,16 +1239,6 @@ class EscapeDecodeTest(unittest.TestCase):
self.assertEqual(decode(br"
[
\
x0
]
\
x0
", "
replace
"), (b"
[
?
]
?
", 8))
class RecodingTest(unittest.TestCase):
def test_recoding(self):
f = io.BytesIO()
with codecs.EncodedFile(f, "
unicode_internal
", "
utf
-
8
") as f2:
f2.write("
a
")
# Python used to crash on this at exit because of a refcount
# bug in _codecsmodule.c
self.assertTrue(f.closed)
# From RFC 3492
punycode_testcases = [
# A Arabic (Egyptian):
...
...
@@ -1378,87 +1368,6 @@ class PunycodeTest(unittest.TestCase):
self.assertEqual(uni, puny.decode("
punycode
"))
class UnicodeInternalTest(unittest.TestCase):
@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
def test_bug1251300(self):
# Decoding with unicode_internal used to not correctly handle "
code
# points" above 0x10ffff on UCS-4 builds.
ok
=
[
(
b"
\
x00
\
x10
\
xff
\
xff
"
,
"
\
U0010ffff
"
),
(
b"
\
x00
\
x00
\
x01
\
x01
"
,
"
\
U00000101
"
),
(
b""
,
""
),
]
not_ok
=
[
b"
\
x7f
\
xff
\
xff
\
xff
"
,
b"
\
x80
\
x00
\
x00
\
x00
"
,
b"
\
x81
\
x00
\
x00
\
x00
"
,
b"
\
x00
"
,
b"
\
x00
\
x00
\
x00
\
x00
\
x00
"
,
]
for
internal
,
uni
in
ok
:
if
sys
.
byteorder
==
"little"
:
internal
=
bytes
(
reversed
(
internal
))
with
support
.
check_warnings
():
self
.
assertEqual
(
uni
,
internal
.
decode
(
"unicode_internal"
))
for
internal
in
not_ok
:
if
sys
.
byteorder
==
"little"
:
internal
=
bytes
(
reversed
(
internal
))
with
support
.
check_warnings
((
'unicode_internal codec has been '
'deprecated'
,
DeprecationWarning
)):
self
.
assertRaises
(
UnicodeDecodeError
,
internal
.
decode
,
"unicode_internal"
)
if
sys
.
byteorder
==
"little"
:
invalid
=
b"
\
x00
\
x00
\
x11
\
x00
"
invalid_backslashreplace
=
r"\x00\x00\x11\x00"
else
:
invalid
=
b"
\
x00
\
x11
\
x00
\
x00
"
invalid_backslashreplace
=
r"\x00\x11\x00\x00"
with
support
.
check_warnings
():
self
.
assertRaises
(
UnicodeDecodeError
,
invalid
.
decode
,
"unicode_internal"
)
with
support
.
check_warnings
():
self
.
assertEqual
(
invalid
.
decode
(
"unicode_internal"
,
"replace"
),
'
\
ufffd
'
)
with
support
.
check_warnings
():
self
.
assertEqual
(
invalid
.
decode
(
"unicode_internal"
,
"backslashreplace"
),
invalid_backslashreplace
)
@
unittest
.
skipUnless
(
SIZEOF_WCHAR_T
==
4
,
'specific to 32-bit wchar_t'
)
def
test_decode_error_attributes
(
self
):
try
:
with
support
.
check_warnings
((
'unicode_internal codec has been '
'deprecated'
,
DeprecationWarning
)):
b"
\
x00
\
x00
\
x00
\
x00
\
x00
\
x11
\
x11
\
x00
"
.
decode
(
"unicode_internal"
)
except
UnicodeDecodeError
as
ex
:
self
.
assertEqual
(
"unicode_internal"
,
ex
.
encoding
)
self
.
assertEqual
(
b"
\
x00
\
x00
\
x00
\
x00
\
x00
\
x11
\
x11
\
x00
"
,
ex
.
object
)
self
.
assertEqual
(
4
,
ex
.
start
)
self
.
assertEqual
(
8
,
ex
.
end
)
else
:
self
.
fail
()
@
unittest
.
skipUnless
(
SIZEOF_WCHAR_T
==
4
,
'specific to 32-bit wchar_t'
)
def
test_decode_callback
(
self
):
codecs
.
register_error
(
"UnicodeInternalTest"
,
codecs
.
ignore_errors
)
decoder
=
codecs
.
getdecoder
(
"unicode_internal"
)
with
support
.
check_warnings
((
'unicode_internal codec has been '
'deprecated'
,
DeprecationWarning
)):
ab
=
"ab"
.
encode
(
"unicode_internal"
).
decode
()
ignored
=
decoder
(
bytes
(
"%s
\
x22
\
x22
\
x22
\
x22
%s"
%
(
ab
[:
4
],
ab
[
4
:]),
"ascii"
),
"UnicodeInternalTest"
)
self
.
assertEqual
((
"ab"
,
12
),
ignored
)
def
test_encode_length
(
self
):
with
support
.
check_warnings
((
'unicode_internal codec has been '
'deprecated'
,
DeprecationWarning
)):
# Issue 3739
encoder
=
codecs
.
getencoder
(
"unicode_internal"
)
self
.
assertEqual
(
encoder
(
"a"
)[
1
],
1
)
self
.
assertEqual
(
encoder
(
"
\
xe9
\
u0142
"
)[
1
],
2
)
self
.
assertEqual
(
codecs
.
escape_encode
(
br'\x00'
)[
1
],
4
)
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
nameprep_tests = [
# 3.1 Map to nothing.
...
...
@@ -1949,7 +1858,6 @@ all_unicode_encodings = [
"
shift_jisx0213
",
"
tis_620
",
"
unicode_escape
",
"unicode_internal"
,
"
utf_16
",
"
utf_16_be
",
"
utf_16_le
",
...
...
@@ -1969,7 +1877,6 @@ if hasattr(codecs, "oem_encode"):
# The following encodings don't work in stateful mode
broken_unicode_with_stateful = [
"
punycode
",
"unicode_internal"
]
...
...
@@ -1984,8 +1891,6 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
name = "
latin_1
"
self.assertEqual(encoding.replace("
_
", "
-
"), name.replace("
_
", "
-
"))
with
support
.
check_warnings
():
# unicode-internal has been deprecated
(b, size) = codecs.getencoder(encoding)(s)
self.assertEqual(size, len(s), "
encoding
=%
r" % encoding)
(chars, size) = codecs.getdecoder(encoding)(b)
...
...
@@ -2116,8 +2021,6 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
def test_bad_encode_args(self):
for encoding in all_unicode_encodings:
encoder = codecs.getencoder(encoding)
with
support
.
check_warnings
():
# unicode-internal has been deprecated
self.assertRaises(TypeError, encoder)
def test_encoding_map_type_initialized(self):
...
...
Lib/test/test_unicode.py
View file @
6a16b182
...
...
@@ -2104,11 +2104,7 @@ class UnicodeTest(string_tests.CommonTest,
u
=
chr
(
c
)
for
encoding
in
(
'utf-7'
,
'utf-8'
,
'utf-16'
,
'utf-16-le'
,
'utf-16-be'
,
'raw_unicode_escape'
,
'unicode_escape'
,
'unicode_internal'
):
with
warnings
.
catch_warnings
():
# unicode-internal has been deprecated
warnings
.
simplefilter
(
"ignore"
,
DeprecationWarning
)
'unicode_escape'
):
self
.
assertEqual
(
str
(
u
.
encode
(
encoding
),
encoding
),
u
)
# Roundtrip safety for BMP (just the first 256 chars)
...
...
@@ -2125,13 +2121,9 @@ class UnicodeTest(string_tests.CommonTest,
# Roundtrip safety for non-BMP (just a few chars)
with
warnings
.
catch_warnings
():
# unicode-internal has been deprecated
warnings
.
simplefilter
(
"ignore"
,
DeprecationWarning
)
u
=
'
\
U00010001
\
U00020002
\
U00030003
\
U00040004
\
U00050005
'
for
encoding
in
(
'utf-8'
,
'utf-16'
,
'utf-16-le'
,
'utf-16-be'
,
'raw_unicode_escape'
,
'unicode_escape'
,
'unicode_internal'
):
'raw_unicode_escape'
,
'unicode_escape'
):
self
.
assertEqual
(
str
(
u
.
encode
(
encoding
),
encoding
),
u
)
# UTF-8 must be roundtrip safe for all code points
...
...
@@ -2349,22 +2341,22 @@ class UnicodeTest(string_tests.CommonTest,
self
.
assertEqual
(
args
[
0
],
text
)
self
.
assertEqual
(
len
(
args
),
1
)
@
support
.
cpython_only
def
test_resize
(
self
):
from
_testcapi
import
getargs_u
for
length
in
range
(
1
,
100
,
7
):
# generate a fresh string (refcount=1)
text
=
'a'
*
length
+
'b'
with
support
.
check_warnings
((
'unicode_internal codec has been '
'deprecated'
,
DeprecationWarning
)):
# fill wstr internal field
abc
=
text
.
encode
(
'unicode_internal'
)
self
.
assertEqual
(
abc
.
decode
(
'unicode_internal'
)
,
text
)
abc
=
getargs_u
(
text
)
self
.
assertEqual
(
abc
,
text
)
# resize text: wstr field must be cleared and then recomputed
text
+=
'c'
abcdef
=
text
.
encode
(
'unicode_internal'
)
abcdef
=
getargs_u
(
text
)
self
.
assertNotEqual
(
abc
,
abcdef
)
self
.
assertEqual
(
abcdef
.
decode
(
'unicode_internal'
)
,
text
)
self
.
assertEqual
(
abcdef
,
text
)
def
test_compare
(
self
):
# Issue #17615
...
...
Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst
0 → 100644
View file @
6a16b182
"unicode_internal" codec is removed. It was deprecated since Python 3.3.
Patch by Inada Naoki.
Modules/_codecsmodule.c
View file @
6a16b182
...
...
@@ -21,8 +21,7 @@
(Unicode object, bytes consumed)
These <encoding>s are available: utf_8, unicode_escape,
raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
mbcs (on win32).
raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
Written by Marc-Andre Lemburg (mal@lemburg.com).
...
...
@@ -250,38 +249,6 @@ _codecs_escape_encode_impl(PyObject *module, PyObject *data,
}
/* --- Decoder ------------------------------------------------------------ */
/*[clinic input]
_codecs.unicode_internal_decode
obj: object
errors: str(accept={str, NoneType}) = NULL
/
[clinic start generated code]*/
static
PyObject
*
_codecs_unicode_internal_decode_impl
(
PyObject
*
module
,
PyObject
*
obj
,
const
char
*
errors
)
/*[clinic end generated code: output=edbfe175e09eff9a input=8d57930aeda170c6]*/
{
if
(
PyUnicode_Check
(
obj
))
{
if
(
PyUnicode_READY
(
obj
)
<
0
)
return
NULL
;
Py_INCREF
(
obj
);
return
codec_tuple
(
obj
,
PyUnicode_GET_LENGTH
(
obj
));
}
else
{
Py_buffer
view
;
PyObject
*
result
;
if
(
PyObject_GetBuffer
(
obj
,
&
view
,
PyBUF_SIMPLE
)
!=
0
)
return
NULL
;
result
=
codec_tuple
(
_PyUnicode_DecodeUnicodeInternal
(
view
.
buf
,
view
.
len
,
errors
),
view
.
len
);
PyBuffer_Release
(
&
view
);
return
result
;
}
}
/*[clinic input]
_codecs.utf_7_decode
data: Py_buffer
...
...
@@ -686,51 +653,6 @@ _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
return
codec_tuple
(
result
,
data
->
len
);
}
/*[clinic input]
_codecs.unicode_internal_encode
obj: object
errors: str(accept={str, NoneType}) = NULL
/
[clinic start generated code]*/
static
PyObject
*
_codecs_unicode_internal_encode_impl
(
PyObject
*
module
,
PyObject
*
obj
,
const
char
*
errors
)
/*[clinic end generated code: output=a72507dde4ea558f input=8628f0280cf5ba61]*/
{
if
(
PyErr_WarnEx
(
PyExc_DeprecationWarning
,
"unicode_internal codec has been deprecated"
,
1
))
return
NULL
;
if
(
PyUnicode_Check
(
obj
))
{
Py_UNICODE
*
u
;
Py_ssize_t
len
,
size
;
if
(
PyUnicode_READY
(
obj
)
<
0
)
return
NULL
;
u
=
PyUnicode_AsUnicodeAndSize
(
obj
,
&
len
);
if
(
u
==
NULL
)
return
NULL
;
if
((
size_t
)
len
>
(
size_t
)
PY_SSIZE_T_MAX
/
sizeof
(
Py_UNICODE
))
return
PyErr_NoMemory
();
size
=
len
*
sizeof
(
Py_UNICODE
);
return
codec_tuple
(
PyBytes_FromStringAndSize
((
const
char
*
)
u
,
size
),
PyUnicode_GET_LENGTH
(
obj
));
}
else
{
Py_buffer
view
;
PyObject
*
result
;
if
(
PyObject_GetBuffer
(
obj
,
&
view
,
PyBUF_SIMPLE
)
!=
0
)
return
NULL
;
result
=
codec_tuple
(
PyBytes_FromStringAndSize
(
view
.
buf
,
view
.
len
),
view
.
len
);
PyBuffer_Release
(
&
view
);
return
result
;
}
}
/*[clinic input]
_codecs.utf_7_encode
str: unicode
...
...
@@ -1095,8 +1017,6 @@ static PyMethodDef _codecs_functions[] = {
_CODECS_UTF_32_EX_DECODE_METHODDEF
_CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
_CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
_CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF
_CODECS_UNICODE_INTERNAL_DECODE_METHODDEF
_CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
_CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
_CODECS_LATIN_1_ENCODE_METHODDEF
...
...
Modules/clinic/_codecsmodule.c.h
View file @
6a16b182
...
...
@@ -370,57 +370,6 @@ exit:
return
return_value
;
}
PyDoc_STRVAR
(
_codecs_unicode_internal_decode__doc__
,
"unicode_internal_decode($module, obj, errors=None, /)
\n
"
"--
\n
"
"
\n
"
);
#define _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF \
{"unicode_internal_decode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_decode, METH_FASTCALL, _codecs_unicode_internal_decode__doc__},
static
PyObject
*
_codecs_unicode_internal_decode_impl
(
PyObject
*
module
,
PyObject
*
obj
,
const
char
*
errors
);
static
PyObject
*
_codecs_unicode_internal_decode
(
PyObject
*
module
,
PyObject
*
const
*
args
,
Py_ssize_t
nargs
)
{
PyObject
*
return_value
=
NULL
;
PyObject
*
obj
;
const
char
*
errors
=
NULL
;
if
(
!
_PyArg_CheckPositional
(
"unicode_internal_decode"
,
nargs
,
1
,
2
))
{
goto
exit
;
}
obj
=
args
[
0
];
if
(
nargs
<
2
)
{
goto
skip_optional
;
}
if
(
args
[
1
]
==
Py_None
)
{
errors
=
NULL
;
}
else
if
(
PyUnicode_Check
(
args
[
1
]))
{
Py_ssize_t
errors_length
;
errors
=
PyUnicode_AsUTF8AndSize
(
args
[
1
],
&
errors_length
);
if
(
errors
==
NULL
)
{
goto
exit
;
}
if
(
strlen
(
errors
)
!=
(
size_t
)
errors_length
)
{
PyErr_SetString
(
PyExc_ValueError
,
"embedded null character"
);
goto
exit
;
}
}
else
{
_PyArg_BadArgument
(
"unicode_internal_decode"
,
2
,
"str or None"
,
args
[
1
]);
goto
exit
;
}
skip_optional:
return_value
=
_codecs_unicode_internal_decode_impl
(
module
,
obj
,
errors
);
exit:
return
return_value
;
}
PyDoc_STRVAR
(
_codecs_utf_7_decode__doc__
,
"utf_7_decode($module, data, errors=None, final=False, /)
\n
"
"--
\n
"
...
...
@@ -1853,57 +1802,6 @@ exit:
return
return_value
;
}
PyDoc_STRVAR
(
_codecs_unicode_internal_encode__doc__
,
"unicode_internal_encode($module, obj, errors=None, /)
\n
"
"--
\n
"
"
\n
"
);
#define _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF \
{"unicode_internal_encode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_encode, METH_FASTCALL, _codecs_unicode_internal_encode__doc__},
static
PyObject
*
_codecs_unicode_internal_encode_impl
(
PyObject
*
module
,
PyObject
*
obj
,
const
char
*
errors
);
static
PyObject
*
_codecs_unicode_internal_encode
(
PyObject
*
module
,
PyObject
*
const
*
args
,
Py_ssize_t
nargs
)
{
PyObject
*
return_value
=
NULL
;
PyObject
*
obj
;
const
char
*
errors
=
NULL
;
if
(
!
_PyArg_CheckPositional
(
"unicode_internal_encode"
,
nargs
,
1
,
2
))
{
goto
exit
;
}
obj
=
args
[
0
];
if
(
nargs
<
2
)
{
goto
skip_optional
;
}
if
(
args
[
1
]
==
Py_None
)
{
errors
=
NULL
;
}
else
if
(
PyUnicode_Check
(
args
[
1
]))
{
Py_ssize_t
errors_length
;
errors
=
PyUnicode_AsUTF8AndSize
(
args
[
1
],
&
errors_length
);
if
(
errors
==
NULL
)
{
goto
exit
;
}
if
(
strlen
(
errors
)
!=
(
size_t
)
errors_length
)
{
PyErr_SetString
(
PyExc_ValueError
,
"embedded null character"
);
goto
exit
;
}
}
else
{
_PyArg_BadArgument
(
"unicode_internal_encode"
,
2
,
"str or None"
,
args
[
1
]);
goto
exit
;
}
skip_optional:
return_value
=
_codecs_unicode_internal_encode_impl
(
module
,
obj
,
errors
);
exit:
return
return_value
;
}
PyDoc_STRVAR
(
_codecs_utf_7_encode__doc__
,
"utf_7_encode($module, str, errors=None, /)
\n
"
"--
\n
"
...
...
@@ -3024,4 +2922,4 @@ exit:
#ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
#define _CODECS_CODE_PAGE_ENCODE_METHODDEF
#endif
/* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
/*[clinic end generated code: output=
02bd0f0cf9a28150
input=a9049054013a1b77]*/
/*[clinic end generated code: output=
da3c47709a55a05e
input=a9049054013a1b77]*/
Objects/unicodeobject.c
View file @
6a16b182
...
...
@@ -6551,108 +6551,6 @@ PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
return
result
;
}
/* --- Unicode Internal Codec ------------------------------------------- */
PyObject
*
_PyUnicode_DecodeUnicodeInternal
(
const
char
*
s
,
Py_ssize_t
size
,
const
char
*
errors
)
{
const
char
*
starts
=
s
;
Py_ssize_t
startinpos
;
Py_ssize_t
endinpos
;
_PyUnicodeWriter
writer
;
const
char
*
end
;
const
char
*
reason
;
PyObject
*
errorHandler
=
NULL
;
PyObject
*
exc
=
NULL
;
if
(
PyErr_WarnEx
(
PyExc_DeprecationWarning
,
"unicode_internal codec has been deprecated"
,
1
))
return
NULL
;
if
(
size
<
0
)
{
PyErr_BadInternalCall
();
return
NULL
;
}
if
(
size
==
0
)
_Py_RETURN_UNICODE_EMPTY
();
_PyUnicodeWriter_Init
(
&
writer
);
if
(
size
/
Py_UNICODE_SIZE
>
PY_SSIZE_T_MAX
-
1
)
{
PyErr_NoMemory
();
goto
onError
;
}
writer
.
min_length
=
(
size
+
(
Py_UNICODE_SIZE
-
1
))
/
Py_UNICODE_SIZE
;
end
=
s
+
size
;
while
(
s
<
end
)
{
Py_UNICODE
uch
;
Py_UCS4
ch
;
if
(
end
-
s
<
Py_UNICODE_SIZE
)
{
endinpos
=
end
-
starts
;
reason
=
"truncated input"
;
goto
error
;
}
/* We copy the raw representation one byte at a time because the
pointer may be unaligned (see test_codeccallbacks). */
((
char
*
)
&
uch
)[
0
]
=
s
[
0
];
((
char
*
)
&
uch
)[
1
]
=
s
[
1
];
#ifdef Py_UNICODE_WIDE
((
char
*
)
&
uch
)[
2
]
=
s
[
2
];
((
char
*
)
&
uch
)[
3
]
=
s
[
3
];
#endif
ch
=
uch
;
#ifdef Py_UNICODE_WIDE
/* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */
if
(
ch
>
0x10ffff
)
{
endinpos
=
s
-
starts
+
Py_UNICODE_SIZE
;
reason
=
"illegal code point (> 0x10FFFF)"
;
goto
error
;
}
#endif
s
+=
Py_UNICODE_SIZE
;
#ifndef Py_UNICODE_WIDE
if
(
Py_UNICODE_IS_HIGH_SURROGATE
(
ch
)
&&
end
-
s
>=
Py_UNICODE_SIZE
)
{
Py_UNICODE
uch2
;
((
char
*
)
&
uch2
)[
0
]
=
s
[
0
];
((
char
*
)
&
uch2
)[
1
]
=
s
[
1
];
if
(
Py_UNICODE_IS_LOW_SURROGATE
(
uch2
))
{
ch
=
Py_UNICODE_JOIN_SURROGATES
(
uch
,
uch2
);
s
+=
Py_UNICODE_SIZE
;
}
}
#endif
if
(
_PyUnicodeWriter_WriteCharInline
(
&
writer
,
ch
)
<
0
)
goto
onError
;
continue
;
error:
startinpos
=
s
-
starts
;
if
(
unicode_decode_call_errorhandler_writer
(
errors
,
&
errorHandler
,
"unicode_internal"
,
reason
,
&
starts
,
&
end
,
&
startinpos
,
&
endinpos
,
&
exc
,
&
s
,
&
writer
))
goto
onError
;
}
Py_XDECREF
(
errorHandler
);
Py_XDECREF
(
exc
);
return
_PyUnicodeWriter_Finish
(
&
writer
);
onError:
_PyUnicodeWriter_Dealloc
(
&
writer
);
Py_XDECREF
(
errorHandler
);
Py_XDECREF
(
exc
);
return
NULL
;
}
/* --- Latin-1 Codec ------------------------------------------------------ */
PyObject
*
...
...
PCbuild/lib.pyproj
View file @
6a16b182
...
...
@@ -392,7 +392,6 @@
<Compile
Include=
"encodings\tis_620.py"
/>
<Compile
Include=
"encodings\undefined.py"
/>
<Compile
Include=
"encodings\unicode_escape.py"
/>
<Compile
Include=
"encodings\unicode_internal.py"
/>
<Compile
Include=
"encodings\utf_16.py"
/>
<Compile
Include=
"encodings\utf_16_be.py"
/>
<Compile
Include=
"encodings\utf_16_le.py"
/>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment