Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
f5aba584
Commit
f5aba584
authored
Sep 06, 2016
by
Steve Dower
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #27959: Adds oem encoding, alias ansi to mbcs, move aliasmbcs to codec lookup
parent
22d0698d
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
198 additions
and
51 deletions
+198
-51
Include/unicodeobject.h
Include/unicodeobject.h
+1
-1
Lib/encodings/__init__.py
Lib/encodings/__init__.py
+10
-0
Lib/encodings/aliases.py
Lib/encodings/aliases.py
+1
-0
Lib/encodings/oem.py
Lib/encodings/oem.py
+41
-0
Lib/site.py
Lib/site.py
+0
-16
Lib/test/test_codecs.py
Lib/test/test_codecs.py
+29
-33
Modules/_codecsmodule.c
Modules/_codecsmodule.c
+36
-0
Modules/clinic/_codecsmodule.c.h
Modules/clinic/_codecsmodule.c.h
+80
-1
No files found.
Include/unicodeobject.h
View file @
f5aba584
...
...
@@ -1663,7 +1663,7 @@ PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
PyAPI_FUNC
(
PyObject
*
)
PyUnicode_DecodeMBCS
(
const
char
*
string
,
/* MBCS encoded string */
Py_ssize_t
length
,
/* size of string */
Py_ssize_t
length
,
/* size of string */
const
char
*
errors
/* error handling */
);
...
...
Lib/encodings/__init__.py
View file @
f5aba584
...
...
@@ -29,6 +29,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
#"
import
codecs
import
sys
from
.
import
aliases
_cache
=
{}
...
...
@@ -151,3 +152,12 @@ def search_function(encoding):
# Register the search_function in the Python codec registry
codecs
.
register
(
search_function
)
if
sys
.
platform
==
'win32'
:
def
_alias_mbcs
(
encoding
):
import
_bootlocale
if
encoding
==
_bootlocale
.
getpreferredencoding
(
False
):
import
encodings.mbcs
return
encodings
.
mbcs
.
getregentry
()
codecs
.
register
(
_alias_mbcs
)
Lib/encodings/aliases.py
View file @
f5aba584
...
...
@@ -458,6 +458,7 @@ aliases = {
'macturkish'
:
'mac_turkish'
,
# mbcs codec
'ansi'
:
'mbcs'
,
'dbcs'
:
'mbcs'
,
# ptcp154 codec
...
...
Lib/encodings/oem.py
0 → 100644
View file @
f5aba584
""" Python 'oem' Codec for Windows
"""
# Import them explicitly to cause an ImportError
# on non-Windows systems
from
codecs
import
oem_encode
,
oem_decode
# for IncrementalDecoder, IncrementalEncoder, ...
import
codecs
### Codec APIs
encode
=
oem_encode
def
decode
(
input
,
errors
=
'strict'
):
return
oem_decode
(
input
,
errors
,
True
)
class
IncrementalEncoder
(
codecs
.
IncrementalEncoder
):
def
encode
(
self
,
input
,
final
=
False
):
return
oem_encode
(
input
,
self
.
errors
)[
0
]
class
IncrementalDecoder
(
codecs
.
BufferedIncrementalDecoder
):
_buffer_decode
=
oem_decode
class
StreamWriter
(
codecs
.
StreamWriter
):
encode
=
oem_encode
class
StreamReader
(
codecs
.
StreamReader
):
decode
=
oem_decode
### encodings module API
def
getregentry
():
return
codecs
.
CodecInfo
(
name
=
'oem'
,
encode
=
encode
,
decode
=
decode
,
incrementalencoder
=
IncrementalEncoder
,
incrementaldecoder
=
IncrementalDecoder
,
streamreader
=
StreamReader
,
streamwriter
=
StreamWriter
,
)
Lib/site.py
View file @
f5aba584
...
...
@@ -423,21 +423,6 @@ def enablerlcompleter():
sys
.
__interactivehook__
=
register_readline
def
aliasmbcs
():
"""On Windows, some default encodings are not provided by Python,
while they are always available as "mbcs" in each locale. Make
them usable by aliasing to "mbcs" in such a case."""
if
sys
.
platform
==
'win32'
:
import
_bootlocale
,
codecs
enc
=
_bootlocale
.
getpreferredencoding
(
False
)
if
enc
.
startswith
(
'cp'
):
# "cp***" ?
try
:
codecs
.
lookup
(
enc
)
except
LookupError
:
import
encodings
encodings
.
_cache
[
enc
]
=
encodings
.
_unknown
encodings
.
aliases
.
aliases
[
enc
]
=
'mbcs'
CONFIG_LINE
=
r'^(?P<key>(\
w|[-_])+)
\s*=\
s*(?P<
value>.*)\
s*$
'
def venv(known_paths):
...
...
@@ -560,7 +545,6 @@ def main():
setcopyright
()
sethelper
()
enablerlcompleter
()
aliasmbcs
()
execsitecustomize
()
if
ENABLE_USER_SITE
:
execusercustomize
()
...
...
Lib/test/test_codecs.py
View file @
f5aba584
...
...
@@ -8,11 +8,6 @@ import encodings
from
test
import
support
if
sys
.
platform
==
'win32'
:
VISTA_OR_LATER
=
(
sys
.
getwindowsversion
().
major
>=
6
)
else
:
VISTA_OR_LATER
=
False
try
:
import
ctypes
except
ImportError
:
...
...
@@ -841,18 +836,13 @@ class CP65001Test(ReadTest, unittest.TestCase):
(
'abc'
,
'strict'
,
b'abc'
),
(
'
\
xe9
\
u20ac
'
,
'strict'
,
b'
\
xc3
\
xa9
\
xe2
\
x82
\
xac
'
),
(
'
\
U0010ffff
'
,
'strict'
,
b'
\
xf4
\
x8f
\
xbf
\
xbf
'
),
(
'
\
udc80
'
,
'strict'
,
None
),
(
'
\
udc80
'
,
'ignore'
,
b''
),
(
'
\
udc80
'
,
'replace'
,
b'?'
),
(
'
\
udc80
'
,
'backslashreplace'
,
b'
\
\
udc80'
),
(
'
\
udc80
'
,
'namereplace'
,
b'
\
\
udc80'
),
(
'
\
udc80
'
,
'surrogatepass'
,
b'
\
xed
\
xb2
\
x80
'
),
]
if
VISTA_OR_LATER
:
tests
.
extend
((
(
'
\
udc80
'
,
'strict'
,
None
),
(
'
\
udc80
'
,
'ignore'
,
b''
),
(
'
\
udc80
'
,
'replace'
,
b'?'
),
(
'
\
udc80
'
,
'backslashreplace'
,
b'
\
\
udc80'
),
(
'
\
udc80
'
,
'namereplace'
,
b'
\
\
udc80'
),
(
'
\
udc80
'
,
'surrogatepass'
,
b'
\
xed
\
xb2
\
x80
'
),
))
else
:
tests
.
append
((
'
\
udc80
'
,
'strict'
,
b'
\
xed
\
xb2
\
x80
'
))
for
text
,
errors
,
expected
in
tests
:
if
expected
is
not
None
:
try
:
...
...
@@ -879,17 +869,10 @@ class CP65001Test(ReadTest, unittest.TestCase):
(
b'[
\
xff
]'
,
'ignore'
,
'[]'
),
(
b'[
\
xff
]'
,
'replace'
,
'[
\
ufffd
]'
),
(
b'[
\
xff
]'
,
'surrogateescape'
,
'[
\
udcff
]'
),
(
b'[
\
xed
\
xb2
\
x80
]'
,
'strict'
,
None
),
(
b'[
\
xed
\
xb2
\
x80
]'
,
'ignore'
,
'[]'
),
(
b'[
\
xed
\
xb2
\
x80
]'
,
'replace'
,
'[
\
ufffd
\
ufffd
\
ufffd
]'
),
]
if
VISTA_OR_LATER
:
tests
.
extend
((
(
b'[
\
xed
\
xb2
\
x80
]'
,
'strict'
,
None
),
(
b'[
\
xed
\
xb2
\
x80
]'
,
'ignore'
,
'[]'
),
(
b'[
\
xed
\
xb2
\
x80
]'
,
'replace'
,
'[
\
ufffd
\
ufffd
\
ufffd
]'
),
))
else
:
tests
.
extend
((
(
b'[
\
xed
\
xb2
\
x80
]'
,
'strict'
,
'[
\
udc80
]'
),
))
for
raw
,
errors
,
expected
in
tests
:
if
expected
is
not
None
:
try
:
...
...
@@ -904,7 +887,6 @@ class CP65001Test(ReadTest, unittest.TestCase):
self
.
assertRaises
(
UnicodeDecodeError
,
raw
.
decode
,
'cp65001'
,
errors
)
@
unittest
.
skipUnless
(
VISTA_OR_LATER
,
'require Windows Vista or later'
)
def
test_lone_surrogates
(
self
):
self
.
assertRaises
(
UnicodeEncodeError
,
"
\
ud800
"
.
encode
,
"cp65001"
)
self
.
assertRaises
(
UnicodeDecodeError
,
b"
\
xed
\
xa0
\
x80
"
.
decode
,
"cp65001"
)
...
...
@@ -921,7 +903,6 @@ class CP65001Test(ReadTest, unittest.TestCase):
self
.
assertEqual
(
"[
\
uDC80
]"
.
encode
(
"cp65001"
,
"replace"
),
b'[?]'
)
@
unittest
.
skipUnless
(
VISTA_OR_LATER
,
'require Windows Vista or later'
)
def
test_surrogatepass_handler
(
self
):
self
.
assertEqual
(
"abc
\
ud800
def"
.
encode
(
"cp65001"
,
"surrogatepass"
),
b"abc
\
xed
\
xa0
\
x80
def"
)
...
...
@@ -1951,6 +1932,8 @@ all_unicode_encodings = [
if
hasattr
(
codecs
,
"mbcs_encode"
):
all_unicode_encodings
.
append
(
"mbcs"
)
if
hasattr
(
codecs
,
"oem_encode"
):
all_unicode_encodings
.
append
(
"oem"
)
# The following encoding is not tested, because it's not supposed
# to work:
...
...
@@ -3119,11 +3102,10 @@ class CodePageTest(unittest.TestCase):
(b'
\
xff
\
xf4
\
x8f
\
xbf
\
xbf
', 'ignore', '
\
U0010ffff
'),
(b'
\
xff
\
xf4
\
x8f
\
xbf
\
xbf
', 'replace', '
\
ufffd
\
U0010ffff
'),
))
if VISTA_OR_LATER:
self.check_encode(self.CP_UTF8, (
('[
\
U0010ffff
\
uDC80
]', 'ignore', b'[
\
xf4
\
x8f
\
xbf
\
xbf
]'),
('[
\
U0010ffff
\
uDC80
]', 'replace', b'[
\
xf4
\
x8f
\
xbf
\
xbf
?]'),
))
self.check_encode(self.CP_UTF8, (
('[
\
U0010ffff
\
uDC80
]', 'ignore', b'[
\
xf4
\
x8f
\
xbf
\
xbf
]'),
('[
\
U0010ffff
\
uDC80
]', 'replace', b'[
\
xf4
\
x8f
\
xbf
\
xbf
?]'),
))
def test_incremental(self):
decoded = codecs.code_page_decode(932, b'
\
x82
', 'strict', False)
...
...
@@ -3144,6 +3126,20 @@ class CodePageTest(unittest.TestCase):
False)
self.assertEqual(decoded, ('abc', 3))
def test_mbcs_alias(self):
# Check that looking up our 'default' codepage will return
# mbcs when we don't have a more specific one available
import _bootlocale
def _get_fake_codepage(*a):
return 'cp123'
old_getpreferredencoding = _bootlocale.getpreferredencoding
_bootlocale.getpreferredencoding = _get_fake_codepage
try:
codec = codecs.lookup('cp123')
self.assertEqual(codec.name, 'mbcs')
finally:
_bootlocale.getpreferredencoding = old_getpreferredencoding
class ASCIITest(unittest.TestCase):
def test_encode(self):
...
...
Modules/_codecsmodule.c
View file @
f5aba584
...
...
@@ -625,6 +625,25 @@ _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
return
codec_tuple
(
decoded
,
consumed
);
}
/*[clinic input]
_codecs.oem_decode
data: Py_buffer
errors: str(accept={str, NoneType}) = NULL
final: int(c_default="0") = False
/
[clinic start generated code]*/
static
PyObject
*
_codecs_oem_decode_impl
(
PyObject
*
module
,
Py_buffer
*
data
,
const
char
*
errors
,
int
final
)
/*[clinic end generated code: output=da1617612f3fcad8 input=95b8a92c446b03cd]*/
{
Py_ssize_t
consumed
=
data
->
len
;
PyObject
*
decoded
=
PyUnicode_DecodeCodePageStateful
(
CP_OEMCP
,
data
->
buf
,
data
->
len
,
errors
,
final
?
NULL
:
&
consumed
);
return
codec_tuple
(
decoded
,
consumed
);
}
/*[clinic input]
_codecs.code_page_decode
codepage: int
...
...
@@ -970,6 +989,21 @@ _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
PyUnicode_GET_LENGTH
(
str
));
}
/*[clinic input]
_codecs.oem_encode
str: unicode
errors: str(accept={str, NoneType}) = NULL
/
[clinic start generated code]*/
static
PyObject
*
_codecs_oem_encode_impl
(
PyObject
*
module
,
PyObject
*
str
,
const
char
*
errors
)
/*[clinic end generated code: output=65d5982c737de649 input=3fc5f0028aad3cda]*/
{
return
codec_tuple
(
PyUnicode_EncodeCodePage
(
CP_OEMCP
,
str
,
errors
),
PyUnicode_GET_LENGTH
(
str
));
}
/*[clinic input]
_codecs.code_page_encode
code_page: int
...
...
@@ -1075,6 +1109,8 @@ static PyMethodDef _codecs_functions[] = {
_CODECS_READBUFFER_ENCODE_METHODDEF
_CODECS_MBCS_ENCODE_METHODDEF
_CODECS_MBCS_DECODE_METHODDEF
_CODECS_OEM_ENCODE_METHODDEF
_CODECS_OEM_DECODE_METHODDEF
_CODECS_CODE_PAGE_ENCODE_METHODDEF
_CODECS_CODE_PAGE_DECODE_METHODDEF
_CODECS_REGISTER_ERROR_METHODDEF
...
...
Modules/clinic/_codecsmodule.c.h
View file @
f5aba584
...
...
@@ -805,6 +805,45 @@ exit:
#if defined(HAVE_MBCS)
PyDoc_STRVAR
(
_codecs_oem_decode__doc__
,
"oem_decode($module, data, errors=None, final=False, /)
\n
"
"--
\n
"
"
\n
"
);
#define _CODECS_OEM_DECODE_METHODDEF \
{"oem_decode", (PyCFunction)_codecs_oem_decode, METH_VARARGS, _codecs_oem_decode__doc__},
static
PyObject
*
_codecs_oem_decode_impl
(
PyObject
*
module
,
Py_buffer
*
data
,
const
char
*
errors
,
int
final
);
static
PyObject
*
_codecs_oem_decode
(
PyObject
*
module
,
PyObject
*
args
)
{
PyObject
*
return_value
=
NULL
;
Py_buffer
data
=
{
NULL
,
NULL
};
const
char
*
errors
=
NULL
;
int
final
=
0
;
if
(
!
PyArg_ParseTuple
(
args
,
"y*|zi:oem_decode"
,
&
data
,
&
errors
,
&
final
))
{
goto
exit
;
}
return_value
=
_codecs_oem_decode_impl
(
module
,
&
data
,
errors
,
final
);
exit:
/* Cleanup for data */
if
(
data
.
obj
)
{
PyBuffer_Release
(
&
data
);
}
return
return_value
;
}
#endif
/* defined(HAVE_MBCS) */
#if defined(HAVE_MBCS)
PyDoc_STRVAR
(
_codecs_code_page_decode__doc__
,
"code_page_decode($module, codepage, data, errors=None, final=False, /)
\n
"
"--
\n
"
...
...
@@ -1346,6 +1385,38 @@ exit:
#if defined(HAVE_MBCS)
PyDoc_STRVAR
(
_codecs_oem_encode__doc__
,
"oem_encode($module, str, errors=None, /)
\n
"
"--
\n
"
"
\n
"
);
#define _CODECS_OEM_ENCODE_METHODDEF \
{"oem_encode", (PyCFunction)_codecs_oem_encode, METH_VARARGS, _codecs_oem_encode__doc__},
static
PyObject
*
_codecs_oem_encode_impl
(
PyObject
*
module
,
PyObject
*
str
,
const
char
*
errors
);
static
PyObject
*
_codecs_oem_encode
(
PyObject
*
module
,
PyObject
*
args
)
{
PyObject
*
return_value
=
NULL
;
PyObject
*
str
;
const
char
*
errors
=
NULL
;
if
(
!
PyArg_ParseTuple
(
args
,
"U|z:oem_encode"
,
&
str
,
&
errors
))
{
goto
exit
;
}
return_value
=
_codecs_oem_encode_impl
(
module
,
str
,
errors
);
exit:
return
return_value
;
}
#endif
/* defined(HAVE_MBCS) */
#if defined(HAVE_MBCS)
PyDoc_STRVAR
(
_codecs_code_page_encode__doc__
,
"code_page_encode($module, code_page, str, errors=None, /)
\n
"
"--
\n
"
...
...
@@ -1446,6 +1517,10 @@ exit:
#define _CODECS_MBCS_DECODE_METHODDEF
#endif
/* !defined(_CODECS_MBCS_DECODE_METHODDEF) */
#ifndef _CODECS_OEM_DECODE_METHODDEF
#define _CODECS_OEM_DECODE_METHODDEF
#endif
/* !defined(_CODECS_OEM_DECODE_METHODDEF) */
#ifndef _CODECS_CODE_PAGE_DECODE_METHODDEF
#define _CODECS_CODE_PAGE_DECODE_METHODDEF
#endif
/* !defined(_CODECS_CODE_PAGE_DECODE_METHODDEF) */
...
...
@@ -1454,7 +1529,11 @@ exit:
#define _CODECS_MBCS_ENCODE_METHODDEF
#endif
/* !defined(_CODECS_MBCS_ENCODE_METHODDEF) */
#ifndef _CODECS_OEM_ENCODE_METHODDEF
#define _CODECS_OEM_ENCODE_METHODDEF
#endif
/* !defined(_CODECS_OEM_ENCODE_METHODDEF) */
#ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
#define _CODECS_CODE_PAGE_ENCODE_METHODDEF
#endif
/* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
/*[clinic end generated code: output=
0221e4eece62c905
input=a9049054013a1b77]*/
/*[clinic end generated code: output=
7874e2d559d49368
input=a9049054013a1b77]*/
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment