Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
d9488c68
Commit
d9488c68
authored
Oct 04, 2011
by
Antoine Pitrou
Browse files
Options
Browse Files
Download
Plain Diff
Merge
parents
2242522f
a9860aeb
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
15 additions
and
38 deletions
+15
-38
Lib/sre_compile.py
Lib/sre_compile.py
+3
-1
Lib/test/test_builtin.py
Lib/test/test_builtin.py
+1
-2
Lib/test/test_codeccallbacks.py
Lib/test/test_codeccallbacks.py
+4
-12
Lib/test/test_multibytecodec.py
Lib/test/test_multibytecodec.py
+1
-6
Lib/test/test_unicode.py
Lib/test/test_unicode.py
+4
-16
Tools/pybench/pybench.py
Tools/pybench/pybench.py
+1
-0
Tools/unicode/comparecodecs.py
Tools/unicode/comparecodecs.py
+1
-1
No files found.
Lib/sre_compile.py
View file @
d9488c68
...
@@ -318,11 +318,13 @@ def _optimize_unicode(charset, fixup):
...
@@ -318,11 +318,13 @@ def _optimize_unicode(charset, fixup):
# XXX: could expand category
# XXX: could expand category
return
charset
# cannot compress
return
charset
# cannot compress
except
IndexError
:
except
IndexError
:
# non-BMP characters
# non-BMP characters
; XXX now they should work
return
charset
return
charset
if
negate
:
if
negate
:
if
sys
.
maxunicode
!=
65535
:
if
sys
.
maxunicode
!=
65535
:
# XXX: negation does not work with big charsets
# XXX: negation does not work with big charsets
# XXX2: now they should work, but removing this will make the
# charmap 17 times bigger
return
charset
return
charset
for
i
in
range
(
65536
):
for
i
in
range
(
65536
):
charmap
[
i
]
=
not
charmap
[
i
]
charmap
[
i
]
=
not
charmap
[
i
]
...
...
Lib/test/test_builtin.py
View file @
d9488c68
...
@@ -249,8 +249,7 @@ class BuiltinTest(unittest.TestCase):
...
@@ -249,8 +249,7 @@ class BuiltinTest(unittest.TestCase):
self
.
assertEqual
(
chr
(
0xff
),
'
\
xff
'
)
self
.
assertEqual
(
chr
(
0xff
),
'
\
xff
'
)
self
.
assertRaises
(
ValueError
,
chr
,
1
<<
24
)
self
.
assertRaises
(
ValueError
,
chr
,
1
<<
24
)
self
.
assertEqual
(
chr
(
sys
.
maxunicode
),
self
.
assertEqual
(
chr
(
sys
.
maxunicode
),
str
((
'
\
\
U%08x'
%
(
sys
.
maxunicode
)).
encode
(
"ascii"
),
str
(
'
\
\
U0010ffff'
.
encode
(
"ascii"
),
'unicode-escape'
))
'unicode-escape'
))
self
.
assertRaises
(
TypeError
,
chr
)
self
.
assertRaises
(
TypeError
,
chr
)
self
.
assertEqual
(
chr
(
0x0000FFFF
),
"
\
U0000FFFF
"
)
self
.
assertEqual
(
chr
(
0x0000FFFF
),
"
\
U0000FFFF
"
)
self
.
assertEqual
(
chr
(
0x00010000
),
"
\
U00010000
"
)
self
.
assertEqual
(
chr
(
0x00010000
),
"
\
U00010000
"
)
...
...
Lib/test/test_codeccallbacks.py
View file @
d9488c68
...
@@ -138,22 +138,14 @@ class CodecCallbackTest(unittest.TestCase):
...
@@ -138,22 +138,14 @@ class CodecCallbackTest(unittest.TestCase):
def
test_backslashescape
(
self
):
def
test_backslashescape
(
self
):
# Does the same as the "unicode-escape" encoding, but with different
# Does the same as the "unicode-escape" encoding, but with different
# base encodings.
# base encodings.
sin
=
"a
\
xac
\
u1234
\
u20ac
\
u8000
"
sin
=
"a
\
xac
\
u1234
\
u20ac
\
u8000
\
U0010ffff
"
if
sys
.
maxunicode
>
0xffff
:
sout
=
b"a
\
\
xac
\
\
u1234
\
\
u20ac
\
\
u8000
\
\
U0010ffff"
sin
+=
chr
(
sys
.
maxunicode
)
sout
=
b"a
\
\
xac
\
\
u1234
\
\
u20ac
\
\
u8000"
if
sys
.
maxunicode
>
0xffff
:
sout
+=
bytes
(
"
\
\
U%08x"
%
sys
.
maxunicode
,
"ascii"
)
self
.
assertEqual
(
sin
.
encode
(
"ascii"
,
"backslashreplace"
),
sout
)
self
.
assertEqual
(
sin
.
encode
(
"ascii"
,
"backslashreplace"
),
sout
)
sout
=
b"a
\
xac
\
\
u1234
\
\
u20ac
\
\
u8000"
sout
=
b"a
\
xac
\
\
u1234
\
\
u20ac
\
\
u8000
\
\
U0010ffff"
if
sys
.
maxunicode
>
0xffff
:
sout
+=
bytes
(
"
\
\
U%08x"
%
sys
.
maxunicode
,
"ascii"
)
self
.
assertEqual
(
sin
.
encode
(
"latin-1"
,
"backslashreplace"
),
sout
)
self
.
assertEqual
(
sin
.
encode
(
"latin-1"
,
"backslashreplace"
),
sout
)
sout
=
b"a
\
xac
\
\
u1234
\
xa4
\
\
u8000"
sout
=
b"a
\
xac
\
\
u1234
\
xa4
\
\
u8000
\
\
U0010ffff"
if
sys
.
maxunicode
>
0xffff
:
sout
+=
bytes
(
"
\
\
U%08x"
%
sys
.
maxunicode
,
"ascii"
)
self
.
assertEqual
(
sin
.
encode
(
"iso-8859-15"
,
"backslashreplace"
),
sout
)
self
.
assertEqual
(
sin
.
encode
(
"iso-8859-15"
,
"backslashreplace"
),
sout
)
def
test_decoding_callbacks
(
self
):
def
test_decoding_callbacks
(
self
):
...
...
Lib/test/test_multibytecodec.py
View file @
d9488c68
...
@@ -247,14 +247,9 @@ class Test_ISO2022(unittest.TestCase):
...
@@ -247,14 +247,9 @@ class Test_ISO2022(unittest.TestCase):
self
.
assertFalse
(
any
(
x
>
0x80
for
x
in
e
))
self
.
assertFalse
(
any
(
x
>
0x80
for
x
in
e
))
def
test_bug1572832
(
self
):
def
test_bug1572832
(
self
):
if
sys
.
maxunicode
>=
0x10000
:
myunichr
=
chr
else
:
myunichr
=
lambda
x
:
chr
(
0xD7C0
+
(
x
>>
10
))
+
chr
(
0xDC00
+
(
x
&
0x3FF
))
for
x
in
range
(
0x10000
,
0x110000
):
for
x
in
range
(
0x10000
,
0x110000
):
# Any ISO 2022 codec will cause the segfault
# Any ISO 2022 codec will cause the segfault
myuni
chr
(
x
).
encode
(
'iso_2022_jp'
,
'ignore'
)
chr
(
x
).
encode
(
'iso_2022_jp'
,
'ignore'
)
class
TestStateful
(
unittest
.
TestCase
):
class
TestStateful
(
unittest
.
TestCase
):
text
=
'
\
u4E16
\
u4E16
'
text
=
'
\
u4E16
\
u4E16
'
...
...
Lib/test/test_unicode.py
View file @
d9488c68
...
@@ -13,10 +13,6 @@ import warnings
...
@@ -13,10 +13,6 @@ import warnings
from
test
import
support
,
string_tests
from
test
import
support
,
string_tests
import
_string
import
_string
# decorator to skip tests on narrow builds
requires_wide_build
=
unittest
.
skipIf
(
sys
.
maxunicode
==
65535
,
'requires wide build'
)
# Error handling (bad decoder return)
# Error handling (bad decoder return)
def
search_function
(
encoding
):
def
search_function
(
encoding
):
def
decode1
(
input
,
errors
=
"strict"
):
def
decode1
(
input
,
errors
=
"strict"
):
...
@@ -519,7 +515,6 @@ class UnicodeTest(string_tests.CommonTest,
...
@@ -519,7 +515,6 @@ class UnicodeTest(string_tests.CommonTest,
self
.
assertFalse
(
meth
(
s
),
'%a.%s() is False'
%
(
s
,
meth_name
))
self
.
assertFalse
(
meth
(
s
),
'%a.%s() is False'
%
(
s
,
meth_name
))
@
requires_wide_build
def
test_lower
(
self
):
def
test_lower
(
self
):
string_tests
.
CommonTest
.
test_lower
(
self
)
string_tests
.
CommonTest
.
test_lower
(
self
)
self
.
assertEqual
(
'
\
U00010427
'
.
lower
(),
'
\
U0001044F
'
)
self
.
assertEqual
(
'
\
U00010427
'
.
lower
(),
'
\
U0001044F
'
)
...
@@ -530,7 +525,6 @@ class UnicodeTest(string_tests.CommonTest,
...
@@ -530,7 +525,6 @@ class UnicodeTest(string_tests.CommonTest,
self
.
assertEqual
(
'X
\
U00010427
x
\
U0001044F
'
.
lower
(),
self
.
assertEqual
(
'X
\
U00010427
x
\
U0001044F
'
.
lower
(),
'x
\
U0001044F
x
\
U0001044F
'
)
'x
\
U0001044F
x
\
U0001044F
'
)
@
requires_wide_build
def
test_upper
(
self
):
def
test_upper
(
self
):
string_tests
.
CommonTest
.
test_upper
(
self
)
string_tests
.
CommonTest
.
test_upper
(
self
)
self
.
assertEqual
(
'
\
U0001044F
'
.
upper
(),
'
\
U00010427
'
)
self
.
assertEqual
(
'
\
U0001044F
'
.
upper
(),
'
\
U00010427
'
)
...
@@ -541,7 +535,6 @@ class UnicodeTest(string_tests.CommonTest,
...
@@ -541,7 +535,6 @@ class UnicodeTest(string_tests.CommonTest,
self
.
assertEqual
(
'X
\
U00010427
x
\
U0001044F
'
.
upper
(),
self
.
assertEqual
(
'X
\
U00010427
x
\
U0001044F
'
.
upper
(),
'X
\
U00010427
X
\
U00010427
'
)
'X
\
U00010427
X
\
U00010427
'
)
@
requires_wide_build
def
test_capitalize
(
self
):
def
test_capitalize
(
self
):
string_tests
.
CommonTest
.
test_capitalize
(
self
)
string_tests
.
CommonTest
.
test_capitalize
(
self
)
self
.
assertEqual
(
'
\
U0001044F
'
.
capitalize
(),
'
\
U00010427
'
)
self
.
assertEqual
(
'
\
U0001044F
'
.
capitalize
(),
'
\
U00010427
'
)
...
@@ -554,7 +547,6 @@ class UnicodeTest(string_tests.CommonTest,
...
@@ -554,7 +547,6 @@ class UnicodeTest(string_tests.CommonTest,
self
.
assertEqual
(
'X
\
U00010427
x
\
U0001044F
'
.
capitalize
(),
self
.
assertEqual
(
'X
\
U00010427
x
\
U0001044F
'
.
capitalize
(),
'X
\
U0001044F
x
\
U0001044F
'
)
'X
\
U0001044F
x
\
U0001044F
'
)
@
requires_wide_build
def
test_title
(
self
):
def
test_title
(
self
):
string_tests
.
MixinStrUnicodeUserStringTest
.
test_title
(
self
)
string_tests
.
MixinStrUnicodeUserStringTest
.
test_title
(
self
)
self
.
assertEqual
(
'
\
U0001044F
'
.
title
(),
'
\
U00010427
'
)
self
.
assertEqual
(
'
\
U0001044F
'
.
title
(),
'
\
U00010427
'
)
...
@@ -569,7 +561,6 @@ class UnicodeTest(string_tests.CommonTest,
...
@@ -569,7 +561,6 @@ class UnicodeTest(string_tests.CommonTest,
self
.
assertEqual
(
'X
\
U00010427
x
\
U0001044F
X
\
U00010427
x
\
U0001044F
'
.
title
(),
self
.
assertEqual
(
'X
\
U00010427
x
\
U0001044F
X
\
U00010427
x
\
U0001044F
'
.
title
(),
'X
\
U0001044F
x
\
U0001044F
X
\
U0001044F
x
\
U0001044F
'
)
'X
\
U0001044F
x
\
U0001044F
X
\
U0001044F
x
\
U0001044F
'
)
@
requires_wide_build
def
test_swapcase
(
self
):
def
test_swapcase
(
self
):
string_tests
.
CommonTest
.
test_swapcase
(
self
)
string_tests
.
CommonTest
.
test_swapcase
(
self
)
self
.
assertEqual
(
'
\
U0001044F
'
.
swapcase
(),
'
\
U00010427
'
)
self
.
assertEqual
(
'
\
U0001044F
'
.
swapcase
(),
'
\
U00010427
'
)
...
@@ -1114,15 +1105,12 @@ class UnicodeTest(string_tests.CommonTest,
...
@@ -1114,15 +1105,12 @@ class UnicodeTest(string_tests.CommonTest,
def
test_codecs_utf8
(
self
):
def
test_codecs_utf8
(
self
):
self
.
assertEqual
(
''
.
encode
(
'utf-8'
),
b''
)
self
.
assertEqual
(
''
.
encode
(
'utf-8'
),
b''
)
self
.
assertEqual
(
'
\
u20ac
'
.
encode
(
'utf-8'
),
b'
\
xe2
\
x82
\
xac
'
)
self
.
assertEqual
(
'
\
u20ac
'
.
encode
(
'utf-8'
),
b'
\
xe2
\
x82
\
xac
'
)
if
sys
.
maxunicode
==
65535
:
self
.
assertEqual
(
'
\
U00010002
'
.
encode
(
'utf-8'
),
b'
\
xf0
\
x90
\
x80
\
x82
'
)
self
.
assertEqual
(
'
\
ud800
\
udc02
'
.
encode
(
'utf-8'
),
b'
\
xf0
\
x90
\
x80
\
x82
'
)
self
.
assertEqual
(
'
\
U00023456
'
.
encode
(
'utf-8'
),
b'
\
xf0
\
xa3
\
x91
\
x96
'
)
self
.
assertEqual
(
'
\
ud84d
\
udc56
'
.
encode
(
'utf-8'
),
b'
\
xf0
\
xa3
\
x91
\
x96
'
)
self
.
assertEqual
(
'
\
ud800
'
.
encode
(
'utf-8'
,
'surrogatepass'
),
b'
\
xed
\
xa0
\
x80
'
)
self
.
assertEqual
(
'
\
ud800
'
.
encode
(
'utf-8'
,
'surrogatepass'
),
b'
\
xed
\
xa0
\
x80
'
)
self
.
assertEqual
(
'
\
udc00
'
.
encode
(
'utf-8'
,
'surrogatepass'
),
b'
\
xed
\
xb0
\
x80
'
)
self
.
assertEqual
(
'
\
udc00
'
.
encode
(
'utf-8'
,
'surrogatepass'
),
b'
\
xed
\
xb0
\
x80
'
)
if
sys
.
maxunicode
==
65535
:
self
.
assertEqual
((
'
\
U00010002
'
*
10
).
encode
(
'utf-8'
),
self
.
assertEqual
(
b'
\
xf0
\
x90
\
x80
\
x82
'
*
10
)
(
'
\
ud800
\
udc02
'
*
1000
).
encode
(
'utf-8'
),
b'
\
xf0
\
x90
\
x80
\
x82
'
*
1000
)
self
.
assertEqual
(
self
.
assertEqual
(
'
\
u6b63
\
u78ba
\
u306b
\
u8a00
\
u3046
\
u3068
\
u7ffb
\
u8a33
\
u306f
'
'
\
u6b63
\
u78ba
\
u306b
\
u8a00
\
u3046
\
u3068
\
u7ffb
\
u8a33
\
u306f
'
'
\
u3055
\
u308c
\
u3066
\
u3044
\
u307e
\
u305b
\
u3093
\
u3002
\
u4e00
'
'
\
u3055
\
u308c
\
u3066
\
u3044
\
u307e
\
u305b
\
u3093
\
u3002
\
u4e00
'
...
...
Tools/pybench/pybench.py
View file @
d9488c68
...
@@ -107,6 +107,7 @@ def get_machine_details():
...
@@ -107,6 +107,7 @@ def get_machine_details():
print
(
'Getting machine details...'
)
print
(
'Getting machine details...'
)
buildno
,
builddate
=
platform
.
python_build
()
buildno
,
builddate
=
platform
.
python_build
()
python
=
platform
.
python_version
()
python
=
platform
.
python_version
()
# XXX this is now always UCS4, maybe replace it with 'PEP393' in 3.3+?
if
sys
.
maxunicode
==
65535
:
if
sys
.
maxunicode
==
65535
:
# UCS2 build (standard)
# UCS2 build (standard)
unitype
=
'UCS2'
unitype
=
'UCS2'
...
...
Tools/unicode/comparecodecs.py
View file @
d9488c68
...
@@ -14,7 +14,7 @@ def compare_codecs(encoding1, encoding2):
...
@@ -14,7 +14,7 @@ def compare_codecs(encoding1, encoding2):
print
(
'Comparing encoding/decoding of %r and %r'
%
(
encoding1
,
encoding2
))
print
(
'Comparing encoding/decoding of %r and %r'
%
(
encoding1
,
encoding2
))
mismatch
=
0
mismatch
=
0
# Check encoding
# Check encoding
for
i
in
range
(
sys
.
maxunicode
):
for
i
in
range
(
sys
.
maxunicode
+
1
):
u
=
chr
(
i
)
u
=
chr
(
i
)
try
:
try
:
c1
=
u
.
encode
(
encoding1
)
c1
=
u
.
encode
(
encoding1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment