Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
35d18782
Commit
35d18782
authored
Oct 07, 2011
by
Barry Warsaw
Browse files
Options
Browse Files
Download
Plain Diff
Trunk merge
parents
cb9c5ba7
532c363a
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
86 additions
and
125 deletions
+86
-125
Doc/c-api/unicode.rst
Doc/c-api/unicode.rst
+3
-21
Include/unicodeobject.h
Include/unicodeobject.h
+10
-25
Lib/test/test_pkgutil.py
Lib/test/test_pkgutil.py
+2
-2
Modules/_io/textio.c
Modules/_io/textio.c
+20
-24
Modules/_json.c
Modules/_json.c
+2
-2
Modules/_sre.c
Modules/_sre.c
+1
-1
Objects/stringlib/eq.h
Objects/stringlib/eq.h
+1
-1
Objects/unicodeobject.c
Objects/unicodeobject.c
+45
-47
Python/formatter_unicode.c
Python/formatter_unicode.c
+2
-2
No files found.
Doc/c-api/unicode.rst
View file @
35d18782
...
...
@@ -99,7 +99,7 @@ access internal read-only data of Unicode objects:
.. deprecated-removed:: 3.3 4.0
Part of the old-style Unicode API, please migrate to using
:c:func:`PyUnicode_GET_LENGTH` or :c:func:`PyUnicode_KIND_SIZE`.
:c:func:`PyUnicode_GET_LENGTH`.
.. c:function:: Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *o)
...
...
@@ -149,9 +149,8 @@ access internal read-only data of Unicode objects:
Return a pointer to the canonical representation cast to UCS1, UCS2 or UCS4
integer types for direct character access. No checks are performed if the
canonical representation has the correct character size; use
:c:func:`PyUnicode_CHARACTER_SIZE` or :c:func:`PyUnicode_KIND` to select the
right macro. Make sure :c:func:`PyUnicode_READY` has been called before
accessing this.
:c:func:`PyUnicode_KIND` to select the right macro. Make sure
:c:func:`PyUnicode_READY` has been called before accessing this.
.. versionadded:: 3.3
...
...
@@ -176,15 +175,6 @@ access internal read-only data of Unicode objects:
.. versionadded:: 3.3
.. c:function:: int PyUnicode_CHARACTER_SIZE(PyObject *o)
Return the number of bytes the string uses to represent single characters;
this can be 1, 2 or 4. *o* has to be a Unicode object in the "canonical"
representation (not checked).
.. versionadded:: 3.3
.. c:function:: void* PyUnicode_DATA(PyObject *o)
Return a void pointer to the raw unicode buffer. *o* has to be a Unicode
...
...
@@ -193,14 +183,6 @@ access internal read-only data of Unicode objects:
.. versionadded:: 3.3
.. c:function:: int PyUnicode_KIND_SIZE(int kind, Py_ssize_t index)
Compute ``index * char_size`` where ``char_size`` is ``2**(kind - 1)``. The
index is a character index, the result is a size in bytes.
.. versionadded:: 3.3
.. c:function:: void PyUnicode_WRITE(int kind, void *data, Py_ssize_t index, \
Py_UCS4 value)
...
...
Include/unicodeobject.h
View file @
35d18782
...
...
@@ -305,12 +305,12 @@ typedef struct {
* character type = Py_UCS2 (16 bits, unsigned)
* at least one character must be in range U+0100-U+FFFF
- PyUnicode_4BYTE_KIND (3):
- PyUnicode_4BYTE_KIND (4):
* character type = Py_UCS4 (32 bits, unsigned)
* at least one character must be in range U+10000-U+10FFFF
*/
unsigned
int
kind
:
2
;
unsigned
int
kind
:
3
;
/* Compact is with respect to the allocation scheme. Compact unicode
objects only require one memory block while non-compact objects use
one block for the PyUnicodeObject struct and another for its data
...
...
@@ -424,29 +424,21 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
#define PyUnicode_IS_COMPACT_ASCII(op) \
(PyUnicode_IS_ASCII(op) && PyUnicode_IS_COMPACT(op))
enum
PyUnicode_Kind
{
/* String contains only wstr byte characters. This is only possible
when the string was created with a legacy API and _PyUnicode_Ready()
has not been called yet. */
#define PyUnicode_WCHAR_KIND 0
PyUnicode_WCHAR_KIND
=
0
,
/* Return values of the PyUnicode_KIND() macro: */
#define PyUnicode_1BYTE_KIND 1
#define PyUnicode_2BYTE_KIND 2
#define PyUnicode_4BYTE_KIND 3
/* Return the number of bytes the string uses to represent single characters,
this can be 1, 2 or 4.
See also PyUnicode_KIND_SIZE(). */
#define PyUnicode_CHARACTER_SIZE(op) \
(((Py_ssize_t)1 << (PyUnicode_KIND(op) - 1)))
PyUnicode_1BYTE_KIND
=
1
,
PyUnicode_2BYTE_KIND
=
2
,
PyUnicode_4BYTE_KIND
=
4
};
/* Return pointers to the canonical representation cast to unsigned char,
Py_UCS2, or Py_UCS4 for direct character access.
No checks are performed, use PyUnicode_CHARACTER_SIZE or PyUnicode_KIND() before to ensure
these will work correctly. */
No checks are performed, use PyUnicode_KIND() before to ensure
these will work correctly. */
#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
...
...
@@ -473,13 +465,6 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
_PyUnicode_NONCOMPACT_DATA(op))
/* Compute (index * char_size) where char_size is 2 ** (kind - 1).
The index is a character index, the result is a size in bytes.
See also PyUnicode_CHARACTER_SIZE(). */
#define PyUnicode_KIND_SIZE(kind, index) \
(((Py_ssize_t)(index)) << ((kind) - 1))
/* In the access macros below, "kind" may be evaluated more than once.
All other macro parameters are evaluated exactly once, so it is safe
to put side effects into them (such as increasing the index). */
...
...
Lib/test/test_pkgutil.py
View file @
35d18782
...
...
@@ -15,11 +15,11 @@ class PkgutilTests(unittest.TestCase):
def
setUp
(
self
):
self
.
dirname
=
tempfile
.
mkdtemp
()
self
.
addCleanup
(
shutil
.
rmtree
,
self
.
dirname
)
sys
.
path
.
insert
(
0
,
self
.
dirname
)
def
tearDown
(
self
):
del
sys
.
path
[
0
]
shutil
.
rmtree
(
self
.
dirname
)
def
test_getdata_filesys
(
self
):
pkg
=
'test_getdata_filesys'
...
...
@@ -91,9 +91,9 @@ class PkgutilTests(unittest.TestCase):
# this does not appear to create an unreadable dir on Windows
# but the test should not fail anyway
os
.
mkdir
(
d
,
0
)
self
.
addCleanup
(
os
.
rmdir
,
d
)
for
t
in
pkgutil
.
walk_packages
(
path
=
[
self
.
dirname
]):
self
.
fail
(
"unexpected package found"
)
os
.
rmdir
(
d
)
class
PkgutilPEP302Tests
(
unittest
.
TestCase
):
...
...
Modules/_io/textio.c
View file @
35d18782
...
...
@@ -291,9 +291,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
kind
=
PyUnicode_KIND
(
modified
);
out
=
PyUnicode_DATA
(
modified
);
PyUnicode_WRITE
(
kind
,
PyUnicode_DATA
(
modified
),
0
,
'\r'
);
memcpy
(
out
+
PyUnicode_KIND_SIZE
(
kind
,
1
),
PyUnicode_DATA
(
output
),
PyUnicode_KIND_SIZE
(
kind
,
output_len
));
memcpy
(
out
+
kind
,
PyUnicode_DATA
(
output
),
kind
*
output_len
);
Py_DECREF
(
output
);
output
=
modified
;
/* output remains ready */
self
->
pendingcr
=
0
;
...
...
@@ -336,7 +334,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
for the \r *byte* with the libc's optimized memchr.
*/
if
(
seennl
==
SEEN_LF
||
seennl
==
0
)
{
only_lf
=
(
memchr
(
in_str
,
'\r'
,
PyUnicode_KIND_SIZE
(
kind
,
len
)
)
==
NULL
);
only_lf
=
(
memchr
(
in_str
,
'\r'
,
kind
*
len
)
==
NULL
);
}
if
(
only_lf
)
{
...
...
@@ -344,7 +342,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
(there's nothing else to be done, even when in translation mode)
*/
if
(
seennl
==
0
&&
memchr
(
in_str
,
'\n'
,
PyUnicode_KIND_SIZE
(
kind
,
len
)
)
!=
NULL
)
{
memchr
(
in_str
,
'\n'
,
kind
*
len
)
!=
NULL
)
{
Py_ssize_t
i
=
0
;
for
(;;)
{
Py_UCS4
c
;
...
...
@@ -403,7 +401,7 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
when there is something to translate. On the other hand,
we already know there is a \r byte, so chances are high
that something needs to be done. */
translated
=
PyMem_Malloc
(
PyUnicode_KIND_SIZE
(
kind
,
len
)
);
translated
=
PyMem_Malloc
(
kind
*
len
);
if
(
translated
==
NULL
)
{
PyErr_NoMemory
();
goto
error
;
...
...
@@ -1576,15 +1574,14 @@ textiowrapper_read(textio *self, PyObject *args)
static
char
*
find_control_char
(
int
kind
,
char
*
s
,
char
*
end
,
Py_UCS4
ch
)
{
int
size
=
PyUnicode_KIND_SIZE
(
kind
,
1
);
for
(;;)
{
while
(
PyUnicode_READ
(
kind
,
s
,
0
)
>
ch
)
s
+=
size
;
s
+=
kind
;
if
(
PyUnicode_READ
(
kind
,
s
,
0
)
==
ch
)
return
s
;
if
(
s
==
end
)
return
NULL
;
s
+=
size
;
s
+=
kind
;
}
}
...
...
@@ -1593,14 +1590,13 @@ _PyIO_find_line_ending(
int
translated
,
int
universal
,
PyObject
*
readnl
,
int
kind
,
char
*
start
,
char
*
end
,
Py_ssize_t
*
consumed
)
{
int
size
=
PyUnicode_KIND_SIZE
(
kind
,
1
);
Py_ssize_t
len
=
((
char
*
)
end
-
(
char
*
)
start
)
/
size
;
Py_ssize_t
len
=
((
char
*
)
end
-
(
char
*
)
start
)
/
kind
;
if
(
translated
)
{
/* Newlines are already translated, only search for \n */
char
*
pos
=
find_control_char
(
kind
,
start
,
end
,
'\n'
);
if
(
pos
!=
NULL
)
return
(
pos
-
start
)
/
size
+
1
;
return
(
pos
-
start
)
/
kind
+
1
;
else
{
*
consumed
=
len
;
return
-
1
;
...
...
@@ -1616,20 +1612,20 @@ _PyIO_find_line_ending(
/* Fast path for non-control chars. The loop always ends
since the Unicode string is NUL-terminated. */
while
(
PyUnicode_READ
(
kind
,
s
,
0
)
>
'\r'
)
s
+=
size
;
s
+=
kind
;
if
(
s
>=
end
)
{
*
consumed
=
len
;
return
-
1
;
}
ch
=
PyUnicode_READ
(
kind
,
s
,
0
);
s
+=
size
;
s
+=
kind
;
if
(
ch
==
'\n'
)
return
(
s
-
start
)
/
size
;
return
(
s
-
start
)
/
kind
;
if
(
ch
==
'\r'
)
{
if
(
PyUnicode_READ
(
kind
,
s
,
0
)
==
'\n'
)
return
(
s
-
start
)
/
size
+
1
;
return
(
s
-
start
)
/
kind
+
1
;
else
return
(
s
-
start
)
/
size
;
return
(
s
-
start
)
/
kind
;
}
}
}
...
...
@@ -1642,13 +1638,13 @@ _PyIO_find_line_ending(
if
(
readnl_len
==
1
)
{
char
*
pos
=
find_control_char
(
kind
,
start
,
end
,
nl
[
0
]);
if
(
pos
!=
NULL
)
return
(
pos
-
start
)
/
size
+
1
;
return
(
pos
-
start
)
/
kind
+
1
;
*
consumed
=
len
;
return
-
1
;
}
else
{
char
*
s
=
start
;
char
*
e
=
end
-
(
readnl_len
-
1
)
*
size
;
char
*
e
=
end
-
(
readnl_len
-
1
)
*
kind
;
char
*
pos
;
if
(
e
<
s
)
e
=
s
;
...
...
@@ -1662,14 +1658,14 @@ _PyIO_find_line_ending(
break
;
}
if
(
i
==
readnl_len
)
return
(
pos
-
start
)
/
size
+
readnl_len
;
s
=
pos
+
size
;
return
(
pos
-
start
)
/
kind
+
readnl_len
;
s
=
pos
+
kind
;
}
pos
=
find_control_char
(
kind
,
e
,
end
,
nl
[
0
]);
if
(
pos
==
NULL
)
*
consumed
=
len
;
else
*
consumed
=
(
pos
-
start
)
/
size
;
*
consumed
=
(
pos
-
start
)
/
kind
;
return
-
1
;
}
}
...
...
@@ -1738,8 +1734,8 @@ _textiowrapper_readline(textio *self, Py_ssize_t limit)
endpos
=
_PyIO_find_line_ending
(
self
->
readtranslate
,
self
->
readuniversal
,
self
->
readnl
,
kind
,
ptr
+
PyUnicode_KIND_SIZE
(
kind
,
start
)
,
ptr
+
PyUnicode_KIND_SIZE
(
kind
,
line_len
)
,
ptr
+
kind
*
start
,
ptr
+
kind
*
line_len
,
&
consumed
);
if
(
endpos
>=
0
)
{
endpos
+=
start
;
...
...
Modules/_json.c
View file @
35d18782
...
...
@@ -365,7 +365,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
APPEND_OLD_CHUNK
chunk
=
PyUnicode_FromKindAndData
(
kind
,
(
char
*
)
buf
+
PyUnicode_KIND_SIZE
(
kind
,
end
)
,
(
char
*
)
buf
+
kind
*
end
,
next
-
end
);
if
(
chunk
==
NULL
)
{
goto
bail
;
...
...
@@ -931,7 +931,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
if
(
custom_func
)
{
/* copy the section we determined to be a number */
numstr
=
PyUnicode_FromKindAndData
(
kind
,
(
char
*
)
str
+
PyUnicode_KIND_SIZE
(
kind
,
start
)
,
(
char
*
)
str
+
kind
*
start
,
idx
-
start
);
if
(
numstr
==
NULL
)
return
NULL
;
...
...
Modules/_sre.c
View file @
35d18782
...
...
@@ -1669,7 +1669,7 @@ getstring(PyObject* string, Py_ssize_t* p_length,
return
NULL
;
ptr
=
PyUnicode_DATA
(
string
);
*
p_length
=
PyUnicode_GET_LENGTH
(
string
);
*
p_charsize
=
PyUnicode_
CHARACTER_SIZE
(
string
);
*
p_charsize
=
PyUnicode_
KIND
(
string
);
*
p_logical_charsize
=
4
;
return
ptr
;
}
...
...
Objects/stringlib/eq.h
View file @
35d18782
...
...
@@ -30,5 +30,5 @@ unicode_eq(PyObject *aa, PyObject *bb)
PyUnicode_GET_LENGTH
(
a
)
==
1
)
return
1
;
return
memcmp
(
PyUnicode_1BYTE_DATA
(
a
),
PyUnicode_1BYTE_DATA
(
b
),
PyUnicode_GET_LENGTH
(
a
)
*
PyUnicode_
CHARACTER_SIZE
(
a
))
==
0
;
PyUnicode_GET_LENGTH
(
a
)
*
PyUnicode_
KIND
(
a
))
==
0
;
}
Objects/unicodeobject.c
View file @
35d18782
This diff is collapsed.
Click to expand it.
Python/formatter_unicode.c
View file @
35d18782
...
...
@@ -604,9 +604,9 @@ fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec,
#endif
_PyUnicode_InsertThousandsGrouping
(
out
,
kind
,
(
char
*
)
data
+
PyUnicode_KIND_SIZE
(
kind
,
pos
)
,
(
char
*
)
data
+
kind
*
pos
,
spec
->
n_grouped_digits
,
pdigits
+
PyUnicode_KIND_SIZE
(
kind
,
d_pos
)
,
pdigits
+
kind
*
d_pos
,
spec
->
n_digits
,
spec
->
n_min_width
,
locale
->
grouping
,
locale
->
thousands_sep
);
#ifndef NDEBUG
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment