Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
9c121069
Commit
9c121069
authored
Aug 05, 2007
by
Martin v. Löwis
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Change PyUnicode_FromString[AndSize] to expect UTF-8.
parent
64ce5052
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
25 additions
and
20 deletions
+25
-20
Doc/api/concrete.tex
Doc/api/concrete.tex
+10
-3
Objects/bytesobject.c
Objects/bytesobject.c
+7
-5
Objects/unicodeobject.c
Objects/unicodeobject.c
+8
-12
No files found.
Doc/api/concrete.tex
View file @
9c121069
...
...
@@ -996,10 +996,11 @@ use these APIs:
\var
{
u
}
is
\NULL
{}
.
\end{cfuncdesc}
\begin{cfuncdesc}
{
PyObject*
}{
PyUnicode
_
FromString
}{
const char *u
}
\begin{cfuncdesc}
{
PyObject*
}{
PyUnicode
_
FromStringAndSize
}{
const char *u,
Py
_
ssize
_
t size
}
Create a Unicode Object from the char buffer
\var
{
u
}
.
\var
{
u
}
must be 0-terminated, the bytes will be interpreted as
being latin-1 encoded.
\var
{
u
}
may also be
\NULL
{}
which causes the
The bytes will be interpreted as being UTF-8 encoded.
\var
{
u
}
may also be
\NULL
{}
which causes the
contents to be undefined. It is the user's responsibility to fill
in the needed data. The buffer is copied into the new object.
If the buffer is not
\NULL
{}
, the return value might be a shared object.
...
...
@@ -1008,6 +1009,12 @@ use these APIs:
\versionadded
{
3.0
}
\end{cfuncdesc}
\begin{cfuncdesc}
{
PyObject*
}{
PyUnicode
_
FromString
}{
const char*u
}
Create a Unicode object from a UTF-8 encoded null-terminated
char buffer
\var
{
u
}
.
\versionadded
{
3.0
}
\end{cfuncdesc}
\begin{cfuncdesc}
{
PyObject*
}{
PyUnicode
_
FromFormat
}{
const char *format, ...
}
Take a C
\cfunction
{
printf()
}
-style
\var
{
format
}
string and a
variable number of arguments, calculate the size of the resulting
...
...
Objects/bytesobject.c
View file @
9c121069
...
...
@@ -2724,11 +2724,13 @@ PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
static
PyObject
*
bytes_reduce
(
PyBytesObject
*
self
)
{
return
Py_BuildValue
(
"(O(s#s))"
,
Py_Type
(
self
),
self
->
ob_bytes
==
NULL
?
""
:
self
->
ob_bytes
,
Py_Size
(
self
),
"latin-1"
);
PyObject
*
latin1
;
if
(
self
->
ob_bytes
)
latin1
=
PyUnicode_DecodeLatin1
(
self
->
ob_bytes
,
Py_Size
(
self
),
NULL
);
else
latin1
=
PyUnicode_FromString
(
""
);
return
Py_BuildValue
(
"(O(Ns))"
,
Py_Type
(
self
),
latin1
,
"latin-1"
);
}
static
PySequenceMethods
bytes_as_sequence
=
{
...
...
Objects/unicodeobject.c
View file @
9c121069
...
...
@@ -427,7 +427,9 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
{
PyUnicodeObject
*
unicode
;
/* If the Unicode data is known at construction time, we can apply
some optimizations which share commonly used objects. */
some optimizations which share commonly used objects.
Also, this means the input must be UTF-8, so fall back to the
UTF-8 decoder at the end. */
if
(
u
!=
NULL
)
{
/* Optimization for empty strings */
...
...
@@ -436,8 +438,9 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
return
(
PyObject
*
)
unicode_empty
;
}
/* Single characters are shared when using this constructor */
if
(
size
==
1
)
{
/* Single characters are shared when using this constructor.
Restrict to ASCII, since the input must be UTF-8. */
if
(
size
==
1
&&
Py_CHARMASK
(
*
u
)
<
128
)
{
unicode
=
unicode_latin1
[
Py_CHARMASK
(
*
u
)];
if
(
!
unicode
)
{
unicode
=
_PyUnicode_New
(
1
);
...
...
@@ -449,21 +452,14 @@ PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
Py_INCREF
(
unicode
);
return
(
PyObject
*
)
unicode
;
}
return
PyUnicode_DecodeUTF8
(
u
,
size
,
NULL
);
}
unicode
=
_PyUnicode_New
(
size
);
if
(
!
unicode
)
return
NULL
;
/* Copy the Unicode data into the new object */
if
(
u
!=
NULL
)
{
Py_UNICODE
*
p
=
unicode
->
str
;
while
(
size
--
)
*
p
++
=
Py_CHARMASK
(
*
u
++
);
/* Don't need to write trailing 0 because
that's already done by _PyUnicode_New */
}
return
(
PyObject
*
)
unicode
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment