Commit 01be3445 authored by Stefan Behnel's avatar Stefan Behnel

added .pxd for PyUnicode C-API to Cython/Includes/

parent fdf5beeb
......@@ -136,6 +136,7 @@ from python_long cimport *
from python_float cimport *
from python_complex cimport *
from python_string cimport *
from python_unicode cimport *
from python_dict cimport *
from python_instance cimport *
from python_function cimport *
......
cdef extern from *:
ctypedef int Py_UNICODE
# Return true if the object o is a Unicode object or an instance
# of a Unicode subtype. Changed in version 2.2: Allowed subtypes
# to be accepted.
bint PyUnicode_Check(object o)
# Return true if the object o is a Unicode object, but not an
# instance of a subtype. New in version 2.2.
bint PyUnicode_CheckExact(object o)
# Return the size of the object. o has to be a PyUnicodeObject
# (not checked).
Py_ssize_t PyUnicode_GET_SIZE(object o)
# Return the size of the object's internal buffer in bytes. o has
# to be a PyUnicodeObject (not checked).
Py_ssize_t PyUnicode_GET_DATA_SIZE(object o)
# Return a pointer to the internal Py_UNICODE buffer of the
# object. o has to be a PyUnicodeObject (not checked).
Py_UNICODE* PyUnicode_AS_UNICODE(object o)
# Return a pointer to the internal buffer of the object. o has to
# be a PyUnicodeObject (not checked).
char* PyUnicode_AS_DATA(object o)
# Return 1 or 0 depending on whether ch is a whitespace character.
bint Py_UNICODE_ISSPACE(Py_UNICODE ch)
# Return 1 or 0 depending on whether ch is a lowercase character.
bint Py_UNICODE_ISLOWER(Py_UNICODE ch)
# Return 1 or 0 depending on whether ch is an uppercase character.
bint Py_UNICODE_ISUPPER(Py_UNICODE ch)
# Return 1 or 0 depending on whether ch is a titlecase character.
bint Py_UNICODE_ISTITLE(Py_UNICODE ch)
# Return 1 or 0 depending on whether ch is a linebreak character.
bint Py_UNICODE_ISLINEBREAK(Py_UNICODE ch)
# Return 1 or 0 depending on whether ch is a decimal character.
bint Py_UNICODE_ISDECIMAL(Py_UNICODE ch)
# Return 1 or 0 depending on whether ch is a digit character.
bint Py_UNICODE_ISDIGIT(Py_UNICODE ch)
# Return 1 or 0 depending on whether ch is a numeric character.
bint Py_UNICODE_ISNUMERIC(Py_UNICODE ch)
# Return 1 or 0 depending on whether ch is an alphabetic character.
bint Py_UNICODE_ISALPHA(Py_UNICODE ch)
# Return 1 or 0 depending on whether ch is an alphanumeric character.
bint Py_UNICODE_ISALNUM(Py_UNICODE ch)
# Return the character ch converted to lower case.
Py_UNICODE Py_UNICODE_TOLOWER(Py_UNICODE ch)
# Return the character ch converted to upper case.
Py_UNICODE Py_UNICODE_TOUPPER(Py_UNICODE ch)
# Return the character ch converted to title case.
Py_UNICODE Py_UNICODE_TOTITLE(Py_UNICODE ch)
# Return the character ch converted to a decimal positive
# integer. Return -1 if this is not possible. This macro does not
# raise exceptions.
int Py_UNICODE_TODECIMAL(Py_UNICODE ch)
# Return the character ch converted to a single digit
# integer. Return -1 if this is not possible. This macro does not
# raise exceptions.
int Py_UNICODE_TODIGIT(Py_UNICODE ch)
# Return the character ch converted to a double. Return -1.0 if
# this is not possible. This macro does not raise exceptions.
double Py_UNICODE_TONUMERIC(Py_UNICODE ch)
# To create Unicode objects and access their basic sequence
# properties, use these APIs:
# Create a Unicode Object from the Py_UNICODE buffer u of the
# given size. u may be NULL which causes the contents to be
# undefined. It is the user's responsibility to fill in the needed
# data. The buffer is copied into the new object. If the buffer is
# not NULL, the return value might be a shared object. Therefore,
# modification of the resulting Unicode object is only allowed
# when u is NULL.
object PyUnicode_FromUnicode(Py_UNICODE *u, Py_ssize_t size)
# Return a read-only pointer to the Unicode object's internal
# Py_UNICODE buffer, NULL if unicode is not a Unicode object.
Py_UNICODE* PyUnicode_AsUnicode(object o)
# Return the length of the Unicode object.
Py_ssize_t PyUnicode_GetSize(object o)
# Coerce an encoded object obj to an Unicode object and return a
# reference with incremented refcount.
# String and other char buffer compatible objects are decoded
# according to the given encoding and using the error handling
# defined by errors. Both can be NULL to have the interface use
# the default values (see the next section for details).
# All other objects, including Unicode objects, cause a TypeError
# to be set.
object PyUnicode_FromEncodedObject(object o, char *encoding, char *errors)
# Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict")
# which is used throughout the interpreter whenever coercion to
# Unicode is needed.
object PyUnicode_FromObject(object obj)
# If the platform supports wchar_t and provides a header file
# wchar.h, Python can interface directly to this type using the
# following functions. Support is optimized if Python's own
# Py_UNICODE type is identical to the system's wchar_t.
#ctypedef int wchar_t
# Create a Unicode object from the wchar_t buffer w of the given
# size. Return NULL on failure.
#PyObject* PyUnicode_FromWideChar(wchar_t *w, Py_ssize_t size)
#Py_ssize_t PyUnicode_AsWideChar(object o, wchar_t *w, Py_ssize_t size)
# Codecs
# Create a Unicode object by decoding size bytes of the encoded
# string s. encoding and errors have the same meaning as the
# parameters of the same name in the unicode() builtin
# function. The codec to be used is looked up using the Python
# codec registry. Return NULL if an exception was raised by the
# codec.
object PyUnicode_Decode(char *s, Py_ssize_t size, char *encoding, char *errors)
# Encode the Py_UNICODE buffer of the given size and return a
# Python string object. encoding and errors have the same meaning
# as the parameters of the same name in the Unicode encode()
# method. The codec to be used is looked up using the Python codec
# registry. Return NULL if an exception was raised by the codec.
object PyUnicode_Encode(Py_UNICODE *s, Py_ssize_t size,
char *encoding, char *errors)
# Encode a Unicode object and return the result as Python string
# object. encoding and errors have the same meaning as the
# parameters of the same name in the Unicode encode() method. The
# codec to be used is looked up using the Python codec
# registry. Return NULL if an exception was raised by the codec.
object PyUnicode_AsEncodedString(object unicode, char *encoding, char *errors)
# These are the UTF-8 codec APIs:
# Create a Unicode object by decoding size bytes of the UTF-8
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeUTF8(char *s, Py_ssize_t size, char *errors)
# If consumed is NULL, behave like PyUnicode_DecodeUTF8(). If
# consumed is not NULL, trailing incomplete UTF-8 byte sequences
# will not be treated as an error. Those bytes will not be decoded
# and the number of bytes that have been decoded will be stored in
# consumed. New in version 2.4.
object PyUnicode_DecodeUTF8Stateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
# Encode the Py_UNICODE buffer of the given size using UTF-8 and
# return a Python string object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeUTF8(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode objects using UTF-8 and return the result as Python string object. Error handling is ``strict''. Return NULL if an exception was raised by the codec.
object PyUnicode_AsUTF8String(object unicode)
# These are the UTF-16 codec APIs:
# Decode length bytes from a UTF-16 encoded buffer string and
# return the corresponding Unicode object. errors (if non-NULL)
# defines the error handling. It defaults to ``strict''.
#
# If byteorder is non-NULL, the decoder starts decoding using the
# given byte order:
#
# *byteorder == -1: little endian
# *byteorder == 0: native order
# *byteorder == 1: big endian
#
# and then switches if the first two bytes of the input data are a
# byte order mark (BOM) and the specified byte order is native
# order. This BOM is not copied into the resulting Unicode
# string. After completion, *byteorder is set to the current byte
# order at the.
#
# If byteorder is NULL, the codec starts in native order mode.
object PyUnicode_DecodeUTF16(char *s, Py_ssize_t size, char *errors, int *byteorder)
# If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If
# consumed is not NULL, PyUnicode_DecodeUTF16Stateful() will not
# treat trailing incomplete UTF-16 byte sequences (such as an odd
# number of bytes or a split surrogate pair) as an error. Those
# bytes will not be decoded and the number of bytes that have been
# decoded will be stored in consumed. New in version 2.4.
object PyUnicode_DecodeUTF16Stateful(char *s, Py_ssize_t size, char *errors, int *byteorder, Py_ssize_t *consumed)
# Return a Python string object holding the UTF-16 encoded value
# of the Unicode data in s. If byteorder is not 0, output is
# written according to the following byte order:
#
# byteorder == -1: little endian
# byteorder == 0: native byte order (writes a BOM mark)
# byteorder == 1: big endian
#
# If byteorder is 0, the output string will always start with the
# Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark
# is prepended.
#
# If Py_UNICODE_WIDE is defined, a single Py_UNICODE value may get
# represented as a surrogate pair. If it is not defined, each
# Py_UNICODE values is interpreted as an UCS-2 character.
object PyUnicode_EncodeUTF16(Py_UNICODE *s, Py_ssize_t size, char *errors, int byteorder)
# Return a Python string using the UTF-16 encoding in native byte
# order. The string always starts with a BOM mark. Error handling
# is ``strict''. Return NULL if an exception was raised by the
# codec.
object PyUnicode_AsUTF16String(object unicode)
# These are the ``Unicode Escape'' codec APIs:
# Create a Unicode object by decoding size bytes of the
# Unicode-Escape encoded string s. Return NULL if an exception was
# raised by the codec.
object PyUnicode_DecodeUnicodeEscape(char *s, Py_ssize_t size, char *errors)
# Encode the Py_UNICODE buffer of the given size using
# Unicode-Escape and return a Python string object. Return NULL if
# an exception was raised by the codec.
object PyUnicode_EncodeUnicodeEscape(Py_UNICODE *s, Py_ssize_t size)
# Encode a Unicode objects using Unicode-Escape and return the
# result as Python string object. Error handling is
# ``strict''. Return NULL if an exception was raised by the codec.
object PyUnicode_AsUnicodeEscapeString(object unicode)
# These are the ``Raw Unicode Escape'' codec APIs:
# Create a Unicode object by decoding size bytes of the
# Raw-Unicode-Escape encoded string s. Return NULL if an exception
# was raised by the codec.
object PyUnicode_DecodeRawUnicodeEscape(char *s, Py_ssize_t size, char *errors)
# Encode the Py_UNICODE buffer of the given size using
# Raw-Unicode-Escape and return a Python string object. Return
# NULL if an exception was raised by the codec.
object PyUnicode_EncodeRawUnicodeEscape(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode objects using Raw-Unicode-Escape and return the
# result as Python string object. Error handling is
# ``strict''. Return NULL if an exception was raised by the codec.
object PyUnicode_AsRawUnicodeEscapeString(object unicode)
# These are the Latin-1 codec APIs: Latin-1 corresponds to the first 256 Unicode ordinals and only these are accepted by the codecs during encoding.
# Create a Unicode object by decoding size bytes of the Latin-1
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeLatin1(char *s, Py_ssize_t size, char *errors)
# Encode the Py_UNICODE buffer of the given size using Latin-1 and
# return a Python string object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeLatin1(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode objects using Latin-1 and return the result as
# Python string object. Error handling is ``strict''. Return NULL
# if an exception was raised by the codec.
object PyUnicode_AsLatin1String(object unicode)
# These are the ASCII codec APIs. Only 7-bit ASCII data is
# accepted. All other codes generate errors.
# Create a Unicode object by decoding size bytes of the ASCII
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeASCII(char *s, Py_ssize_t size, char *errors)
# Encode the Py_UNICODE buffer of the given size using ASCII and
# return a Python string object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeASCII(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode objects using ASCII and return the result as
# Python string object. Error handling is ``strict''. Return NULL
# if an exception was raised by the codec.
object PyUnicode_AsASCIIString(object o)
# These are the mapping codec APIs:
#
# This codec is special in that it can be used to implement many
# different codecs (and this is in fact what was done to obtain most
# of the standard codecs included in the encodings package). The codec
# uses mapping to encode and decode characters.
#
# Decoding mappings must map single string characters to single
# Unicode characters, integers (which are then interpreted as Unicode
# ordinals) or None (meaning "undefined mapping" and causing an
# error).
#
# Encoding mappings must map single Unicode characters to single
# string characters, integers (which are then interpreted as Latin-1
# ordinals) or None (meaning "undefined mapping" and causing an
# error).
#
# The mapping objects provided must only support the __getitem__
# mapping interface.
#
# If a character lookup fails with a LookupError, the character is
# copied as-is meaning that its ordinal value will be interpreted as
# Unicode or Latin-1 ordinal resp. Because of this, mappings only need
# to contain those mappings which map characters to different code
# points.
# Create a Unicode object by decoding size bytes of the encoded
# string s using the given mapping object. Return NULL if an
# exception was raised by the codec. If mapping is NULL latin-1
# decoding will be done. Else it can be a dictionary mapping byte
# or a unicode string, which is treated as a lookup table. Byte
# values greater that the length of the string and U+FFFE
# "characters" are treated as "undefined mapping". Changed in
# version 2.4: Allowed unicode string as mapping argument.
object PyUnicode_DecodeCharmap(char *s, Py_ssize_t size, object mapping, char *errors)
# Encode the Py_UNICODE buffer of the given size using the given
# mapping object and return a Python string object. Return NULL if
# an exception was raised by the codec.
object PyUnicode_EncodeCharmap(Py_UNICODE *s, Py_ssize_t size, object mapping, char *errors)
# Encode a Unicode objects using the given mapping object and
# return the result as Python string object. Error handling is
# ``strict''. Return NULL if an exception was raised by the codec.
object PyUnicode_AsCharmapString(object o, object mapping)
# The following codec API is special in that maps Unicode to Unicode.
# Translate a Py_UNICODE buffer of the given length by applying a
# character mapping table to it and return the resulting Unicode
# object. Return NULL when an exception was raised by the codec.
#
# The mapping table must map Unicode ordinal integers to Unicode
# ordinal integers or None (causing deletion of the character).
#
# Mapping tables need only provide the __getitem__() interface;
# dictionaries and sequences work well. Unmapped character
# ordinals (ones which cause a LookupError) are left untouched and
# are copied as-is.
object PyUnicode_TranslateCharmap(Py_UNICODE *s, Py_ssize_t size,
object table, char *errors)
# These are the MBCS codec APIs. They are currently only available on
# Windows and use the Win32 MBCS converters to implement the
# conversions. Note that MBCS (or DBCS) is a class of encodings, not
# just one. The target encoding is defined by the user settings on the
# machine running the codec.
# Create a Unicode object by decoding size bytes of the MBCS
# encoded string s. Return NULL if an exception was raised by the
# codec.
object PyUnicode_DecodeMBCS(char *s, Py_ssize_t size, char *errors)
# If consumed is NULL, behave like PyUnicode_DecodeMBCS(). If
# consumed is not NULL, PyUnicode_DecodeMBCSStateful() will not
# decode trailing lead byte and the number of bytes that have been
# decoded will be stored in consumed. New in version 2.5.
object PyUnicode_DecodeMBCSStateful(char *s, int size, char *errors, int *consumed)
# Encode the Py_UNICODE buffer of the given size using MBCS and
# return a Python string object. Return NULL if an exception was
# raised by the codec.
object PyUnicode_EncodeMBCS(Py_UNICODE *s, Py_ssize_t size, char *errors)
# Encode a Unicode objects using MBCS and return the result as
# Python string object. Error handling is ``strict''. Return NULL
# if an exception was raised by the codec.
object PyUnicode_AsMBCSString(object o)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment