Commit 3f93988f authored by Stefan Behnel

initial support for unicode literals in UTF-8

parent 79f741da
......@@ -5,7 +5,7 @@
# to be rebuilt next time pyrexc is run.
#
string_prefixes = "cCrR"
string_prefixes = "cCrRuU"
def make_lexicon():
from Cython.Plex import \
......
......@@ -1071,13 +1071,16 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
"static __Pyx_StringTabEntry %s[] = {" %
Naming.stringtab_cname)
for entry in entries:
print repr(entry.init), type(entry.init)
code.putln(
"{&%s, %s, sizeof(%s)}," % (
"{&%s, %s, sizeof(%s), %d}," % (
entry.pystring_cname,
entry.cname,
entry.cname))
entry.cname,
isinstance(entry.init, unicode)
))
code.putln(
"{0, 0, 0}")
"{0, 0, 0, 0}")
code.putln(
"};")
......
......@@ -2600,7 +2600,7 @@ utility_function_predeclarations = \
typedef struct {const char *s; const void **p;} __Pyx_CApiTabEntry; /*proto*/
typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/
typedef struct {PyObject **p; char *s; long n;} __Pyx_StringTabEntry; /*proto*/
typedef struct {PyObject **p; char *s; long n; int is_unicode;} __Pyx_StringTabEntry; /*proto*/
#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
static INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
......@@ -3104,7 +3104,11 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
""","""
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
while (t->p) {
*t->p = PyString_FromStringAndSize(t->s, t->n - 1);
if (t->is_unicode) {
*t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
} else {
*t->p = PyString_FromStringAndSize(t->s, t->n - 1);
}
if (!*t->p)
return -1;
++t;
......
......@@ -493,7 +493,7 @@ def p_opt_string_literal(s):
def p_string_literal(s):
# A single string or char literal.
# Returns (kind, value) where kind in ('', 'c', 'r')
# Returns (kind, value) where kind in ('', 'c', 'r', 'u')
if s.sy == 'STRING':
value = unquote(s.systring)
s.next()
......@@ -502,7 +502,7 @@ def p_string_literal(s):
pos = s.position()
#is_raw = s.systring[:1].lower() == "r"
kind = s.systring[:1].lower()
if kind not in "cr":
if kind not in "cru":
kind = ''
chars = []
while 1:
......@@ -513,6 +513,8 @@ def p_string_literal(s):
systr = s.systring
if len(systr) == 1 and systr in "'\"\n":
chars.append('\\')
if kind == 'u' and not isinstance(systr, unicode):
systr = systr.decode("UTF-8")
chars.append(systr)
elif sy == 'ESCAPE':
systr = s.systring
......@@ -533,6 +535,8 @@ def p_string_literal(s):
chars.append('\\x0' + systr[2:])
elif c == '\n':
pass
elif c == 'u':
chars.append(systr)
else:
chars.append(r'\\' + systr[1:])
elif sy == 'NEWLINE':
......@@ -546,7 +550,10 @@ def p_string_literal(s):
"Unexpected token %r:%r in string literal" %
(sy, s.systring))
s.next()
value = join(chars, '')
if kind == 'u':
value = u''.join(chars)
else:
value = ''.join(chars)
#print "p_string_literal: value =", repr(value) ###
return kind, value
......
......@@ -705,6 +705,8 @@ class CStringType:
from_py_function = "PyString_AsString"
def literal_code(self, value):
if isinstance(value, unicode):
value = value.encode("UTF-8")
return '"%s"' % value
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment