Commit 6fbad08e authored by Marius Wachtler

Add support for unicode literals when using pypa

We support the \u, \U and \N escape sequences and
the unicode_literals option.
In addition, this updates pypa to the latest version, which has unicode support.
parent 075620c7
Subproject commit 94fd3e1551188171fca8fb1d4bb7e2f916be33c4 Subproject commit 2ce3f0ef83f6d3d4bdd1ab841e2ca4c3417d93a4
...@@ -31,6 +31,9 @@ ...@@ -31,6 +31,9 @@
#include "core/stats.h" #include "core/stats.h"
#include "core/types.h" #include "core/types.h"
#include "core/util.h" #include "core/util.h"
#include "runtime/capi.h"
#include "runtime/objmodel.h"
#include "runtime/types.h"
namespace pypa { namespace pypa {
bool string_to_double(String const& s, double& result); bool string_to_double(String const& s, double& result);
...@@ -511,7 +514,7 @@ struct expr_dispatcher { ...@@ -511,7 +514,7 @@ struct expr_dispatcher {
ResultPtr read(pypa::AstStr& s) { ResultPtr read(pypa::AstStr& s) {
AST_Str* ptr = new AST_Str(); AST_Str* ptr = new AST_Str();
location(ptr, s); location(ptr, s);
ptr->str_type = AST_Str::STR; ptr->str_type = s.unicode ? AST_Str::UNICODE : AST_Str::STR;
ptr->str_data = s.value; ptr->str_data = s.value;
return ptr; return ptr;
} }
...@@ -799,7 +802,7 @@ struct stmt_dispatcher { ...@@ -799,7 +802,7 @@ struct stmt_dispatcher {
location(ptr, d); location(ptr, d);
AST_Str* str = new AST_Str(); AST_Str* str = new AST_Str();
ptr->value = str; ptr->value = str;
str->str_type = AST_Str::STR; str->str_type = d.unicode ? AST_Str::UNICODE : AST_Str::STR;
str->str_data = d.doc; str->str_data = d.doc;
return ptr; return ptr;
} }
...@@ -823,14 +826,32 @@ AST_Module* readModule(pypa::AstModule& t) { ...@@ -823,14 +826,32 @@ AST_Module* readModule(pypa::AstModule& t) {
} }
void pypaErrorHandler(pypa::Error e) { void pypaErrorHandler(pypa::Error e) {
// raiseSyntaxError
// void raiseSyntaxError(const char* msg, int lineno, int col_offset, const
// std::string& file, const std::string& func);
if (e.type != pypa::ErrorType::SyntaxWarning) { if (e.type != pypa::ErrorType::SyntaxWarning) {
raiseSyntaxError(e.message.c_str(), e.cur.line, e.cur.column, e.file_name, std::string()); raiseSyntaxError(e.message.c_str(), e.cur.line, e.cur.column, e.file_name, std::string());
} }
} }
// Unicode escape callback installed as options.unicode_escape_handler in pypa_parse.
//
// Decodes `s` with PyUnicode_DecodeRawUnicodeEscape when `raw_prefix` is set and with
// PyUnicode_DecodeUnicodeEscape otherwise (both in "strict" mode), then converts the
// result with PyUnicode_AsUTF8String and returns that string's contents.
//
// `error` is set to false on entry. If an ExcInfo is caught, `error` is set to true and
// the return value is the str() of the exception value when that is a str_cls instance,
// or a fixed fallback message otherwise.
pypa::String pypaUnicodeEscapeDecoder(pypa::String s, bool raw_prefix, bool& error) {
    try {
        error = false;

        Box* decoded = raw_prefix ? PyUnicode_DecodeRawUnicodeEscape(s.c_str(), s.size(), "strict")
                                  : PyUnicode_DecodeUnicodeEscape(s.c_str(), s.size(), "strict");
        // Turn a pending C-API error from the decode call into a C++ exception.
        checkAndThrowCAPIException();

        BoxedString* utf8 = (BoxedString*)PyUnicode_AsUTF8String(decoded);
        checkAndThrowCAPIException();
        return utf8->s;
    } catch (ExcInfo e) {
        error = true;

        BoxedString* message = str(e.value);
        if (!message || message->cls != str_cls)
            return "Encountered an unknown error inside pypaUnicodeEscapeDecoder";
        return message->s;
    }
}
AST_Module* pypa_parse(char const* file_path) { AST_Module* pypa_parse(char const* file_path) {
pypa::Lexer lexer(file_path); pypa::Lexer lexer(file_path);
pypa::SymbolTablePtr symbols; pypa::SymbolTablePtr symbols;
...@@ -842,6 +863,7 @@ AST_Module* pypa_parse(char const* file_path) { ...@@ -842,6 +863,7 @@ AST_Module* pypa_parse(char const* file_path) {
options.python3only = false; options.python3only = false;
options.handle_future_errors = false; options.handle_future_errors = false;
options.error_handler = pypaErrorHandler; options.error_handler = pypaErrorHandler;
options.unicode_escape_handler = pypaUnicodeEscapeDecoder;
if (pypa::parse(lexer, module, symbols, options) && module) { if (pypa::parse(lexer, module, symbols, options) && module) {
return readModule(*module); return readModule(*module);
......
...@@ -231,29 +231,29 @@ static int main(int argc, char** argv) { ...@@ -231,29 +231,29 @@ static int main(int argc, char** argv) {
add_history(line); add_history(line);
AST_Module* m = parse_string(line);
Timer _t("repl");
if (m->body.size() > 0 && m->body[0]->type == AST_TYPE::Expr) {
AST_Expr* e = ast_cast<AST_Expr>(m->body[0]);
AST_Call* c = new AST_Call();
AST_Name* r = new AST_Name(m->interned_strings->get("repr"), AST_TYPE::Load, 0);
c->func = r;
c->starargs = NULL;
c->kwargs = NULL;
c->args.push_back(e->value);
c->lineno = 0;
AST_Print* p = new AST_Print();
p->dest = NULL;
p->nl = true;
p->values.push_back(c);
p->lineno = 0;
m->body[0] = p;
}
try { try {
AST_Module* m = parse_string(line);
Timer _t("repl");
if (m->body.size() > 0 && m->body[0]->type == AST_TYPE::Expr) {
AST_Expr* e = ast_cast<AST_Expr>(m->body[0]);
AST_Call* c = new AST_Call();
AST_Name* r = new AST_Name(m->interned_strings->get("repr"), AST_TYPE::Load, 0);
c->func = r;
c->starargs = NULL;
c->kwargs = NULL;
c->args.push_back(e->value);
c->lineno = 0;
AST_Print* p = new AST_Print();
p->dest = NULL;
p->nl = true;
p->values.push_back(c);
p->lineno = 0;
m->body[0] = p;
}
compileAndRunModule(m, main_module); compileAndRunModule(m, main_module);
} catch (ExcInfo e) { } catch (ExcInfo e) {
int retcode = 0xdeadbeef; // should never be seen int retcode = 0xdeadbeef; // should never be seen
......
...@@ -118,11 +118,9 @@ void raiseSyntaxError(const char* msg, int lineno, int col_offset, const std::st ...@@ -118,11 +118,9 @@ void raiseSyntaxError(const char* msg, int lineno, int col_offset, const std::st
Box* exc = runtimeCall(SyntaxError, ArgPassSpec(1), boxStrConstant(msg), NULL, NULL, NULL, NULL); Box* exc = runtimeCall(SyntaxError, ArgPassSpec(1), boxStrConstant(msg), NULL, NULL, NULL, NULL);
auto tb = getTraceback(); auto tb = getTraceback();
// TODO: push the syntax error line back on it: std::vector<const LineInfo*> entries = tb->lines;
//// TODO: leaks this! entries.push_back(new LineInfo(lineno, col_offset, file, func));
// last_tb.push_back(new LineInfo(lineno, col_offset, file, func)); raiseRaw(ExcInfo(exc->cls, exc, new BoxedTraceback(std::move(entries))));
raiseRaw(ExcInfo(exc->cls, exc, tb));
} }
void _printStacktrace() { void _printStacktrace() {
......
# skip-if: '-x' in EXTRA_JIT_ARGS
from StringIO import StringIO from StringIO import StringIO
import json import json
......
# skip-if: '-x' in EXTRA_JIT_ARGS
def f(a): def f(a):
print a print a
......
# skip-if: '-x' in EXTRA_JIT_ARGS
print repr(unicode()) print repr(unicode())
print repr(unicode('hello world')) print repr(unicode('hello world'))
...@@ -32,6 +30,7 @@ print u"Hello " + " World" ...@@ -32,6 +30,7 @@ print u"Hello " + " World"
def p(x): def p(x):
return [hex(ord(i)) for i in x] return [hex(ord(i)) for i in x]
s = u"\u20AC" # euro sign s = u"\u20AC" # euro sign
print p(u"\N{EURO SIGN}")
print p(s) print p(s)
print p(s.encode("utf8")) print p(s.encode("utf8"))
print p(s.encode("utf16")) print p(s.encode("utf16"))
......
# skip-if: '-x' in EXTRA_JIT_ARGS
import unicodedata import unicodedata
print unicodedata.lookup("EURO SIGN") == u"\u20ac" print unicodedata.lookup("EURO SIGN") == u"\u20ac"
print unicodedata.name(u"/") print unicodedata.name(u"/")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment