Commit defe96ef authored by Kevin Modzelewski

Allow some more use of unicode as attribute or variable names

Treat unicode names as equivalent to their ascii-encoded str
version, and any non-ascii unicode name as a TypeError.

My hope is that this covers the common cases but doesn't cause
bugs.  You can see that we're doing this by putting unicode in
and getting a str out, but I hope that this is ok.
parent 30b5bb49
......@@ -1353,6 +1353,8 @@ public:
AST_Expr* import_star_expr = new AST_Expr();
import_star_expr->value = import_star;
import_star_expr->lineno = node->lineno;
import_star_expr->col_offset = node->col_offset;
push_back(import_star_expr);
} else {
......
......@@ -485,6 +485,8 @@ Box* bltinImport(Box* name, Box* globals, Box* locals, Box** args) {
}
Box* delattrFunc(Box* obj, Box* _str) {
_str = coerceUnicodeToStr(_str);
if (_str->cls != str_cls)
raiseExcHelper(TypeError, "attribute name must be string, not '%s'", getTypeName(_str));
BoxedString* str = static_cast<BoxedString*>(_str);
......@@ -493,6 +495,8 @@ Box* delattrFunc(Box* obj, Box* _str) {
}
Box* getattrFunc(Box* obj, Box* _str, Box* default_value) {
_str = coerceUnicodeToStr(_str);
if (_str->cls != str_cls) {
raiseExcHelper(TypeError, "getattr(): attribute name must be string");
}
......@@ -518,8 +522,10 @@ Box* getattrFunc(Box* obj, Box* _str, Box* default_value) {
}
Box* setattrFunc(Box* obj, Box* _str, Box* value) {
_str = coerceUnicodeToStr(_str);
if (_str->cls != str_cls) {
raiseExcHelper(TypeError, "getattr(): attribute name must be string");
raiseExcHelper(TypeError, "setattr(): attribute name must be string");
}
BoxedString* str = static_cast<BoxedString*>(_str);
......@@ -528,10 +534,7 @@ Box* setattrFunc(Box* obj, Box* _str, Box* value) {
}
Box* hasattr(Box* obj, Box* _str) {
if (PyUnicode_Check(_str)) {
_str = _PyUnicode_AsDefaultEncodedString(_str, NULL);
checkAndThrowCAPIException();
}
_str = coerceUnicodeToStr(_str);
if (_str->cls != str_cls) {
raiseExcHelper(TypeError, "hasattr(): attribute name must be string");
......
......@@ -259,8 +259,7 @@ extern "C" PyObject* PyImport_ImportModuleLevel(const char* name, PyObject* glob
}
}
// Named the same thing as the CPython method:
static void ensure_fromlist(Box* module, Box* fromlist, const std::string& module_name, bool recursive) {
static void ensureFromlist(Box* module, Box* fromlist, const std::string& module_name, bool recursive) {
if (getattrInternal(module, "__path__", NULL) == NULL) {
// If it's not a package, then there's no sub-importing to do
return;
......@@ -277,7 +276,7 @@ static void ensure_fromlist(Box* module, Box* fromlist, const std::string& modul
Box* all = getattrInternal(module, "__all__", NULL);
if (all) {
ensure_fromlist(module, all, module_name, true);
ensureFromlist(module, all, module_name, true);
}
continue;
}
......@@ -305,7 +304,7 @@ extern "C" Box* import(int level, Box* from_imports, const std::string* module_n
assert(module);
if (from_imports != None) {
ensure_fromlist(module, from_imports, *module_name, false);
ensureFromlist(module, from_imports, *module_name, false);
}
return module;
......
......@@ -835,7 +835,6 @@ static Box* _intNew(Box* val, Box* base) {
Box* r = PyInt_FromString(s->s.c_str(), NULL, base_n);
if (!r)
throwCAPIException();
assert(r);
return r;
} else if (val->cls == unicode_cls) {
int base_n;
......@@ -849,7 +848,6 @@ static Box* _intNew(Box* val, Box* base) {
Box* r = PyInt_FromUnicode(PyUnicode_AS_UNICODE(val), PyUnicode_GET_SIZE(val), base_n);
if (!r)
throwCAPIException();
assert(r);
return r;
} else if (val->cls == float_cls) {
RELEASE_ASSERT(!base, "");
......
......@@ -1309,7 +1309,7 @@ Box* getattrInternalGeneral(Box* obj, const std::string& attr, GetattrRewriteArg
}
if (!cls_only) {
if (obj->cls != type_cls) {
if (!isSubclass(obj->cls, type_cls)) {
// Look up the val in the object's dictionary and if you find it, return it.
Box* val;
......@@ -2689,10 +2689,12 @@ Box* callFunc(BoxedFunctionBase* func, CallRewriteArgs* rewrite_args, ArgPassSpe
BoxedDict* d_kwargs = static_cast<BoxedDict*>(kwargs);
for (auto& p : d_kwargs->d) {
if (p.first->cls != str_cls)
auto k = coerceUnicodeToStr(p.first);
if (k->cls != str_cls)
raiseExcHelper(TypeError, "%s() keywords must be strings", getFunctionName(f).c_str());
BoxedString* s = static_cast<BoxedString*>(p.first);
BoxedString* s = static_cast<BoxedString*>(k);
if (param_names.takes_param_names) {
assert(!rewrite_args && "would need to make sure that this didn't need to go into r_kwargs");
......@@ -3707,8 +3709,10 @@ Box* typeNew(Box* _cls, Box* arg1, Box* arg2, Box** _args) {
made->tp_dictoffset = base->tp_dictoffset;
for (const auto& p : attr_dict->d) {
assert(p.first->cls == str_cls);
made->setattr(static_cast<BoxedString*>(p.first)->s, p.second, NULL);
auto k = coerceUnicodeToStr(p.first);
RELEASE_ASSERT(k->cls == str_cls, "");
made->setattr(static_cast<BoxedString*>(k)->s, p.second, NULL);
}
if (!made->hasattr("__module__"))
......@@ -4147,6 +4151,8 @@ extern "C" Box* importStar(Box* _from_module, BoxedModule* to_module) {
}
idx++;
attr_name = coerceUnicodeToStr(attr_name);
if (attr_name->cls != str_cls)
raiseExcHelper(TypeError, "attribute name must be string, not '%s'", getTypeName(attr_name));
......@@ -4171,4 +4177,17 @@ extern "C" Box* importStar(Box* _from_module, BoxedModule* to_module) {
return None;
}
// Returns `unicode` untouched unless its class is unicode_cls (or a subclass of it); in that
// case returns the result of ASCII-encoding it via PyUnicode_AsASCIIString.
// If the encoding call yields no object, the pending C-API error is cleared and a TypeError
// is raised in its place.
Box* coerceUnicodeToStr(Box* unicode) {
    if (isSubclass(unicode->cls, unicode_cls)) {
        Box* encoded = PyUnicode_AsASCIIString(unicode);
        if (encoded == nullptr) {
            // Drop the error left behind by the failed encode so that only the TypeError below surfaces.
            PyErr_Clear();
            raiseExcHelper(TypeError, "Cannot use non-ascii unicode strings as attribute names or keywords");
        }
        return encoded;
    }

    // Not a unicode object: hand the argument back as-is.
    return unicode;
}
}
......@@ -149,6 +149,16 @@ static const char* objectNewParameterTypeErrorMsg() {
bool exceptionMatches(const ExcInfo& e, BoxedClass* cls);
// This function will ascii-encode any unicode objects it gets passed, or return the argument
// unmodified if it wasn't a unicode object.
// This is intended for functions that deal with attribute or variable names, which we internally
// assume will always be strings, but which CPython also allows to be unicode.
// If we used an encoding like utf8 instead of ascii, we would allow collisions between unicode
// strings and a string that happens to be its encoding. It seems safer to just encode as ascii,
// which will throw an exception if you try to pass something that might run into this risk.
// (We discard the unicode encoding error and throw a TypeError instead.)
Box* coerceUnicodeToStr(Box* unicode);
inline std::tuple<Box*, Box*, Box*, Box**> getTupleFromArgsArray(Box** args, int num_args) {
Box* arg1 = num_args >= 1 ? args[0] : nullptr;
Box* arg2 = num_args >= 2 ? args[1] : nullptr;
......
......@@ -971,6 +971,8 @@ public:
RELEASE_ASSERT(_self->cls == attrwrapper_cls, "");
AttrWrapper* self = static_cast<AttrWrapper*>(_self);
_key = coerceUnicodeToStr(_key);
RELEASE_ASSERT(_key->cls == str_cls, "");
BoxedString* key = static_cast<BoxedString*>(_key);
self->b->setattr(key->s, value, NULL);
......@@ -981,6 +983,8 @@ public:
RELEASE_ASSERT(_self->cls == attrwrapper_cls, "");
AttrWrapper* self = static_cast<AttrWrapper*>(_self);
_key = coerceUnicodeToStr(_key);
RELEASE_ASSERT(_key->cls == str_cls, "");
BoxedString* key = static_cast<BoxedString*>(_key);
Box* r = self->b->getattr(key->s);
......@@ -993,12 +997,13 @@ public:
RELEASE_ASSERT(_self->cls == attrwrapper_cls, "");
AttrWrapper* self = static_cast<AttrWrapper*>(_self);
_key = coerceUnicodeToStr(_key);
RELEASE_ASSERT(_key->cls == str_cls, "");
BoxedString* key = static_cast<BoxedString*>(_key);
Box* r = self->b->getattr(key->s);
if (!r) {
if (!r)
raiseExcHelper(KeyError, "'%s'", key->s.c_str());
}
return r;
}
......@@ -1027,10 +1032,7 @@ public:
RELEASE_ASSERT(_self->cls == attrwrapper_cls, "");
AttrWrapper* self = static_cast<AttrWrapper*>(_self);
if (PyUnicode_Check(_key)) {
_key = _PyUnicode_AsDefaultEncodedString(_key, NULL);
checkAndThrowCAPIException();
}
_key = coerceUnicodeToStr(_key);
RELEASE_ASSERT(_key->cls == str_cls, "");
BoxedString* key = static_cast<BoxedString*>(_key);
......
# import_target defines __all__ to be ['x']
# import_target defines __all__ to be ['x', u'z']
from import_target import *
print x
print x, z
try:
print foo
assert 0
......
......@@ -15,7 +15,8 @@ class C(object):
pass
_x = 1
__all__ = ['x']
z = 2
__all__ = ['x', u'z']
def letMeCallThatForYou(f, *args):
return f(*args)
Cannot use non-ascii unicode strings as attribute names or keywords
Cannot use non-ascii unicode strings as attribute names or keywords
Cannot use non-ascii unicode strings as attribute names or keywords
# allow-warning: import level 0 will be treated as -1
# skip-if: '-x' in EXTRA_JIT_ARGS
# Helper with a single parameter named 'a', used below to exercise keyword-argument names.
def f(a):
print a
# Passing a non-ascii unicode keyword name; the TypeError message is printed if one is raised.
try:
f(**{u'a\u0180':3})
except TypeError as e:
print e
# Same probe for setattr() with a non-ascii unicode attribute name.
try:
setattr(object(), u"\u0180", None)
except TypeError as e:
print e
# Same probe for hasattr() with a non-ascii unicode attribute name.
try:
hasattr(object(), u"\u0180")
except TypeError as e:
print e
......@@ -30,11 +30,6 @@ print hash(u'') == hash('')
print "Hello " + u" World"
print u"Hello " + " World"
try:
hasattr(object(), u"\u0180")
except UnicodeEncodeError as e:
print e
def p(x):
return [hex(ord(i)) for i in x]
s = u"\u20AC" # euro sign
......@@ -65,3 +60,22 @@ print u'\u0180' in 'hello world'
print 'hello world' in u'\u0180'
print u''.__contains__('')
print ''.__contains__(u'')
# Exercise ascii-only unicode names used as attribute names and as keyword-argument names.
class C(object):
a = 1
# We don't support this, with or without unicode:
# locals()[u'b'] = 2
c = C()
# Look up the class attribute 'a' through a unicode name.
print getattr(c, u'a')
# print c.b
# Store under a unicode key in the instance __dict__, then read it back via normal attribute
# access and via getattr() with a unicode name.
c.__dict__[u'c'] = 3
print c.c
print getattr(c, u'c')
# Delete through a unicode name, then check for the attribute again.
delattr(c, u'c')
print hasattr(c, u'c')
def f(a):
print a
# Call with a plain keyword, a str-keyed kwargs dict, and a unicode-keyed kwargs dict.
f(a=1)
f(**{'a':2})
f(**{u'a':3})
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment