Commit d044212d authored by Kevin Modzelewski

Improve some str/unicode interchangeability

Unicode strings are allowed to be used as attribute names, but
only if they convert cleanly to ASCII.

A unicode object and a str need to compare equal to each other and have
the same hash value whenever the unicode is ASCII-compatible and has the
same value as the str.
parent 1458a514
...@@ -6599,6 +6599,12 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index) ...@@ -6599,6 +6599,12 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
static long static long
unicode_hash(PyUnicodeObject *self) unicode_hash(PyUnicodeObject *self)
{ {
// Pyston change: just convert to a str and hash, since we use std::hash and not
// CPython's hashing algorithm they duplicated here:
PyObject* str = PyUnicode_AsEncodedString((PyObject*)self, "utf8", "replace");
return str->ob_type->tp_hash(str);
#if 0
/* Since Unicode objects compare equal to their ASCII string /* Since Unicode objects compare equal to their ASCII string
counterparts, they should use the individual character values counterparts, they should use the individual character values
as basis for their hash value. This is needed to assure that as basis for their hash value. This is needed to assure that
...@@ -6634,6 +6640,7 @@ unicode_hash(PyUnicodeObject *self) ...@@ -6634,6 +6640,7 @@ unicode_hash(PyUnicodeObject *self)
x = -2; x = -2;
self->hash = x; self->hash = x;
return x; return x;
#endif
} }
PyDoc_STRVAR(index__doc__, PyDoc_STRVAR(index__doc__,
......
...@@ -520,6 +520,11 @@ Box* setattrFunc(Box* obj, Box* _str, Box* value) { ...@@ -520,6 +520,11 @@ Box* setattrFunc(Box* obj, Box* _str, Box* value) {
} }
Box* hasattr(Box* obj, Box* _str) { Box* hasattr(Box* obj, Box* _str) {
if (PyUnicode_Check(_str)) {
_str = _PyUnicode_AsDefaultEncodedString(_str, NULL);
checkAndThrowCAPIException();
}
if (_str->cls != str_cls) { if (_str->cls != str_cls) {
raiseExcHelper(TypeError, "hasattr(): attribute name must be string"); raiseExcHelper(TypeError, "hasattr(): attribute name must be string");
} }
......
...@@ -980,7 +980,7 @@ extern "C" Box* strEq(BoxedString* lhs, Box* rhs) { ...@@ -980,7 +980,7 @@ extern "C" Box* strEq(BoxedString* lhs, Box* rhs) {
assert(lhs->cls == str_cls); assert(lhs->cls == str_cls);
if (rhs->cls != str_cls) if (rhs->cls != str_cls)
return boxBool(false); return NotImplemented;
BoxedString* srhs = static_cast<BoxedString*>(rhs); BoxedString* srhs = static_cast<BoxedString*>(rhs);
return boxBool(lhs->s == srhs->s); return boxBool(lhs->s == srhs->s);
...@@ -990,7 +990,7 @@ extern "C" Box* strNe(BoxedString* lhs, Box* rhs) { ...@@ -990,7 +990,7 @@ extern "C" Box* strNe(BoxedString* lhs, Box* rhs) {
assert(lhs->cls == str_cls); assert(lhs->cls == str_cls);
if (rhs->cls != str_cls) if (rhs->cls != str_cls)
return boxBool(true); return NotImplemented;
BoxedString* srhs = static_cast<BoxedString*>(rhs); BoxedString* srhs = static_cast<BoxedString*>(rhs);
return boxBool(lhs->s != srhs->s); return boxBool(lhs->s != srhs->s);
......
...@@ -974,6 +974,11 @@ public: ...@@ -974,6 +974,11 @@ public:
RELEASE_ASSERT(_self->cls == attrwrapper_cls, ""); RELEASE_ASSERT(_self->cls == attrwrapper_cls, "");
AttrWrapper* self = static_cast<AttrWrapper*>(_self); AttrWrapper* self = static_cast<AttrWrapper*>(_self);
if (PyUnicode_Check(_key)) {
_key = _PyUnicode_AsDefaultEncodedString(_key, NULL);
checkAndThrowCAPIException();
}
RELEASE_ASSERT(_key->cls == str_cls, ""); RELEASE_ASSERT(_key->cls == str_cls, "");
BoxedString* key = static_cast<BoxedString*>(_key); BoxedString* key = static_cast<BoxedString*>(_key);
Box* r = self->b->getattr(key->s); Box* r = self->b->getattr(key->s);
......
...@@ -12,3 +12,23 @@ print repr(u.encode("utf8")) ...@@ -12,3 +12,23 @@ print repr(u.encode("utf8"))
# This is tricky, since we need to support file encodings, and then set stdout to UTF8: # This is tricky, since we need to support file encodings, and then set stdout to UTF8:
# print u # print u
d = {}
d["hello world"] = "hi"
print d[u"hello world"]
class C(object):
pass
c = C()
c.a = 1
print hasattr(c, 'a')
print hasattr(c, u'a')
print u'a' in c.__dict__
print u'' == ''
print '' == u''
print hash(u'') == hash('')
try:
hasattr(object(), u"\u0180")
except UnicodeEncodeError as e:
print e
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment