Commit e0c1a8d9 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Very basic unicode test

and with the fixes to get that working, all the tests pass!
parent 398ee469
...@@ -196,7 +196,7 @@ PyAPI_FUNC(int) PyString_AsStringAndSize( ...@@ -196,7 +196,7 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
register Py_ssize_t *len /* pointer to length variable or NULL register Py_ssize_t *len /* pointer to length variable or NULL
(only possible for 0-terminated (only possible for 0-terminated
strings) */ strings) */
); ) PYSTON_NOEXCEPT;
/* Using the current locale, insert the thousands grouping /* Using the current locale, insert the thousands grouping
......
...@@ -31,7 +31,78 @@ _Py_HashSecret_t _Py_HashSecret; ...@@ -31,7 +31,78 @@ _Py_HashSecret_t _Py_HashSecret;
} }
/**
 * Builds the unicode representation of an object (C-API PyObject_Unicode).
 *
 * Resolution order, as implemented below:
 *   1. A NULL input yields the decoded form of the literal "<NULL>".
 *   2. An exact unicode object is returned as-is after Py_INCREF.
 *   3. Otherwise a __unicode__ method is looked up (PyObject_GetAttr for
 *      classic-class instances, _PyObject_LookupSpecial for everything else)
 *      and called with no arguments.
 *   4. If no __unicode__ was found: a unicode subtype is copied into a true
 *      unicode object, an exact string is used directly, and anything else
 *      goes through tp_str, or PyObject_Repr when the type has no tp_str.
 *   5. A non-unicode result is decoded with PyUnicode_FromEncodedObject
 *      (NULL encoding argument, "strict" error mode).
 *
 * @param v  object to convert; may be NULL.
 * @return   the resulting object, or NULL on failure.
 */
extern "C" PyObject* PyObject_Unicode(PyObject* v) noexcept {
    // Initialised so that no path can reach the NULL test below with garbage.
    PyObject* res = NULL;
    PyObject* func;
    PyObject* str;
    int unicode_method_found = 0;
    // Cached "__unicode__" name object, shared by both lookup branches.
    static PyObject* unicodestr = NULL;

    if (v == NULL) {
        res = PyString_FromString("<NULL>");
        if (res == NULL)
            return NULL;
        str = PyUnicode_FromEncodedObject(res, NULL, "strict");
        Py_DECREF(res);
        return str;
    } else if (PyUnicode_CheckExact(v)) {
        Py_INCREF(v);
        return v;
    }

    if (PyInstance_Check(v)) {
        /* We're an instance of a classic class */
        /* Try __unicode__ from the instance -- alas we have no type */
        if (!unicodestr) {
            unicodestr = boxStrConstant("__unicode__");
            // Validate the allocation before handing it to the GC; the root
            // must never be registered with a null pointer.
            if (!unicodestr)
                return NULL;
            gc::registerPermanentRoot(unicodestr);
        }
        func = PyObject_GetAttr(v, unicodestr);
        if (func != NULL) {
            unicode_method_found = 1;
            res = PyObject_CallFunctionObjArgs(func, NULL);
            Py_DECREF(func);
        } else {
            // A missing attribute is not an error here: fall back to str/repr.
            PyErr_Clear();
        }
    } else {
        /* Not a classic class instance, try __unicode__. */
        // NOTE(review): if _PyObject_LookupSpecial fills in the cache slot
        // itself, that object is not registered as a permanent root on this
        // path -- confirm against its implementation.
        func = _PyObject_LookupSpecial(v, "__unicode__", &unicodestr);
        if (func != NULL) {
            unicode_method_found = 1;
            res = PyObject_CallFunctionObjArgs(func, NULL);
            Py_DECREF(func);
        } else if (PyErr_Occurred())
            return NULL;
    }

    /* Didn't find __unicode__ */
    if (!unicode_method_found) {
        if (PyUnicode_Check(v)) {
            /* For a Unicode subtype that didn't override __unicode__,
               return a true Unicode object with the same data. */
            return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(v), PyUnicode_GET_SIZE(v));
        }
        if (PyString_CheckExact(v)) {
            Py_INCREF(v);
            res = v;
        } else {
            if (Py_TYPE(v)->tp_str != NULL)
                res = (*Py_TYPE(v)->tp_str)(v);
            else
                res = PyObject_Repr(v);
        }
    }

    if (res == NULL)
        return NULL;

    // Anything that is not already unicode gets decoded; the intermediate
    // object is released whether or not decoding succeeds.
    if (!PyUnicode_Check(res)) {
        str = PyUnicode_FromEncodedObject(res, NULL, "strict");
        Py_DECREF(res);
        res = str;
    }
    return res;
}
extern "C" PyObject* _PyObject_Str(PyObject* v) noexcept { extern "C" PyObject* _PyObject_Str(PyObject* v) noexcept {
......
...@@ -2025,6 +2025,38 @@ extern "C" PyObject* PyString_FromString(const char* s) noexcept { ...@@ -2025,6 +2025,38 @@ extern "C" PyObject* PyString_FromString(const char* s) noexcept {
return boxStrConstant(s); return boxStrConstant(s);
} }
extern "C" int PyString_AsStringAndSize(register PyObject* obj, register char** s, register Py_ssize_t* len) noexcept {
if (s == NULL) {
PyErr_BadInternalCall();
return -1;
}
if (!PyString_Check(obj)) {
#ifdef Py_USING_UNICODE
if (PyUnicode_Check(obj)) {
obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
if (obj == NULL)
return -1;
} else
#endif
{
PyErr_Format(PyExc_TypeError, "expected string or Unicode object, "
"%.200s found",
Py_TYPE(obj)->tp_name);
return -1;
}
}
*s = PyString_AS_STRING(obj);
if (len != NULL)
*len = PyString_GET_SIZE(obj);
else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
PyErr_SetString(PyExc_TypeError, "expected string without null bytes");
return -1;
}
return 0;
}
BoxedString* createUninitializedString(ssize_t n) { BoxedString* createUninitializedString(ssize_t n) {
// I *think* this should avoid doing any copies, by using move constructors: // I *think* this should avoid doing any copies, by using move constructors:
return new BoxedString(std::string(n, '\x00')); return new BoxedString(std::string(n, '\x00'));
...@@ -2057,9 +2089,15 @@ extern "C" char* PyString_AsString(PyObject* o) noexcept { ...@@ -2057,9 +2089,15 @@ extern "C" char* PyString_AsString(PyObject* o) noexcept {
return getWriteableStringContents(s); return getWriteableStringContents(s);
} }
/**
 * Returns the length of a string-like object.
 *
 * Objects whose class is exactly str_cls are answered straight from the
 * underlying BoxedString; everything else is delegated to
 * PyString_AsStringAndSize, and -1 is returned when that call fails.
 */
extern "C" Py_ssize_t PyString_Size(PyObject* op) noexcept {
    if (op->cls != str_cls) {
        // Slow path: let PyString_AsStringAndSize validate/convert the object.
        char* buffer;
        Py_ssize_t length;
        const int status = PyString_AsStringAndSize(op, &buffer, &length);
        return status ? -1 : length;
    }

    // Fast path: exact string, read the size directly.
    return static_cast<BoxedString*>(op)->s.size();
}
extern "C" int _PyString_Resize(PyObject** pv, Py_ssize_t newsize) noexcept { extern "C" int _PyString_Resize(PyObject** pv, Py_ssize_t newsize) noexcept {
......
# unicode() called with no arguments: print the repr of the result.
print repr(unicode())
# unicode() built from a str literal: print the repr of the result.
print repr(unicode('hello world'))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment