Commit e00e400e authored by Kevin Modzelewski

Merge branch 'codecs' of https://github.com/undingen/pyston into undingen-codecs

Conflicts:
	Makefile
	from_cpython/CMakeLists.txt
	src/runtime/capi.cpp
	src/runtime/types.cpp
parents 2642c04e 2dd09d34
......@@ -291,7 +291,7 @@ STDLIB_OBJS := stdlib.bc.o stdlib.stripped.bc.o
STDLIB_RELEASE_OBJS := stdlib.release.bc.o
ASM_SRCS := $(wildcard src/runtime/*.S)
STDMODULE_SRCS := errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c $(EXTRA_STDMODULE_SRCS)
STDMODULE_SRCS := errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c _codecsmodule.c $(EXTRA_STDMODULE_SRCS)
STDOBJECT_SRCS := structseq.c capsule.c stringobject.c $(EXTRA_STDOBJECT_SRCS)
STDPYTHON_SRCS := pyctype.c getargs.c formatter_string.c pystrtod.c dtoa.c $(EXTRA_STDPYTHON_SRCS)
FROM_CPYTHON_SRCS := $(addprefix from_cpython/Modules/,$(STDMODULE_SRCS)) $(addprefix from_cpython/Objects/,$(STDOBJECT_SRCS)) $(addprefix from_cpython/Python/,$(STDPYTHON_SRCS))
......
......@@ -15,7 +15,7 @@ endforeach(STDLIB_FILE)
add_custom_target(copy_stdlib ALL DEPENDS ${STDLIB_TARGETS})
# compile specified files in from_cpython/Modules
file(GLOB_RECURSE STDMODULE_SRCS Modules errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c)
file(GLOB_RECURSE STDMODULE_SRCS Modules errnomodule.c shamodule.c sha256module.c sha512module.c _math.c mathmodule.c md5.c md5module.c _randommodule.c _sre.c operator.c binascii.c pwdmodule.c posixmodule.c _struct.c datetimemodule.c _functoolsmodule.c _collectionsmodule.c itertoolsmodule.c resource.c signalmodule.c selectmodule.c fcntlmodule.c timemodule.c arraymodule.c zlibmodule.c _codecsmodule.c)
# compile specified files in from_cpython/Objects
file(GLOB_RECURSE STDOBJECT_SRCS Objects structseq.c capsule.c stringobject.c)
......
......@@ -72,6 +72,7 @@
#include "descrobject.h"
#include "warnings.h"
#include "codecs.h"
#include "pyerrors.h"
#include "pystate.h"
......
// This file is originally from CPython 2.7, with modifications for Pyston
#ifndef Py_CODECREGISTRY_H
#define Py_CODECREGISTRY_H
#ifdef __cplusplus
extern "C" {
#endif
/* ------------------------------------------------------------------------
Python Codec Registry and support functions
Written by Marc-Andre Lemburg (mal@lemburg.com).
Copyright (c) Corporation for National Research Initiatives.
------------------------------------------------------------------------ */
/* Register a new codec search function.
As a side effect, this tries to load the encodings package, if not
yet done, to make sure that it is always first in the list of
search functions.
The search_function's refcount is incremented by this function. */
PyAPI_FUNC(int) PyCodec_Register(PyObject *search_function) PYSTON_NOEXCEPT;
/* Codec register lookup API.
Looks up the given encoding and returns a CodecInfo object with
function attributes which implement the different aspects of
processing the encoding.
The encoding string is looked up converted to all lower-case
characters. This makes encodings looked up through this mechanism
effectively case-insensitive.
If no codec is found, a KeyError is set and NULL returned.
As a side effect, this tries to load the encodings package, if not
yet done. This is part of the lazy load strategy for the encodings
package.
*/
PyAPI_FUNC(PyObject *) _PyCodec_Lookup(const char *encoding) PYSTON_NOEXCEPT;
/* Generic codec based encoding API.
object is passed through the encoder function found for the given
encoding using the error handling method defined by errors. errors
may be NULL to use the default method defined for the codec.
Raises a LookupError in case no encoder can be found.
*/
PyAPI_FUNC(PyObject *) PyCodec_Encode(PyObject *object, const char *encoding, const char *errors) PYSTON_NOEXCEPT;
/* Generic codec based decoding API.
object is passed through the decoder function found for the given
encoding using the error handling method defined by errors. errors
may be NULL to use the default method defined for the codec.
Raises a LookupError in case no decoder can be found.
*/
PyAPI_FUNC(PyObject *) PyCodec_Decode(PyObject *object, const char *encoding, const char *errors) PYSTON_NOEXCEPT;
/* --- Codec Lookup APIs --------------------------------------------------
All APIs return a codec object with incremented refcount and are
based on _PyCodec_Lookup(). The same comments with respect to the encoding
name also apply to these APIs.
*/
/* Get an encoder function for the given encoding. */
PyAPI_FUNC(PyObject *) PyCodec_Encoder(const char *encoding) PYSTON_NOEXCEPT;
/* Get a decoder function for the given encoding. */
PyAPI_FUNC(PyObject *) PyCodec_Decoder(const char *encoding) PYSTON_NOEXCEPT;
/* Get an IncrementalEncoder object for the given encoding. */
PyAPI_FUNC(PyObject *) PyCodec_IncrementalEncoder(const char *encoding, const char *errors) PYSTON_NOEXCEPT;
/* Get an IncrementalDecoder object for the given encoding. */
PyAPI_FUNC(PyObject *) PyCodec_IncrementalDecoder(const char *encoding, const char *errors) PYSTON_NOEXCEPT;
/* Get a StreamReader factory function for the given encoding. */
PyAPI_FUNC(PyObject *) PyCodec_StreamReader(const char *encoding, PyObject *stream,
const char *errors) PYSTON_NOEXCEPT;
/* Get a StreamWriter factory function for the given encoding. */
PyAPI_FUNC(PyObject *) PyCodec_StreamWriter(const char *encoding, PyObject *stream,
const char *errors) PYSTON_NOEXCEPT;
/* Unicode encoding error handling callback registry API */
/* Register the error handling callback function error under the given
name. This function will be called by the codec when it encounters
unencodable characters/undecodable bytes and doesn't know the
callback name, when name is specified as the error parameter
in the call to the encode/decode function.
Return 0 on success, -1 on error */
PyAPI_FUNC(int) PyCodec_RegisterError(const char *name, PyObject *error) PYSTON_NOEXCEPT;
/* Lookup the error handling callback function registered under the given
name. As a special case NULL can be passed, in which case
the error handling callback for "strict" will be returned. */
PyAPI_FUNC(PyObject *) PyCodec_LookupError(const char *name) PYSTON_NOEXCEPT;
/* raise exc as an exception */
PyAPI_FUNC(PyObject *) PyCodec_StrictErrors(PyObject *exc) PYSTON_NOEXCEPT;
/* ignore the unicode error, skipping the faulty input */
PyAPI_FUNC(PyObject *) PyCodec_IgnoreErrors(PyObject *exc) PYSTON_NOEXCEPT;
/* replace the unicode encode error with ? or U+FFFD */
PyAPI_FUNC(PyObject *) PyCodec_ReplaceErrors(PyObject *exc) PYSTON_NOEXCEPT;
/* replace the unicode encode error with XML character references */
PyAPI_FUNC(PyObject *) PyCodec_XMLCharRefReplaceErrors(PyObject *exc) PYSTON_NOEXCEPT;
/* replace the unicode encode error with backslash escapes (\x, \u and \U) */
PyAPI_FUNC(PyObject *) PyCodec_BackslashReplaceErrors(PyObject *exc) PYSTON_NOEXCEPT;
#ifdef __cplusplus
}
#endif
#endif /* !Py_CODECREGISTRY_H */
......@@ -986,7 +986,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(
const char *string,
Py_ssize_t length,
const char *errors
);
) PYSTON_NOEXCEPT;
/* --- Latin-1 Codecs -----------------------------------------------------
......
......@@ -43,7 +43,9 @@ _norm_encoding_map = (' . '
' ')
_aliases = aliases.aliases
class CodecRegistryError(LookupError, SystemError):
# Pyston change: we don't support multiple inheritance yet
#class CodecRegistryError(LookupError, SystemError):
class CodecRegistryError(LookupError):
pass
def normalize_encoding(encoding):
......
This diff is collapsed.
......@@ -300,6 +300,10 @@ extern "C" PyObject* PyObject_CallObject(PyObject* obj, PyObject* args) noexcept
}
}
// Stub for the read-buffer C API: no buffer is ever produced. Every call goes
// straight to Py_FatalError("unimplemented") instead of returning a status, and
// obj, buffer and buffer_len are all ignored.
extern "C" int PyObject_AsReadBuffer(PyObject* obj, const void** buffer, Py_ssize_t* buffer_len) noexcept {
Py_FatalError("unimplemented");
}
static PyObject* call_function_tail(PyObject* callable, PyObject* args) {
PyObject* retval;
......
This diff is collapsed.
......@@ -97,6 +97,14 @@ static PyObject* do_mkvalue(const char** p_format, va_list* p_va, int flags) noe
case 'H':
return PyInt_FromLong((long)va_arg(*p_va, unsigned int));
case 'n':
#if SIZEOF_SIZE_T != SIZEOF_LONG
return PyInt_FromSsize_t(va_arg(*p_va, Py_ssize_t));
#endif
/* Fall through from 'n' to 'l' if Py_ssize_t is long */
case 'l':
return PyInt_FromLong(va_arg(*p_va, long));
case 'N':
case 'S':
case 'O':
......
......@@ -442,12 +442,22 @@ Box* issubclass_func(Box* child, Box* parent) {
return boxBool(isSubclass(static_cast<BoxedClass*>(child), static_cast<BoxedClass*>(parent)));
}
Box* bltinImport(Box* arg) {
if (arg->cls != str_cls) {
raiseExcHelper(TypeError, "__import__() argument 1 must be string, not %s", getTypeName(arg));
Box* bltinImport(Box* name, Box* globals, Box* locals, Box** args) {
Box* fromlist = args[0];
Box* level = args[1];
RELEASE_ASSERT(globals == None, "not implemented");
RELEASE_ASSERT(locals == None, "not implemented");
if (name->cls != str_cls) {
raiseExcHelper(TypeError, "__import__() argument 1 must be string, not %s", getTypeName(name));
}
if (level->cls != int_cls) {
raiseExcHelper(TypeError, "an integer is required");
}
return import(-1, new BoxedTuple({}), &static_cast<BoxedString*>(arg)->s);
return import(((BoxedInt*)level)->n, fromlist, &static_cast<BoxedString*>(name)->s);
}
Box* getattrFunc(Box* obj, Box* _str, Box* default_value) {
......@@ -575,7 +585,8 @@ BoxedClass* BaseException, *Exception, *StandardError, *AssertionError, *Attribu
*NameError, *KeyError, *IndexError, *IOError, *OSError, *ZeroDivisionError, *ValueError, *UnboundLocalError,
*RuntimeError, *ImportError, *StopIteration, *Warning, *SyntaxError, *OverflowError, *DeprecationWarning,
*MemoryError, *LookupError, *EnvironmentError, *ArithmeticError, *BufferError, *KeyboardInterrupt, *SystemExit,
*SystemError, *NotImplementedError, *PendingDeprecationWarning, *EOFError;
*SystemError, *NotImplementedError, *PendingDeprecationWarning, *EOFError, *UnicodeError, *UnicodeEncodeError,
*UnicodeDecodeError, *UnicodeTranslateError;
Box* PyExc_RecursionErrorInst;
Box* PyExc_MemoryErrorInst;
......@@ -1028,6 +1039,13 @@ void setupBuiltins() {
PendingDeprecationWarning = makeBuiltinException(Warning, "PendingDeprecationWarning");
EOFError = makeBuiltinException(StandardError, "EOFError");
// Unicode errors
UnicodeError = makeBuiltinException(ValueError, "UnicodeError");
UnicodeEncodeError = makeBuiltinException(UnicodeError, "UnicodeEncodeError");
UnicodeDecodeError = makeBuiltinException(UnicodeError, "UnicodeDecodeError");
UnicodeTranslateError = makeBuiltinException(UnicodeError, "UnicodeTranslateError");
BaseException->giveAttr("__reduce__",
new BoxedFunction(boxRTFunction((void*)BoxedException::__reduce__, UNKNOWN, 1)));
EnvironmentError->giveAttr("__reduce__",
......@@ -1096,8 +1114,10 @@ void setupBuiltins() {
Box* issubclass_obj = new BoxedBuiltinFunctionOrMethod(boxRTFunction((void*)issubclass_func, BOXED_BOOL, 2));
builtins_module->giveAttr("issubclass", issubclass_obj);
CLFunction* import_func = boxRTFunction((void*)bltinImport, UNKNOWN, 5, 4, false, false,
ParamNames({ "name", "globals", "locals", "fromlist", "level" }, "", ""));
builtins_module->giveAttr("__import__",
new BoxedBuiltinFunctionOrMethod(boxRTFunction((void*)bltinImport, UNKNOWN, 1)));
new BoxedBuiltinFunctionOrMethod(import_func, { None, None, None, new BoxedInt(-1) }));
enumerate_cls
= new BoxedHeapClass(object_cls, &BoxedEnumerate::gcHandler, 0, sizeof(BoxedEnumerate), false, "enumerate");
......
......@@ -209,6 +209,12 @@ static std::string generateVersionString() {
return oss.str();
}
// Runtime probe for the host byte order.
// Stores the value 1 in an unsigned long and inspects the byte at its lowest
// address: a non-zero byte there means the least-significant byte comes first,
// i.e. the machine is little-endian. Returns false otherwise.
static bool isLittleEndian() {
    const unsigned long probe = 1;
    const char* first_byte = reinterpret_cast<const char*>(&probe);
    return *first_byte != 0;
}
void setupSys() {
sys_modules_dict = new BoxedDict();
gc::registerPermanentRoot(sys_modules_dict);
......@@ -235,6 +241,7 @@ void setupSys() {
sys_module->giveAttr("warnoptions", new BoxedList());
sys_module->giveAttr("py3kwarning", False);
sys_module->giveAttr("byteorder", new BoxedString(isLittleEndian() ? "little" : "big"));
sys_module->giveAttr("platform", boxStrConstant("unknown")); // seems like a reasonable, if poor, default
......
......@@ -426,7 +426,8 @@ extern "C" PyObject* PyObject_Call(PyObject* callable_object, PyObject* args, Py
else
return runtimeCall(callable_object, ArgPassSpec(0, 0, true, false), args, NULL, NULL, NULL, NULL);
} catch (ExcInfo e) {
Py_FatalError("unimplemented");
setCAPIException(e);
return NULL;
}
}
......@@ -712,7 +713,20 @@ extern "C" void PyErr_SetObject(PyObject* exception, PyObject* value) noexcept {
}
extern "C" PyObject* PyErr_Format(PyObject* exception, const char* format, ...) noexcept {
Py_FatalError("unimplemented");
va_list vargs;
PyObject* string;
#ifdef HAVE_STDARG_PROTOTYPES
va_start(vargs, format);
#else
va_start(vargs);
#endif
string = PyString_FromFormatV(format, vargs);
PyErr_SetObject(exception, string);
Py_XDECREF(string);
va_end(vargs);
return NULL;
}
extern "C" int PyErr_BadArgument() noexcept {
......@@ -1433,6 +1447,8 @@ extern "C" PyObject* Py_FindMethod(PyMethodDef* methods, PyObject* self, const c
}
// Builds a BoxedCApiFunction from a PyMethodDef entry, forwarding the entry's
// flags, name and C function pointer together with `self`.
// A non-NULL `module` is not supported yet and trips the RELEASE_ASSERT below.
extern "C" PyObject* PyCFunction_NewEx(PyMethodDef* ml, PyObject* self, PyObject* module) noexcept {
RELEASE_ASSERT(module == NULL, "not implemented");
// Only METH_VARARGS, METH_KEYWORDS, METH_NOARGS and METH_O may be set; any other flag bit fails this assert.
assert((ml->ml_flags & (~(METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O))) == 0);
return new BoxedCApiFunction(ml->ml_flags, self, ml->ml_name, ml->ml_meth);
}
......
......@@ -159,14 +159,16 @@ static Box* importSub(const std::string& name, const std::string& full_name, Box
return NULL;
}
static Box* import(const std::string* name, bool return_first) {
static Box* import(const std::string* name, bool return_first, int level) {
assert(name);
assert(name->size() > 0);
static StatCounter slowpath_import("slowpath_import");
slowpath_import.log();
BoxedDict* sys_modules = getSysModulesDict();
RELEASE_ASSERT(level == -1 || level == 0, "not implemented");
if (level == 0)
printf("Warning: import level 0 will be treated as -1!\n");
size_t l = 0, r;
Box* last_module = NULL;
......@@ -210,6 +212,23 @@ extern "C" PyObject* PyImport_ImportModuleNoBlock(const char* name) noexcept {
Py_FatalError("unimplemented");
}
// C API counterpart of __import__().
// Only a subset is implemented so far: globals, locals and fromlist must be NULL
// and level must be 0; anything else trips a RELEASE_ASSERT. Exceptions thrown
// by the import are converted into a pending C API exception and NULL is returned.
extern "C" PyObject* PyImport_ImportModuleLevel(char* name, PyObject* globals, PyObject* locals, PyObject* fromlist,
                                                int level) noexcept {
    RELEASE_ASSERT(globals == NULL, "not implemented");
    RELEASE_ASSERT(locals == NULL, "not implemented");
    RELEASE_ASSERT(fromlist == NULL, "not implemented");
    RELEASE_ASSERT(level == 0, "not implemented");

    try {
        std::string requested_module(name);
        // A missing fromlist is represented by None when handed to the importer.
        auto* from_imports = fromlist ? fromlist : None;
        return import(level, from_imports, &requested_module);
    } catch (ExcInfo e) {
        setCAPIException(e);
        return NULL;
    }
}
// Named the same thing as the CPython method:
static void ensure_fromlist(Box* module, Box* fromlist, const std::string& module_name, bool recursive) {
if (module->getattr("__path__") == NULL) {
......@@ -243,9 +262,9 @@ static void ensure_fromlist(Box* module, Box* fromlist, const std::string& modul
}
extern "C" Box* import(int level, Box* from_imports, const std::string* module_name) {
RELEASE_ASSERT(level == -1, "");
RELEASE_ASSERT(level == -1 || level == 0, "not implemented");
Box* module = import(module_name, from_imports == None);
Box* module = import(module_name, from_imports == None, level);
assert(module);
if (from_imports != None) {
......
This diff is collapsed.
......@@ -305,8 +305,6 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) {
raiseExcHelper(TypeError, "tuple.__new__(%s): %s is not a subtype of tuple", getNameOfClass(cls),
getNameOfClass(cls));
RELEASE_ASSERT(cls == tuple_cls, "");
int args_sz = args->elts.size();
int kwargs_sz = kwargs->d.size();
......@@ -335,7 +333,7 @@ extern "C" Box* tupleNew(Box* _cls, BoxedTuple* args, BoxedDict* kwargs) {
velts.push_back(e);
}
return new BoxedTuple(std::move(velts));
return new (cls) BoxedTuple(std::move(velts));
}
extern "C" int PyTuple_SetItem(PyObject* op, Py_ssize_t i, PyObject* newitem) noexcept {
......
......@@ -62,6 +62,7 @@ extern "C" void initfcntl();
extern "C" void inittime();
extern "C" void initarray();
extern "C" void initzlib();
extern "C" void init_codecs();
namespace pyston {
......@@ -1373,6 +1374,7 @@ void setupRuntime() {
inittime();
initarray();
initzlib();
init_codecs();
setupSysEnd();
......
......@@ -373,6 +373,42 @@ extern "C" Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch) noexcept {
Py_FatalError("unimplemented");
}
// Placeholder definitions for Unicode-related C API functions.
// None of them is implemented: each body only calls Py_FatalError("unimplemented"),
// so no value is ever returned and the arguments are ignored.
// NOTE(review): presumably these exist so the newly added _codecs module links — confirm.
extern "C" int PyUnicodeEncodeError_GetStart(PyObject*, Py_ssize_t*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" int PyUnicodeDecodeError_GetStart(PyObject*, Py_ssize_t*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" int PyUnicodeTranslateError_GetStart(PyObject*, Py_ssize_t*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" int PyUnicodeEncodeError_GetEnd(PyObject*, Py_ssize_t*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" int PyUnicodeDecodeError_GetEnd(PyObject*, Py_ssize_t*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" int PyUnicodeTranslateError_GetEnd(PyObject*, Py_ssize_t*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" PyObject* PyUnicodeEncodeError_GetObject(PyObject*) noexcept {
Py_FatalError("unimplemented");
}
extern "C" PyObject* _PyUnicode_DecodeUnicodeInternal(const char* s, Py_ssize_t size, const char* errors) noexcept {
Py_FatalError("unimplemented");
}
extern "C" PyObject* PyUnicode_BuildEncodingMap(PyObject* string) noexcept {
Py_FatalError("unimplemented");
}
// From CPython, unicodeobject.c
// Used by Py_UNICODE_ISSPACE in unicodeobject.h
/* Fast detection of the most frequent whitespace characters */
......
# allow-warning: converting unicode literal to str
# expected: fail
# - func_set_name not yet implemented
# Simple optparse test, taken from the optparse.py docstring:
from optparse import OptionParser
......
# allow-warning: converting unicode literal to str
# allow-warning: import level 0 will be treated as -1!
def test(string, encoding):
s = string.encode(encoding)
print encoding, s
assert string == s.decode(encoding)
test("hello world", "hex")
test("hello world", "base64")
test("\r\n\\", "string-escape")
......@@ -7,3 +7,4 @@ print sys.version[:3]
print os.path.exists(sys.executable)
print sys.prefix, sys.exec_prefix
print sys.copyright[-200:]
print sys.byteorder
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment