Commit 95fface9 authored by Rudi Chen's avatar Rudi Chen

Handle more slicing edge cases.

- Non-integer and non-None object passed into slices can fall back
  to item operator.
- Support indexeable items for slices when appropriate.
- Increase setitem and delitem ic slot sizes so that the rewriter
can succeed.
- Increase the performance of using slice operators on built-in types
  avoiding a lot of the guards and slowpath.
parent 987fa9ec
......@@ -302,11 +302,11 @@ ICSetupInfo* createGetitemIC(TypeRecorder* type_recorder) {
}
ICSetupInfo* createSetitemIC(TypeRecorder* type_recorder) {
return ICSetupInfo::initialize(true, 1, 256, ICSetupInfo::Setitem, type_recorder);
return ICSetupInfo::initialize(true, 1, 512, ICSetupInfo::Setitem, type_recorder);
}
ICSetupInfo* createDelitemIC(TypeRecorder* type_recorder) {
return ICSetupInfo::initialize(false, 1, 256, ICSetupInfo::Delitem, type_recorder);
return ICSetupInfo::initialize(false, 1, 512, ICSetupInfo::Delitem, type_recorder);
}
ICSetupInfo* createSetattrIC(TypeRecorder* type_recorder) {
......
......@@ -134,7 +134,6 @@ static Py_ssize_t list_length(Box* self) noexcept {
}
Box* _listSlice(BoxedList* self, i64 start, i64 stop, i64 step, i64 length) {
// printf("%ld %ld %ld\n", start, stop, step);
assert(step != 0);
if (step > 0) {
assert(0 <= start);
......@@ -419,7 +418,7 @@ int list_ass_ext_slice(BoxedList* self, PyObject* item, PyObject* value) {
}
Box* listSetitemSliceInt64(BoxedList* self, i64 start, i64 stop, i64 step, Box* v) {
RELEASE_ASSERT(step == 1, "step sizes must be 1 for now");
RELEASE_ASSERT(step == 1, "step sizes must be 1 in this code path");
boundSliceWithLength(&start, &stop, start, stop, self->size);
......
......@@ -2335,7 +2335,8 @@ extern "C" bool nonzero(Box* obj) {
|| isSubclass(obj->cls, Exception) || obj->cls == file_cls || obj->cls == traceback_cls
|| obj->cls == instancemethod_cls || obj->cls == module_cls || obj->cls == capifunc_cls
|| obj->cls == builtin_function_or_method_cls || obj->cls == method_cls || obj->cls == frame_cls
|| obj->cls == capi_getset_cls || obj->cls == pyston_getset_cls || obj->cls == wrapperdescr_cls,
|| obj->cls == generator_cls || obj->cls == capi_getset_cls || obj->cls == pyston_getset_cls
|| obj->cls == wrapperdescr_cls,
"%s.__nonzero__", getTypeName(obj)); // TODO
// TODO should rewrite these?
......@@ -4365,38 +4366,58 @@ extern "C" Box* unaryop(Box* operand, int op_type) {
return rtn;
}
Box* callItemAttr(Box* target, BoxedString* item_str, Box* item, Box* value, CallRewriteArgs* rewrite_args) {
if (value) {
return callattrInternal2(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), item, value);
} else {
return callattrInternal1(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(1), item);
}
}
// This function decides whether to call the slice operator (e.g. __getslice__)
// or the item operator (__getitem__).
Box* callItemOrSliceAttr(Box* target, BoxedString* item_str, BoxedString* slice_str, Box* slice, Box* value,
CallRewriteArgs* rewrite_args) {
// If we don't have a slice operator, fall back to item operator.
// No rewriting for the typeLookup here (i.e. no guards if there is no slice operator). These guards
// will be added when we call callattrInternal on a slice operator later. If the guards fail, we fall
// back into the slow path which has this fallback to the item operator.
Box* slice_attr = typeLookup(target->cls, slice_str, NULL);
if (!slice_attr) {
if (value) {
return callattrInternal2(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), slice, value);
} else {
return callattrInternal1(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(1), slice);
}
}
// Need a slice object to use the slice operators.
if (slice->cls != slice_cls) {
// This function contains a lot of logic for deciding between whether to call
// the slice operator or the item operator, so we can match CPython's behavior
// on custom classes that define those operators. However, for builtin types,
// we know we can call either and the behavior will be the same. Adding all those
// guards are unnecessary and bad for performance.
//
// Also, for special slicing logic (e.g. open slice ranges [:]), the builtin types
// have C-implemented functions that already handle all the edge cases, so we don't
// need to have a slowpath for them here.
if (target->cls == list_cls || target->cls == str_cls || target->cls == unicode_cls) {
if (rewrite_args) {
rewrite_args->arg1->addAttrGuard(offsetof(Box, cls), (uint64_t)slice_cls, /*negate=*/true);
rewrite_args->obj->addAttrGuard(offsetof(Box, cls), (uint64_t)target->cls);
}
return callItemAttr(target, item_str, slice, value, rewrite_args);
}
if (value) {
return callattrInternal2(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), slice, value);
} else {
return callattrInternal1(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(1), slice);
// Guard on the type of the object (need to have the slice operator attribute to call it).
Box* slice_attr = NULL;
if (rewrite_args) {
RewriterVar* target_cls = rewrite_args->obj->getAttr(offsetof(Box, cls));
GetattrRewriteArgs grewrite_args(rewrite_args->rewriter, target_cls, Location::any());
slice_attr = typeLookup(target->cls, slice_str, &grewrite_args);
if (!grewrite_args.out_success) {
rewrite_args = NULL;
}
} else {
if (rewrite_args) {
rewrite_args->arg1->addAttrGuard(offsetof(Box, cls), (uint64_t)slice_cls);
}
slice_attr = typeLookup(target->cls, slice_str, NULL);
}
if (!slice_attr) {
return callItemAttr(target, item_str, slice, value, rewrite_args);
}
// Need a slice object to use the slice operators.
if (rewrite_args) {
rewrite_args->arg1->addAttrGuard(offsetof(Box, cls), (uint64_t)slice->cls);
}
if (slice->cls != slice_cls) {
return callItemAttr(target, item_str, slice, value, rewrite_args);
}
BoxedSlice* bslice = (BoxedSlice*)slice;
......@@ -4409,29 +4430,33 @@ Box* callItemOrSliceAttr(Box* target, BoxedString* item_str, BoxedString* slice_
->addAttrGuard(offsetof(Box, cls), (uint64_t)none_cls, /*negate=*/true);
}
if (value) {
return callattrInternal2(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), slice, value);
} else {
return callattrInternal1(target, item_str, CLASS_ONLY, rewrite_args, ArgPassSpec(1), slice);
}
return callItemAttr(target, item_str, slice, value, rewrite_args);
} else {
rewrite_args = NULL;
REWRITE_ABORTED("");
Box* start = bslice->start;
Box* stop = bslice->stop;
// If the slice cannot be used as integer slices, also fall back to the get operator.
// We could optimize further here by having a version of isSliceIndex that
// creates guards, but it would only affect some rare edge cases.
if (!isSliceIndex(bslice->start) || !isSliceIndex(bslice->stop)) {
return callItemAttr(target, item_str, slice, value, rewrite_args);
}
// If we don't specify the start/stop (e.g. o[:]), the slice operator functions
// CPython seems to use 0 and sys.maxint as the default values.
if (bslice->start->cls == none_cls)
start = boxInt(0);
if (bslice->stop->cls == none_cls)
stop = boxInt(PyInt_GetMax());
int64_t start = 0, stop = PyInt_GetMax();
sliceIndex(bslice->start, &start);
sliceIndex(bslice->stop, &stop);
Box* boxedStart = boxInt(start);
Box* boxedStop = boxInt(stop);
if (value) {
return callattrInternal3(target, slice_str, CLASS_ONLY, rewrite_args, ArgPassSpec(3), start, stop, value);
return callattrInternal3(target, slice_str, CLASS_ONLY, rewrite_args, ArgPassSpec(3), boxedStart, boxedStop,
value);
} else {
return callattrInternal2(target, slice_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), start, stop);
return callattrInternal2(target, slice_str, CLASS_ONLY, rewrite_args, ArgPassSpec(2), boxedStart,
boxedStop);
}
}
}
......
......@@ -17,7 +17,6 @@
#include "core/options.h"
#include "core/types.h"
#include "runtime/objmodel.h"
#include "runtime/types.h"
namespace pyston {
......@@ -27,19 +26,8 @@ void parseSlice(BoxedSlice* slice, int size, i64* out_start, i64* out_stop, i64*
throwCAPIException();
}
void sliceIndex(Box* b, int64_t* out) {
if (b->cls == none_cls) {
// Leave default value in case of None (useful for slices like [2:])
} else if (b->cls == int_cls) {
*out = static_cast<BoxedInt*>(b)->n;
} else if (PyIndex_Check(b)) {
int64_t x = PyNumber_AsSsize_t(b, NULL);
if (!(x == -1 && PyErr_Occurred()))
*out = x;
} else {
raiseExcHelper(TypeError, "slice indices must be integers or "
"None or have an __index__ method");
}
bool isSliceIndex(Box* b) {
return b->cls == none_cls || b->cls == int_cls || PyIndex_Check(b);
}
void boundSliceWithLength(i64* start_out, i64* stop_out, i64 start, i64 stop, i64 size) {
......
......@@ -16,6 +16,7 @@
#define PYSTON_RUNTIME_UTIL_H
#include "core/types.h"
#include "runtime/types.h"
namespace pyston {
......@@ -24,7 +25,18 @@ class BoxedSlice;
void parseSlice(BoxedSlice* slice, int size, i64* out_start, i64* out_stop, i64* out_end, i64* out_length = nullptr);
// Analogue of _PyEval_SliceIndex
void sliceIndex(Box* b, int64_t* out);
inline void sliceIndex(Box* b, int64_t* out) {
if (b->cls == none_cls) {
// Leave default value in case of None (useful for slices like [2:])
return;
}
int ret = _PyEval_SliceIndex(b, out);
if (ret <= 0)
throwCAPIException();
}
bool isSliceIndex(Box* b);
// Adjust the start and stop bounds of the sequence we are slicing to its size.
// Negative values greater or equal to (-length) become positive values.
......
......@@ -18,15 +18,15 @@ def install_and_test_protobuf():
subprocess.check_call(["./configure", "--prefix="+INSTALL_DIR], cwd=PROTOBUF_DIR)
subprocess.check_call(["make", "-j4"], cwd=PROTOBUF_DIR)
subprocess.check_call(["make", "install"], cwd=PROTOBUF_DIR)
PROTOBUF_PY_DIR = os.path.join(PROTOBUF_DIR, "python")
env = os.environ
env["PATH"] = env["PATH"] + ":" + os.path.join(INSTALL_DIR, "bin")
env["LD_LIBRARY_PATH"] = os.path.join(INSTALL_DIR, "lib")
subprocess.check_call([PYTHON_EXE, "setup.py", "build"], cwd=PROTOBUF_PY_DIR, env=env)
expected = [{"ran": 216, "failures": 0, "errors": 1}]
expected = [{"ran": 216}]
run_test([PYTHON_EXE, "setup.py", "test"], cwd=PROTOBUF_PY_DIR, expected=expected, env=env)
create_virtenv(ENV_NAME, None, force_create = True)
install_and_test_protobuf()
......@@ -37,13 +37,26 @@ class Both(object):
def __setslice__(self, start, stop, item):
print "called setslice on object", start, stop
class IndexZero(object):
def __index__(self):
return 0
def __repr__(self):
return "0"
class FalseIndex(object):
def __index__(self):
return "troll"
indexable = Indexable()
sliceable = Sliceable()
index_zero = IndexZero()
false_index = FalseIndex()
both = Both()
numbers = range(10)
# Can use index and slice notation for object with only getitem
indexable[0]
indexable[index_zero]
indexable[:10]
indexable[11:]
indexable[:]
......@@ -61,17 +74,22 @@ del indexable[:]
del indexable[3:8]
del indexable[slice(1,12,2)]
# Can't use index notation or pass in a slice for objects with only getslice
try:
sliceable[0]
except TypeError:
print "no index notation with index"
print "can't use index notation or pass in a slice for objects with only getslice"
try:
sliceable['a':'b']
except TypeError:
print "can't pass in any type into a slice with only getslice"
try:
sliceable[1:10:2]
except TypeError:
print "need getitem to support variable-sized steps"
sliceable[index_zero:index_zero]
sliceable[:10]
sliceable[11:]
sliceable[:]
......@@ -88,11 +106,21 @@ both[:] = xrange(2)
both[3:8] = xrange(2)
both[::2] = xrange(2)
# Should all call getitem as a fallback
both['a']
both['a':'b']
both['a':'b':'c']
del both[0]
del both[:]
del both[3:8]
del both [::2]
try:
both[false_index:false_index]
except TypeError:
print "even if we have getitem, __index__ should not return a non-int"
# Number lists should have the set/get/del|item/slice functions
print numbers[0]
print numbers[:]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment