Commit 59fb6342 authored by Serhiy Storchaka

Issue #25761: Improved detection of errors in broken pickle data.

parent c68e723e
......@@ -1031,7 +1031,7 @@ class _Unpickler:
self._unframer = _Unframer(self._file_read, self._file_readline)
self.read = self._unframer.read
self.readline = self._unframer.readline
self.mark = object() # any new unique object
self.metastack = []
self.stack = []
self.append = self.stack.append
self.proto = 0
......@@ -1047,20 +1047,12 @@ class _Unpickler:
except _Stop as stopinst:
return stopinst.value
# Return largest index k such that self.stack[k] is self.mark.
# If the stack doesn't contain a mark, eventually raises IndexError.
# This could be sped by maintaining another stack, of indices at which
# the mark appears. For that matter, the latter stack would suffice,
# and we wouldn't need to push mark objects on self.stack at all.
# Doing so is probably a good thing, though, since if the pickle is
# corrupt (or hostile) we may get a clue from finding self.mark embedded
# in unpickled objects.
def marker(self):
stack = self.stack
mark = self.mark
k = len(stack)-1
while stack[k] is not mark: k = k-1
return k
# Return a list of the items pushed onto the stack after the last MARK instruction.
def pop_mark(self):
items = self.stack
self.stack = self.metastack.pop()
self.append = self.stack.append
return items
def persistent_load(self, pid):
raise UnpicklingError("unsupported persistent id encountered")
......@@ -1237,8 +1229,8 @@ class _Unpickler:
dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
def load_tuple(self):
k = self.marker()
self.stack[k:] = [tuple(self.stack[k+1:])]
items = self.pop_mark()
self.append(tuple(items))
dispatch[TUPLE[0]] = load_tuple
def load_empty_tuple(self):
......@@ -1270,21 +1262,20 @@ class _Unpickler:
dispatch[EMPTY_SET[0]] = load_empty_set
def load_frozenset(self):
k = self.marker()
self.stack[k:] = [frozenset(self.stack[k+1:])]
items = self.pop_mark()
self.append(frozenset(items))
dispatch[FROZENSET[0]] = load_frozenset
def load_list(self):
k = self.marker()
self.stack[k:] = [self.stack[k+1:]]
items = self.pop_mark()
self.append(items)
dispatch[LIST[0]] = load_list
def load_dict(self):
k = self.marker()
items = self.stack[k+1:]
items = self.pop_mark()
d = {items[i]: items[i+1]
for i in range(0, len(items), 2)}
self.stack[k:] = [d]
self.append(d)
dispatch[DICT[0]] = load_dict
# INST and OBJ differ only in how they get a class object. It's not
......@@ -1292,9 +1283,7 @@ class _Unpickler:
# previously diverged and grew different bugs.
# klass is the class to instantiate, and args holds the arguments for
# klass.__init__ (the caller has already popped them off the stack).
def _instantiate(self, klass, k):
args = tuple(self.stack[k+1:])
del self.stack[k:]
def _instantiate(self, klass, args):
if (args or not isinstance(klass, type) or
hasattr(klass, "__getinitargs__")):
try:
......@@ -1310,14 +1299,14 @@ class _Unpickler:
module = self.readline()[:-1].decode("ascii")
name = self.readline()[:-1].decode("ascii")
klass = self.find_class(module, name)
self._instantiate(klass, self.marker())
self._instantiate(klass, self.pop_mark())
dispatch[INST[0]] = load_inst
def load_obj(self):
# Stack is ... markobject classobject arg1 arg2 ...
k = self.marker()
klass = self.stack.pop(k+1)
self._instantiate(klass, k)
args = self.pop_mark()
cls = args.pop(0)
self._instantiate(cls, args)
dispatch[OBJ[0]] = load_obj
def load_newobj(self):
......@@ -1402,12 +1391,14 @@ class _Unpickler:
dispatch[REDUCE[0]] = load_reduce
def load_pop(self):
del self.stack[-1]
if self.stack:
del self.stack[-1]
else:
self.pop_mark()
dispatch[POP[0]] = load_pop
def load_pop_mark(self):
k = self.marker()
del self.stack[k:]
self.pop_mark()
dispatch[POP_MARK[0]] = load_pop_mark
def load_dup(self):
......@@ -1463,17 +1454,14 @@ class _Unpickler:
dispatch[APPEND[0]] = load_append
def load_appends(self):
stack = self.stack
mark = self.marker()
list_obj = stack[mark - 1]
items = stack[mark + 1:]
items = self.pop_mark()
list_obj = self.stack[-1]
if isinstance(list_obj, list):
list_obj.extend(items)
else:
append = list_obj.append
for item in items:
append(item)
del stack[mark:]
dispatch[APPENDS[0]] = load_appends
def load_setitem(self):
......@@ -1485,27 +1473,21 @@ class _Unpickler:
dispatch[SETITEM[0]] = load_setitem
def load_setitems(self):
stack = self.stack
mark = self.marker()
dict = stack[mark - 1]
for i in range(mark + 1, len(stack), 2):
dict[stack[i]] = stack[i + 1]
del stack[mark:]
items = self.pop_mark()
dict = self.stack[-1]
for i in range(0, len(items), 2):
dict[items[i]] = items[i + 1]
dispatch[SETITEMS[0]] = load_setitems
def load_additems(self):
stack = self.stack
mark = self.marker()
set_obj = stack[mark - 1]
items = stack[mark + 1:]
items = self.pop_mark()
set_obj = self.stack[-1]
if isinstance(set_obj, set):
set_obj.update(items)
else:
add = set_obj.add
for item in items:
add(item)
del stack[mark:]
dispatch[ADDITEMS[0]] = load_additems
def load_build(self):
......@@ -1533,7 +1515,9 @@ class _Unpickler:
dispatch[BUILD[0]] = load_build
def load_mark(self):
self.append(self.mark)
self.metastack.append(self.stack)
self.stack = []
self.append = self.stack.append
dispatch[MARK[0]] = load_mark
def load_stop(self):
......
......@@ -1000,7 +1000,7 @@ class AbstractUnpickleTests(unittest.TestCase):
b'0', # POP
b'1', # POP_MARK
b'2', # DUP
# b'(2', # PyUnpickler doesn't raise
b'(2',
b'R', # REDUCE
b')R',
b'a', # APPEND
......@@ -1009,7 +1009,7 @@ class AbstractUnpickleTests(unittest.TestCase):
b'Nb',
b'd', # DICT
b'e', # APPENDS
# b'(e', # PyUnpickler raises AttributeError
b'(e',
b'ibuiltins\nlist\n', # INST
b'l', # LIST
b'o', # OBJ
......@@ -1022,7 +1022,7 @@ class AbstractUnpickleTests(unittest.TestCase):
b'NNs',
b't', # TUPLE
b'u', # SETITEMS
# b'(u', # PyUnpickler doesn't raise
b'(u',
b'}(Nu',
b'\x81', # NEWOBJ
b')\x81',
......@@ -1033,7 +1033,7 @@ class AbstractUnpickleTests(unittest.TestCase):
b'N\x87',
b'NN\x87',
b'\x90', # ADDITEMS
# b'(\x90', # PyUnpickler raises AttributeError
b'(\x90',
b'\x91', # FROZENSET
b'\x92', # NEWOBJ_EX
b')}\x92',
......@@ -1046,7 +1046,7 @@ class AbstractUnpickleTests(unittest.TestCase):
def test_bad_mark(self):
badpickles = [
# b'N(.', # STOP
b'N(.', # STOP
b'N(2', # DUP
b'cbuiltins\nlist\n)(R', # REDUCE
b'cbuiltins\nlist\n()R',
......@@ -1081,7 +1081,7 @@ class AbstractUnpickleTests(unittest.TestCase):
b'N(\x94', # MEMOIZE
]
for p in badpickles:
self.check_unpickling_error(self.bad_mark_errors, p)
self.check_unpickling_error(self.bad_stack_errors, p)
def test_truncated_data(self):
self.check_unpickling_error(EOFError, b'')
......@@ -2581,11 +2581,6 @@ class AbstractPickleModuleTests(unittest.TestCase):
self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)
def test_bad_input(self):
# Test issue4298
s = bytes([0x58, 0, 0, 0, 0x54])
self.assertRaises(EOFError, pickle.loads, s)
class AbstractPersistentPicklerTests(unittest.TestCase):
......
......@@ -33,8 +33,6 @@ class PyUnpicklerTests(AbstractUnpickleTests):
unpickler = pickle._Unpickler
bad_stack_errors = (IndexError,)
bad_mark_errors = (IndexError, pickle.UnpicklingError,
TypeError, AttributeError, EOFError)
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError,
struct.error, IndexError, ImportError)
......@@ -69,8 +67,6 @@ class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests,
pickler = pickle._Pickler
unpickler = pickle._Unpickler
bad_stack_errors = (pickle.UnpicklingError, IndexError)
bad_mark_errors = (pickle.UnpicklingError, IndexError,
TypeError, AttributeError, EOFError)
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError,
struct.error, IndexError, ImportError)
......@@ -132,7 +128,6 @@ if has_c_implementation:
class CUnpicklerTests(PyUnpicklerTests):
unpickler = _pickle.Unpickler
bad_stack_errors = (pickle.UnpicklingError,)
bad_mark_errors = (EOFError,)
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError)
......
......@@ -109,6 +109,8 @@ Core and Builtins
Library
-------
- Issue #25761: Improved detection of errors in broken pickle data.
- Issue #25717: Restore the previous behaviour of tolerating most fstat()
errors when opening files. This was a regression in 3.5a1, and stopped
anonymous temporary files from working in special cases.
......
......@@ -370,18 +370,12 @@ _Pickle_FastCall(PyObject *func, PyObject *obj)
/*************************************************************************/
static int
stack_underflow(void)
{
PickleState *st = _Pickle_GetGlobalState();
PyErr_SetString(st->UnpicklingError, "unpickling stack underflow");
return -1;
}
/* Internal data type used as the unpickling stack. */
typedef struct {
PyObject_VAR_HEAD
PyObject **data;
int mark_set; /* is MARK set? */
Py_ssize_t fence; /* position of top MARK or 0 */
Py_ssize_t allocated; /* number of slots in data allocated */
} Pdata;
......@@ -412,6 +406,8 @@ Pdata_New(void)
if (!(self = PyObject_New(Pdata, &Pdata_Type)))
return NULL;
Py_SIZE(self) = 0;
self->mark_set = 0;
self->fence = 0;
self->allocated = 8;
self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
if (self->data)
......@@ -429,8 +425,7 @@ Pdata_clear(Pdata *self, Py_ssize_t clearto)
{
Py_ssize_t i = Py_SIZE(self);
if (clearto < 0)
return stack_underflow();
assert(clearto >= self->fence);
if (clearto >= i)
return 0;
......@@ -466,6 +461,17 @@ Pdata_grow(Pdata *self)
return -1;
}
static int
Pdata_stack_underflow(Pdata *self)
{
PickleState *st = _Pickle_GetGlobalState();
PyErr_SetString(st->UnpicklingError,
self->mark_set ?
"unexpected MARK found" :
"unpickling stack underflow");
return -1;
}
/* D is a Pdata*. Pop the topmost element and store it into V, which
* must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
* is raised and V is set to NULL.
......@@ -473,9 +479,8 @@ Pdata_grow(Pdata *self)
static PyObject *
Pdata_pop(Pdata *self)
{
if (Py_SIZE(self) == 0) {
PickleState *st = _Pickle_GetGlobalState();
PyErr_SetString(st->UnpicklingError, "bad pickle data");
if (Py_SIZE(self) <= self->fence) {
Pdata_stack_underflow(self);
return NULL;
}
return self->data[--Py_SIZE(self)];
......@@ -507,6 +512,10 @@ Pdata_poptuple(Pdata *self, Py_ssize_t start)
PyObject *tuple;
Py_ssize_t len, i, j;
if (start < self->fence) {
Pdata_stack_underflow(self);
return NULL;
}
len = Py_SIZE(self) - start;
tuple = PyTuple_New(len);
if (tuple == NULL)
......@@ -4585,13 +4594,19 @@ find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
static Py_ssize_t
marker(UnpicklerObject *self)
{
PickleState *st = _Pickle_GetGlobalState();
Py_ssize_t mark;
if (self->num_marks < 1) {
PickleState *st = _Pickle_GetGlobalState();
PyErr_SetString(st->UnpicklingError, "could not find MARK");
return -1;
}
return self->marks[--self->num_marks];
mark = self->marks[--self->num_marks];
self->stack->mark_set = self->num_marks != 0;
self->stack->fence = self->num_marks ?
self->marks[self->num_marks - 1] : 0;
return mark;
}
static int
......@@ -5052,7 +5067,7 @@ load_counted_tuple(UnpicklerObject *self, int len)
PyObject *tuple;
if (Py_SIZE(self->stack) < len)
return stack_underflow();
return Pdata_stack_underflow(self->stack);
tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
if (tuple == NULL)
......@@ -5134,6 +5149,12 @@ load_dict(UnpicklerObject *self)
if ((dict = PyDict_New()) == NULL)
return -1;
if ((j - i) % 2 != 0) {
PickleState *st = _Pickle_GetGlobalState();
PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
return -1;
}
for (k = i + 1; k < j; k += 2) {
key = self->stack->data[k - 1];
value = self->stack->data[k];
......@@ -5201,7 +5222,7 @@ load_obj(UnpicklerObject *self)
return -1;
if (Py_SIZE(self->stack) - i < 1)
return stack_underflow();
return Pdata_stack_underflow(self->stack);
args = Pdata_poptuple(self->stack, i + 1);
if (args == NULL)
......@@ -5518,12 +5539,15 @@ load_pop(UnpicklerObject *self)
*/
if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
self->num_marks--;
} else if (len > 0) {
self->stack->mark_set = self->num_marks != 0;
self->stack->fence = self->num_marks ?
self->marks[self->num_marks - 1] : 0;
} else if (len <= self->stack->fence)
return Pdata_stack_underflow(self->stack);
else {
len--;
Py_DECREF(self->stack->data[len]);
Py_SIZE(self->stack) = len;
} else {
return stack_underflow();
}
return 0;
}
......@@ -5545,10 +5569,10 @@ static int
load_dup(UnpicklerObject *self)
{
PyObject *last;
Py_ssize_t len;
Py_ssize_t len = Py_SIZE(self->stack);
if ((len = Py_SIZE(self->stack)) <= 0)
return stack_underflow();
if (len <= self->stack->fence)
return Pdata_stack_underflow(self->stack);
last = self->stack->data[len - 1];
PDATA_APPEND(self->stack, last, -1);
return 0;
......@@ -5731,8 +5755,8 @@ load_put(UnpicklerObject *self)
return -1;
if (len < 2)
return bad_readline();
if (Py_SIZE(self->stack) <= 0)
return stack_underflow();
if (Py_SIZE(self->stack) <= self->stack->fence)
return Pdata_stack_underflow(self->stack);
value = self->stack->data[Py_SIZE(self->stack) - 1];
key = PyLong_FromString(s, NULL, 10);
......@@ -5760,8 +5784,8 @@ load_binput(UnpicklerObject *self)
if (_Unpickler_Read(self, &s, 1) < 0)
return -1;
if (Py_SIZE(self->stack) <= 0)
return stack_underflow();
if (Py_SIZE(self->stack) <= self->stack->fence)
return Pdata_stack_underflow(self->stack);
value = self->stack->data[Py_SIZE(self->stack) - 1];
idx = Py_CHARMASK(s[0]);
......@@ -5779,8 +5803,8 @@ load_long_binput(UnpicklerObject *self)
if (_Unpickler_Read(self, &s, 4) < 0)
return -1;
if (Py_SIZE(self->stack) <= 0)
return stack_underflow();
if (Py_SIZE(self->stack) <= self->stack->fence)
return Pdata_stack_underflow(self->stack);
value = self->stack->data[Py_SIZE(self->stack) - 1];
idx = calc_binsize(s, 4);
......@@ -5798,8 +5822,8 @@ load_memoize(UnpicklerObject *self)
{
PyObject *value;
if (Py_SIZE(self->stack) <= 0)
return stack_underflow();
if (Py_SIZE(self->stack) <= self->stack->fence)
return Pdata_stack_underflow(self->stack);
value = self->stack->data[Py_SIZE(self->stack) - 1];
return _Unpickler_MemoPut(self, self->memo_len, value);
......@@ -5813,8 +5837,8 @@ do_append(UnpicklerObject *self, Py_ssize_t x)
Py_ssize_t len, i;
len = Py_SIZE(self->stack);
if (x > len || x <= 0)
return stack_underflow();
if (x > len || x <= self->stack->fence)
return Pdata_stack_underflow(self->stack);
if (len == x) /* nothing to do */
return 0;
......@@ -5863,8 +5887,8 @@ do_append(UnpicklerObject *self, Py_ssize_t x)
static int
load_append(UnpicklerObject *self)
{
if (Py_SIZE(self->stack) - 1 <= 0)
return stack_underflow();
if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
return Pdata_stack_underflow(self->stack);
return do_append(self, Py_SIZE(self->stack) - 1);
}
......@@ -5886,8 +5910,8 @@ do_setitems(UnpicklerObject *self, Py_ssize_t x)
int status = 0;
len = Py_SIZE(self->stack);
if (x > len || x <= 0)
return stack_underflow();
if (x > len || x <= self->stack->fence)
return Pdata_stack_underflow(self->stack);
if (len == x) /* nothing to do */
return 0;
if ((len - x) % 2 != 0) {
......@@ -5940,8 +5964,8 @@ load_additems(UnpicklerObject *self)
if (mark < 0)
return -1;
len = Py_SIZE(self->stack);
if (mark > len || mark <= 0)
return stack_underflow();
if (mark > len || mark <= self->stack->fence)
return Pdata_stack_underflow(self->stack);
if (len == mark) /* nothing to do */
return 0;
......@@ -5996,8 +6020,8 @@ load_build(UnpicklerObject *self)
/* Stack is ... instance, state. We want to leave instance at
* the stack top, possibly mutated via instance.__setstate__(state).
*/
if (Py_SIZE(self->stack) < 2)
return stack_underflow();
if (Py_SIZE(self->stack) - 2 < self->stack->fence)
return Pdata_stack_underflow(self->stack);
PDATA_POP(self->stack, state);
if (state == NULL)
......@@ -6133,7 +6157,8 @@ load_mark(UnpicklerObject *self)
self->marks_size = (Py_ssize_t)alloc;
}
self->marks[self->num_marks++] = Py_SIZE(self->stack);
self->stack->mark_set = 1;
self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
return 0;
}
......@@ -6216,6 +6241,8 @@ load(UnpicklerObject *self)
char *s = NULL;
self->num_marks = 0;
self->stack->mark_set = 0;
self->stack->fence = 0;
self->proto = 0;
if (Py_SIZE(self->stack))
Pdata_clear(self->stack, 0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment