Commit 3052f922 authored by Antoine Pitrou's avatar Antoine Pitrou

Issue #12848: The pure Python pickle implementation now treats object lengths...

Issue #12848: The pure Python pickle implementation now treats object lengths as unsigned 32-bit integers, like the C implementation does.
Patch by Serhiy Storchaka.
parent 43cc0475
...@@ -265,7 +265,7 @@ class _Pickler: ...@@ -265,7 +265,7 @@ class _Pickler:
if i < 256: if i < 256:
return BINPUT + bytes([i]) return BINPUT + bytes([i])
else: else:
return LONG_BINPUT + pack("<i", i) return LONG_BINPUT + pack("<I", i)
return PUT + repr(i).encode("ascii") + b'\n' return PUT + repr(i).encode("ascii") + b'\n'
...@@ -275,7 +275,7 @@ class _Pickler: ...@@ -275,7 +275,7 @@ class _Pickler:
if i < 256: if i < 256:
return BINGET + bytes([i]) return BINGET + bytes([i])
else: else:
return LONG_BINGET + pack("<i", i) return LONG_BINGET + pack("<I", i)
return GET + repr(i).encode("ascii") + b'\n' return GET + repr(i).encode("ascii") + b'\n'
...@@ -497,7 +497,7 @@ class _Pickler: ...@@ -497,7 +497,7 @@ class _Pickler:
if n < 256: if n < 256:
self.write(SHORT_BINBYTES + bytes([n]) + bytes(obj)) self.write(SHORT_BINBYTES + bytes([n]) + bytes(obj))
else: else:
self.write(BINBYTES + pack("<i", n) + bytes(obj)) self.write(BINBYTES + pack("<I", n) + bytes(obj))
self.memoize(obj) self.memoize(obj)
dispatch[bytes] = save_bytes dispatch[bytes] = save_bytes
...@@ -505,7 +505,7 @@ class _Pickler: ...@@ -505,7 +505,7 @@ class _Pickler:
if self.bin: if self.bin:
encoded = obj.encode('utf-8', 'surrogatepass') encoded = obj.encode('utf-8', 'surrogatepass')
n = len(encoded) n = len(encoded)
self.write(BINUNICODE + pack("<i", n) + encoded) self.write(BINUNICODE + pack("<I", n) + encoded)
else: else:
obj = obj.replace("\\", "\\u005c") obj = obj.replace("\\", "\\u005c")
obj = obj.replace("\n", "\\u000a") obj = obj.replace("\n", "\\u000a")
...@@ -925,6 +925,9 @@ class _Unpickler: ...@@ -925,6 +925,9 @@ class _Unpickler:
def load_long4(self): def load_long4(self):
n = mloads(b'i' + self.read(4)) n = mloads(b'i' + self.read(4))
if n < 0:
# Corrupt or hostile pickle -- we never write one like this
raise UnpicklingError("LONG pickle has negative byte count");
data = self.read(n) data = self.read(n)
self.append(decode_long(data)) self.append(decode_long(data))
dispatch[LONG4[0]] = load_long4 dispatch[LONG4[0]] = load_long4
...@@ -953,14 +956,19 @@ class _Unpickler: ...@@ -953,14 +956,19 @@ class _Unpickler:
dispatch[STRING[0]] = load_string dispatch[STRING[0]] = load_string
def load_binstring(self): def load_binstring(self):
# Deprecated BINSTRING uses signed 32-bit length
len = mloads(b'i' + self.read(4)) len = mloads(b'i' + self.read(4))
if len < 0:
raise UnpicklingError("BINSTRING pickle has negative byte count");
data = self.read(len) data = self.read(len)
value = str(data, self.encoding, self.errors) value = str(data, self.encoding, self.errors)
self.append(value) self.append(value)
dispatch[BINSTRING[0]] = load_binstring dispatch[BINSTRING[0]] = load_binstring
def load_binbytes(self): def load_binbytes(self, unpack=struct.unpack, maxsize=sys.maxsize):
len = mloads(b'i' + self.read(4)) len, = unpack('<I', self.read(4))
if len > maxsize:
raise UnpicklingError("BINBYTES exceeds system's maximum size of %d bytes" % maxsize);
self.append(self.read(len)) self.append(self.read(len))
dispatch[BINBYTES[0]] = load_binbytes dispatch[BINBYTES[0]] = load_binbytes
...@@ -968,8 +976,10 @@ class _Unpickler: ...@@ -968,8 +976,10 @@ class _Unpickler:
self.append(str(self.readline()[:-1], 'raw-unicode-escape')) self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
dispatch[UNICODE[0]] = load_unicode dispatch[UNICODE[0]] = load_unicode
def load_binunicode(self): def load_binunicode(self, unpack=struct.unpack, maxsize=sys.maxsize):
len = mloads(b'i' + self.read(4)) len, = unpack('<I', self.read(4))
if len > maxsize:
raise UnpicklingError("BINUNICODE exceeds system's maximum size of %d bytes" % maxsize);
self.append(str(self.read(len), 'utf-8', 'surrogatepass')) self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
dispatch[BINUNICODE[0]] = load_binunicode dispatch[BINUNICODE[0]] = load_binunicode
...@@ -1100,6 +1110,9 @@ class _Unpickler: ...@@ -1100,6 +1110,9 @@ class _Unpickler:
return return
key = _inverted_registry.get(code) key = _inverted_registry.get(code)
if not key: if not key:
if code <= 0: # note that 0 is forbidden
# Corrupt or hostile pickle.
raise UnpicklingError("EXT specifies code <= 0");
raise ValueError("unregistered extension code %d" % code) raise ValueError("unregistered extension code %d" % code)
obj = self.find_class(*key) obj = self.find_class(*key)
_extension_cache[code] = obj _extension_cache[code] = obj
...@@ -1153,8 +1166,8 @@ class _Unpickler: ...@@ -1153,8 +1166,8 @@ class _Unpickler:
self.append(self.memo[i]) self.append(self.memo[i])
dispatch[BINGET[0]] = load_binget dispatch[BINGET[0]] = load_binget
def load_long_binget(self): def load_long_binget(self, unpack=struct.unpack):
i = mloads(b'i' + self.read(4)) i, = unpack('<I', self.read(4))
self.append(self.memo[i]) self.append(self.memo[i])
dispatch[LONG_BINGET[0]] = load_long_binget dispatch[LONG_BINGET[0]] = load_long_binget
...@@ -1172,9 +1185,9 @@ class _Unpickler: ...@@ -1172,9 +1185,9 @@ class _Unpickler:
self.memo[i] = self.stack[-1] self.memo[i] = self.stack[-1]
dispatch[BINPUT[0]] = load_binput dispatch[BINPUT[0]] = load_binput
def load_long_binput(self): def load_long_binput(self, unpack=struct.unpack, maxsize=sys.maxsize):
i = mloads(b'i' + self.read(4)) i, = unpack('<I', self.read(4))
if i < 0: if i > maxsize:
raise ValueError("negative LONG_BINPUT argument") raise ValueError("negative LONG_BINPUT argument")
self.memo[i] = self.stack[-1] self.memo[i] = self.stack[-1]
dispatch[LONG_BINPUT[0]] = load_long_binput dispatch[LONG_BINPUT[0]] = load_long_binput
......
...@@ -167,6 +167,10 @@ Core and Builtins ...@@ -167,6 +167,10 @@ Core and Builtins
Library Library
------- -------
- Issue #12848: The pure Python pickle implementation now treats object
lengths as unsigned 32-bit integers, like the C implementation does.
Patch by Serhiy Storchaka.
- Issue #1160: Fix compiling large regular expressions on UCS2 builds. - Issue #1160: Fix compiling large regular expressions on UCS2 builds.
Patch by Serhiy Storchaka. Patch by Serhiy Storchaka.
......
...@@ -1579,7 +1579,7 @@ save_long(PicklerObject *self, PyObject *obj) ...@@ -1579,7 +1579,7 @@ save_long(PicklerObject *self, PyObject *obj)
* byte at the start, and cut it back later if possible. * byte at the start, and cut it back later if possible.
*/ */
nbytes = (nbits >> 3) + 1; nbytes = (nbits >> 3) + 1;
if (nbytes > INT_MAX) { if (nbytes > 0x7fffffffL) {
PyErr_SetString(PyExc_OverflowError, PyErr_SetString(PyExc_OverflowError,
"long too large to pickle"); "long too large to pickle");
goto error; goto error;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment