Commit 028b1922 authored by Julien Muchembled's avatar Julien Muchembled

Fix possible data corruption after FileStorage is truncated to roll back a transaction

Multi-threaded IO support, which is new to ZODB 3.10, allows clients to read
data (load & loadBefore) even after tpc_vote has started to write a new
transaction to disk. This is done by using different 'file' objects.

Issues start when a transaction is rolled back after data has been appended
(using the writing file object). Truncating is not enough because the FilePool
may have been used concurrently to read the end of the last transaction:
file objects have their own read buffers which, in this case, may also contain
the beginning of the aborted transaction.

So a solution is to invalidate read buffers whenever they may contain wrong
data. This patch does it on truncation, which happens rarely enough to not
affect performance.

We discovered this bug in the following conditions:
- ZODB splitted in several FileStorage
- many conflicts in the first committed DB, but always resolved
- unresolved conflict in another DB
If the transaction is replayed with success (no more conflict in the other DB),
a subsequent load of the object that could be resolved in the first DB may, for
example, return a wrong serial (tid of the aborted transaction) if the layout
of the committed transaction matches that of the aborted one.

The bug usually manifests with POSKeyError & CorruptedDataError exceptions in
ZEO logs, for example while trying to resolve a conflict (and restarting the
transaction does not help, causing Site Errors in Zope). But theorically,
this could also cause silent corruption or unpickling errors at client side.
parent b83ac1c2
...@@ -20,6 +20,7 @@ import contextlib ...@@ -20,6 +20,7 @@ import contextlib
import errno import errno
import logging import logging
import os import os
import sys
import threading import threading
import time import time
from struct import pack from struct import pack
...@@ -722,6 +723,7 @@ class FileStorage( ...@@ -722,6 +723,7 @@ class FileStorage(
# Hm, an error occurred writing out the data. Maybe the # Hm, an error occurred writing out the data. Maybe the
# disk is full. We don't want any turd at the end. # disk is full. We don't want any turd at the end.
self._file.truncate(self._pos) self._file.truncate(self._pos)
self._files.flush()
raise raise
self._nextpos = self._pos + (tl + 8) self._nextpos = self._pos + (tl + 8)
...@@ -776,6 +778,7 @@ class FileStorage( ...@@ -776,6 +778,7 @@ class FileStorage(
def _abort(self): def _abort(self):
if self._nextpos: if self._nextpos:
self._file.truncate(self._pos) self._file.truncate(self._pos)
self._files.flush()
self._nextpos=0 self._nextpos=0
self._blob_tpc_abort() self._blob_tpc_abort()
...@@ -2093,6 +2096,22 @@ class FilePool: ...@@ -2093,6 +2096,22 @@ class FilePool:
while self._files: while self._files:
self._files.pop().close() self._files.pop().close()
def flush(self):
"""Empty read buffers.
This is required if they contain data of rolled back transactions.
"""
with self.write_lock():
for f in self._files:
f.flush()
# Unfortunately, Python 3.x has no API to flush read buffers.
if sys.version_info.major > 2:
def flush(self):
with self.write_lock():
self.empty()
def close(self): def close(self):
with self._cond: with self._cond:
self.closed = True self.closed = True
......
...@@ -25,6 +25,7 @@ import zope.testing.setupstack ...@@ -25,6 +25,7 @@ import zope.testing.setupstack
from ZODB import POSException from ZODB import POSException
from ZODB import DB from ZODB import DB
from ZODB.fsIndex import fsIndex from ZODB.fsIndex import fsIndex
from ZODB.utils import U64, p64, z64
from ZODB.tests import StorageTestBase, BasicStorage, TransactionalUndoStorage from ZODB.tests import StorageTestBase, BasicStorage, TransactionalUndoStorage
from ZODB.tests import PackableStorage, Synchronization, ConflictResolution from ZODB.tests import PackableStorage, Synchronization, ConflictResolution
...@@ -217,7 +218,6 @@ class FileStorageTests( ...@@ -217,7 +218,6 @@ class FileStorageTests(
# global. # global.
import time import time
from ZODB.utils import U64, p64
from ZODB.FileStorage.format import CorruptedError from ZODB.FileStorage.format import CorruptedError
from ZODB.serialize import referencesf from ZODB.serialize import referencesf
...@@ -285,6 +285,27 @@ class FileStorageTests( ...@@ -285,6 +285,27 @@ class FileStorageTests(
else: else:
self.assertNotEqual(next_oid, None) self.assertNotEqual(next_oid, None)
def checkFlushAfterTruncate(self, fail=False):
r0 = self._dostore(z64)
storage = self._storage
t = transaction.Transaction()
storage.tpc_begin(t)
storage.store(z64, r0, b'foo', b'', t)
storage.tpc_vote(t)
# Read operations are done with separate 'file' objects with their
# own buffers: here, the buffer also includes voted data.
storage.load(z64)
# This must invalidate all read buffers.
storage.tpc_abort(t)
self._dostore(z64, r0, b'bar', 1)
# In the case that read buffers were not invalidated, return value
# is based on what was cached during the first load.
self.assertEqual(storage.load(z64)[0], b'foo' if fail else b'bar')
def checkFlushNeededAfterTruncate(self):
self._storage._files.flush = lambda: None
self.checkFlushAfterTruncate(True)
class FileStorageHexTests(FileStorageTests): class FileStorageHexTests(FileStorageTests):
def open(self, **kwargs): def open(self, **kwargs):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment