Commit 3d435f55 authored by Julien Muchembled

master: leave backup mode if upstream DB is truncated

Although the user should have left backup mode before truncating
upstream, this change should help fix that mistake and minimize
the risk of data corruption.

The previous behaviour was to crash with:

  RuntimeError: upstream DB truncated

This led to 2 issues:

1. As long as the upstream last tid remains older than the backup's,
   it is impossible to start the backup cluster if it is able to
   connect to upstream. To avoid another upstream downtime, one then
   has to fake a connection failure, e.g. with temporary firewall
   rules or different --upstream-* parameters, which is not practical.
2. Worse, if there are new commits upstream again, with a last tid newer
   than on backup, the user may forget to also truncate the backup
   cluster (all the more so if it is set up to restart automatically).
parent 22eaa437
...@@ -14,9 +14,11 @@ ...@@ -14,9 +14,11 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
from ..app import StateChangedException
from neo.lib import logging
from neo.lib.exception import PrimaryFailure from neo.lib.exception import PrimaryFailure
from neo.lib.handler import EventHandler from neo.lib.handler import EventHandler
from neo.lib.protocol import NodeTypes, NodeStates, Packets from neo.lib.protocol import ClusterStates, NodeTypes, NodeStates, Packets
from neo.lib.pt import PartitionTable from neo.lib.pt import PartitionTable
class BackupHandler(EventHandler): class BackupHandler(EventHandler):
...@@ -60,7 +62,9 @@ class BackupHandler(EventHandler): ...@@ -60,7 +62,9 @@ class BackupHandler(EventHandler):
# backup_tid could remain stuck to an old tid if upstream is idle. # backup_tid could remain stuck to an old tid if upstream is idle.
app.invalidatePartitions(tid, tid, xrange(app.pt.getPartitions())) app.invalidatePartitions(tid, tid, xrange(app.pt.getPartitions()))
else: else:
raise RuntimeError("upstream DB truncated") logging.critical("Upstream DB truncated. Leaving backup mode"
" in case this backup DB needs to be truncated.")
raise StateChangedException(ClusterStates.STOPPING_BACKUP)
app.ignore_invalidations = False app.ignore_invalidations = False
def invalidatePartitions(self, conn, tid, partition_list): def invalidatePartitions(self, conn, tid, partition_list):
......
...@@ -1226,6 +1226,32 @@ class ReplicationTests(NEOThreadedTest): ...@@ -1226,6 +1226,32 @@ class ReplicationTests(NEOThreadedTest):
backup.ticAndJoinStorageTasks() backup.ticAndJoinStorageTasks()
self.assertEqual(1, self.checkBackup(backup)) self.assertEqual(1, self.checkBackup(backup))
@backup_test()
def testUpstreamTruncated(self, backup):
# Scenario: the upstream DB is truncated while the backup cluster is still
# in backup mode. The backup cluster is expected to leave backup mode
# (ending up RUNNING) without losing data, and to refuse to resume backing
# up until it has itself been truncated to the same tid.
upstream = backup.upstream
# Import data into upstream twice, recording upstream's last tid after
# each import: tid1 is the future truncation point, tid2 the latest tid.
importZODB = upstream.importZODB()
importZODB(10)
tid1 = upstream.last_tid
importZODB(10)
tid2 = upstream.last_tid
self.tic()
# Precondition: the backup cluster is backing up and has reached tid2.
getBackupState = backup.neoctl.getClusterState
self.assertEqual(getBackupState(), ClusterStates.BACKINGUP)
self.assertEqual(backup.last_tid, tid2)
# Truncate upstream to tid1 while the backup cluster still holds tid2.
upstream.neoctl.truncate(tid1)
self.tic()
# The backup cluster must have left backup mode, keeping its data (tid2).
self.assertEqual(getBackupState(), ClusterStates.RUNNING)
self.assertEqual(backup.last_tid, tid2)
# Asking to start backing up again without truncating the backup DB
# must end up in RUNNING state again, with data still untouched.
backup.neoctl.setClusterState(ClusterStates.STARTING_BACKUP)
self.tic()
self.assertEqual(getBackupState(), ClusterStates.RUNNING)
self.assertEqual(backup.last_tid, tid2)
# Once the backup DB is truncated to the same tid as upstream,
# backup mode can be entered again.
backup.neoctl.truncate(tid1)
self.tic()
backup.neoctl.setClusterState(ClusterStates.STARTING_BACKUP)
self.tic()
self.assertEqual(getBackupState(), ClusterStates.BACKINGUP)
self.assertEqual(backup.last_tid, tid1)
@backup_test(3) @backup_test(3)
def testDeleteObject(self, backup): def testDeleteObject(self, backup):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment