Bug #19148 Backup causes cluster down if _really_ early abort happens

An early abort, caused by failing to allocate buffers, would go down the abort track and
end up trying to sendDropTriggers (as a standard stop backup does).
However, at that point the backup may not have any tables defined yet,
since buffer allocation happens before the tables are added. Add a simple check in sendDropTrig
(and continue to the next step). Files may already be open, so we still continue to close them.

Also updated ERROR_Codes (and added one to test this bug).
parent 889b3a00
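
In essence the fix guards two places that assumed a fully defined backup: sendDropTrig now jumps straight to closeFiles when the table list is empty, and closeFilesDone reports zero log bytes/records when no log file was ever opened. Below is a minimal, self-contained C++ sketch of the first guard; the BackupRecord struct, its fields, and the function bodies are simplified stand-ins for illustration only, not the actual signal-driven NDB Backup block code (that code follows in the diff).

// Simplified sketch of the early-abort guard described in the commit message.
// All names here are illustrative stand-ins, not the real NDB kernel API.
#include <iostream>
#include <vector>

struct BackupRecord {
  std::vector<int> tables;   // tables registered for this backup (empty on early abort)
  bool filesOpen = true;     // files may already be open when the abort happens
};

static void closeFiles(BackupRecord& rec) {
  // Files may be open even if the backup aborted early, so always close them.
  rec.filesOpen = false;
  std::cout << "files closed\n";
}

static void sendDropTrig(BackupRecord& rec) {
  if (rec.tables.empty()) {
    // Early abort: buffer allocation failed before any table (and hence any
    // trigger) was defined, so skip the trigger drop and go straight to
    // closing files. Without this check the abort path carried on as if a
    // table existed.
    closeFiles(rec);
    return;
  }
  std::cout << "dropping triggers for " << rec.tables.size() << " tables\n";
  closeFiles(rec);
}

int main() {
  BackupRecord earlyAbort;   // no tables defined yet: the Bug #19148 scenario
  sendDropTrig(earlyAbort);  // skips the trigger drop but still closes files
}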
@@ -8,7 +8,7 @@ Next DBDICT 6007
 Next DBDIH 7178
 Next DBTC 8038
 Next CMVMI 9000
-Next BACKUP 10022
+Next BACKUP 10036
 Next DBUTIL 11002
 Next DBTUX 12008
 Next SUMA 13001
@@ -411,6 +411,11 @@ Backup Stuff:
 10028: Abort backup by error at reception of BACKUP_FRAGMENT_CONF at master (code 305)
 10029: Abort backup by error at reception of FSAPPENDCONF in slave (FileOrScanError = 5)
 10030: Simulate buffer full from trigger execution => abort backup
+10031: Error 331 for dictCommitTableMutex_locked
+10032: backup checkscan
+10033: backup checkscan
+10034: define backup reply error
+10035: Fail to allocate buffers
 11001: Send UTIL_SEQUENCE_REF (in master)
@@ -2004,7 +2004,15 @@ Backup::sendDropTrig(Signal* signal, BackupRecordPtr ptr)
   if (ptr.p->slaveData.dropTrig.tableId == RNIL) {
     jam();
-    ptr.p->tables.first(tabPtr);
+    if(ptr.p->tables.count())
+      ptr.p->tables.first(tabPtr);
+    else
+    {
+      // Early abort, go to close files
+      jam();
+      closeFiles(signal, ptr);
+      return;
+    }
   } else {
     jam();
     ndbrequire(findTable(ptr, tabPtr, ptr.p->slaveData.dropTrig.tableId));
@@ -2105,8 +2113,11 @@ Backup::execDROP_TRIG_REF(Signal* signal)
   BackupRecordPtr ptr;
   c_backupPool.getPtr(ptr, ptrI);
-  ndbout << "ERROR DROPPING TRIGGER: " << ref->getConf()->getTriggerId();
-  ndbout << " Err: " << (Uint32)ref->getErrorCode() << endl << endl;
+  if(ref->getConf()->getTriggerId() != -1)
+  {
+    ndbout << "ERROR DROPPING TRIGGER: " << ref->getConf()->getTriggerId();
+    ndbout << " Err: " << (Uint32)ref->getErrorCode() << endl << endl;
+  }
   dropTrigReply(signal, ptr);
 }
@@ -2538,8 +2549,9 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal)
     files[i].p->filePointer = RNIL;
     files[i].p->m_flags = 0;
     files[i].p->errorCode = 0;
-    if(files[i].p->pages.seize(noOfPages[i]) == false) {
+    if(ERROR_INSERTED(10035) || files[i].p->pages.seize(noOfPages[i]) == false)
+    {
       jam();
       DEBUG_OUT("Failed to seize " << noOfPages[i] << " pages");
       defineBackupRef(signal, ptr, DefineBackupRef::FailedToAllocateBuffers);
@@ -4451,14 +4463,24 @@ Backup::closeFilesDone(Signal* signal, BackupRecordPtr ptr)
   }
   jam();
-  BackupFilePtr filePtr;
-  ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
   StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend();
   conf->backupId = ptr.p->backupId;
   conf->backupPtr = ptr.i;
-  conf->noOfLogBytes = filePtr.p->operation.noOfBytes;
-  conf->noOfLogRecords = filePtr.p->operation.noOfRecords;
+  BackupFilePtr filePtr;
+  if(ptr.p->logFilePtr != RNIL)
+  {
+    ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
+    conf->noOfLogBytes= filePtr.p->operation.noOfBytes;
+    conf->noOfLogRecords= filePtr.p->operation.noOfRecords;
+  }
+  else
+  {
+    conf->noOfLogBytes= 0;
+    conf->noOfLogRecords= 0;
+  }
   sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal,
              StopBackupConf::SignalLength, JBB);
@@ -427,6 +427,7 @@ public:
     : slaveState(b, validSlaveTransitions, validSlaveTransitionsCount,1)
     , tables(tp), triggers(trp), files(bp)
     , masterData(b), backup(b)
+    , ctlFilePtr(RNIL), logFilePtr(RNIL), dataFilePtr(RNIL)
     {
     }
@@ -350,7 +350,8 @@ int
 FailS_codes[] = {
   10025,
   10027,
-  10033
+  10033,
+  10035
 };
 int
@@ -362,7 +363,8 @@ FailM_codes[] = {
   10027,
   10028,
   10031,
-  10033
+  10033,
+  10035
 };
 int