Bug #19148 Backup causes cluster down if _really_ early abort happens

An early abort, caused by failing to allocate buffers, would go down the abort track and
end up trying to sendDropTriggers (as a standard stop backup does).
However, at that point the backup may not have any tables defined yet,
since buffer allocation happens before the tables are added. Add a simple check in sendDropTrig
(and continue to the next step). Files may already be open, so we still continue to close them.

Also updated ERROR_Codes (and added one to test this bug).
parent 889b3a00
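
In essence the fix guards two places that assumed a fully defined backup: sendDropTrig now jumps straight to closeFiles when the table list is empty, and closeFilesDone reports zero log bytes/records when no log file was ever opened. Below is a minimal, self-contained C++ sketch of the first guard; the BackupRecord struct, its fields, and the function bodies are simplified stand-ins for illustration only, not the actual signal-driven NDB Backup block code (that code follows in the diff).

// Simplified sketch of the early-abort guard described in the commit message.
// All names here are illustrative stand-ins, not the real NDB kernel API.
#include <iostream>
#include <vector>

struct BackupRecord {
  std::vector<int> tables;   // tables registered for this backup (empty on early abort)
  bool filesOpen = true;     // files may already be open when the abort happens
};

static void closeFiles(BackupRecord& rec) {
  // Files may be open even if the backup aborted early, so always close them.
  rec.filesOpen = false;
  std::cout << "files closed\n";
}

static void sendDropTrig(BackupRecord& rec) {
  if (rec.tables.empty()) {
    // Early abort: buffer allocation failed before any table (and hence any
    // trigger) was defined, so skip the trigger drop and go straight to
    // closing files. Without this check the abort path carried on as if a
    // table existed.
    closeFiles(rec);
    return;
  }
  std::cout << "dropping triggers for " << rec.tables.size() << " tables\n";
  closeFiles(rec);
}

int main() {
  BackupRecord earlyAbort;   // no tables defined yet: the Bug #19148 scenario
  sendDropTrig(earlyAbort);  // skips the trigger drop but still closes files
}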
@@ -8,7 +8,7 @@ Next DBDICT 6007
 Next DBDIH 7178
 Next DBTC 8038
 Next CMVMI 9000
-Next BACKUP 10022
+Next BACKUP 10036
 Next DBUTIL 11002
 Next DBTUX 12008
 Next SUMA 13001
@@ -411,6 +411,11 @@ Backup Stuff:
 10028: Abort backup by error at reception of BACKUP_FRAGMENT_CONF at master (code 305)
 10029: Abort backup by error at reception of FSAPPENDCONF in slave (FileOrScanError = 5)
 10030: Simulate buffer full from trigger execution => abort backup
+10031: Error 331 for dictCommitTableMutex_locked
+10032: backup checkscan
+10033: backup checkscan
+10034: define backup reply error
+10035: Fail to allocate buffers
 11001: Send UTIL_SEQUENCE_REF (in master)
@@ -2004,7 +2004,15 @@ Backup::sendDropTrig(Signal* signal, BackupRecordPtr ptr)
   if (ptr.p->slaveData.dropTrig.tableId == RNIL) {
     jam();
-    ptr.p->tables.first(tabPtr);
+    if(ptr.p->tables.count())
+      ptr.p->tables.first(tabPtr);
+    else
+    {
+      // Early abort, go to close files
+      jam();
+      closeFiles(signal, ptr);
+      return;
+    }
   } else {
     jam();
     ndbrequire(findTable(ptr, tabPtr, ptr.p->slaveData.dropTrig.tableId));
@@ -2105,8 +2113,11 @@ Backup::execDROP_TRIG_REF(Signal* signal)
   BackupRecordPtr ptr;
   c_backupPool.getPtr(ptr, ptrI);
-  ndbout << "ERROR DROPPING TRIGGER: " << ref->getConf()->getTriggerId();
-  ndbout << " Err: " << (Uint32)ref->getErrorCode() << endl << endl;
+  if(ref->getConf()->getTriggerId() != -1)
+  {
+    ndbout << "ERROR DROPPING TRIGGER: " << ref->getConf()->getTriggerId();
+    ndbout << " Err: " << (Uint32)ref->getErrorCode() << endl << endl;
+  }
   dropTrigReply(signal, ptr);
 }
@@ -2538,8 +2549,9 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal)
     files[i].p->filePointer = RNIL;
     files[i].p->m_flags = 0;
     files[i].p->errorCode = 0;
-    if(files[i].p->pages.seize(noOfPages[i]) == false) {
+    if(ERROR_INSERTED(10035) || files[i].p->pages.seize(noOfPages[i]) == false)
+    {
       jam();
       DEBUG_OUT("Failed to seize " << noOfPages[i] << " pages");
       defineBackupRef(signal, ptr, DefineBackupRef::FailedToAllocateBuffers);
@@ -4451,14 +4463,24 @@ Backup::closeFilesDone(Signal* signal, BackupRecordPtr ptr)
   }
   jam();
-  BackupFilePtr filePtr;
-  ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
   StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend();
   conf->backupId = ptr.p->backupId;
   conf->backupPtr = ptr.i;
-  conf->noOfLogBytes = filePtr.p->operation.noOfBytes;
-  conf->noOfLogRecords = filePtr.p->operation.noOfRecords;
+  BackupFilePtr filePtr;
+  if(ptr.p->logFilePtr != RNIL)
+  {
+    ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
+    conf->noOfLogBytes= filePtr.p->operation.noOfBytes;
+    conf->noOfLogRecords= filePtr.p->operation.noOfRecords;
+  }
+  else
+  {
+    conf->noOfLogBytes= 0;
+    conf->noOfLogRecords= 0;
+  }
   sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal,
              StopBackupConf::SignalLength, JBB);
@@ -427,6 +427,7 @@ public:
     : slaveState(b, validSlaveTransitions, validSlaveTransitionsCount,1)
     , tables(tp), triggers(trp), files(bp)
     , masterData(b), backup(b)
+    , ctlFilePtr(RNIL), logFilePtr(RNIL), dataFilePtr(RNIL)
     {
     }
@@ -350,7 +350,8 @@ int
 FailS_codes[] = {
   10025,
   10027,
-  10033
+  10033,
+  10035
 };
 int
@@ -362,7 +363,8 @@ FailM_codes[] = {
   10027,
   10028,
   10031,
-  10033
+  10033,
+  10035
 };
 int