Commit e6142c47 authored by joreland@mysql.com

bug#9924 - ndb backup abort handling

  Redo abort handling according to the description in Backup.txt
bug#9960 - ndb backup
      increase wait completed timeout to 48 hours
parent f931466f
...@@ -75,7 +75,7 @@ class DefineBackupRef { ...@@ -75,7 +75,7 @@ class DefineBackupRef {
friend bool printDEFINE_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); friend bool printDEFINE_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16);
public: public:
STATIC_CONST( SignalLength = 3 ); STATIC_CONST( SignalLength = 4 );
enum ErrorCode { enum ErrorCode {
Undefined = 1340, Undefined = 1340,
...@@ -92,6 +92,7 @@ private: ...@@ -92,6 +92,7 @@ private:
Uint32 backupId; Uint32 backupId;
Uint32 backupPtr; Uint32 backupPtr;
Uint32 errorCode; Uint32 errorCode;
Uint32 nodeId;
}; };
class DefineBackupConf { class DefineBackupConf {
...@@ -158,7 +159,7 @@ class StartBackupRef { ...@@ -158,7 +159,7 @@ class StartBackupRef {
friend bool printSTART_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); friend bool printSTART_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16);
public: public:
STATIC_CONST( SignalLength = 4 ); STATIC_CONST( SignalLength = 5 );
enum ErrorCode { enum ErrorCode {
FailedToAllocateTriggerRecord = 1 FailedToAllocateTriggerRecord = 1
...@@ -168,6 +169,7 @@ private: ...@@ -168,6 +169,7 @@ private:
Uint32 backupPtr; Uint32 backupPtr;
Uint32 signalNo; Uint32 signalNo;
Uint32 errorCode; Uint32 errorCode;
Uint32 nodeId;
}; };
class StartBackupConf { class StartBackupConf {
...@@ -232,9 +234,8 @@ public: ...@@ -232,9 +234,8 @@ public:
private: private:
Uint32 backupId; Uint32 backupId;
Uint32 backupPtr; Uint32 backupPtr;
Uint32 tableId;
Uint32 fragmentNo;
Uint32 errorCode; Uint32 errorCode;
Uint32 nodeId;
}; };
class BackupFragmentConf { class BackupFragmentConf {
...@@ -296,12 +297,13 @@ class StopBackupRef { ...@@ -296,12 +297,13 @@ class StopBackupRef {
friend bool printSTOP_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16); friend bool printSTOP_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16);
public: public:
STATIC_CONST( SignalLength = 3 ); STATIC_CONST( SignalLength = 4 );
private: private:
Uint32 backupId; Uint32 backupId;
Uint32 backupPtr; Uint32 backupPtr;
Uint32 errorCode; Uint32 errorCode;
Uint32 nodeId;
}; };
class StopBackupConf { class StopBackupConf {
......
...@@ -240,6 +240,9 @@ public: ...@@ -240,6 +240,9 @@ public:
FileOrScanError = 1325, // slave -> coordinator FileOrScanError = 1325, // slave -> coordinator
BackupFailureDueToNodeFail = 1326, // slave -> slave BackupFailureDueToNodeFail = 1326, // slave -> slave
OkToClean = 1327 // master -> slave OkToClean = 1327 // master -> slave
,AbortScan = 1328
,IncompatibleVersions = 1329
}; };
private: private:
Uint32 requestType; Uint32 requestType;
......
...@@ -90,10 +90,8 @@ printBACKUP_FRAGMENT_REQ(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){ ...@@ -90,10 +90,8 @@ printBACKUP_FRAGMENT_REQ(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){
bool bool
printBACKUP_FRAGMENT_REF(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){ printBACKUP_FRAGMENT_REF(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){
BackupFragmentRef* sig = (BackupFragmentRef*)data; BackupFragmentRef* sig = (BackupFragmentRef*)data;
fprintf(out, " backupPtr: %d backupId: %d\n", fprintf(out, " backupPtr: %d backupId: %d nodeId: %d errorCode: %d\n",
sig->backupPtr, sig->backupId); sig->backupPtr, sig->backupId, sig->nodeId, sig->errorCode);
fprintf(out, " tableId: %d fragmentNo: %d errorCode: %d\n",
sig->tableId, sig->fragmentNo, sig->errorCode);
return true; return true;
} }
......
This diff is collapsed.
...@@ -232,6 +232,7 @@ public: ...@@ -232,6 +232,7 @@ public:
*/ */
bool newScan(); bool newScan();
bool scanConf(Uint32 noOfOps, Uint32 opLen); bool scanConf(Uint32 noOfOps, Uint32 opLen);
bool closeScan();
/** /**
* Per record * Per record
...@@ -330,7 +331,7 @@ public: ...@@ -330,7 +331,7 @@ public:
Uint8 fileOpened; Uint8 fileOpened;
Uint8 fileRunning; Uint8 fileRunning;
Uint8 fileDone; Uint8 fileClosing;
Uint8 scanRunning; Uint8 scanRunning;
}; };
typedef Ptr<BackupFile> BackupFilePtr; typedef Ptr<BackupFile> BackupFilePtr;
...@@ -403,13 +404,11 @@ public: ...@@ -403,13 +404,11 @@ public:
ArrayPool<TriggerRecord> & trp) ArrayPool<TriggerRecord> & trp)
: slaveState(b, validSlaveTransitions, validSlaveTransitionsCount,1) : slaveState(b, validSlaveTransitions, validSlaveTransitionsCount,1)
, tables(tp), triggers(trp), files(bp), pages(pp) , tables(tp), triggers(trp), files(bp), pages(pp)
, masterData(b, validMasterTransitions, validMasterTransitionsCount) , masterData(b), backup(b)
, backup(b) {
{ }
closingFiles = false;
okToCleanMaster = true;
}
Uint32 m_gsn;
CompoundState slaveState; CompoundState slaveState;
Uint32 clientRef; Uint32 clientRef;
...@@ -420,9 +419,6 @@ public: ...@@ -420,9 +419,6 @@ public:
Uint32 errorCode; Uint32 errorCode;
NdbNodeBitmask nodes; NdbNodeBitmask nodes;
bool okToCleanMaster;
bool closingFiles;
Uint64 noOfBytes; Uint64 noOfBytes;
Uint64 noOfRecords; Uint64 noOfRecords;
Uint64 noOfLogBytes; Uint64 noOfLogBytes;
...@@ -444,15 +440,13 @@ public: ...@@ -444,15 +440,13 @@ public:
SimpleProperties props;// Used for (un)packing backup request SimpleProperties props;// Used for (un)packing backup request
struct MasterData { struct MasterData {
MasterData(Backup & b, const State valid[], Uint32 count) MasterData(Backup & b)
: state(b, valid, count, 0) {
{ }
}
MutexHandle2<BACKUP_DEFINE_MUTEX> m_defineBackupMutex; MutexHandle2<BACKUP_DEFINE_MUTEX> m_defineBackupMutex;
MutexHandle2<DICT_COMMIT_TABLE_MUTEX> m_dictCommitTableMutex; MutexHandle2<DICT_COMMIT_TABLE_MUTEX> m_dictCommitTableMutex;
Uint32 gsn; Uint32 gsn;
CompoundState state;
SignalCounter sendCounter; SignalCounter sendCounter;
Uint32 errorCode; Uint32 errorCode;
struct { struct {
...@@ -557,7 +551,8 @@ public: ...@@ -557,7 +551,8 @@ public:
void stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId); void stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId);
void defineBackupRef(Signal*, BackupRecordPtr, Uint32 errCode = 0); void defineBackupRef(Signal*, BackupRecordPtr, Uint32 errCode = 0);
void backupFragmentRef(Signal * signal, BackupFilePtr filePtr);
void nextFragment(Signal*, BackupRecordPtr); void nextFragment(Signal*, BackupRecordPtr);
void sendCreateTrig(Signal*, BackupRecordPtr ptr, TablePtr tabPtr); void sendCreateTrig(Signal*, BackupRecordPtr ptr, TablePtr tabPtr);
...@@ -578,14 +573,14 @@ public: ...@@ -578,14 +573,14 @@ public:
void sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr, Uint32 errCode); void sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr, Uint32 errCode);
void sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr, void sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr,
Uint32 errCode); Uint32 errCode);
void masterAbort(Signal*, BackupRecordPtr ptr, bool controlledAbort); void masterAbort(Signal*, BackupRecordPtr ptr);
void masterSendAbortBackup(Signal*, BackupRecordPtr ptr); void masterSendAbortBackup(Signal*, BackupRecordPtr ptr);
void slaveAbort(Signal*, BackupRecordPtr ptr); void slaveAbort(Signal*, BackupRecordPtr ptr);
void abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr); void abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr);
void abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanDone); void abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanDone);
bool verifyNodesAlive(const NdbNodeBitmask& aNodeBitMask); bool verifyNodesAlive(BackupRecordPtr, const NdbNodeBitmask& aNodeBitMask);
bool checkAbort(BackupRecordPtr ptr); bool checkAbort(BackupRecordPtr ptr);
void checkNodeFail(Signal* signal, void checkNodeFail(Signal* signal,
BackupRecordPtr ptr, BackupRecordPtr ptr,
...@@ -603,9 +598,8 @@ public: ...@@ -603,9 +598,8 @@ public:
void sendBackupRef(BlockReference ref, Signal *signal, void sendBackupRef(BlockReference ref, Signal *signal,
Uint32 senderData, Uint32 errorCode); Uint32 senderData, Uint32 errorCode);
void dumpUsedResources(); void dumpUsedResources();
void cleanupMasterResources(BackupRecordPtr ptr); void cleanup(Signal*, BackupRecordPtr ptr);
void cleanupSlaveResources(BackupRecordPtr ptr); void abort_scan(Signal*, BackupRecordPtr ptr);
void cleanupFinalResources(BackupRecordPtr ptr);
void removeBackup(Signal*, BackupRecordPtr ptr); void removeBackup(Signal*, BackupRecordPtr ptr);
void sendSTTORRY(Signal*); void sendSTTORRY(Signal*);
......
...@@ -341,3 +341,28 @@ start backup ...@@ -341,3 +341,28 @@ start backup
(ERROR_INSERTED(10022))) { (ERROR_INSERTED(10022))) {
if (ERROR_INSERTED(10029)) { if (ERROR_INSERTED(10029)) {
if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) { if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) {
----- XXX ---
DEFINE_BACKUP_REF ->
ABORT_BACKUP_ORD(no reply) when all DEFINE_BACKUP replies have arrived
START_BACKUP_REF
ABORT_BACKUP_ORD(no reply) when all START_BACKUP_ replies have arrived
BACKUP_FRAGMENT_REF
ABORT_BACKUP_ORD(reply) directly to all nodes running BACKUP_FRAGMENT
When all nodes have replied BACKUP_FRAGMENT
ABORT_BACKUP_ORD(no reply)
STOP_BACKUP_REF
ABORT_BACKUP_ORD(no reply) when all STOP_BACKUP_ replies have arrived
NF_COMPLETE_REP
slave dies
master sends OUTSTANDING_REF to self
slave does nothing
master dies
slave elects self as master and sets only itself as participant
...@@ -175,7 +175,7 @@ Backup::Backup(const Configuration & conf) : ...@@ -175,7 +175,7 @@ Backup::Backup(const Configuration & conf) :
addRecSignal(GSN_START_BACKUP_CONF, &Backup::execSTART_BACKUP_CONF); addRecSignal(GSN_START_BACKUP_CONF, &Backup::execSTART_BACKUP_CONF);
addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Backup::execBACKUP_FRAGMENT_REQ); addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Backup::execBACKUP_FRAGMENT_REQ);
//addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF); addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF);
addRecSignal(GSN_BACKUP_FRAGMENT_CONF, &Backup::execBACKUP_FRAGMENT_CONF); addRecSignal(GSN_BACKUP_FRAGMENT_CONF, &Backup::execBACKUP_FRAGMENT_CONF);
addRecSignal(GSN_STOP_BACKUP_REQ, &Backup::execSTOP_BACKUP_REQ); addRecSignal(GSN_STOP_BACKUP_REQ, &Backup::execSTOP_BACKUP_REQ);
......
...@@ -126,6 +126,7 @@ Cmvmi::Cmvmi(const Configuration & conf) : ...@@ -126,6 +126,7 @@ Cmvmi::Cmvmi(const Configuration & conf) :
} }
setNodeInfo(getOwnNodeId()).m_connected = true; setNodeInfo(getOwnNodeId()).m_connected = true;
setNodeInfo(getOwnNodeId()).m_version = ndbGetOwnVersion();
} }
Cmvmi::~Cmvmi() Cmvmi::~Cmvmi()
......
...@@ -1565,9 +1565,9 @@ ndb_mgm_start_backup(NdbMgmHandle handle, int wait_completed, ...@@ -1565,9 +1565,9 @@ ndb_mgm_start_backup(NdbMgmHandle handle, int wait_completed,
{ // start backup can take some time, set timeout high { // start backup can take some time, set timeout high
Uint64 old_timeout= handle->read_timeout; Uint64 old_timeout= handle->read_timeout;
if (wait_completed == 2) if (wait_completed == 2)
handle->read_timeout= 30*60*1000; // 30 minutes handle->read_timeout= 48*60*60*1000; // 48 hours
else if (wait_completed == 1) else if (wait_completed == 1)
handle->read_timeout= 5*60*1000; // 5 minutes handle->read_timeout= 10*60*1000; // 10 minutes
reply = ndb_mgm_call(handle, start_backup_reply, "start backup", &args); reply = ndb_mgm_call(handle, start_backup_reply, "start backup", &args);
handle->read_timeout= old_timeout; handle->read_timeout= old_timeout;
} }
......
...@@ -791,7 +791,7 @@ MgmtSrvr::restartNode(int processId, bool nostart, ...@@ -791,7 +791,7 @@ MgmtSrvr::restartNode(int processId, bool nostart,
result = sendSignal(processId, NO_WAIT, signal, true); result = sendSignal(processId, NO_WAIT, signal, true);
} }
if (result == -1) { if (result == -1 && theWaitState != WAIT_NODEFAILURE) {
m_stopRec.inUse = false; m_stopRec.inUse = false;
return SEND_OR_RECEIVE_FAILED; return SEND_OR_RECEIVE_FAILED;
} }
...@@ -1920,6 +1920,7 @@ MgmtSrvr::handleReceivedSignal(NdbApiSignal* signal) ...@@ -1920,6 +1920,7 @@ MgmtSrvr::handleReceivedSignal(NdbApiSignal* signal)
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("I'm not master resending to %d", aNodeId); ndbout_c("I'm not master resending to %d", aNodeId);
#endif #endif
theWaitNode= aNodeId;
NdbApiSignal aSignal(_ownReference); NdbApiSignal aSignal(_ownReference);
BackupReq* req = CAST_PTR(BackupReq, aSignal.getDataPtrSend()); BackupReq* req = CAST_PTR(BackupReq, aSignal.getDataPtrSend());
aSignal.set(TestOrd::TraceAPI, BACKUP, GSN_BACKUP_REQ, aSignal.set(TestOrd::TraceAPI, BACKUP, GSN_BACKUP_REQ,
...@@ -1947,6 +1948,7 @@ MgmtSrvr::handleReceivedSignal(NdbApiSignal* signal) ...@@ -1947,6 +1948,7 @@ MgmtSrvr::handleReceivedSignal(NdbApiSignal* signal)
event.Event = BackupEvent::BackupAborted; event.Event = BackupEvent::BackupAborted;
event.Aborted.Reason = rep->reason; event.Aborted.Reason = rep->reason;
event.Aborted.BackupId = rep->backupId; event.Aborted.BackupId = rep->backupId;
event.Aborted.ErrorCode = rep->reason;
backupCallback(event); backupCallback(event);
} }
break; break;
...@@ -2076,6 +2078,13 @@ MgmtSrvr::handleStatus(NodeId nodeId, bool alive, bool nfComplete) ...@@ -2076,6 +2078,13 @@ MgmtSrvr::handleStatus(NodeId nodeId, bool alive, bool nfComplete)
handleStopReply(nodeId, 0); handleStopReply(nodeId, 0);
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
if(theWaitNode == nodeId &&
theWaitState != NO_WAIT && theWaitState != WAIT_STOP)
{
theWaitState = WAIT_NODEFAILURE;
NdbCondition_Signal(theMgmtWaitForResponseCondPtr);
}
} }
eventReport(_ownNodeId, theData); eventReport(_ownNodeId, theData);
...@@ -2427,7 +2436,7 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) ...@@ -2427,7 +2436,7 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted)
int result; int result;
if (waitCompleted == 2) { if (waitCompleted == 2) {
result = sendRecSignal(nodeId, WAIT_BACKUP_COMPLETED, result = sendRecSignal(nodeId, WAIT_BACKUP_COMPLETED,
signal, true, 30*60*1000 /*30 secs*/); signal, true, 48*60*60*1000 /* 48 hours */);
} }
else if (waitCompleted == 1) { else if (waitCompleted == 1) {
result = sendRecSignal(nodeId, WAIT_BACKUP_STARTED, result = sendRecSignal(nodeId, WAIT_BACKUP_STARTED,
...@@ -2456,22 +2465,6 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) ...@@ -2456,22 +2465,6 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted)
return -1; return -1;
break; break;
} }
} else {
switch(m_lastBackupEvent.Event){
case BackupEvent::BackupCompleted:
backupId = m_lastBackupEvent.Completed.BackupId;
break;
case BackupEvent::BackupStarted:
backupId = m_lastBackupEvent.Started.BackupId;
break;
case BackupEvent::BackupFailedToStart:
return m_lastBackupEvent.FailedToStart.ErrorCode;
case BackupEvent::BackupAborted:
return m_lastBackupEvent.Aborted.ErrorCode;
default:
return -1;
break;
}
} }
return 0; return 0;
......
...@@ -611,7 +611,8 @@ private: ...@@ -611,7 +611,8 @@ private:
WAIT_STOP, WAIT_STOP,
WAIT_BACKUP_STARTED, WAIT_BACKUP_STARTED,
WAIT_BACKUP_COMPLETED, WAIT_BACKUP_COMPLETED,
WAIT_VERSION WAIT_VERSION,
WAIT_NODEFAILURE
}; };
/** /**
...@@ -695,6 +696,7 @@ private: ...@@ -695,6 +696,7 @@ private:
NdbApiSignal* theSignalIdleList; NdbApiSignal* theSignalIdleList;
// List of unused signals // List of unused signals
Uint32 theWaitNode;
WaitSignalType theWaitState; WaitSignalType theWaitState;
// State denoting a set of signals we accept to receive. // State denoting a set of signals we accept to receive.
......
...@@ -108,6 +108,7 @@ MgmtSrvr::sendRecSignal(Uint16 aNodeId, ...@@ -108,6 +108,7 @@ MgmtSrvr::sendRecSignal(Uint16 aNodeId,
return -1; return -1;
} }
theWaitState = aWaitState; theWaitState = aWaitState;
theWaitNode = aNodeId;
return receiveOptimisedResponse(waitTime); return receiveOptimisedResponse(waitTime);
} }
...@@ -119,11 +120,12 @@ MgmtSrvr::receiveOptimisedResponse(int waitTime) ...@@ -119,11 +120,12 @@ MgmtSrvr::receiveOptimisedResponse(int waitTime)
theFacade->checkForceSend(_blockNumber); theFacade->checkForceSend(_blockNumber);
NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime; NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime;
while (theWaitState != NO_WAIT && waitTime > 0) { while (theWaitState != NO_WAIT && theWaitState != WAIT_NODEFAILURE
&& waitTime > 0) {
NdbCondition_WaitTimeout(theMgmtWaitForResponseCondPtr, NdbCondition_WaitTimeout(theMgmtWaitForResponseCondPtr,
theFacade->theMutexPtr, theFacade->theMutexPtr,
waitTime); waitTime);
if(theWaitState == NO_WAIT) if(theWaitState == NO_WAIT || theWaitState == WAIT_NODEFAILURE)
break; break;
waitTime = (maxTime - NdbTick_CurrentMillisecond()); waitTime = (maxTime - NdbTick_CurrentMillisecond());
}//while }//while
......
...@@ -345,7 +345,7 @@ ErrorBundle ErrorCodes[] = { ...@@ -345,7 +345,7 @@ ErrorBundle ErrorCodes[] = {
{ 1325, IE, "File or scan error" }, { 1325, IE, "File or scan error" },
{ 1326, IE, "Backup abortet due to node failure" }, { 1326, IE, "Backup abortet due to node failure" },
{ 1327, IE, "1327" }, { 1327, IE, "1327" },
{ 1340, IE, "Backup undefined error" }, { 1340, IE, "Backup undefined error" },
{ 1342, AE, "Backup failed to allocate buffers (check configuration)" }, { 1342, AE, "Backup failed to allocate buffers (check configuration)" },
{ 1343, AE, "Backup failed to setup fs buffers (check configuration)" }, { 1343, AE, "Backup failed to setup fs buffers (check configuration)" },
...@@ -355,7 +355,8 @@ ErrorBundle ErrorCodes[] = { ...@@ -355,7 +355,8 @@ ErrorBundle ErrorCodes[] = {
{ 1347, AE, "Backup failed to allocate table memory (check configuration)" }, { 1347, AE, "Backup failed to allocate table memory (check configuration)" },
{ 1348, AE, "Backup failed to allocate file record (check configuration)" }, { 1348, AE, "Backup failed to allocate file record (check configuration)" },
{ 1349, AE, "Backup failed to allocate attribute record (check configuration)" }, { 1349, AE, "Backup failed to allocate attribute record (check configuration)" },
{ 1329, AE, "Backup during software upgrade not supported" },
/** /**
* Still uncategorized * Still uncategorized
*/ */
......
...@@ -74,20 +74,20 @@ int runAbort(NDBT_Context* ctx, NDBT_Step* step){ ...@@ -74,20 +74,20 @@ int runAbort(NDBT_Context* ctx, NDBT_Step* step){
if (testMaster) { if (testMaster) {
if (testSlave) { if (testSlave) {
if (backup.NFMasterAsSlave(restarter) == -1){ if (backup.NFMasterAsSlave(restarter) != NDBT_OK){
return NDBT_FAILED; return NDBT_FAILED;
} }
} else { } else {
if (backup.NFMaster(restarter) == -1){ if (backup.NFMaster(restarter) != NDBT_OK){
return NDBT_FAILED; return NDBT_FAILED;
} }
} }
} else { } else {
if (backup.NFSlave(restarter) == -1){ if (backup.NFSlave(restarter) != NDBT_OK){
return NDBT_FAILED; return NDBT_FAILED;
} }
} }
return NDBT_OK; return NDBT_OK;
} }
...@@ -108,16 +108,16 @@ int runFail(NDBT_Context* ctx, NDBT_Step* step){ ...@@ -108,16 +108,16 @@ int runFail(NDBT_Context* ctx, NDBT_Step* step){
if (testMaster) { if (testMaster) {
if (testSlave) { if (testSlave) {
if (backup.FailMasterAsSlave(restarter) == -1){ if (backup.FailMasterAsSlave(restarter) != NDBT_OK){
return NDBT_FAILED; return NDBT_FAILED;
} }
} else { } else {
if (backup.FailMaster(restarter) == -1){ if (backup.FailMaster(restarter) != NDBT_OK){
return NDBT_FAILED; return NDBT_FAILED;
} }
} }
} else { } else {
if (backup.FailSlave(restarter) == -1){ if (backup.FailSlave(restarter) != NDBT_OK){
return NDBT_FAILED; return NDBT_FAILED;
} }
} }
......
...@@ -2,6 +2,30 @@ max-time: 3600 ...@@ -2,6 +2,30 @@ max-time: 3600
cmd: atrt-mysql-test-run cmd: atrt-mysql-test-run
args: --force args: --force
max-time: 600
cmd: atrt-testBackup
args: -n NFMaster T1
max-time: 600
cmd: atrt-testBackup
args: -n NFMasterAsSlave T1
max-time: 600
cmd: atrt-testBackup
args: -n NFSlave T1
max-time: 600
cmd: atrt-testBackup
args: -n FailMaster T1
max-time: 600
cmd: atrt-testBackup
args: -n FailMasterAsSlave T1
max-time: 600
cmd: atrt-testBackup
args: -n FailSlave T1
max-time: 600 max-time: 600
cmd: atrt-testBackup cmd: atrt-testBackup
args: -n BackupOne T1 T6 T3 I3 args: -n BackupOne T1 T6 T3 I3
......
...@@ -245,6 +245,10 @@ NdbBackup::NFSlave(NdbRestarter& _restarter){ ...@@ -245,6 +245,10 @@ NdbBackup::NFSlave(NdbRestarter& _restarter){
int int
NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, bool onMaster){ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, bool onMaster){
{ {
int nNodes = _restarter.getNumDbNodes();
if(nNodes == 1)
return NDBT_OK;
int nodeId = _restarter.getMasterNodeId(); int nodeId = _restarter.getMasterNodeId();
CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0, CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0,
...@@ -255,15 +259,11 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, ...@@ -255,15 +259,11 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
CHECK(_restarter.startNodes(&nodeId, 1) == 0, CHECK(_restarter.startNodes(&nodeId, 1) == 0,
"failed to start node"); "failed to start node");
NdbSleep_SecSleep(10);
} }
CHECK(_restarter.waitClusterStarted() == 0, CHECK(_restarter.waitClusterStarted() == 0,
"waitClusterStarted failed"); "waitClusterStarted failed");
int nNodes = _restarter.getNumDbNodes();
myRandom48Init(NdbTick_CurrentMillisecond()); myRandom48Init(NdbTick_CurrentMillisecond());
for(int i = 0; i<sz; i++){ for(int i = 0; i<sz; i++){
...@@ -296,6 +296,7 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, ...@@ -296,6 +296,7 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
"failed to set error insert"); "failed to set error insert");
g_info << "error inserted" << endl; g_info << "error inserted" << endl;
NdbSleep_SecSleep(1);
g_info << "starting backup" << endl; g_info << "starting backup" << endl;
int r = start(backupId); int r = start(backupId);
...@@ -304,6 +305,7 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, ...@@ -304,6 +305,7 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
if (r == 0) { if (r == 0) {
g_err << "Backup should have failed on error_insertion " << error << endl g_err << "Backup should have failed on error_insertion " << error << endl
<< "Master = " << masterNodeId << "Node = " << nodeId << endl; << "Master = " << masterNodeId << "Node = " << nodeId << endl;
return NDBT_FAILED;
} }
CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0, CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
...@@ -316,8 +318,6 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, ...@@ -316,8 +318,6 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
return NDBT_FAILED; return NDBT_FAILED;
} }
NdbSleep_SecSleep(1);
g_info << "starting new backup" << endl; g_info << "starting new backup" << endl;
CHECK(start(backupId) == 0, CHECK(start(backupId) == 0,
"failed to start backup"); "failed to start backup");
...@@ -331,8 +331,14 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, ...@@ -331,8 +331,14 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
"waitClusterStarted failed"); "waitClusterStarted failed");
g_info << "node started" << endl; g_info << "node started" << endl;
int val2[] = { 24, 2424 };
CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
"failed to check backup resources RestartOnErrorInsert");
CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0, CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
"failed to set error insert"); "failed to set error insert");
NdbSleep_SecSleep(1);
} }
return NDBT_OK; return NDBT_OK;
...@@ -340,15 +346,8 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, ...@@ -340,15 +346,8 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
int int
FailS_codes[] = { FailS_codes[] = {
10023,
10024,
10025,
10026,
10027, 10027,
10028, 10033
10029,
10030,
10031
}; };
int int
...@@ -359,9 +358,8 @@ FailM_codes[] = { ...@@ -359,9 +358,8 @@ FailM_codes[] = {
10026, 10026,
10027, 10027,
10028, 10028,
10029, 10031,
10030, 10033
10031
}; };
int int
...@@ -426,13 +424,21 @@ NdbBackup::Fail(NdbRestarter& _restarter, int *Fail_codes, const int sz, bool on ...@@ -426,13 +424,21 @@ NdbBackup::Fail(NdbRestarter& _restarter, int *Fail_codes, const int sz, bool on
if (r == 0) { if (r == 0) {
g_err << "Backup should have failed on error_insertion " << error << endl g_err << "Backup should have failed on error_insertion " << error << endl
<< "Master = " << masterNodeId << "Node = " << nodeId << endl; << "Master = " << masterNodeId << "Node = " << nodeId << endl;
return NDBT_FAILED;
} }
CHECK(_restarter.waitClusterStarted() == 0, CHECK(_restarter.waitClusterStarted() == 0,
"waitClusterStarted failed"); "waitClusterStarted failed");
CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0, CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
"failed to set error insert"); "failed to set error insert");
NdbSleep_SecSleep(5);
int val2[] = { 24, 2424 };
CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
"failed to check backup resources RestartOnErrorInsert");
} }
return NDBT_OK; return NDBT_OK;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment