ndb - bug#27283 (wl2325-5.0)

    Handle race condition between MASTER_GCPCONF and execGCP_NODEFINISH
parent 78980793
...@@ -5,7 +5,7 @@ Next DBACC 3002 ...@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4014 Next DBTUP 4014
Next DBLQH 5043 Next DBLQH 5043
Next DBDICT 6007 Next DBDICT 6007
Next DBDIH 7181 Next DBDIH 7183
Next DBTC 8039 Next DBTC 8039
Next CMVMI 9000 Next CMVMI 9000
Next BACKUP 10022 Next BACKUP 10022
......
...@@ -4811,6 +4811,15 @@ void Dbdih::execMASTER_GCPREQ(Signal* signal) ...@@ -4811,6 +4811,15 @@ void Dbdih::execMASTER_GCPREQ(Signal* signal)
} else { } else {
ndbrequire(failedNodePtr.p->nodeStatus == NodeRecord::DYING); ndbrequire(failedNodePtr.p->nodeStatus == NodeRecord::DYING);
}//if }//if
if (ERROR_INSERTED(7181))
{
ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ");
CLEAR_ERROR_INSERT_VALUE;
signal->theData[1] = coldgcp;
execGCP_TCFINISHED(signal);
}
MasterGCPConf::State gcpState; MasterGCPConf::State gcpState;
switch (cgcpParticipantState) { switch (cgcpParticipantState) {
case GCP_PARTICIPANT_READY: case GCP_PARTICIPANT_READY:
...@@ -4877,6 +4886,14 @@ void Dbdih::execMASTER_GCPREQ(Signal* signal) ...@@ -4877,6 +4886,14 @@ void Dbdih::execMASTER_GCPREQ(Signal* signal)
masterGCPConf->lcpActive[i] = SYSFILE->lcpActive[i]; masterGCPConf->lcpActive[i] = SYSFILE->lcpActive[i];
sendSignal(newMasterBlockref, GSN_MASTER_GCPCONF, signal, sendSignal(newMasterBlockref, GSN_MASTER_GCPCONF, signal,
MasterGCPConf::SignalLength, JBB); MasterGCPConf::SignalLength, JBB);
if (ERROR_INSERTED(7182))
{
ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ");
CLEAR_ERROR_INSERT_VALUE;
signal->theData[1] = coldgcp;
execGCP_TCFINISHED(signal);
}
}//Dbdih::execMASTER_GCPREQ() }//Dbdih::execMASTER_GCPREQ()
void Dbdih::execMASTER_GCPCONF(Signal* signal) void Dbdih::execMASTER_GCPCONF(Signal* signal)
...@@ -7542,10 +7559,10 @@ void Dbdih::execGCP_NODEFINISH(Signal* signal) ...@@ -7542,10 +7559,10 @@ void Dbdih::execGCP_NODEFINISH(Signal* signal)
} else if (cmasterState == MASTER_TAKE_OVER_GCP) { } else if (cmasterState == MASTER_TAKE_OVER_GCP) {
jam(); jam();
//------------------------------------------------------------- //-------------------------------------------------------------
// We are currently taking over as master. We will delay the // We are currently taking over as master. Ignore
// signal until we have completed the take over gcp handling. // signal in this case since we will discover it in reception of
// MASTER_GCPCONF.
//------------------------------------------------------------- //-------------------------------------------------------------
sendSignalWithDelay(reference(), GSN_GCP_NODEFINISH, signal, 20, 3);
return; return;
} else { } else {
ndbrequire(cmasterState == MASTER_ACTIVE); ndbrequire(cmasterState == MASTER_ACTIVE);
...@@ -7692,6 +7709,15 @@ void Dbdih::execGCP_TCFINISHED(Signal* signal) ...@@ -7692,6 +7709,15 @@ void Dbdih::execGCP_TCFINISHED(Signal* signal)
Uint32 gci = signal->theData[1]; Uint32 gci = signal->theData[1];
ndbrequire(gci == coldgcp); ndbrequire(gci == coldgcp);
if (ERROR_INSERTED(7181) || ERROR_INSERTED(7182))
{
ndbout_c("killing %d", refToNode(cmasterdihref));
signal->theData[0] = 9999;
sendSignal(numberToRef(CMVMI, refToNode(cmasterdihref)),
GSN_NDB_TAMPER, signal, 1, JBB);
return;
}
cgcpParticipantState = GCP_PARTICIPANT_TC_FINISHED; cgcpParticipantState = GCP_PARTICIPANT_TC_FINISHED;
signal->theData[0] = cownNodeId; signal->theData[0] = cownNodeId;
signal->theData[1] = coldgcp; signal->theData[1] = coldgcp;
......
...@@ -1178,6 +1178,48 @@ runBug27003(NDBT_Context* ctx, NDBT_Step* step) ...@@ -1178,6 +1178,48 @@ runBug27003(NDBT_Context* ctx, NDBT_Step* step)
} }
/**
 * Regression test for bug#27283.
 *
 * For every test loop, each error-insert code in a zero-terminated list
 * (7181, 7182) is injected into the node returned by
 * getNextMasterNodeId() for the current master.  After each injection the
 * test sleeps for three seconds and then waits for the cluster to report
 * started again.
 *
 * @param ctx   test context; supplies the number of loops to run
 * @param step  test step (not used by this test)
 * @return NDBT_OK when every iteration completes, or immediately when the
 *         cluster has fewer than two data nodes; NDBT_FAILED as soon as an
 *         error insert or a wait for cluster start reports failure
 */
int
runBug27283(NDBT_Context* ctx, NDBT_Step* step)
{
  const int loops = ctx->getNumLoops();
  NdbRestarter res;

  // The test targets the node next in line after the master, so it is
  // skipped (reported as passed) when fewer than two data nodes exist.
  if (res.getNumDbNodes() < 2)
  {
    return NDBT_OK;
  }

  // Error-insert codes to exercise; the trailing 0 terminates the list.
  static const int errnos[] = { 7181, 7182, 0 };

  // Signed counter: comparing an unsigned counter against the signed loop
  // count would turn a negative count into a huge iteration bound.
  for (int i = 0; i < loops; i++)
  {
    for (const int* err = errnos; *err != 0; ++err)
    {
      // Re-read the master on every pass instead of caching it.
      // NOTE(review): presumably the master changes once the inserted
      // error fires -- confirm against the DBDIH error-insert handling.
      const int master = res.getMasterNodeId();
      const int next = res.getNextMasterNodeId(master);

      ndbout_c("Tesing err: %d", *err);
      if (res.insertErrorInNode(next, *err))
        return NDBT_FAILED;

      // Fixed pause before polling the cluster state.
      NdbSleep_SecSleep(3);

      if (res.waitClusterStarted())
        return NDBT_FAILED;
    }
  }

  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart); NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\ "Test that one node at a time can be stopped and then restarted "\
...@@ -1508,6 +1550,9 @@ TESTCASE("Bug26481", ""){ ...@@ -1508,6 +1550,9 @@ TESTCASE("Bug26481", ""){
TESTCASE("Bug27003", ""){ TESTCASE("Bug27003", ""){
INITIALIZER(runBug27003); INITIALIZER(runBug27003);
} }
TESTCASE("Bug27283", ""){
INITIALIZER(runBug27283);
}
NDBT_TESTSUITE_END(testNodeRestart); NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
...@@ -429,6 +429,10 @@ max-time: 1000 ...@@ -429,6 +429,10 @@ max-time: 1000
cmd: testNodeRestart cmd: testNodeRestart
args: -n Bug27003 T1 args: -n Bug27003 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug27283 T1
max-time: 500 max-time: 500
cmd: testNodeRestart cmd: testNodeRestart
args: -n Bug15587 T1 args: -n Bug15587 T1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment