ndb - bug#27283 (wl2325-5.0)

    Handle race condtition between MASTER_GCPCONF and execGCP_NODEFINISH
parent 78980793
......@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4014
Next DBLQH 5043
Next DBDICT 6007
Next DBDIH 7181
Next DBDIH 7183
Next DBTC 8039
Next CMVMI 9000
Next BACKUP 10022
......
......@@ -4811,6 +4811,15 @@ void Dbdih::execMASTER_GCPREQ(Signal* signal)
} else {
ndbrequire(failedNodePtr.p->nodeStatus == NodeRecord::DYING);
}//if
if (ERROR_INSERTED(7181))
{
ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ");
CLEAR_ERROR_INSERT_VALUE;
signal->theData[1] = coldgcp;
execGCP_TCFINISHED(signal);
}
MasterGCPConf::State gcpState;
switch (cgcpParticipantState) {
case GCP_PARTICIPANT_READY:
......@@ -4877,6 +4886,14 @@ void Dbdih::execMASTER_GCPREQ(Signal* signal)
masterGCPConf->lcpActive[i] = SYSFILE->lcpActive[i];
sendSignal(newMasterBlockref, GSN_MASTER_GCPCONF, signal,
MasterGCPConf::SignalLength, JBB);
if (ERROR_INSERTED(7182))
{
ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ");
CLEAR_ERROR_INSERT_VALUE;
signal->theData[1] = coldgcp;
execGCP_TCFINISHED(signal);
}
}//Dbdih::execMASTER_GCPREQ()
void Dbdih::execMASTER_GCPCONF(Signal* signal)
......@@ -7542,10 +7559,10 @@ void Dbdih::execGCP_NODEFINISH(Signal* signal)
} else if (cmasterState == MASTER_TAKE_OVER_GCP) {
jam();
//-------------------------------------------------------------
// We are currently taking over as master. We will delay the
// signal until we have completed the take over gcp handling.
// We are currently taking over as master. Ignore
// signal in this case since we will discover it in reception of
// MASTER_GCPCONF.
//-------------------------------------------------------------
sendSignalWithDelay(reference(), GSN_GCP_NODEFINISH, signal, 20, 3);
return;
} else {
ndbrequire(cmasterState == MASTER_ACTIVE);
......@@ -7692,6 +7709,15 @@ void Dbdih::execGCP_TCFINISHED(Signal* signal)
Uint32 gci = signal->theData[1];
ndbrequire(gci == coldgcp);
if (ERROR_INSERTED(7181) || ERROR_INSERTED(7182))
{
ndbout_c("killing %d", refToNode(cmasterdihref));
signal->theData[0] = 9999;
sendSignal(numberToRef(CMVMI, refToNode(cmasterdihref)),
GSN_NDB_TAMPER, signal, 1, JBB);
return;
}
cgcpParticipantState = GCP_PARTICIPANT_TC_FINISHED;
signal->theData[0] = cownNodeId;
signal->theData[1] = coldgcp;
......
......@@ -1178,6 +1178,48 @@ runBug27003(NDBT_Context* ctx, NDBT_Step* step)
}
int
runBug27283(NDBT_Context* ctx, NDBT_Step* step)
{
int result = NDBT_OK;
int loops = ctx->getNumLoops();
int records = ctx->getNumRecords();
NdbRestarter res;
if (res.getNumDbNodes() < 2)
{
return NDBT_OK;
}
static const int errnos[] = { 7181, 7182, 0 };
Uint32 pos = 0;
for (Uint32 i = 0; i<loops; i++)
{
while (errnos[pos] != 0)
{
int master = res.getMasterNodeId();
int next = res.getNextMasterNodeId(master);
int next2 = res.getNextMasterNodeId(next);
int node = (i & 1) ? next : next2;
ndbout_c("Tesing err: %d", errnos[pos]);
if (res.insertErrorInNode(next, errnos[pos]))
return NDBT_FAILED;
NdbSleep_SecSleep(3);
if (res.waitClusterStarted())
return NDBT_FAILED;
pos++;
}
pos = 0;
}
return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
......@@ -1508,6 +1550,9 @@ TESTCASE("Bug26481", ""){
TESTCASE("Bug27003", ""){
INITIALIZER(runBug27003);
}
TESTCASE("Bug27283", ""){
INITIALIZER(runBug27283);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
......
......@@ -429,6 +429,10 @@ max-time: 1000
cmd: testNodeRestart
args: -n Bug27003 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug27283 T1
max-time: 500
cmd: testNodeRestart
args: -n Bug15587 T1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment