ndb - bug#26481

  fix for killed node during initial node restart
parent 2cff674c
......@@ -1368,6 +1368,7 @@ private:
Uint32 csystemnodes;
Uint32 currentgcp;
Uint32 c_newest_restorable_gci;
Uint32 c_set_initial_start_flag;
enum GcpMasterTakeOverState {
GMTOS_IDLE = 0,
......
......@@ -75,6 +75,7 @@ void Dbdih::initData()
c_blockCommit = false;
c_blockCommitNo = 1;
cntrlblockref = RNIL;
c_set_initial_start_flag = FALSE;
}//Dbdih::initData()
void Dbdih::initRecords()
......
......@@ -667,6 +667,12 @@ done:
{
jam();
memcpy(sysfileData, cdata, sizeof(sysfileData));
if (c_set_initial_start_flag)
{
jam();
Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
}
}
c_copyGCISlave.m_copyReason = reason;
......@@ -1260,6 +1266,11 @@ void Dbdih::execNDB_STTOR(Signal* signal)
// The permission is given by the master node in the alive set.
/*-----------------------------------------------------------------------*/
createMutexes(signal, 0);
if (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)
{
jam();
c_set_initial_start_flag = TRUE; // In sysfile...
}
break;
case ZNDB_SPH3:
......@@ -10277,6 +10288,17 @@ Dbdih::sendLCP_COMPLETE_REP(Signal* signal){
sendSignal(c_lcpState.m_masterLcpDihRef, GSN_LCP_COMPLETE_REP, signal,
LcpCompleteRep::SignalLength, JBB);
/**
* Say that an initial node restart does not need to be redone
* once node has been part of first LCP
*/
if (c_set_initial_start_flag &&
c_lcpState.m_participatingLQH.get(getOwnNodeId()))
{
jam();
c_set_initial_start_flag = FALSE;
}
}
/*-------------------------------------------------------------------------- */
......
......@@ -11673,7 +11673,8 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal)
return;
}
if(getNodeState().getNodeRestartInProgress()){
if(getNodeState().getNodeRestartInProgress() && cstartRecReq == ZFALSE)
{
GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
saveRef->dihPtr = dihPtr;
saveRef->nodeId = getOwnNodeId();
......
......@@ -1085,6 +1085,46 @@ retry:
return NDBT_OK;
}
/**
 * Regression test for bug#26481 (node killed during an initial node restart).
 *
 * Steps:
 *  1. Pick a random non-master data node, restart it with
 *     restartOneDbNode(node, true, true, true) and wait until it reports
 *     the "no start" state.
 *     NOTE(review): the three flags are presumably initial/nostart/abort --
 *     confirm against NdbRestarter.
 *  2. Enable CmvmiSetRestartOnErrorInsert (value 1) on that node and insert
 *     error 7018.
 *     NOTE(review): 7018 presumably makes the node fail part-way through
 *     its start -- confirm against the error-insert definitions.
 *  3. Start the node and wait for it to come back to the "no start" state.
 *  4. Start the node once more and require the whole cluster to be started.
 *
 * @param ctx   test context (not used by this test)
 * @param step  test step (not used by this test)
 * @return NDBT_OK on success, NDBT_FAILED as soon as any checked step fails.
 */
int
runBug26481(NDBT_Context* ctx, NDBT_Step* step)
{
  (void)ctx;
  (void)step;

  NdbRestarter res;

  int node = res.getRandomNotMasterNodeId(rand());
  ndbout_c("node: %d", node);

  if (res.restartOneDbNode(node, true, true, true))
    return NDBT_FAILED;

  if (res.waitNodesNoStart(&node, 1))
    return NDBT_FAILED;

  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
  if (res.dumpStateOneNode(node, val2, 2))
    return NDBT_FAILED;

  if (res.insertErrorInNode(node, 7018))
    return NDBT_FAILED;

  if (res.startNodes(&node, 1))
    return NDBT_FAILED;

  // Best effort only: the result is intentionally ignored. The check that
  // matters is the waitNodesNoStart() below.
  res.waitNodesStartPhase(&node, 1, 3);

  if (res.waitNodesNoStart(&node, 1))
    return NDBT_FAILED;

  // Second start attempt; fail right away if the start request itself is
  // rejected instead of only noticing it through waitClusterStarted().
  if (res.startNodes(&node, 1))
    return NDBT_FAILED;

  if (res.waitClusterStarted())
    return NDBT_FAILED;

  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
......@@ -1409,6 +1449,9 @@ TESTCASE("Bug25554", ""){
// Registers test case "Bug26457" (empty description) in the testNodeRestart
// suite; its only step is the initializer runBug26457.
TESTCASE("Bug26457", ""){
INITIALIZER(runBug26457);
}
// Registers test case "Bug26481" (empty description) in the testNodeRestart
// suite; its only step is the initializer runBug26481, the regression test
// for bug#26481 (node killed during initial node restart).
TESTCASE("Bug26481", ""){
INITIALIZER(runBug26481);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment