Commit 5b10b7bc authored by unknown's avatar unknown

bug#15632 - ndb

  Fix race between INCL_NODEREQ(prio b) and GCP_PREPARE(prio a) by also waiting for starting nodes


ndb/include/ndb_version.h.in:
  Handle upgrade of bug fix
ndb/src/kernel/blocks/ERROR_codes.txt:
  New error code for delaying INCL_NODE_REQ
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Fix race between INCL_NODEREQ(prio b) and GCP_PREPARE(prio a)
    by also waiting for starting nodes
ndb/test/ndbapi/testNodeRestart.cpp:
  Add testcase for bug#15632
parent fc4c198e
......@@ -57,5 +57,8 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ];
*/
/*#define NDB_VERSION_ID 0*/
#define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17)
#define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18)
#endif
......@@ -61,6 +61,8 @@ Insert system error in GCP participant when receiving GCP_SAVEREQ.
5007:
Delay GCP_SAVEREQ by 10 secs
7165: Delay INCL_NODE_REQ in starting node yeilding error in GCP_PREPARE
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
-----------------------------------------------------------------
......
......@@ -215,7 +215,7 @@ void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId)
signal->theData[2] = c_nodeStartMaster.failNr;
signal->theData[3] = 0;
signal->theData[4] = currentgcp;
sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBB);
sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 5, JBA);
}//Dbdih::sendINCL_NODEREQ()
void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId)
......@@ -1857,6 +1857,14 @@ void Dbdih::gcpBlockedLab(Signal* signal)
// global checkpoint id and the correct state. We do not wait for any reply
// since the starting node will not send any.
/*-------------------------------------------------------------------------*/
Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version;
if ((getMajor(startVersion) == 4 && startVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
(getMajor(startVersion) == 5 && startVersion >= NDBD_INCL_NODECONF_VERSION_5))
{
c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
}
sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode);
}//Dbdih::gcpBlockedLab()
......@@ -2059,6 +2067,13 @@ void Dbdih::execINCL_NODEREQ(Signal* signal)
jamEntry();
Uint32 retRef = signal->theData[0];
Uint32 nodeId = signal->theData[1];
if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165))
{
CLEAR_ERROR_INSERT_VALUE;
sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000, signal->getLength());
return;
}
Uint32 tnodeStartFailNr = signal->theData[2];
currentgcp = signal->theData[4];
CRASH_INSERTION(7127);
......@@ -2086,6 +2101,15 @@ void Dbdih::execINCL_NODEREQ(Signal* signal)
// id's and the lcp status.
/*-----------------------------------------------------------------------*/
CRASH_INSERTION(7171);
Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version;
if ((NDB_VERSION_MAJOR == 4 && masterVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
(NDB_VERSION_MAJOR == 5 && masterVersion >= NDBD_INCL_NODECONF_VERSION_5))
{
signal->theData[0] = getOwnNodeId();
signal->theData[1] = getOwnNodeId();
sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
}
return;
}//if
if (getNodeStatus(nodeId) != NodeRecord::STARTING) {
......@@ -3737,8 +3761,16 @@ void Dbdih::execNODE_FAILREP(Signal* signal)
/*------------------------------------------------------------------------*/
// Verify that a starting node has also crashed. Reset the node start record.
/*-------------------------------------------------------------------------*/
if (c_nodeStartMaster.startNode != RNIL) {
ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)!= NodeRecord::ALIVE);
if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE)
{
BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode);
SystemError * const sysErr = (SystemError*)&signal->theData[0];
sysErr->errorCode = SystemError::StartInProgressError;
sysErr->errorRef = reference();
sysErr->data1= 0;
sysErr->data2= __LINE__;
sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA);
nodeResetStart();
}//if
/*--------------------------------------------------*/
......
......@@ -446,6 +446,56 @@ int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
int runBug15632(NDBT_Context* ctx, NDBT_Step* step){
int result = NDBT_OK;
int loops = ctx->getNumLoops();
int records = ctx->getNumRecords();
NdbRestarter restarter;
int nodeId = restarter.getDbNodeId(1);
ndbout << "Restart node " << nodeId << endl;
if (restarter.restartOneDbNode(nodeId,
/** initial */ false,
/** nostart */ true,
/** abort */ true))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(&nodeId, 1))
return NDBT_FAILED;
if (restarter.insertErrorInNode(nodeId, 7165))
return NDBT_FAILED;
if (restarter.startNodes(&nodeId, 1))
return NDBT_FAILED;
if (restarter.waitNodesStarted(&nodeId, 1))
return NDBT_FAILED;
if (restarter.restartOneDbNode(nodeId,
/** initial */ false,
/** nostart */ true,
/** abort */ true))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(&nodeId, 1))
return NDBT_FAILED;
if (restarter.insertErrorInNode(nodeId, 7171))
return NDBT_FAILED;
if (restarter.startNodes(&nodeId, 1))
return NDBT_FAILED;
if (restarter.waitNodesStarted(&nodeId, 1))
return NDBT_FAILED;
ctx->stopTest();
return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
......@@ -596,6 +646,8 @@ TESTCASE("RestartNFDuringNR",
INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable);
STEP(runRestarts);
STEP(runPkUpdateUntilStopped);
STEP(runScanUpdateUntilStopped);
FINALIZER(runScanReadVerify);
FINALIZER(runClearTable);
}
......@@ -685,6 +737,8 @@ TESTCASE("RestartNodeDuringLCP",
INITIALIZER(runCheckAllNodesStarted);
INITIALIZER(runLoadTable);
STEP(runRestarts);
STEP(runPkUpdateUntilStopped);
STEP(runScanUpdateUntilStopped);
FINALIZER(runScanReadVerify);
FINALIZER(runClearTable);
}
......@@ -716,6 +770,12 @@ TESTCASE("Bug15587",
STEP(runBug15587);
FINALIZER(runClearTable);
}
TESTCASE("Bug15632",
"Test bug with NF during NR"){
INITIALIZER(runLoadTable);
STEP(runBug15632);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment