Commit 49a4c85b authored by unknown's avatar unknown

ndb - bug#20185

  Don't be too aggressive in Dbtc::nodeFailCheckTransactions;
  let it time out by 1, so that it does not assert that it has waited too long.
  The old implementation set the timeout value to 0, making timeout = (ctcTimer - 0), which could be quite big.


ndb/src/kernel/blocks/ERROR_codes.txt:
  error codes
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  2 new error inserts
  7030 - delay in GCP_PREPARE until checkLocalNodefailComplete is true
  7031 - delay in GCP_PREPARE and die
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
  Don't set the API connection timer to 0, as this might trigger an assertion in timeOutFoundLab if state == PREPARE_TO_COMMIT
ndb/test/ndbapi/testNodeRestart.cpp:
  testcase
ndb/test/run-test/daily-basic-tests.txt:
  autotest
parent 60873234
...@@ -63,6 +63,9 @@ Delay GCP_SAVEREQ by 10 secs ...@@ -63,6 +63,9 @@ Delay GCP_SAVEREQ by 10 secs
7165: Delay INCL_NODE_REQ in starting node yeilding error in GCP_PREPARE 7165: Delay INCL_NODE_REQ in starting node yeilding error in GCP_PREPARE
7030: Delay in GCP_PREPARE until node has completed a node failure
7031: Delay in GCP_PREPARE and die 3s later
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
----------------------------------------------------------------- -----------------------------------------------------------------
......
...@@ -5417,6 +5417,12 @@ Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId, ...@@ -5417,6 +5417,12 @@ Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId,
return; return;
} }
if (ERROR_INSERTED(7030))
{
ndbout_c("Reenable GCP_PREPARE");
CLEAR_ERROR_INSERT_VALUE;
}
NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0]; NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
nf->blockNo = DBDIH; nf->blockNo = DBDIH;
nf->nodeId = cownNodeId; nf->nodeId = cownNodeId;
...@@ -7459,6 +7465,16 @@ void Dbdih::execGCP_PREPARE(Signal* signal) ...@@ -7459,6 +7465,16 @@ void Dbdih::execGCP_PREPARE(Signal* signal)
{ {
jamEntry(); jamEntry();
CRASH_INSERTION(7005); CRASH_INSERTION(7005);
if (ERROR_INSERTED(7030))
{
cgckptflag = true;
ndbout_c("Delayed GCP_PREPARE 5s");
sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000,
signal->getLength());
return;
}
Uint32 masterNodeId = signal->theData[0]; Uint32 masterNodeId = signal->theData[0];
Uint32 gci = signal->theData[1]; Uint32 gci = signal->theData[1];
BlockReference retRef = calcDihBlockRef(masterNodeId); BlockReference retRef = calcDihBlockRef(masterNodeId);
...@@ -7471,6 +7487,14 @@ void Dbdih::execGCP_PREPARE(Signal* signal) ...@@ -7471,6 +7487,14 @@ void Dbdih::execGCP_PREPARE(Signal* signal)
cgcpParticipantState = GCP_PARTICIPANT_PREPARE_RECEIVED; cgcpParticipantState = GCP_PARTICIPANT_PREPARE_RECEIVED;
cnewgcp = gci; cnewgcp = gci;
if (ERROR_INSERTED(7031))
{
ndbout_c("Crashing delayed in GCP_PREPARE 3s");
signal->theData[0] = 9999;
sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1);
return;
}
signal->theData[0] = cownNodeId; signal->theData[0] = cownNodeId;
signal->theData[1] = gci; signal->theData[1] = gci;
sendSignal(retRef, GSN_GCP_PREPARECONF, signal, 2, JBA); sendSignal(retRef, GSN_GCP_PREPARECONF, signal, 2, JBA);
......
...@@ -7002,18 +7002,20 @@ Dbtc::nodeFailCheckTransactions(Signal* signal, ...@@ -7002,18 +7002,20 @@ Dbtc::nodeFailCheckTransactions(Signal* signal,
{ {
jam(); jam();
Ptr<ApiConnectRecord> transPtr; Ptr<ApiConnectRecord> transPtr;
Uint32 TtcTimer = ctcTimer;
Uint32 TapplTimeout = c_appl_timeout_value;
for (transPtr.i = transPtrI; transPtr.i < capiConnectFilesize; transPtr.i++) for (transPtr.i = transPtrI; transPtr.i < capiConnectFilesize; transPtr.i++)
{ {
ptrCheckGuard(transPtr, capiConnectFilesize, apiConnectRecord); ptrCheckGuard(transPtr, capiConnectFilesize, apiConnectRecord);
if (transPtr.p->m_transaction_nodes.get(failedNodeId)) if (transPtr.p->m_transaction_nodes.get(failedNodeId))
{ {
jam(); jam();
// Force timeout regardless of state // Force timeout regardless of state
Uint32 save = c_appl_timeout_value;
c_appl_timeout_value = 1; c_appl_timeout_value = 1;
setApiConTimer(transPtr.i, 0, __LINE__); setApiConTimer(transPtr.i, TtcTimer - 2, __LINE__);
timeOutFoundLab(signal, transPtr.i, ZNODEFAIL_BEFORE_COMMIT); timeOutFoundLab(signal, transPtr.i, ZNODEFAIL_BEFORE_COMMIT);
c_appl_timeout_value = save; c_appl_timeout_value = TapplTimeout;
} }
// Send CONTINUEB to continue later // Send CONTINUEB to continue later
......
...@@ -868,6 +868,56 @@ runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){ ...@@ -868,6 +868,56 @@ runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK; return NDBT_OK;
} }
/**
 * Regression test for ndb bug#20185.
 *
 * Scenario:
 *  1. Start a transaction, update one record and execute without commit.
 *  2. Insert error 7030 (delay GCP_PREPARE until a node failure has been
 *     handled) in all nodes, and error 7031 (delay in GCP_PREPARE and die
 *     3s later) in one data node other than the one the transaction is
 *     connected to.
 *  3. Try to commit; the commit is expected to fail.
 *  4. Wait for the cluster to be started again.
 *
 * @param ctx   test context, supplies the table under test
 * @param step  test step, supplies the Ndb object
 * @return NDBT_OK on success (or when the cluster has too few data nodes
 *         to run the scenario), NDBT_FAILED otherwise
 */
int runBug20185(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;
  HugoOperations hugoOps(*ctx->getTab());
  Ndb* pNdb = GETNDB(step);

  // The victim node must differ from the node the transaction is connected
  // to. With zero nodes the "rand() % count" below is undefined, and with a
  // single node the selection loop below would never terminate.
  const int numDbNodes = restarter.getNumDbNodes();
  if (numDbNodes <= 0)
    return NDBT_FAILED;
  if (numDbNodes < 2)
    return NDBT_OK;

  // Dump code 7090 with argument 20 on all nodes. It is sent again with only
  // the code at the end of the test (presumably to restore the default --
  // TODO confirm against the DUMP code definitions).
  int dump[] = { 7090, 20 };
  if (restarter.dumpStateAllNodes(dump, 2))
    return NDBT_FAILED;

  NdbSleep_MilliSleep(3000);

  // Open a transaction and leave it prepared but uncommitted.
  if (hugoOps.startTransaction(pNdb) != 0)
    return NDBT_FAILED;

  if (hugoOps.pkUpdateRecord(pNdb, 1, 1) != 0)
    return NDBT_FAILED;

  if (hugoOps.execute_NoCommit(pNdb) != 0)
    return NDBT_FAILED;

  // Pick a random data node that is not the one the transaction uses.
  const int node = hugoOps.getTransaction()->getConnectedNodeId();
  int nodeId;
  do {
    nodeId = restarter.getDbNodeId(rand() % numDbNodes);
  } while (nodeId == node);

  // 7030: delay GCP_PREPARE until the node failure has been handled.
  if (restarter.insertErrorInAllNodes(7030))
    return NDBT_FAILED;

  // 7031: make the chosen node die shortly after receiving GCP_PREPARE.
  if (restarter.insertErrorInNode(nodeId, 7031))
    return NDBT_FAILED;

  NdbSleep_MilliSleep(500);

  // The commit is expected to fail; a successful commit fails the test.
  if (hugoOps.execute_Commit(pNdb) == 0)
    return NDBT_FAILED;

  NdbSleep_MilliSleep(3000);

  // A cluster that does not come back is a test failure, not a pass.
  if (restarter.waitClusterStarted() != 0)
    return NDBT_FAILED;

  if (restarter.dumpStateAllNodes(dump, 1))
    return NDBT_FAILED;

  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart); NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", TESTCASE("NoLoad",
...@@ -1175,6 +1225,12 @@ TESTCASE("Bug18612SR", ...@@ -1175,6 +1225,12 @@ TESTCASE("Bug18612SR",
STEP(runBug18612SR); STEP(runBug18612SR);
FINALIZER(runClearTable); FINALIZER(runClearTable);
} }
// Test case for ndb bug#20185: registers runLoadTable as initializer,
// runBug20185 as the single step and runClearTable as finalizer.
// The description string is intentionally left as in the original (empty).
TESTCASE("Bug20185",
""){
INITIALIZER(runLoadTable);
STEP(runBug20185);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart); NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
...@@ -470,6 +470,10 @@ max-time: 1000 ...@@ -470,6 +470,10 @@ max-time: 1000
cmd: testNodeRestart cmd: testNodeRestart
args: -n Bug18612SR T1 args: -n Bug18612SR T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug20185 T1
# OLD FLEX # OLD FLEX
max-time: 500 max-time: 500
cmd: flexBench cmd: flexBench
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment