Commit e5269eed authored by unknown's avatar unknown

bug#15587 - ndb

  Fix error in NF during NR


ndb/include/kernel/signaldata/DumpStateOrd.hpp:
  Add dump for ERROR 5002 with specified table
ndb/src/kernel/blocks/ERROR_codes.txt:
  Add dump for ERROR 5002 with specified table
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Run updateNodeInfo if failed node is not in list of storedReplicas
ndb/src/kernel/blocks/dblqh/Dblqh.hpp:
  Add dump for ERROR 5002 with specified table
ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
  Add dump for ERROR 5002 with specified table
ndb/test/ndbapi/testNodeRestart.cpp:
  Add testcase for bug#15587
ndb/test/run-test/daily-basic-tests.txt:
  Add testcase for bug#15587
parent 8354e066
......@@ -78,6 +78,8 @@ public:
LqhDumpAllScanRec = 2301,
LqhDumpAllActiveScanRec = 2302,
LqhDumpLcpState = 2303,
LqhErrorInsert5042 = 2315,
AccDumpOneScanRec = 2400,
AccDumpAllScanRec = 2401,
AccDumpAllActiveScanRec = 2402,
......
......@@ -155,6 +155,9 @@ Insert node failure handling when receiving COMPLETEREQ.
5006:
Insert node failure handling when receiving ABORTREQ.
5042:
As 5002, but with specified table (see DumpStateOrd)
These error code can be combined with error codes for testing time-out
handling in DBTC to ensure that node failures are also well handled in
time-out handling. They can also be used to test multiple node failure
......
......@@ -5187,15 +5187,16 @@ void Dbdih::removeNodeFromTable(Signal* signal,
/**
* For each of replica record
*/
Uint32 replicaNo = 0;
bool found = false;
ReplicaRecordPtr replicaPtr;
for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
replicaPtr.i = replicaPtr.p->nextReplica, replicaNo++) {
replicaPtr.i = replicaPtr.p->nextReplica) {
jam();
ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
if(replicaPtr.p->procNode == nodeId){
jam();
found = true;
noOfRemovedReplicas++;
removeNodeFromStored(nodeId, fragPtr, replicaPtr);
if(replicaPtr.p->lcpOngoingFlag){
......@@ -5211,6 +5212,15 @@ void Dbdih::removeNodeFromTable(Signal* signal,
}
}
}
if (!found)
{
jam();
/**
* Run updateNodeInfo to remove any dead nodes from list of activeNodes
* see bug#15587
*/
updateNodeInfo(fragPtr);
}
noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas;
}
......
......@@ -2881,6 +2881,7 @@ private:
UintR ctransidHash[1024];
Uint32 c_diskless;
Uint32 c_error_insert_table_id;
public:
/**
......
......@@ -3532,6 +3532,7 @@ void Dblqh::execLQHKEYREQ(Signal* signal)
jam();
regTcPtr->activeCreat = ZTRUE;
CRASH_INSERTION(5002);
CRASH_INSERTION2(5042, tabptr.i == c_error_insert_table_id);
} else {
regTcPtr->activeCreat = ZFALSE;
}//if
......@@ -18402,8 +18403,12 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal)
return;
}
if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && signal->getLength() == 2)
{
c_error_insert_table_id = dumpState->args[1];
SET_ERROR_INSERT_VALUE(5042);
}
}//Dblqh::execDUMP_STATE_ORD()
void Dblqh::execSET_VAR_REQ(Signal* signal)
......
......@@ -21,6 +21,7 @@
#include <NdbRestarter.hpp>
#include <NdbRestarts.hpp>
#include <Vector.hpp>
#include <signaldata/DumpStateOrd.hpp>
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
......@@ -409,6 +410,43 @@ int runLateCommit(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
int result = NDBT_OK;
int loops = ctx->getNumLoops();
int records = ctx->getNumRecords();
NdbRestarter restarter;
Uint32 tableId = ctx->getTab()->getTableId();
int dump[2] = { DumpStateOrd::LqhErrorInsert5042, 0 };
dump[1] = tableId;
int nodeId = restarter.getDbNodeId(1);
ndbout << "Restart node " << nodeId << endl;
if (restarter.restartOneDbNode(nodeId,
/** initial */ false,
/** nostart */ true,
/** abort */ true))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(&nodeId, 1))
return NDBT_FAILED;
if (restarter.dumpStateOneNode(nodeId, dump, 2))
return NDBT_FAILED;
if (restarter.startNodes(&nodeId, 1))
return NDBT_FAILED;
if (restarter.waitNodesStarted(&nodeId, 1))
return NDBT_FAILED;
ctx->stopTest();
return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\
......@@ -671,6 +709,13 @@ TESTCASE("LateCommit",
STEP(runLateCommit);
FINALIZER(runClearTable);
}
TESTCASE("Bug15587",
"Test bug with NF during NR"){
INITIALIZER(runLoadTable);
STEP(runScanUpdateUntilStopped);
STEP(runBug15587);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
......
......@@ -434,6 +434,10 @@ max-time: 500
cmd: testScan
args: -l 100 -n Scan-bug8262 T7
max-time: 500
cmd: testNodeRestart
args: -n BugBug15587 T1
# OLD FLEX
max-time: 500
cmd: flexBench
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment