Commit 7e9ec9a1 authored by tomas@poseidon.mysql.com

Merge tulin@bk-internal.mysql.com:/home/bk/mysql-5.0-ndb

into  poseidon.mysql.com:/home/tomas/mysql-5.0-ndb
parents 491017b1 a7aed14b
...@@ -5,7 +5,7 @@ Next DBACC 3002 ...@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4014 Next DBTUP 4014
Next DBLQH 5043 Next DBLQH 5043
Next DBDICT 6007 Next DBDICT 6007
Next DBDIH 7178 Next DBDIH 7181
Next DBTC 8039 Next DBTC 8039
Next CMVMI 9000 Next CMVMI 9000
Next BACKUP 10022 Next BACKUP 10022
...@@ -71,6 +71,8 @@ Delay GCP_SAVEREQ by 10 secs ...@@ -71,6 +71,8 @@ Delay GCP_SAVEREQ by 10 secs
7177: Delay copying of sysfileData in execCOPY_GCIREQ 7177: Delay copying of sysfileData in execCOPY_GCIREQ
7180: Crash master during master-take-over in execMASTER_LCPCONF
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
----------------------------------------------------------------- -----------------------------------------------------------------
......
...@@ -1366,6 +1366,7 @@ private: ...@@ -1366,6 +1366,7 @@ private:
Uint32 csystemnodes; Uint32 csystemnodes;
Uint32 currentgcp; Uint32 currentgcp;
Uint32 c_newest_restorable_gci; Uint32 c_newest_restorable_gci;
Uint32 c_set_initial_start_flag;
enum GcpMasterTakeOverState { enum GcpMasterTakeOverState {
GMTOS_IDLE = 0, GMTOS_IDLE = 0,
......
...@@ -74,6 +74,7 @@ void Dbdih::initData() ...@@ -74,6 +74,7 @@ void Dbdih::initData()
c_blockCommit = false; c_blockCommit = false;
c_blockCommitNo = 1; c_blockCommitNo = 1;
cntrlblockref = RNIL; cntrlblockref = RNIL;
c_set_initial_start_flag = FALSE;
}//Dbdih::initData() }//Dbdih::initData()
void Dbdih::initRecords() void Dbdih::initRecords()
......
...@@ -666,6 +666,12 @@ done: ...@@ -666,6 +666,12 @@ done:
{ {
jam(); jam();
memcpy(sysfileData, cdata, sizeof(sysfileData)); memcpy(sysfileData, cdata, sizeof(sysfileData));
if (c_set_initial_start_flag)
{
jam();
Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
}
} }
c_copyGCISlave.m_copyReason = reason; c_copyGCISlave.m_copyReason = reason;
...@@ -1259,6 +1265,11 @@ void Dbdih::execNDB_STTOR(Signal* signal) ...@@ -1259,6 +1265,11 @@ void Dbdih::execNDB_STTOR(Signal* signal)
// The permission is given by the master node in the alive set. // The permission is given by the master node in the alive set.
/*-----------------------------------------------------------------------*/ /*-----------------------------------------------------------------------*/
createMutexes(signal, 0); createMutexes(signal, 0);
if (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)
{
jam();
c_set_initial_start_flag = TRUE; // In sysfile...
}
break; break;
case ZNDB_SPH3: case ZNDB_SPH3:
...@@ -4612,6 +4623,8 @@ void ...@@ -4612,6 +4623,8 @@ void
Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
jam(); jam();
Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId;
c_lcpMasterTakeOverState.minTableId = ~0; c_lcpMasterTakeOverState.minTableId = ~0;
c_lcpMasterTakeOverState.minFragId = ~0; c_lcpMasterTakeOverState.minFragId = ~0;
c_lcpMasterTakeOverState.failedNodeId = nodeId; c_lcpMasterTakeOverState.failedNodeId = nodeId;
...@@ -4630,7 +4643,20 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){ ...@@ -4630,7 +4643,20 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
/** /**
* Node failure during master take over... * Node failure during master take over...
*/ */
g_eventLogger.info("Nodefail during master take over"); g_eventLogger.info("Nodefail during master take over (old: %d)", oldNode);
}
NodeRecordPtr nodePtr;
nodePtr.i = oldNode;
if (oldNode > 0 && oldNode < MAX_NDB_NODES)
{
jam();
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER))
{
jam();
checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER);
}
} }
setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER); setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
...@@ -5647,6 +5673,14 @@ void Dbdih::execMASTER_LCPREQ(Signal* signal) ...@@ -5647,6 +5673,14 @@ void Dbdih::execMASTER_LCPREQ(Signal* signal)
jamEntry(); jamEntry();
const BlockReference newMasterBlockref = req->masterRef; const BlockReference newMasterBlockref = req->masterRef;
if (newMasterBlockref != cmasterdihref)
{
jam();
ndbout_c("resending GSN_MASTER_LCPREQ");
sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
signal->getLength(), 50);
return;
}
Uint32 failedNodeId = req->failedNodeId; Uint32 failedNodeId = req->failedNodeId;
/** /**
...@@ -5947,6 +5981,8 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal) ...@@ -5947,6 +5981,8 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal)
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord); ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
nodePtr.p->lcpStateAtTakeOver = lcpState; nodePtr.p->lcpStateAtTakeOver = lcpState;
CRASH_INSERTION(7180);
#ifdef VM_TRACE #ifdef VM_TRACE
g_eventLogger.info("MASTER_LCPCONF"); g_eventLogger.info("MASTER_LCPCONF");
printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0); printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
...@@ -10259,6 +10295,17 @@ Dbdih::sendLCP_COMPLETE_REP(Signal* signal){ ...@@ -10259,6 +10295,17 @@ Dbdih::sendLCP_COMPLETE_REP(Signal* signal){
sendSignal(c_lcpState.m_masterLcpDihRef, GSN_LCP_COMPLETE_REP, signal, sendSignal(c_lcpState.m_masterLcpDihRef, GSN_LCP_COMPLETE_REP, signal,
LcpCompleteRep::SignalLength, JBB); LcpCompleteRep::SignalLength, JBB);
/**
* Say that an initial node restart does not need to be redone
* once node has been part of first LCP
*/
if (c_set_initial_start_flag &&
c_lcpState.m_participatingLQH.get(getOwnNodeId()))
{
jam();
c_set_initial_start_flag = FALSE;
}
} }
/*-------------------------------------------------------------------------- */ /*-------------------------------------------------------------------------- */
......
...@@ -11672,7 +11672,8 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal) ...@@ -11672,7 +11672,8 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal)
return; return;
} }
if(getNodeState().getNodeRestartInProgress()){ if(getNodeState().getNodeRestartInProgress() && cstartRecReq == ZFALSE)
{
GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0]; GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
saveRef->dihPtr = dihPtr; saveRef->dihPtr = dihPtr;
saveRef->nodeId = getOwnNodeId(); saveRef->nodeId = getOwnNodeId();
......
...@@ -61,6 +61,8 @@ public: ...@@ -61,6 +61,8 @@ public:
int dumpStateAllNodes(int * _args, int _num_args); int dumpStateAllNodes(int * _args, int _num_args);
int getMasterNodeId(); int getMasterNodeId();
int getNextMasterNodeId(int nodeId);
int getNodeGroup(int nodeId);
int getRandomNodeSameNodeGroup(int nodeId, int randomNumber); int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber); int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
int getRandomNotMasterNodeId(int randomNumber); int getRandomNotMasterNodeId(int randomNumber);
......
...@@ -8,6 +8,15 @@ ...@@ -8,6 +8,15 @@
static const char* _dbname = "TEST_DB"; static const char* _dbname = "TEST_DB";
static int g_loops = 7; static int g_loops = 7;
NDB_STD_OPTS_VARS;
static struct my_option my_long_options[] =
{
NDB_STD_OPTS("ndb_desc"),
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
static void usage() static void usage()
{ {
ndb_std_print_version(); ndb_std_print_version();
...@@ -36,9 +45,10 @@ main(int argc, char** argv){ ...@@ -36,9 +45,10 @@ main(int argc, char** argv){
load_defaults("my",load_default_groups,&argc,&argv); load_defaults("my",load_default_groups,&argc,&argv);
int ho_error; int ho_error;
argc--; if ((ho_error=handle_options(&argc, &argv, my_long_options,
argv++; ndb_std_get_one_option)))
return NDBT_ProgramExit(NDBT_WRONGARGS);
Ndb_cluster_connection con(opt_connect_str); Ndb_cluster_connection con(opt_connect_str);
if(con.connect(12, 5, 1)) if(con.connect(12, 5, 1))
{ {
......
...@@ -1044,6 +1044,85 @@ int runBug25554(NDBT_Context* ctx, NDBT_Step* step){ ...@@ -1044,6 +1044,85 @@ int runBug25554(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK; return NDBT_OK;
} }
/**
 * Test case for bug 26457.
 *
 * Each loop iteration arms error insert 7180 ("Crash master during
 * master-take-over in execMASTER_LCPCONF") in the node that follows the
 * current master, and then restarts the current master.
 *
 * Nothing is done (NDBT_OK) when fewer than 4 data nodes are present.
 * Returns NDBT_FAILED as soon as a dump/error-insert request or a wait
 * for the cluster to start reports failure.
 */
int
runBug26457(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter res;
  if (res.getNumDbNodes() < 4)
    return NDBT_OK;

  for (int remaining = ctx->getNumLoops(); remaining != 0; remaining--)
  {
    int master;
    int next;

    // Keep restarting the successor until master and successor sit in
    // different node groups.
    for (;;)
    {
      master = res.getMasterNodeId();
      next = res.getNextMasterNodeId(master);
      ndbout_c("master: %d next: %d", master, next);

      if (res.getNodeGroup(master) != res.getNodeGroup(next))
        break;

      res.restartOneDbNode(next, false, false, true);
      if (res.waitClusterStarted())
        return NDBT_FAILED;
    }

    // NOTE(review): the meaning of value 2 for CmvmiSetRestartOnErrorInsert
    // is not visible here -- confirm against the DumpStateOrd documentation.
    int restartOnInsert[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2 };
    if (res.dumpStateOneNode(next, restartOnInsert, 2))
      return NDBT_FAILED;

    if (res.insertErrorInNode(next, 7180))
      return NDBT_FAILED;

    res.restartOneDbNode(master, false, false, true);
    if (res.waitClusterStarted())
      return NDBT_FAILED;
  }

  return NDBT_OK;
}
/**
 * Test case for bug 26481.
 *
 * Picks a random non-master data node, restarts it, arms error insert
 * 7018 in it, starts it, waits for it to return to the no-start state,
 * and finally starts it again and waits for the whole cluster to start.
 *
 * Returns NDBT_OK on success and NDBT_FAILED as soon as any restarter
 * request that is checked reports failure.
 */
int
runBug26481(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter res;

  int node = res.getRandomNotMasterNodeId(rand());
  ndbout_c("node: %d", node);

  // NOTE(review): the three flags are presumably initial/nostart/abort --
  // confirm against NdbRestarter::restartOneDbNode.
  if (res.restartOneDbNode(node, true, true, true))
    return NDBT_FAILED;

  if (res.waitNodesNoStart(&node, 1))
    return NDBT_FAILED;

  // NOTE(review): the meaning of value 1 for CmvmiSetRestartOnErrorInsert
  // is not visible here -- confirm against the DumpStateOrd documentation.
  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
  if (res.dumpStateOneNode(node, val2, 2))
    return NDBT_FAILED;

  if (res.insertErrorInNode(node, 7018))
    return NDBT_FAILED;

  if (res.startNodes(&node, 1))
    return NDBT_FAILED;

  // The result is intentionally not checked: the node is expected to end
  // up in the no-start state again, which the next call verifies.
  res.waitNodesStartPhase(&node, 1, 3);

  if (res.waitNodesNoStart(&node, 1))
    return NDBT_FAILED;

  // Fail right away if the start request itself is rejected, consistent
  // with the first startNodes() call above.
  if (res.startNodes(&node, 1))
    return NDBT_FAILED;

  if (res.waitClusterStarted())
    return NDBT_FAILED;

  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart); NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", TESTCASE("NoLoad",
...@@ -1366,6 +1445,12 @@ TESTCASE("Bug25364", ""){ ...@@ -1366,6 +1445,12 @@ TESTCASE("Bug25364", ""){
TESTCASE("Bug25554", ""){ TESTCASE("Bug25554", ""){
INITIALIZER(runBug25554); INITIALIZER(runBug25554);
} }
TESTCASE("Bug26457", ""){
INITIALIZER(runBug26457);
}
TESTCASE("Bug26481", ""){
INITIALIZER(runBug26481);
}
NDBT_TESTSUITE_END(testNodeRestart); NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
...@@ -477,6 +477,14 @@ max-time: 1000 ...@@ -477,6 +477,14 @@ max-time: 1000
cmd: testNodeRestart cmd: testNodeRestart
args: -n Bug25554 T1 args: -n Bug25554 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug26457 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug26481 T1
# OLD FLEX # OLD FLEX
max-time: 500 max-time: 500
cmd: flexBench cmd: flexBench
......
...@@ -127,6 +127,68 @@ NdbRestarter::getMasterNodeId(){ ...@@ -127,6 +127,68 @@ NdbRestarter::getMasterNodeId(){
return node; return node;
} }
/**
 * Return the node group of the data node whose id is nodeId.
 *
 * Returns -1 when the restarter is not connected, when getStatus()
 * returns non-zero, or when no entry in ndbNodes has that node id.
 */
int
NdbRestarter::getNodeGroup(int nodeId){
  if (!isConnected() || getStatus() != 0)
    return -1;

  int group = -1;
  for (size_t idx = 0; idx != ndbNodes.size(); ++idx)
  {
    if (ndbNodes[idx].node_id != nodeId)
      continue;

    // First matching entry wins.
    group = ndbNodes[idx].node_group;
    break;
  }
  return group;
}
/**
 * Return the id of the node whose dynamic id is the smallest one that is
 * strictly greater than the dynamic id of nodeId.
 *
 * Returns -1 when the restarter is not connected, when getStatus()
 * returns non-zero, or when nodeId is not found in ndbNodes (this last
 * case also trips an assert in debug builds).
 *
 * NOTE(review): when no node has a larger dynamic id than nodeId, the
 * selected dynamic id equals that of nodeId, so the first node carrying
 * that dynamic id is returned (normally nodeId itself). The fallback to
 * getMasterNodeId() is only reached when the selected dynamic id is ~0.
 * Confirm whether that is the intended behaviour.
 */
int
NdbRestarter::getNextMasterNodeId(int nodeId){
  if (!isConnected() || getStatus() != 0)
    return -1;

  // Locate the entry for nodeId.
  size_t pos = 0;
  while (pos < ndbNodes.size() && ndbNodes[pos].node_id != nodeId)
    pos++;

  assert(pos < ndbNodes.size());
  if (pos == ndbNodes.size())
    return -1;

  const int ownDynId = ndbNodes[pos].dynamic_id;

  // Pass 1: start from the largest dynamic id present (never below our own).
  int candidate = ownDynId;
  for (size_t k = 0; k < ndbNodes.size(); k++)
  {
    if (ndbNodes[k].dynamic_id > candidate)
      candidate = ndbNodes[k].dynamic_id;
  }

  // Pass 2: shrink towards the smallest dynamic id still above our own.
  for (size_t k = 0; k < ndbNodes.size(); k++)
  {
    if (ndbNodes[k].dynamic_id > ownDynId &&
        ndbNodes[k].dynamic_id < candidate)
    {
      candidate = ndbNodes[k].dynamic_id;
    }
  }

  if (candidate != ~0)
  {
    // Map the selected dynamic id back to a node id.
    for (size_t k = 0; k < ndbNodes.size(); k++)
    {
      if (ndbNodes[k].dynamic_id == candidate)
        return ndbNodes[k].node_id;
    }
  }

  return getMasterNodeId();
}
int int
NdbRestarter::getRandomNotMasterNodeId(int rand){ NdbRestarter::getRandomNotMasterNodeId(int rand){
int master = getMasterNodeId(); int master = getMasterNodeId();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment