Commit 0e410aa1 authored by jonas@perch.ndb.mysql.com

Merge perch.ndb.mysql.com:/home/jonas/src/41-work

into  perch.ndb.mysql.com:/home/jonas/src/50-work
parents 4e819b73 2abc5e2f
...@@ -64,6 +64,7 @@ public: ...@@ -64,6 +64,7 @@ public:
// 19 NDBFS Fipple with O_SYNC, O_CREATE etc. // 19 NDBFS Fipple with O_SYNC, O_CREATE etc.
// 20-24 BACKUP // 20-24 BACKUP
NdbcntrTestStopOnError = 25, NdbcntrTestStopOnError = 25,
NdbcntrStopNodes = 70,
// 100-105 TUP and ACC // 100-105 TUP and ACC
// 200-240 UTIL // 200-240 UTIL
// 300-305 TRIX // 300-305 TRIX
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#define FAIL_REP_HPP #define FAIL_REP_HPP
#include "SignalData.hpp" #include "SignalData.hpp"
#include <NodeBitmask.hpp>
/** /**
* *
...@@ -27,6 +28,7 @@ class FailRep { ...@@ -27,6 +28,7 @@ class FailRep {
* Sender(s) & Receiver(s) * Sender(s) & Receiver(s)
*/ */
friend class Qmgr; friend class Qmgr;
friend class Ndbcntr;
/** /**
* For printing * For printing
...@@ -35,6 +37,7 @@ class FailRep { ...@@ -35,6 +37,7 @@ class FailRep {
public: public:
STATIC_CONST( SignalLength = 2 ); STATIC_CONST( SignalLength = 2 );
STATIC_CONST( ExtraLength = 1 + NdbNodeBitmask::Size );
enum FailCause { enum FailCause {
ZOWN_FAILURE=0, ZOWN_FAILURE=0,
...@@ -43,13 +46,20 @@ public: ...@@ -43,13 +46,20 @@ public:
ZSTART_IN_REGREQ=3, ZSTART_IN_REGREQ=3,
ZHEARTBEAT_FAILURE=4, ZHEARTBEAT_FAILURE=4,
ZLINK_FAILURE=5, ZLINK_FAILURE=5,
ZOTHERNODE_FAILED_DURING_START=6 ZOTHERNODE_FAILED_DURING_START=6,
ZMULTI_NODE_SHUTDOWN = 7,
ZPARTITIONED_CLUSTER = 8
}; };
private: private:
Uint32 failNodeId; Uint32 failNodeId;
Uint32 failCause; Uint32 failCause;
/**
* Used when failCause == ZPARTITIONED_CLUSTER
*/
Uint32 president;
Uint32 partition[NdbNodeBitmask::Size];
}; };
......
...@@ -32,7 +32,7 @@ class StopReq ...@@ -32,7 +32,7 @@ class StopReq
friend class MgmtSrvr; friend class MgmtSrvr;
public: public:
STATIC_CONST( SignalLength = 9 ); STATIC_CONST( SignalLength = 9 + NdbNodeBitmask::Size);
public: public:
Uint32 senderRef; Uint32 senderRef;
...@@ -49,29 +49,34 @@ public: ...@@ -49,29 +49,34 @@ public:
Int32 readOperationTimeout; // Timeout before read operations are aborted Int32 readOperationTimeout; // Timeout before read operations are aborted
Int32 operationTimeout; // Timeout before all operations are aborted Int32 operationTimeout; // Timeout before all operations are aborted
Uint32 nodes[NdbNodeBitmask::Size];
static void setSystemStop(Uint32 & requestInfo, bool value); static void setSystemStop(Uint32 & requestInfo, bool value);
static void setPerformRestart(Uint32 & requestInfo, bool value); static void setPerformRestart(Uint32 & requestInfo, bool value);
static void setNoStart(Uint32 & requestInfo, bool value); static void setNoStart(Uint32 & requestInfo, bool value);
static void setInitialStart(Uint32 & requestInfo, bool value); static void setInitialStart(Uint32 & requestInfo, bool value);
static void setEscalateOnNodeFail(Uint32 & requestInfo, bool value);
/** /**
* Don't perform "graceful" shutdown/restart... * Don't perform "graceful" shutdown/restart...
*/ */
static void setStopAbort(Uint32 & requestInfo, bool value); static void setStopAbort(Uint32 & requestInfo, bool value);
static void setStopNodes(Uint32 & requestInfo, bool value);
static bool getSystemStop(const Uint32 & requestInfo); static bool getSystemStop(const Uint32 & requestInfo);
static bool getPerformRestart(const Uint32 & requestInfo); static bool getPerformRestart(const Uint32 & requestInfo);
static bool getNoStart(const Uint32 & requestInfo); static bool getNoStart(const Uint32 & requestInfo);
static bool getInitialStart(const Uint32 & requestInfo); static bool getInitialStart(const Uint32 & requestInfo);
static bool getEscalateOnNodeFail(const Uint32 & requestInfo);
static bool getStopAbort(const Uint32 & requestInfo); static bool getStopAbort(const Uint32 & requestInfo);
static bool getStopNodes(const Uint32 & requestInfo);
}; };
struct StopConf struct StopConf
{ {
STATIC_CONST( SignalLength = 2 ); STATIC_CONST( SignalLength = 2 );
Uint32 senderData; Uint32 senderData;
union {
Uint32 nodeState; Uint32 nodeState;
Uint32 nodeId;
};
}; };
class StopRef class StopRef
...@@ -94,7 +99,9 @@ public: ...@@ -94,7 +99,9 @@ public:
NodeShutdownInProgress = 1, NodeShutdownInProgress = 1,
SystemShutdownInProgress = 2, SystemShutdownInProgress = 2,
NodeShutdownWouldCauseSystemCrash = 3, NodeShutdownWouldCauseSystemCrash = 3,
TransactionAbortFailed = 4 TransactionAbortFailed = 4,
UnsupportedNodeShutdown = 5,
MultiNodeShutdownNotMaster = 6
}; };
public: public:
...@@ -132,16 +139,16 @@ StopReq::getInitialStart(const Uint32 & requestInfo) ...@@ -132,16 +139,16 @@ StopReq::getInitialStart(const Uint32 & requestInfo)
inline inline
bool bool
StopReq::getEscalateOnNodeFail(const Uint32 & requestInfo) StopReq::getStopAbort(const Uint32 & requestInfo)
{ {
return requestInfo & 16; return requestInfo & 32;
} }
inline inline
bool bool
StopReq::getStopAbort(const Uint32 & requestInfo) StopReq::getStopNodes(const Uint32 & requestInfo)
{ {
return requestInfo & 32; return requestInfo & 64;
} }
...@@ -187,24 +194,23 @@ StopReq::setInitialStart(Uint32 & requestInfo, bool value) ...@@ -187,24 +194,23 @@ StopReq::setInitialStart(Uint32 & requestInfo, bool value)
inline inline
void void
StopReq::setEscalateOnNodeFail(Uint32 & requestInfo, bool value) StopReq::setStopAbort(Uint32 & requestInfo, bool value)
{ {
if(value) if(value)
requestInfo |= 16; requestInfo |= 32;
else else
requestInfo &= ~16; requestInfo &= ~32;
} }
inline inline
void void
StopReq::setStopAbort(Uint32 & requestInfo, bool value) StopReq::setStopNodes(Uint32 & requestInfo, bool value)
{ {
if(value) if(value)
requestInfo |= 32; requestInfo |= 64;
else else
requestInfo &= ~32; requestInfo &= ~64;
} }
#endif #endif
...@@ -46,7 +46,9 @@ public: ...@@ -46,7 +46,9 @@ public:
Complete = 1, ///< Wait for a GCP to complete Complete = 1, ///< Wait for a GCP to complete
CompleteForceStart = 2, ///< Wait for a GCP to complete start one if needed CompleteForceStart = 2, ///< Wait for a GCP to complete start one if needed
CompleteIfRunning = 3, ///< Wait for ongoing GCP CompleteIfRunning = 3, ///< Wait for ongoing GCP
CurrentGCI = 8 ///< Immediately return current GCI CurrentGCI = 8, ///< Immediately return current GCI
BlockStartGcp = 9,
UnblockStartGcp = 10
}; };
Uint32 senderRef; Uint32 senderRef;
...@@ -70,11 +72,12 @@ class WaitGCPConf { ...@@ -70,11 +72,12 @@ class WaitGCPConf {
//friend class Grep::PSCoord; //friend class Grep::PSCoord;
public: public:
STATIC_CONST( SignalLength = 2 ); STATIC_CONST( SignalLength = 3 );
public: public:
Uint32 senderData; Uint32 senderData;
Uint32 gcp; Uint32 gcp;
Uint32 blockStatus;
}; };
class WaitGCPRef { class WaitGCPRef {
......
...@@ -134,6 +134,9 @@ Cmvmi::~Cmvmi() ...@@ -134,6 +134,9 @@ Cmvmi::~Cmvmi()
{ {
} }
#ifdef ERROR_INSERT
NodeBitmask c_error_9000_nodes_mask;
#endif
void Cmvmi::execNDB_TAMPER(Signal* signal) void Cmvmi::execNDB_TAMPER(Signal* signal)
{ {
...@@ -419,6 +422,11 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) ...@@ -419,6 +422,11 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
const Uint32 len = signal->getLength(); const Uint32 len = signal->getLength();
if(len == 2){ if(len == 2){
#ifdef ERROR_INSERT
if (! (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(tStartingNode)))
#endif
{
globalTransporterRegistry.do_connect(tStartingNode); globalTransporterRegistry.do_connect(tStartingNode);
globalTransporterRegistry.setIOState(tStartingNode, HaltIO); globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
...@@ -429,11 +437,18 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) ...@@ -429,11 +437,18 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
signal->theData[1] = tStartingNode; signal->theData[1] = tStartingNode;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
//----------------------------------------------------- //-----------------------------------------------------
}
} else { } else {
for(unsigned int i = 1; i < MAX_NODES; i++ ) { for(unsigned int i = 1; i < MAX_NODES; i++ ) {
jam(); jam();
if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){ if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){
jam(); jam();
#ifdef ERROR_INSERT
if (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(i))
continue;
#endif
globalTransporterRegistry.do_connect(i); globalTransporterRegistry.do_connect(i);
globalTransporterRegistry.setIOState(i, HaltIO); globalTransporterRegistry.setIOState(i, HaltIO);
...@@ -1039,7 +1054,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1039,7 +1054,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
} }
DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0]; DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpConnections){ Uint32 arg = dumpState->args[0];
if (arg == DumpStateOrd::CmvmiDumpConnections){
for(unsigned int i = 1; i < MAX_NODES; i++ ){ for(unsigned int i = 1; i < MAX_NODES; i++ ){
const char* nodeTypeStr = ""; const char* nodeTypeStr = "";
switch(getNodeInfo(i).m_type){ switch(getNodeInfo(i).m_type){
...@@ -1072,13 +1088,13 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1072,13 +1088,13 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
} }
} }
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpLongSignalMemory){ if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){
infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d", infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d",
g_sectionSegmentPool.getSize(), g_sectionSegmentPool.getSize(),
g_sectionSegmentPool.getNoOfFree()); g_sectionSegmentPool.getNoOfFree());
} }
if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert) if (arg == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
{ {
if(signal->getLength() == 1) if(signal->getLength() == 1)
{ {
...@@ -1098,7 +1114,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1098,7 +1114,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
} }
} }
if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) { if (arg == DumpStateOrd::CmvmiTestLongSigWithDelay) {
unsigned i; unsigned i;
Uint32 loopCount = dumpState->args[1]; Uint32 loopCount = dumpState->args[1];
const unsigned len0 = 11; const unsigned len0 = 11;
...@@ -1126,6 +1142,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1126,6 +1142,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2); sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2);
} }
#ifdef ERROR_INSERT
if (arg == 9000)
{
SET_ERROR_INSERT_VALUE(9000);
for (Uint32 i = 1; i<signal->getLength(); i++)
c_error_9000_nodes_mask.set(signal->theData[i]);
}
if (arg == 9001)
{
CLEAR_ERROR_INSERT_VALUE;
for (Uint32 i = 0; i<MAX_NODES; i++)
{
if (c_error_9000_nodes_mask.get(i))
{
signal->theData[0] = 0;
signal->theData[1] = i;
EXECUTE_DIRECT(CMVMI, GSN_OPEN_COMREQ, signal, 2);
}
}
c_error_9000_nodes_mask.clear();
}
#endif
#ifdef VM_TRACE #ifdef VM_TRACE
#if 0 #if 0
{ {
......
...@@ -14273,11 +14273,36 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal) ...@@ -14273,11 +14273,36 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
jam(); jam();
conf->senderData = senderData; conf->senderData = senderData;
conf->gcp = cnewgcp; conf->gcp = cnewgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB); WaitGCPConf::SignalLength, JBB);
return; return;
}//if }//if
if (requestType == WaitGCPReq::BlockStartGcp)
{
jam();
conf->senderData = senderData;
conf->gcp = cnewgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB);
cgcpOrderBlocked = 1;
return;
}
if (requestType == WaitGCPReq::UnblockStartGcp)
{
jam();
conf->senderData = senderData;
conf->gcp = cnewgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB);
cgcpOrderBlocked = 0;
return;
}
if(isMaster()) { if(isMaster()) {
/** /**
* Master * Master
...@@ -14289,6 +14314,7 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal) ...@@ -14289,6 +14314,7 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
jam(); jam();
conf->senderData = senderData; conf->senderData = senderData;
conf->gcp = coldgcp; conf->gcp = coldgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB); WaitGCPConf::SignalLength, JBB);
return; return;
...@@ -14375,6 +14401,7 @@ void Dbdih::execWAIT_GCP_CONF(Signal* signal) ...@@ -14375,6 +14401,7 @@ void Dbdih::execWAIT_GCP_CONF(Signal* signal)
conf->senderData = ptr.p->clientData; conf->senderData = ptr.p->clientData;
conf->gcp = gcp; conf->gcp = gcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal, sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB); WaitGCPConf::SignalLength, JBB);
...@@ -14442,6 +14469,7 @@ void Dbdih::emptyWaitGCPMasterQueue(Signal* signal) ...@@ -14442,6 +14469,7 @@ void Dbdih::emptyWaitGCPMasterQueue(Signal* signal)
c_waitGCPMasterList.next(ptr); c_waitGCPMasterList.next(ptr);
conf->senderData = clientData; conf->senderData = clientData;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal, sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB); WaitGCPConf::SignalLength, JBB);
......
...@@ -203,6 +203,7 @@ private: ...@@ -203,6 +203,7 @@ private:
void execWAIT_GCP_CONF(Signal* signal); void execWAIT_GCP_CONF(Signal* signal);
void execSTOP_REQ(Signal* signal); void execSTOP_REQ(Signal* signal);
void execSTOP_CONF(Signal* signal);
void execRESUME_REQ(Signal* signal); void execRESUME_REQ(Signal* signal);
void execCHANGE_NODE_STATE_CONF(Signal* signal); void execCHANGE_NODE_STATE_CONF(Signal* signal);
...@@ -338,6 +339,16 @@ public: ...@@ -338,6 +339,16 @@ public:
void progError(int line, int cause, const char * extra) { void progError(int line, int cause, const char * extra) {
cntr.progError(line, cause, extra); cntr.progError(line, cause, extra);
} }
enum StopNodesStep {
SR_BLOCK_GCP_START_GCP = 0,
SR_WAIT_COMPLETE_GCP = 1,
SR_UNBLOCK_GCP_START_GCP = 2,
SR_QMGR_STOP_REQ = 3,
SR_WAIT_NODE_FAILURES = 4,
SR_CLUSTER_SHUTDOWN = 12
} m_state;
SignalCounter m_stop_req_counter;
}; };
private: private:
StopRecord c_stopRec; StopRecord c_stopRec;
......
...@@ -87,6 +87,7 @@ Ndbcntr::Ndbcntr(const class Configuration & conf): ...@@ -87,6 +87,7 @@ Ndbcntr::Ndbcntr(const class Configuration & conf):
addRecSignal(GSN_STOP_ME_CONF, &Ndbcntr::execSTOP_ME_CONF); addRecSignal(GSN_STOP_ME_CONF, &Ndbcntr::execSTOP_ME_CONF);
addRecSignal(GSN_STOP_REQ, &Ndbcntr::execSTOP_REQ); addRecSignal(GSN_STOP_REQ, &Ndbcntr::execSTOP_REQ);
addRecSignal(GSN_STOP_CONF, &Ndbcntr::execSTOP_CONF);
addRecSignal(GSN_RESUME_REQ, &Ndbcntr::execRESUME_REQ); addRecSignal(GSN_RESUME_REQ, &Ndbcntr::execRESUME_REQ);
addRecSignal(GSN_WAIT_GCP_REF, &Ndbcntr::execWAIT_GCP_REF); addRecSignal(GSN_WAIT_GCP_REF, &Ndbcntr::execWAIT_GCP_REF);
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <signaldata/CmRegSignalData.hpp> #include <signaldata/CmRegSignalData.hpp>
#include <signaldata/ApiRegSignalData.hpp> #include <signaldata/ApiRegSignalData.hpp>
#include <signaldata/FailRep.hpp> #include <signaldata/FailRep.hpp>
#include <signaldata/StopReq.hpp>
#include "timer.hpp" #include "timer.hpp"
...@@ -100,7 +101,12 @@ public: ...@@ -100,7 +101,12 @@ public:
}; };
struct StartRecord { struct StartRecord {
void reset(){ m_startKey++; m_startNode = 0;} void reset(){
m_startKey++;
m_startNode = 0;
m_gsn = RNIL;
m_nodes.clearWaitingFor();
}
Uint32 m_startKey; Uint32 m_startKey;
Uint32 m_startNode; Uint32 m_startNode;
Uint64 m_startTimeout; Uint64 m_startTimeout;
...@@ -112,6 +118,14 @@ public: ...@@ -112,6 +118,14 @@ public:
NdbNodeBitmask c_definedNodes; // DB nodes in config NdbNodeBitmask c_definedNodes; // DB nodes in config
NdbNodeBitmask c_clusterNodes; // DB nodes in cluster NdbNodeBitmask c_clusterNodes; // DB nodes in cluster
NodeBitmask c_connectedNodes; // All kinds of connected nodes NodeBitmask c_connectedNodes; // All kinds of connected nodes
/**
* Nodes which we're checking for partitioned cluster
*
* i.e. nodes that connect to us when we have already elected a president
*/
NdbNodeBitmask c_readnodes_nodes;
Uint32 c_maxDynamicId; Uint32 c_maxDynamicId;
// Records // Records
...@@ -204,6 +218,7 @@ private: ...@@ -204,6 +218,7 @@ private:
void execPRES_TOCONF(Signal* signal); void execPRES_TOCONF(Signal* signal);
void execDISCONNECT_REP(Signal* signal); void execDISCONNECT_REP(Signal* signal);
void execSYSTEM_ERROR(Signal* signal); void execSYSTEM_ERROR(Signal* signal);
void execSTOP_REQ(Signal* signal);
// Received signals // Received signals
void execDUMP_STATE_ORD(Signal* signal); void execDUMP_STATE_ORD(Signal* signal);
...@@ -218,6 +233,8 @@ private: ...@@ -218,6 +233,8 @@ private:
void execREAD_NODESREQ(Signal* signal); void execREAD_NODESREQ(Signal* signal);
void execSET_VAR_REQ(Signal* signal); void execSET_VAR_REQ(Signal* signal);
void execREAD_NODESREF(Signal* signal);
void execREAD_NODESCONF(Signal* signal);
void execAPI_VERSION_REQ(Signal* signal); void execAPI_VERSION_REQ(Signal* signal);
void execAPI_BROADCAST_REP(Signal* signal); void execAPI_BROADCAST_REP(Signal* signal);
...@@ -234,6 +251,8 @@ private: ...@@ -234,6 +251,8 @@ private:
void execARBIT_STOPREP(Signal* signal); void execARBIT_STOPREP(Signal* signal);
// Statement blocks // Statement blocks
void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
void node_failed(Signal* signal, Uint16 aFailedNode); void node_failed(Signal* signal, Uint16 aFailedNode);
void checkStartInterface(Signal* signal); void checkStartInterface(Signal* signal);
void failReport(Signal* signal, void failReport(Signal* signal,
...@@ -251,8 +270,9 @@ private: ...@@ -251,8 +270,9 @@ private:
// Generated statement blocks // Generated statement blocks
void startphase1(Signal* signal); void startphase1(Signal* signal);
void electionWon(); void electionWon(Signal* signal);
void cmInfoconf010Lab(Signal* signal); void cmInfoconf010Lab(Signal* signal);
void apiHbHandlingLab(Signal* signal); void apiHbHandlingLab(Signal* signal);
void timerHandlingLab(Signal* signal); void timerHandlingLab(Signal* signal);
void hbReceivedLab(Signal* signal); void hbReceivedLab(Signal* signal);
...@@ -388,6 +408,8 @@ private: ...@@ -388,6 +408,8 @@ private:
Uint16 cprepFailedNodes[MAX_NDB_NODES]; Uint16 cprepFailedNodes[MAX_NDB_NODES];
Uint16 ccommitFailedNodes[MAX_NDB_NODES]; Uint16 ccommitFailedNodes[MAX_NDB_NODES];
StopReq c_stopReq;
void check_multi_node_shutdown(Signal* signal);
}; };
#endif #endif
...@@ -35,9 +35,8 @@ void Qmgr::initData() ...@@ -35,9 +35,8 @@ void Qmgr::initData()
Uint32 hbDBAPI = 500; Uint32 hbDBAPI = 500;
setHbApiDelay(hbDBAPI); setHbApiDelay(hbDBAPI);
c_connectedNodes.clear();
c_connectedNodes.set(getOwnNodeId()); c_connectedNodes.set(getOwnNodeId());
c_stopReq.senderRef = 0;
}//Qmgr::initData() }//Qmgr::initData()
void Qmgr::initRecords() void Qmgr::initRecords()
...@@ -52,6 +51,7 @@ Qmgr::Qmgr(const class Configuration & conf) ...@@ -52,6 +51,7 @@ Qmgr::Qmgr(const class Configuration & conf)
// Transit signals // Transit signals
addRecSignal(GSN_DUMP_STATE_ORD, &Qmgr::execDUMP_STATE_ORD); addRecSignal(GSN_DUMP_STATE_ORD, &Qmgr::execDUMP_STATE_ORD);
addRecSignal(GSN_STOP_REQ, &Qmgr::execSTOP_REQ);
addRecSignal(GSN_DEBUG_SIG, &Qmgr::execDEBUG_SIG); addRecSignal(GSN_DEBUG_SIG, &Qmgr::execDEBUG_SIG);
addRecSignal(GSN_CONTINUEB, &Qmgr::execCONTINUEB); addRecSignal(GSN_CONTINUEB, &Qmgr::execCONTINUEB);
addRecSignal(GSN_CM_HEARTBEAT, &Qmgr::execCM_HEARTBEAT); addRecSignal(GSN_CM_HEARTBEAT, &Qmgr::execCM_HEARTBEAT);
...@@ -96,6 +96,9 @@ Qmgr::Qmgr(const class Configuration & conf) ...@@ -96,6 +96,9 @@ Qmgr::Qmgr(const class Configuration & conf)
addRecSignal(GSN_ARBIT_CHOOSEREF, &Qmgr::execARBIT_CHOOSEREF); addRecSignal(GSN_ARBIT_CHOOSEREF, &Qmgr::execARBIT_CHOOSEREF);
addRecSignal(GSN_ARBIT_STOPREP, &Qmgr::execARBIT_STOPREP); addRecSignal(GSN_ARBIT_STOPREP, &Qmgr::execARBIT_STOPREP);
addRecSignal(GSN_READ_NODESREF, &Qmgr::execREAD_NODESREF);
addRecSignal(GSN_READ_NODESCONF, &Qmgr::execREAD_NODESCONF);
initData(); initData();
}//Qmgr::Qmgr() }//Qmgr::Qmgr()
......
This diff is collapsed.
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include <NdbRestarts.hpp> #include <NdbRestarts.hpp>
#include <Vector.hpp> #include <Vector.hpp>
#include <signaldata/DumpStateOrd.hpp> #include <signaldata/DumpStateOrd.hpp>
#include <Bitmask.hpp>
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){ int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
...@@ -669,6 +669,206 @@ err: ...@@ -669,6 +669,206 @@ err:
return NDBT_FAILED; return NDBT_FAILED;
} }
/**
 * Test step for TESTCASE "Bug18612" ("Test bug with partitioned clusters").
 *
 * Each loop iteration:
 *  1. Builds two node lists: partition0[i] is picked via
 *     getRandomNodeOtherNodeGroup() and partition1[i] is picked via
 *     getRandomNodeSameNodeGroup(partition0[i]).
 *  2. Sends DumpStateOrd::NdbcntrStopNodes with the partition0 list to the
 *     master node and calls waitNodesNoStart() on partition0.
 *  3. Enables CmvmiSetRestartOnErrorInsert and inserts error 932 on all
 *     nodes (NOTE(review): the effect of 932 is not visible here).
 *  4. Sends dump 9000 so that each list receives the ids of the other list.
 *     In Cmvmi, dump 9000 records the given ids in a node mask and
 *     OPEN_COMREQ skips do_connect() for masked nodes while that error
 *     insert is active.
 *  5. Starts partition0, waits for start phase 2, then sends dump 9001 to
 *     all nodes. In Cmvmi, 9001 clears the error insert, re-issues
 *     OPEN_COMREQ for every masked node and clears the mask.
 *  6. Waits for partition0 to be in "no start" again, restarts those nodes,
 *     then starts the whole cluster and waits until it is started.
 *
 * @return NDBT_OK when every step succeeded (also when the cluster has
 *         fewer than two DB nodes, after calling ctx->stopTest());
 *         NDBT_FAILED as soon as any restarter call returns non-zero.
 */
int
runBug18612(NDBT_Context* ctx, NDBT_Step* step){

  // Assume two replicas
  NdbRestarter restarter;
  if (restarter.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  const Uint32 cnt = restarter.getNumDbNodes();
  // Number of entries placed in each partition list
  const Uint32 half = cnt/2;

  for (int loop = 0; loop < ctx->getNumLoops(); loop++)
  {
    int partition0[256];
    int partition1[256];
    bzero(partition0, sizeof(partition0));
    bzero(partition1, sizeof(partition1));
    Bitmask<4> nodesmask;

    Uint32 node1 = restarter.getDbNodeId(rand()%cnt);
    for (Uint32 i = 0; i < half; i++)
    {
      // Hop to a node in another node group until we land on one that has
      // not been used yet; keep the current node if no other group exists.
      for (;;)
      {
        const int candidate =
          restarter.getRandomNodeOtherNodeGroup(node1, rand());
        if (candidate == -1)
          break;
        node1 = candidate;
        if (!nodesmask.get(node1))
          break;
      }

      partition0[i] = node1;
      partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand());

      ndbout_c("nodes %d %d", node1, partition1[i]);

      // Neither node of the pair may have been picked before
      assert(!nodesmask.get(node1));
      assert(!nodesmask.get(partition1[i]));
      nodesmask.set(node1);
      nodesmask.set(partition1[i]);
    }

    ndbout_c("done");

    // dump[0] is the dump code, dump[1..] the node list that goes with it
    int dump[255];
    dump[0] = DumpStateOrd::NdbcntrStopNodes;
    memcpy(&dump[1], partition0, sizeof(int)*cnt/2);

    Uint32 masterNode = restarter.getMasterNodeId();

    if (restarter.dumpStateOneNode(masterNode, dump, 1+half))
      return NDBT_FAILED;

    if (restarter.waitNodesNoStart(partition0, half))
      return NDBT_FAILED;

    int restartOnInsert[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };

    if (restarter.dumpStateAllNodes(restartOnInsert, 2))
      return NDBT_FAILED;

    if (restarter.insertErrorInAllNodes(932))
      return NDBT_FAILED;

    // Tell every partition1 node about the partition0 nodes ...
    dump[0] = 9000;
    memcpy(&dump[1], partition0, sizeof(int)*cnt/2);
    for (Uint32 i = 0; i < half; i++)
    {
      if (restarter.dumpStateOneNode(partition1[i], dump, 1+half))
        return NDBT_FAILED;
    }

    // ... and every partition0 node about the partition1 nodes
    dump[0] = 9000;
    memcpy(&dump[1], partition1, sizeof(int)*cnt/2);
    for (Uint32 i = 0; i < half; i++)
    {
      if (restarter.dumpStateOneNode(partition0[i], dump, 1+half))
        return NDBT_FAILED;
    }

    if (restarter.startNodes(partition0, half))
      return NDBT_FAILED;

    if (restarter.waitNodesStartPhase(partition0, half, 2))
      return NDBT_FAILED;

    // 9001 is sent once per pair, as in the original sequence
    dump[0] = 9001;
    for (Uint32 i = 0; i < half; i++)
    {
      if (restarter.dumpStateAllNodes(dump, 2))
        return NDBT_FAILED;
    }

    if (restarter.waitNodesNoStart(partition0, half))
      return NDBT_FAILED;

    for (Uint32 i = 0; i < half; i++)
    {
      if (restarter.restartOneDbNode(partition0[i], true, true, true))
        return NDBT_FAILED;
    }

    if (restarter.waitNodesNoStart(partition0, half))
      return NDBT_FAILED;

    if (restarter.startAll())
      return NDBT_FAILED;

    if (restarter.waitClusterStarted())
      return NDBT_FAILED;
  }

  return NDBT_OK;
}
/**
 * Test step for TESTCASE "Bug18612SR" ("Test bug with partitioned clusters").
 *
 * Each loop iteration:
 *  1. Builds two node lists: partition0[i] is picked via
 *     getRandomNodeOtherNodeGroup() and partition1[i] is picked via
 *     getRandomNodeSameNodeGroup(partition0[i]).
 *  2. Calls restartAll(false, true, false) on the cluster.
 *  3. Sends dump 9000 so that each list receives the ids of the other list.
 *     In Cmvmi, dump 9000 records the given ids in a node mask and
 *     OPEN_COMREQ skips do_connect() for masked nodes while that error
 *     insert is active.
 *  4. Enables CmvmiSetRestartOnErrorInsert and inserts error 932 on all
 *     nodes (NOTE(review): the effect of 932 is not visible here).
 *  5. Starts all nodes, waits for start phase 2, then sends dump 9001 to
 *     all nodes. In Cmvmi, 9001 clears the error insert, re-issues
 *     OPEN_COMREQ for every masked node and clears the mask.
 *  6. Accepts any one of: whole cluster in "no start", partition0 in
 *     "no start", or partition1 in "no start"; fails only if all three
 *     waits fail. Then starts the cluster and waits until it is started.
 *
 * @return NDBT_OK when every step succeeded (also when the cluster has
 *         fewer than two DB nodes, after calling ctx->stopTest());
 *         NDBT_FAILED as soon as a required restarter call returns non-zero.
 */
int
runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){

  // Assume two replicas
  NdbRestarter restarter;
  if (restarter.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  const Uint32 cnt = restarter.getNumDbNodes();
  // Number of entries placed in each partition list
  const Uint32 half = cnt/2;

  for (int loop = 0; loop < ctx->getNumLoops(); loop++)
  {
    int partition0[256];
    int partition1[256];
    bzero(partition0, sizeof(partition0));
    bzero(partition1, sizeof(partition1));
    Bitmask<4> nodesmask;

    Uint32 node1 = restarter.getDbNodeId(rand()%cnt);
    for (Uint32 i = 0; i < half; i++)
    {
      // Hop to a node in another node group until we land on one that has
      // not been used yet; keep the current node if no other group exists.
      for (;;)
      {
        const int candidate =
          restarter.getRandomNodeOtherNodeGroup(node1, rand());
        if (candidate == -1)
          break;
        node1 = candidate;
        if (!nodesmask.get(node1))
          break;
      }

      partition0[i] = node1;
      partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand());

      ndbout_c("nodes %d %d", node1, partition1[i]);

      // Neither node of the pair may have been picked before
      assert(!nodesmask.get(node1));
      assert(!nodesmask.get(partition1[i]));
      nodesmask.set(node1);
      nodesmask.set(partition1[i]);
    }

    ndbout_c("done");

    if (restarter.restartAll(false, true, false))
      return NDBT_FAILED;

    // dump[0] is the dump code, dump[1..] the node list that goes with it
    int dump[255];

    // Tell every partition1 node about the partition0 nodes ...
    dump[0] = 9000;
    memcpy(&dump[1], partition0, sizeof(int)*cnt/2);
    for (Uint32 i = 0; i < half; i++)
    {
      if (restarter.dumpStateOneNode(partition1[i], dump, 1+half))
        return NDBT_FAILED;
    }

    // ... and every partition0 node about the partition1 nodes
    dump[0] = 9000;
    memcpy(&dump[1], partition1, sizeof(int)*cnt/2);
    for (Uint32 i = 0; i < half; i++)
    {
      if (restarter.dumpStateOneNode(partition0[i], dump, 1+half))
        return NDBT_FAILED;
    }

    int restartOnInsert[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };

    if (restarter.dumpStateAllNodes(restartOnInsert, 2))
      return NDBT_FAILED;

    if (restarter.insertErrorInAllNodes(932))
      return NDBT_FAILED;

    if (restarter.startAll())
      return NDBT_FAILED;

    if (restarter.waitClusterStartPhase(2))
      return NDBT_FAILED;

    // 9001 is sent once per pair, as in the original sequence
    dump[0] = 9001;
    for (Uint32 i = 0; i < half; i++)
    {
      if (restarter.dumpStateAllNodes(dump, 2))
        return NDBT_FAILED;
    }

    // Fail only when none of the three waits succeeds; the later waits are
    // attempted only if the earlier ones returned non-zero.
    if (restarter.waitClusterNoStart(30) &&
        restarter.waitNodesNoStart(partition0, half, 10) &&
        restarter.waitNodesNoStart(partition1, half, 10))
      return NDBT_FAILED;

    if (restarter.startAll())
      return NDBT_FAILED;

    if (restarter.waitClusterStarted())
      return NDBT_FAILED;
  }

  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart); NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\ "Test that one node at a time can be stopped and then restarted "\
...@@ -963,6 +1163,18 @@ TESTCASE("Bug18414", ...@@ -963,6 +1163,18 @@ TESTCASE("Bug18414",
STEP(runBug18414); STEP(runBug18414);
FINALIZER(runClearTable); FINALIZER(runClearTable);
} }
TESTCASE("Bug18612",
"Test bug with partitioned clusters"){
INITIALIZER(runLoadTable);
STEP(runBug18612);
FINALIZER(runClearTable);
}
TESTCASE("Bug18612SR",
"Test bug with partitioned clusters"){
INITIALIZER(runLoadTable);
STEP(runBug18612SR);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart); NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
...@@ -433,10 +433,18 @@ args: -n Bug16772 T1 ...@@ -433,10 +433,18 @@ args: -n Bug16772 T1
#cmd: testSystemRestart #cmd: testSystemRestart
#args: -n Bug18385 T1 #args: -n Bug18385 T1
# #
max-time: 500 max-time: 1000
cmd: testNodeRestart cmd: testNodeRestart
args: -n Bug18414 T1 args: -n Bug18414 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug18612 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug18612SR T1
# OLD FLEX # OLD FLEX
max-time: 500 max-time: 500
cmd: flexBench cmd: flexBench
......
...@@ -445,8 +445,7 @@ int twoNodeFailure(NdbRestarter& _restarter, ...@@ -445,8 +445,7 @@ int twoNodeFailure(NdbRestarter& _restarter,
<< ") secs " << endl; << ") secs " << endl;
NdbSleep_SecSleep(seconds); NdbSleep_SecSleep(seconds);
randomId = (rand() % _restarter.getNumDbNodes()); nodeId = _restarter.getRandomNodeOtherNodeGroup(nodeId, rand());
nodeId = _restarter.getDbNodeId(randomId);
g_info << _restart->m_name << ": node = "<< nodeId << endl; g_info << _restart->m_name << ": node = "<< nodeId << endl;
CHECK(_restarter.insertErrorInNode(nodeId, 9999) == 0, CHECK(_restarter.insertErrorInNode(nodeId, 9999) == 0,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment