Commit 5a25026d authored by unknown's avatar unknown

ndb - bug#15695 bug#16447 bug#18612

  For various reasons, a partitioned cluster can have been created.
  This patch makes sure that when the partitions connect
  1) it's detected
  2) shutdown is forced


ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp:
  New dump/error insert for simulating network failure
ndb/src/kernel/blocks/qmgr/Qmgr.hpp:
  1) Actively detect partitioned cluster(s)
  2) add some documentation
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
  1) Actively detect partitioned cluster(s)
  2) add some documentation
parent 2a00c516
......@@ -133,6 +133,9 @@ Cmvmi::~Cmvmi()
{
}
#ifdef ERROR_INSERT
NodeBitmask c_error_9000_nodes_mask;
#endif
void Cmvmi::execNDB_TAMPER(Signal* signal)
{
......@@ -390,21 +393,33 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
const Uint32 len = signal->getLength();
if(len == 2){
globalTransporterRegistry.do_connect(tStartingNode);
globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
//-----------------------------------------------------
// Report that the connection to the node is opened
//-----------------------------------------------------
signal->theData[0] = EventReport::CommunicationOpened;
signal->theData[1] = tStartingNode;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
//-----------------------------------------------------
#ifdef ERROR_INSERT
if (! (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(tStartingNode)))
#endif
{
globalTransporterRegistry.do_connect(tStartingNode);
globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
//-----------------------------------------------------
// Report that the connection to the node is opened
//-----------------------------------------------------
signal->theData[0] = EventReport::CommunicationOpened;
signal->theData[1] = tStartingNode;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
//-----------------------------------------------------
}
} else {
for(unsigned int i = 1; i < MAX_NODES; i++ ) {
jam();
if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){
jam();
#ifdef ERROR_INSERT
if (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(i))
continue;
#endif
globalTransporterRegistry.do_connect(i);
globalTransporterRegistry.setIOState(i, HaltIO);
......@@ -1010,7 +1025,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpConnections){
Uint32 arg = dumpState->args[0];
if (arg == DumpStateOrd::CmvmiDumpConnections){
for(unsigned int i = 1; i < MAX_NODES; i++ ){
const char* nodeTypeStr = "";
switch(getNodeInfo(i).m_type){
......@@ -1043,13 +1059,13 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
}
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpLongSignalMemory){
if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){
infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d",
g_sectionSegmentPool.getSize(),
g_sectionSegmentPool.getNoOfFree());
}
if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
if (arg == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
{
if(signal->getLength() == 1)
{
......@@ -1069,7 +1085,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
}
if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) {
if (arg == DumpStateOrd::CmvmiTestLongSigWithDelay) {
unsigned i;
Uint32 loopCount = dumpState->args[1];
const unsigned len0 = 11;
......@@ -1097,6 +1113,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2);
}
#ifdef ERROR_INSERT
if (arg == 9000)
{
SET_ERROR_INSERT_VALUE(9000);
for (Uint32 i = 1; i<signal->getLength(); i++)
c_error_9000_nodes_mask.set(signal->theData[i]);
}
if (arg == 9001)
{
CLEAR_ERROR_INSERT_VALUE;
for (Uint32 i = 0; i<MAX_NODES; i++)
{
if (c_error_9000_nodes_mask.get(i))
{
signal->theData[0] = 0;
signal->theData[1] = i;
EXECUTE_DIRECT(CMVMI, GSN_OPEN_COMREQ, signal, 2);
}
}
c_error_9000_nodes_mask.clear();
}
#endif
#ifdef VM_TRACE
#if 0
{
......
......@@ -100,7 +100,12 @@ public:
};
struct StartRecord {
void reset(){ m_startKey++; m_startNode = 0;}
/**
 * Reset this start record.
 *
 * Advances m_startKey by one and clears the remaining per-start state:
 * m_startNode is set to 0, m_gsn is set to RNIL, and the waiting-for
 * state held in m_nodes is cleared via clearWaitingFor().
 *
 * NOTE(review): presumably bumping m_startKey lets replies that belong
 * to an earlier start attempt be told apart -- confirm against the
 * users of m_startKey.
 */
void reset(){
m_startKey++;
m_startNode = 0;
m_gsn = RNIL;
m_nodes.clearWaitingFor();
}
Uint32 m_startKey;
Uint32 m_startNode;
Uint64 m_startTimeout;
......@@ -112,6 +117,14 @@ public:
NdbNodeBitmask c_definedNodes; // DB nodes in config
NdbNodeBitmask c_clusterNodes; // DB nodes in cluster
NodeBitmask c_connectedNodes; // All kinds of connected nodes
/**
* Nodes which we're checking for partitioned cluster
*
* i.e. nodes that connect to us when we have already elected a president
*/
NdbNodeBitmask c_cmregreq_nodes;
Uint32 c_maxDynamicId;
// Records
......@@ -251,8 +264,10 @@ private:
// Generated statement blocks
void startphase1(Signal* signal);
void electionWon();
void electionWon(Signal* signal);
void cmInfoconf010Lab(Signal* signal);
bool check_cmregreq_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
void apiHbHandlingLab(Signal* signal);
void timerHandlingLab(Signal* signal);
void hbReceivedLab(Signal* signal);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment