Commit 452a2fb1 authored by jonas@perch.ndb.mysql.com

Merge perch.ndb.mysql.com:/home/jonas/src/51-work

into  perch.ndb.mysql.com:/home/jonas/src/mysql-5.1-new-ndb
parents 26b94adc 878ce564
...@@ -30,12 +30,17 @@ class CmRegReq { ...@@ -30,12 +30,17 @@ class CmRegReq {
friend class Qmgr; friend class Qmgr;
public: public:
STATIC_CONST( SignalLength = 3 ); STATIC_CONST( SignalLength = 5 + NdbNodeBitmask::Size );
private: private:
Uint32 blockRef; Uint32 blockRef;
Uint32 nodeId; Uint32 nodeId;
Uint32 version; // See ndb_version.h Uint32 version; // See ndb_version.h
Uint32 start_type; // As specified by cmd-line or mgm, NodeState::StartType
Uint32 latest_gci; // 0 means no fs
Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_
// to be part of restart
}; };
/** /**
...@@ -59,7 +64,6 @@ private: ...@@ -59,7 +64,6 @@ private:
* The dynamic id that the node reciving this signal has * The dynamic id that the node reciving this signal has
*/ */
Uint32 dynamicId; Uint32 dynamicId;
Uint32 allNdbNodes[NdbNodeBitmask::Size]; Uint32 allNdbNodes[NdbNodeBitmask::Size];
}; };
...@@ -73,7 +77,7 @@ class CmRegRef { ...@@ -73,7 +77,7 @@ class CmRegRef {
friend class Qmgr; friend class Qmgr;
public: public:
STATIC_CONST( SignalLength = 4 ); STATIC_CONST( SignalLength = 7 + NdbNodeBitmask::Size );
enum ErrorCode { enum ErrorCode {
ZBUSY = 0, /* Only the president can send this */ ZBUSY = 0, /* Only the president can send this */
...@@ -85,14 +89,27 @@ public: ...@@ -85,14 +89,27 @@ public:
* as president. */ * as president. */
ZNOT_PRESIDENT = 5, /* We are not president */ ZNOT_PRESIDENT = 5, /* We are not president */
ZNOT_DEAD = 6, /* We are not dead when we are starting */ ZNOT_DEAD = 6, /* We are not dead when we are starting */
ZINCOMPATIBLE_VERSION = 7 ZINCOMPATIBLE_VERSION = 7,
ZINCOMPATIBLE_START_TYPE = 8
}; };
private: private:
Uint32 blockRef; Uint32 blockRef;
Uint32 nodeId; Uint32 nodeId;
Uint32 errorCode; Uint32 errorCode;
/**
* Applicable if ZELECTION
*/
Uint32 presidentCandidate; Uint32 presidentCandidate;
Uint32 candidate_latest_gci; // 0 means non
/**
* Data for the sending node
*/
Uint32 latest_gci;
Uint32 start_type;
Uint32 skip_nodes[NdbNodeBitmask::Size]; // Nodes that does not _need_
// to be part of restart
}; };
class CmAdd { class CmAdd {
......
...@@ -64,6 +64,7 @@ public: ...@@ -64,6 +64,7 @@ public:
// 19 NDBFS Fipple with O_SYNC, O_CREATE etc. // 19 NDBFS Fipple with O_SYNC, O_CREATE etc.
// 20-24 BACKUP // 20-24 BACKUP
NdbcntrTestStopOnError = 25, NdbcntrTestStopOnError = 25,
NdbcntrStopNodes = 70,
// 100-105 TUP and ACC // 100-105 TUP and ACC
// 200-240 UTIL // 200-240 UTIL
// 300-305 TRIX // 300-305 TRIX
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#define FAIL_REP_HPP #define FAIL_REP_HPP
#include "SignalData.hpp" #include "SignalData.hpp"
#include <NodeBitmask.hpp>
/** /**
* *
...@@ -27,6 +28,7 @@ class FailRep { ...@@ -27,6 +28,7 @@ class FailRep {
* Sender(s) & Reciver(s) * Sender(s) & Reciver(s)
*/ */
friend class Qmgr; friend class Qmgr;
friend class Ndbcntr;
/** /**
* For printing * For printing
...@@ -35,6 +37,7 @@ class FailRep { ...@@ -35,6 +37,7 @@ class FailRep {
public: public:
STATIC_CONST( SignalLength = 2 ); STATIC_CONST( SignalLength = 2 );
STATIC_CONST( ExtraLength = 1 + NdbNodeBitmask::Size );
enum FailCause { enum FailCause {
ZOWN_FAILURE=0, ZOWN_FAILURE=0,
...@@ -43,13 +46,20 @@ public: ...@@ -43,13 +46,20 @@ public:
ZSTART_IN_REGREQ=3, ZSTART_IN_REGREQ=3,
ZHEARTBEAT_FAILURE=4, ZHEARTBEAT_FAILURE=4,
ZLINK_FAILURE=5, ZLINK_FAILURE=5,
ZOTHERNODE_FAILED_DURING_START=6 ZOTHERNODE_FAILED_DURING_START=6,
ZMULTI_NODE_SHUTDOWN = 7,
ZPARTITIONED_CLUSTER = 8
}; };
private: private:
Uint32 failNodeId; Uint32 failNodeId;
Uint32 failCause; Uint32 failCause;
/**
* Used when failCause == ZPARTITIONED_CLUSTER
*/
Uint32 president;
Uint32 partition[NdbNodeBitmask::Size];
}; };
......
...@@ -32,7 +32,7 @@ class StopReq ...@@ -32,7 +32,7 @@ class StopReq
friend class MgmtSrvr; friend class MgmtSrvr;
public: public:
STATIC_CONST( SignalLength = 9 ); STATIC_CONST( SignalLength = 9 + NdbNodeBitmask::Size);
public: public:
Uint32 senderRef; Uint32 senderRef;
...@@ -49,29 +49,34 @@ public: ...@@ -49,29 +49,34 @@ public:
Int32 readOperationTimeout; // Timeout before read operations are aborted Int32 readOperationTimeout; // Timeout before read operations are aborted
Int32 operationTimeout; // Timeout before all operations are aborted Int32 operationTimeout; // Timeout before all operations are aborted
Uint32 nodes[NdbNodeBitmask::Size];
static void setSystemStop(Uint32 & requestInfo, bool value); static void setSystemStop(Uint32 & requestInfo, bool value);
static void setPerformRestart(Uint32 & requestInfo, bool value); static void setPerformRestart(Uint32 & requestInfo, bool value);
static void setNoStart(Uint32 & requestInfo, bool value); static void setNoStart(Uint32 & requestInfo, bool value);
static void setInitialStart(Uint32 & requestInfo, bool value); static void setInitialStart(Uint32 & requestInfo, bool value);
static void setEscalateOnNodeFail(Uint32 & requestInfo, bool value);
/** /**
* Don't perform "graceful" shutdown/restart... * Don't perform "graceful" shutdown/restart...
*/ */
static void setStopAbort(Uint32 & requestInfo, bool value); static void setStopAbort(Uint32 & requestInfo, bool value);
static void setStopNodes(Uint32 & requestInfo, bool value);
static bool getSystemStop(const Uint32 & requestInfo); static bool getSystemStop(const Uint32 & requestInfo);
static bool getPerformRestart(const Uint32 & requestInfo); static bool getPerformRestart(const Uint32 & requestInfo);
static bool getNoStart(const Uint32 & requestInfo); static bool getNoStart(const Uint32 & requestInfo);
static bool getInitialStart(const Uint32 & requestInfo); static bool getInitialStart(const Uint32 & requestInfo);
static bool getEscalateOnNodeFail(const Uint32 & requestInfo);
static bool getStopAbort(const Uint32 & requestInfo); static bool getStopAbort(const Uint32 & requestInfo);
static bool getStopNodes(const Uint32 & requestInfo);
}; };
struct StopConf struct StopConf
{ {
STATIC_CONST( SignalLength = 2 ); STATIC_CONST( SignalLength = 2 );
Uint32 senderData; Uint32 senderData;
union {
Uint32 nodeState; Uint32 nodeState;
Uint32 nodeId;
};
}; };
class StopRef class StopRef
...@@ -87,19 +92,22 @@ class StopRef ...@@ -87,19 +92,22 @@ class StopRef
friend class Ndbcntr; friend class Ndbcntr;
public: public:
STATIC_CONST( SignalLength = 2 ); STATIC_CONST( SignalLength = 3 );
enum ErrorCode { enum ErrorCode {
OK = 0, OK = 0,
NodeShutdownInProgress = 1, NodeShutdownInProgress = 1,
SystemShutdownInProgress = 2, SystemShutdownInProgress = 2,
NodeShutdownWouldCauseSystemCrash = 3, NodeShutdownWouldCauseSystemCrash = 3,
TransactionAbortFailed = 4 TransactionAbortFailed = 4,
UnsupportedNodeShutdown = 5,
MultiNodeShutdownNotMaster = 6
}; };
public: public:
Uint32 senderData; Uint32 senderData;
Uint32 errorCode; Uint32 errorCode;
Uint32 masterNodeId;
}; };
inline inline
...@@ -132,16 +140,16 @@ StopReq::getInitialStart(const Uint32 & requestInfo) ...@@ -132,16 +140,16 @@ StopReq::getInitialStart(const Uint32 & requestInfo)
inline inline
bool bool
StopReq::getEscalateOnNodeFail(const Uint32 & requestInfo) StopReq::getStopAbort(const Uint32 & requestInfo)
{ {
return requestInfo & 16; return requestInfo & 32;
} }
inline inline
bool bool
StopReq::getStopAbort(const Uint32 & requestInfo) StopReq::getStopNodes(const Uint32 & requestInfo)
{ {
return requestInfo & 32; return requestInfo & 64;
} }
...@@ -187,24 +195,23 @@ StopReq::setInitialStart(Uint32 & requestInfo, bool value) ...@@ -187,24 +195,23 @@ StopReq::setInitialStart(Uint32 & requestInfo, bool value)
inline inline
void void
StopReq::setEscalateOnNodeFail(Uint32 & requestInfo, bool value) StopReq::setStopAbort(Uint32 & requestInfo, bool value)
{ {
if(value) if(value)
requestInfo |= 16; requestInfo |= 32;
else else
requestInfo &= ~16; requestInfo &= ~32;
} }
inline inline
void void
StopReq::setStopAbort(Uint32 & requestInfo, bool value) StopReq::setStopNodes(Uint32 & requestInfo, bool value)
{ {
if(value) if(value)
requestInfo |= 32; requestInfo |= 64;
else else
requestInfo &= ~32; requestInfo &= ~64;
} }
#endif #endif
...@@ -46,7 +46,9 @@ public: ...@@ -46,7 +46,9 @@ public:
Complete = 1, ///< Wait for a GCP to complete Complete = 1, ///< Wait for a GCP to complete
CompleteForceStart = 2, ///< Wait for a GCP to complete start one if needed CompleteForceStart = 2, ///< Wait for a GCP to complete start one if needed
CompleteIfRunning = 3, ///< Wait for ongoing GCP CompleteIfRunning = 3, ///< Wait for ongoing GCP
CurrentGCI = 8 ///< Immediately return current GCI CurrentGCI = 8, ///< Immediately return current GCI
BlockStartGcp = 9,
UnblockStartGcp = 10
}; };
Uint32 senderRef; Uint32 senderRef;
...@@ -70,11 +72,12 @@ class WaitGCPConf { ...@@ -70,11 +72,12 @@ class WaitGCPConf {
//friend class Grep::PSCoord; //friend class Grep::PSCoord;
public: public:
STATIC_CONST( SignalLength = 2 ); STATIC_CONST( SignalLength = 3 );
public: public:
Uint32 senderData; Uint32 senderData;
Uint32 gcp; Uint32 gcp;
Uint32 blockStatus;
}; };
class WaitGCPRef { class WaitGCPRef {
......
...@@ -169,9 +169,13 @@ extern "C" { ...@@ -169,9 +169,13 @@ extern "C" {
NDB_LE_BackupAborted = 57, NDB_LE_BackupAborted = 57,
/** NDB_MGM_EVENT_CATEGORY_INFO */ /** NDB_MGM_EVENT_CATEGORY_INFO */
NDB_LE_EventBufferStatus = 58 NDB_LE_EventBufferStatus = 58,
/* 59 used */ /* 59 used */
/** NDB_MGM_EVENT_CATEGORY_STARTUP */
NDB_LE_StartReport = 60
/* 60 unused */ /* 60 unused */
/* 61 unused */ /* 61 unused */
/* 62 unused */ /* 62 unused */
...@@ -637,6 +641,13 @@ extern "C" { ...@@ -637,6 +641,13 @@ extern "C" {
unsigned type; unsigned type;
unsigned node_id; unsigned node_id;
} SingleUser; } SingleUser;
/** Log event data @ref NDB_LE_StartReport */
struct {
unsigned report_type;
unsigned remaining_time;
unsigned bitmask_size;
unsigned bitmask_data[1];
} StartReport;
#ifndef DOXYGEN_FIX #ifndef DOXYGEN_FIX
}; };
#else #else
......
...@@ -743,6 +743,90 @@ void getTextSingleUser(QQQQ) { ...@@ -743,6 +743,90 @@ void getTextSingleUser(QQQQ) {
} }
} }
/**
 * Build the human-readable text for a start-report event.
 *
 * Layout of theData as read here:
 *   [1] report type
 *   [2] remaining time (printed as seconds)
 *   [3] bitmask size in 32-bit words
 *   [4...] four consecutive bitmasks of that size
 * The four bitmasks are printed, in order, as the "all", "connected",
 * "no-wait" and "waiting for"/"missing" node sets.
 *
 * NOTE(review): the bitmask size is taken from the event unchecked while
 * each text buffer is 100 bytes; presumably the sender always uses
 * NdbNodeBitmask::Size -- confirm.
 */
void getTextStartReport(QQQQ) {
  // Report types below 0x8000 describe what the node is waiting for,
  // those from 0x8000 describe the start that is being performed.
  enum {
    RT_WAIT_INITIAL             = 1,
    RT_WAIT_PARTIAL             = 2,
    RT_WAIT_PARTIAL_TIMEOUT     = 3,
    RT_WAIT_PARTITIONED         = 4,
    RT_WAIT_PARTITIONED_TIMEOUT = 5,
    RT_DO_INITIAL               = 0x8000,
    RT_DO_START                 = 0x8001,
    RT_DO_PARTIAL               = 0x8002,
    RT_DO_PARTITIONED           = 0x8003
  };

  const Uint32 reportType = theData[1];
  const Uint32 secs = theData[2];
  const Uint32 words = theData[3];

  // Render the four bitmasks that follow the fixed header, in order.
  char text[4][100];
  for (Uint32 n = 0; n < 4; n++)
    BitmaskImpl::getText(words, theData + 4 + (n * words), text[n]);

  const char * const allNodes = text[0];
  const char * const connected = text[1];
  const char * const noWait = text[2];
  const char * const missing = text[3];

  switch (reportType) {
  case RT_WAIT_INITIAL:
    BaseString::snprintf(m_text, m_text_len,
                         "Initial start, waiting for %s to connect, "
                         " nodes [ all: %s connected: %s no-wait: %s ]",
                         missing, allNodes, connected, noWait);
    break;

  case RT_WAIT_PARTIAL:
    BaseString::snprintf(m_text, m_text_len,
                         "Waiting until nodes: %s connects, "
                         "nodes [ all: %s connected: %s no-wait: %s ]",
                         missing, allNodes, connected, noWait);
    break;

  case RT_WAIT_PARTIAL_TIMEOUT:
    BaseString::snprintf(m_text, m_text_len,
                         "Waiting %u sec for nodes %s to connect, "
                         "nodes [ all: %s connected: %s no-wait: %s ]",
                         secs, missing, allNodes, connected, noWait);
    break;

  case RT_WAIT_PARTITIONED:
    BaseString::snprintf(m_text, m_text_len,
                         "Waiting for non partitioned start, "
                         "nodes [ all: %s connected: %s missing: %s no-wait: %s ]",
                         allNodes, connected, missing, noWait);
    break;

  case RT_WAIT_PARTITIONED_TIMEOUT:
    BaseString::snprintf(m_text, m_text_len,
                         "Waiting %u sec for non partitioned start, "
                         "nodes [ all: %s connected: %s missing: %s no-wait: %s ]",
                         secs, allNodes, connected, missing, noWait);
    break;

  case RT_DO_INITIAL:
    BaseString::snprintf(m_text, m_text_len,
                         "Initial start with nodes %s [ missing: %s no-wait: %s ]",
                         connected, missing, noWait);
    break;

  case RT_DO_START:
    BaseString::snprintf(m_text, m_text_len,
                         "Start with all nodes %s",
                         connected);
    break;

  case RT_DO_PARTIAL:
    BaseString::snprintf(m_text, m_text_len,
                         "Start with nodes %s [ missing: %s no-wait: %s ]",
                         connected, missing, noWait);
    break;

  case RT_DO_PARTITIONED:
    BaseString::snprintf(m_text, m_text_len,
                         "Start potentially partitioned with nodes %s "
                         " [ missing: %s no-wait: %s ]",
                         connected, missing, noWait);
    break;

  default:
    // Unrecognised report type: dump the raw type and all four masks.
    BaseString::snprintf(m_text, m_text_len,
                         "Unknown startreport: 0x%x [ %s %s %s %s ]",
                         reportType,
                         allNodes, connected, noWait, missing);
  }
}
#if 0 #if 0
BaseString::snprintf(m_text, BaseString::snprintf(m_text,
m_text_len, m_text_len,
...@@ -791,6 +875,7 @@ const EventLoggerBase::EventRepLogLevelMatrix EventLoggerBase::matrix[] = { ...@@ -791,6 +875,7 @@ const EventLoggerBase::EventRepLogLevelMatrix EventLoggerBase::matrix[] = {
ROW(StartREDOLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), ROW(StartREDOLog, LogLevel::llStartUp, 10, Logger::LL_INFO ),
ROW(StartLog, LogLevel::llStartUp, 10, Logger::LL_INFO ), ROW(StartLog, LogLevel::llStartUp, 10, Logger::LL_INFO ),
ROW(UNDORecordsExecuted, LogLevel::llStartUp, 15, Logger::LL_INFO ), ROW(UNDORecordsExecuted, LogLevel::llStartUp, 15, Logger::LL_INFO ),
ROW(StartReport, LogLevel::llStartUp, 4, Logger::LL_INFO ),
// NODERESTART // NODERESTART
ROW(NR_CopyDict, LogLevel::llNodeRestart, 8, Logger::LL_INFO ), ROW(NR_CopyDict, LogLevel::llNodeRestart, 8, Logger::LL_INFO ),
......
...@@ -134,6 +134,9 @@ Cmvmi::~Cmvmi() ...@@ -134,6 +134,9 @@ Cmvmi::~Cmvmi()
m_shared_page_pool.clear(); m_shared_page_pool.clear();
} }
#ifdef ERROR_INSERT
// Only present in ERROR_INSERT builds. Holds the node ids passed with
// dump code 9000 in execDUMP_STATE_ORD. While error insert 9000 is set,
// execOPEN_COMREQ does not connect to nodes whose bit is set here.
// Dump code 9001 clears the error insert, re-executes OPEN_COMREQ for
// every node in the mask and then clears the mask.
NodeBitmask c_error_9000_nodes_mask;
#endif
void Cmvmi::execNDB_TAMPER(Signal* signal) void Cmvmi::execNDB_TAMPER(Signal* signal)
{ {
...@@ -441,6 +444,11 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) ...@@ -441,6 +444,11 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
const Uint32 len = signal->getLength(); const Uint32 len = signal->getLength();
if(len == 2){ if(len == 2){
#ifdef ERROR_INSERT
if (! (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(tStartingNode)))
#endif
{
globalTransporterRegistry.do_connect(tStartingNode); globalTransporterRegistry.do_connect(tStartingNode);
globalTransporterRegistry.setIOState(tStartingNode, HaltIO); globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
...@@ -451,11 +459,18 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) ...@@ -451,11 +459,18 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
signal->theData[1] = tStartingNode; signal->theData[1] = tStartingNode;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
//----------------------------------------------------- //-----------------------------------------------------
}
} else { } else {
for(unsigned int i = 1; i < MAX_NODES; i++ ) { for(unsigned int i = 1; i < MAX_NODES; i++ ) {
jam(); jam();
if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){ if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){
jam(); jam();
#ifdef ERROR_INSERT
if (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(i))
continue;
#endif
globalTransporterRegistry.do_connect(i); globalTransporterRegistry.do_connect(i);
globalTransporterRegistry.setIOState(i, HaltIO); globalTransporterRegistry.setIOState(i, HaltIO);
...@@ -1064,7 +1079,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1064,7 +1079,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
} }
DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0]; DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpConnections){ Uint32 arg = dumpState->args[0];
if (arg == DumpStateOrd::CmvmiDumpConnections){
for(unsigned int i = 1; i < MAX_NODES; i++ ){ for(unsigned int i = 1; i < MAX_NODES; i++ ){
const char* nodeTypeStr = ""; const char* nodeTypeStr = "";
switch(getNodeInfo(i).m_type){ switch(getNodeInfo(i).m_type){
...@@ -1094,7 +1110,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1094,7 +1110,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
} }
} }
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpLongSignalMemory){ if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){
infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d", infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d",
g_sectionSegmentPool.getSize(), g_sectionSegmentPool.getSize(),
g_sectionSegmentPool.getNoOfFree()); g_sectionSegmentPool.getNoOfFree());
...@@ -1131,7 +1147,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1131,7 +1147,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
return; return;
} }
if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert) if (arg == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
{ {
if(signal->getLength() == 1) if(signal->getLength() == 1)
{ {
...@@ -1151,7 +1167,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1151,7 +1167,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
} }
} }
if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) { if (arg == DumpStateOrd::CmvmiTestLongSigWithDelay) {
unsigned i; unsigned i;
Uint32 loopCount = dumpState->args[1]; Uint32 loopCount = dumpState->args[1];
const unsigned len0 = 11; const unsigned len0 = 11;
...@@ -1179,6 +1195,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal) ...@@ -1179,6 +1195,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2); sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2);
} }
#ifdef ERROR_INSERT
if (arg == 9000)
{
SET_ERROR_INSERT_VALUE(9000);
for (Uint32 i = 1; i<signal->getLength(); i++)
c_error_9000_nodes_mask.set(signal->theData[i]);
}
if (arg == 9001)
{
CLEAR_ERROR_INSERT_VALUE;
for (Uint32 i = 0; i<MAX_NODES; i++)
{
if (c_error_9000_nodes_mask.get(i))
{
signal->theData[0] = 0;
signal->theData[1] = i;
EXECUTE_DIRECT(CMVMI, GSN_OPEN_COMREQ, signal, 2);
}
}
c_error_9000_nodes_mask.clear();
}
#endif
#ifdef VM_TRACE #ifdef VM_TRACE
#if 0 #if 0
{ {
......
...@@ -58,6 +58,7 @@ void Dbdih::initData() ...@@ -58,6 +58,7 @@ void Dbdih::initData()
cwaitLcpSr = false; cwaitLcpSr = false;
c_blockCommit = false; c_blockCommit = false;
c_blockCommitNo = 1; c_blockCommitNo = 1;
cntrlblockref = RNIL;
}//Dbdih::initData() }//Dbdih::initData()
void Dbdih::initRecords() void Dbdih::initRecords()
......
...@@ -11995,7 +11995,7 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) ...@@ -11995,7 +11995,7 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[])
Uint32 tmngNode; Uint32 tmngNode;
Uint32 tmngNodeGroup; Uint32 tmngNodeGroup;
Uint32 tmngLimit; Uint32 tmngLimit;
Uint32 i; Uint32 i, j;
/**----------------------------------------------------------------------- /**-----------------------------------------------------------------------
* ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. HOT SPARE NODES ARE ASSIGNED * ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. HOT SPARE NODES ARE ASSIGNED
...@@ -12041,6 +12041,38 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[]) ...@@ -12041,6 +12041,38 @@ void Dbdih::makeNodeGroups(Uint32 nodeArray[])
Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups, mngNodeptr.p->nodeGroup); Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups, mngNodeptr.p->nodeGroup);
}//if }//if
}//for }//for
for (i = 0; i<cnoOfNodeGroups; i++)
{
jam();
bool alive = false;
NodeGroupRecordPtr NGPtr;
NGPtr.i = i;
ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
for (j = 0; j<NGPtr.p->nodeCount; j++)
{
jam();
mngNodeptr.i = NGPtr.p->nodesInGroup[j];
ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
if (checkNodeAlive(NGPtr.p->nodesInGroup[j]))
{
alive = true;
break;
}
}
if (!alive)
{
char buf[255];
BaseString::snprintf
(buf, sizeof(buf),
"Illegal initial start, no alive node in nodegroup %u", i);
progError(__LINE__,
NDBD_EXIT_SR_RESTARTCONFLICT,
buf);
}
}
}//Dbdih::makeNodeGroups() }//Dbdih::makeNodeGroups()
/** /**
...@@ -12851,7 +12883,6 @@ void Dbdih::sendStartFragreq(Signal* signal, ...@@ -12851,7 +12883,6 @@ void Dbdih::sendStartFragreq(Signal* signal,
void Dbdih::setInitialActiveStatus() void Dbdih::setInitialActiveStatus()
{ {
NodeRecordPtr siaNodeptr; NodeRecordPtr siaNodeptr;
Uint32 tsiaNodeActiveStatus;
Uint32 tsiaNoActiveNodes; Uint32 tsiaNoActiveNodes;
tsiaNoActiveNodes = csystemnodes - cnoHotSpare; tsiaNoActiveNodes = csystemnodes - cnoHotSpare;
...@@ -12859,39 +12890,34 @@ void Dbdih::setInitialActiveStatus() ...@@ -12859,39 +12890,34 @@ void Dbdih::setInitialActiveStatus()
SYSFILE->nodeStatus[i] = 0; SYSFILE->nodeStatus[i] = 0;
for (siaNodeptr.i = 1; siaNodeptr.i < MAX_NDB_NODES; siaNodeptr.i++) { for (siaNodeptr.i = 1; siaNodeptr.i < MAX_NDB_NODES; siaNodeptr.i++) {
ptrAss(siaNodeptr, nodeRecord); ptrAss(siaNodeptr, nodeRecord);
if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE) { switch(siaNodeptr.p->nodeStatus){
case NodeRecord::ALIVE:
case NodeRecord::DEAD:
if (tsiaNoActiveNodes == 0) { if (tsiaNoActiveNodes == 0) {
jam(); jam();
siaNodeptr.p->activeStatus = Sysfile::NS_HotSpare; siaNodeptr.p->activeStatus = Sysfile::NS_HotSpare;
} else { } else {
jam(); jam();
tsiaNoActiveNodes = tsiaNoActiveNodes - 1; tsiaNoActiveNodes = tsiaNoActiveNodes - 1;
siaNodeptr.p->activeStatus = Sysfile::NS_Active; if (siaNodeptr.p->nodeStatus == NodeRecord::ALIVE)
}//if {
} else {
jam();
siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
}//if
switch (siaNodeptr.p->activeStatus) {
case Sysfile::NS_Active:
jam();
tsiaNodeActiveStatus = Sysfile::NS_Active;
break;
case Sysfile::NS_HotSpare:
jam();
tsiaNodeActiveStatus = Sysfile::NS_HotSpare;
break;
case Sysfile::NS_NotDefined:
jam(); jam();
tsiaNodeActiveStatus = Sysfile::NS_NotDefined; siaNodeptr.p->activeStatus = Sysfile::NS_Active;
}
else
{
siaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
}
}
break; break;
default: default:
ndbrequire(false); jam();
return; siaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
break; break;
}//switch }//if
Sysfile::setNodeStatus(siaNodeptr.i, SYSFILE->nodeStatus, Sysfile::setNodeStatus(siaNodeptr.i,
tsiaNodeActiveStatus); SYSFILE->nodeStatus,
siaNodeptr.p->activeStatus);
}//for }//for
}//Dbdih::setInitialActiveStatus() }//Dbdih::setInitialActiveStatus()
...@@ -14613,11 +14639,36 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal) ...@@ -14613,11 +14639,36 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
jam(); jam();
conf->senderData = senderData; conf->senderData = senderData;
conf->gcp = cnewgcp; conf->gcp = cnewgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB); WaitGCPConf::SignalLength, JBB);
return; return;
}//if }//if
if (requestType == WaitGCPReq::BlockStartGcp)
{
jam();
conf->senderData = senderData;
conf->gcp = cnewgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB);
cgcpOrderBlocked = 1;
return;
}
if (requestType == WaitGCPReq::UnblockStartGcp)
{
jam();
conf->senderData = senderData;
conf->gcp = cnewgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB);
cgcpOrderBlocked = 0;
return;
}
if(isMaster()) { if(isMaster()) {
/** /**
* Master * Master
...@@ -14629,6 +14680,7 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal) ...@@ -14629,6 +14680,7 @@ void Dbdih::execWAIT_GCP_REQ(Signal* signal)
jam(); jam();
conf->senderData = senderData; conf->senderData = senderData;
conf->gcp = coldgcp; conf->gcp = coldgcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal, sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB); WaitGCPConf::SignalLength, JBB);
return; return;
...@@ -14715,6 +14767,7 @@ void Dbdih::execWAIT_GCP_CONF(Signal* signal) ...@@ -14715,6 +14767,7 @@ void Dbdih::execWAIT_GCP_CONF(Signal* signal)
conf->senderData = ptr.p->clientData; conf->senderData = ptr.p->clientData;
conf->gcp = gcp; conf->gcp = gcp;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal, sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB); WaitGCPConf::SignalLength, JBB);
...@@ -14782,6 +14835,7 @@ void Dbdih::emptyWaitGCPMasterQueue(Signal* signal) ...@@ -14782,6 +14835,7 @@ void Dbdih::emptyWaitGCPMasterQueue(Signal* signal)
c_waitGCPMasterList.next(ptr); c_waitGCPMasterList.next(ptr);
conf->senderData = clientData; conf->senderData = clientData;
conf->blockStatus = cgcpOrderBlocked;
sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal, sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal,
WaitGCPConf::SignalLength, JBB); WaitGCPConf::SignalLength, JBB);
......
...@@ -204,6 +204,7 @@ private: ...@@ -204,6 +204,7 @@ private:
void execWAIT_GCP_CONF(Signal* signal); void execWAIT_GCP_CONF(Signal* signal);
void execSTOP_REQ(Signal* signal); void execSTOP_REQ(Signal* signal);
void execSTOP_CONF(Signal* signal);
void execRESUME_REQ(Signal* signal); void execRESUME_REQ(Signal* signal);
void execCHANGE_NODE_STATE_CONF(Signal* signal); void execCHANGE_NODE_STATE_CONF(Signal* signal);
...@@ -339,6 +340,16 @@ public: ...@@ -339,6 +340,16 @@ public:
void progError(int line, int cause, const char * extra) { void progError(int line, int cause, const char * extra) {
cntr.progError(line, cause, extra); cntr.progError(line, cause, extra);
} }
// Step identifiers for the stop-nodes procedure; the current step is kept
// in m_state. Values are assigned explicitly, and SR_CLUSTER_SHUTDOWN (12)
// is deliberately not consecutive with the others.
// NOTE(review): judging by the names the steps run block GCP start ->
// wait for GCP completion -> unblock GCP start -> STOP_REQ to QMGR ->
// wait for node failures; confirm against the Ndbcntr stop handling.
enum StopNodesStep {
SR_BLOCK_GCP_START_GCP = 0,
SR_WAIT_COMPLETE_GCP = 1,
SR_UNBLOCK_GCP_START_GCP = 2,
SR_QMGR_STOP_REQ = 3,
SR_WAIT_NODE_FAILURES = 4,
SR_CLUSTER_SHUTDOWN = 12
} m_state;
// NOTE(review): presumably tracks outstanding STOP_REQ replies per node;
// its use is not visible in this chunk -- confirm.
SignalCounter m_stop_req_counter;
}; };
private: private:
StopRecord c_stopRec; StopRecord c_stopRec;
......
...@@ -88,6 +88,7 @@ Ndbcntr::Ndbcntr(Block_context& ctx): ...@@ -88,6 +88,7 @@ Ndbcntr::Ndbcntr(Block_context& ctx):
addRecSignal(GSN_STOP_ME_CONF, &Ndbcntr::execSTOP_ME_CONF); addRecSignal(GSN_STOP_ME_CONF, &Ndbcntr::execSTOP_ME_CONF);
addRecSignal(GSN_STOP_REQ, &Ndbcntr::execSTOP_REQ); addRecSignal(GSN_STOP_REQ, &Ndbcntr::execSTOP_REQ);
addRecSignal(GSN_STOP_CONF, &Ndbcntr::execSTOP_CONF);
addRecSignal(GSN_RESUME_REQ, &Ndbcntr::execRESUME_REQ); addRecSignal(GSN_RESUME_REQ, &Ndbcntr::execRESUME_REQ);
addRecSignal(GSN_WAIT_GCP_REF, &Ndbcntr::execWAIT_GCP_REF); addRecSignal(GSN_WAIT_GCP_REF, &Ndbcntr::execWAIT_GCP_REF);
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <SafeCounter.hpp> #include <SafeCounter.hpp>
#include <RequestTracker.hpp> #include <RequestTracker.hpp>
#include <signaldata/StopReq.hpp>
#include "timer.hpp" #include "timer.hpp"
...@@ -53,6 +54,7 @@ ...@@ -53,6 +54,7 @@
#define ZAPI_HB_HANDLING 3 #define ZAPI_HB_HANDLING 3
#define ZTIMER_HANDLING 4 #define ZTIMER_HANDLING 4
#define ZARBIT_HANDLING 5 #define ZARBIT_HANDLING 5
#define ZSTART_FAILURE_LIMIT 6
/* Error Codes ------------------------------*/ /* Error Codes ------------------------------*/
#define ZERRTOOMANY 1101 #define ZERRTOOMANY 1101
...@@ -104,18 +106,42 @@ public: ...@@ -104,18 +106,42 @@ public:
}; };
struct StartRecord { struct StartRecord {
void reset(){ m_startKey++; m_startNode = 0;} void reset(){
m_startKey++;
m_startNode = 0;
m_gsn = RNIL;
m_nodes.clearWaitingFor();
}
Uint32 m_startKey; Uint32 m_startKey;
Uint32 m_startNode; Uint32 m_startNode;
Uint64 m_startTimeout; Uint64 m_startTimeout;
Uint32 m_gsn; Uint32 m_gsn;
SignalCounter m_nodes; SignalCounter m_nodes;
Uint32 m_latest_gci;
Uint32 m_start_type;
NdbNodeBitmask m_skip_nodes;
NdbNodeBitmask m_starting_nodes;
NdbNodeBitmask m_starting_nodes_w_log;
Uint16 m_president_candidate;
Uint32 m_president_candidate_gci;
Uint16 m_regReqReqSent;
Uint16 m_regReqReqRecv;
} c_start; } c_start;
NdbNodeBitmask c_definedNodes; // DB nodes in config NdbNodeBitmask c_definedNodes; // DB nodes in config
NdbNodeBitmask c_clusterNodes; // DB nodes in cluster NdbNodeBitmask c_clusterNodes; // DB nodes in cluster
NodeBitmask c_connectedNodes; // All kinds of connected nodes NodeBitmask c_connectedNodes; // All kinds of connected nodes
/**
* Nodes which we're checking for partitioned cluster
*
* i.e. nodes that connect to us when we have already elected a president
*/
NdbNodeBitmask c_readnodes_nodes;
Uint32 c_maxDynamicId; Uint32 c_maxDynamicId;
// Records // Records
...@@ -208,6 +234,7 @@ private: ...@@ -208,6 +234,7 @@ private:
void execPRES_TOCONF(Signal* signal); void execPRES_TOCONF(Signal* signal);
void execDISCONNECT_REP(Signal* signal); void execDISCONNECT_REP(Signal* signal);
void execSYSTEM_ERROR(Signal* signal); void execSYSTEM_ERROR(Signal* signal);
void execSTOP_REQ(Signal* signal);
// Received signals // Received signals
void execDUMP_STATE_ORD(Signal* signal); void execDUMP_STATE_ORD(Signal* signal);
...@@ -222,6 +249,11 @@ private: ...@@ -222,6 +249,11 @@ private:
void execREAD_NODESREQ(Signal* signal); void execREAD_NODESREQ(Signal* signal);
void execSET_VAR_REQ(Signal* signal); void execSET_VAR_REQ(Signal* signal);
void execREAD_NODESREF(Signal* signal);
void execREAD_NODESCONF(Signal* signal);
void execDIH_RESTARTREF(Signal* signal);
void execDIH_RESTARTCONF(Signal* signal);
void execAPI_VERSION_REQ(Signal* signal); void execAPI_VERSION_REQ(Signal* signal);
void execAPI_BROADCAST_REP(Signal* signal); void execAPI_BROADCAST_REP(Signal* signal);
...@@ -244,6 +276,9 @@ private: ...@@ -244,6 +276,9 @@ private:
void execARBIT_STOPREP(Signal* signal); void execARBIT_STOPREP(Signal* signal);
// Statement blocks // Statement blocks
void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
Uint32 check_startup(Signal* signal);
void node_failed(Signal* signal, Uint16 aFailedNode); void node_failed(Signal* signal, Uint16 aFailedNode);
void checkStartInterface(Signal* signal); void checkStartInterface(Signal* signal);
void failReport(Signal* signal, void failReport(Signal* signal,
...@@ -261,8 +296,9 @@ private: ...@@ -261,8 +296,9 @@ private:
// Generated statement blocks // Generated statement blocks
void startphase1(Signal* signal); void startphase1(Signal* signal);
void electionWon(); void electionWon(Signal* signal);
void cmInfoconf010Lab(Signal* signal); void cmInfoconf010Lab(Signal* signal);
void apiHbHandlingLab(Signal* signal); void apiHbHandlingLab(Signal* signal);
void timerHandlingLab(Signal* signal); void timerHandlingLab(Signal* signal);
void hbReceivedLab(Signal* signal); void hbReceivedLab(Signal* signal);
...@@ -364,12 +400,12 @@ private: ...@@ -364,12 +400,12 @@ private:
/* Status flags ----------------------------------*/ /* Status flags ----------------------------------*/
Uint32 c_restartPartialTimeout; Uint32 c_restartPartialTimeout;
Uint32 c_restartPartionedTimeout;
Uint32 c_restartFailureTimeout;
Uint64 c_start_election_time;
Uint16 creadyDistCom; Uint16 creadyDistCom;
Uint16 c_regReqReqSent;
Uint16 c_regReqReqRecv;
Uint64 c_stopElectionTime;
Uint16 cpresidentCandidate;
Uint16 cdelayRegreq; Uint16 cdelayRegreq;
Uint16 cpresidentAlive; Uint16 cpresidentAlive;
Uint16 cnoFailedNodes; Uint16 cnoFailedNodes;
...@@ -406,6 +442,9 @@ private: ...@@ -406,6 +442,9 @@ private:
}; };
struct OpAllocNodeIdReq opAllocNodeIdReq; struct OpAllocNodeIdReq opAllocNodeIdReq;
StopReq c_stopReq;
bool check_multi_node_shutdown(Signal* signal);
}; };
#endif #endif
...@@ -35,9 +35,8 @@ void Qmgr::initData() ...@@ -35,9 +35,8 @@ void Qmgr::initData()
Uint32 hbDBAPI = 500; Uint32 hbDBAPI = 500;
setHbApiDelay(hbDBAPI); setHbApiDelay(hbDBAPI);
c_connectedNodes.clear();
c_connectedNodes.set(getOwnNodeId()); c_connectedNodes.set(getOwnNodeId());
c_stopReq.senderRef = 0;
}//Qmgr::initData() }//Qmgr::initData()
void Qmgr::initRecords() void Qmgr::initRecords()
...@@ -52,6 +51,7 @@ Qmgr::Qmgr(Block_context& ctx) ...@@ -52,6 +51,7 @@ Qmgr::Qmgr(Block_context& ctx)
// Transit signals // Transit signals
addRecSignal(GSN_DUMP_STATE_ORD, &Qmgr::execDUMP_STATE_ORD); addRecSignal(GSN_DUMP_STATE_ORD, &Qmgr::execDUMP_STATE_ORD);
addRecSignal(GSN_STOP_REQ, &Qmgr::execSTOP_REQ);
addRecSignal(GSN_DEBUG_SIG, &Qmgr::execDEBUG_SIG); addRecSignal(GSN_DEBUG_SIG, &Qmgr::execDEBUG_SIG);
addRecSignal(GSN_CONTINUEB, &Qmgr::execCONTINUEB); addRecSignal(GSN_CONTINUEB, &Qmgr::execCONTINUEB);
addRecSignal(GSN_CM_HEARTBEAT, &Qmgr::execCM_HEARTBEAT); addRecSignal(GSN_CM_HEARTBEAT, &Qmgr::execCM_HEARTBEAT);
...@@ -101,6 +101,12 @@ Qmgr::Qmgr(Block_context& ctx) ...@@ -101,6 +101,12 @@ Qmgr::Qmgr(Block_context& ctx)
addRecSignal(GSN_ARBIT_CHOOSEREF, &Qmgr::execARBIT_CHOOSEREF); addRecSignal(GSN_ARBIT_CHOOSEREF, &Qmgr::execARBIT_CHOOSEREF);
addRecSignal(GSN_ARBIT_STOPREP, &Qmgr::execARBIT_STOPREP); addRecSignal(GSN_ARBIT_STOPREP, &Qmgr::execARBIT_STOPREP);
addRecSignal(GSN_READ_NODESREF, &Qmgr::execREAD_NODESREF);
addRecSignal(GSN_READ_NODESCONF, &Qmgr::execREAD_NODESCONF);
addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF);
addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF);
initData(); initData();
}//Qmgr::Qmgr() }//Qmgr::Qmgr()
......
...@@ -55,6 +55,12 @@ enum ndbd_options { ...@@ -55,6 +55,12 @@ enum ndbd_options {
NDB_STD_OPTS_VARS; NDB_STD_OPTS_VARS;
// XXX should be my_bool ??? // XXX should be my_bool ???
static int _daemon, _no_daemon, _foreground, _initial, _no_start; static int _daemon, _no_daemon, _foreground, _initial, _no_start;
static int _initialstart;
static const char* _nowait_nodes;
extern Uint32 g_start_type;
extern NdbNodeBitmask g_nowait_nodes;
/** /**
* Arguments to NDB process * Arguments to NDB process
*/ */
...@@ -82,6 +88,14 @@ static struct my_option my_long_options[] = ...@@ -82,6 +88,14 @@ static struct my_option my_long_options[] =
" (implies --nodaemon)", " (implies --nodaemon)",
(gptr*) &_foreground, (gptr*) &_foreground, 0, (gptr*) &_foreground, (gptr*) &_foreground, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 }, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
{ "nowait-nodes", NO_ARG,
"Nodes that will not be waited for during start",
(gptr*) &_nowait_nodes, (gptr*) &_nowait_nodes, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
{ "initial-start", NO_ARG,
"Perform initial start",
(gptr*) &_initialstart, (gptr*) &_initialstart, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
}; };
static void short_usage_sub(void) static void short_usage_sub(void)
...@@ -150,6 +164,37 @@ Configuration::init(int argc, char** argv) ...@@ -150,6 +164,37 @@ Configuration::init(int argc, char** argv)
globalData.ownId= 0; globalData.ownId= 0;
/*
 * --nowait-nodes=<id>[,<id>...]
 * Split the option value on ',' and record every listed node id in
 * g_nowait_nodes.  Each token must be a plain decimal number and must
 * satisfy 0 < id < MAX_NDB_NODES; anything else prints a diagnostic and
 * terminates the process with exit(-1).
 */
if (_nowait_nodes)
{
  BaseString str(_nowait_nodes);
  Vector<BaseString> arr;
  str.split(arr, ",");
  for (Uint32 i = 0; i<arr.size(); i++)
  {
    const char *token = arr[i].c_str();
    char *endptr = 0;
    long val = strtol(token, &endptr, 10);
    // endptr == token: strtol consumed no digits (e.g. empty token from
    // "1,,2").  *endptr != 0: trailing garbage after the number.
    if (endptr == token || *endptr)
    {
      ndbout_c("Unable to parse nowait-nodes argument: %s : %s",
               token, _nowait_nodes);
      exit(-1);
    }
    if (! (val > 0 && val < MAX_NDB_NODES))
    {
      // val is a long, so it must be printed with %ld (was %d).
      ndbout_c("Invalid nodeid specified in nowait-nodes: %ld : %s",
               val, _nowait_nodes);
      exit(-1);
    }
    g_nowait_nodes.set(val);
  }
}
/*
 * --initial-start
 * Remember the request locally and add ST_INITIAL_START to the global
 * start-type bitmask.
 */
if (_initialstart)
{
  _initialStart = true;
  g_start_type |= (1 << NodeState::ST_INITIAL_START);
}
return true; return true;
} }
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <my_sys.h> #include <my_sys.h>
#include <Vector.hpp> #include <Vector.hpp>
#include <mgmapi.h> #include <mgmapi.h>
#include <util/BaseString.hpp>
class MgmtSrvr; class MgmtSrvr;
...@@ -63,6 +64,9 @@ private: ...@@ -63,6 +64,9 @@ private:
*/ */
void analyseAfterFirstToken(int processId, char* allAfterFirstTokenCstr); void analyseAfterFirstToken(int processId, char* allAfterFirstTokenCstr);
void executeCommand(Vector<BaseString> &command_list,
unsigned command_pos,
int *node_ids, int no_of_nodes);
/** /**
* Parse the block specification part of the LOG* commands, * Parse the block specification part of the LOG* commands,
* things after LOG*: [BLOCK = {ALL|<blockName>+}] * things after LOG*: [BLOCK = {ALL|<blockName>+}]
...@@ -97,10 +101,14 @@ private: ...@@ -97,10 +101,14 @@ private:
public: public:
void executeStop(int processId, const char* parameters, bool all); void executeStop(int processId, const char* parameters, bool all);
void executeStop(Vector<BaseString> &command_list, unsigned command_pos,
int *node_ids, int no_of_nodes);
void executeEnterSingleUser(char* parameters); void executeEnterSingleUser(char* parameters);
void executeExitSingleUser(char* parameters); void executeExitSingleUser(char* parameters);
void executeStart(int processId, const char* parameters, bool all); void executeStart(int processId, const char* parameters, bool all);
void executeRestart(int processId, const char* parameters, bool all); void executeRestart(int processId, const char* parameters, bool all);
void executeRestart(Vector<BaseString> &command_list, unsigned command_pos,
int *node_ids, int no_of_nodes);
void executeLogLevel(int processId, const char* parameters, bool all); void executeLogLevel(int processId, const char* parameters, bool all);
void executeError(int processId, const char* parameters, bool all); void executeError(int processId, const char* parameters, bool all);
void executeLog(int processId, const char* parameters, bool all); void executeLog(int processId, const char* parameters, bool all);
...@@ -583,6 +591,13 @@ CommandInterpreter::execute_impl(const char *_line) ...@@ -583,6 +591,13 @@ CommandInterpreter::execute_impl(const char *_line)
} }
} while (do_continue); } while (do_continue);
// if there is anything in the line proceed // if there is anything in the line proceed
// Split the input line into command_list and drop every entry that is an
// empty string, so later code only sees real tokens.
Vector<BaseString> command_list;
{
  BaseString tmp(line);
  tmp.split(command_list);
  unsigned idx= 0;
  while (idx < command_list.size())
  {
    if (command_list[idx].c_str()[0])
      idx++;                    // non-empty token: keep it, move on
    else
      command_list.erase(idx);  // empty token: remove, re-check same index
  }
}
char* firstToken = strtok(line, " "); char* firstToken = strtok(line, " ");
char* allAfterFirstToken = strtok(NULL, ""); char* allAfterFirstToken = strtok(NULL, "");
...@@ -656,22 +671,45 @@ CommandInterpreter::execute_impl(const char *_line) ...@@ -656,22 +671,45 @@ CommandInterpreter::execute_impl(const char *_line)
analyseAfterFirstToken(-1, allAfterFirstToken); analyseAfterFirstToken(-1, allAfterFirstToken);
} else { } else {
/** /**
* First token should be a digit, node ID * First tokens should be digits, node ID's
*/ */
int nodeId; int node_ids[MAX_NODES];
unsigned pos;
if (! convert(firstToken, nodeId)) { for (pos= 0; pos < command_list.size(); pos++)
{
int node_id;
if (convert(command_list[pos].c_str(), node_id))
{
if (node_id <= 0) {
ndbout << "Invalid node ID: " << command_list[pos].c_str()
<< "." << endl;
DBUG_RETURN(true);
}
node_ids[pos]= node_id;
continue;
}
break;
}
int no_of_nodes= pos;
if (no_of_nodes == 0)
{
/* No digit found */
invalid_command(_line); invalid_command(_line);
DBUG_RETURN(true); DBUG_RETURN(true);
} }
if (pos == command_list.size())
if (nodeId <= 0) { {
ndbout << "Invalid node ID: " << firstToken << "." << endl; /* No command found */
invalid_command(_line);
DBUG_RETURN(true); DBUG_RETURN(true);
} }
if (no_of_nodes == 1)
analyseAfterFirstToken(nodeId, allAfterFirstToken); {
analyseAfterFirstToken(node_ids[0], allAfterFirstToken);
DBUG_RETURN(true);
}
executeCommand(command_list, pos, node_ids, no_of_nodes);
DBUG_RETURN(true);
} }
DBUG_RETURN(true); DBUG_RETURN(true);
} }
...@@ -741,6 +779,27 @@ CommandInterpreter::analyseAfterFirstToken(int processId, ...@@ -741,6 +779,27 @@ CommandInterpreter::analyseAfterFirstToken(int processId,
ndbout << endl; ndbout << endl;
} }
/**
 * Dispatch the command that follows a list of several node ids.
 *
 * @param command_list  all tokens of the input line
 * @param command_pos   index in command_list of the command word
 * @param node_ids      node ids parsed from the start of the line
 * @param no_of_nodes   number of entries in node_ids
 *
 * Only STOP and RESTART (compared case-insensitively) are accepted; the
 * tokens after the command word are handed on as options.  Any other
 * command word is reported on ndbout and nothing is executed.
 */
void
CommandInterpreter::executeCommand(Vector<BaseString> &command_list,
                                   unsigned command_pos,
                                   int *node_ids, int no_of_nodes)
{
  const char *verb= command_list[command_pos].c_str();
  const unsigned first_option= command_pos+1;

  if (strcasecmp("STOP", verb) == 0)
  {
    executeStop(command_list, first_option, node_ids, no_of_nodes);
  }
  else if (strcasecmp("RESTART", verb) == 0)
  {
    executeRestart(command_list, first_option, node_ids, no_of_nodes);
  }
  else
  {
    ndbout_c("Invalid command: '%s' after multi node id list. "
             "Expected STOP or RESTART.", verb);
  }
}
/** /**
* Get next nodeid larger than the give node_id. node_id will be * Get next nodeid larger than the give node_id. node_id will be
* set to the next node_id in the list. node_id should be set * set to the next node_id in the list. node_id should be set
...@@ -1326,23 +1385,59 @@ CommandInterpreter::executeClusterLog(char* parameters) ...@@ -1326,23 +1385,59 @@ CommandInterpreter::executeClusterLog(char* parameters)
//***************************************************************************** //*****************************************************************************
void void
CommandInterpreter::executeStop(int processId, const char *, bool all) CommandInterpreter::executeStop(int processId, const char *parameters,
bool all)
{ {
int result = 0; Vector<BaseString> command_list;
if(all) { if (parameters)
result = ndb_mgm_stop(m_mgmsrv, 0, 0); {
} else { BaseString tmp(parameters);
result = ndb_mgm_stop(m_mgmsrv, 1, &processId); tmp.split(command_list);
for (unsigned i= 0; i < command_list.size();)
command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0);
} }
if (result < 0) { if (all)
ndbout << "Shutdown failed." << endl; executeStop(command_list, 0, 0, 0);
else
executeStop(command_list, 0, &processId, 1);
}
void
CommandInterpreter::executeStop(Vector<BaseString> &command_list,
unsigned command_pos,
int *node_ids, int no_of_nodes)
{
int abort= 0;
for (; command_pos < command_list.size(); command_pos++)
{
const char *item= command_list[command_pos].c_str();
if (strcasecmp(item, "-A") == 0)
{
abort= 1;
continue;
}
ndbout_c("Invalid option: %s. Expecting -A after STOP",
item);
return;
}
int result= ndb_mgm_stop2(m_mgmsrv, no_of_nodes, node_ids, abort);
if (result < 0)
{
ndbout_c("Shutdown failed.");
printError(); printError();
} else }
else
{ {
if(all) if (node_ids == 0)
ndbout << "NDB Cluster has shutdown." << endl; ndbout_c("NDB Cluster has shutdown.");
else else
ndbout << "Node " << processId << " has shutdown." << endl; {
ndbout << "Node";
for (int i= 0; i < no_of_nodes; i++)
ndbout << " " << node_ids[i];
ndbout_c(" has shutdown.");
}
} }
} }
...@@ -1410,45 +1505,72 @@ CommandInterpreter::executeStart(int processId, const char* parameters, ...@@ -1410,45 +1505,72 @@ CommandInterpreter::executeStart(int processId, const char* parameters,
void void
CommandInterpreter::executeRestart(int processId, const char* parameters, CommandInterpreter::executeRestart(int processId, const char* parameters,
bool all) bool all)
{
Vector<BaseString> command_list;
if (parameters)
{
BaseString tmp(parameters);
tmp.split(command_list);
for (unsigned i= 0; i < command_list.size();)
command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0);
}
if (all)
executeRestart(command_list, 0, 0, 0);
else
executeRestart(command_list, 0, &processId, 1);
}
void
CommandInterpreter::executeRestart(Vector<BaseString> &command_list,
unsigned command_pos,
int *node_ids, int no_of_nodes)
{ {
int result; int result;
int nostart = 0; int nostart= 0;
int initialstart = 0; int initialstart= 0;
int abort = 0; int abort= 0;
if(parameters != 0 && strlen(parameters) != 0){ for (; command_pos < command_list.size(); command_pos++)
char * tmpString = my_strdup(parameters,MYF(MY_WME)); {
My_auto_ptr<char> ap1(tmpString); const char *item= command_list[command_pos].c_str();
char * tmpPtr = 0; if (strcasecmp(item, "-N") == 0)
char * item = strtok_r(tmpString, " ", &tmpPtr); {
while(item != NULL){ nostart= 1;
if(strcasecmp(item, "-N") == 0) continue;
nostart = 1;
if(strcasecmp(item, "-I") == 0)
initialstart = 1;
if(strcasecmp(item, "-A") == 0)
abort = 1;
item = strtok_r(NULL, " ", &tmpPtr);
} }
if (strcasecmp(item, "-I") == 0)
{
initialstart= 1;
continue;
} }
if (strcasecmp(item, "-A") == 0)
if(all) { {
result = ndb_mgm_restart2(m_mgmsrv, 0, NULL, initialstart, nostart, abort); abort= 1;
} else { continue;
int v[1]; }
v[0] = processId; ndbout_c("Invalid option: %s. Expecting -A,-N or -I after RESTART",
result = ndb_mgm_restart2(m_mgmsrv, 1, v, initialstart, nostart, abort); item);
return;
} }
result= ndb_mgm_restart2(m_mgmsrv, no_of_nodes, node_ids,
initialstart, nostart, abort);
if (result <= 0) { if (result <= 0) {
ndbout.println("Restart failed.", result); ndbout_c("Restart failed.");
printError(); printError();
} else }
else
{ {
if(all) if (node_ids == 0)
ndbout << "NDB Cluster is being restarted." << endl; ndbout_c("NDB Cluster is being restarted.");
else else
ndbout_c("Node %d is being restarted.", processId); {
ndbout << "Node";
for (int i= 0; i < no_of_nodes; i++)
ndbout << " " << node_ids[i];
ndbout_c(" is being restarted");
}
} }
} }
......
This diff is collapsed.
...@@ -176,6 +176,7 @@ public: ...@@ -176,6 +176,7 @@ public:
STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 ); STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 );
STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 ); STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 );
STATIC_CONST( UNSUPPORTED_NODE_SHUTDOWN = 5031 );
STATIC_CONST( NODE_NOT_API_NODE = 5062 ); STATIC_CONST( NODE_NOT_API_NODE = 5062 );
STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 ); STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 );
...@@ -252,7 +253,7 @@ public: ...@@ -252,7 +253,7 @@ public:
* @param processId: Id of the DB process to stop * @param processId: Id of the DB process to stop
* @return 0 if succeeded, otherwise: as stated above, plus: * @return 0 if succeeded, otherwise: as stated above, plus:
*/ */
int stopNode(int nodeId, bool abort = false); int stopNodes(const Vector<NodeId> &node_ids, int *stopCount, bool abort);
/** /**
* Stop the system * Stop the system
...@@ -286,11 +287,12 @@ public: ...@@ -286,11 +287,12 @@ public:
int start(int processId); int start(int processId);
/** /**
* Restart a node * Restart nodes
* @param processId: Id of the DB process to start * @param processId: Id of the DB process to start
*/ */
int restartNode(int processId, bool nostart, bool initialStart, int restartNodes(const Vector<NodeId> &node_ids,
bool abort = false); int *stopCount, bool nostart,
bool initialStart, bool abort);
/** /**
* Restart the system * Restart the system
...@@ -489,7 +491,7 @@ private: ...@@ -489,7 +491,7 @@ private:
bool nostart, bool nostart,
bool initialStart); bool initialStart);
int sendSTOP_REQ(NodeId nodeId, int sendSTOP_REQ(const Vector<NodeId> &node_ids,
NodeBitmask &stoppedNodes, NodeBitmask &stoppedNodes,
Uint32 singleUserNodeId, Uint32 singleUserNodeId,
bool abort, bool abort,
...@@ -649,6 +651,8 @@ private: ...@@ -649,6 +651,8 @@ private:
friend class Ndb_mgmd_event_service; friend class Ndb_mgmd_event_service;
Ndb_mgmd_event_service m_event_listner; Ndb_mgmd_event_service m_event_listner;
NodeId m_master_node;
/** /**
* Handles the thread wich upon a 'Node is started' event will * Handles the thread wich upon a 'Node is started' event will
* set the node's previous loglevel settings. * set the node's previous loglevel settings.
......
...@@ -866,14 +866,11 @@ MgmApiSession::restart(Parser<MgmApiSession>::Context &, ...@@ -866,14 +866,11 @@ MgmApiSession::restart(Parser<MgmApiSession>::Context &,
} }
int restarted = 0; int restarted = 0;
int result = 0; int result= m_mgmsrv.restartNodes(nodes,
&restarted,
for(size_t i = 0; i < nodes.size(); i++)
if((result = m_mgmsrv.restartNode(nodes[i],
nostart != 0, nostart != 0,
initialstart != 0, initialstart != 0,
abort != 0)) == 0) abort != 0);
restarted++;
m_output->println("restart reply"); m_output->println("restart reply");
if(result != 0){ if(result != 0){
...@@ -998,7 +995,12 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, ...@@ -998,7 +995,12 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &,
args.get("node", (const char **)&nodes_str); args.get("node", (const char **)&nodes_str);
if(nodes_str == NULL) if(nodes_str == NULL)
{
m_output->println("stop reply");
m_output->println("result: empty node list");
m_output->println("");
return; return;
}
args.get("abort", &abort); args.get("abort", &abort);
char *p, *last; char *p, *last;
...@@ -1008,29 +1010,10 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, ...@@ -1008,29 +1010,10 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &,
nodes.push_back(atoi(p)); nodes.push_back(atoi(p));
} }
int stop_self= 0; int stopped= 0;
size_t i; int result= 0;
if (nodes.size())
for(i=0; i < nodes.size(); i++) { result= m_mgmsrv.stopNodes(nodes, &stopped, abort != 0);
if (nodes[i] == m_mgmsrv.getOwnNodeId()) {
stop_self= 1;
if (i != nodes.size()-1) {
m_output->println("stop reply");
m_output->println("result: server must be stopped last");
m_output->println("");
return;
}
}
}
int stopped = 0, result = 0;
for(i=0; i < nodes.size(); i++)
if (nodes[i] != m_mgmsrv.getOwnNodeId()) {
if((result = m_mgmsrv.stopNode(nodes[i], abort != 0)) == 0)
stopped++;
} else
stopped++;
m_output->println("stop reply"); m_output->println("stop reply");
if(result != 0) if(result != 0)
...@@ -1039,9 +1022,6 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, ...@@ -1039,9 +1022,6 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &,
m_output->println("result: Ok"); m_output->println("result: Ok");
m_output->println("stopped: %d", stopped); m_output->println("stopped: %d", stopped);
m_output->println(""); m_output->println("");
if (stop_self)
g_StopServer= true;
} }
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include <NdbRestarts.hpp> #include <NdbRestarts.hpp>
#include <Vector.hpp> #include <Vector.hpp>
#include <signaldata/DumpStateOrd.hpp> #include <signaldata/DumpStateOrd.hpp>
#include <Bitmask.hpp>
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){ int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
...@@ -669,6 +669,206 @@ err: ...@@ -669,6 +669,206 @@ err:
return NDBT_FAILED; return NDBT_FAILED;
} }
/**
 * Test step for bug 18612 (node restart variant).
 *
 * Does nothing (stops the test and returns NDBT_OK) when the cluster has
 * fewer than two data nodes.  Otherwise, for every test loop:
 *   1. pick cnt/2 node pairs: partition0[i] comes from
 *      getRandomNodeOtherNodeGroup(), partition1[i] from
 *      getRandomNodeSameNodeGroup() of that node;
 *   2. ask the master (dump NdbcntrStopNodes) to stop the partition0 nodes
 *      and wait until they are in no-start state;
 *   3. enable CmvmiSetRestartOnErrorInsert and insert error 932 everywhere;
 *   4. send dump 9000 with the "other" half's node list to every node;
 *   5. start partition0, wait for start phase 2, send dump 9001;
 *   6. wait for partition0 to reach no-start again, restart those nodes,
 *      then start everything and wait for the cluster to be started.
 *
 * NOTE(review): the meaning of dump codes 9000/9001 and error insert 932 is
 * not visible here - presumably they block/unblock communication between
 * the two halves; confirm against the kernel blocks.
 *
 * @return NDBT_OK on success, NDBT_FAILED as soon as any restarter call
 *         returns non-zero.
 */
int
runBug18612(NDBT_Context* ctx, NDBT_Step* step){
// Assume two replicas
NdbRestarter restarter;
if (restarter.getNumDbNodes() < 2)
{
ctx->stopTest();
return NDBT_OK;
}
Uint32 cnt = restarter.getNumDbNodes();
for(int loop = 0; loop < ctx->getNumLoops(); loop++)
{
// Node id lists for the two halves; zero-filled before use.
int partition0[256];
int partition1[256];
bzero(partition0, sizeof(partition0));
bzero(partition1, sizeof(partition1));
// Tracks node ids that have already been placed in either half.
Bitmask<4> nodesmask;
Uint32 node1 = restarter.getDbNodeId(rand()%cnt);
for (Uint32 i = 0; i<cnt/2; i++)
{
// Keep hopping to a node in another node group until an unused node is
// found, or until the restarter reports that there is none (-1).
do {
int tmp = restarter.getRandomNodeOtherNodeGroup(node1, rand());
if (tmp == -1)
break;
node1 = tmp;
} while(nodesmask.get(node1));
partition0[i] = node1;
partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand());
ndbout_c("nodes %d %d", node1, partition1[i]);
// Neither node of the pair may have been used before.
assert(!nodesmask.get(node1));
assert(!nodesmask.get(partition1[i]));
nodesmask.set(node1);
nodesmask.set(partition1[i]);
}
ndbout_c("done");
// dump[0] is the dump code, dump[1..] carries a node id list.
int dump[255];
dump[0] = DumpStateOrd::NdbcntrStopNodes;
memcpy(dump + 1, partition0, sizeof(int)*cnt/2);
// Ask the master to stop all partition0 nodes in one request.
Uint32 master = restarter.getMasterNodeId();
if (restarter.dumpStateOneNode(master, dump, 1+cnt/2))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(partition0, cnt/2))
return NDBT_FAILED;
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
if (restarter.dumpStateAllNodes(val2, 2))
return NDBT_FAILED;
if (restarter.insertErrorInAllNodes(932))
return NDBT_FAILED;
// Every partition1 node gets dump 9000 with the partition0 node list ...
dump[0] = 9000;
memcpy(dump + 1, partition0, sizeof(int)*cnt/2);
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateOneNode(partition1[i], dump, 1+cnt/2))
return NDBT_FAILED;
// ... and every partition0 node gets dump 9000 with the partition1 list.
dump[0] = 9000;
memcpy(dump + 1, partition1, sizeof(int)*cnt/2);
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateOneNode(partition0[i], dump, 1+cnt/2))
return NDBT_FAILED;
if (restarter.startNodes(partition0, cnt/2))
return NDBT_FAILED;
if (restarter.waitNodesStartPhase(partition0, cnt/2, 2))
return NDBT_FAILED;
// NOTE(review): dump[1] still holds partition1[0] from the memcpy above and
// the loop body does not depend on i, so the same 2-word dump is sent to
// all nodes cnt/2 times - confirm this is intended.
dump[0] = 9001;
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateAllNodes(dump, 2))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(partition0, cnt/2))
return NDBT_FAILED;
// NOTE(review): the three 'true' flags are passed positionally; their
// meaning is defined by NdbRestarter::restartOneDbNode - verify there.
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.restartOneDbNode(partition0[i], true, true, true))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(partition0, cnt/2))
return NDBT_FAILED;
// Bring the whole cluster back before the next loop iteration.
if (restarter.startAll())
return NDBT_FAILED;
if (restarter.waitClusterStarted())
return NDBT_FAILED;
}
return NDBT_OK;
}
/**
 * Test step for bug 18612 (variant that restarts the whole cluster).
 *
 * Does nothing (stops the test and returns NDBT_OK) when the cluster has
 * fewer than two data nodes.  Otherwise, for every test loop:
 *   1. pick cnt/2 node pairs: partition0[i] comes from
 *      getRandomNodeOtherNodeGroup(), partition1[i] from
 *      getRandomNodeSameNodeGroup() of that node;
 *   2. restart all nodes via restartAll(false, true, false);
 *   3. send dump 9000 with the "other" half's node list to every node;
 *   4. enable CmvmiSetRestartOnErrorInsert and insert error 932 everywhere;
 *   5. start all nodes, wait for start phase 2, send dump 9001;
 *   6. wait for nodes to reach no-start, then start everything again and
 *      wait for the cluster to be started.
 *
 * NOTE(review): the meaning of dump codes 9000/9001 and error insert 932 is
 * not visible here - presumably they block/unblock communication between
 * the two halves; confirm against the kernel blocks.
 *
 * @return NDBT_OK on success, NDBT_FAILED when a restarter call fails (see
 *         the comment on the combined wait near the end for the one
 *         exception to "any non-zero result fails").
 */
int
runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){
// Assume two replicas
NdbRestarter restarter;
if (restarter.getNumDbNodes() < 2)
{
ctx->stopTest();
return NDBT_OK;
}
Uint32 cnt = restarter.getNumDbNodes();
for(int loop = 0; loop < ctx->getNumLoops(); loop++)
{
// Node id lists for the two halves; zero-filled before use.
int partition0[256];
int partition1[256];
bzero(partition0, sizeof(partition0));
bzero(partition1, sizeof(partition1));
// Tracks node ids that have already been placed in either half.
Bitmask<4> nodesmask;
Uint32 node1 = restarter.getDbNodeId(rand()%cnt);
for (Uint32 i = 0; i<cnt/2; i++)
{
// Keep hopping to a node in another node group until an unused node is
// found, or until the restarter reports that there is none (-1).
do {
int tmp = restarter.getRandomNodeOtherNodeGroup(node1, rand());
if (tmp == -1)
break;
node1 = tmp;
} while(nodesmask.get(node1));
partition0[i] = node1;
partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand());
ndbout_c("nodes %d %d", node1, partition1[i]);
// Neither node of the pair may have been used before.
assert(!nodesmask.get(node1));
assert(!nodesmask.get(partition1[i]));
nodesmask.set(node1);
nodesmask.set(partition1[i]);
}
ndbout_c("done");
// NOTE(review): flags are positional; their meaning is defined by
// NdbRestarter::restartAll - verify there.
if (restarter.restartAll(false, true, false))
return NDBT_FAILED;
// dump[0] is the dump code, dump[1..] carries a node id list.
int dump[255];
// Every partition1 node gets dump 9000 with the partition0 node list ...
dump[0] = 9000;
memcpy(dump + 1, partition0, sizeof(int)*cnt/2);
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateOneNode(partition1[i], dump, 1+cnt/2))
return NDBT_FAILED;
// ... and every partition0 node gets dump 9000 with the partition1 list.
dump[0] = 9000;
memcpy(dump + 1, partition1, sizeof(int)*cnt/2);
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateOneNode(partition0[i], dump, 1+cnt/2))
return NDBT_FAILED;
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
if (restarter.dumpStateAllNodes(val2, 2))
return NDBT_FAILED;
if (restarter.insertErrorInAllNodes(932))
return NDBT_FAILED;
if (restarter.startAll())
return NDBT_FAILED;
if (restarter.waitClusterStartPhase(2))
return NDBT_FAILED;
// NOTE(review): dump[1] still holds partition1[0] from the memcpy above and
// the loop body does not depend on i, so the same 2-word dump is sent to
// all nodes cnt/2 times - confirm this is intended.
dump[0] = 9001;
for (Uint32 i = 0; i<cnt/2; i++)
if (restarter.dumpStateAllNodes(dump, 2))
return NDBT_FAILED;
// Nested ifs: the test fails only when ALL three waits return non-zero,
// i.e. neither the whole cluster nor either half reached no-start.
if (restarter.waitClusterNoStart(30))
if (restarter.waitNodesNoStart(partition0, cnt/2, 10))
if (restarter.waitNodesNoStart(partition1, cnt/2, 10))
return NDBT_FAILED;
// Bring the whole cluster back before the next loop iteration.
if (restarter.startAll())
return NDBT_FAILED;
if (restarter.waitClusterStarted())
return NDBT_FAILED;
}
return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart); NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\ "Test that one node at a time can be stopped and then restarted "\
...@@ -963,6 +1163,18 @@ TESTCASE("Bug18414", ...@@ -963,6 +1163,18 @@ TESTCASE("Bug18414",
STEP(runBug18414); STEP(runBug18414);
FINALIZER(runClearTable); FINALIZER(runClearTable);
} }
TESTCASE("Bug18612",
"Test bug with partitioned clusters"){
INITIALIZER(runLoadTable);
STEP(runBug18612);
FINALIZER(runClearTable);
}
TESTCASE("Bug18612SR",
"Test bug with partitioned clusters"){
INITIALIZER(runLoadTable);
STEP(runBug18612SR);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart); NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
...@@ -453,10 +453,18 @@ args: -n Bug16772 T1 ...@@ -453,10 +453,18 @@ args: -n Bug16772 T1
#cmd: testSystemRestart #cmd: testSystemRestart
#args: -n Bug18385 T1 #args: -n Bug18385 T1
# #
max-time: 500 max-time: 1000
cmd: testNodeRestart cmd: testNodeRestart
args: -n Bug18414 T1 args: -n Bug18414 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug18612 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug18612SR T1
# #
# DICT TESTS # DICT TESTS
max-time: 1500 max-time: 1500
......
...@@ -445,8 +445,7 @@ int twoNodeFailure(NdbRestarter& _restarter, ...@@ -445,8 +445,7 @@ int twoNodeFailure(NdbRestarter& _restarter,
<< ") secs " << endl; << ") secs " << endl;
NdbSleep_SecSleep(seconds); NdbSleep_SecSleep(seconds);
randomId = (rand() % _restarter.getNumDbNodes()); nodeId = _restarter.getRandomNodeOtherNodeGroup(nodeId, rand());
nodeId = _restarter.getDbNodeId(randomId);
g_info << _restart->m_name << ": node = "<< nodeId << endl; g_info << _restart->m_name << ": node = "<< nodeId << endl;
CHECK(_restarter.insertErrorInNode(nodeId, 9999) == 0, CHECK(_restarter.insertErrorInNode(nodeId, 9999) == 0,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment