Commit 0d178959 authored by joreland@mysql.com

bug#10358 - ndb

  Cluster failure with non-started nodes can result in timed-out transactions
parent 86c7c4d1
......@@ -857,7 +857,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
args.put("initialstart", initial);
args.put("nostart", nostart);
const Properties *reply;
const int timeout = handle->read_timeout;
handle->read_timeout= 5*60*1000; // 5 minutes
reply = ndb_mgm_call(handle, restart_reply, "restart all", &args);
handle->read_timeout= timeout;
CHECK_REPLY(reply, -1);
BaseString result;
......@@ -890,7 +893,10 @@ ndb_mgm_restart2(NdbMgmHandle handle, int no_of_nodes, const int * node_list,
args.put("nostart", nostart);
const Properties *reply;
const int timeout = handle->read_timeout;
handle->read_timeout= 5*60*1000; // 5 minutes
reply = ndb_mgm_call(handle, restart_reply, "restart node", &args);
handle->read_timeout= timeout;
if(reply != NULL) {
BaseString result;
reply->get("result", result);
......
......@@ -66,6 +66,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
{
ndbSetOwnVersion();
clusterMgrThreadMutex = NdbMutex_Create();
noOfAliveNodes= 0;
noOfConnectedNodes= 0;
theClusterMgrThread= 0;
}
......@@ -335,9 +336,9 @@ ClusterMgr::execAPI_REGCONF(const Uint32 * theData){
node.m_state = apiRegConf->nodeState;
if (node.compatible && (node.m_state.startLevel == NodeState::SL_STARTED ||
node.m_state.startLevel == NodeState::SL_SINGLEUSER)){
node.m_alive = true;
set_node_alive(node, true);
} else {
node.m_alive = false;
set_node_alive(node, false);
}//if
node.hbSent = 0;
node.hbCounter = 0;
......@@ -360,7 +361,7 @@ ClusterMgr::execAPI_REGREF(const Uint32 * theData){
assert(node.defined == true);
node.compatible = false;
node.m_alive = false;
set_node_alive(node, false);
node.m_state = NodeState::SL_NOTHING;
node.m_info.m_version = ref->version;
......@@ -437,7 +438,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
Node & theNode = theNodes[nodeId];
theNode.m_alive = false;
set_node_alive(theNode, false);
if(theNode.connected)
theFacade.doDisconnect(nodeId);
......@@ -449,8 +450,8 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
}
theNode.nfCompleteRep = false;
if(noOfConnectedNodes == 0){
if(noOfAliveNodes == 0){
NFCompleteRep rep;
for(Uint32 i = 1; i<MAX_NODES; i++){
if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){
......
......@@ -80,6 +80,7 @@ public:
Uint32 getNoOfConnectedNodes() const;
private:
Uint32 noOfAliveNodes;
Uint32 noOfConnectedNodes;
Node theNodes[MAX_NODES];
NdbThread* theClusterMgrThread;
......@@ -100,6 +101,19 @@ private:
void execAPI_REGREF (const Uint32 * theData);
void execNODE_FAILREP (const Uint32 * theData);
void execNF_COMPLETEREP(const Uint32 * theData);
/**
 * Record a node's alive state and keep noOfAliveNodes consistent with it.
 *
 * The counter is only touched when the flag actually changes:
 * alive -> not alive decrements it (asserting it is non-zero first),
 * not alive -> alive increments it. node.m_alive is always assigned.
 *
 * @param node   node entry whose m_alive flag is updated
 * @param alive  new alive state for the node
 */
inline void set_node_alive(Node& node, bool alive){
  const bool wasAlive = node.m_alive;
  if(wasAlive != alive)
  {
    if(alive)
    {
      // Transition: not alive -> alive
      noOfAliveNodes++;
    }
    else
    {
      // Transition: alive -> not alive; the counter must cover this node
      assert(noOfAliveNodes);
      noOfAliveNodes--;
    }
  }
  node.m_alive = alive;
}
};
inline
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment