ndb - bug#22013

    Fix bug in event handling wrt early node shutdown
parent 12911bb5
......@@ -137,8 +137,11 @@ MgmtSrvr::logLevelThreadRun()
m_started_nodes.erase(0, false);
m_started_nodes.unlock();
setEventReportingLevelImpl(node, req);
if (setEventReportingLevelImpl(node, req))
{
ndbout_c("setEventReportingLevelImpl(%d): failed", node);
}
SetLogLevelOrd ord;
ord = m_nodeLogLevel[node];
setNodeLogLevelImpl(node, ord);
......@@ -155,10 +158,16 @@ MgmtSrvr::logLevelThreadRun()
m_log_level_requests.erase(0, false);
m_log_level_requests.unlock();
if(req.blockRef == 0){
if(req.blockRef == 0)
{
req.blockRef = _ownReference;
setEventReportingLevelImpl(0, req);
} else {
if (setEventReportingLevelImpl(0, req))
{
ndbout_c("setEventReportingLevelImpl: failed 2!");
}
}
else
{
SetLogLevelOrd ord;
ord = req;
setNodeLogLevelImpl(req.blockRef, ord);
......@@ -1376,9 +1385,6 @@ int MgmtSrvr::restartDB(bool nostart, bool initialStart,
NodeId nodeId = 0;
NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime;
ndbout_c(" %d", nodes.get(1));
ndbout_c(" %d", nodes.get(2));
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
if (!nodes.get(nodeId))
continue;
......@@ -1584,6 +1590,11 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId,
}
}
if (nodes.isclear())
{
return SEND_OR_RECEIVE_FAILED;
}
int error = 0;
while (!nodes.isclear())
{
......@@ -1600,16 +1611,24 @@ MgmtSrvr::setEventReportingLevelImpl(int nodeId,
error = 1;
break;
}
// Since sending okToSend(true),
// there is no guarantee that NF_COMPLETEREP will come
// i.e listen also to NODE_FAILREP
case GSN_NODE_FAILREP: {
const NodeFailRep * const rep =
CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
NdbNodeBitmask mask;
mask.assign(NdbNodeBitmask::Size, rep->theNodes);
nodes.bitANDC(mask);
break;
}
case GSN_NF_COMPLETEREP:{
const NFCompleteRep * const rep =
CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
nodes.clear(rep->failedNodeId);
break;
}
case GSN_NODE_FAILREP:{
// ignore, NF_COMPLETEREP will arrive later
break;
}
default:
report_unknown_signal(signal);
return SEND_OR_RECEIVE_FAILED;
......@@ -1909,7 +1928,10 @@ MgmtSrvr::handleStatus(NodeId nodeId, bool alive, bool nfComplete)
theData[1] = nodeId;
if (alive) {
m_started_nodes.push_back(nodeId);
if (nodeTypes[nodeId] == NODE_TYPE_DB)
{
m_started_nodes.push_back(nodeId);
}
rep->setEventType(NDB_LE_Connected);
} else {
rep->setEventType(NDB_LE_Disconnected);
......
......@@ -507,6 +507,7 @@ ClusterMgr::reportConnected(NodeId nodeId){
theNode.m_info.m_version = 0;
theNode.compatible = true;
theNode.nfCompleteRep = true;
theNode.m_state.startLevel = NodeState::SL_NOTHING;
theFacade.ReportNodeAlive(nodeId);
}
......@@ -518,14 +519,13 @@ ClusterMgr::reportDisconnected(NodeId nodeId){
noOfConnectedNodes--;
theNodes[nodeId].connected = false;
theNodes[nodeId].m_state.m_connected_nodes.clear();
reportNodeFailed(nodeId);
reportNodeFailed(nodeId, true);
}
void
ClusterMgr::reportNodeFailed(NodeId nodeId){
ClusterMgr::reportNodeFailed(NodeId nodeId, bool disconnect){
Node & theNode = theNodes[nodeId];
......@@ -536,10 +536,11 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
{
theFacade.doDisconnect(nodeId);
}
const bool report = (theNode.m_state.startLevel != NodeState::SL_NOTHING);
theNode.m_state.startLevel = NodeState::SL_NOTHING;
if(report)
if(disconnect || report)
{
theFacade.ReportNodeDead(nodeId);
}
......
......@@ -97,8 +97,8 @@ private:
NdbMutex* clusterMgrThreadMutex;
void showState(NodeId nodeId);
void reportNodeFailed(NodeId nodeId);
void reportNodeFailed(NodeId nodeId, bool disconnect = false);
/**
* Signals received
*/
......
......@@ -19,6 +19,14 @@
#include <signaldata/NFCompleteRep.hpp>
#include <signaldata/NodeFailRep.hpp>
static
void
require(bool x)
{
if (!x)
abort();
}
SimpleSignal::SimpleSignal(bool dealloc){
memset(this, 0, sizeof(* this));
deallocSections = dealloc;
......@@ -145,6 +153,7 @@ SignalSender::waitFor(Uint32 timeOutMillis, T & t)
{
SimpleSignal * s = t.check(m_jobBuffer);
if(s != 0){
m_usedBuffer.push_back(s);
return s;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment