ndb - bug#18781 (5.0) handle rolling upgrade, minor fixes, logging, docs

parent cdc421dc
......@@ -55,7 +55,9 @@ public:
enum ErrorCode {
NotMaster = 1,
InvalidLockType = 2,
TooManyRequests = 3
BadUserRef = 3,
TooLate = 4,
TooManyRequests = 5
};
private:
Uint32 userPtr;
......
......@@ -60,5 +60,7 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ];
#define NDBD_INCL_NODECONF_VERSION_4 MAKE_VERSION(4,1,17)
#define NDBD_INCL_NODECONF_VERSION_5 MAKE_VERSION(5,0,18)
/*
 * Rolling-upgrade threshold for the DICT lock: when the master node's
 * version is lower than this, a starting node's DIH does not send
 * DICT_LOCK_REQ / DICT_UNLOCK_ORD to the master DICT.
 */
#define NDBD_DICT_LOCK_VERSION_5 MAKE_VERSION(5,0,23)
#endif
......@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4013
Next DBLQH 5043
Next DBDICT 6007
Next DBDIH 7175
Next DBDIH 7177
Next DBTC 8037
Next CMVMI 9000
Next BACKUP 10022
......@@ -312,7 +312,9 @@ Test Crashes in handling node restarts
7170: Crash when receiving START_PERMREF (InitialStartRequired)
7174: Send one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR)
7174: Crash starting node before sending DICT_LOCK_REQ
7175: Master sends one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR)
7176: Slave NR pretends master does not support DICT lock (rolling upgrade)
DICT:
6000 Crash during NR when receiving DICTSTARTREQ
......
......@@ -205,7 +205,7 @@ void Dbdict::execCONTINUEB(Signal* signal)
case ZDICT_LOCK_POLL:
jam();
checkDictLockQueue(signal);
checkDictLockQueue(signal, true);
break;
default :
......@@ -2836,7 +2836,6 @@ void Dbdict::execNODE_FAILREP(Signal* signal)
case BS_NODE_RESTART:
jam();
ok = true;
removeStaleDictLocks(signal, theFailedNodes);
break;
}
ndbrequire(ok);
......@@ -2860,6 +2859,15 @@ void Dbdict::execNODE_FAILREP(Signal* signal)
}//if
}//for
/*
* NODE_FAILREP guarantees that no "in flight" signal from
* a dead node is accepted, and also that the job buffer contains
* no such (un-executed) signals. Therefore no DICT_UNLOCK_ORD
* from a dead node (leading to master crash) is possible after
* this clean-up removes the lock record.
*/
removeStaleDictLocks(signal, theFailedNodes);
}//execNODE_FAILREP()
......@@ -12210,7 +12218,7 @@ Dbdict::getIndexAttrMask(TableRecordPtr indexPtr, AttributeMask& mask)
const Dbdict::DictLockType*
Dbdict::getDictLockType(Uint32 lockType)
{
static DictLockType lt[] = {
static const DictLockType lt[] = {
{ DictLockReq::NodeRestartLock, BS_NODE_RESTART, "NodeRestart" }
};
for (int i = 0; i < sizeof(lt)/sizeof(lt[0]); i++) {
......@@ -12220,12 +12228,40 @@ Dbdict::getDictLockType(Uint32 lockType)
return NULL;
}
/*
 * Report the current DICT lock state as a single info event.
 *
 * The event carries: the block state, the number of operation records
 * in use (pool size minus free count), the polling flag, the given
 * pollCount, and a text rendering of the lock queue.  The queue text
 * lists the node id of each queued request in queue order, separated
 * by single spaces, with "L" appended to entries that are locked.
 */
void
Dbdict::sendDictLockInfoEvent(Uint32 pollCount)
{
  char queueText[100];
  char* cursor = queueText;
  const char* const limit = queueText + sizeof(queueText);
  queueText[0] = 0;  // an empty queue is reported as an empty string

  unsigned entryNo = 0;
  DictLockPtr entry;
  for (c_dictLockQueue.first(entry);
       entry.i != RNIL;
       c_dictLockQueue.next(entry)) {
    jam();
    // no separator in front of the first entry
    const char* const separator = (++entryNo == 1) ? "" : " ";
    const unsigned nodeId = (unsigned)refToNode(entry.p->req.userRef);
    const char* const lockedMark = entry.p->locked ? "L" : "";
    // format into whatever space is left in the fixed-size buffer
    my_snprintf(cursor, limit - cursor, "%s%u%s",
                separator, nodeId, lockedMark);
    cursor += strlen(cursor);
  }

  infoEvent("DICT: lock bs: %d ops: %d poll: %d cnt: %d queue: %s",
            (int)c_blockState,
            c_opRecordPool.getSize() - c_opRecordPool.getNoOfFree(),
            c_dictLockPoll, (int)pollCount, queueText);
}
void
Dbdict::sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text)
{
infoEvent("DICT: %s %u for %s",
text,
(unsigned int)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text);
(unsigned)refToNode(lockPtr.p->req.userRef), lockPtr.p->lt->text);
}
void
......@@ -12234,6 +12270,8 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal)
jamEntry();
const DictLockReq* req = (const DictLockReq*)&signal->theData[0];
// make sure bad request crashes slave, not master (us)
if (getOwnNodeId() != c_masterNodeId) {
jam();
sendDictLockRef(signal, *req, DictLockRef::NotMaster);
......@@ -12247,6 +12285,19 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal)
return;
}
if (req->userRef != signal->getSendersBlockRef() ||
getNodeInfo(refToNode(req->userRef)).m_type != NodeInfo::DB) {
jam();
sendDictLockRef(signal, *req, DictLockRef::BadUserRef);
return;
}
if (c_aliveNodes.get(refToNode(req->userRef))) {
jam();
sendDictLockRef(signal, *req, DictLockRef::TooLate);
return;
}
DictLockPtr lockPtr;
if (! c_dictLockQueue.seize(lockPtr)) {
jam();
......@@ -12258,21 +12309,23 @@ Dbdict::execDICT_LOCK_REQ(Signal* signal)
lockPtr.p->locked = false;
lockPtr.p->lt = lt;
checkDictLockQueue(signal);
checkDictLockQueue(signal, false);
if (! lockPtr.p->locked)
sendDictLockInfoEvent(lockPtr, "lock request by node");
}
void
Dbdict::checkDictLockQueue(Signal* signal)
Dbdict::checkDictLockQueue(Signal* signal, bool poll)
{
Uint32 pollCount = ! poll ? 0 : signal->theData[1];
DictLockPtr lockPtr;
do {
if (! c_dictLockQueue.first(lockPtr)) {
jam();
setDictLockPoll(signal, false);
setDictLockPoll(signal, false, pollCount);
return;
}
......@@ -12299,7 +12352,7 @@ Dbdict::checkDictLockQueue(Signal* signal)
// this routine is called again when it is removed for any reason
bool on = ! lockPtr.p->locked;
setDictLockPoll(signal, on);
setDictLockPoll(signal, on, pollCount);
}
void
......@@ -12326,7 +12379,7 @@ Dbdict::execDICT_UNLOCK_ORD(Signal* signal)
c_dictLockQueue.release(lockPtr);
checkDictLockQueue(signal);
checkDictLockQueue(signal, false);
}
void
......@@ -12359,21 +12412,32 @@ Dbdict::sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode)
// control polling
void
Dbdict::setDictLockPoll(Signal* signal, bool on)
Dbdict::setDictLockPoll(Signal* signal, bool on, Uint32 pollCount)
{
if (on) {
jam();
signal->theData[0] = ZDICT_LOCK_POLL;
sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1);
signal->theData[1] = pollCount + 1;
sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
}
if (c_dictLockPoll != on) {
bool change = (c_dictLockPoll != on);
if (change) {
jam();
#ifdef VM_TRACE
infoEvent("DICT: lock polling %s", on ? "On" : "Off");
#endif
c_dictLockPoll = on;
}
// avoid too many messages if master is stuck busy (BS_NODE_FAILURE)
bool periodic =
pollCount < 8 ||
pollCount < 64 && pollCount % 8 == 0 ||
pollCount < 512 && pollCount % 64 == 0 ||
pollCount < 4096 && pollCount % 512 == 0 ||
pollCount % 4096 == 0; // about every 6 minutes
if (change || periodic)
sendDictLockInfoEvent(pollCount);
}
// NF handling
......@@ -12384,6 +12448,11 @@ Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes)
DictLockPtr loopPtr;
c_dictLockQueue.first(loopPtr);
if (getOwnNodeId() != c_masterNodeId) {
ndbrequire(loopPtr.i == RNIL);
return;
}
while (loopPtr.i != RNIL) {
jam();
DictLockPtr lockPtr = loopPtr;
......@@ -12409,7 +12478,7 @@ Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes)
}
}
checkDictLockQueue(signal);
checkDictLockQueue(signal, false);
}
......
......@@ -1804,14 +1804,15 @@ private:
bool c_dictLockPoll;
static const DictLockType* getDictLockType(Uint32 lockType);
void sendDictLockInfoEvent(Uint32 pollCount);
void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text);
void checkDictLockQueue(Signal* signal);
void checkDictLockQueue(Signal* signal, bool poll);
void sendDictLockConf(Signal* signal, DictLockPtr lockPtr);
void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode);
// control polling i.e. continueB loop
void setDictLockPoll(Signal* signal, bool on);
void setDictLockPoll(Signal* signal, bool on, Uint32 pollCount);
// NF handling
void removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes);
......
Lock master DICT against schema operations
Implementation
--------------
[ see comments in Dbdict.hpp ]
Use case: Node startup INR / NR
-------------------------------
Master DICT (like any block) keeps a list of alive nodes (c_aliveNodes).
These nodes are the participants in schema ops.
(1) c_aliveNodes is initialized when DICT starts
in sp3 in READ_NODESCONF from CNTR
(2) when slave node fails (in any sp of the slave node)
it is removed from c_aliveNodes in NODE_FAILREP
(3) when slave starts, it is added to c_aliveNodes
in sp4 of the starting node in INCL_NODEREQ
The slave DIH locks the master DICT in sp2 and releases the lock once the node has started.
This is based on the following constraints:
- the lock is taken when master DICT is known
DIH reads this in sp2 in READ_NODESCONF
- the lock is taken before (3)
- the lock is taken before copying starts and held until it is done
in sp4 DIH meta, DICT meta, tuple data
- on INR in sp2 in START_PERMREQ the LCP info of the slave is erased
in all DIH in invalidateNodeLCP() - not safe under schema ops
Signals:
All signals except the DICT lock signals (DICT_LOCK_REQ, DICT_LOCK_CONF, DICT_UNLOCK_ORD) are standard v5.0 signals.
s=starting node, m=master, a=all participants, l=local block.
* sp2 - DICT_LOCK and START_PERM
DIH/s
DICT_LOCK_REQ
DICT/m
DICT_LOCK_CONF
DIH/s
START_PERMREQ
DIH/m
START_INFOREQ
DIH/a
invalidateNodeLCP() if INR
DIH/a
START_INFOCONF
DIH/m
START_PERMCONF
DIH/s
* sp4 - START_ME (copy metadata, no changes)
DIH/s
START_MEREQ
DIH/m
COPY_TABREQ
DIH/s
COPY_TABCONF
DIH/m
DICTSTARTREQ
DICT/s
GET_SCHEMA_INFOREQ
DICT/m
SCHEMA_INFO
DICT/s
DICTSTARTCONF
DIH/m
INCL_NODEREQ
DIH/a
INCL_NODEREQ
ANY/l
INCL_NODECONF
DIH/a
INCL_NODECONF
DIH/m
START_MECONF
DIH/s
* sp7 - release DICT lock
DIH/s
DICT_UNLOCK_ORD
DICT/m
# vim: set et sw=4:
......@@ -1594,6 +1594,9 @@ void Dbdih::nodeRestartPh2Lab(Signal* signal)
*/
ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);
// check that we are not yet taking part in schema ops
CRASH_INSERTION(7174);
Uint32 lockType = DictLockReq::NodeRestartLock;
Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 };
sendDictLockReq(signal, lockType, c);
......@@ -1746,7 +1749,7 @@ void Dbdih::execSTART_PERMREQ(Signal* signal)
ndbrequire(refToNode(retRef) == nodeId);
if ((c_nodeStartMaster.activeState) ||
(c_nodeStartMaster.wait != ZFALSE) ||
ERROR_INSERTED_CLEAR(7174)) {
ERROR_INSERTED_CLEAR(7175)) {
jam();
signal->theData[0] = nodeId;
signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR;
......@@ -14709,6 +14712,34 @@ Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c)
lockPtr.p->locked = false;
lockPtr.p->callback = c;
// handle rolling upgrade
{
Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version;
unsigned int get_major = getMajor(masterVersion);
unsigned int get_minor = getMinor(masterVersion);
unsigned int get_build = getBuild(masterVersion);
ndbrequire(get_major == 4 || get_major == 5);
if (masterVersion < NDBD_DICT_LOCK_VERSION_5 ||
ERROR_INSERTED(7176)) {
jam();
infoEvent("DIH: detect upgrade: master node %u old version %u.%u.%u",
(unsigned int)cmasterNodeId, get_major, get_minor, get_build);
DictLockConf* conf = (DictLockConf*)&signal->theData[0];
conf->userPtr = lockPtr.i;
conf->lockType = lockType;
conf->lockPtr = ZNIL;
sendSignal(reference(), GSN_DICT_LOCK_CONF, signal,
DictLockConf::SignalLength, JBB);
return;
}
}
BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal,
DictLockReq::SignalLength, JBB);
......@@ -14758,6 +14789,19 @@ Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI)
c_dictLockSlavePool.release(lockPtr);
// handle rolling upgrade
{
Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version;
unsigned int get_major = getMajor(masterVersion);
ndbrequire(get_major == 4 || get_major == 5);
if (masterVersion < NDBD_DICT_LOCK_VERSION_5 ||
ERROR_INSERTED(7176)) {
return;
}
}
BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal,
DictUnlockOrd::SignalLength, JBB);
......
......@@ -1590,17 +1590,18 @@ recv_dict_ops_run(NDBT_Context* ctx)
int
runRestarts(NDBT_Context* ctx, NDBT_Step* step)
{
static int err_master[] = { // non-crashing
0,
7174 // send one fake START_PERMREF
static int errlst_master[] = { // non-crashing
7175, // send one fake START_PERMREF
0
};
static int err_node[] = {
0,
7121, // crash on START_PERMCONF
7130 // crash on START_MECONF
static int errlst_node[] = {
7174, // crash before sending DICT_LOCK_REQ
7176, // pretend master does not support DICT lock
7121, // crash at receive START_PERMCONF
0
};
const uint err_master_cnt = sizeof(err_master)/sizeof(err_master[0]);
const uint err_node_cnt = sizeof(err_node)/sizeof(err_node[0]);
const uint errcnt_master = sizeof(errlst_master)/sizeof(errlst_master[0]);
const uint errcnt_node = sizeof(errlst_node)/sizeof(errlst_node[0]);
myRandom48Init(NdbTick_CurrentMillisecond());
NdbRestarter restarter;
......@@ -1632,7 +1633,7 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step)
nodeIdList[nodeIdCnt++] = nodeId;
}
if (numnodes >= 4) {
if (numnodes >= 4 && myRandom48(2) == 0) {
int rand = myRandom48(numnodes);
int nodeId = restarter.getRandomNodeOtherNodeGroup(nodeIdList[0], rand);
CHECK(nodeId != -1);
......@@ -1642,6 +1643,7 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step)
g_info << "1: master=" << masterNodeId << " nodes=" << nodeIdList[0] << "," << nodeIdList[1] << endl;
const uint timeout = 60; //secs for node wait
const unsigned maxsleep = 2000; //ms
bool NF_ops = ctx->getProperty("Restart_NF_ops");
......@@ -1655,9 +1657,8 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step)
NdbSleep_MilliSleep(myRandom48(maxsleep));
{
int i = 0;
while (i < nodeIdCnt) {
int nodeId = nodeIdList[i++];
for (int i = 0; i < nodeIdCnt; i++) {
int nodeId = nodeIdList[i];
bool nostart = true;
bool abort = NF_type == 0 ? myRandom48(2) : (NF_type == 2);
......@@ -1676,9 +1677,31 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step)
}
g_info << "1: wait for nostart" << endl;
CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt) == 0);
CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt, timeout) == 0);
NdbSleep_MilliSleep(myRandom48(maxsleep));
int err_master = 0;
int err_node[2] = { 0, 0 };
if (NR_error) {
err_master = errlst_master[l % errcnt_master];
// limitation: cannot have 2 node restarts and crash_insert
// one node may die for real (NF during startup)
for (int i = 0; i < nodeIdCnt && nodeIdCnt == 1; i++) {
err_node[i] = errlst_node[l % errcnt_node];
// 7176 - no DICT lock protection
if (err_node[i] == 7176) {
g_info << "1: no dict ops due to error insert "
<< err_node[i] << endl;
NR_ops = false;
}
}
}
g_info << "1: " << (NR_ops ? "run" : "pause") << " dict ops" << endl;
if (! send_dict_ops_cmd(ctx, NR_ops ? 1 : 2))
break;
......@@ -1689,23 +1712,17 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step)
if (NR_error) {
{
int rand = myRandom48(err_master_cnt);
int err = err_master[rand];
int err = err_master;
if (err != 0) {
g_info << "1: insert master error " << err << endl;
CHECK(restarter.insertErrorInNode(masterNodeId, err) == 0);
}
}
// limitation: cannot have 2 node restarts and crash_insert
// one node may die for real (NF during startup)
for (int i = 0; i < nodeIdCnt; i++) {
int nodeId = nodeIdList[i];
int i = 0;
while (i < nodeIdCnt && nodeIdCnt == 1) {
int nodeId = nodeIdList[i++];
int rand = myRandom48(err_node_cnt);
int err = err_node[rand];
int err = err_node[i];
if (err != 0) {
g_info << "1: insert node " << nodeId << " error " << err << endl;
CHECK(restarter.insertErrorInNode(nodeId, err) == 0);
......@@ -1715,7 +1732,7 @@ runRestarts(NDBT_Context* ctx, NDBT_Step* step)
NdbSleep_MilliSleep(myRandom48(maxsleep));
g_info << "1: wait cluster started" << endl;
CHECK(restarter.waitClusterStarted() == 0);
CHECK(restarter.waitClusterStarted(timeout) == 0);
NdbSleep_MilliSleep(myRandom48(maxsleep));
g_info << "1: restart done" << endl;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment