Commit 40f44b48 authored by unknown's avatar unknown

ndb - bug#18781 lock DICT during node restart


ndb/src/kernel/main.cpp:
  signal log from start (#if 0-ed)
ndb/test/ndbapi/testDict.cpp:
  test NF/NR + dict ops
ndb/src/kernel/vm/DLFifoList.hpp:
  add hasPrev
ndb/src/kernel/vm/pc.hpp:
  ERROR_INSERTED_CLEAR(x) test and clear if set
ndb/src/common/debugger/SignalLoggerManager.cpp:
  block no fix
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
  spelling
ndb/include/kernel/GlobalSignalNumbers.h:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/include/kernel/signaldata/AlterTable.hpp:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/include/kernel/signaldata/CreateTable.hpp:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/include/kernel/signaldata/DictLock.hpp:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/include/kernel/signaldata/DropTable.hpp:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/src/common/debugger/signaldata/SignalNames.cpp:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/src/kernel/blocks/ERROR_codes.txt:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/src/kernel/blocks/dbdict/Dbdict.cpp:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/src/kernel/blocks/dbdict/Dbdict.hpp:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/src/kernel/blocks/dbdih/DbdihInit.cpp:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  locking of master DICT against schema ops, used by slave DIH under NR
ndb/src/ndbapi/ndberror.c:
  locking of master DICT against schema ops, used by slave DIH under NR
parent d9590820
...@@ -507,16 +507,12 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES; ...@@ -507,16 +507,12 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES;
#define GSN_TEST_ORD 407 #define GSN_TEST_ORD 407
#define GSN_TESTSIG 408 #define GSN_TESTSIG 408
#define GSN_TIME_SIGNAL 409 #define GSN_TIME_SIGNAL 409
/* 410 unused */
/* 411 unused */
/* 412 unused */
#define GSN_TUP_ABORTREQ 414 #define GSN_TUP_ABORTREQ 414
#define GSN_TUP_ADD_ATTCONF 415 #define GSN_TUP_ADD_ATTCONF 415
#define GSN_TUP_ADD_ATTRREF 416 #define GSN_TUP_ADD_ATTRREF 416
#define GSN_TUP_ADD_ATTRREQ 417 #define GSN_TUP_ADD_ATTRREQ 417
#define GSN_TUP_ATTRINFO 418 #define GSN_TUP_ATTRINFO 418
#define GSN_TUP_COMMITREQ 419 #define GSN_TUP_COMMITREQ 419
/* 420 unused */
#define GSN_TUP_LCPCONF 421 #define GSN_TUP_LCPCONF 421
#define GSN_TUP_LCPREF 422 #define GSN_TUP_LCPREF 422
#define GSN_TUP_LCPREQ 423 #define GSN_TUP_LCPREQ 423
...@@ -938,4 +934,10 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES; ...@@ -938,4 +934,10 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES;
#define GSN_ACC_LOCKREQ 711 #define GSN_ACC_LOCKREQ 711
#define GSN_READ_PSUEDO_REQ 712 #define GSN_READ_PSUEDO_REQ 712
/* DICT LOCK signals */
#define GSN_DICT_LOCK_REQ 410
#define GSN_DICT_LOCK_CONF 411
#define GSN_DICT_LOCK_REF 412
#define GSN_DICT_UNLOCK_ORD 420
#endif #endif
...@@ -114,6 +114,7 @@ public: ...@@ -114,6 +114,7 @@ public:
InvalidTableVersion = 241, InvalidTableVersion = 241,
DropInProgress = 283, DropInProgress = 283,
Busy = 701, Busy = 701,
BusyWithNR = 711,
NotMaster = 702, NotMaster = 702,
InvalidFormat = 703, InvalidFormat = 703,
AttributeNameTooLong = 704, AttributeNameTooLong = 704,
......
...@@ -77,6 +77,7 @@ public: ...@@ -77,6 +77,7 @@ public:
enum ErrorCode { enum ErrorCode {
NoError = 0, NoError = 0,
Busy = 701, Busy = 701,
BusyWithNR = 711,
NotMaster = 702, NotMaster = 702,
InvalidFormat = 703, InvalidFormat = 703,
AttributeNameTooLong = 704, AttributeNameTooLong = 704,
......
/* Copyright (C) 2003 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#ifndef DICT_LOCK_HPP
#define DICT_LOCK_HPP
#include "SignalData.hpp"
// see comments in Dbdict.hpp
/**
 * DICT_LOCK_REQ signal data: request to lock DICT against schema ops.
 * Filled in by Dbdih::sendDictLockReq and consumed by
 * Dbdict::execDICT_LOCK_REQ. Answered with DICT_LOCK_CONF or
 * DICT_LOCK_REF sent to userRef.
 */
class DictLockReq {
friend class Dbdict;
friend class Dbdih;
public:
// Length handed to sendSignal; matches the three Uint32 fields below.
STATIC_CONST( SignalLength = 3 );
enum LockType {
// Has no entry in Dbdict::getDictLockType, so requesting it is
// answered with DictLockRef::InvalidLockType.
NoLock = 0,
// Requested by Dbdih::nodeRestartPh2Lab of a restarting node.
NodeRestartLock = 1
};
private:
// Requester's own record index; echoed back in CONF and REF.
Uint32 userPtr;
// A LockType value.
Uint32 lockType;
// Block reference of the requester; CONF and REF are sent here.
Uint32 userRef;
};
/**
 * DICT_LOCK_CONF signal data: the lock has been granted.
 * Built by Dbdict::sendDictLockConf and read by Dbdih::recvDictLockConf.
 */
class DictLockConf {
friend class Dbdict;
friend class Dbdih;
public:
// Length handed to sendSignal; matches the three Uint32 fields below.
STATIC_CONST( SignalLength = 3 );
private:
// Copied from DictLockReq::userPtr.
Uint32 userPtr;
// Copied from DictLockReq::lockType.
Uint32 lockType;
// Index of the lock record in DICT; the holder passes it back in
// DictUnlockOrd::lockPtr to release the lock.
Uint32 lockPtr;
};
/**
 * DICT_LOCK_REF signal data: the lock request was refused.
 * Built by Dbdict::sendDictLockRef. Dbdih::execDICT_LOCK_REF treats
 * any REF as fatal (ndbrequire(false)).
 */
class DictLockRef {
friend class Dbdict;
friend class Dbdih;
public:
// Length handed to sendSignal; matches the three Uint32 fields below.
STATIC_CONST( SignalLength = 3 );
enum ErrorCode {
// The receiving DICT is not on the master node.
NotMaster = 1,
// Dbdict::getDictLockType does not know the requested lock type.
InvalidLockType = 2,
// No free record could be seized from the DICT lock queue.
TooManyRequests = 3
};
private:
// Copied from DictLockReq::userPtr.
Uint32 userPtr;
// Copied from DictLockReq::lockType.
Uint32 lockType;
// An ErrorCode value.
Uint32 errorCode;
};
/**
 * DICT_UNLOCK_ORD signal data: release a lock or remove a queued
 * lock request. Built by Dbdih::sendDictUnlockOrd and handled by
 * Dbdict::execDICT_UNLOCK_ORD. No reply signal is sent.
 */
class DictUnlockOrd {
friend class Dbdict;
friend class Dbdih;
public:
// Length handed to sendSignal; matches the two Uint32 fields below.
STATIC_CONST( SignalLength = 2 );
private:
// Lock record index in DICT, as received in DictLockConf::lockPtr.
Uint32 lockPtr;
// Must equal the lock type stored in the DICT lock record
// (checked with ndbrequire in Dbdict::execDICT_UNLOCK_ORD).
Uint32 lockType;
};
#endif
...@@ -53,6 +53,7 @@ public: ...@@ -53,6 +53,7 @@ public:
enum ErrorCode { enum ErrorCode {
Busy = 701, Busy = 701,
BusyWithNR = 711,
NotMaster = 702, NotMaster = 702,
NoSuchTable = 709, NoSuchTable = 709,
InvalidTableVersion = 241, InvalidTableVersion = 241,
......
...@@ -139,7 +139,7 @@ SignalLoggerManager::log(LogMode logMode, const char * params) ...@@ -139,7 +139,7 @@ SignalLoggerManager::log(LogMode logMode, const char * params)
} else { } else {
for (int i = 0; i < count; ++i){ for (int i = 0; i < count; ++i){
BlockNumber number = getBlockNo(blocks[i]); BlockNumber number = getBlockNo(blocks[i]);
cnt += log(SLM_ON, number-MIN_BLOCK_NO, logMode); cnt += log(SLM_ON, number, logMode);
} }
} }
for(int i = 0; i<count; i++){ for(int i = 0; i<count; i++){
......
...@@ -647,6 +647,12 @@ const GsnName SignalNames [] = { ...@@ -647,6 +647,12 @@ const GsnName SignalNames [] = {
,{ GSN_TUX_MAINT_REF, "TUX_MAINT_REF" } ,{ GSN_TUX_MAINT_REF, "TUX_MAINT_REF" }
,{ GSN_TUX_BOUND_INFO, "TUX_BOUND_INFO" } ,{ GSN_TUX_BOUND_INFO, "TUX_BOUND_INFO" }
,{ GSN_ACC_LOCKREQ, "ACC_LOCKREQ" } ,{ GSN_ACC_LOCKREQ, "ACC_LOCKREQ" }
/* DICT LOCK */
,{ GSN_DICT_LOCK_REQ, "DICT_LOCK_REQ" }
,{ GSN_DICT_LOCK_CONF, "DICT_LOCK_CONF" }
,{ GSN_DICT_LOCK_REF, "DICT_LOCK_REF" }
,{ GSN_DICT_UNLOCK_ORD, "DICT_UNLOCK_ORD" }
}; };
const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName); const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);
...@@ -5,7 +5,7 @@ Next DBACC 3002 ...@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4013 Next DBTUP 4013
Next DBLQH 5043 Next DBLQH 5043
Next DBDICT 6007 Next DBDICT 6007
Next DBDIH 7174 Next DBDIH 7175
Next DBTC 8037 Next DBTC 8037
Next CMVMI 9000 Next CMVMI 9000
Next BACKUP 10022 Next BACKUP 10022
...@@ -312,6 +312,8 @@ Test Crashes in handling node restarts ...@@ -312,6 +312,8 @@ Test Crashes in handling node restarts
7170: Crash when receiving START_PERMREF (InitialStartRequired) 7170: Crash when receiving START_PERMREF (InitialStartRequired)
7174: Send one fake START_PERMREF (ZNODE_ALREADY_STARTING_ERROR)
DICT: DICT:
6000 Crash during NR when receiving DICTSTARTREQ 6000 Crash during NR when receiving DICTSTARTREQ
6001 Crash during NR when receiving SCHEMA_INFO 6001 Crash during NR when receiving SCHEMA_INFO
......
...@@ -203,6 +203,11 @@ void Dbdict::execCONTINUEB(Signal* signal) ...@@ -203,6 +203,11 @@ void Dbdict::execCONTINUEB(Signal* signal)
sendGetTabResponse(signal); sendGetTabResponse(signal);
break; break;
case ZDICT_LOCK_POLL:
jam();
checkDictLockQueue(signal);
break;
default : default :
ndbrequire(false); ndbrequire(false);
break; break;
...@@ -1208,7 +1213,9 @@ Dbdict::Dbdict(const class Configuration & conf): ...@@ -1208,7 +1213,9 @@ Dbdict::Dbdict(const class Configuration & conf):
c_opCreateTrigger(c_opRecordPool), c_opCreateTrigger(c_opRecordPool),
c_opDropTrigger(c_opRecordPool), c_opDropTrigger(c_opRecordPool),
c_opAlterTrigger(c_opRecordPool), c_opAlterTrigger(c_opRecordPool),
c_opRecordSequence(0) c_opRecordSequence(0),
c_dictLockQueue(c_dictLockPool),
c_dictLockPoll(false)
{ {
BLOCK_CONSTRUCTOR(Dbdict); BLOCK_CONSTRUCTOR(Dbdict);
...@@ -1352,6 +1359,9 @@ Dbdict::Dbdict(const class Configuration & conf): ...@@ -1352,6 +1359,9 @@ Dbdict::Dbdict(const class Configuration & conf):
addRecSignal(GSN_DROP_TAB_CONF, &Dbdict::execDROP_TAB_CONF); addRecSignal(GSN_DROP_TAB_CONF, &Dbdict::execDROP_TAB_CONF);
addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Dbdict::execBACKUP_FRAGMENT_REQ); addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Dbdict::execBACKUP_FRAGMENT_REQ);
addRecSignal(GSN_DICT_LOCK_REQ, &Dbdict::execDICT_LOCK_REQ);
addRecSignal(GSN_DICT_UNLOCK_ORD, &Dbdict::execDICT_UNLOCK_ORD);
}//Dbdict::Dbdict() }//Dbdict::Dbdict()
Dbdict::~Dbdict() Dbdict::~Dbdict()
...@@ -1764,6 +1774,8 @@ void Dbdict::execREAD_CONFIG_REQ(Signal* signal) ...@@ -1764,6 +1774,8 @@ void Dbdict::execREAD_CONFIG_REQ(Signal* signal)
c_opCreateTrigger.setSize(8); c_opCreateTrigger.setSize(8);
c_opDropTrigger.setSize(8); c_opDropTrigger.setSize(8);
c_opAlterTrigger.setSize(8); c_opAlterTrigger.setSize(8);
c_dictLockPool.setSize(32);
// Initialize schema file copies // Initialize schema file copies
c_schemaFile[0].schemaPage = c_schemaFile[0].schemaPage =
...@@ -2821,6 +2833,11 @@ void Dbdict::execNODE_FAILREP(Signal* signal) ...@@ -2821,6 +2833,11 @@ void Dbdict::execNODE_FAILREP(Signal* signal)
c_blockState = BS_NODE_FAILURE; c_blockState = BS_NODE_FAILURE;
ok = true; ok = true;
break; break;
case BS_NODE_RESTART:
jam();
ok = true;
removeStaleDictLocks(signal, theFailedNodes);
break;
} }
ndbrequire(ok); ndbrequire(ok);
...@@ -2911,6 +2928,12 @@ Dbdict::execCREATE_TABLE_REQ(Signal* signal){ ...@@ -2911,6 +2928,12 @@ Dbdict::execCREATE_TABLE_REQ(Signal* signal){
break; break;
} }
if (c_blockState == BS_NODE_RESTART){
jam();
parseRecord.errorCode = CreateTableRef::BusyWithNR;
break;
}
if (c_blockState != BS_IDLE){ if (c_blockState != BS_IDLE){
jam(); jam();
parseRecord.errorCode = CreateTableRef::Busy; parseRecord.errorCode = CreateTableRef::Busy;
...@@ -3060,6 +3083,12 @@ Dbdict::execALTER_TABLE_REQ(Signal* signal) ...@@ -3060,6 +3083,12 @@ Dbdict::execALTER_TABLE_REQ(Signal* signal)
return; return;
} }
if(c_blockState == BS_NODE_RESTART){
jam();
alterTableRef(signal, req, AlterTableRef::BusyWithNR);
return;
}
if(c_blockState != BS_IDLE){ if(c_blockState != BS_IDLE){
jam(); jam();
alterTableRef(signal, req, AlterTableRef::Busy); alterTableRef(signal, req, AlterTableRef::Busy);
...@@ -5372,6 +5401,12 @@ Dbdict::execDROP_TABLE_REQ(Signal* signal){ ...@@ -5372,6 +5401,12 @@ Dbdict::execDROP_TABLE_REQ(Signal* signal){
return; return;
} }
if(c_blockState == BS_NODE_RESTART){
jam();
dropTableRef(signal, req, DropTableRef::BusyWithNR);
return;
}
if(c_blockState != BS_IDLE){ if(c_blockState != BS_IDLE){
jam(); jam();
dropTableRef(signal, req, DropTableRef::Busy); dropTableRef(signal, req, DropTableRef::Busy);
...@@ -12170,6 +12205,214 @@ Dbdict::getIndexAttrMask(TableRecordPtr indexPtr, AttributeMask& mask) ...@@ -12170,6 +12205,214 @@ Dbdict::getIndexAttrMask(TableRecordPtr indexPtr, AttributeMask& mask)
} }
} }
// DICT lock master
/**
 * Map a DictLockReq lock type to its static descriptor.
 *
 * @param lockType  value of DictLockReq::lockType from a DICT_LOCK_REQ
 * @return pointer to the matching entry of a static table, or NULL if
 *         the lock type has no entry (e.g. DictLockReq::NoLock)
 */
const Dbdict::DictLockType*
Dbdict::getDictLockType(Uint32 lockType)
{
  // The table is only ever read, so it is const.
  static const DictLockType lt[] = {
    { DictLockReq::NodeRestartLock, BS_NODE_RESTART, "NodeRestart" }
  };
  // sizeof yields an unsigned value; use an unsigned index so the loop
  // condition is not a signed/unsigned comparison.
  const unsigned count = sizeof(lt)/sizeof(lt[0]);
  for (unsigned i = 0; i < count; i++) {
    if (lt[i].lockType == lockType)
      return &lt[i];
  }
  return NULL;
}
/**
 * Emit an info event describing a lock queue transition.
 *
 * The message has the form "DICT: <text> <node id> for <lock type>",
 * where the node id is taken from the requester's block reference and
 * the lock type name from the lock record's descriptor.
 */
void
Dbdict::sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text)
{
  const unsigned int requesterNode =
    (unsigned int)refToNode(lockPtr.p->req.userRef);
  const char* const lockName = lockPtr.p->lt->text;
  infoEvent("DICT: %s %u for %s", text, requesterNode, lockName);
}
/**
 * Handle DICT_LOCK_REQ: queue a request to lock DICT.
 *
 * Replies with DICT_LOCK_REF if this node is not master, the lock type
 * is unknown, or no lock record can be seized. Otherwise the request
 * is appended to the lock queue and the queue is evaluated; if the
 * lock was not granted immediately an info event is logged.
 */
void
Dbdict::execDICT_LOCK_REQ(Signal* signal)
{
  jamEntry();
  // Work on a private copy: replies are built in the same signal buffer.
  const DictLockReq req = *(const DictLockReq*)&signal->theData[0];

  if (getOwnNodeId() != c_masterNodeId) {
    jam();
    sendDictLockRef(signal, req, DictLockRef::NotMaster);
    return;
  }

  const DictLockType* const type = getDictLockType(req.lockType);
  if (type == NULL) {
    jam();
    sendDictLockRef(signal, req, DictLockRef::InvalidLockType);
    return;
  }

  DictLockPtr lockPtr;
  if (! c_dictLockQueue.seize(lockPtr)) {
    jam();
    sendDictLockRef(signal, req, DictLockRef::TooManyRequests);
    return;
  }

  lockPtr.p->lt = type;
  lockPtr.p->locked = false;
  lockPtr.p->req = req;

  checkDictLockQueue(signal);

  // Still waiting in the queue: tell the operator about it.
  if (! lockPtr.p->locked)
    sendDictLockInfoEvent(lockPtr, "lock request by node");
}
/**
 * Evaluate the head of the DICT lock queue.
 *
 * - Empty queue: polling is switched off.
 * - Head already locked: nothing to do (block state must match).
 * - Head not locked: the lock is granted only when no operation record
 *   is in use and the block state is BS_IDLE; DICT_LOCK_CONF is then
 *   sent and the block state is set to the lock type's state.
 *
 * While the head request is still open, polling via CONTINUEB is kept
 * on so that this routine runs again later.
 */
void
Dbdict::checkDictLockQueue(Signal* signal)
{
  DictLockPtr lockPtr;
  do {
    if (! c_dictLockQueue.first(lockPtr)) {
      jam();
      setDictLockPoll(signal, false);
      return;
    }
    if (lockPtr.p->locked) {
      jam();
      ndbrequire(c_blockState == lockPtr.p->lt->blockState);
      break;
    }
    // Schema operations in progress: wait for them to finish.
    if (c_opRecordPool.getNoOfFree() != c_opRecordPool.getSize()) {
      jam();
      break;
    }
    // The block state may be non-idle even though no op record is in
    // use (execNODE_FAILREP can set BS_NODE_FAILURE). Keep the request
    // queued and poll again instead of crashing the master.
    // NOTE(review): relies on the state eventually returning to
    // BS_IDLE - confirm for every non-idle state.
    if (c_blockState != BS_IDLE) {
      jam();
      break;
    }
    lockPtr.p->locked = true;
    c_blockState = lockPtr.p->lt->blockState;
    sendDictLockConf(signal, lockPtr);
    sendDictLockInfoEvent(lockPtr, "locked by node");
  } while (0);
  // poll while first request is open
  // this routine is called again when it is removed for any reason
  bool on = ! lockPtr.p->locked;
  setDictLockPoll(signal, on);
}
/**
 * Handle DICT_UNLOCK_ORD: release a held lock or drop a queued request.
 *
 * The lock record is looked up by the index given in the signal and
 * its lock type must match. If the lock was held, the block state is
 * reset to BS_IDLE. The record is released and the queue re-evaluated
 * so the next request (if any) can proceed.
 */
void
Dbdict::execDICT_UNLOCK_ORD(Signal* signal)
{
  jamEntry();
  const DictUnlockOrd* ord = (const DictUnlockOrd*)&signal->theData[0];

  DictLockPtr lockPtr;
  c_dictLockQueue.getPtr(lockPtr, ord->lockPtr);
  ndbrequire(lockPtr.p->lt->lockType == ord->lockType);

  const char* eventText = "lock request removed by node";
  if (lockPtr.p->locked) {
    jam();
    // A held lock is always the queue head and excludes schema ops.
    ndbrequire(c_blockState == lockPtr.p->lt->blockState);
    ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize());
    ndbrequire(! c_dictLockQueue.hasPrev(lockPtr));
    c_blockState = BS_IDLE;
    eventText = "unlocked by node";
  }
  sendDictLockInfoEvent(lockPtr, eventText);

  c_dictLockQueue.release(lockPtr);
  checkDictLockQueue(signal);
}
/**
 * Send DICT_LOCK_CONF for a lock record to the block that requested it.
 *
 * The requester's userPtr and lockType are echoed back and the index of
 * the lock record is returned as lockPtr.
 */
void
Dbdict::sendDictLockConf(Signal* signal, DictLockPtr lockPtr)
{
  // The stored request lives in the lock record, not in the signal buffer.
  const DictLockReq& storedReq = lockPtr.p->req;

  DictLockConf* const conf = (DictLockConf*)&signal->theData[0];
  conf->lockPtr = lockPtr.i;
  conf->lockType = storedReq.lockType;
  conf->userPtr = storedReq.userPtr;

  sendSignal(storedReq.userRef, GSN_DICT_LOCK_CONF, signal,
             DictLockConf::SignalLength, JBB);
}
/**
 * Send DICT_LOCK_REF with the given error code to the requester.
 *
 * The request is taken by value because the REF is built in the same
 * signal buffer the request may have been read from.
 */
void
Dbdict::sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode)
{
  DictLockRef* const ref = (DictLockRef*)&signal->theData[0];
  ref->errorCode = errorCode;
  ref->lockType = req.lockType;
  ref->userPtr = req.userPtr;

  const Uint32 receiver = req.userRef;
  sendSignal(receiver, GSN_DICT_LOCK_REF, signal,
             DictLockRef::SignalLength, JBB);
}
// control polling
/**
 * Switch polling of the lock queue on or off.
 *
 * When on is true a delayed CONTINUEB(ZDICT_LOCK_POLL) is sent to this
 * block on every call, whether or not polling was already on. The
 * c_dictLockPoll flag only records the current state and, in VM_TRACE
 * builds, triggers an info event when it changes.
 */
void
Dbdict::setDictLockPoll(Signal* signal, bool on)
{
  if (on) {
    jam();
    signal->theData[0] = ZDICT_LOCK_POLL;
    sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1);
  }

  // Nothing more to do unless the polling state actually changes.
  if (c_dictLockPoll == on)
    return;

  jam();
#ifdef VM_TRACE
  infoEvent("DICT: lock polling %s", on ? "On" : "Off");
#endif
  c_dictLockPoll = on;
}
// NF handling
/**
 * Node failure handling: drop every lock and lock request owned by a
 * failed node.
 *
 * Walks the lock queue and releases each record whose requester node is
 * set in theFailedNodes. A held lock also resets the block state to
 * BS_IDLE. The queue is re-evaluated at the end.
 */
void
Dbdict::removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes)
{
  DictLockPtr nextPtr;
  c_dictLockQueue.first(nextPtr);
  while (nextPtr.i != RNIL) {
    jam();
    // Advance before a possible release of the current record.
    DictLockPtr lockPtr = nextPtr;
    c_dictLockQueue.next(nextPtr);

    const Uint32 ownerNode = refToNode(lockPtr.p->req.userRef);
    if (! NodeBitmask::get(theFailedNodes, ownerNode))
      continue;

    const char* eventText = "remove lock request by failed node";
    if (lockPtr.p->locked) {
      jam();
      ndbrequire(c_blockState == lockPtr.p->lt->blockState);
      ndbrequire(c_opRecordPool.getNoOfFree() == c_opRecordPool.getSize());
      ndbrequire(! c_dictLockQueue.hasPrev(lockPtr));
      c_blockState = BS_IDLE;
      eventText = "remove lock by failed node";
    }
    sendDictLockInfoEvent(lockPtr, eventText);
    c_dictLockQueue.release(lockPtr);
  }
  checkDictLockQueue(signal);
}
/* **************************************************************** */ /* **************************************************************** */
/* ---------------------------------------------------------------- */ /* ---------------------------------------------------------------- */
/* MODULE: STORE/RESTORE SCHEMA FILE---------------------- */ /* MODULE: STORE/RESTORE SCHEMA FILE---------------------- */
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <pc.hpp> #include <pc.hpp>
#include <ArrayList.hpp> #include <ArrayList.hpp>
#include <DLHashTable.hpp> #include <DLHashTable.hpp>
#include <DLFifoList.hpp>
#include <CArray.hpp> #include <CArray.hpp>
#include <KeyTable2.hpp> #include <KeyTable2.hpp>
#include <SimulatedBlock.hpp> #include <SimulatedBlock.hpp>
...@@ -50,6 +51,7 @@ ...@@ -50,6 +51,7 @@
#include <signaldata/CreateTrig.hpp> #include <signaldata/CreateTrig.hpp>
#include <signaldata/DropTrig.hpp> #include <signaldata/DropTrig.hpp>
#include <signaldata/AlterTrig.hpp> #include <signaldata/AlterTrig.hpp>
#include <signaldata/DictLock.hpp>
#include "SchemaFile.hpp" #include "SchemaFile.hpp"
#include <blocks/mutexes.hpp> #include <blocks/mutexes.hpp>
#include <SafeCounter.hpp> #include <SafeCounter.hpp>
...@@ -63,6 +65,7 @@ ...@@ -63,6 +65,7 @@
/*--------------------------------------------------------------*/ /*--------------------------------------------------------------*/
#define ZPACK_TABLE_INTO_PAGES 0 #define ZPACK_TABLE_INTO_PAGES 0
#define ZSEND_GET_TAB_RESPONSE 3 #define ZSEND_GET_TAB_RESPONSE 3
#define ZDICT_LOCK_POLL 4
/*--------------------------------------------------------------*/ /*--------------------------------------------------------------*/
...@@ -587,6 +590,9 @@ private: ...@@ -587,6 +590,9 @@ private:
void execALTER_TAB_CONF(Signal* signal); void execALTER_TAB_CONF(Signal* signal);
bool check_ndb_versions() const; bool check_ndb_versions() const;
void execDICT_LOCK_REQ(Signal* signal);
void execDICT_UNLOCK_ORD(Signal* signal);
/* /*
* 2.4 COMMON STORED VARIABLES * 2.4 COMMON STORED VARIABLES
*/ */
...@@ -817,12 +823,43 @@ private: ...@@ -817,12 +823,43 @@ private:
// State variables // State variables
/* ----------------------------------------------------------------------- */ /* ----------------------------------------------------------------------- */
#ifndef ndb_dbdict_log_block_state
enum BlockState { enum BlockState {
BS_IDLE = 0, BS_IDLE = 0,
BS_CREATE_TAB = 1, BS_CREATE_TAB = 1,
BS_BUSY = 2, BS_BUSY = 2,
BS_NODE_FAILURE = 3 BS_NODE_FAILURE = 3,
BS_NODE_RESTART = 4
};
#else // quick hack to log changes
enum {
BS_IDLE = 0,
BS_CREATE_TAB = 1,
BS_BUSY = 2,
BS_NODE_FAILURE = 3,
BS_NODE_RESTART = 4
};
struct BlockState;
friend struct BlockState;
struct BlockState {
BlockState() :
m_value(BS_IDLE) {
}
BlockState(int value) :
m_value(value) {
}
operator int() const {
return m_value;
}
BlockState& operator=(const BlockState& bs) {
Dbdict* dict = (Dbdict*)globalData.getBlock(DBDICT);
dict->infoEvent("DICT: bs %d->%d", m_value, bs.m_value);
m_value = bs.m_value;
return *this;
}
int m_value;
}; };
#endif
BlockState c_blockState; BlockState c_blockState;
struct PackTable { struct PackTable {
...@@ -1722,6 +1759,64 @@ private: ...@@ -1722,6 +1759,64 @@ private:
// Unique key for operation XXX move to some system table // Unique key for operation XXX move to some system table
Uint32 c_opRecordSequence; Uint32 c_opRecordSequence;
/*
* Master DICT can be locked in 2 mutually exclusive ways:
*
* 1) for schema ops, via operation records
* 2) against schema ops, via a lock queue
*
* Current use of 2) is by a starting node, to prevent schema ops
* until started. The ops are refused (BlockState != BS_IDLE),
* not queued.
*
* Master failure is not handled: in the node start case the starting
* node will crash too anyway. Use a lock table in the future.
*
* The lock queue is "serial" but other behaviour is possible
* by checking lock types e.g. to allow parallel node starts.
*
* Checking release of last op record is not convenient with
* current structure (5.0). Instead we poll via continueB.
*
* XXX only table ops check BlockState
*/
// Static description of one lock type; entries are defined in
// getDictLockType() using positional initialisation, so the member
// order here must not change.
struct DictLockType {
// Lock type as carried in DictLockReq::lockType.
DictLockReq::LockType lockType;
// Value assigned to c_blockState while a lock of this type is held.
BlockState blockState;
// Lock type name used in info events.
const char* text;
};
// One entry of the lock queue: a lock request that is either waiting
// or currently holds the lock.
struct DictLockRecord {
// Copy of the DICT_LOCK_REQ that created this record.
DictLockReq req;
// Descriptor returned by getDictLockType() for req.lockType.
const DictLockType* lt;
// True once the lock has been granted and DICT_LOCK_CONF sent.
bool locked;
// Link fields; presumably used by ArrayPool (nextPool) and
// DLFifoList (nextList, prevList) - verify against those templates.
union {
Uint32 nextPool;
Uint32 nextList;
};
Uint32 prevList;
};
typedef Ptr<DictLockRecord> DictLockPtr;
ArrayPool<DictLockRecord> c_dictLockPool;
DLFifoList<DictLockRecord> c_dictLockQueue;
bool c_dictLockPoll;
static const DictLockType* getDictLockType(Uint32 lockType);
void sendDictLockInfoEvent(DictLockPtr lockPtr, const char* text);
void checkDictLockQueue(Signal* signal);
void sendDictLockConf(Signal* signal, DictLockPtr lockPtr);
void sendDictLockRef(Signal* signal, DictLockReq req, Uint32 errorCode);
// control polling i.e. continueB loop
void setDictLockPoll(Signal* signal, bool on);
// NF handling
void removeStaleDictLocks(Signal* signal, const Uint32* theFailedNodes);
// Statement blocks // Statement blocks
/* ------------------------------------------------------------ */ /* ------------------------------------------------------------ */
......
...@@ -718,6 +718,9 @@ private: ...@@ -718,6 +718,9 @@ private:
void checkPrepDropTabComplete(Signal *, TabRecordPtr tabPtr); void checkPrepDropTabComplete(Signal *, TabRecordPtr tabPtr);
void checkWaitDropTabFailedLqh(Signal *, Uint32 nodeId, Uint32 tableId); void checkWaitDropTabFailedLqh(Signal *, Uint32 nodeId, Uint32 tableId);
void execDICT_LOCK_CONF(Signal* signal);
void execDICT_LOCK_REF(Signal* signal);
// Statement blocks // Statement blocks
//------------------------------------ //------------------------------------
// Methods that send signals // Methods that send signals
...@@ -935,6 +938,7 @@ private: ...@@ -935,6 +938,7 @@ private:
void initialStartCompletedLab(Signal *); void initialStartCompletedLab(Signal *);
void allNodesLcpCompletedLab(Signal *); void allNodesLcpCompletedLab(Signal *);
void nodeRestartPh2Lab(Signal *); void nodeRestartPh2Lab(Signal *);
void nodeRestartPh2Lab2(Signal *);
void initGciFilesLab(Signal *); void initGciFilesLab(Signal *);
void dictStartConfLab(Signal *); void dictStartConfLab(Signal *);
void nodeDictStartConfLab(Signal *); void nodeDictStartConfLab(Signal *);
...@@ -1594,6 +1598,30 @@ private: ...@@ -1594,6 +1598,30 @@ private:
* Reply from nodeId * Reply from nodeId
*/ */
void startInfoReply(Signal *, Uint32 nodeId); void startInfoReply(Signal *, Uint32 nodeId);
/*
* Lock master DICT. Only current use is by starting node
* during NR. A pool of slave records is convenient anyway.
*/
// Requester-side state of one DICT lock request.
struct DictLockSlaveRecord {
// Lock record index in master DICT; RNIL until DICT_LOCK_CONF
// arrives, then sent back in DICT_UNLOCK_ORD.
Uint32 lockPtr;
// DictLockReq::LockType value that was requested.
Uint32 lockType;
// Set to true by recvDictLockConf when the lock has been granted.
bool locked;
// Executed by recvDictLockConf; its m_callbackData is set to this
// record's pool index before the call.
Callback callback;
// Link field; presumably the ArrayPool free-list link - verify.
Uint32 nextPool;
};
typedef Ptr<DictLockSlaveRecord> DictLockSlavePtr;
ArrayPool<DictLockSlaveRecord> c_dictLockSlavePool;
// slave
void sendDictLockReq(Signal* signal, Uint32 lockType, Callback c);
void recvDictLockConf(Signal* signal);
void sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI);
// NR
Uint32 c_dictLockSlavePtrI_nodeRestart; // userPtr for NR
void recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret);
}; };
#if (DIH_CDATA_SIZE < _SYSFILE_SIZE32) #if (DIH_CDATA_SIZE < _SYSFILE_SIZE32)
......
...@@ -66,6 +66,9 @@ void Dbdih::initData() ...@@ -66,6 +66,9 @@ void Dbdih::initData()
waitGCPProxyPool.setSize(ZPROXY_FILE_SIZE); waitGCPProxyPool.setSize(ZPROXY_FILE_SIZE);
waitGCPMasterPool.setSize(ZPROXY_MASTER_FILE_SIZE); waitGCPMasterPool.setSize(ZPROXY_MASTER_FILE_SIZE);
c_dictLockSlavePool.setSize(1); // assert single usage
c_dictLockSlavePtrI_nodeRestart = RNIL;
cgcpOrderBlocked = 0; cgcpOrderBlocked = 0;
c_lcpState.ctcCounter = 0; c_lcpState.ctcCounter = 0;
cwaitLcpSr = false; cwaitLcpSr = false;
...@@ -264,6 +267,9 @@ Dbdih::Dbdih(const class Configuration & config): ...@@ -264,6 +267,9 @@ Dbdih::Dbdih(const class Configuration & config):
addRecSignal(GSN_CREATE_FRAGMENTATION_REQ, addRecSignal(GSN_CREATE_FRAGMENTATION_REQ,
&Dbdih::execCREATE_FRAGMENTATION_REQ); &Dbdih::execCREATE_FRAGMENTATION_REQ);
addRecSignal(GSN_DICT_LOCK_CONF, &Dbdih::execDICT_LOCK_CONF);
addRecSignal(GSN_DICT_LOCK_REF, &Dbdih::execDICT_LOCK_REF);
apiConnectRecord = 0; apiConnectRecord = 0;
connectRecord = 0; connectRecord = 0;
fileRecord = 0; fileRecord = 0;
......
...@@ -67,6 +67,7 @@ ...@@ -67,6 +67,7 @@
#include <signaldata/CreateFragmentation.hpp> #include <signaldata/CreateFragmentation.hpp>
#include <signaldata/LqhFrag.hpp> #include <signaldata/LqhFrag.hpp>
#include <signaldata/FsOpenReq.hpp> #include <signaldata/FsOpenReq.hpp>
#include <signaldata/DictLock.hpp>
#include <DebuggerNames.hpp> #include <DebuggerNames.hpp>
#include <EventLogger.hpp> #include <EventLogger.hpp>
...@@ -544,7 +545,7 @@ void Dbdih::execCONTINUEB(Signal* signal) ...@@ -544,7 +545,7 @@ void Dbdih::execCONTINUEB(Signal* signal)
break; break;
case DihContinueB::ZSTART_PERMREQ_AGAIN: case DihContinueB::ZSTART_PERMREQ_AGAIN:
jam(); jam();
nodeRestartPh2Lab(signal); nodeRestartPh2Lab2(signal);
return; return;
break; break;
case DihContinueB::SwitchReplica: case DihContinueB::SwitchReplica:
...@@ -1284,6 +1285,7 @@ void Dbdih::execNDB_STTOR(Signal* signal) ...@@ -1284,6 +1285,7 @@ void Dbdih::execNDB_STTOR(Signal* signal)
case NodeState::ST_INITIAL_NODE_RESTART: case NodeState::ST_INITIAL_NODE_RESTART:
case NodeState::ST_NODE_RESTART: case NodeState::ST_NODE_RESTART:
jam(); jam();
/*********************************************************************** /***********************************************************************
* When starting nodes while system is operational we must be controlled * When starting nodes while system is operational we must be controlled
* by the master since only one node restart is allowed at a time. * by the master since only one node restart is allowed at a time.
...@@ -1294,7 +1296,7 @@ void Dbdih::execNDB_STTOR(Signal* signal) ...@@ -1294,7 +1296,7 @@ void Dbdih::execNDB_STTOR(Signal* signal)
req->startingRef = reference(); req->startingRef = reference();
req->startingVersion = 0; // Obsolete req->startingVersion = 0; // Obsolete
sendSignal(cmasterdihref, GSN_START_MEREQ, signal, sendSignal(cmasterdihref, GSN_START_MEREQ, signal,
StartMeReq::SignalLength, JBB); StartMeReq::SignalLength, JBB);
return; return;
} }
ndbrequire(false); ndbrequire(false);
...@@ -1354,6 +1356,24 @@ void Dbdih::execNDB_STTOR(Signal* signal) ...@@ -1354,6 +1356,24 @@ void Dbdih::execNDB_STTOR(Signal* signal)
} }
ndbrequire(false); ndbrequire(false);
break; break;
case ZNDB_SPH7:
jam();
switch (typestart) {
case NodeState::ST_INITIAL_START:
case NodeState::ST_SYSTEM_RESTART:
jam();
ndbsttorry10Lab(signal, __LINE__);
return;
case NodeState::ST_NODE_RESTART:
case NodeState::ST_INITIAL_NODE_RESTART:
jam();
sendDictUnlockOrd(signal, c_dictLockSlavePtrI_nodeRestart);
c_dictLockSlavePtrI_nodeRestart = RNIL;
ndbsttorry10Lab(signal, __LINE__);
return;
}
ndbrequire(false);
break;
default: default:
jam(); jam();
ndbsttorry10Lab(signal, __LINE__); ndbsttorry10Lab(signal, __LINE__);
...@@ -1563,6 +1583,31 @@ void Dbdih::execREAD_NODESCONF(Signal* signal) ...@@ -1563,6 +1583,31 @@ void Dbdih::execREAD_NODESCONF(Signal* signal)
/* START NODE LOGIC FOR NODE RESTART */ /* START NODE LOGIC FOR NODE RESTART */
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
void Dbdih::nodeRestartPh2Lab(Signal* signal) void Dbdih::nodeRestartPh2Lab(Signal* signal)
{
/*
* Lock master DICT to avoid metadata operations during INR/NR.
* Done just before START_PERMREQ.
*
* It would be more elegant to do this just before START_MEREQ.
* The problem is, on INR we end up in massive invalidateNodeLCP
* which is not fully protected against metadata ops.
*/
ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);
Uint32 lockType = DictLockReq::NodeRestartLock;
Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 };
sendDictLockReq(signal, lockType, c);
}
/*
 * Callback run by recvDictLockConf once master DICT has granted the
 * node restart lock requested in nodeRestartPh2Lab.
 *
 * data is the index of the DictLockSlaveRecord (recvDictLockConf puts
 * it in m_callbackData); it is remembered so the lock can be released
 * later. ret is not used. The restart then continues with
 * nodeRestartPh2Lab2.
 */
void Dbdih::recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret)
{
// Only one node restart lock may be outstanding at a time.
ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);
c_dictLockSlavePtrI_nodeRestart = data;
nodeRestartPh2Lab2(signal);
}
void Dbdih::nodeRestartPh2Lab2(Signal* signal)
{ {
/*------------------------------------------------------------------------*/ /*------------------------------------------------------------------------*/
// REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY // REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY
...@@ -1574,7 +1619,7 @@ void Dbdih::nodeRestartPh2Lab(Signal* signal) ...@@ -1574,7 +1619,7 @@ void Dbdih::nodeRestartPh2Lab(Signal* signal)
req->nodeId = cownNodeId; req->nodeId = cownNodeId;
req->startType = cstarttype; req->startType = cstarttype;
sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB); sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB);
}//Dbdih::nodeRestartPh2Lab() }
void Dbdih::execSTART_PERMCONF(Signal* signal) void Dbdih::execSTART_PERMCONF(Signal* signal)
{ {
...@@ -1696,12 +1741,12 @@ void Dbdih::execSTART_PERMREQ(Signal* signal) ...@@ -1696,12 +1741,12 @@ void Dbdih::execSTART_PERMREQ(Signal* signal)
const BlockReference retRef = req->blockRef; const BlockReference retRef = req->blockRef;
const Uint32 nodeId = req->nodeId; const Uint32 nodeId = req->nodeId;
const Uint32 typeStart = req->startType; const Uint32 typeStart = req->startType;
CRASH_INSERTION(7122); CRASH_INSERTION(7122);
ndbrequire(isMaster()); ndbrequire(isMaster());
ndbrequire(refToNode(retRef) == nodeId); ndbrequire(refToNode(retRef) == nodeId);
if ((c_nodeStartMaster.activeState) || if ((c_nodeStartMaster.activeState) ||
(c_nodeStartMaster.wait != ZFALSE)) { (c_nodeStartMaster.wait != ZFALSE) ||
ERROR_INSERTED_CLEAR(7174)) {
jam(); jam();
signal->theData[0] = nodeId; signal->theData[0] = nodeId;
signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR; signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR;
...@@ -10448,6 +10493,10 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal) ...@@ -10448,6 +10493,10 @@ void Dbdih::crashSystemAtGcpStop(Signal* signal)
c_copyGCIMaster.m_copyReason, c_copyGCIMaster.m_copyReason,
c_copyGCIMaster.m_waiting); c_copyGCIMaster.m_waiting);
break; break;
case GCP_READY: // shut up lint
case GCP_PREPARE_SENT:
case GCP_COMMIT_SENT:
break;
} }
ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d", ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d",
...@@ -14639,3 +14688,77 @@ Dbdih::NodeRecord::NodeRecord(){ ...@@ -14639,3 +14688,77 @@ Dbdih::NodeRecord::NodeRecord(){
copyCompleted = false; copyCompleted = false;
allowNodeStart = true; allowNodeStart = true;
} }
// DICT lock slave
/**
 * Ask master DICT for a lock of the given type.
 *
 * A slave record is seized to track the request (seize failure is
 * fatal) and its pool index is sent as userPtr. The callback is stored
 * in the record and executed by recvDictLockConf when DICT_LOCK_CONF
 * arrives.
 */
void
Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c)
{
  DictLockSlavePtr lockPtr;
  c_dictLockSlavePool.seize(lockPtr);
  ndbrequire(lockPtr.i != RNIL);

  // Initialise the tracking record; the master's lock index is not
  // known until the CONF arrives.
  lockPtr.p->callback = c;
  lockPtr.p->locked = false;
  lockPtr.p->lockType = lockType;
  lockPtr.p->lockPtr = RNIL;

  DictLockReq* const req = (DictLockReq*)&signal->theData[0];
  req->userPtr = lockPtr.i;
  req->lockType = lockType;
  req->userRef = reference();

  const BlockReference masterDictRef = calcDictBlockRef(cmasterNodeId);
  sendSignal(masterDictRef, GSN_DICT_LOCK_REQ, signal,
             DictLockReq::SignalLength, JBB);
}
// Signal handler for DICT_LOCK_CONF: the lock was granted; all
// processing is done in recvDictLockConf.
void
Dbdih::execDICT_LOCK_CONF(Signal* signal)
{
jamEntry();
recvDictLockConf(signal);
}
// Signal handler for DICT_LOCK_REF: a refused lock request is not
// handled and is treated as a fatal error.
void
Dbdih::execDICT_LOCK_REF(Signal* signal)
{
jamEntry();
ndbrequire(false);
}
/**
 * Process DICT_LOCK_CONF: record the granted lock and run the callback.
 *
 * The slave record is found via the echoed userPtr. The master's lock
 * index is stored, the record is marked locked, and the stored callback
 * is executed with the record's pool index as callback data and return
 * code 0.
 */
void
Dbdih::recvDictLockConf(Signal* signal)
{
  const DictLockConf* conf = (const DictLockConf*)&signal->theData[0];

  DictLockSlavePtr lockPtr;
  c_dictLockSlavePool.getPtr(lockPtr, conf->userPtr);
  lockPtr.p->lockPtr = conf->lockPtr;

  // The CONF must match the outstanding request and arrive only once.
  ndbrequire(lockPtr.p->lockType == conf->lockType);
  ndbrequire(lockPtr.p->locked == false);
  lockPtr.p->locked = true;

  Callback& onLocked = lockPtr.p->callback;
  onLocked.m_callbackData = lockPtr.i;
  execute(signal, onLocked, 0);
}
/**
 * Send DICT_UNLOCK_ORD to the DICT block on the master node and release the
 * local DictLockSlave record.
 *
 * @param signal         signal object whose data area is used for the order
 * @param lockSlavePtrI  pool index of the DictLockSlave record to release
 */
void
Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI)
{
  DictLockSlavePtr slavePtr;
  c_dictLockSlavePool.getPtr(slavePtr, lockSlavePtrI);

  // Copy what the order needs out of the record before it is released.
  DictUnlockOrd* const unlockOrd = (DictUnlockOrd*)&signal->theData[0];
  unlockOrd->lockType = slavePtr.p->lockType;
  unlockOrd->lockPtr = slavePtr.p->lockPtr;

  c_dictLockSlavePool.release(slavePtr);

  const BlockReference masterDictRef = calcDictBlockRef(cmasterNodeId);
  sendSignal(masterDictRef, GSN_DICT_UNLOCK_ORD, signal,
             DictUnlockOrd::SignalLength, JBB);
}
...@@ -2477,7 +2477,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) ...@@ -2477,7 +2477,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
{ {
jam(); jam();
CRASH_INSERTION(932); CRASH_INSERTION(932);
BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
ndbrequire(false); ndbrequire(false);
} }
...@@ -2500,7 +2500,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) ...@@ -2500,7 +2500,7 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
ndbrequire(false); ndbrequire(false);
case ZAPI_INACTIVE: case ZAPI_INACTIVE:
{ {
BaseString::snprintf(buf, 100, "Node %u disconected", nodeId); BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf); progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
ndbrequire(false); ndbrequire(false);
} }
......
...@@ -420,6 +420,10 @@ int main(int argc, char** argv) ...@@ -420,6 +420,10 @@ int main(int argc, char** argv)
FILE * signalLog = fopen(buf, "a"); FILE * signalLog = fopen(buf, "a");
globalSignalLoggers.setOwnNodeId(globalData.ownId); globalSignalLoggers.setOwnNodeId(globalData.ownId);
globalSignalLoggers.setOutputStream(signalLog); globalSignalLoggers.setOutputStream(signalLog);
#if 0 // to log startup
globalSignalLoggers.log(SignalLoggerManager::LogInOut, "BLOCK=DBDICT,DBDIH");
globalData.testOn = 1;
#endif
#endif #endif
catchsigs(false); catchsigs(false);
......
...@@ -115,6 +115,13 @@ public: ...@@ -115,6 +115,13 @@ public:
*/ */
bool hasNext(const Ptr<T> &) const; bool hasNext(const Ptr<T> &) const;
/**
* Check whether a previous element exists, i.e. the given element
* is not the first one in the list.
*
* NOTE ptr must be both p & i
*
* @return true if the element's prevList link is not RNIL
*/
bool hasPrev(const Ptr<T> &) const;
Uint32 noOfElements() const { Uint32 noOfElements() const {
Uint32 c = 0; Uint32 c = 0;
Uint32 i = head.firstItem; Uint32 i = head.firstItem;
...@@ -357,4 +364,11 @@ DLFifoList<T>::hasNext(const Ptr<T> & p) const { ...@@ -357,4 +364,11 @@ DLFifoList<T>::hasNext(const Ptr<T> & p) const {
return p.p->nextList != RNIL; return p.p->nextList != RNIL;
} }
/**
 * True when the element has a predecessor, i.e. its prevList link is
 * something other than RNIL.
 */
template <class T>
inline
bool
DLFifoList<T>::hasPrev(const Ptr<T> & p) const {
  const bool isFirst = (p.p->prevList == RNIL);
  return !isFirst;
}
#endif #endif
...@@ -125,11 +125,13 @@ ...@@ -125,11 +125,13 @@
#ifdef ERROR_INSERT #ifdef ERROR_INSERT
#define ERROR_INSERT_VARIABLE UintR cerrorInsert #define ERROR_INSERT_VARIABLE UintR cerrorInsert
#define ERROR_INSERTED(x) (cerrorInsert == (x)) #define ERROR_INSERTED(x) (cerrorInsert == (x))
/* Test-and-clear: true if cerrorInsert equals x, in which case cerrorInsert is also reset to 0. */
#define ERROR_INSERTED_CLEAR(x) (cerrorInsert == (x) ? (cerrorInsert = 0, true) : false)
#define SET_ERROR_INSERT_VALUE(x) cerrorInsert = x #define SET_ERROR_INSERT_VALUE(x) cerrorInsert = x
#define CLEAR_ERROR_INSERT_VALUE cerrorInsert = 0 #define CLEAR_ERROR_INSERT_VALUE cerrorInsert = 0
#else #else
#define ERROR_INSERT_VARIABLE typedef void * cerrorInsert // Will generate compiler error if used #define ERROR_INSERT_VARIABLE typedef void * cerrorInsert // Will generate compiler error if used
#define ERROR_INSERTED(x) false #define ERROR_INSERTED(x) false
/* Variant used when ERROR_INSERT is not defined: always false, nothing to clear. */
#define ERROR_INSERTED_CLEAR(x) false
#define SET_ERROR_INSERT_VALUE(x) #define SET_ERROR_INSERT_VALUE(x)
#define CLEAR_ERROR_INSERT_VALUE #define CLEAR_ERROR_INSERT_VALUE
#endif #endif
......
...@@ -325,6 +325,7 @@ ErrorBundle ErrorCodes[] = { ...@@ -325,6 +325,7 @@ ErrorBundle ErrorCodes[] = {
* SchemaError * SchemaError
*/ */
{ 701, SE, "System busy with other schema operation" }, { 701, SE, "System busy with other schema operation" },
{ 711, SE, "System busy with node restart, schema operations not allowed" },
{ 703, SE, "Invalid table format" }, { 703, SE, "Invalid table format" },
{ 704, SE, "Attribute name too long" }, { 704, SE, "Attribute name too long" },
{ 705, SE, "Table name too long" }, { 705, SE, "Table name too long" },
......
...@@ -1551,6 +1551,282 @@ end: ...@@ -1551,6 +1551,282 @@ end:
return result; return result;
} }
// NFNR
// Restarter controls dict ops : 1-run 2-pause 3-stop
// synced by polling...
/**
 * Publish a command in the context property "DictOps_CMD" and poll, with a
 * 100 ms sleep between polls, until property "DictOps_ACK" holds the same
 * value.
 *
 * @param ctx  test context carrying the shared properties
 * @param cmd  command value to publish
 * @return true once the command is acknowledged, false if the test was
 *         stopped while waiting
 */
static bool
send_dict_ops_cmd(NDBT_Context* ctx, Uint32 cmd)
{
  ctx->setProperty("DictOps_CMD", cmd);
  for (;;) {
    if (ctx->isTestStopped())
      return false;
    if (ctx->getProperty("DictOps_ACK") == cmd)
      return true;
    NdbSleep_MilliSleep(100);
  }
}
/**
 * Wait until the command in property "DictOps_CMD" allows dict ops to run.
 *
 * Every poll echoes the command just read into "DictOps_ACK".  Command 1
 * ends the wait successfully, command 3 ends it with failure, and any other
 * value causes a 100 ms sleep before the next poll.
 *
 * @return true when command 1 was read; false on command 3 or if the test
 *         was stopped
 */
static bool
recv_dict_ops_run(NDBT_Context* ctx)
{
  for (;;) {
    if (ctx->isTestStopped())
      return false;
    const Uint32 cmd = ctx->getProperty("DictOps_CMD");
    ctx->setProperty("DictOps_ACK", cmd);
    switch (cmd) {
    case 1:
      return true;
    case 3:
      return false;
    default:
      break;
    }
    NdbSleep_MilliSleep(100);
  }
}
// Test step: repeatedly restart one or two non-master data nodes while
// telling the dict-ops step (via send_dict_ops_cmd) to run or pause.
//
// Per loop: pick the node(s), set the dict-ops mode for the node-failure
// phase, restart the node(s) with nostart, wait until they are in no-start,
// set the dict-ops mode for the node-restart phase, start the node(s),
// optionally insert errors, and wait for the cluster to be started.
// After the loops the dict-ops step is told to stop (command 3).
//
// Properties read: Restart_NF_ops, Restart_NF_type, Restart_NR_ops,
// Restart_NR_error.
//
// NOTE(review): CHECK is defined outside this chunk; presumably it records a
// failure in `result` and breaks out of the innermost enclosing loop - confirm.
// If so, a CHECK failing inside one of the inner while loops only leaves that
// inner loop.
int
runRestarts(NDBT_Context* ctx, NDBT_Step* step)
{
// Error inserts for the master; index 0 means "no error".
static int err_master[] = { // non-crashing
0,
7174 // send one fake START_PERMREF
};
// Error inserts for the restarting node; index 0 means "no error".
static int err_node[] = {
0,
7121, // crash on START_PERMCONF
7130 // crash on START_MECONF
};
const uint err_master_cnt = sizeof(err_master)/sizeof(err_master[0]);
const uint err_node_cnt = sizeof(err_node)/sizeof(err_node[0]);
myRandom48Init(NdbTick_CurrentMillisecond());
NdbRestarter restarter;
int result = NDBT_OK;
const int loops = ctx->getNumLoops();
for (int l = 0; l < loops && result == NDBT_OK; l++) {
g_info << "1: === loop " << l << " ===" << endl;
// assuming 2-way replicated
int numnodes = restarter.getNumDbNodes();
CHECK(numnodes >= 1);
// nothing to restart in a single-node cluster
if (numnodes == 1)
break;
int masterNodeId = restarter.getMasterNodeId();
CHECK(masterNodeId != -1);
// for more complex cases need more restarter support methods
int nodeIdList[2] = { 0, 0 };
int nodeIdCnt = 0;
// first victim: a random non-master node
if (numnodes >= 2) {
int rand = myRandom48(numnodes);
int nodeId = restarter.getRandomNotMasterNodeId(rand);
CHECK(nodeId != -1);
nodeIdList[nodeIdCnt++] = nodeId;
}
// second victim (4+ nodes): obtained from getRandomNodeOtherNodeGroup,
// skipped if it turns out to be the master
if (numnodes >= 4) {
int rand = myRandom48(numnodes);
int nodeId = restarter.getRandomNodeOtherNodeGroup(nodeIdList[0], rand);
CHECK(nodeId != -1);
if (nodeId != masterNodeId)
nodeIdList[nodeIdCnt++] = nodeId;
}
// nodeIdList[1] prints as 0 when only one node was chosen
g_info << "1: master=" << masterNodeId << " nodes=" << nodeIdList[0] << "," << nodeIdList[1] << endl;
const unsigned maxsleep = 2000; //ms
bool NF_ops = ctx->getProperty("Restart_NF_ops");
uint NF_type = ctx->getProperty("Restart_NF_type");
bool NR_ops = ctx->getProperty("Restart_NR_ops");
bool NR_error = ctx->getProperty("Restart_NR_error");
// dict-ops mode for the node-failure phase: 1 = run, 2 = pause
g_info << "1: " << (NF_ops ? "run" : "pause") << " dict ops" << endl;
if (! send_dict_ops_cmd(ctx, NF_ops ? 1 : 2))
break;
NdbSleep_MilliSleep(myRandom48(maxsleep));
{
int i = 0;
while (i < nodeIdCnt) {
int nodeId = nodeIdList[i++];
bool nostart = true;
// NF_type 0: random choice of abort; otherwise abort only when NF_type is 2
bool abort = NF_type == 0 ? myRandom48(2) : (NF_type == 2);
bool initial = myRandom48(2);
// longest possible text is "flags: nostart,abort,initial", which fits in 40 bytes
char flags[40];
strcpy(flags, "flags: nostart");
if (abort)
strcat(flags, ",abort");
if (initial)
strcat(flags, ",initial");
g_info << "1: restart " << nodeId << " " << flags << endl;
CHECK(restarter.restartOneDbNode(nodeId, initial, nostart, abort) == 0);
}
}
g_info << "1: wait for nostart" << endl;
CHECK(restarter.waitNodesNoStart(nodeIdList, nodeIdCnt) == 0);
NdbSleep_MilliSleep(myRandom48(maxsleep));
// dict-ops mode for the node-restart phase: 1 = run, 2 = pause
g_info << "1: " << (NR_ops ? "run" : "pause") << " dict ops" << endl;
if (! send_dict_ops_cmd(ctx, NR_ops ? 1 : 2))
break;
NdbSleep_MilliSleep(myRandom48(maxsleep));
g_info << "1: start nodes" << endl;
CHECK(restarter.startNodes(nodeIdList, nodeIdCnt) == 0);
if (NR_error) {
{
// randomly pick a master error insert (0 = none)
int rand = myRandom48(err_master_cnt);
int err = err_master[rand];
if (err != 0) {
g_info << "1: insert master error " << err << endl;
CHECK(restarter.insertErrorInNode(masterNodeId, err) == 0);
}
}
// limitation: cannot have 2 node restarts and crash_insert
// one node may die for real (NF during startup)
// (the loop below therefore runs only when exactly one node was restarted)
int i = 0;
while (i < nodeIdCnt && nodeIdCnt == 1) {
int nodeId = nodeIdList[i++];
int rand = myRandom48(err_node_cnt);
int err = err_node[rand];
if (err != 0) {
g_info << "1: insert node " << nodeId << " error " << err << endl;
CHECK(restarter.insertErrorInNode(nodeId, err) == 0);
}
}
}
NdbSleep_MilliSleep(myRandom48(maxsleep));
g_info << "1: wait cluster started" << endl;
CHECK(restarter.waitClusterStarted() == 0);
NdbSleep_MilliSleep(myRandom48(maxsleep));
g_info << "1: restart done" << endl;
}
// tell the dict-ops step to stop (command 3); the return value is ignored
g_info << "1: stop dict ops" << endl;
send_dict_ops_cmd(ctx, 3);
return result;
}
/**
 * Test step: repeatedly create the test table, verify it exists, load a
 * random number of records, drop it and verify it is gone.
 *
 * Each iteration first waits in recv_dict_ops_run() for permission to run
 * and the step ends when that returns false.  createTable and dropTable are
 * retried, after a short random sleep, for as long as they fail with error
 * code 711; any other failure ends the step with NDBT_FAILED.
 *
 * @return NDBT_OK, or NDBT_FAILED if any operation or verification failed
 */
int
runDictOps(NDBT_Context* ctx, NDBT_Step* step)
{
  myRandom48Init(NdbTick_CurrentMillisecond());
  int result = NDBT_OK;

  int loopNo = 0;
  while (result == NDBT_OK) {
    if (! recv_dict_ops_run(ctx))
      break;
    g_info << "2: === loop " << loopNo << " ===" << endl;

    Ndb* pNdb = GETNDB(step);
    NdbDictionary::Dictionary* pDic = pNdb->getDictionary();
    const NdbDictionary::Table* pTab = ctx->getTab();
    const char* tabName = pTab->getName();
    const unsigned long maxsleep = 100; //ms

    g_info << "2: create table" << endl;
    // Retry the create while it fails with 711; only the first failure is logged.
    for (uint attempt = 1; ; attempt++) {
      if (pDic->createTable(*pTab) == 0)
        break;
      const NdbError err = pDic->getNdbError();
      if (attempt == 1)
        g_err << "2: " << tabName << ": create failed: " << err << endl;
      if (err.code != 711) {
        result = NDBT_FAILED;
        break;
      }
      NdbSleep_MilliSleep(myRandom48(maxsleep));
    }
    if (result != NDBT_OK)
      break;
    NdbSleep_MilliSleep(myRandom48(maxsleep));

    g_info << "2: verify create" << endl;
    const NdbDictionary::Table* pTab2 = pDic->getTable(tabName);
    if (pTab2 == NULL) {
      const NdbError err = pDic->getNdbError();
      g_err << "2: " << tabName << ": verify create: " << err << endl;
      result = NDBT_FAILED;
      break;
    }
    NdbSleep_MilliSleep(myRandom48(maxsleep));

    // From here on use the table object retrieved from the dictionary.
    pTab = pTab2;

    int records = myRandom48(ctx->getNumRecords());
    g_info << "2: load " << records << " records" << endl;
    HugoTransactions hugoTrans(*pTab);
    if (hugoTrans.loadTable(pNdb, records) != 0) {
      // XXX get error code from hugo
      g_err << "2: " << tabName << ": load failed" << endl;
      result = NDBT_FAILED;
      break;
    }
    NdbSleep_MilliSleep(myRandom48(maxsleep));

    g_info << "2: drop" << endl;
    // Retry the drop while it fails with 711; only the first failure is logged.
    for (uint attempt = 1; ; attempt++) {
      if (pDic->dropTable(tabName) == 0)
        break;
      const NdbError err = pDic->getNdbError();
      if (attempt == 1)
        g_err << "2: " << tabName << ": drop failed: " << err << endl;
      if (err.code != 711) {
        result = NDBT_FAILED;
        break;
      }
      NdbSleep_MilliSleep(myRandom48(maxsleep));
    }
    if (result != NDBT_OK)
      break;
    NdbSleep_MilliSleep(myRandom48(maxsleep));

    g_info << "2: verify drop" << endl;
    const NdbDictionary::Table* pTab3 = pDic->getTable(tabName);
    if (pTab3 != NULL) {
      g_err << "2: " << tabName << ": verify drop: table exists" << endl;
      result = NDBT_FAILED;
      break;
    }
    // The failed lookup must report error code 709; anything else is a failure.
    const int lookupErrCode = pDic->getNdbError().code;
    if (lookupErrCode != 709) {
      const NdbError err = pDic->getNdbError();
      g_err << "2: " << tabName << ": verify drop: " << err << endl;
      result = NDBT_FAILED;
      break;
    }
    NdbSleep_MilliSleep(myRandom48(maxsleep));

    loopNo++;
  }
  return result;
}
NDBT_TESTSUITE(testDict); NDBT_TESTSUITE(testDict);
TESTCASE("CreateAndDrop", TESTCASE("CreateAndDrop",
"Try to create and drop the table loop number of times\n"){ "Try to create and drop the table loop number of times\n"){
...@@ -1655,6 +1931,34 @@ TESTCASE("FailAddFragment", ...@@ -1655,6 +1931,34 @@ TESTCASE("FailAddFragment",
"Fail add fragment or attribute in ACC or TUP or TUX\n"){ "Fail add fragment or attribute in ACC or TUP or TUX\n"){
INITIALIZER(runFailAddFragment); INITIALIZER(runFailAddFragment);
} }
// Restart_NF_ops=1: runRestarts tells the dict-ops step to run (not pause)
// before the nodes are restarted.
// Restart_NF_type=1: runRestarts restarts the nodes without the abort flag.
// Restart_NR_ops / Restart_NR_error are not set here - presumably read as 0
// (dict ops paused during node start, no error inserts); confirm.
TESTCASE("Restart_NF1",
"DICT ops during node graceful shutdown (not master)"){
TC_PROPERTY("Restart_NF_ops", 1);
TC_PROPERTY("Restart_NF_type", 1);
STEP(runRestarts);
STEP(runDictOps);
}
// Restart_NF_ops=1: runRestarts tells the dict-ops step to run (not pause)
// before the nodes are restarted.
// Restart_NF_type=2: runRestarts restarts the nodes with the abort flag set.
// Restart_NR_ops / Restart_NR_error are not set here - presumably read as 0
// (dict ops paused during node start, no error inserts); confirm.
TESTCASE("Restart_NF2",
"DICT ops during node shutdown abort (not master)"){
TC_PROPERTY("Restart_NF_ops", 1);
TC_PROPERTY("Restart_NF_type", 2);
STEP(runRestarts);
STEP(runDictOps);
}
// Restart_NR_ops=1: runRestarts tells the dict-ops step to run (not pause)
// before the stopped nodes are started again.
// Restart_NF_ops / Restart_NF_type are not set here - presumably read as 0
// (dict ops paused during shutdown, abort flag chosen at random); confirm.
TESTCASE("Restart_NR1",
"DICT ops during node startup (not master)"){
TC_PROPERTY("Restart_NR_ops", 1);
STEP(runRestarts);
STEP(runDictOps);
}
// Restart_NR_ops=1: runRestarts tells the dict-ops step to run (not pause)
// before the stopped nodes are started again.
// Restart_NR_error=1: after starting the nodes, runRestarts may insert an
// error in the master and, when exactly one node was restarted, in that node.
// Restart_NF_ops / Restart_NF_type are not set here - presumably read as 0
// (dict ops paused during shutdown, abort flag chosen at random); confirm.
TESTCASE("Restart_NR2",
"DICT ops during node startup with crash inserts (not master)"){
TC_PROPERTY("Restart_NR_ops", 1);
TC_PROPERTY("Restart_NR_error", 1);
STEP(runRestarts);
STEP(runDictOps);
}
NDBT_TESTSUITE_END(testDict); NDBT_TESTSUITE_END(testDict);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment