Commit 165d5390 authored by unknown

bug#10987 - ndb - unable to find restorable replica

  Introduce new variable c_newest_restorable_gci
    which is set _after_ both GCP_SAVE and COPY_GCI
    
  This variable is used when cutting redo (calcKeepGci)
  
  Also make sure a complete GCI is run in between LCPs


ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
  Introduce new variable c_newest_restorable_gci
    which is set _after_ both GCP_SAVE and COPY_GCI
  
  This variable is used when cutting redo (calcKeepGci)
  Also make sure a complete GCI is run in between LCPs
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Introduce new variable c_newest_restorable_gci
    which is set _after_ both GCP_SAVE and COPY_GCI
  
  This variable is used when cutting redo (calcKeepGci)
  Also make sure a complete GCI is run in between LCPs
parent bbcb4a56
...@@ -774,7 +774,7 @@ private: ...@@ -774,7 +774,7 @@ private:
//------------------------------------ //------------------------------------
// Methods for LCP functionality // Methods for LCP functionality
//------------------------------------ //------------------------------------
void checkKeepGci(Uint32 replicaStartIndex); void checkKeepGci(TabRecordPtr, Uint32, Fragmentstore*, Uint32);
void checkLcpStart(Signal *, Uint32 lineNo); void checkLcpStart(Signal *, Uint32 lineNo);
void checkStartMoreLcp(Signal *, Uint32 nodeId); void checkStartMoreLcp(Signal *, Uint32 nodeId);
bool reportLcpCompletion(const class LcpFragRep *); bool reportLcpCompletion(const class LcpFragRep *);
...@@ -1292,7 +1292,7 @@ private: ...@@ -1292,7 +1292,7 @@ private:
} }
Uint32 lcpStart; Uint32 lcpStart;
Uint32 lcpStartGcp; Uint32 lcpStopGcp;
Uint32 keepGci; /* USED TO CALCULATE THE GCI TO KEEP AFTER A LCP */ Uint32 keepGci; /* USED TO CALCULATE THE GCI TO KEEP AFTER A LCP */
Uint32 oldestRestorableGci; Uint32 oldestRestorableGci;
...@@ -1361,6 +1361,7 @@ private: ...@@ -1361,6 +1361,7 @@ private:
Uint32 cstarttype; Uint32 cstarttype;
Uint32 csystemnodes; Uint32 csystemnodes;
Uint32 currentgcp; Uint32 currentgcp;
Uint32 c_newest_restorable_gci;
enum GcpMasterTakeOverState { enum GcpMasterTakeOverState {
GMTOS_IDLE = 0, GMTOS_IDLE = 0,
......
...@@ -674,6 +674,7 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal) ...@@ -674,6 +674,7 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal)
jam(); jam();
coldgcp = SYSFILE->newestRestorableGCI; coldgcp = SYSFILE->newestRestorableGCI;
crestartGci = SYSFILE->newestRestorableGCI; crestartGci = SYSFILE->newestRestorableGCI;
c_newest_restorable_gci = SYSFILE->newestRestorableGCI;
Sysfile::setRestartOngoing(SYSFILE->systemRestartBits); Sysfile::setRestartOngoing(SYSFILE->systemRestartBits);
currentgcp = coldgcp + 1; currentgcp = coldgcp + 1;
cnewgcp = coldgcp + 1; cnewgcp = coldgcp + 1;
...@@ -692,6 +693,7 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal) ...@@ -692,6 +693,7 @@ void Dbdih::execCOPY_GCIREQ(Signal* signal)
ok = true; ok = true;
jam(); jam();
cgcpParticipantState = GCP_PARTICIPANT_COPY_GCI_RECEIVED; cgcpParticipantState = GCP_PARTICIPANT_COPY_GCI_RECEIVED;
c_newest_restorable_gci = SYSFILE->newestRestorableGCI;
setNodeInfo(signal); setNodeInfo(signal);
break; break;
}//if }//if
...@@ -7749,6 +7751,8 @@ void Dbdih::execCOPY_GCICONF(Signal* signal) ...@@ -7749,6 +7751,8 @@ void Dbdih::execCOPY_GCICONF(Signal* signal)
signal->theData[1] = coldgcp; signal->theData[1] = coldgcp;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
c_newest_restorable_gci = coldgcp;
CRASH_INSERTION(7004); CRASH_INSERTION(7004);
emptyWaitGCPMasterQueue(signal); emptyWaitGCPMasterQueue(signal);
cgcpStatus = GCP_READY; cgcpStatus = GCP_READY;
...@@ -9155,7 +9159,7 @@ void Dbdih::checkTcCounterLab(Signal* signal) ...@@ -9155,7 +9159,7 @@ void Dbdih::checkTcCounterLab(Signal* signal)
}//if }//if
c_lcpState.ctimer += 32; c_lcpState.ctimer += 32;
if ((c_nodeStartMaster.blockLcp == true) || if ((c_nodeStartMaster.blockLcp == true) ||
((c_lcpState.lcpStartGcp + 1) > currentgcp)) { (c_lcpState.lcpStopGcp >= c_newest_restorable_gci)) {
jam(); jam();
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
// No reason to start juggling the states and checking for start of LCP if // No reason to start juggling the states and checking for start of LCP if
...@@ -9238,7 +9242,6 @@ void Dbdih::execTCGETOPSIZECONF(Signal* signal) ...@@ -9238,7 +9242,6 @@ void Dbdih::execTCGETOPSIZECONF(Signal* signal)
/* ----------------------------------------------------------------------- */ /* ----------------------------------------------------------------------- */
c_lcpState.ctimer = 0; c_lcpState.ctimer = 0;
c_lcpState.keepGci = coldgcp; c_lcpState.keepGci = coldgcp;
c_lcpState.lcpStartGcp = currentgcp;
/* ----------------------------------------------------------------------- */ /* ----------------------------------------------------------------------- */
/* UPDATE THE NEW LATEST LOCAL CHECKPOINT ID. */ /* UPDATE THE NEW LATEST LOCAL CHECKPOINT ID. */
/* ----------------------------------------------------------------------- */ /* ----------------------------------------------------------------------- */
...@@ -9310,7 +9313,7 @@ void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId) ...@@ -9310,7 +9313,7 @@ void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId)
cnoOfActiveTables++; cnoOfActiveTables++;
FragmentstorePtr fragPtr; FragmentstorePtr fragPtr;
getFragstore(tabPtr.p, fragId, fragPtr); getFragstore(tabPtr.p, fragId, fragPtr);
checkKeepGci(fragPtr.p->storedReplicas); checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas);
fragId++; fragId++;
if (fragId >= tabPtr.p->totalfragments) { if (fragId >= tabPtr.p->totalfragments) {
jam(); jam();
...@@ -10168,6 +10171,7 @@ void Dbdih::allNodesLcpCompletedLab(Signal* signal) ...@@ -10168,6 +10171,7 @@ void Dbdih::allNodesLcpCompletedLab(Signal* signal)
signal->theData[0] = EventReport::LocalCheckpointCompleted; //Event type signal->theData[0] = EventReport::LocalCheckpointCompleted; //Event type
signal->theData[1] = SYSFILE->latestLCP_ID; signal->theData[1] = SYSFILE->latestLCP_ID;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
c_lcpState.lcpStopGcp = c_newest_restorable_gci;
/** /**
* Start checking for next LCP * Start checking for next LCP
...@@ -10522,7 +10526,8 @@ void Dbdih::checkEscalation() ...@@ -10522,7 +10526,8 @@ void Dbdih::checkEscalation()
/* DESCRIPTION: CHECK FOR MINIMUM GCI RESTORABLE WITH NEW LOCAL */ /* DESCRIPTION: CHECK FOR MINIMUM GCI RESTORABLE WITH NEW LOCAL */
/* CHECKPOINT. */ /* CHECKPOINT. */
/*************************************************************************/ /*************************************************************************/
void Dbdih::checkKeepGci(Uint32 replicaStartIndex) void Dbdih::checkKeepGci(TabRecordPtr tabPtr, Uint32 fragId, Fragmentstore*,
Uint32 replicaStartIndex)
{ {
ReplicaRecordPtr ckgReplicaPtr; ReplicaRecordPtr ckgReplicaPtr;
ckgReplicaPtr.i = replicaStartIndex; ckgReplicaPtr.i = replicaStartIndex;
...@@ -10544,7 +10549,6 @@ void Dbdih::checkKeepGci(Uint32 replicaStartIndex) ...@@ -10544,7 +10549,6 @@ void Dbdih::checkKeepGci(Uint32 replicaStartIndex)
if (oldestRestorableGci > c_lcpState.oldestRestorableGci) { if (oldestRestorableGci > c_lcpState.oldestRestorableGci) {
jam(); jam();
c_lcpState.oldestRestorableGci = oldestRestorableGci; c_lcpState.oldestRestorableGci = oldestRestorableGci;
ndbrequire(((int)c_lcpState.oldestRestorableGci) >= 0);
}//if }//if
ckgReplicaPtr.i = ckgReplicaPtr.p->nextReplica; ckgReplicaPtr.i = ckgReplicaPtr.p->nextReplica;
}//while }//while
...@@ -10838,7 +10842,7 @@ void Dbdih::findMinGci(ReplicaRecordPtr fmgReplicaPtr, ...@@ -10838,7 +10842,7 @@ void Dbdih::findMinGci(ReplicaRecordPtr fmgReplicaPtr,
do { do {
ndbrequire(lcpNo < MAX_LCP_STORED); ndbrequire(lcpNo < MAX_LCP_STORED);
if (fmgReplicaPtr.p->lcpStatus[lcpNo] == ZVALID && if (fmgReplicaPtr.p->lcpStatus[lcpNo] == ZVALID &&
fmgReplicaPtr.p->maxGciStarted[lcpNo] <= coldgcp) fmgReplicaPtr.p->maxGciStarted[lcpNo] < c_newest_restorable_gci)
{ {
jam(); jam();
keepGci = fmgReplicaPtr.p->maxGciCompleted[lcpNo]; keepGci = fmgReplicaPtr.p->maxGciCompleted[lcpNo];
...@@ -10960,7 +10964,7 @@ void Dbdih::initCommonData() ...@@ -10960,7 +10964,7 @@ void Dbdih::initCommonData()
c_lcpState.clcpDelay = 0; c_lcpState.clcpDelay = 0;
c_lcpState.lcpStart = ZIDLE; c_lcpState.lcpStart = ZIDLE;
c_lcpState.lcpStartGcp = 0; c_lcpState.lcpStopGcp = 0;
c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__); c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
c_lcpState.currentFragment.tableId = 0; c_lcpState.currentFragment.tableId = 0;
c_lcpState.currentFragment.fragmentId = 0; c_lcpState.currentFragment.fragmentId = 0;
...@@ -10996,6 +11000,7 @@ void Dbdih::initCommonData() ...@@ -10996,6 +11000,7 @@ void Dbdih::initCommonData()
csystemnodes = 0; csystemnodes = 0;
c_updateToLock = RNIL; c_updateToLock = RNIL;
currentgcp = 0; currentgcp = 0;
c_newest_restorable_gci = 0;
cverifyQueueCounter = 0; cverifyQueueCounter = 0;
cwaitLcpSr = false; cwaitLcpSr = false;
...@@ -11067,6 +11072,7 @@ void Dbdih::initRestartInfo() ...@@ -11067,6 +11072,7 @@ void Dbdih::initRestartInfo()
currentgcp = 2; currentgcp = 2;
cnewgcp = 2; cnewgcp = 2;
crestartGci = 1; crestartGci = 1;
c_newest_restorable_gci = 1;
SYSFILE->keepGCI = 1; SYSFILE->keepGCI = 1;
SYSFILE->oldestRestorableGCI = 1; SYSFILE->oldestRestorableGCI = 1;
...@@ -13038,9 +13044,9 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) ...@@ -13038,9 +13044,9 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
if (signal->theData[0] == 7001) { if (signal->theData[0] == 7001) {
infoEvent("c_lcpState.keepGci = %d", infoEvent("c_lcpState.keepGci = %d",
c_lcpState.keepGci); c_lcpState.keepGci);
infoEvent("c_lcpState.lcpStatus = %d, clcpStartGcp = %d", infoEvent("c_lcpState.lcpStatus = %d, clcpStopGcp = %d",
c_lcpState.lcpStatus, c_lcpState.lcpStatus,
c_lcpState.lcpStartGcp); c_lcpState.lcpStopGcp);
infoEvent("cgcpStartCounter = %d, cimmediateLcpStart = %d", infoEvent("cgcpStartCounter = %d, cimmediateLcpStart = %d",
cgcpStartCounter, c_lcpState.immediateLcpStart); cgcpStartCounter, c_lcpState.immediateLcpStart);
}//if }//if
...@@ -13221,8 +13227,8 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal) ...@@ -13221,8 +13227,8 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal)
infoEvent("lcpStatus = %d (update place = %d) ", infoEvent("lcpStatus = %d (update place = %d) ",
c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace); c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace);
infoEvent infoEvent
("lcpStart = %d lcpStartGcp = %d keepGci = %d oldestRestorable = %d", ("lcpStart = %d lcpStopGcp = %d keepGci = %d oldestRestorable = %d",
c_lcpState.lcpStart, c_lcpState.lcpStartGcp, c_lcpState.lcpStart, c_lcpState.lcpStopGcp,
c_lcpState.keepGci, c_lcpState.oldestRestorableGci); c_lcpState.keepGci, c_lcpState.oldestRestorableGci);
infoEvent infoEvent
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment