Commit 217e7f22 authored by unknown

bug#9961 - ndb gcp stop

  add lots of printouts when stop is detected


ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  If waiting for GCP_SAVE_REQ
    just kill nodes we're waiting for
ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
  Add lots of printouts when crashing due to GCP stop
parent 80be3c78
...@@ -10290,7 +10290,24 @@ void Dbdih::tableCloseLab(Signal* signal, FileRecordPtr filePtr) ...@@ -10290,7 +10290,24 @@ void Dbdih::tableCloseLab(Signal* signal, FileRecordPtr filePtr)
* GCP stop detected, * GCP stop detected,
* send SYSTEM_ERROR to all other alive nodes * send SYSTEM_ERROR to all other alive nodes
*/ */
void Dbdih::crashSystemAtGcpStop(Signal* signal){ void Dbdih::crashSystemAtGcpStop(Signal* signal)
{
if(cgcpStatus == GCP_NODE_FINISHED)
{
/**
* We're waiting for a GCP save conf
*/
ndbrequire(!c_GCP_SAVEREQ_Counter.done());
NodeReceiverGroup rg(DBLQH, c_GCP_SAVEREQ_Counter);
signal->theData[0] = 2305;
sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB);
infoEvent("Detected GCP stop...sending kill to %s",
c_GCP_SAVEREQ_Counter.getText());
ndbout_c("Detected GCP stop...sending kill to %s",
c_GCP_SAVEREQ_Counter.getText());
return;
}
NodeRecordPtr nodePtr; NodeRecordPtr nodePtr;
for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) { for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
jam(); jam();
......
...@@ -169,6 +169,8 @@ void Dblqh::execTUP_COM_UNBLOCK(Signal* signal) ...@@ -169,6 +169,8 @@ void Dblqh::execTUP_COM_UNBLOCK(Signal* signal)
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
void Dblqh::systemError(Signal* signal) void Dblqh::systemError(Signal* signal)
{ {
signal->theData[0] = 2304;
execDUMP_STATE_ORD(signal);
progError(0, 0); progError(0, 0);
}//Dblqh::systemError() }//Dblqh::systemError()
...@@ -12598,6 +12600,22 @@ void Dblqh::lastWriteInFileLab(Signal* signal) ...@@ -12598,6 +12600,22 @@ void Dblqh::lastWriteInFileLab(Signal* signal)
void Dblqh::writePageZeroLab(Signal* signal) void Dblqh::writePageZeroLab(Signal* signal)
{ {
if (false && logPartPtr.p->logPartState == LogPartRecord::FILE_CHANGE_PROBLEM)
{
if (logPartPtr.p->firstLogQueue == RNIL)
{
jam();
logPartPtr.p->logPartState = LogPartRecord::IDLE;
ndbout_c("resetting logPartState to IDLE");
}
else
{
jam();
logPartPtr.p->logPartState = LogPartRecord::ACTIVE;
ndbout_c("resetting logPartState to ACTIVE");
}
}
logFilePtr.p->fileChangeState = LogFileRecord::NOT_ONGOING; logFilePtr.p->fileChangeState = LogFileRecord::NOT_ONGOING;
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
/* IT COULD HAVE ARRIVED PAGE WRITES TO THE CURRENT FILE WHILE WE WERE */ /* IT COULD HAVE ARRIVED PAGE WRITES TO THE CURRENT FILE WHILE WE WERE */
...@@ -15661,6 +15679,7 @@ void Dblqh::warningHandlerLab(Signal* signal) ...@@ -15661,6 +15679,7 @@ void Dblqh::warningHandlerLab(Signal* signal)
void Dblqh::systemErrorLab(Signal* signal) void Dblqh::systemErrorLab(Signal* signal)
{ {
systemError(signal);
progError(0, 0); progError(0, 0);
/*************************************************************************>*/ /*************************************************************************>*/
/* WE WANT TO INVOKE AN IMMEDIATE ERROR HERE SO WE GET THAT BY */ /* WE WANT TO INVOKE AN IMMEDIATE ERROR HERE SO WE GET THAT BY */
...@@ -18526,8 +18545,60 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal) ...@@ -18526,8 +18545,60 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal)
return; return;
} }
Uint32 arg= dumpState->args[0];
if(arg == 2304 || arg == 2305)
{
jam();
Uint32 i;
GcpRecordPtr gcp; gcp.i = RNIL;
for(i = 0; i<4; i++)
{
logPartPtr.i = i;
ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
ndbout_c("LP %d state: %d WW_Gci: %d gcprec: %d flq: %d currfile: %d tailFileNo: %d",
i,
logPartPtr.p->logPartState,
logPartPtr.p->waitWriteGciLog,
logPartPtr.p->gcprec,
logPartPtr.p->firstLogQueue,
logPartPtr.p->currentLogfile,
logPartPtr.p->logTailFileNo);
if(gcp.i == RNIL && logPartPtr.p->gcprec != RNIL)
gcp.i = logPartPtr.p->gcprec;
LogFileRecordPtr logFilePtr;
Uint32 first= logFilePtr.i= logPartPtr.p->firstLogfile;
do
{
ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
ndbout_c(" file %d(%d) FileChangeState: %d logFileStatus: %d",
logFilePtr.p->fileNo,
logFilePtr.i,
logFilePtr.p->fileChangeState,
logFilePtr.p->logFileStatus);
logFilePtr.i = logFilePtr.p->nextLogFile;
} while(logFilePtr.i != first);
}
if(gcp.i != RNIL)
{
ptrCheckGuard(gcp, cgcprecFileSize, gcpRecord);
for(i = 0; i<4; i++)
{
ndbout_c(" GCP %d file: %d state: %d sync: %d",
i, gcp.p->gcpFilePtr[i], gcp.p->gcpLogPartState[i],
gcp.p->gcpSyncReady[i]);
}
}
if(arg== 2305)
{
progError(__LINE__, ERR_SYSTEM_ERROR,
"Shutting down node due to failed handling of GCP_SAVEREQ");
}
}
}//Dblqh::execDUMP_STATE_ORD() }//Dblqh::execDUMP_STATE_ORD()
void Dblqh::execSET_VAR_REQ(Signal* signal) void Dblqh::execSET_VAR_REQ(Signal* signal)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment