Commit 354fa102 authored by unknown's avatar unknown

Bug #12992 Cluster StopOnError = Y restarts ndbd indefinitely

parent 4345853e
......@@ -2493,6 +2493,14 @@ void Ndbcntr::Missra::sendNextSTTOR(Signal* signal){
const Uint32 start = currentBlockIndex;
if (currentStartPhase == ZSTART_PHASE_6)
{
// Ndbd has passed the critical startphases.
// Change error handler from "startup" state
// to normal state.
ErrorReporter::setErrorHandlerShutdownType();
}
for(; currentBlockIndex < ALL_BLOCKS_SZ; currentBlockIndex++){
jam();
if(ALL_BLOCKS[currentBlockIndex].NextSP == currentStartPhase){
......
......@@ -152,6 +152,14 @@ ErrorReporter::formatMessage(ErrorCategory type,
return;
}
NdbShutdownType ErrorReporter::s_errorHandlerShutdownType = NST_ErrorHandler;
/**
 * Store the shutdown type kept in s_errorHandlerShutdownType.
 *
 * The stored value is what handleAssert() and handleThreadAssert() hand to
 * NdbShutdown(), and what handleError() substitutes when it is asked to
 * shut down with NST_ErrorHandler.
 *
 * @param nst  shutdown type to remember
 */
void
ErrorReporter::setErrorHandlerShutdownType(NdbShutdownType nst)
{
  ErrorReporter::s_errorHandlerShutdownType = nst;
}
void
ErrorReporter::handleAssert(const char* message, const char* file, int line)
{
......@@ -170,7 +178,7 @@ ErrorReporter::handleAssert(const char* message, const char* file, int line)
WriteMessage(assert, ERR_ERROR_PRGERR, message, refMessage,
theEmulatedJamIndex, theEmulatedJam);
NdbShutdown(NST_ErrorHandler);
NdbShutdown(s_errorHandlerShutdownType);
}
void
......@@ -182,7 +190,7 @@ ErrorReporter::handleThreadAssert(const char* message,
BaseString::snprintf(refMessage, 100, "file: %s lineNo: %d - %s",
file, line, message);
NdbShutdown(NST_ErrorHandler);
NdbShutdown(s_errorHandlerShutdownType);
}//ErrorReporter::handleThreadAssert()
......@@ -201,6 +209,8 @@ ErrorReporter::handleError(ErrorCategory type, int messageID,
if(messageID == ERR_ERROR_INSERT){
NdbShutdown(NST_ErrorInsert);
} else {
if (nst == NST_ErrorHandler)
nst = s_errorHandlerShutdownType;
NdbShutdown(nst);
}
}
......
......@@ -26,6 +26,7 @@
class ErrorReporter
{
public:
static void setErrorHandlerShutdownType(NdbShutdownType nst = NST_ErrorHandler);
static void handleAssert(const char* message,
const char* file,
int line);
......@@ -57,6 +58,7 @@ public:
static const char* formatTimeStampString();
private:
static enum NdbShutdownType s_errorHandlerShutdownType;
};
#endif
......@@ -45,8 +45,14 @@ extern NdbMutex * theShutdownMutex;
void catchsigs(bool ignore); // for process signal handling

// Limit that failed_startups is compared against in main() before the
// parent gives up restarting.
#define MAX_FAILED_STARTUPS 3

// Flag set by child through SIGUSR1 to signal a failed startup.
// It is stored to from handler_sigusr1() and read by the parent loop, so it
// is declared volatile sig_atomic_t: the object type a signal handler may
// portably write.
static volatile sig_atomic_t failed_startup_flag = 0;

// Counter for consecutive failed startups.
// Incremented from handler_sigusr1(); volatile forces every access to go to
// memory so the parent loop observes the handler's update.
static volatile Uint32 failed_startups = 0;

extern "C" void handler_shutdown(int signum);  // for process signal handling
extern "C" void handler_error(int signum);  // for process signal handling
extern "C" void handler_sigusr1(int signum);  // child signalling failed restart
// Shows system information
void systemInfo(const Configuration & conf,
......@@ -92,6 +98,8 @@ int main(int argc, char** argv)
}
#ifndef NDB_WIN32
signal(SIGUSR1, handler_sigusr1);
for(pid_t child = fork(); child != 0; child = fork()){
/**
* Parent
......@@ -137,6 +145,20 @@ int main(int argc, char** argv)
*/
exit(0);
}
if (!failed_startup_flag)
{
// Reset the counter for consecutive failed startups
failed_startups = 0;
}
else if (failed_startups >= MAX_FAILED_STARTUPS && !theConfig->stopOnError())
{
/**
* Too many consecutive failed startups && !stopOnError():
* give up instead of restarting once more
*/
g_eventLogger.alert("Ndbd has failed %u consecutive startups. Not restarting", failed_startups);
exit(0);
}
failed_startup_flag = false;
g_eventLogger.info("Ndb has terminated (pid %d) restarting", child);
theConfig->fetch_configuration();
}
......@@ -170,6 +192,9 @@ int main(int argc, char** argv)
/**
* Do startup
*/
ErrorReporter::setErrorHandlerShutdownType(NST_ErrorHandlerStartup);
switch(globalData.theRestartFlag){
case initial_state:
globalEmulatorData.theThreadConfig->doStart(NodeState::SL_CMVMI);
......@@ -359,3 +384,15 @@ handler_error(int signum){
BaseString::snprintf(errorData, 40, "Signal %d received", signum);
ERROR_SET_SIGNAL(fatal, 0, errorData, __FILE__);
}
/**
 * SIGUSR1 handler: records that a startup failed.
 *
 * While failed_startup_flag is still clear, the flag is raised and
 * failed_startups is bumped by one; a signal arriving while the flag is
 * already raised does not count again. Every invocation logs the signal
 * number and the current counter value.
 *
 * NOTE(review): g_eventLogger.info() runs in signal context here - confirm
 * that the logger is safe to call from a signal handler.
 *
 * @param signum  number of the signal being handled
 */
extern "C"
void
handler_sigusr1(int signum)
{
  if (failed_startup_flag == false)
  {
    // First notification since the flag was last cleared
    failed_startup_flag = true;
    ++failed_startups;
  }

  g_eventLogger.info("Received signal %d. Ndbd failed startup (%u).", signum, failed_startups);
}
......@@ -154,6 +154,9 @@ NdbShutdown(NdbShutdownType type,
case NST_ErrorHandlerSignal:
g_eventLogger.info("Error handler signal %s system", shutting);
break;
case NST_ErrorHandlerStartup:
g_eventLogger.info("Error handler startup %s system", shutting);
break;
case NST_Restart:
g_eventLogger.info("Restarting system");
break;
......@@ -229,6 +232,9 @@ NdbShutdown(NdbShutdownType type,
}
if(type != NST_Normal && type != NST_Restart){
// Signal parent that error occurred during startup
if (type == NST_ErrorHandlerStartup)
kill(getppid(), SIGUSR1);
g_eventLogger.info("Error handler shutdown completed - %s", exitAbort);
#if ( defined VM_TRACE || defined ERROR_INSERT ) && ( ! ( defined NDB_OSE || defined NDB_SOFTOSE) )
signal(6, SIG_DFL);
......
......@@ -83,7 +83,8 @@ enum NdbShutdownType {
NST_ErrorHandler,
NST_ErrorHandlerSignal,
NST_Restart,
NST_ErrorInsert
NST_ErrorInsert,
NST_ErrorHandlerStartup
};
enum NdbRestartType {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment