Commit f91b91cf authored by unknown

Bug #22379 im_daemon_life_cycle.test fails on merge of 5.1 -> 5.1-engines

Remove race conditions that occur when removing pidfiles. First, each process should remove its own
pidfile. Second, it should be removed by the process that created it, and _only_ if it's
certain the process is dead. Third, mysql-test-run.pl will remove the pidfile when the process has been killed.
- Set state of an instance to STARTING _before_ calling instance->start()
- Check that pidfile of instance has been created before changing STARTING => STARTED
- Only remove the pidfile if IM kills an instance with SIGKILL, otherwise the instance will remove it itself


server-tools/instance-manager/guardian.cc:
  If the state of an instance is STARTING, check that the instance pidfile has been created
  before changing the state to STARTED.
  Set the state to STARTING before calling instance->start(); it can take some time
  before the instance is fully started, and during that time it should be in state STARTING.
server-tools/instance-manager/instance.cc:
  Only remove the pid file of instance manager when a SIGKILL has
  been performed successfully
server-tools/instance-manager/instance_options.cc:
  Check that fscanf returns 1 which is the number of args that should be scanned from
  the pid file
parent 14bebaa2
...@@ -109,21 +109,36 @@ void Guardian_thread::process_instance(Instance *instance, ...@@ -109,21 +109,36 @@ void Guardian_thread::process_instance(Instance *instance,
} }
if (instance->is_running()) if (instance->is_running())
{
/* The instance can be contacted on it's port */
/* If STARTING also check that pidfile has been created */
if (current_node->state == STARTING &&
current_node->instance->options.get_pid() == 0)
{
/* Pid file not created yet, don't go to STARTED state yet */
}
else
{ {
/* clear status fields */ /* clear status fields */
log_info("guardian: instance %s is running, set state to STARTED",
instance->options.instance_name);
current_node->restart_counter= 0; current_node->restart_counter= 0;
current_node->crash_moment= 0; current_node->crash_moment= 0;
current_node->state= STARTED; current_node->state= STARTED;
} }
}
else else
{ {
switch (current_node->state) { switch (current_node->state) {
case NOT_STARTED: case NOT_STARTED:
instance->start();
current_node->last_checked= current_time;
log_info("guardian: starting instance %s", log_info("guardian: starting instance %s",
instance->options.instance_name); instance->options.instance_name);
/* NOTE, set state to STARTING _before_ start() is called */
current_node->state= STARTING; current_node->state= STARTING;
instance->start();
current_node->last_checked= current_time;
break; break;
case STARTED: /* fallthrough */ case STARTED: /* fallthrough */
case STARTING: /* let the instance start or crash */ case STARTING: /* let the instance start or crash */
......
...@@ -571,18 +571,19 @@ void Instance::kill_instance(int signum) ...@@ -571,18 +571,19 @@ void Instance::kill_instance(int signum)
/* if there are no pid, everything seems to be fine */ /* if there are no pid, everything seems to be fine */
if ((pid= options.get_pid()) != 0) /* get pid from pidfile */ if ((pid= options.get_pid()) != 0) /* get pid from pidfile */
{ {
/* if (kill(pid, signum) == 0)
If we cannot kill mysqld, then it has propably crashed. {
Let us try to remove staled pidfile and return successfully /* Kill suceeded */
as mysqld is probably stopped. if (signum == SIGKILL) /* really killed instance with SIGKILL */
*/ {
if (!kill(pid, signum)) log_error("The instance %s is being stopped forcibly. Normally" \
options.unlink_pidfile(); "it should not happen. Probably the instance has been" \
else if (signum == SIGKILL) /* really killed instance with SIGKILL */ "hanging. You should also check your IM setup",
log_error("The instance %s is being stopped forsibly. Normally \
it should not happed. Probably the instance has been \
hanging. You should also check your IM setup",
options.instance_name); options.instance_name);
/* After sucessful hard kill the pidfile need to be removed */
options.unlink_pidfile();
}
}
} }
return; return;
} }
......
...@@ -377,7 +377,8 @@ pid_t Instance_options::get_pid() ...@@ -377,7 +377,8 @@ pid_t Instance_options::get_pid()
{ {
pid_t pid; pid_t pid;
fscanf(pid_file_stream, "%i", &pid); if (fscanf(pid_file_stream, "%i", &pid) != 1)
pid= -1;
my_fclose(pid_file_stream, MYF(0)); my_fclose(pid_file_stream, MYF(0));
return pid; return pid;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment