Commit 7949ffc8 authored by unknown

InnoDB: tolerate system clock glitches a little better in the error monitor thread. (Bug #5898)


innobase/include/sync0arr.h:
  sync_array_print_long_waits(): return error status
innobase/srv/srv0srv.c:
  srv_error_monitor_thread(): Keep track of successive fatal timeouts,
  and crash the server only if the timeout has been exceeded
  several times in succession.
innobase/sync/sync0arr.c:
  sync_array_print_long_waits(): return error status
parent 1601646a
...@@ -97,9 +97,11 @@ sync_arr_wake_threads_if_sema_free(void); ...@@ -97,9 +97,11 @@ sync_arr_wake_threads_if_sema_free(void);
/************************************************************************** /**************************************************************************
Prints warnings of long semaphore waits to stderr. */ Prints warnings of long semaphore waits to stderr. */
void ibool
sync_array_print_long_waits(void); sync_array_print_long_waits(void);
/*=============================*/ /*=============================*/
/* out: TRUE if fatal semaphore wait threshold
was exceeded */
/************************************************************************ /************************************************************************
Validates the integrity of the wait array. Checks Validates the integrity of the wait array. Checks
that the number of reserved cells equals the count variable. */ that the number of reserved cells equals the count variable. */
......
...@@ -1754,7 +1754,8 @@ srv_error_monitor_thread( ...@@ -1754,7 +1754,8 @@ srv_error_monitor_thread(
/* in: a dummy parameter required by /* in: a dummy parameter required by
os_thread_create */ os_thread_create */
{ {
ulint cnt = 0; /* number of successive fatal timeouts observed */
ulint fatal_cnt = 0;
#ifdef UNIV_DEBUG_THREAD_CREATION #ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "Error monitor thread starts, id %lu\n", fprintf(stderr, "Error monitor thread starts, id %lu\n",
...@@ -1763,8 +1764,6 @@ srv_error_monitor_thread( ...@@ -1763,8 +1764,6 @@ srv_error_monitor_thread(
loop: loop:
srv_error_monitor_active = TRUE; srv_error_monitor_active = TRUE;
cnt++;
os_thread_sleep(2000000); os_thread_sleep(2000000);
if (difftime(time(NULL), srv_last_monitor_time) > 60) { if (difftime(time(NULL), srv_last_monitor_time) > 60) {
...@@ -1774,7 +1773,20 @@ loop: ...@@ -1774,7 +1773,20 @@ loop:
srv_refresh_innodb_monitor_stats(); srv_refresh_innodb_monitor_stats();
} }
sync_array_print_long_waits(); if (sync_array_print_long_waits()) {
fatal_cnt++;
if (fatal_cnt > 5) {
fprintf(stderr,
"InnoDB: Error: semaphore wait has lasted > %lu seconds\n"
"InnoDB: We intentionally crash the server, because it appears to be hung.\n",
srv_fatal_semaphore_wait_threshold);
ut_error;
}
} else {
fatal_cnt = 0;
}
/* Flush stderr so that a database user gets the output /* Flush stderr so that a database user gets the output
to possible MySQL error file */ to possible MySQL error file */
......
...@@ -889,15 +889,18 @@ sync_arr_wake_threads_if_sema_free(void) ...@@ -889,15 +889,18 @@ sync_arr_wake_threads_if_sema_free(void)
/************************************************************************** /**************************************************************************
Prints warnings of long semaphore waits to stderr. */ Prints warnings of long semaphore waits to stderr. */
void ibool
sync_array_print_long_waits(void) sync_array_print_long_waits(void)
/*=============================*/ /*=============================*/
/* out: TRUE if fatal semaphore wait threshold
was exceeded */
{ {
sync_cell_t* cell; sync_cell_t* cell;
ibool old_val; ibool old_val;
ibool noticed = FALSE; ibool noticed = FALSE;
ulint i; ulint i;
ulint fatal_timeout = srv_fatal_semaphore_wait_threshold; ulint fatal_timeout = srv_fatal_semaphore_wait_threshold;
ibool fatal = FALSE;
for (i = 0; i < sync_primary_wait_array->n_cells; i++) { for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
...@@ -914,13 +917,7 @@ sync_array_print_long_waits(void) ...@@ -914,13 +917,7 @@ sync_array_print_long_waits(void)
if (cell->wait_object != NULL if (cell->wait_object != NULL
&& difftime(time(NULL), cell->reservation_time) && difftime(time(NULL), cell->reservation_time)
> fatal_timeout) { > fatal_timeout) {
fatal = TRUE;
fprintf(stderr,
"InnoDB: Error: semaphore wait has lasted > %lu seconds\n"
"InnoDB: We intentionally crash the server, because it appears to be hung.\n",
fatal_timeout);
ut_error;
} }
} }
...@@ -948,6 +945,8 @@ sync_array_print_long_waits(void) ...@@ -948,6 +945,8 @@ sync_array_print_long_waits(void)
fprintf(stderr, fprintf(stderr,
"InnoDB: ###### Diagnostic info printed to the standard error stream\n"); "InnoDB: ###### Diagnostic info printed to the standard error stream\n");
} }
return(fatal);
} }
/************************************************************************** /**************************************************************************
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment