Bug #19395 mysqld does not always detect cluster shutdown

parent d0fd5ecd
...@@ -182,6 +182,8 @@ static const char * ndb_connected_host= 0; ...@@ -182,6 +182,8 @@ static const char * ndb_connected_host= 0;
static long ndb_connected_port= 0; static long ndb_connected_port= 0;
static long ndb_number_of_replicas= 0; static long ndb_number_of_replicas= 0;
long ndb_number_of_storage_nodes= 0; long ndb_number_of_storage_nodes= 0;
long ndb_number_of_ready_storage_nodes= 0;
long ndb_connect_count= 0;
static int update_status_variables(Ndb_cluster_connection *c) static int update_status_variables(Ndb_cluster_connection *c)
{ {
...@@ -190,6 +192,8 @@ static int update_status_variables(Ndb_cluster_connection *c) ...@@ -190,6 +192,8 @@ static int update_status_variables(Ndb_cluster_connection *c)
ndb_connected_host= c->get_connected_host(); ndb_connected_host= c->get_connected_host();
ndb_number_of_replicas= 0; ndb_number_of_replicas= 0;
ndb_number_of_storage_nodes= c->no_db_nodes(); ndb_number_of_storage_nodes= c->no_db_nodes();
ndb_number_of_ready_storage_nodes= c->get_no_ready();
ndb_connect_count= c->get_connect_count();
return 0; return 0;
} }
...@@ -9361,11 +9365,15 @@ ndbcluster_show_status(THD* thd, stat_print_fn *stat_print, ...@@ -9361,11 +9365,15 @@ ndbcluster_show_status(THD* thd, stat_print_fn *stat_print,
"cluster_node_id=%u, " "cluster_node_id=%u, "
"connected_host=%s, " "connected_host=%s, "
"connected_port=%u, " "connected_port=%u, "
"number_of_storage_nodes=%u", "number_of_storage_nodes=%u, "
"number_of_ready_storage_nodes=%u, "
"connect_count=%u",
ndb_cluster_node_id, ndb_cluster_node_id,
ndb_connected_host, ndb_connected_host,
ndb_connected_port, ndb_connected_port,
ndb_number_of_storage_nodes); ndb_number_of_storage_nodes,
ndb_number_of_ready_storage_nodes,
ndb_connect_count);
if (stat_print(thd, ndbcluster_hton.name, strlen(ndbcluster_hton.name), if (stat_print(thd, ndbcluster_hton.name, strlen(ndbcluster_hton.name),
"connection", strlen("connection"), "connection", strlen("connection"),
buf, buflen)) buf, buflen))
......
...@@ -1776,7 +1776,8 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb, ...@@ -1776,7 +1776,8 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb,
break; break;
case NDBEVENT::TE_CLUSTER_FAILURE: case NDBEVENT::TE_CLUSTER_FAILURE:
if (ndb_extra_logging) if (ndb_extra_logging)
sql_print_information("NDB Binlog: cluster failure for %s.", schema_share->key); sql_print_information("NDB Binlog: cluster failure for %s at epoch %u.",
schema_share->key, (unsigned) pOp->getGCI());
// fall through // fall through
case NDBEVENT::TE_DROP: case NDBEVENT::TE_DROP:
if (ndb_extra_logging && if (ndb_extra_logging &&
...@@ -1785,7 +1786,6 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb, ...@@ -1785,7 +1786,6 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb,
"read only on reconnect."); "read only on reconnect.");
free_share(&schema_share); free_share(&schema_share);
schema_share= 0; schema_share= 0;
ndb_binlog_tables_inited= FALSE;
close_cached_tables((THD*) 0, 0, (TABLE_LIST*) 0, FALSE); close_cached_tables((THD*) 0, 0, (TABLE_LIST*) 0, FALSE);
// fall through // fall through
case NDBEVENT::TE_ALTER: case NDBEVENT::TE_ALTER:
...@@ -2829,7 +2829,8 @@ ndb_binlog_thread_handle_non_data_event(THD *thd, Ndb *ndb, ...@@ -2829,7 +2829,8 @@ ndb_binlog_thread_handle_non_data_event(THD *thd, Ndb *ndb,
{ {
case NDBEVENT::TE_CLUSTER_FAILURE: case NDBEVENT::TE_CLUSTER_FAILURE:
if (ndb_extra_logging) if (ndb_extra_logging)
sql_print_information("NDB Binlog: cluster failure for %s.", share->key); sql_print_information("NDB Binlog: cluster failure for %s at epoch %u.",
share->key, (unsigned) pOp->getGCI());
if (apply_status_share == share) if (apply_status_share == share)
{ {
if (ndb_extra_logging && if (ndb_extra_logging &&
...@@ -2838,7 +2839,6 @@ ndb_binlog_thread_handle_non_data_event(THD *thd, Ndb *ndb, ...@@ -2838,7 +2839,6 @@ ndb_binlog_thread_handle_non_data_event(THD *thd, Ndb *ndb,
"read only on reconnect."); "read only on reconnect.");
free_share(&apply_status_share); free_share(&apply_status_share);
apply_status_share= 0; apply_status_share= 0;
ndb_binlog_tables_inited= FALSE;
} }
DBUG_PRINT("info", ("CLUSTER FAILURE EVENT: " DBUG_PRINT("info", ("CLUSTER FAILURE EVENT: "
"%s received share: 0x%lx op: %lx share op: %lx " "%s received share: 0x%lx op: %lx share op: %lx "
...@@ -2854,7 +2854,6 @@ ndb_binlog_thread_handle_non_data_event(THD *thd, Ndb *ndb, ...@@ -2854,7 +2854,6 @@ ndb_binlog_thread_handle_non_data_event(THD *thd, Ndb *ndb,
"read only on reconnect."); "read only on reconnect.");
free_share(&apply_status_share); free_share(&apply_status_share);
apply_status_share= 0; apply_status_share= 0;
ndb_binlog_tables_inited= FALSE;
} }
/* ToDo: remove printout */ /* ToDo: remove printout */
if (ndb_extra_logging) if (ndb_extra_logging)
...@@ -3267,24 +3266,6 @@ pthread_handler_t ndb_binlog_thread_func(void *arg) ...@@ -3267,24 +3266,6 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
pthread_mutex_unlock(&injector_mutex); pthread_mutex_unlock(&injector_mutex);
pthread_cond_signal(&injector_cond); pthread_cond_signal(&injector_cond);
thd->proc_info= "Waiting for ndbcluster to start";
pthread_mutex_lock(&injector_mutex);
while (!schema_share ||
(ndb_binlog_running && !apply_status_share))
{
/* ndb not connected yet */
struct timespec abstime;
set_timespec(abstime, 1);
pthread_cond_timedwait(&injector_cond, &injector_mutex, &abstime);
if (abort_loop)
{
pthread_mutex_unlock(&injector_mutex);
goto err;
}
}
pthread_mutex_unlock(&injector_mutex);
/* /*
Main NDB Injector loop Main NDB Injector loop
*/ */
...@@ -3298,15 +3279,28 @@ pthread_handler_t ndb_binlog_thread_func(void *arg) ...@@ -3298,15 +3279,28 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
set_thd_ndb(thd, thd_ndb); set_thd_ndb(thd, thd_ndb);
thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP; thd_ndb->options|= TNO_NO_LOG_SCHEMA_OP;
thd->query_id= 0; // to keep valgrind quiet thd->query_id= 0; // to keep valgrind quiet
restart:
{ {
static char db[]= ""; thd->proc_info= "Waiting for ndbcluster to start";
thd->db= db;
if (ndb_binlog_running) pthread_mutex_lock(&injector_mutex);
open_binlog_index(thd, &binlog_tables, &binlog_index); while (!schema_share ||
thd->db= db; (ndb_binlog_running && !apply_status_share))
{
/* ndb not connected yet */
struct timespec abstime;
set_timespec(abstime, 1);
pthread_cond_timedwait(&injector_cond, &injector_mutex, &abstime);
if (abort_loop)
{
pthread_mutex_unlock(&injector_mutex);
goto err;
}
}
pthread_mutex_unlock(&injector_mutex);
} }
restart:
{ {
// wait for the first event // wait for the first event
thd->proc_info= "Waiting for first event from ndbcluster"; thd->proc_info= "Waiting for first event from ndbcluster";
...@@ -3337,7 +3331,13 @@ restart: ...@@ -3337,7 +3331,13 @@ restart:
} }
} }
} }
{
static char db[]= "";
thd->db= db;
if (ndb_binlog_running)
open_binlog_index(thd, &binlog_tables, &binlog_index);
thd->db= db;
}
do_ndbcluster_binlog_close_connection= BCCC_running; do_ndbcluster_binlog_close_connection= BCCC_running;
for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) && for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) &&
ndb_latest_handled_binlog_epoch >= g_latest_trans_gci) && ndb_latest_handled_binlog_epoch >= g_latest_trans_gci) &&
...@@ -3686,7 +3686,12 @@ restart: ...@@ -3686,7 +3686,12 @@ restart:
ndb_latest_handled_binlog_epoch= ndb_latest_received_binlog_epoch; ndb_latest_handled_binlog_epoch= ndb_latest_received_binlog_epoch;
} }
if (do_ndbcluster_binlog_close_connection == BCCC_restart) if (do_ndbcluster_binlog_close_connection == BCCC_restart)
{
ndb_binlog_tables_inited= FALSE;
close_thread_tables(thd);
binlog_index= 0;
goto restart; goto restart;
}
err: err:
DBUG_PRINT("info",("Shutting down cluster binlog thread")); DBUG_PRINT("info",("Shutting down cluster binlog thread"));
thd->proc_info= "Shutting down"; thd->proc_info= "Shutting down";
......
...@@ -1107,9 +1107,10 @@ NdbEventBuffer::flushIncompleteEvents(Uint64 gci) ...@@ -1107,9 +1107,10 @@ NdbEventBuffer::flushIncompleteEvents(Uint64 gci)
for(i = 0; i < sz; i++) for(i = 0; i < sz; i++)
{ {
Gci_container* tmp = array + i; Gci_container* tmp = array + i;
if (tmp->m_gci < gci) if (tmp->m_gci && tmp->m_gci < gci)
{ {
// we have found an old not-completed gci, remove it // we have found an old not-completed gci, remove it
ndbout_c("ndb: flushing incomplete epoch %lld (<%lld)", tmp->m_gci, gci);
if(!tmp->m_data.is_empty()) if(!tmp->m_data.is_empty())
{ {
free_list(tmp->m_data); free_list(tmp->m_data);
...@@ -1257,7 +1258,6 @@ NdbEventBuffer::deleteUsedEventOperations() ...@@ -1257,7 +1258,6 @@ NdbEventBuffer::deleteUsedEventOperations()
op->m_prev->m_next = op->m_next; op->m_prev->m_next = op->m_next;
else else
m_dropped_ev_op = op->m_next; m_dropped_ev_op = op->m_next;
ndbout_c("deleting NdbEventOperation %p", op->m_facade);
delete op->m_facade; delete op->m_facade;
} }
} }
...@@ -2506,7 +2506,6 @@ NdbEventBuffer::dropEventOperation(NdbEventOperation* tOp) ...@@ -2506,7 +2506,6 @@ NdbEventBuffer::dropEventOperation(NdbEventOperation* tOp)
{ {
DBUG_PRINT("info", ("deleting op: %p", op)); DBUG_PRINT("info", ("deleting op: %p", op));
DBUG_ASSERT(op->m_node_bit_mask.isclear()); DBUG_ASSERT(op->m_node_bit_mask.isclear());
ndbout_c("deleting NdbEventOperation %p", op->m_facade);
delete op->m_facade; delete op->m_facade;
} }
else else
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#include <UtilBuffer.hpp> #include <UtilBuffer.hpp>
#define NDB_EVENT_OP_MAGIC_NUMBER 0xA9F301B4 #define NDB_EVENT_OP_MAGIC_NUMBER 0xA9F301B4
#define EVENT_DEBUG //#define EVENT_DEBUG
#ifdef EVENT_DEBUG #ifdef EVENT_DEBUG
#define DBUG_ENTER_EVENT(A) DBUG_ENTER(A) #define DBUG_ENTER_EVENT(A) DBUG_ENTER(A)
#define DBUG_RETURN_EVENT(A) DBUG_RETURN(A) #define DBUG_RETURN_EVENT(A) DBUG_RETURN(A)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment