Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail

- adoptions to handle binlog
parent d4c739fd
......@@ -3,9 +3,9 @@ create table t1 (a int key) engine=ndbcluster;
begin;
insert into t1 values (1);
insert into t1 values (2);
ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from ndbcluster
ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from NDBCLUSTER
commit;
ERROR HY000: Got error 4350 'Transaction already aborted' from ndbcluster
ERROR HY000: Got error 4350 'Transaction already aborted' from NDBCLUSTER
drop table t1;
create table t2 (a int, b int, primary key(a,b)) engine=ndbcluster;
insert into t2 values (1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1);
......@@ -14,6 +14,8 @@ a b
1 1
2 1
3 1
show tables like 't2';
Tables_in_test (t2)
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
......@@ -22,14 +24,12 @@ a
2
3
select * from t2 order by a limit 3;
ERROR HY000: Can't lock file (errno: 241)
select * from t2 order by a limit 3;
a
1
2
3
show tables;
Tables_in_test
show tables like 't2';
Tables_in_test (t2)
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
......
......@@ -36,23 +36,26 @@ select * from t2 order by a limit 3;
--exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
--exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
# to ensure mysqld has connected again, and recreated system tables
--sleep 3
--connection server2
show tables like 't2';
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
# server 1 should have a stale cache, and in this case wrong frm, transaction must be retried
--connection server1
--error 1015
select * from t2 order by a limit 3;
select * from t2 order by a limit 3;
--exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
--exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
# to ensure mysqld has connected again, and recreated system tables
--sleep 3
--connection server1
show tables;
show tables like 't2';
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
......
......@@ -1775,6 +1775,8 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb,
// skip
break;
case NDBEVENT::TE_CLUSTER_FAILURE:
if (ndb_extra_logging)
sql_print_information("NDB Binlog: cluster failure for %s.", schema_share->key);
// fall through
case NDBEVENT::TE_DROP:
if (ndb_extra_logging &&
......@@ -1784,6 +1786,7 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb,
free_share(&schema_share);
schema_share= 0;
ndb_binlog_tables_inited= FALSE;
close_cached_tables((THD*) 0, 0, (TABLE_LIST*) 0, FALSE);
// fall through
case NDBEVENT::TE_ALTER:
ndb_handle_schema_change(thd, ndb, pOp, tmp_share);
......@@ -2101,7 +2104,14 @@ add_binlog_index_err:
Functions for start, stop, wait for ndbcluster binlog thread
*********************************************************************/
static int do_ndbcluster_binlog_close_connection= 0;
enum Binlog_thread_state
{
BCCC_running= 0,
BCCC_exit= 1,
BCCC_restart= 2
};
static enum Binlog_thread_state do_ndbcluster_binlog_close_connection= BCCC_restart;
int ndbcluster_binlog_start()
{
......@@ -2139,7 +2149,7 @@ static void ndbcluster_binlog_close_connection(THD *thd)
DBUG_ENTER("ndbcluster_binlog_close_connection");
const char *save_info= thd->proc_info;
thd->proc_info= "ndbcluster_binlog_close_connection";
do_ndbcluster_binlog_close_connection= 1;
do_ndbcluster_binlog_close_connection= BCCC_exit;
while (ndb_binlog_thread_running > 0)
sleep(1);
thd->proc_info= save_info;
......@@ -3296,10 +3306,48 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
thd->db= db;
}
for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) &&
ndb_latest_handled_binlog_epoch >= g_latest_trans_gci); )
restart:
{
// wait for the first event
thd->proc_info= "Waiting for first event from ndbcluster";
DBUG_PRINT("info", ("Waiting for the first event"));
int schema_res= 0;
Uint64 schema_gci= 0;
while (schema_res == 0 && !abort_loop)
{
schema_res= s_ndb->pollEvents(100, &schema_gci);
}
// now check that we have epochs consistant with what we had before the restart
if (schema_res > 0)
{
if (schema_gci < ndb_latest_handled_binlog_epoch)
{
sql_print_error("NDB Binlog: cluster has been restarted --initial or with older filesystem. "
"ndb_latest_handled_binlog_epoch: %u, while current epoch: %u. "
"RESET MASTER should be issued. Resetting ndb_latest_handled_binlog_epoch.",
(unsigned) ndb_latest_handled_binlog_epoch, (unsigned) schema_gci);
g_latest_trans_gci= 0;
ndb_latest_handled_binlog_epoch= 0;
ndb_latest_applied_binlog_epoch= 0;
ndb_latest_received_binlog_epoch= 0;
}
}
}
do_ndbcluster_binlog_close_connection= BCCC_running;
for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) &&
ndb_latest_handled_binlog_epoch >= g_latest_trans_gci) &&
do_ndbcluster_binlog_close_connection != BCCC_restart; )
{
#ifndef DBUG_OFF
if (do_ndbcluster_binlog_close_connection)
{
DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection: %d, "
"ndb_latest_handled_binlog_epoch: %llu, "
"g_latest_trans_gci: %llu", do_ndbcluster_binlog_close_connection,
ndb_latest_handled_binlog_epoch, g_latest_trans_gci));
}
#endif
#ifdef RUN_NDB_BINLOG_TIMER
main_timer.stop();
sql_print_information("main_timer %ld ms", main_timer.elapsed_ms());
......@@ -3324,7 +3372,13 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
ndb_latest_received_binlog_epoch= gci;
while (gci > schema_gci && schema_res >= 0)
{
static char buf[64];
thd->proc_info= "Waiting for schema epoch";
my_snprintf(buf, sizeof(buf), "%s %u(%u)", thd->proc_info, (unsigned) schema_gci, (unsigned) gci);
thd->proc_info= buf;
schema_res= s_ndb->pollEvents(10, &schema_gci);
}
if ((abort_loop || do_ndbcluster_binlog_close_connection) &&
(ndb_latest_handled_binlog_epoch >= g_latest_trans_gci ||
......@@ -3360,10 +3414,31 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
while (pOp != NULL)
{
if (!pOp->hasError())
{
ndb_binlog_thread_handle_schema_event(thd, s_ndb, pOp,
&post_epoch_log_list,
&post_epoch_unlock_list,
&mem_root);
DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ?
s_ndb->getEventOperation()->getEvent()->getTable()->getName() :
"<empty>"));
DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ?
i_ndb->getEventOperation()->getEvent()->getTable()->getName() :
"<empty>"));
if (i_ndb->getEventOperation() == NULL &&
s_ndb->getEventOperation() == NULL &&
do_ndbcluster_binlog_close_connection == BCCC_running)
{
DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection= BCCC_restart"));
do_ndbcluster_binlog_close_connection= BCCC_restart;
if (ndb_latest_received_binlog_epoch < g_latest_trans_gci && ndb_binlog_running)
{
sql_print_error("NDB Binlog: latest transaction in epoch %lld not in binlog "
"as latest received epoch is %lld",
g_latest_trans_gci, ndb_latest_received_binlog_epoch);
}
}
}
else
sql_print_error("NDB: error %lu (%s) on handling "
"binlog schema event",
......@@ -3532,6 +3607,25 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
ndb_binlog_thread_handle_non_data_event(thd, i_ndb, pOp, row);
// reset to catch errors
i_ndb->setDatabaseName("");
DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ?
s_ndb->getEventOperation()->getEvent()->getTable()->getName() :
"<empty>"));
DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ?
i_ndb->getEventOperation()->getEvent()->getTable()->getName() :
"<empty>"));
if (i_ndb->getEventOperation() == NULL &&
s_ndb->getEventOperation() == NULL &&
do_ndbcluster_binlog_close_connection == BCCC_running)
{
DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection= BCCC_restart"));
do_ndbcluster_binlog_close_connection= BCCC_restart;
if (ndb_latest_received_binlog_epoch < g_latest_trans_gci && ndb_binlog_running)
{
sql_print_error("NDB Binlog: latest transaction in epoch %lld not in binlog "
"as latest received epoch is %lld",
g_latest_trans_gci, ndb_latest_received_binlog_epoch);
}
}
}
pOp= i_ndb->nextEvent();
......@@ -3587,6 +3681,8 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
*root_ptr= old_root;
ndb_latest_handled_binlog_epoch= ndb_latest_received_binlog_epoch;
}
if (do_ndbcluster_binlog_close_connection != BCCC_exit)
goto restart;
err:
DBUG_PRINT("info",("Shutting down cluster binlog thread"));
thd->proc_info= "Shutting down";
......
......@@ -71,6 +71,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
noOfConnectedNodes= 0;
theClusterMgrThread= 0;
m_connect_count = 0;
m_cluster_state = CS_waiting_for_clean_cache;
DBUG_VOID_RETURN;
}
......@@ -175,6 +176,16 @@ ClusterMgr::threadMain( ){
int send_heartbeat_now= global_flag_send_heartbeat_now;
global_flag_send_heartbeat_now= 0;
if (m_cluster_state == CS_waiting_for_clean_cache)
{
theFacade.m_globalDictCache.lock();
unsigned sz= theFacade.m_globalDictCache.get_size();
theFacade.m_globalDictCache.unlock();
if (sz)
goto next;
m_cluster_state = CS_waiting_for_first_connect;
}
theFacade.lock_mutex();
for (int i = 1; i < MAX_NODES; i++){
/**
......@@ -223,6 +234,7 @@ ClusterMgr::threadMain( ){
*/
theFacade.unlock_mutex();
next:
// Sleep for 100 ms between each Registration Heartbeat
Uint64 before = now;
NdbSleep_MilliSleep(100);
......@@ -450,6 +462,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
theFacade.m_globalDictCache.invalidate_all();
theFacade.m_globalDictCache.unlock();
m_connect_count ++;
m_cluster_state = CS_waiting_for_clean_cache;
NFCompleteRep rep;
for(Uint32 i = 1; i<MAX_NODES; i++){
if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){
......
......@@ -57,6 +57,11 @@ private:
class TransporterFacade & theFacade;
public:
enum Cluster_state {
CS_waiting_for_clean_cache = 0,
CS_waiting_for_first_connect,
CS_connected
};
struct Node {
Node();
bool defined;
......@@ -86,7 +91,7 @@ private:
Uint32 noOfConnectedNodes;
Node theNodes[MAX_NODES];
NdbThread* theClusterMgrThread;
enum Cluster_state m_cluster_state;
/**
* Used for controlling start/stop of the thread
*/
......
......@@ -1287,6 +1287,7 @@ NdbEventOperation* Ndb::createEventOperation(const char* eventName)
int Ndb::dropEventOperation(NdbEventOperation* tOp)
{
DBUG_ENTER("Ndb::dropEventOperation");
DBUG_PRINT("info", ("name: %s", tOp->getEvent()->getTable()->getName()));
// remove it from list
NdbEventOperationImpl *op=
NdbEventBuffer::getEventOperationImpl(tOp);
......@@ -1297,6 +1298,8 @@ int Ndb::dropEventOperation(NdbEventOperation* tOp)
else
theImpl->m_ev_op= op->m_next;
DBUG_PRINT("info", ("first: %s",
theImpl->m_ev_op ? theImpl->m_ev_op->getEvent()->getTable()->getName() : "<empty>"));
assert(theImpl->m_ev_op == 0 || theImpl->m_ev_op->m_prev == 0);
theEventBuffer->dropEventOperation(tOp);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment