Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail

- adoptions to handle binlog

Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail
- adoptions to handle binlog
c5987ab6 · tomas@poseidon.ndb.mysql.com · d4c739fd · c5987ab6 · c5987ab6 · c5987ab6
Commit c5987ab6 authored May 16, 2006 by tomas@poseidon.ndb.mysql.com
6 changed files
--- a/mysql-test/r/ndb_autodiscover3.result
+++ b/mysql-test/r/ndb_autodiscover3.result
@@ -3,9 +3,9 @@ create table t1 (a int key) engine=ndbcluster;
 begin;
 insert into t1 values (1);
 insert into t1 values (2);
-ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from ndbcluster
+ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from NDBCLUSTER
 commit;
-ERROR HY000: Got error 4350 'Transaction already aborted' from ndbcluster
+ERROR HY000: Got error 4350 'Transaction already aborted' from NDBCLUSTER
 drop table t1;
 create table t2 (a int, b int, primary key(a,b)) engine=ndbcluster;
 insert into t2 values (1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1);
@@ -14,6 +14,8 @@ a	b
 1	1
 2	1
 3	1
+show tables like 't2';
+Tables_in_test (t2)
 create table t2 (a int key) engine=ndbcluster;
 insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
 select * from t2 order by a limit 3;
@@ -22,14 +24,12 @@ a
 2
 3
 select * from t2 order by a limit 3;
-ERROR HY000: Can't lock file (errno: 241)
-select * from t2 order by a limit 3;
 a
 1
 2
 3
-show tables;
-Tables_in_test
+show tables like 't2';
+Tables_in_test (t2)
 create table t2 (a int key) engine=ndbcluster;
 insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
 select * from t2 order by a limit 3;

--- a/mysql-test/t/ndb_autodiscover3.test
+++ b/mysql-test/t/ndb_autodiscover3.test
@@ -36,23 +36,26 @@ select * from t2 order by a limit 3;

 --exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
 --exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
+# to ensure mysqld has connected again, and recreated system tables
+--sleep 3

 --connection server2
+show tables like 't2';
 create table t2 (a int key) engine=ndbcluster;
 insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
 select * from t2 order by a limit 3;

 # server 1 should have a stale cache, and in this case wrong frm, transaction must be retried
 --connection server1
--error 1015
-select * from t2 order by a limit 3;
 select * from t2 order by a limit 3;

 --exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
 --exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
+# to ensure mysqld has connected again, and recreated system tables
+--sleep 3

 --connection server1
-show tables;
+show tables like 't2';
 create table t2 (a int key) engine=ndbcluster;
 insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
 select * from t2 order by a limit 3;

--- a/sql/ha_ndbcluster_binlog.cc
+++ b/sql/ha_ndbcluster_binlog.cc
@@ -1775,6 +1775,8 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb,
      // skip
      break;
    case NDBEVENT::TE_CLUSTER_FAILURE:
+      if (ndb_extra_logging)
+        sql_print_information("NDB Binlog: cluster failure for %s.", schema_share->key);
      // fall through
    case NDBEVENT::TE_DROP:
      if (ndb_extra_logging &&
@@ -1784,6 +1786,7 @@ ndb_binlog_thread_handle_schema_event(THD *thd, Ndb *ndb,
      free_share(&schema_share);
      schema_share= 0;
      ndb_binlog_tables_inited= FALSE;
+      close_cached_tables((THD*) 0, 0, (TABLE_LIST*) 0, FALSE);
      // fall through
    case NDBEVENT::TE_ALTER:
      ndb_handle_schema_change(thd, ndb, pOp, tmp_share);
@@ -2101,7 +2104,14 @@ add_binlog_index_err:
  Functions for start, stop, wait for ndbcluster binlog thread
 *********************************************************************/

-static int do_ndbcluster_binlog_close_connection= 0;
+enum Binlog_thread_state
+{
+  BCCC_running= 0,
+  BCCC_exit= 1,
+  BCCC_restart= 2
+};
+
+static enum Binlog_thread_state do_ndbcluster_binlog_close_connection= BCCC_restart;

 int ndbcluster_binlog_start()
 {
@@ -2139,7 +2149,7 @@ static void ndbcluster_binlog_close_connection(THD *thd)
  DBUG_ENTER("ndbcluster_binlog_close_connection");
  const char *save_info= thd->proc_info;
  thd->proc_info= "ndbcluster_binlog_close_connection";
-  do_ndbcluster_binlog_close_connection= 1;
+  do_ndbcluster_binlog_close_connection= BCCC_exit;
  while (ndb_binlog_thread_running > 0)
    sleep(1);
  thd->proc_info= save_info;
@@ -3296,10 +3306,48 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
    thd->db= db;
  }

-  for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) &&
-            ndb_latest_handled_binlog_epoch >= g_latest_trans_gci); )
+restart:
  {
+    // wait for the first event
+    thd->proc_info= "Waiting for first event from ndbcluster";
+    DBUG_PRINT("info", ("Waiting for the first event"));
+    int schema_res= 0;
+    Uint64 schema_gci= 0;
+    while (schema_res == 0 && !abort_loop)
+    {
+      schema_res= s_ndb->pollEvents(100, &schema_gci);
+    }
+    // now check that we have epochs consistant with what we had before the restart
+    if (schema_res > 0)
+    {
+      if (schema_gci < ndb_latest_handled_binlog_epoch)
+      {
+        sql_print_error("NDB Binlog: cluster has been restarted --initial or with older filesystem. "
+                        "ndb_latest_handled_binlog_epoch: %u, while current epoch: %u. "
+                        "RESET MASTER should be issued. Resetting ndb_latest_handled_binlog_epoch.",
+                        (unsigned) ndb_latest_handled_binlog_epoch, (unsigned) schema_gci);
+        g_latest_trans_gci= 0;
+        ndb_latest_handled_binlog_epoch= 0;
+        ndb_latest_applied_binlog_epoch= 0;
+        ndb_latest_received_binlog_epoch= 0;
+      }
+    }
+  }

+  do_ndbcluster_binlog_close_connection= BCCC_running;
+  for ( ; !((abort_loop || do_ndbcluster_binlog_close_connection) &&
+            ndb_latest_handled_binlog_epoch >= g_latest_trans_gci) &&
+          do_ndbcluster_binlog_close_connection != BCCC_restart; )
+  {
+#ifndef DBUG_OFF
+    if (do_ndbcluster_binlog_close_connection)
+    {
+      DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection: %d, "
+                          "ndb_latest_handled_binlog_epoch: %llu, "
+                          "g_latest_trans_gci: %llu", do_ndbcluster_binlog_close_connection,
+                          ndb_latest_handled_binlog_epoch, g_latest_trans_gci));
+    }
+#endif
 #ifdef RUN_NDB_BINLOG_TIMER
    main_timer.stop();
    sql_print_information("main_timer %ld ms",  main_timer.elapsed_ms());
@@ -3324,7 +3372,13 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
    ndb_latest_received_binlog_epoch= gci;

    while (gci > schema_gci && schema_res >= 0)
+    {
+      static char buf[64];
+      thd->proc_info= "Waiting for schema epoch";
+      my_snprintf(buf, sizeof(buf), "%s %u(%u)", thd->proc_info, (unsigned) schema_gci, (unsigned) gci);
+      thd->proc_info= buf;
      schema_res= s_ndb->pollEvents(10, &schema_gci);
+    }

    if ((abort_loop || do_ndbcluster_binlog_close_connection) &&
        (ndb_latest_handled_binlog_epoch >= g_latest_trans_gci ||
@@ -3360,10 +3414,31 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
      while (pOp != NULL)
      {
        if (!pOp->hasError())
+        {
          ndb_binlog_thread_handle_schema_event(thd, s_ndb, pOp,
                                                &post_epoch_log_list,
                                                &post_epoch_unlock_list,
                                                &mem_root);
+          DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ?
+                              s_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+                              "<empty>"));
+          DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ?
+                              i_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+                              "<empty>"));
+          if (i_ndb->getEventOperation() == NULL &&
+              s_ndb->getEventOperation() == NULL &&
+              do_ndbcluster_binlog_close_connection == BCCC_running)
+          {
+            DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection= BCCC_restart"));
+            do_ndbcluster_binlog_close_connection= BCCC_restart;
+            if (ndb_latest_received_binlog_epoch < g_latest_trans_gci && ndb_binlog_running)
+            {
+              sql_print_error("NDB Binlog: latest transaction in epoch %lld not in binlog "
+                              "as latest received epoch is %lld",
+                              g_latest_trans_gci, ndb_latest_received_binlog_epoch);
+            }
+          }
+        }
        else
          sql_print_error("NDB: error %lu (%s) on handling "
                          "binlog schema event",
@@ -3532,6 +3607,25 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
            ndb_binlog_thread_handle_non_data_event(thd, i_ndb, pOp, row);
            // reset to catch errors
            i_ndb->setDatabaseName("");
+            DBUG_PRINT("info", ("s_ndb first: %s", s_ndb->getEventOperation() ?
+                                s_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+                                "<empty>"));
+            DBUG_PRINT("info", ("i_ndb first: %s", i_ndb->getEventOperation() ?
+                                i_ndb->getEventOperation()->getEvent()->getTable()->getName() :
+                                "<empty>"));
+            if (i_ndb->getEventOperation() == NULL &&
+                s_ndb->getEventOperation() == NULL &&
+                do_ndbcluster_binlog_close_connection == BCCC_running)
+            {
+              DBUG_PRINT("info", ("do_ndbcluster_binlog_close_connection= BCCC_restart"));
+              do_ndbcluster_binlog_close_connection= BCCC_restart;
+              if (ndb_latest_received_binlog_epoch < g_latest_trans_gci && ndb_binlog_running)
+              {
+                sql_print_error("NDB Binlog: latest transaction in epoch %lld not in binlog "
+                                "as latest received epoch is %lld",
+                                g_latest_trans_gci, ndb_latest_received_binlog_epoch);
+              }
+            }
          }

          pOp= i_ndb->nextEvent();
@@ -3587,6 +3681,8 @@ pthread_handler_t ndb_binlog_thread_func(void *arg)
    *root_ptr= old_root;
    ndb_latest_handled_binlog_epoch= ndb_latest_received_binlog_epoch;
  }
+  if (do_ndbcluster_binlog_close_connection != BCCC_exit)
+    goto restart;
 err:
  DBUG_PRINT("info",("Shutting down cluster binlog thread"));
  thd->proc_info= "Shutting down";

--- a/storage/ndb/src/ndbapi/ClusterMgr.cpp
+++ b/storage/ndb/src/ndbapi/ClusterMgr.cpp
@@ -71,6 +71,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
  noOfConnectedNodes= 0;
  theClusterMgrThread= 0;
  m_connect_count = 0;
+  m_cluster_state = CS_waiting_for_clean_cache;
  DBUG_VOID_RETURN;
 }

@@ -175,6 +176,16 @@ ClusterMgr::threadMain( ){
    int send_heartbeat_now= global_flag_send_heartbeat_now;
    global_flag_send_heartbeat_now= 0;

+    if (m_cluster_state == CS_waiting_for_clean_cache)
+    {
+      theFacade.m_globalDictCache.lock();
+      unsigned sz= theFacade.m_globalDictCache.get_size();
+      theFacade.m_globalDictCache.unlock();
+      if (sz)
+        goto next;
+      m_cluster_state = CS_waiting_for_first_connect;
+    }
+
    theFacade.lock_mutex();
    for (int i = 1; i < MAX_NODES; i++){
      /**
@@ -223,6 +234,7 @@ ClusterMgr::threadMain( ){
     */
    theFacade.unlock_mutex();
    
+next:
    // Sleep for 100 ms between each Registration Heartbeat
    Uint64 before = now;
    NdbSleep_MilliSleep(100); 
@@ -450,6 +462,7 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
    theFacade.m_globalDictCache.invalidate_all();
    theFacade.m_globalDictCache.unlock();
    m_connect_count ++;
+    m_cluster_state = CS_waiting_for_clean_cache;
    NFCompleteRep rep;
    for(Uint32 i = 1; i<MAX_NODES; i++){
      if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){

--- a/storage/ndb/src/ndbapi/ClusterMgr.hpp
+++ b/storage/ndb/src/ndbapi/ClusterMgr.hpp
@@ -57,6 +57,11 @@ private:
  class TransporterFacade & theFacade;
  
 public:
+  enum Cluster_state {
+    CS_waiting_for_clean_cache = 0,
+    CS_waiting_for_first_connect,
+    CS_connected
+  };
  struct Node {
    Node();
    bool defined;
@@ -86,7 +91,7 @@ private:
  Uint32        noOfConnectedNodes;
  Node          theNodes[MAX_NODES];
  NdbThread*    theClusterMgrThread;
-  
+  enum Cluster_state m_cluster_state;
  /**
   * Used for controlling start/stop of the thread
   */

--- a/storage/ndb/src/ndbapi/Ndb.cpp
+++ b/storage/ndb/src/ndbapi/Ndb.cpp
@@ -1287,6 +1287,7 @@ NdbEventOperation* Ndb::createEventOperation(const char* eventName)
 int Ndb::dropEventOperation(NdbEventOperation* tOp)
 {
  DBUG_ENTER("Ndb::dropEventOperation");
+  DBUG_PRINT("info", ("name: %s", tOp->getEvent()->getTable()->getName()));
  // remove it from list
  NdbEventOperationImpl *op=
    NdbEventBuffer::getEventOperationImpl(tOp);
@@ -1297,6 +1298,8 @@ int Ndb::dropEventOperation(NdbEventOperation* tOp)
  else
    theImpl->m_ev_op= op->m_next;

+  DBUG_PRINT("info", ("first: %s",
+                      theImpl->m_ev_op ? theImpl->m_ev_op->getEvent()->getTable()->getName() : "<empty>"));
  assert(theImpl->m_ev_op == 0 || theImpl->m_ev_op->m_prev == 0);

  theEventBuffer->dropEventOperation(tOp);