Bug #16875 Using stale MySQLD FRM files can cause restored cluster to fail

- invalidate ndb dict cache on cluster disconnect (ClusterMgr.cpp)
- add check in external_lock() that the frm is correct when the table cache is found invalid
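
The hunks below implement these two points: ClusterMgr invalidates the global dictionary cache when the last data node disappears, and ha_ndbcluster::external_lock() verifies the local frm against the cluster's copy before trusting a refreshed table object. As a rough orientation only, here is a minimal self-contained sketch of the same two ideas; CachedTable, DictCacheSketch and frm_matches() are hypothetical names used for illustration, not the NDB classes touched by this patch.

#include <cstring>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for one cached table definition.
struct CachedTable {
  bool valid = true;                    // cleared when the cluster disconnects
  int refcount = 0;                     // handlers currently holding the object
  std::vector<unsigned char> frm;       // packed table definition (frm) blob
};

class DictCacheSketch {
public:
  // Idea 1: when the last data node goes away, every cached definition may be
  // stale once the cluster comes back, so mark them all invalid and bump a
  // connect counter that users can compare against.
  void invalidate_all() {
    for (auto &entry : m_tables)
      entry.second.valid = false;
    ++m_connect_count;
  }

  // Idea 2: before trusting a cached definition again, compare the locally
  // stored frm blob with the authoritative copy; any mismatch means the local
  // definition must be rediscovered and the statement retried.
  bool frm_matches(const std::string &name,
                   const std::vector<unsigned char> &cluster_frm) const {
    auto it = m_tables.find(name);
    if (it == m_tables.end() || !it->second.valid)
      return false;                     // nothing trustworthy cached
    const std::vector<unsigned char> &local = it->second.frm;
    if (local.size() != cluster_frm.size())
      return false;
    return local.empty() ||
           std::memcmp(local.data(), cluster_frm.data(), local.size()) == 0;
  }

  unsigned connect_count() const { return m_connect_count; }

private:
  std::map<std::string, CachedTable> m_tables;
  unsigned m_connect_count = 0;
};

In the actual patch the invalidation happens in ClusterMgr::reportNodeFailed() once noOfAliveNodes reaches zero, and the frm comparison is done with readfrm()/packfrm() against getFrmData()/getFrmLength() in external_lock().
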
drop table if exists t1;
create table t1 (a int key) engine=ndbcluster;
begin;
insert into t1 values (1);
insert into t1 values (2);
ERROR HY000: Got temporary error 4025 'Node failure caused abort of transaction' from ndbcluster
commit;
ERROR HY000: Got error 4350 'Transaction already aborted' from ndbcluster
drop table t1;
-- source include/have_ndb.inc
-- source include/have_multi_ndb.inc
-- source include/not_embedded.inc
--disable_warnings
drop table if exists t1, t2;
--enable_warnings
#
# Transaction ongoing while cluster is restarted
#
--connection server1
create table t1 (a int key) engine=ndbcluster;
begin;
insert into t1 values (1);
--exec $NDB_MGM --no-defaults -e "all restart" >> $NDB_TOOLS_OUTPUT
--exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
--error 1297
insert into t1 values (2);
--error 1296
commit;
drop table t1;
#
# Stale cache after restart -i
#
--connection server1
create table t2 (a int, b int, primary key(a,b)) engine=ndbcluster;
insert into t2 values (1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1);
select * from t2 order by a limit 3;
--exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
--exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
--connection server2
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
# server 1 should have a stale cache, and in this case a wrong frm, so the transaction must be retried (see the retry sketch after this test)
--connection server1
--error 1015
select * from t2 order by a limit 3;
select * from t2 order by a limit 3;
--exec $NDB_MGM --no-defaults -e "all restart -i" >> $NDB_TOOLS_OUTPUT
--exec $NDB_TOOLS_DIR/ndb_waiter --no-defaults >> $NDB_TOOLS_OUTPUT
--connection server1
show tables;
create table t2 (a int key) engine=ndbcluster;
insert into t2 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from t2 order by a limit 3;
# server 2 should have a stale cache, but with the right frm, so the transaction need not be retried
--connection server2
select * from t2 order by a limit 3;
drop table t2;
# End of 4.1 tests
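
The comments in the test above distinguish the two stale-cache cases: with a wrong frm the first statement fails and the application must simply re-issue it, while with a matching frm no retry is needed. Below is a minimal sketch of that application-side retry using the standard MySQL C API; the helper name, the retry-once policy and the connection parameters are illustrative, not part of the patch.

#include <mysql.h>
#include <stdio.h>

/* Issue a statement once more if the first attempt fails, e.g. because the
   server's NDB dictionary cache was stale; by the time of the retry the
   handler has refetched the table definition. */
static int query_with_one_retry(MYSQL *conn, const char *sql)
{
  if (mysql_query(conn, sql) == 0)
    return 0;                                   /* succeeded on first try */
  fprintf(stderr, "retrying after error %u: %s\n",
          mysql_errno(conn), mysql_error(conn));
  return mysql_query(conn, sql);                /* second and last attempt */
}

int main()
{
  MYSQL *conn = mysql_init(NULL);
  /* host, user, password and database are placeholders */
  if (!mysql_real_connect(conn, "127.0.0.1", "user", "pass", "test", 0, NULL, 0))
    return 1;
  int rc = query_with_one_retry(conn, "select * from t2 order by a limit 3");
  mysql_close(conn);
  return rc ? 1 : 0;
}
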
......@@ -83,6 +83,7 @@ public:
void set_optimized_node_selection(int val);
unsigned no_db_nodes();
unsigned get_connect_count() const;
#endif
private:
......
......@@ -70,6 +70,7 @@ ClusterMgr::ClusterMgr(TransporterFacade & _facade):
noOfAliveNodes= 0;
noOfConnectedNodes= 0;
theClusterMgrThread= 0;
m_connect_count = 0;
DBUG_VOID_RETURN;
}
......@@ -456,6 +457,10 @@ ClusterMgr::reportNodeFailed(NodeId nodeId){
theNode.nfCompleteRep = false;
if(noOfAliveNodes == 0){
theFacade.m_globalDictCache.lock();
theFacade.m_globalDictCache.invalidate_all();
theFacade.m_globalDictCache.unlock();
m_connect_count ++;
NFCompleteRep rep;
for(Uint32 i = 1; i<MAX_NODES; i++){
if(theNodes[i].defined && theNodes[i].nfCompleteRep == false){
......
......@@ -78,6 +78,7 @@ public:
const Node & getNodeInfo(NodeId) const;
Uint32 getNoOfConnectedNodes() const;
Uint32 m_connect_count;
private:
Uint32 noOfAliveNodes;
......
......@@ -255,6 +255,42 @@ GlobalDictCache::drop(NdbTableImpl * tab)
abort();
}
unsigned
GlobalDictCache::get_size()
{
NdbElement_t<Vector<TableVersion> > * curr = m_tableHash.getNext(0);
int sz = 0;
while(curr != 0){
sz += curr->theData->size();
curr = m_tableHash.getNext(curr);
}
return sz;
}
void
GlobalDictCache::invalidate_all()
{
DBUG_ENTER("GlobalDictCache::invalidate_all");
NdbElement_t<Vector<TableVersion> > * curr = m_tableHash.getNext(0);
while(curr != 0){
Vector<TableVersion> * vers = curr->theData;
if (vers->size())
{
TableVersion * ver = & vers->back();
ver->m_impl->m_status = NdbDictionary::Object::Invalid;
ver->m_status = DROPPED;
if (ver->m_refCount == 0)
{
delete ver->m_impl;
vers->erase(vers->size() - 1);
}
}
curr = m_tableHash.getNext(curr);
}
DBUG_VOID_RETURN;
}
void
GlobalDictCache::release(NdbTableImpl * tab){
unsigned i;
......
......@@ -71,6 +71,9 @@ public:
void alter_table_rep(const char * name,
Uint32 tableId, Uint32 tableVersion, bool altered);
unsigned get_size();
void invalidate_all();
public:
enum Status {
OK = 0,
......
......@@ -264,6 +264,12 @@ TransporterFacade::unlock_mutex()
#include "ClusterMgr.hpp"
inline
unsigned Ndb_cluster_connection_impl::get_connect_count() const
{
return TransporterFacade::instance()->theClusterMgr->m_connect_count;
}
inline
bool
TransporterFacade::check_send_size(Uint32 node_id, Uint32 send_size)
......
......@@ -236,6 +236,12 @@ Ndb_cluster_connection::wait_until_ready(int timeout,
} while (1);
}
unsigned Ndb_cluster_connection::get_connect_count() const
{
return m_impl.get_connect_count();
}
/*
......
......@@ -49,6 +49,7 @@ class Ndb_cluster_connection_impl : public Ndb_cluster_connection
void init_get_next_node(Ndb_cluster_connection_node_iter &iter);
Uint32 get_next_node(Ndb_cluster_connection_node_iter &iter);
inline unsigned get_connect_count() const;
private:
friend class Ndb;
friend class NdbImpl;
......
......@@ -3306,8 +3306,23 @@ int ha_ndbcluster::external_lock(THD *thd, int lock_type)
{
m_table= (void *)tab;
m_table_version = tab->getObjectVersion();
- if (!(my_errno= build_index_list(ndb, table, ILBP_OPEN)))
+ if ((my_errno= build_index_list(ndb, table, ILBP_OPEN)))
DBUG_RETURN(my_errno);
const void *data, *pack_data;
uint length, pack_length;
if (readfrm(table->path, &data, &length) ||
packfrm(data, length, &pack_data, &pack_length) ||
pack_length != tab->getFrmLength() ||
memcmp(pack_data, tab->getFrmData(), pack_length))
{
my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
NdbError err= ndb->getNdbError(NDB_INVALID_SCHEMA_OBJECT);
DBUG_RETURN(ndb_to_mysql_error(&err));
}
my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
}
m_table_info= tab_info;
}
......