Bug#30273 - merge tables: Can't lock file (errno: 155)

The patch for Bug 26379 (Combination of FLUSH TABLE and
REPAIR TABLE corrupts a MERGE table) fixed this bug too.
However it revealed a new bug that crashed the server.

Flushing a merge table at the moment when it is between open
and attach of children crashed the server.

The flushing thread wants to abort locks on the flushed table.
It calls ha_myisammrg::lock_count() and ha_myisammrg::store_lock()
on the TABLE object of the other thread.

Changed ha_myisammrg::lock_count() and ha_myisammrg::store_lock()
to accept non-attached children. ha_myisammrg::lock_count() returns
the number of MyISAM tables in the MERGE table so that the memory
allocation done by get_lock_data() is done correctly, even if the
children become attached before ha_myisammrg::store_lock() is
called. ha_myisammrg::store_lock() will not return any lock if the
children are not attached.

This is however a change in the handler interface. lock_count()
can now return a higher number than store_lock() stores locks.
This is more safe than the reverse implementation would be.
get_lock_data() in the SQL layer is adjusted accordingly. It sets
MYSQL_LOCK::lock_count based on the number of locks returned by
the handler::store_lock() calls, not based on the numbers returned
by the handler::lock_count() calls. The latter are only used for
allocation of memory now.

No test case. The test suite cannot reliably run FLUSH between
lock_count() and store_lock() of another thread. The bug report
contains a program that can repeat the problem with some
probability.
parent 413112fb
...@@ -74,6 +74,7 @@ typedef struct st_myrg_info ...@@ -74,6 +74,7 @@ typedef struct st_myrg_info
LIST open_list; LIST open_list;
QUEUE by_key; QUEUE by_key;
ulong *rec_per_key_part; /* for sql optimizing */ ulong *rec_per_key_part; /* for sql optimizing */
pthread_mutex_t mutex;
} MYRG_INFO; } MYRG_INFO;
......
...@@ -1637,13 +1637,22 @@ public: ...@@ -1637,13 +1637,22 @@ public:
virtual int repair_partitions(THD *thd) virtual int repair_partitions(THD *thd)
{ return HA_ERR_WRONG_COMMAND; } { return HA_ERR_WRONG_COMMAND; }
/* lock_count() can be more than one if the table is a MERGE */ /**
@note lock_count() can return > 1 if the table is MERGE or partitioned.
*/
virtual uint lock_count(void) const { return 1; } virtual uint lock_count(void) const { return 1; }
/** /**
Is not invoked for non-transactional temporary tables. Is not invoked for non-transactional temporary tables.
@note store_lock() can return more than one lock if the table is MERGE
or partitioned.
@note that one can NOT rely on table->in_use in store_lock(). It may @note that one can NOT rely on table->in_use in store_lock(). It may
refer to a different thread if called from mysql_lock_abort_for_thread(). refer to a different thread if called from mysql_lock_abort_for_thread().
@note If the table is MERGE, store_lock() can return less locks
than lock_count() claimed. This can happen when the MERGE children
are not attached when this is called from another thread.
*/ */
virtual THR_LOCK_DATA **store_lock(THD *thd, virtual THR_LOCK_DATA **store_lock(THD *thd,
THR_LOCK_DATA **to, THR_LOCK_DATA **to,
......
...@@ -847,9 +847,6 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, ...@@ -847,9 +847,6 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
locks= locks_buf= sql_lock->locks= (THR_LOCK_DATA**) (sql_lock + 1); locks= locks_buf= sql_lock->locks= (THR_LOCK_DATA**) (sql_lock + 1);
to= table_buf= sql_lock->table= (TABLE**) (locks + tables * 2); to= table_buf= sql_lock->table= (TABLE**) (locks + tables * 2);
sql_lock->table_count=lock_count; sql_lock->table_count=lock_count;
sql_lock->lock_count=tables;
DBUG_PRINT("info", ("sql_lock->table_count %d sql_lock->lock_count %d",
sql_lock->table_count, sql_lock->lock_count));
for (i=0 ; i < count ; i++) for (i=0 ; i < count ; i++)
{ {
...@@ -889,6 +886,23 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count, ...@@ -889,6 +886,23 @@ static MYSQL_LOCK *get_lock_data(THD *thd, TABLE **table_ptr, uint count,
for ( ; org_locks != locks ; org_locks++) for ( ; org_locks != locks ; org_locks++)
(*org_locks)->debug_print_param= (void *) table; (*org_locks)->debug_print_param= (void *) table;
} }
/*
We do not use 'tables', because there are cases where store_lock()
returns less locks than lock_count() claimed. This can happen when
a FLUSH TABLES tries to abort locks from a MERGE table of another
thread. When that thread has just opened the table, but not yet
attached its children, it cannot return the locks. lock_count()
always returns the number of locks that an attached table has.
This is done to avoid the reverse situation: If lock_count() would
return 0 for a non-attached MERGE table, and that table becomes
attached between the calls to lock_count() and store_lock(), then
we would have allocated too little memory for the lock data. Now
we may allocate too much, but better safe than memory overrun.
And in the FLUSH case, the memory is released quickly anyway.
*/
sql_lock->lock_count= locks - locks_buf;
DBUG_PRINT("info", ("sql_lock->table_count %d sql_lock->lock_count %d",
sql_lock->table_count, sql_lock->lock_count));
DBUG_RETURN(sql_lock); DBUG_RETURN(sql_lock);
} }
......
...@@ -8377,7 +8377,7 @@ int abort_and_upgrade_lock(ALTER_PARTITION_PARAM_TYPE *lpt) ...@@ -8377,7 +8377,7 @@ int abort_and_upgrade_lock(ALTER_PARTITION_PARAM_TYPE *lpt)
We also downgrade locks after the upgrade to WRITE_ONLY We also downgrade locks after the upgrade to WRITE_ONLY
*/ */
/* purecov: begin unused */ /* purecov: begin deadcode */
void close_open_tables_and_downgrade(ALTER_PARTITION_PARAM_TYPE *lpt) void close_open_tables_and_downgrade(ALTER_PARTITION_PARAM_TYPE *lpt)
{ {
VOID(pthread_mutex_lock(&LOCK_open)); VOID(pthread_mutex_lock(&LOCK_open));
......
...@@ -901,7 +901,14 @@ int ha_myisammrg::external_lock(THD *thd, int lock_type) ...@@ -901,7 +901,14 @@ int ha_myisammrg::external_lock(THD *thd, int lock_type)
uint ha_myisammrg::lock_count(void) const uint ha_myisammrg::lock_count(void) const
{ {
DBUG_ASSERT(this->file->children_attached); /*
Return the real lock count even if the children are not attached.
This method is used for allocating memory. If we would return 0
to another thread (e.g. doing FLUSH TABLE), and attach the children
before the other thread calls store_lock(), then we would return
more locks in store_lock() than we claimed by lock_count(). The
other tread would overrun its memory.
*/
return file->tables; return file->tables;
} }
...@@ -911,7 +918,24 @@ THR_LOCK_DATA **ha_myisammrg::store_lock(THD *thd, ...@@ -911,7 +918,24 @@ THR_LOCK_DATA **ha_myisammrg::store_lock(THD *thd,
enum thr_lock_type lock_type) enum thr_lock_type lock_type)
{ {
MYRG_TABLE *open_table; MYRG_TABLE *open_table;
DBUG_ASSERT(this->file->children_attached);
/*
This method can be called while another thread is attaching the
children. If the processor reorders instructions or write to memory,
'children_attached' could be set before 'open_tables' has all the
pointers to the children. Use of a mutex here and in
myrg_attach_children() forces consistent data.
*/
pthread_mutex_lock(&this->file->mutex);
/*
When MERGE table is open, but not yet attached, other threads
could flush it, which means call mysql_lock_abort_for_thread()
on this threads TABLE. 'children_attached' is FALSE in this
situaton. Since the table is not locked, return no lock data.
*/
if (!this->file->children_attached)
goto end; /* purecov: tested */
for (open_table=file->open_tables ; for (open_table=file->open_tables ;
open_table != file->end_table ; open_table != file->end_table ;
...@@ -921,6 +945,9 @@ THR_LOCK_DATA **ha_myisammrg::store_lock(THD *thd, ...@@ -921,6 +945,9 @@ THR_LOCK_DATA **ha_myisammrg::store_lock(THD *thd,
if (lock_type != TL_IGNORE && open_table->table->lock.type == TL_UNLOCK) if (lock_type != TL_IGNORE && open_table->table->lock.type == TL_UNLOCK)
open_table->table->lock.type=lock_type; open_table->table->lock.type=lock_type;
} }
end:
pthread_mutex_unlock(&this->file->mutex);
return to; return to;
} }
......
...@@ -58,6 +58,7 @@ int myrg_close(MYRG_INFO *info) ...@@ -58,6 +58,7 @@ int myrg_close(MYRG_INFO *info)
pthread_mutex_lock(&THR_LOCK_open); pthread_mutex_lock(&THR_LOCK_open);
myrg_open_list=list_delete(myrg_open_list,&info->open_list); myrg_open_list=list_delete(myrg_open_list,&info->open_list);
pthread_mutex_unlock(&THR_LOCK_open); pthread_mutex_unlock(&THR_LOCK_open);
VOID(pthread_mutex_destroy(&info->mutex));
my_free((uchar*) info,MYF(0)); my_free((uchar*) info,MYF(0));
if (error) if (error)
{ {
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
myrg_attach_children(). Please duplicate changes in these myrg_attach_children(). Please duplicate changes in these
functions or make common sub-functions. functions or make common sub-functions.
*/ */
/* purecov: begin unused */ /* purecov: begin deadcode */ /* not used in MySQL server */
MYRG_INFO *myrg_open(const char *name, int mode, int handle_locking) MYRG_INFO *myrg_open(const char *name, int mode, int handle_locking)
{ {
...@@ -171,6 +171,7 @@ MYRG_INFO *myrg_open(const char *name, int mode, int handle_locking) ...@@ -171,6 +171,7 @@ MYRG_INFO *myrg_open(const char *name, int mode, int handle_locking)
VOID(my_close(fd,MYF(0))); VOID(my_close(fd,MYF(0)));
end_io_cache(&file); end_io_cache(&file);
VOID(pthread_mutex_init(&m_info->mutex, MY_MUTEX_INIT_FAST));
m_info->open_list.data=(void*) m_info; m_info->open_list.data=(void*) m_info;
pthread_mutex_lock(&THR_LOCK_open); pthread_mutex_lock(&THR_LOCK_open);
myrg_open_list=list_add(myrg_open_list,&m_info->open_list); myrg_open_list=list_add(myrg_open_list,&m_info->open_list);
...@@ -328,6 +329,7 @@ MYRG_INFO *myrg_parent_open(const char *parent_name, ...@@ -328,6 +329,7 @@ MYRG_INFO *myrg_parent_open(const char *parent_name,
end_io_cache(&file_cache); end_io_cache(&file_cache);
VOID(my_close(fd, MYF(0))); VOID(my_close(fd, MYF(0)));
VOID(pthread_mutex_init(&m_info->mutex, MY_MUTEX_INIT_FAST));
m_info->open_list.data= (void*) m_info; m_info->open_list.data= (void*) m_info;
pthread_mutex_lock(&THR_LOCK_open); pthread_mutex_lock(&THR_LOCK_open);
...@@ -393,6 +395,14 @@ int myrg_attach_children(MYRG_INFO *m_info, int handle_locking, ...@@ -393,6 +395,14 @@ int myrg_attach_children(MYRG_INFO *m_info, int handle_locking,
DBUG_ENTER("myrg_attach_children"); DBUG_ENTER("myrg_attach_children");
DBUG_PRINT("myrg", ("handle_locking: %d", handle_locking)); DBUG_PRINT("myrg", ("handle_locking: %d", handle_locking));
/*
This function can be called while another thread is trying to abort
locks of this MERGE table. If the processor reorders instructions or
write to memory, 'children_attached' could be set before
'open_tables' has all the pointers to the children. Use of a mutex
here and in ha_myisammrg::store_lock() forces consistent data.
*/
pthread_mutex_lock(&m_info->mutex);
rc= 1; rc= 1;
errpos= 0; errpos= 0;
file_offset= 0; file_offset= 0;
...@@ -464,6 +474,7 @@ int myrg_attach_children(MYRG_INFO *m_info, int handle_locking, ...@@ -464,6 +474,7 @@ int myrg_attach_children(MYRG_INFO *m_info, int handle_locking,
m_info->keys= min_keys; m_info->keys= min_keys;
m_info->last_used_table= m_info->open_tables; m_info->last_used_table= m_info->open_tables;
m_info->children_attached= TRUE; m_info->children_attached= TRUE;
pthread_mutex_unlock(&m_info->mutex);
DBUG_RETURN(0); DBUG_RETURN(0);
err: err:
...@@ -473,6 +484,7 @@ err: ...@@ -473,6 +484,7 @@ err:
my_free((char*) m_info->rec_per_key_part, MYF(0)); my_free((char*) m_info->rec_per_key_part, MYF(0));
m_info->rec_per_key_part= NULL; m_info->rec_per_key_part= NULL;
} }
pthread_mutex_unlock(&m_info->mutex);
my_errno= save_errno; my_errno= save_errno;
DBUG_RETURN(1); DBUG_RETURN(1);
} }
...@@ -494,6 +506,8 @@ err: ...@@ -494,6 +506,8 @@ err:
int myrg_detach_children(MYRG_INFO *m_info) int myrg_detach_children(MYRG_INFO *m_info)
{ {
DBUG_ENTER("myrg_detach_children"); DBUG_ENTER("myrg_detach_children");
/* For symmetry with myrg_attach_children() we use the mutex here. */
pthread_mutex_lock(&m_info->mutex);
if (m_info->tables) if (m_info->tables)
{ {
/* Do not attach/detach an empty child list. */ /* Do not attach/detach an empty child list. */
...@@ -504,6 +518,7 @@ int myrg_detach_children(MYRG_INFO *m_info) ...@@ -504,6 +518,7 @@ int myrg_detach_children(MYRG_INFO *m_info)
m_info->del= 0; m_info->del= 0;
m_info->data_file_length= 0; m_info->data_file_length= 0;
m_info->options= 0; m_info->options= 0;
pthread_mutex_unlock(&m_info->mutex);
DBUG_RETURN(0); DBUG_RETURN(0);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment