Commit 30df1890 authored by Andrei Elkin's avatar Andrei Elkin

Bug #51648 DBUG_SYNC_POINT is not defined on all platforms and mtr cant pre-check that


DBUG_SYNC_POINT has at least one strong limitation that it's not defined
on all platforms. It has issues cooperating with @@debug.
All in all its functionality is superseded by DEBUG_SYNC facility and
there is no reason to maintain the old less flexible one.

Fixed with adding debug_sync_set_action() function as a facility to set up
a sync-action in the server sources code and re-writing existing simulations
(found 3) to use it.
Couple of tests have been reworked as well.

The patch offers a pattern for setting sync-points in replication threads
where the standard DEBUG_SYNC does not suffice to reach goals.

parent 3f4d8edb
......@@ -3,26 +3,53 @@
# The common part of the "rpl_get_master_version_and_clock" test.
# Restart slave under network disconnection between slave and master
# following the steps:
# 1 - Got DBUG_SYNC_POINT lock
# 2 - Set DBUG_SYNC_POINT before call mysql_real_query(...) function in get_master_version_and_clock(...) function and hang here
# 3 - shutdown master server for simulating network disconnection
# 4 - Release DBUG_SYNC_POINT lock
# 5 - Check if the slave I/O thread tries to reconnect to master.
# 0 - Set DEBUG_SYNC_ACTION to wait
# before call mysql_real_query(...) function in get_master_version_and_clock(...)
# function and hang here
# 1 - activate a sync-point through the $dbug_sync_point argument of the test
# 2 - shutdown master server for simulating network disconnection
# 3 - signal to the IO thread through $debug_sync_action to unhold from the sync-point
# 4 - check if the slave I/O thread tries to reconnect to master.
#
# Note: Please make sure initialize the $debug_lock when call the test script.
# Note: make sure to initialize the $debug_sync_action and $dbug_sync_point
# before calling the test script.
#
# Pattern of usage:
#
# The caller test supplies the DBUG_EXECUTE_IF name
#
# let $dbug_sync_point = 'dbug_execute_if_name';
#
# as well as the action list for DEBUG_SYNC
#
# let $debug_sync_action= 'now signal signal_name';
#
# The $dbug_sync_point becomes the value of @@global.debug generating
# a newly started IO-slave thread's session value.
# Notice incremental operations to add and remove dbug_execute_if_name
# from the global variable allows propagation more dbug arguments
# out of mtr.
# The action list is to fire at proper time according to test logics
# (see pp 0-4 above).
#
connection slave;
if (`SELECT '$debug_lock' = ''`)
if (`SELECT $debug_sync_action = ''`)
{
--die Cannot continue. Please set value for $debug_lock.
--die Cannot continue. Please set value for debug_sync_action.
}
# Restart slave
--disable_warnings
stop slave;
source include/wait_for_slave_to_stop.inc;
eval SET @@global.debug= "+d,$dbug_sync_point";
start slave;
source include/wait_for_slave_to_start.inc;
--echo slave is going to hang in get_master_version_and_clock
connection master;
# Write file to make mysql-test-run.pl expect the "crash", but don't start
# it until it's told to
......@@ -35,7 +62,9 @@ EOF
shutdown_server 10;
connection slave;
eval SELECT RELEASE_LOCK($debug_lock);
--echo slave is unblocked
eval SET DEBUG_SYNC=$debug_sync_action;
# Show slave last IO errno
connection slave;
......@@ -53,6 +82,12 @@ if (`SELECT '$last_io_errno' = '2013' || # CR_SERVER_LOST
--echo NETWORK ERROR
}
# deactivate the sync point of get_master_version_and_clock()
# now to avoid restarting IO-thread to re-enter it.
# There will be a new IO thread forked out with its @@session.debug
# unset.
eval set @@global.debug = "-d,$dbug_sync_point";
# Write file to make mysql-test-run.pl start up the server again
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
restart
......
......@@ -7,34 +7,24 @@ start slave;
call mtr.add_suppression("Slave I/O: Master command COM_REGISTER_SLAVE failed: .*");
call mtr.add_suppression("Fatal error: The slave I/O thread stops because master and slave have equal MySQL server ids; .*");
call mtr.add_suppression("Slave I/O thread .* register on master");
SELECT IS_FREE_LOCK("debug_lock.before_get_UNIX_TIMESTAMP");
IS_FREE_LOCK("debug_lock.before_get_UNIX_TIMESTAMP")
1
SELECT GET_LOCK("debug_lock.before_get_UNIX_TIMESTAMP", 1000);
GET_LOCK("debug_lock.before_get_UNIX_TIMESTAMP", 1000)
1
set global debug= 'd,debug_lock.before_get_UNIX_TIMESTAMP';
stop slave;
SET @@global.debug= "+d,'debug_lock.before_get_UNIX_TIMESTAMP'";
start slave;
SELECT RELEASE_LOCK("debug_lock.before_get_UNIX_TIMESTAMP");
RELEASE_LOCK("debug_lock.before_get_UNIX_TIMESTAMP")
1
slave is going to hang in get_master_version_and_clock
slave is unblocked
SET DEBUG_SYNC='now SIGNAL signal.get_unix_timestamp';
Check network error happened here
NETWORK ERROR
SELECT IS_FREE_LOCK("debug_lock.before_get_SERVER_ID");
IS_FREE_LOCK("debug_lock.before_get_SERVER_ID")
1
SELECT GET_LOCK("debug_lock.before_get_SERVER_ID", 1000);
GET_LOCK("debug_lock.before_get_SERVER_ID", 1000)
1
set global debug= 'd,debug_lock.before_get_SERVER_ID';
set @@global.debug = "-d,'debug_lock.before_get_UNIX_TIMESTAMP'";
stop slave;
SET @@global.debug= "+d,'debug_lock.before_get_SERVER_ID'";
start slave;
SELECT RELEASE_LOCK("debug_lock.before_get_SERVER_ID");
RELEASE_LOCK("debug_lock.before_get_SERVER_ID")
1
slave is going to hang in get_master_version_and_clock
slave is unblocked
SET DEBUG_SYNC='now SIGNAL signal.get_server_id';
Check network error happened here
NETWORK ERROR
set @@global.debug = "-d,'debug_lock.before_get_SERVER_ID'";
set global debug= '';
reset master;
include/stop_slave.inc
......@@ -43,3 +33,4 @@ start slave;
*** must be having the replicate-same-server-id IO thread error ***
Slave_IO_Errno= 1593
Slave_IO_Error= Fatal error: The slave I/O thread stops because master and slave have equal MySQL server ids; these ids must be different for replication to work (or the --replicate-same-server-id option must be used on slave but this does not always make sense; please check the manual before using it).
SET DEBUG_SYNC= 'RESET';
......@@ -4,11 +4,9 @@ reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
SET DEBUG_SYNC= 'RESET';
include/stop_slave.inc
SELECT GET_LOCK("debug_lock.before_get_running_status_yes", 1000);
GET_LOCK("debug_lock.before_get_running_status_yes", 1000)
1
set global debug= 'd,debug_lock.before_get_running_status_yes';
set global debug= 'd,dbug.before_get_running_status_yes';
Slave_running, Slave_IO_Running, Slave_SQL_Running, must be OFF, NO, NO in three following queries
SHOW STATUS LIKE 'Slave_running';
Variable_name Value
......@@ -22,9 +20,7 @@ Variable_name Value
Slave_running OFF
Slave_IO_Running= No
Slave_SQL_Running= No
SELECT RELEASE_LOCK("debug_lock.before_get_running_status_yes");
RELEASE_LOCK("debug_lock.before_get_running_status_yes")
1
SET DEBUG_SYNC='now SIGNAL signal.io_thread_let_running';
Slave_running, Slave_IO_Running, Slave_SQL_Running must be OFF YES NO in three following queries
SHOW STATUS LIKE 'Slave_running';
Variable_name Value
......@@ -39,4 +35,5 @@ Slave_running ON
Slave_IO_Running= Yes
Slave_SQL_Running= Yes
set global debug= '';
SET DEBUG_SYNC= 'RESET';
End of tests
......@@ -16,8 +16,16 @@
source include/master-slave.inc;
source include/have_debug.inc;
source include/have_debug_sync.inc;
#
# The test is not supposed to have any binglog affairs.
# Hence it's enough it to run only with one binlog format
#
source include/have_binlog_format_mixed.inc;
connection slave;
call mtr.add_suppression("Slave I/O: Master command COM_REGISTER_SLAVE failed: .*");
call mtr.add_suppression("Fatal error: The slave I/O thread stops because master and slave have equal MySQL server ids; .*");
call mtr.add_suppression("Slave I/O thread .* register on master");
......@@ -25,20 +33,16 @@ call mtr.add_suppression("Slave I/O thread .* register on master");
#Test case 1: Try to get the value of the UNIX_TIMESTAMP from master under network disconnection
let $debug_saved= `select @@global.debug`;
let $debug_lock= "debug_lock.before_get_UNIX_TIMESTAMP";
eval SELECT IS_FREE_LOCK($debug_lock);
eval SELECT GET_LOCK($debug_lock, 1000);
set global debug= 'd,debug_lock.before_get_UNIX_TIMESTAMP';
# set up two parameters to pass into extra/rpl_tests/rpl_get_master_version_and_clock
let $dbug_sync_point= 'debug_lock.before_get_UNIX_TIMESTAMP';
let $debug_sync_action= 'now SIGNAL signal.get_unix_timestamp';
source extra/rpl_tests/rpl_get_master_version_and_clock.test;
#Test case 2: Try to get the value of the SERVER_ID from master under network disconnection
connection slave;
let $debug_lock= "debug_lock.before_get_SERVER_ID";
eval SELECT IS_FREE_LOCK($debug_lock);
eval SELECT GET_LOCK($debug_lock, 1000);
set global debug= 'd,debug_lock.before_get_SERVER_ID';
let $dbug_sync_point= 'debug_lock.before_get_SERVER_ID';
let $debug_sync_action= 'now SIGNAL signal.get_server_id';
source extra/rpl_tests/rpl_get_master_version_and_clock.test;
eval set global debug= '$debug_saved';
......@@ -62,4 +66,9 @@ let $last_io_error= query_get_value("show slave status", Last_IO_Error, 1);
echo Slave_IO_Errno= $last_io_errno;
echo Slave_IO_Error= $last_io_error;
# cleanup
# is not really necessary but avoids mtr post-run env check warnings
SET DEBUG_SYNC= 'RESET';
# End of tests
......@@ -5,14 +5,14 @@
#
source include/master-slave.inc;
source include/have_debug.inc;
source include/have_debug_sync.inc;
connection slave;
SET DEBUG_SYNC= 'RESET';
source include/stop_slave.inc;
let $debug_saved= `select @@global.debug`;
let $debug_lock= "debug_lock.before_get_running_status_yes";
eval SELECT GET_LOCK($debug_lock, 1000);
set global debug= 'd,debug_lock.before_get_running_status_yes';
set global debug= 'd,dbug.before_get_running_status_yes'; # to block due-started IO
# Test 1. Slave is stopped
......@@ -40,7 +40,7 @@ echo Slave_SQL_Running= $status;
# Test 3. The slave IO thread is started and got connected to master
# and SQL thread is still not started
eval SELECT RELEASE_LOCK($debug_lock);
SET DEBUG_SYNC='now SIGNAL signal.io_thread_let_running'; # unblock IO thread now
let $slave_param= Slave_IO_Running;
let $slave_param_value= YES;
source include/wait_for_slave_param.inc;
......@@ -72,5 +72,5 @@ echo Slave_SQL_Running= $status;
connection slave;
eval set global debug= '$debug_saved';
SET DEBUG_SYNC= 'RESET';
--echo End of tests
......@@ -1903,4 +1903,42 @@ void debug_sync(THD *thd, const char *sync_point_name, size_t name_len)
DBUG_VOID_RETURN;
}
/**
Define debug sync action.
@param[in] thd thread handle
@param[in] action_str action string
@return status
@retval FALSE ok
@retval TRUE error
@description
The function is similar to @c debug_sync_eval_action but is
to be called immediately from the server code rather than
to be triggered by setting a value to DEBUG_SYNC system variable.
@note
The input string is copied prior to be fed to
@c debug_sync_eval_action to let the latter modify it.
Caution.
The function allocates in THD::mem_root and therefore
is not recommended to be deployed inside big loops.
*/
bool debug_sync_set_action(THD *thd, const char *action_str, size_t len)
{
bool rc;
char *value;
DBUG_ENTER("debug_sync_set_action");
DBUG_ASSERT(thd);
DBUG_ASSERT(action_str);
value= strmake_root(thd->mem_root, action_str, len);
rc= debug_sync_eval_action(thd, value);
DBUG_RETURN(rc);
}
#endif /* defined(ENABLED_DEBUG_SYNC) */
......@@ -50,6 +50,7 @@ extern void debug_sync_end(void);
extern void debug_sync_init_thread(THD *thd);
extern void debug_sync_end_thread(THD *thd);
extern void debug_sync(THD *thd, const char *sync_point_name, size_t name_len);
extern bool debug_sync_set_action(THD *thd, const char *action_str, size_t len);
#else /* defined(ENABLED_DEBUG_SYNC) */
......
......@@ -3362,80 +3362,6 @@ longlong Item_master_pos_wait::val_int()
return event_count;
}
#ifdef EXTRA_DEBUG
void debug_sync_point(const char* lock_name, uint lock_timeout)
{
THD* thd=current_thd;
User_level_lock* ull;
struct timespec abstime;
size_t lock_name_len;
lock_name_len= strlen(lock_name);
pthread_mutex_lock(&LOCK_user_locks);
if (thd->ull)
{
item_user_lock_release(thd->ull);
thd->ull=0;
}
/*
If the lock has not been aquired by some client, we do not want to
create an entry for it, since we immediately release the lock. In
this case, we will not be waiting, but rather, just waste CPU and
memory on the whole deal
*/
if (!(ull= ((User_level_lock*) hash_search(&hash_user_locks,
(uchar*) lock_name,
lock_name_len))))
{
pthread_mutex_unlock(&LOCK_user_locks);
return;
}
ull->count++;
/*
Structure is now initialized. Try to get the lock.
Set up control struct to allow others to abort locks
*/
thd_proc_info(thd, "User lock");
thd->mysys_var->current_mutex= &LOCK_user_locks;
thd->mysys_var->current_cond= &ull->cond;
set_timespec(abstime,lock_timeout);
while (ull->locked && !thd->killed)
{
int error= pthread_cond_timedwait(&ull->cond, &LOCK_user_locks, &abstime);
if (error == ETIMEDOUT || error == ETIME)
break;
}
if (ull->locked)
{
if (!--ull->count)
delete ull; // Should never happen
}
else
{
ull->locked=1;
ull->set_thread(thd);
thd->ull=ull;
}
pthread_mutex_unlock(&LOCK_user_locks);
pthread_mutex_lock(&thd->mysys_var->mutex);
thd_proc_info(thd, 0);
thd->mysys_var->current_mutex= 0;
thd->mysys_var->current_cond= 0;
pthread_mutex_unlock(&thd->mysys_var->mutex);
pthread_mutex_lock(&LOCK_user_locks);
if (thd->ull)
{
item_user_lock_release(thd->ull);
thd->ull=0;
}
pthread_mutex_unlock(&LOCK_user_locks);
}
#endif
/**
Get a user level lock. If the thread has an old lock this is first released.
......
......@@ -580,20 +580,6 @@ protected:
/* Used to check GROUP BY list in the MODE_ONLY_FULL_GROUP_BY mode */
#define UNDEF_POS (-1)
#ifdef EXTRA_DEBUG
/**
Sync points allow us to force the server to reach a certain line of code
and block there until the client tells the server it is ok to go on.
The client tells the server to block with SELECT GET_LOCK()
and unblocks it with SELECT RELEASE_LOCK(). Used for debugging difficult
concurrency problems
*/
#define DBUG_SYNC_POINT(lock_name,lock_timeout) \
debug_sync_point(lock_name,lock_timeout)
void debug_sync_point(const char* lock_name, uint lock_timeout);
#else
#define DBUG_SYNC_POINT(lock_name,lock_timeout)
#endif /* EXTRA_DEBUG */
/* BINLOG_DUMP options */
......
......@@ -44,6 +44,7 @@
#ifdef HAVE_REPLICATION
#include "rpl_tblmap.h"
#include "debug_sync.h"
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
......@@ -981,7 +982,16 @@ static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
unavailable (very old master not supporting UNIX_TIMESTAMP()?).
*/
DBUG_SYNC_POINT("debug_lock.before_get_UNIX_TIMESTAMP", 10);
DBUG_EXECUTE_IF("dbug.before_get_UNIX_TIMESTAMP",
{
const char act[]=
"now "
"wait_for signal.get_unix_timestamp";
DBUG_ASSERT(opt_debug_sync_timeout > 0);
DBUG_ASSERT(!debug_sync_set_action(current_thd,
STRING_WITH_LEN(act)));
};);
master_res= NULL;
if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()")) &&
(master_res= mysql_store_result(mysql)) &&
......@@ -1020,7 +1030,15 @@ static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
Note: we could have put a @@SERVER_ID in the previous SELECT
UNIX_TIMESTAMP() instead, but this would not have worked on 3.23 masters.
*/
DBUG_SYNC_POINT("debug_lock.before_get_SERVER_ID", 10);
DBUG_EXECUTE_IF("dbug.before_get_SERVER_ID",
{
const char act[]=
"now "
"wait_for signal.get_server_id";
DBUG_ASSERT(opt_debug_sync_timeout > 0);
DBUG_ASSERT(!debug_sync_set_action(current_thd,
STRING_WITH_LEN(act)));
};);
master_res= NULL;
master_row= NULL;
if (!mysql_real_query(mysql,
......@@ -2557,7 +2575,16 @@ pthread_handler_t handle_slave_io(void *arg)
connected:
DBUG_SYNC_POINT("debug_lock.before_get_running_status_yes", 10);
DBUG_EXECUTE_IF("dbug.before_get_running_status_yes",
{
const char act[]=
"now "
"wait_for signal.io_thread_let_running";
DBUG_ASSERT(opt_debug_sync_timeout > 0);
DBUG_ASSERT(!debug_sync_set_action(thd,
STRING_WITH_LEN(act)));
};);
// TODO: the assignment below should be under mutex (5.0)
mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
thd->slave_net = &mysql->net;
......
......@@ -1711,7 +1711,6 @@ int log_loaded_block(IO_CACHE* file)
if (mysql_bin_log.write(&b))
DBUG_RETURN(1);
lf_info->wrote_create_file= 1;
DBUG_SYNC_POINT("debug_lock.created_file_event",10);
}
}
DBUG_RETURN(0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment