Commit 04723cf7 authored by monty@mashka.mysql.fi's avatar monty@mashka.mysql.fi

Give better error from reconnect()

Fixed hang in start_slave_threads() when thread dies quickly.
parent 3b81549d
......@@ -50380,6 +50380,8 @@ Fixed some problems with @code{CREATE TABLE ... SELECT function()}.
@code{mysqld} now has the option @code{--temp-pool} enabled by default as this
gives better performance with some operating systems.
@item
Fixed hang in @code{CHANGE MASTER TO} if the slave thread died very quickly.
@item
Big cleanup in replication code (less logging, better error messages, etc..)
@item
If the @code{--code-file} option is specified, the server calls
......@@ -1042,11 +1042,11 @@ int do_let(struct st_query* q)
if (!*p)
die("Missing variable name in let\n");
var_name = p;
while(*p && (*p != '=' || isspace(*p)))
while (*p && (*p != '=' || isspace(*p)))
p++;
var_name_end = p;
if (*p == '=') p++;
while(*p && isspace(*p))
while (*p && isspace(*p))
p++;
var_val_start = p;
return var_set(var_name, var_name_end, var_val_start, q->end);
......@@ -1054,9 +1054,10 @@ int do_let(struct st_query* q)
int do_rpl_probe(struct st_query* q __attribute__((unused)))
{
DBUG_ENTER("do_rpl_probe");
if (mysql_rpl_probe(&cur_con->mysql))
die("Failed in mysql_rpl_probe(): %s", mysql_error(&cur_con->mysql));
return 0;
die("Failed in mysql_rpl_probe(): '%s'", mysql_error(&cur_con->mysql));
DBUG_RETURN(0);
}
int do_enable_rpl_parse(struct st_query* q __attribute__((unused)))
......@@ -1077,7 +1078,7 @@ int do_sleep(struct st_query* q, my_bool real_sleep)
char* p=q->first_argument;
struct timeval t;
int dec_mul = 1000000;
while(*p && isspace(*p)) p++;
while (*p && isspace(*p)) p++;
if (!*p)
die("Missing argument in sleep\n");
t.tv_usec = 0;
......@@ -1097,7 +1098,7 @@ int do_sleep(struct st_query* q, my_bool real_sleep)
else
{
t.tv_sec = atoi(p);
while(*p && *p != '.' && !isspace(*p))
while (*p && *p != '.' && !isspace(*p))
p++;
if (*p == '.')
{
......@@ -1308,7 +1309,7 @@ int select_connection(struct st_query* q)
if (!*p)
die("Missing connection name in connect\n");
name = p;
while(*p && !isspace(*p))
while (*p && !isspace(*p))
p++;
*p = 0;
......@@ -1334,7 +1335,7 @@ int close_connection(struct st_query* q)
if (!*p)
die("Missing connection name in connect\n");
name = p;
while(*p && !isspace(*p))
while (*p && !isspace(*p))
p++;
*p = 0;
......@@ -1790,7 +1791,7 @@ int read_query(struct st_query** q_ptr)
}
}
while(*p && isspace(*p)) p++ ;
while (*p && isspace(*p)) p++ ;
if (*p == '@')
{
p++;
......@@ -2503,7 +2504,7 @@ int main(int argc, char** argv)
if (!processed)
{
current_line_inc = 0;
switch(q->type) {
switch (q->type) {
case Q_WHILE: do_while(q); break;
case Q_END_BLOCK: do_done(q); break;
default: current_line_inc = 1; break;
......
......@@ -1160,14 +1160,15 @@ static void expand_error(MYSQL* mysql, int error)
static int get_master(MYSQL* mysql, MYSQL_RES* res, MYSQL_ROW row)
{
MYSQL* master;
DBUG_ENTER("get_master");
if (mysql_num_fields(res) < 3)
return 1; /* safety */
DBUG_RETURN(1); /* safety */
/* use the same username and password as the original connection */
if (!(master = spawn_init(mysql, row[0], atoi(row[2]), 0, 0)))
return 1;
DBUG_RETURN(1);
mysql->master = master;
return 0;
DBUG_RETURN(0);
}
......@@ -1183,18 +1184,19 @@ static int get_slaves_from_master(MYSQL* mysql)
int error = 1;
int has_auth_info;
int port_ind;
DBUG_ENTER("get_slaves_from_master");
if (!mysql->net.vio && !mysql_real_connect(mysql,0,0,0,0,0,0,0))
{
expand_error(mysql, CR_PROBE_MASTER_CONNECT);
return 1;
DBUG_RETURN(1);
}
if (mysql_query(mysql, "SHOW SLAVE HOSTS") ||
!(res = mysql_store_result(mysql)))
{
expand_error(mysql, CR_PROBE_SLAVE_HOSTS);
return 1;
DBUG_RETURN(1);
}
switch (mysql_num_fields(res)) {
......@@ -1238,15 +1240,17 @@ static int get_slaves_from_master(MYSQL* mysql)
err:
if (res)
mysql_free_result(res);
return error;
DBUG_RETURN(error);
}
int STDCALL mysql_rpl_probe(MYSQL* mysql)
{
MYSQL_RES* res = 0;
MYSQL_RES *res= 0;
MYSQL_ROW row;
int error = 1;
DBUG_ENTER("mysql_rpl_probe");
/*
First determine the replication role of the server we connected to
the most reliable way to do this is to run SHOW SLAVE STATUS and see
......@@ -1259,7 +1263,7 @@ int STDCALL mysql_rpl_probe(MYSQL* mysql)
!(res = mysql_store_result(mysql)))
{
expand_error(mysql, CR_PROBE_SLAVE_STATUS);
return 1;
DBUG_RETURN(1);
}
row= mysql_fetch_row(res);
......@@ -1284,7 +1288,7 @@ int STDCALL mysql_rpl_probe(MYSQL* mysql)
err:
if (res)
mysql_free_result(res);
return error;
DBUG_RETURN(error);
}
......@@ -1979,7 +1983,11 @@ static my_bool mysql_reconnect(MYSQL *mysql)
if (!mysql_real_connect(&tmp_mysql,mysql->host,mysql->user,mysql->passwd,
mysql->db, mysql->port, mysql->unix_socket,
mysql->client_flag))
{
mysql->net.last_errno= tmp_mysql.net.last_errno;
strmov(mysql->net.last_error, tmp_mysql.net.last_error);
DBUG_RETURN(1);
}
tmp_mysql.free_me=mysql->free_me;
mysql->free_me=0;
mysql_close(mysql);
......@@ -2060,7 +2068,7 @@ mysql_close(MYSQL *mysql)
mysql->status=MYSQL_STATUS_READY; /* Force command */
mysql->reconnect=0;
simple_command(mysql,COM_QUIT,NullS,0,1);
end_server(mysql);
end_server(mysql); /* Sets mysql->net.vio= 0 */
}
my_free((gptr) mysql->host_info,MYF(MY_ALLOW_ZERO_PTR));
my_free(mysql->user,MYF(MY_ALLOW_ZERO_PTR));
......@@ -2082,7 +2090,6 @@ mysql_close(MYSQL *mysql)
/* Clear pointers for better safety */
mysql->host_info=mysql->user=mysql->passwd=mysql->db=0;
bzero((char*) &mysql->options,sizeof(mysql->options));
mysql->net.vio = 0;
/* free/close slave list */
if (mysql->rpl_pivot)
......
......@@ -76,7 +76,7 @@ a
testing temporary tables part 2
show slave status;
Master_Host Master_User Master_Port Connect_retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_do_db Replicate_ignore_db Last_errno Last_error Skip_counter Exec_master_log_pos Relay_log_space
127.0.0.1 root MASTER_PORT 60 master-bin.006 838 slave-relay-bin.004 1816 master-bin.006 Yes Yes 0 0 838 1816
127.0.0.1 root MASTER_PORT 60 master-bin.006 838 slave-relay-bin.001 8034 master-bin.006 Yes Yes 0 0 838 8034
lock tables t3 read;
select count(*) from t3 where n >= 4;
count(*)
......
......@@ -9,13 +9,14 @@ sync_with_master;
--replace_result 3306 MASTER_PORT 9306 MASTER_PORT 3334 MASTER_PORT 3336 MASTER_PORT
show slave status;
change master to master_log_pos=73;
sleep 5;
slave stop;
change master to master_log_pos=73;
--replace_result 3306 MASTER_PORT 9306 MASTER_PORT 3334 MASTER_PORT 3336 MASTER_PORT
show slave status;
slave start;
sleep 2;
sleep 5;
--replace_result 3306 MASTER_PORT 9306 MASTER_PORT 3334 MASTER_PORT 3336 MASTER_PORT
show slave status;
change master to master_log_pos=173;
......
......@@ -14,9 +14,11 @@ connect (slave,localhost,root,,test,0,slave.sock);
system cat /dev/null > var/slave-data/master.info;
system chmod 000 var/slave-data/master.info;
connection slave;
!slave start;
--error 1201
slave start;
system chmod 600 var/slave-data/master.info;
!slave start;
--error 1201
slave start;
--replace_result 3306 MASTER_PORT 9306 MASTER_PORT 3334 MASTER_PORT 3336 MASTER_PORT
!eval change master to master_host='127.0.0.1',master_port=$MASTER_MYPORT,
master_user='root';
......
......@@ -12,7 +12,7 @@ $opt_f= 0;
$opt_t= 0;
$opt_a = "";
$BSD = -f '/vmunix' || $ENV{"OS"} eq "SunOS4";
$BSD = -f '/vmunix' || $ENV{"OS"} eq "SunOS4" || $^O eq 'darwin';
$LINUX = $^O eq 'linux';
$pscmd = $BSD ? "/bin/ps -auxww" : $LINUX ? "/bin/ps axuw" : "/bin/ps -ef";
......
......@@ -414,10 +414,8 @@ my_bool mc_mysql_reconnect(MYSQL *mysql)
mysql->db, mysql->port, mysql->unix_socket,
mysql->client_flag, mysql->net.read_timeout))
{
#ifdef NOT_USED
mysql->net.last_errno=CR_RECONNECT_FAILED;
strmov(mysql->net.last_error, ER(mysql->net.last_errno));
#endif
mysql->net.last_errno= tmp_mysql.net.last_errno;
strmov(mysql->net.last_error, tmp_mysql.net.last_error);
DBUG_RETURN(1);
}
tmp_mysql.free_me=mysql->free_me;
......@@ -888,7 +886,6 @@ mc_mysql_close(MYSQL *mysql)
/* Clear pointers for better safety */
mysql->host_info=mysql->user=mysql->passwd=mysql->db=0;
bzero((char*) &mysql->options,sizeof(mysql->options));
mysql->net.vio = 0;
#ifdef HAVE_OPENSSL
mysql_ssl_clear(mysql);
#endif /* HAVE_OPENSSL */
......@@ -976,13 +973,13 @@ mc_unpack_fields(MYSQL_DATA *data,MEM_ROOT *alloc,uint fields,
DBUG_RETURN(result);
}
int
mc_mysql_send_query(MYSQL* mysql, const char* query, uint length)
int mc_mysql_send_query(MYSQL* mysql, const char* query, uint length)
{
return mc_simple_command(mysql, COM_QUERY, query, length, 1);
}
int mc_mysql_read_query_result(MYSQL *mysql)
int mc_mysql_read_query_result(MYSQL *mysql)
{
uchar *pos;
ulong field_count;
......
......@@ -443,14 +443,18 @@ int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock,
}
int start_slave_thread(pthread_handler h_func, pthread_mutex_t* start_lock,
int start_slave_thread(pthread_handler h_func, pthread_mutex_t *start_lock,
pthread_mutex_t *cond_lock,
pthread_cond_t* start_cond,
volatile bool* slave_running,
pthread_cond_t *start_cond,
volatile bool *slave_running,
volatile ulong *slave_run_id,
MASTER_INFO* mi)
{
pthread_t th;
ulong start_id;
DBUG_ASSERT(mi->inited);
DBUG_ENTER("start_slave_thread");
if (start_lock)
pthread_mutex_lock(start_lock);
if (!server_id)
......@@ -460,7 +464,7 @@ int start_slave_thread(pthread_handler h_func, pthread_mutex_t* start_lock,
if (start_lock)
pthread_mutex_unlock(start_lock);
sql_print_error("Server id not set, will not start slave");
return ER_BAD_SLAVE;
DBUG_RETURN(ER_BAD_SLAVE);
}
if (*slave_running)
......@@ -469,39 +473,36 @@ int start_slave_thread(pthread_handler h_func, pthread_mutex_t* start_lock,
pthread_cond_broadcast(start_cond);
if (start_lock)
pthread_mutex_unlock(start_lock);
return ER_SLAVE_MUST_STOP;
DBUG_RETURN(ER_SLAVE_MUST_STOP);
}
start_id= *slave_run_id;
DBUG_PRINT("info",("Creating new slave thread"));
if (pthread_create(&th, &connection_attrib, h_func, (void*)mi))
{
if (start_lock)
pthread_mutex_unlock(start_lock);
return ER_SLAVE_THREAD;
DBUG_RETURN(ER_SLAVE_THREAD);
}
if (start_cond && cond_lock)
{
THD* thd = current_thd;
while (!*slave_running)
while (start_id == *slave_run_id)
{
DBUG_PRINT("sleep",("Waiting for slave thread to start"));
const char* old_msg = thd->enter_cond(start_cond,cond_lock,
"Waiting for slave thread to start");
pthread_cond_wait(start_cond,cond_lock);
thd->exit_cond(old_msg);
/*
TODO: in a very rare case of init_slave_thread failing, it is
possible that we can get stuck here since slave_running will not
be set. We need to change slave_running to int and have -1 as
error code.
*/
if (thd->killed)
{
pthread_mutex_unlock(cond_lock);
return ER_SERVER_SHUTDOWN;
DBUG_RETURN(ER_SERVER_SHUTDOWN);
}
}
}
if (start_lock)
pthread_mutex_unlock(start_lock);
return 0;
DBUG_RETURN(0);
}
......@@ -535,13 +536,15 @@ int start_slave_threads(bool need_slave_mutex, bool wait_for_start,
if (thread_mask & SLAVE_IO)
error=start_slave_thread(handle_slave_io,lock_io,lock_cond_io,
cond_io,&mi->slave_running,
cond_io,
&mi->slave_running, &mi->slave_run_id,
mi);
if (!error && (thread_mask & SLAVE_SQL))
{
error=start_slave_thread(handle_slave_sql,lock_sql,lock_cond_sql,
cond_sql,
&mi->rli.slave_running,mi);
&mi->rli.slave_running, &mi->rli.slave_run_id,
mi);
if (error)
terminate_slave_threads(mi, thread_mask & SLAVE_IO, 0);
}
......@@ -1807,23 +1810,30 @@ This may also be a network problem, or just a bug in the master or slave code.\
/* slave I/O thread */
pthread_handler_decl(handle_slave_io,arg)
{
THD *thd; // needs to be first for thread_stack
MYSQL *mysql;
MASTER_INFO *mi = (MASTER_INFO*)arg;
char llbuff[22];
uint retry_count;
// needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
my_thread_init();
#ifndef DBUG_OFF
slave_begin:
#endif
THD *thd; // needs to be first for thread_stack
MYSQL *mysql = NULL ;
MASTER_INFO* mi = (MASTER_INFO*)arg;
char llbuff[22];
uint retry_count= 0;
DBUG_ASSERT(mi->inited);
mysql= NULL ;
retry_count= 0;
pthread_mutex_lock(&mi->run_lock);
/* Inform waiting threads that slave has started */
mi->slave_run_id++;
#ifndef DBUG_OFF
mi->events_till_abort = abort_slave_event_count;
#endif
// needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
my_thread_init();
thd= new THD; // note that contructor of THD uses DBUG_ !
DBUG_ENTER("handle_slave_io");
THD_CHECK_SENTRY(thd);
......@@ -2071,26 +2081,32 @@ err:
pthread_handler_decl(handle_slave_sql,arg)
{
#ifndef DBUG_OFF
slave_begin:
#endif
THD *thd; /* needs to be first for thread_stack */
char llbuff[22],llbuff1[22];
RELAY_LOG_INFO* rli = &((MASTER_INFO*)arg)->rli;
const char* errmsg=0;
const char *errmsg;
// needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
my_thread_init();
#ifndef DBUG_OFF
slave_begin:
#endif
DBUG_ASSERT(rli->inited);
pthread_mutex_lock(&rli->run_lock);
DBUG_ASSERT(!rli->slave_running);
errmsg= 0;
#ifndef DBUG_OFF
rli->events_till_abort = abort_slave_event_count;
#endif
// needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
my_thread_init();
thd = new THD; // note that contructor of THD uses DBUG_ !
DBUG_ENTER("handle_slave_sql");
thd = new THD; // note that contructor of THD uses DBUG_ !
THD_CHECK_SENTRY(thd);
/* Inform waiting threads that slave has started */
rli->slave_run_id++;
pthread_detach_this_thread();
if (init_slave_thread(thd, SLAVE_THD_SQL))
{
......
......@@ -154,6 +154,7 @@ typedef struct st_relay_log_info
*/
volatile uint32 slave_skip_counter;
volatile ulong abort_pos_wait; /* Incremented on change master */
volatile ulong slave_run_id; /* Incremented on slave start */
pthread_mutex_t log_space_lock;
pthread_cond_t log_space_cond;
THD * sql_thd;
......@@ -171,8 +172,8 @@ typedef struct st_relay_log_info
st_relay_log_info()
:info_fd(-1),cur_log_fd(-1), cur_log_old_open_count(0), abort_pos_wait(0),
inited(0), abort_slave(0), slave_running(0), log_pos_current(0),
skip_log_purge(0)
slave_run_id(0), inited(0), abort_slave(0), slave_running(0),
log_pos_current(0), skip_log_purge(0)
{
relay_log_name[0] = master_log_name[0] = 0;
bzero(&info_file,sizeof(info_file));
......@@ -283,11 +284,13 @@ typedef struct st_master_info
bool inited;
bool old_format; /* master binlog is in 3.23 format */
volatile bool abort_slave, slave_running;
volatile ulong slave_run_id;
bool ignore_stop_event;
st_master_info():fd(-1), io_thd(0), inited(0), old_format(0),abort_slave(0),
slave_running(0)
st_master_info()
:fd(-1), io_thd(0), inited(0), old_format(0),abort_slave(0),
slave_running(0), slave_run_id(0)
{
host[0] = 0; user[0] = 0; password[0] = 0;
bzero(&file, sizeof(file));
......@@ -360,7 +363,8 @@ int start_slave_threads(bool need_slave_mutex, bool wait_for_start,
int start_slave_thread(pthread_handler h_func, pthread_mutex_t* start_lock,
pthread_mutex_t *cond_lock,
pthread_cond_t* start_cond,
volatile bool* slave_running,
volatile bool *slave_running,
volatile ulong *slave_run_id,
MASTER_INFO* mi);
/* If fd is -1, dump to NET */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment