From 02f8c163e65139ef3d2d7a9967611b30ac6c0f72 Mon Sep 17 00:00:00 2001 From: unknown <monty@mashka.mysql.fi> Date: Sat, 24 Aug 2002 05:44:16 +0300 Subject: [PATCH] Give better error from reconnect() Fixed hang in start_slave_threads() when thread dies quickly. Docs/manual.texi: Changelog client/mysqltest.c: Indentation cleanup More DBUG info libmysql/libmysql.c: More DBUG info Give better error from reconnect() mysql-test/r/rpl_rotate_logs.result: Update results mysql-test/t/rpl_log_pos.test: Fix for fast machines mysql-test/t/rpl_rotate_logs.test: Updated test to be more portable scripts/mysql_zap.sh: Update for MacOSX sql/mini_client.cc: Better error messages from reconnect. Indentation cleanups sql/slave.cc: Fixed hang in start_slave_threads() when thread dies quickly. sql/slave.h: Fixed hang in start_slave_threads() when thread dies quickly. --- Docs/manual.texi | 2 + client/mysqltest.c | 21 ++++---- libmysql/libmysql.c | 29 ++++++---- mysql-test/r/rpl_rotate_logs.result | 2 +- mysql-test/t/rpl_log_pos.test | 3 +- mysql-test/t/rpl_rotate_logs.test | 6 ++- scripts/mysql_zap.sh | 2 +- sql/mini_client.cc | 13 ++--- sql/slave.cc | 82 +++++++++++++++++------------ sql/slave.h | 14 +++-- 10 files changed, 102 insertions(+), 72 deletions(-) diff --git a/Docs/manual.texi b/Docs/manual.texi index 543370f906..2209568ea4 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -50380,6 +50380,8 @@ Fixed some problems with @code{CREATE TABLE ... SELECT function()}. @code{mysqld} now has the option @code{--temp-pool} enabled by default as this gives better performance with some operating systems. @item +Fixed hang in @code{CHANGE MASTER TO} if the slave thread died very quickly. +@item Big cleanup in replication code (less logging, better error messages, etc..) @item If the @code{--code-file} option is specified, the server calls diff --git a/client/mysqltest.c b/client/mysqltest.c index cc253e21be..987a614a25 100644 --- a/client/mysqltest.c +++ b/client/mysqltest.c @@ -1042,11 +1042,11 @@ int do_let(struct st_query* q) if (!*p) die("Missing variable name in let\n"); var_name = p; - while(*p && (*p != '=' || isspace(*p))) + while (*p && (*p != '=' || isspace(*p))) p++; var_name_end = p; if (*p == '=') p++; - while(*p && isspace(*p)) + while (*p && isspace(*p)) p++; var_val_start = p; return var_set(var_name, var_name_end, var_val_start, q->end); @@ -1054,9 +1054,10 @@ int do_let(struct st_query* q) int do_rpl_probe(struct st_query* q __attribute__((unused))) { + DBUG_ENTER("do_rpl_probe"); if (mysql_rpl_probe(&cur_con->mysql)) - die("Failed in mysql_rpl_probe(): %s", mysql_error(&cur_con->mysql)); - return 0; + die("Failed in mysql_rpl_probe(): '%s'", mysql_error(&cur_con->mysql)); + DBUG_RETURN(0); } int do_enable_rpl_parse(struct st_query* q __attribute__((unused))) @@ -1077,7 +1078,7 @@ int do_sleep(struct st_query* q, my_bool real_sleep) char* p=q->first_argument; struct timeval t; int dec_mul = 1000000; - while(*p && isspace(*p)) p++; + while (*p && isspace(*p)) p++; if (!*p) die("Missing argument in sleep\n"); t.tv_usec = 0; @@ -1097,7 +1098,7 @@ int do_sleep(struct st_query* q, my_bool real_sleep) else { t.tv_sec = atoi(p); - while(*p && *p != '.' && !isspace(*p)) + while (*p && *p != '.' && !isspace(*p)) p++; if (*p == '.') { @@ -1308,7 +1309,7 @@ int select_connection(struct st_query* q) if (!*p) die("Missing connection name in connect\n"); name = p; - while(*p && !isspace(*p)) + while (*p && !isspace(*p)) p++; *p = 0; @@ -1334,7 +1335,7 @@ int close_connection(struct st_query* q) if (!*p) die("Missing connection name in connect\n"); name = p; - while(*p && !isspace(*p)) + while (*p && !isspace(*p)) p++; *p = 0; @@ -1790,7 +1791,7 @@ int read_query(struct st_query** q_ptr) } } - while(*p && isspace(*p)) p++ ; + while (*p && isspace(*p)) p++ ; if (*p == '@') { p++; @@ -2503,7 +2504,7 @@ int main(int argc, char** argv) if (!processed) { current_line_inc = 0; - switch(q->type) { + switch (q->type) { case Q_WHILE: do_while(q); break; case Q_END_BLOCK: do_done(q); break; default: current_line_inc = 1; break; diff --git a/libmysql/libmysql.c b/libmysql/libmysql.c index 32148baf1d..40b3fb4cc6 100644 --- a/libmysql/libmysql.c +++ b/libmysql/libmysql.c @@ -1160,14 +1160,15 @@ static void expand_error(MYSQL* mysql, int error) static int get_master(MYSQL* mysql, MYSQL_RES* res, MYSQL_ROW row) { MYSQL* master; + DBUG_ENTER("get_master"); if (mysql_num_fields(res) < 3) - return 1; /* safety */ + DBUG_RETURN(1); /* safety */ /* use the same username and password as the original connection */ if (!(master = spawn_init(mysql, row[0], atoi(row[2]), 0, 0))) - return 1; + DBUG_RETURN(1); mysql->master = master; - return 0; + DBUG_RETURN(0); } @@ -1183,18 +1184,19 @@ static int get_slaves_from_master(MYSQL* mysql) int error = 1; int has_auth_info; int port_ind; + DBUG_ENTER("get_slaves_from_master"); if (!mysql->net.vio && !mysql_real_connect(mysql,0,0,0,0,0,0,0)) { expand_error(mysql, CR_PROBE_MASTER_CONNECT); - return 1; + DBUG_RETURN(1); } if (mysql_query(mysql, "SHOW SLAVE HOSTS") || !(res = mysql_store_result(mysql))) { expand_error(mysql, CR_PROBE_SLAVE_HOSTS); - return 1; + DBUG_RETURN(1); } switch (mysql_num_fields(res)) { @@ -1238,15 +1240,17 @@ static int get_slaves_from_master(MYSQL* mysql) err: if (res) mysql_free_result(res); - return error; + DBUG_RETURN(error); } int STDCALL mysql_rpl_probe(MYSQL* mysql) { - MYSQL_RES* res = 0; + MYSQL_RES *res= 0; MYSQL_ROW row; int error = 1; + DBUG_ENTER("mysql_rpl_probe"); + /* First determine the replication role of the server we connected to the most reliable way to do this is to run SHOW SLAVE STATUS and see @@ -1259,7 +1263,7 @@ int STDCALL mysql_rpl_probe(MYSQL* mysql) !(res = mysql_store_result(mysql))) { expand_error(mysql, CR_PROBE_SLAVE_STATUS); - return 1; + DBUG_RETURN(1); } row= mysql_fetch_row(res); @@ -1284,7 +1288,7 @@ int STDCALL mysql_rpl_probe(MYSQL* mysql) err: if (res) mysql_free_result(res); - return error; + DBUG_RETURN(error); } @@ -1979,7 +1983,11 @@ static my_bool mysql_reconnect(MYSQL *mysql) if (!mysql_real_connect(&tmp_mysql,mysql->host,mysql->user,mysql->passwd, mysql->db, mysql->port, mysql->unix_socket, mysql->client_flag)) + { + mysql->net.last_errno= tmp_mysql.net.last_errno; + strmov(mysql->net.last_error, tmp_mysql.net.last_error); DBUG_RETURN(1); + } tmp_mysql.free_me=mysql->free_me; mysql->free_me=0; mysql_close(mysql); @@ -2060,7 +2068,7 @@ mysql_close(MYSQL *mysql) mysql->status=MYSQL_STATUS_READY; /* Force command */ mysql->reconnect=0; simple_command(mysql,COM_QUIT,NullS,0,1); - end_server(mysql); + end_server(mysql); /* Sets mysql->net.vio= 0 */ } my_free((gptr) mysql->host_info,MYF(MY_ALLOW_ZERO_PTR)); my_free(mysql->user,MYF(MY_ALLOW_ZERO_PTR)); @@ -2082,7 +2090,6 @@ mysql_close(MYSQL *mysql) /* Clear pointers for better safety */ mysql->host_info=mysql->user=mysql->passwd=mysql->db=0; bzero((char*) &mysql->options,sizeof(mysql->options)); - mysql->net.vio = 0; /* free/close slave list */ if (mysql->rpl_pivot) diff --git a/mysql-test/r/rpl_rotate_logs.result b/mysql-test/r/rpl_rotate_logs.result index 01e6d2c3a4..d440e157ed 100644 --- a/mysql-test/r/rpl_rotate_logs.result +++ b/mysql-test/r/rpl_rotate_logs.result @@ -76,7 +76,7 @@ a testing temporary tables part 2 show slave status; Master_Host Master_User Master_Port Connect_retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_do_db Replicate_ignore_db Last_errno Last_error Skip_counter Exec_master_log_pos Relay_log_space -127.0.0.1 root MASTER_PORT 60 master-bin.006 838 slave-relay-bin.004 1816 master-bin.006 Yes Yes 0 0 838 1816 +127.0.0.1 root MASTER_PORT 60 master-bin.006 838 slave-relay-bin.001 8034 master-bin.006 Yes Yes 0 0 838 8034 lock tables t3 read; select count(*) from t3 where n >= 4; count(*) diff --git a/mysql-test/t/rpl_log_pos.test b/mysql-test/t/rpl_log_pos.test index cce52dc5da..f585fa233c 100644 --- a/mysql-test/t/rpl_log_pos.test +++ b/mysql-test/t/rpl_log_pos.test @@ -9,13 +9,14 @@ sync_with_master; --replace_result 3306 MASTER_PORT 9306 MASTER_PORT 3334 MASTER_PORT 3336 MASTER_PORT show slave status; change master to master_log_pos=73; +sleep 5; slave stop; change master to master_log_pos=73; --replace_result 3306 MASTER_PORT 9306 MASTER_PORT 3334 MASTER_PORT 3336 MASTER_PORT show slave status; slave start; -sleep 2; +sleep 5; --replace_result 3306 MASTER_PORT 9306 MASTER_PORT 3334 MASTER_PORT 3336 MASTER_PORT show slave status; change master to master_log_pos=173; diff --git a/mysql-test/t/rpl_rotate_logs.test b/mysql-test/t/rpl_rotate_logs.test index fa0c38ae99..cea2f9008d 100644 --- a/mysql-test/t/rpl_rotate_logs.test +++ b/mysql-test/t/rpl_rotate_logs.test @@ -14,9 +14,11 @@ connect (slave,localhost,root,,test,0,slave.sock); system cat /dev/null > var/slave-data/master.info; system chmod 000 var/slave-data/master.info; connection slave; -!slave start; +--error 1201 +slave start; system chmod 600 var/slave-data/master.info; -!slave start; +--error 1201 +slave start; --replace_result 3306 MASTER_PORT 9306 MASTER_PORT 3334 MASTER_PORT 3336 MASTER_PORT !eval change master to master_host='127.0.0.1',master_port=$MASTER_MYPORT, master_user='root'; diff --git a/scripts/mysql_zap.sh b/scripts/mysql_zap.sh index 312d15e34d..f485d16428 100644 --- a/scripts/mysql_zap.sh +++ b/scripts/mysql_zap.sh @@ -12,7 +12,7 @@ $opt_f= 0; $opt_t= 0; $opt_a = ""; -$BSD = -f '/vmunix' || $ENV{"OS"} eq "SunOS4"; +$BSD = -f '/vmunix' || $ENV{"OS"} eq "SunOS4" || $^O eq 'darwin'; $LINUX = $^O eq 'linux'; $pscmd = $BSD ? "/bin/ps -auxww" : $LINUX ? "/bin/ps axuw" : "/bin/ps -ef"; diff --git a/sql/mini_client.cc b/sql/mini_client.cc index 743d522e4b..5bd88e9b09 100644 --- a/sql/mini_client.cc +++ b/sql/mini_client.cc @@ -414,10 +414,8 @@ my_bool mc_mysql_reconnect(MYSQL *mysql) mysql->db, mysql->port, mysql->unix_socket, mysql->client_flag, mysql->net.read_timeout)) { -#ifdef NOT_USED - mysql->net.last_errno=CR_RECONNECT_FAILED; - strmov(mysql->net.last_error, ER(mysql->net.last_errno)); -#endif + mysql->net.last_errno= tmp_mysql.net.last_errno; + strmov(mysql->net.last_error, tmp_mysql.net.last_error); DBUG_RETURN(1); } tmp_mysql.free_me=mysql->free_me; @@ -888,7 +886,6 @@ mc_mysql_close(MYSQL *mysql) /* Clear pointers for better safety */ mysql->host_info=mysql->user=mysql->passwd=mysql->db=0; bzero((char*) &mysql->options,sizeof(mysql->options)); - mysql->net.vio = 0; #ifdef HAVE_OPENSSL mysql_ssl_clear(mysql); #endif /* HAVE_OPENSSL */ @@ -976,13 +973,13 @@ mc_unpack_fields(MYSQL_DATA *data,MEM_ROOT *alloc,uint fields, DBUG_RETURN(result); } -int -mc_mysql_send_query(MYSQL* mysql, const char* query, uint length) +int mc_mysql_send_query(MYSQL* mysql, const char* query, uint length) { return mc_simple_command(mysql, COM_QUERY, query, length, 1); } -int mc_mysql_read_query_result(MYSQL *mysql) + +int mc_mysql_read_query_result(MYSQL *mysql) { uchar *pos; ulong field_count; diff --git a/sql/slave.cc b/sql/slave.cc index 93a5c6171d..27e9030c00 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -443,14 +443,18 @@ int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock, } -int start_slave_thread(pthread_handler h_func, pthread_mutex_t* start_lock, +int start_slave_thread(pthread_handler h_func, pthread_mutex_t *start_lock, pthread_mutex_t *cond_lock, - pthread_cond_t* start_cond, - volatile bool* slave_running, + pthread_cond_t *start_cond, + volatile bool *slave_running, + volatile ulong *slave_run_id, MASTER_INFO* mi) { pthread_t th; + ulong start_id; DBUG_ASSERT(mi->inited); + DBUG_ENTER("start_slave_thread"); + if (start_lock) pthread_mutex_lock(start_lock); if (!server_id) @@ -460,7 +464,7 @@ int start_slave_thread(pthread_handler h_func, pthread_mutex_t* start_lock, if (start_lock) pthread_mutex_unlock(start_lock); sql_print_error("Server id not set, will not start slave"); - return ER_BAD_SLAVE; + DBUG_RETURN(ER_BAD_SLAVE); } if (*slave_running) @@ -469,39 +473,36 @@ int start_slave_thread(pthread_handler h_func, pthread_mutex_t* start_lock, pthread_cond_broadcast(start_cond); if (start_lock) pthread_mutex_unlock(start_lock); - return ER_SLAVE_MUST_STOP; + DBUG_RETURN(ER_SLAVE_MUST_STOP); } + start_id= *slave_run_id; + DBUG_PRINT("info",("Creating new slave thread")); if (pthread_create(&th, &connection_attrib, h_func, (void*)mi)) { if (start_lock) pthread_mutex_unlock(start_lock); - return ER_SLAVE_THREAD; + DBUG_RETURN(ER_SLAVE_THREAD); } if (start_cond && cond_lock) { THD* thd = current_thd; - while (!*slave_running) + while (start_id == *slave_run_id) { + DBUG_PRINT("sleep",("Waiting for slave thread to start")); const char* old_msg = thd->enter_cond(start_cond,cond_lock, "Waiting for slave thread to start"); pthread_cond_wait(start_cond,cond_lock); thd->exit_cond(old_msg); - /* - TODO: in a very rare case of init_slave_thread failing, it is - possible that we can get stuck here since slave_running will not - be set. We need to change slave_running to int and have -1 as - error code. - */ if (thd->killed) { pthread_mutex_unlock(cond_lock); - return ER_SERVER_SHUTDOWN; + DBUG_RETURN(ER_SERVER_SHUTDOWN); } } } if (start_lock) pthread_mutex_unlock(start_lock); - return 0; + DBUG_RETURN(0); } @@ -535,13 +536,15 @@ int start_slave_threads(bool need_slave_mutex, bool wait_for_start, if (thread_mask & SLAVE_IO) error=start_slave_thread(handle_slave_io,lock_io,lock_cond_io, - cond_io,&mi->slave_running, + cond_io, + &mi->slave_running, &mi->slave_run_id, mi); if (!error && (thread_mask & SLAVE_SQL)) { error=start_slave_thread(handle_slave_sql,lock_sql,lock_cond_sql, cond_sql, - &mi->rli.slave_running,mi); + &mi->rli.slave_running, &mi->rli.slave_run_id, + mi); if (error) terminate_slave_threads(mi, thread_mask & SLAVE_IO, 0); } @@ -1807,23 +1810,30 @@ This may also be a network problem, or just a bug in the master or slave code.\ /* slave I/O thread */ pthread_handler_decl(handle_slave_io,arg) { + THD *thd; // needs to be first for thread_stack + MYSQL *mysql; + MASTER_INFO *mi = (MASTER_INFO*)arg; + char llbuff[22]; + uint retry_count; + + // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff + my_thread_init(); + #ifndef DBUG_OFF slave_begin: #endif - THD *thd; // needs to be first for thread_stack - MYSQL *mysql = NULL ; - MASTER_INFO* mi = (MASTER_INFO*)arg; - char llbuff[22]; - uint retry_count= 0; DBUG_ASSERT(mi->inited); - + mysql= NULL ; + retry_count= 0; + pthread_mutex_lock(&mi->run_lock); + /* Inform waiting threads that slave has started */ + mi->slave_run_id++; + #ifndef DBUG_OFF mi->events_till_abort = abort_slave_event_count; #endif - // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff - my_thread_init(); thd= new THD; // note that contructor of THD uses DBUG_ ! DBUG_ENTER("handle_slave_io"); THD_CHECK_SENTRY(thd); @@ -2071,26 +2081,32 @@ err: pthread_handler_decl(handle_slave_sql,arg) { -#ifndef DBUG_OFF -slave_begin: -#endif THD *thd; /* needs to be first for thread_stack */ char llbuff[22],llbuff1[22]; RELAY_LOG_INFO* rli = &((MASTER_INFO*)arg)->rli; - const char* errmsg=0; + const char *errmsg; + + // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff + my_thread_init(); + +#ifndef DBUG_OFF +slave_begin: +#endif + DBUG_ASSERT(rli->inited); pthread_mutex_lock(&rli->run_lock); DBUG_ASSERT(!rli->slave_running); + errmsg= 0; #ifndef DBUG_OFF rli->events_till_abort = abort_slave_event_count; #endif - - // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff - my_thread_init(); - thd = new THD; // note that contructor of THD uses DBUG_ ! DBUG_ENTER("handle_slave_sql"); + thd = new THD; // note that contructor of THD uses DBUG_ ! THD_CHECK_SENTRY(thd); + /* Inform waiting threads that slave has started */ + rli->slave_run_id++; + pthread_detach_this_thread(); if (init_slave_thread(thd, SLAVE_THD_SQL)) { diff --git a/sql/slave.h b/sql/slave.h index 4be0178517..b527aceb43 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -154,6 +154,7 @@ typedef struct st_relay_log_info */ volatile uint32 slave_skip_counter; volatile ulong abort_pos_wait; /* Incremented on change master */ + volatile ulong slave_run_id; /* Incremented on slave start */ pthread_mutex_t log_space_lock; pthread_cond_t log_space_cond; THD * sql_thd; @@ -171,8 +172,8 @@ typedef struct st_relay_log_info st_relay_log_info() :info_fd(-1),cur_log_fd(-1), cur_log_old_open_count(0), abort_pos_wait(0), - inited(0), abort_slave(0), slave_running(0), log_pos_current(0), - skip_log_purge(0) + slave_run_id(0), inited(0), abort_slave(0), slave_running(0), + log_pos_current(0), skip_log_purge(0) { relay_log_name[0] = master_log_name[0] = 0; bzero(&info_file,sizeof(info_file)); @@ -283,11 +284,13 @@ typedef struct st_master_info bool inited; bool old_format; /* master binlog is in 3.23 format */ volatile bool abort_slave, slave_running; + volatile ulong slave_run_id; bool ignore_stop_event; - st_master_info():fd(-1), io_thd(0), inited(0), old_format(0),abort_slave(0), - slave_running(0) + st_master_info() + :fd(-1), io_thd(0), inited(0), old_format(0),abort_slave(0), + slave_running(0), slave_run_id(0) { host[0] = 0; user[0] = 0; password[0] = 0; bzero(&file, sizeof(file)); @@ -360,7 +363,8 @@ int start_slave_threads(bool need_slave_mutex, bool wait_for_start, int start_slave_thread(pthread_handler h_func, pthread_mutex_t* start_lock, pthread_mutex_t *cond_lock, pthread_cond_t* start_cond, - volatile bool* slave_running, + volatile bool *slave_running, + volatile ulong *slave_run_id, MASTER_INFO* mi); /* If fd is -1, dump to NET */ -- 2.30.9