Commit 0b1c0f31 authored by unknown's avatar unknown

Bug#31702 (Missing row on slave causes assertion failure under row-based replication):

When replicating an update pair (before image, after image) under row-based
replication, and the before image is not found on the slave, the after image
was not discarded, and was hence read as a before image for the next row.
Eventually, this led to an after image being read outside the block of rows
in the event, causing an assertion to fire.

This patch fixes this by reading the after image in the event that the row
was not found on the slave, adds some extra debug assertions to catch future
errors earlier, and also adds a few non-debug checks to prevent reading
outside the block of the event.


include/my_base.h:
  Adding error code HA_ERR_CORRUPT_EVENT.
mysql-test/suite/rpl/r/rpl_row_basic_11bugs.result:
  Result change.
mysql-test/suite/rpl/t/rpl_row_basic_11bugs.test:
  Adding test to try to use row-based replication to replicate an
  update of a row that doesn't exist on the slave. We should get
  an appropriate error and the slave should stop.
sql/log_event.cc:
  Adding debug printouts. Adding code to Update_rows_log_event::do_exec_row()
  so that the after image is read (and ignored) in the event of an error in
  finding the row. This is necessary so that the second pair of images is
  read correctly for the next update pair.
  
  Changing logic for ignoring errors to not include update events, since
  a "key not found" error or a "record changed" error is not idempotent
  for updates, just for deletes and inserts.
sql/log_event.h:
  Adding debug assertions to check that row reading is within the event's block of rows.
parent 3f284594
...@@ -409,7 +409,9 @@ enum ha_base_keytype { ...@@ -409,7 +409,9 @@ enum ha_base_keytype {
#define HA_ERR_LOGGING_IMPOSSIBLE 170 /* It is not possible to log this #define HA_ERR_LOGGING_IMPOSSIBLE 170 /* It is not possible to log this
statement */ statement */
#define HA_ERR_LAST 170 /*Copy last error nr.*/ #define HA_ERR_CORRUPT_EVENT 171 /* The event was corrupt, leading to
illegal data being read */
#define HA_ERR_LAST 171 /*Copy last error nr.*/
/* Add error numbers before HA_ERR_LAST and change it accordingly. */ /* Add error numbers before HA_ERR_LAST and change it accordingly. */
#define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1) #define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1)
......
...@@ -242,3 +242,34 @@ a b ...@@ -242,3 +242,34 @@ a b
3 1 3 1
4 4 4 4
drop table t1,t2; drop table t1,t2;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
**** On Master ****
SET SESSION BINLOG_FORMAT=ROW;
CREATE TABLE t1 (a INT PRIMARY KEY, b SET('master','slave'));
INSERT INTO t1 VALUES (1,'master,slave'), (2,'master,slave');
**** On Slave ****
UPDATE t1 SET a = 5, b = 'slave' WHERE a = 1;
SELECT * FROM t1 ORDER BY a;
a b
2 master,slave
5 slave
**** On Master ****
UPDATE t1 SET a = 5, b = 'master' WHERE a = 1;
SELECT * FROM t1 ORDER BY a;
a b
2 master,slave
5 master
**** On Slave ****
Last_SQL_Error
Error in Update_rows event: error during transaction execution on table test.t1. Can't find record in 't1'
SELECT * FROM t1 ORDER BY a;
a b
2 master,slave
5 slave
DROP TABLE t1;
**** On Master ****
DROP TABLE t1;
...@@ -223,3 +223,40 @@ connection master; ...@@ -223,3 +223,40 @@ connection master;
drop table t1,t2; drop table t1,t2;
sync_slave_with_master; sync_slave_with_master;
#
# BUG#31702: Missing row on slave causes assertion failure under
# row-based replication
#
disable_query_log;
source include/master-slave-reset.inc;
enable_query_log;
--echo **** On Master ****
connection master;
SET SESSION BINLOG_FORMAT=ROW;
CREATE TABLE t1 (a INT PRIMARY KEY, b SET('master','slave'));
INSERT INTO t1 VALUES (1,'master,slave'), (2,'master,slave');
--echo **** On Slave ****
sync_slave_with_master;
UPDATE t1 SET a = 5, b = 'slave' WHERE a = 1;
SELECT * FROM t1 ORDER BY a;
--echo **** On Master ****
connection master;
UPDATE t1 SET a = 5, b = 'master' WHERE a = 1;
save_master_pos;
SELECT * FROM t1 ORDER BY a;
--echo **** On Slave ****
connection slave;
source include/wait_for_slave_sql_error.inc;
let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Error, 1);
disable_query_log;
eval SELECT "$last_error" AS Last_SQL_Error;
enable_query_log;
SELECT * FROM t1 ORDER BY a;
DROP TABLE t1;
--echo **** On Master ****
connection master;
DROP TABLE t1;
...@@ -6173,14 +6173,19 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) ...@@ -6173,14 +6173,19 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
table->in_use = old_thd; table->in_use = old_thd;
switch (error) switch (error)
{ {
case 0:
break;
/* Some recoverable errors */ /* Some recoverable errors */
case HA_ERR_RECORD_CHANGED: case HA_ERR_RECORD_CHANGED:
case HA_ERR_KEY_NOT_FOUND: /* Idempotency support: OK if case HA_ERR_KEY_NOT_FOUND: /* Idempotency support: OK if
tuple does not exist */ tuple does not exist */
if (get_type_code() != UPDATE_ROWS_EVENT)
{
error= 0; error= 0;
case 0:
break; break;
}
/* Fall through in the event that we have an update event */
default: default:
rli->report(ERROR_LEVEL, thd->net.last_errno, rli->report(ERROR_LEVEL, thd->net.last_errno,
"Error in %s event: row application failed. %s", "Error in %s event: row application failed. %s",
...@@ -6197,6 +6202,10 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) ...@@ -6197,6 +6202,10 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
m_curr_row_end. m_curr_row_end.
*/ */
DBUG_PRINT("info", ("error: %d", error));
DBUG_PRINT("info", ("curr_row: 0x%lu; curr_row_end: 0x%lu; rows_end: 0x%lu",
(ulong) m_curr_row, (ulong) m_curr_row_end, (ulong) m_rows_end));
if (!m_curr_row_end && !error) if (!m_curr_row_end && !error)
unpack_current_row(rli); unpack_current_row(rli);
...@@ -7931,7 +7940,15 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli) ...@@ -7931,7 +7940,15 @@ Update_rows_log_event::do_exec_row(const Relay_log_info *const rli)
int error= find_row(rli); int error= find_row(rli);
if (error) if (error)
{
/*
We need to read the second image in the event of error to be
able to skip to the next pair of updates
*/
m_curr_row= m_curr_row_end;
unpack_current_row(rli);
return error; return error;
}
/* /*
This is the situation after locating BI: This is the situation after locating BI:
......
...@@ -37,6 +37,23 @@ ...@@ -37,6 +37,23 @@
#include "rpl_reporting.h" #include "rpl_reporting.h"
#endif #endif
/**
Either assert or return an error.
In debug builds, the condition is passed to DBUG_ASSERT and the macro
itself never returns from the enclosing function. In non-debug builds
(DBUG_OFF defined), a failed condition makes the enclosing function
return the given error code instead.
@note In non-debug builds the macro expands to a `return` statement, so
it can only be used inside a function whose return type can hold ERRNO.
@note The expansion does not include a trailing semicolon; the caller
supplies it.
@param COND Condition to check
@param ERRNO Error number to return in non-debug builds
*/
#ifdef DBUG_OFF
#define ASSERT_OR_RETURN_ERROR(COND, ERRNO) \
do { if (!(COND)) return ERRNO; } while (0)
#else
#define ASSERT_OR_RETURN_ERROR(COND, ERRNO) \
DBUG_ASSERT(COND)
#endif
#define LOG_READ_EOF -1 #define LOG_READ_EOF -1
#define LOG_READ_BOGUS -2 #define LOG_READ_BOGUS -2
#define LOG_READ_IO -3 #define LOG_READ_IO -3
...@@ -2316,8 +2333,11 @@ protected: ...@@ -2316,8 +2333,11 @@ protected:
int unpack_current_row(const Relay_log_info *const rli) int unpack_current_row(const Relay_log_info *const rli)
{ {
DBUG_ASSERT(m_table); DBUG_ASSERT(m_table);
return ::unpack_row(rli, m_table, m_width, m_curr_row, &m_cols, ASSERT_OR_RETURN_ERROR(m_curr_row < m_rows_end, HA_ERR_CORRUPT_EVENT);
int const result= ::unpack_row(rli, m_table, m_width, m_curr_row, &m_cols,
&m_curr_row_end, &m_master_reclength); &m_curr_row_end, &m_master_reclength);
ASSERT_OR_RETURN_ERROR(m_curr_row_end <= m_rows_end, HA_ERR_CORRUPT_EVENT);
return result;
} }
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment