Commit 97e51d24 authored by Marko Mäkelä

MDEV-13697 DB_TRX_ID is not always reset

The rollback of the modification of a pre-existing record
should involve a purge-like operation. Before MDEV-12288
the only purge-like operation was the removal of a
delete-marked record.

After MDEV-12288, any rollback of updating an existing record
must reset the DB_TRX_ID column when it is no longer visible
in the purge read view.

row_vers_must_preserve_del_marked(): Remove. It is cleaner to
perform the check directly in row0umod.cc.

row_trx_id_offset(): Auxiliary function to retrieve the byte
offset of DB_TRX_ID in a clustered index leaf page record.

row_undo_mod_must_purge(): Determine if a record should be purged.

row_undo_mod_clust(): For temporary tables, skip the purge checks.
When rolling back an update so that the original record was not
delete-marked, reset DB_TRX_ID if the history is no longer visible.
parent 56074319
...@@ -7,9 +7,18 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency = 1; ...@@ -7,9 +7,18 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency = 1;
SET GLOBAL innodb_purge_rseg_truncate_frequency = 1; SET GLOBAL innodb_purge_rseg_truncate_frequency = 1;
CREATE TABLE t1(a INT PRIMARY KEY, b INT NOT NULL) CREATE TABLE t1(a INT PRIMARY KEY, b INT NOT NULL)
ROW_FORMAT=REDUNDANT ENGINE=InnoDB; ROW_FORMAT=REDUNDANT ENGINE=InnoDB;
connect prevent_purge,localhost,root;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
INSERT INTO t1 VALUES(1,2),(3,4); INSERT INTO t1 VALUES(1,2),(3,4);
UPDATE t1 SET b=-3 WHERE a=3; UPDATE t1 SET b=-3 WHERE a=3;
connect con1,localhost,root;
BEGIN;
UPDATE t1 SET b=4 WHERE a=3;
disconnect prevent_purge;
connection default;
InnoDB 0 transactions not purged InnoDB 0 transactions not purged
disconnect con1;
FLUSH TABLE t1 FOR EXPORT; FLUSH TABLE t1 FOR EXPORT;
Clustered index root page contents: Clustered index root page contents:
N_RECS=2; LEVEL=0 N_RECS=2; LEVEL=0
......
...@@ -15,11 +15,24 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency = 1; ...@@ -15,11 +15,24 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency = 1;
CREATE TABLE t1(a INT PRIMARY KEY, b INT NOT NULL) CREATE TABLE t1(a INT PRIMARY KEY, b INT NOT NULL)
ROW_FORMAT=REDUNDANT ENGINE=InnoDB; ROW_FORMAT=REDUNDANT ENGINE=InnoDB;
--connect (prevent_purge,localhost,root)
START TRANSACTION WITH CONSISTENT SNAPSHOT;
--connection default
INSERT INTO t1 VALUES(1,2),(3,4); INSERT INTO t1 VALUES(1,2),(3,4);
UPDATE t1 SET b=-3 WHERE a=3; UPDATE t1 SET b=-3 WHERE a=3;
# Initiate a full purge, which should reset all DB_TRX_ID. --connect (con1,localhost,root)
BEGIN;
# For purgeable records, we must record DB_TRX_ID=0 in the undo log!
UPDATE t1 SET b=4 WHERE a=3;
--disconnect prevent_purge
--connection default
# Initiate a full purge, which should reset the DB_TRX_ID except for a=3.
--source include/wait_all_purged.inc --source include/wait_all_purged.inc
# Initiate a ROLLBACK of the update, which should reset the DB_TRX_ID for a=3.
--disconnect con1
FLUSH TABLE t1 FOR EXPORT; FLUSH TABLE t1 FOR EXPORT;
# The following is based on innodb.table_flags: # The following is based on innodb.table_flags:
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation. Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -54,22 +54,6 @@ row_vers_impl_x_locked( ...@@ -54,22 +54,6 @@ row_vers_impl_x_locked(
dict_index_t* index, dict_index_t* index,
const ulint* offsets); const ulint* offsets);
/*****************************************************************//**
Finds out if we must preserve a delete marked earlier version of a clustered
index record, because it is >= the purge view.
@param[in] trx_id transaction id in the version
@param[in] name table name
@param[in,out] mtr mini transaction holding the latch on the
clustered index record; it will also hold
the latch on purge_view
@return TRUE if earlier version should be preserved */
ibool
row_vers_must_preserve_del_marked(
/*==============================*/
trx_id_t trx_id,
const table_name_t& name,
mtr_t* mtr);
/*****************************************************************//** /*****************************************************************//**
Finds out if a version of the record, where the version >= the current Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check purge view, should have ientry as its secondary index entry. We check
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation. Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -32,6 +32,7 @@ Created 2/27/1997 Heikki Tuuri ...@@ -32,6 +32,7 @@ Created 2/27/1997 Heikki Tuuri
#include "dict0boot.h" #include "dict0boot.h"
#include "trx0undo.h" #include "trx0undo.h"
#include "trx0roll.h" #include "trx0roll.h"
#include "trx0purge.h"
#include "btr0btr.h" #include "btr0btr.h"
#include "mach0data.h" #include "mach0data.h"
#include "ibuf0ibuf.h" #include "ibuf0ibuf.h"
...@@ -148,101 +149,55 @@ row_undo_mod_clust_low( ...@@ -148,101 +149,55 @@ row_undo_mod_clust_low(
return(err); return(err);
} }
/***********************************************************//** /** Get the byte offset of the DB_TRX_ID column
Purges a clustered index record after undo if possible. @param[in] rec clustered index record
This is attempted when the record was inserted by updating a @param[in] index clustered index
delete-marked record and there no longer exist transactions @return the byte offset of DB_TRX_ID, from the start of rec */
that would see the delete-marked record. static ulint row_trx_id_offset(const rec_t* rec, const dict_index_t* index)
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_remove_clust_low(
/*==========================*/
undo_node_t* node, /*!< in: row undo node */
mtr_t* mtr, /*!< in/out: mini-transaction */
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{ {
btr_cur_t* btr_cur; ut_ad(index->n_uniq <= MAX_REF_PARTS);
dberr_t err; ulint trx_id_offset = index->trx_id_offset;
ulint trx_id_offset;
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
/* Find out if the record has been purged already
or if we can remove it. */
if (!btr_pcur_restore_position(mode, &node->pcur, mtr)
|| row_vers_must_preserve_del_marked(node->new_trx_id,
node->table->name,
mtr)) {
return(DB_SUCCESS);
}
btr_cur = btr_pcur_get_btr_cur(&node->pcur);
trx_id_offset = btr_cur_get_index(btr_cur)->trx_id_offset;
if (!trx_id_offset) { if (!trx_id_offset) {
mem_heap_t* heap = NULL; /* Reserve enough offsets for the PRIMARY KEY and 2 columns
ulint trx_id_col; so that we can access DB_TRX_ID, DB_ROLL_PTR. */
const ulint* offsets; ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
ulint len; mem_heap_t* heap = NULL;
const ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
trx_id_col = dict_index_get_sys_col_pos( ulint* offsets = rec_get_offsets(rec, index, offsets_, true,
btr_cur_get_index(btr_cur), DATA_TRX_ID); trx_id_pos + 1, &heap);
ut_ad(trx_id_col > 0); ut_ad(!heap);
ut_ad(trx_id_col != ULINT_UNDEFINED); ulint len;
offsets = rec_get_offsets(
btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur),
NULL, true, trx_id_col + 1, &heap);
trx_id_offset = rec_get_nth_field_offs( trx_id_offset = rec_get_nth_field_offs(
offsets, trx_id_col, &len); offsets, trx_id_pos, &len);
ut_ad(len == DATA_TRX_ID_LEN); ut_ad(len == DATA_TRX_ID_LEN);
mem_heap_free(heap);
} }
if (trx_read_trx_id(btr_cur_get_rec(btr_cur) + trx_id_offset) return trx_id_offset;
!= node->new_trx_id) { }
/* The record must have been purged and then replaced
with a different one. */
return(DB_SUCCESS);
}
/* We are about to remove an old, delete-marked version of the /** Determine if rollback must execute a purge-like operation.
record that may have been delete-marked by a different transaction @param[in,out] node row undo
than the rolling-back one. */ @param[in,out] mtr mini-transaction
ut_ad(rec_get_deleted_flag(btr_cur_get_rec(btr_cur), @return whether the record should be purged */
dict_table_is_comp(node->table))); static bool row_undo_mod_must_purge(undo_node_t* node, mtr_t* mtr)
/* In delete-marked records, DB_TRX_ID must {
always refer to an existing update_undo log record. */ ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index)); ut_ad(!node->table->is_temporary());
if (mode == BTR_MODIFY_LEAF) {
err = btr_cur_optimistic_delete(btr_cur, 0, mtr)
? DB_SUCCESS
: DB_FAIL;
} else {
ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
/* This operation is analogous to purge, we can free also btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&node->pcur);
inherited externally stored fields. ut_ad(btr_cur->index->is_primary());
We can also assume that the record was complete
(including BLOBs), because it had been delete-marked
after it had been completely inserted. Therefore, we
are passing rollback=false, just like purge does. */
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, mtr_s_lock(&purge_sys.latch, mtr);
false, mtr);
/* The delete operation may fail if we have little if (!purge_sys.view.changes_visible(node->new_trx_id,
file space left: TODO: easiest to crash the database node->table->name)) {
and restart with more file space */ return false;
} }
return(err); const rec_t* rec = btr_cur_get_rec(btr_cur);
return trx_read_trx_id(rec + row_trx_id_offset(rec, btr_cur->index))
== node->new_trx_id;
} }
/***********************************************************//** /***********************************************************//**
...@@ -271,6 +226,7 @@ row_undo_mod_clust( ...@@ -271,6 +226,7 @@ row_undo_mod_clust(
log_free_check(); log_free_check();
pcur = &node->pcur; pcur = &node->pcur;
index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur)); index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
ut_ad(index->is_primary());
mtr.start(); mtr.start();
if (index->table->is_temporary()) { if (index->table->is_temporary()) {
...@@ -364,44 +320,122 @@ row_undo_mod_clust( ...@@ -364,44 +320,122 @@ row_undo_mod_clust(
btr_pcur_commit_specify_mtr(pcur, &mtr); btr_pcur_commit_specify_mtr(pcur, &mtr);
if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) { if (err != DB_SUCCESS) {
goto func_exit;
}
/* FIXME: Perform the below operations in the above
mini-transaction when possible. */
if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
/* In delete-marked records, DB_TRX_ID must
always refer to an existing update_undo log record. */
ut_ad(node->new_trx_id);
mtr.start(); mtr.start();
if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) {
goto mtr_commit_exit;
}
if (index->table->is_temporary()) { if (index->table->is_temporary()) {
mtr.set_log_mode(MTR_LOG_NO_REDO); mtr.set_log_mode(MTR_LOG_NO_REDO);
} else { } else {
if (!row_undo_mod_must_purge(node, &mtr)) {
goto mtr_commit_exit;
}
index->set_modified(mtr); index->set_modified(mtr);
} }
/* It is not necessary to call row_log_table, ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
because the record is delete-marked and would thus dict_table_is_comp(node->table)));
be omitted from the rebuilt copy of the table. */ if (btr_cur_optimistic_delete(&pcur->btr_cur, 0, &mtr)) {
err = row_undo_mod_remove_clust_low( goto mtr_commit_exit;
node, &mtr, BTR_MODIFY_LEAF); }
if (err != DB_SUCCESS) {
btr_pcur_commit_specify_mtr(pcur, &mtr); btr_pcur_commit_specify_mtr(pcur, &mtr);
/* We may have to modify tree structure: do a mtr.start();
pessimistic descent down the index tree */ if (!btr_pcur_restore_position(
BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
pcur, &mtr)) {
goto mtr_commit_exit;
}
mtr.start(); if (index->table->is_temporary()) {
if (index->table->is_temporary()) { mtr.set_log_mode(MTR_LOG_NO_REDO);
mtr.set_log_mode(MTR_LOG_NO_REDO); } else {
} else { if (!row_undo_mod_must_purge(node, &mtr)) {
index->set_modified(mtr); goto mtr_commit_exit;
} }
index->set_modified(mtr);
}
err = row_undo_mod_remove_clust_low( ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
node, &mtr, dict_table_is_comp(node->table)));
BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE);
/* This operation is analogous to purge, we can free
also inherited externally stored fields. We can also
assume that the record was complete (including BLOBs),
because it had been delete-marked after it had been
completely inserted. Therefore, we are passing
rollback=false, just like purge does. */
btr_cur_pessimistic_delete(&err, FALSE, &pcur->btr_cur, 0,
false, &mtr);
ut_ad(err == DB_SUCCESS
|| err == DB_OUT_OF_FILE_SPACE);
} else if (!index->table->is_temporary() && node->new_trx_id) {
/* We rolled back a record so that it still exists.
We must reset the DB_TRX_ID if the history is no
longer accessible by any active read view. */
ut_ad(err == DB_SUCCESS mtr.start();
|| err == DB_OUT_OF_FILE_SPACE); if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) {
goto mtr_commit_exit;
}
rec_t* rec = btr_pcur_get_rec(pcur);
mtr_s_lock(&purge_sys.latch, &mtr);
if (!purge_sys.view.changes_visible(node->new_trx_id,
node->table->name)) {
goto mtr_commit_exit;
} }
btr_pcur_commit_specify_mtr(pcur, &mtr); ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
ut_ad(index->n_uniq <= MAX_REF_PARTS);
/* Reserve enough offsets for the PRIMARY KEY and 2 columns
so that we can access DB_TRX_ID, DB_ROLL_PTR. */
ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
rec_offs_init(offsets_);
offsets = rec_get_offsets(
rec, index, offsets_, true, trx_id_pos + 2, &heap);
ulint len;
ulint trx_id_offset = rec_get_nth_field_offs(
offsets, trx_id_pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
if (trx_read_trx_id(rec + trx_id_offset) == node->new_trx_id) {
ut_ad(!rec_get_deleted_flag(
rec, dict_table_is_comp(node->table)));
index->set_modified(mtr);
if (page_zip_des_t* page_zip = buf_block_get_page_zip(
btr_pcur_get_block(&node->pcur))) {
page_zip_write_trx_id_and_roll_ptr(
page_zip, rec, offsets, trx_id_pos,
0, 1ULL << ROLL_PTR_INSERT_FLAG_POS,
&mtr);
} else {
mlog_write_string(rec + trx_id_offset,
reset_trx_id,
sizeof reset_trx_id, &mtr);
}
}
} else {
goto func_exit;
} }
mtr_commit_exit:
btr_pcur_commit_specify_mtr(pcur, &mtr);
func_exit:
node->state = UNDO_NODE_FETCH_NEXT; node->state = UNDO_NODE_FETCH_NEXT;
if (offsets_heap) { if (offsets_heap) {
......
...@@ -420,29 +420,6 @@ row_vers_impl_x_locked( ...@@ -420,29 +420,6 @@ row_vers_impl_x_locked(
return(trx); return(trx);
} }
/*****************************************************************//**
Finds out if we must preserve a delete marked earlier version of a clustered
index record, because it is >= the purge view.
@param[in] trx_id transaction id in the version
@param[in] name table name
@param[in,out] mtr mini transaction holding the latch on the
clustered index record; it will also hold
the latch on purge_view
@return TRUE if earlier version should be preserved */
ibool
row_vers_must_preserve_del_marked(
/*==============================*/
trx_id_t trx_id,
const table_name_t& name,
mtr_t* mtr)
{
ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S));
mtr_s_lock(&purge_sys.latch, mtr);
return(!purge_sys.view.changes_visible(trx_id, name));
}
/** build virtual column value from current cluster index record data /** build virtual column value from current cluster index record data
@param[in,out] row the cluster index row in dtuple form @param[in,out] row the cluster index row in dtuple form
@param[in] clust_index clustered index @param[in] clust_index clustered index
......
...@@ -945,8 +945,8 @@ trx_undo_page_report_modify( ...@@ -945,8 +945,8 @@ trx_undo_page_report_modify(
allowed to ignore blob prefixes if the delete marking was done allowed to ignore blob prefixes if the delete marking was done
by some other trx as it must have committed by now for us to by some other trx as it must have committed by now for us to
allow an over-write. */ allow an over-write. */
if (ignore_prefix) { if (trx_id == trx->id) {
ignore_prefix = (trx_id != trx->id); ignore_prefix = false;
} }
ptr += mach_u64_write_compressed(ptr, trx_id); ptr += mach_u64_write_compressed(ptr, trx_id);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment