Commit a40533b7 authored by Yoni Fogel

[t:5067] Merge #5067 to main. New GC algorithm that calculates...

[t:5067] Merge #5067 to main.  New GC algorithm that calculates "live_list_reverse" on the fly when needed

git-svn-id: file:///svn/toku/tokudb@44749 c7de825b-a66e-492c-adef-691d508d4ae1
parent d6b5f0b4
......@@ -2100,7 +2100,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn,
u_int32_t index,
LEAFENTRY leaf_entry,
OMT snapshot_xids,
OMT live_list_reverse,
OMT referenced_xids,
OMT live_root_txns,
STAT64INFO_S * delta)
{
......@@ -2129,7 +2129,7 @@ ft_basement_node_gc_once(BASEMENTNODE bn,
&bn->buffer_mempool,
&maybe_free,
snapshot_xids,
live_list_reverse,
referenced_xids,
live_root_txns);
// These will represent the number of bytes and rows changed as
......@@ -2171,7 +2171,7 @@ exit:
static void
basement_node_gc_all_les(BASEMENTNODE bn,
OMT snapshot_xids,
OMT live_list_reverse,
OMT referenced_xids,
OMT live_root_txns,
STAT64INFO_S * delta)
{
......@@ -2184,7 +2184,7 @@ basement_node_gc_all_les(BASEMENTNODE bn,
r = toku_omt_fetch(bn->buffer, index, &storedatav);
assert(r == 0);
leaf_entry = storedatav;
ft_basement_node_gc_once(bn, index, leaf_entry, snapshot_xids, live_list_reverse, live_root_txns, delta);
ft_basement_node_gc_once(bn, index, leaf_entry, snapshot_xids, referenced_xids, live_root_txns, delta);
// Check if the leaf entry was deleted or not.
if (num_leafentries_before == toku_omt_size(bn->buffer)) {
++index;
......@@ -2197,7 +2197,7 @@ static void
ft_leaf_gc_all_les(FTNODE node,
FT h,
OMT snapshot_xids,
OMT live_list_reverse,
OMT referenced_xids,
OMT live_root_txns)
{
toku_assert_entire_node_in_memory(node);
......@@ -2209,7 +2209,7 @@ ft_leaf_gc_all_les(FTNODE node,
STAT64INFO_S delta;
delta.numrows = 0;
delta.numbytes = 0;
basement_node_gc_all_les(bn, snapshot_xids, live_list_reverse, live_root_txns, &delta);
basement_node_gc_all_les(bn, snapshot_xids, referenced_xids, live_root_txns, &delta);
toku_ft_update_stats(&h->in_memory_stats, delta);
}
}
......@@ -2247,12 +2247,12 @@ toku_bnc_flush_to_child(
TOKULOGGER logger = toku_cachefile_logger(h->cf);
if (child->height == 0 && logger) {
OMT snapshot_txnids = NULL;
OMT live_list_reverse = NULL;
OMT referenced_xids = NULL;
OMT live_root_txns = NULL;
toku_txn_manager_clone_state_for_gc(
logger->txn_manager,
&snapshot_txnids,
&live_list_reverse,
&referenced_xids,
&live_root_txns
);
size_t buffsize = toku_fifo_buffer_size_in_use(bnc->buffer);
......@@ -2260,13 +2260,13 @@ toku_bnc_flush_to_child(
// may be misleading if there's a broadcast message in there
STATUS_VALUE(FT_MSG_BYTES_CURR) -= buffsize;
// Perform the garbage collection.
ft_leaf_gc_all_les(child, h, snapshot_txnids, live_list_reverse, live_root_txns);
ft_leaf_gc_all_les(child, h, snapshot_txnids, referenced_xids, live_root_txns);
// Free the OMT's we used for garbage collecting.
toku_omt_destroy(&snapshot_txnids);
toku_omt_destroy(&live_root_txns);
toku_omt_free_items_pool(live_list_reverse);
toku_omt_destroy(&live_list_reverse);
toku_omt_free_items_pool(referenced_xids);
toku_omt_destroy(&referenced_xids);
}
return 0;
......
......@@ -35,15 +35,6 @@ toku_find_xid_by_xid (OMTVALUE v, void *xidv) {
return 0;
}
// Heaviside function for searching an OMT of XID_PAIR entries by xid1.
// The xid being searched for is passed by value, packed into the void* argument.
// Returns -1 if the entry's xid1 is below the target, +1 if above, 0 on a match.
int
toku_find_pair_by_xid (OMTVALUE v, void *xidv) {
    const TXNID target = (TXNID)xidv;
    XID_PAIR candidate = v;
    int direction = 0;
    if (candidate->xid1 < target) {
        direction = -1;
    } else if (candidate->xid1 > target) {
        direction = +1;
    }
    return direction;
}
void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size) {
return malloc_in_memarena(log->rollentry_arena, size);
}
......
......@@ -53,7 +53,6 @@ void toku_maybe_spill_rollbacks(TOKUTXN txn, ROLLBACK_LOG_NODE log);
void toku_txn_maybe_note_ft (TOKUTXN txn, FT h);
int toku_logger_txn_rollback_raw_count(TOKUTXN txn, u_int64_t *raw_count);
int toku_find_pair_by_xid (OMTVALUE v, void *txnv);
int toku_find_xid_by_xid (OMTVALUE v, void *xidv);
PAIR_ATTR rollback_memory_size(ROLLBACK_LOG_NODE log);
......
......@@ -101,11 +101,6 @@ BOOL toku_is_txn_in_live_root_txn_list(OMT live_root_txn_list, TXNID xid);
TXNID toku_get_oldest_in_live_root_txn_list(TOKUTXN txn);
// An entry of the live_list_reverse OMT: associates a transaction id with
// the snapshot transaction that most recently had it in its live list.
typedef struct {
TXNID xid1; // key: a txnid that appears in some snapshot's live list
TXNID xid2; // the youngest snapshot txnid whose live list contains xid1
} XID_PAIR_S, *XID_PAIR;
#include "txn_state.h"
TOKUTXN_STATE toku_txn_get_state(TOKUTXN txn);
......
......@@ -17,12 +17,9 @@ struct txn_manager {
OMT live_txns; // a sorted tree. Old comment said should be a hashtable. Do we still want that?
OMT live_root_txns; // a sorted tree.
OMT snapshot_txnids; //contains TXNID x | x is snapshot txn
//contains TXNID pairs (x,y) | y is oldest txnid s.t. x is in y's live list
// every TXNID that is in some snapshot's live list is used as the key for this OMT, x, as described above.
// The second half of the pair, y, is the youngest snapshot txnid (that is, has the highest LSN), such that x is in its live list.
// So, for example, Say T_800 begins, T_800 commits right after snapshot txn T_1100 begins. Then (800,1100) is in
// this list
OMT live_list_reverse;
// Contains 3-tuples: (TXNID begin_id, TXNID end_id, uint32_t references)
// for committed root transaction ids that are still referenced by a live list.
OMT referenced_xids;
TXNID oldest_living_xid;
time_t oldest_living_starttime; // timestamp in seconds of when txn with oldest_living_xid started
struct toku_list prepared_txns; // transactions that have been prepared and are unresolved, but have not been returned through txn_recover.
......@@ -60,18 +57,27 @@ static BOOL is_txnid_live(TXN_MANAGER txn_manager, TXNID txnid) {
return (result != NULL);
}
// One entry of txn_manager->referenced_xids: a committed root transaction id
// that is still referenced by the live list of at least one snapshot txn.
struct referenced_xid_tuple {
TXNID begin_id; // txnid64 of the finished root txn; the OMT is searched by this field
TXNID end_id; // new xid taken from ++last_xid at the time the txn finished
uint32_t references; // number of snapshot txns whose live list still contains begin_id
};
//Heaviside function to search through an OMT by a TXNID
static int find_by_xid (OMTVALUE v, void *txnidv);
static void
verify_snapshot_system(TXN_MANAGER txn_manager) {
int num_snapshot_txnids = toku_omt_size(txn_manager->snapshot_txnids);
verify_snapshot_system(TXN_MANAGER txn_manager UU()) {
uint32_t num_snapshot_txnids = toku_omt_size(txn_manager->snapshot_txnids);
TXNID snapshot_txnids[num_snapshot_txnids];
int num_live_txns = toku_omt_size(txn_manager->live_txns);
uint32_t num_live_txns = toku_omt_size(txn_manager->live_txns);
TOKUTXN live_txns[num_live_txns];
int num_live_list_reverse = toku_omt_size(txn_manager->live_list_reverse);
XID_PAIR live_list_reverse[num_live_list_reverse];
uint32_t num_referenced_xid_tuples = toku_omt_size(txn_manager->referenced_xids);
struct referenced_xid_tuple *referenced_xid_tuples[num_referenced_xid_tuples];
int r;
int i;
int j;
uint32_t i;
uint32_t j;
//set up arrays for easier access
for (i = 0; i < num_snapshot_txnids; i++) {
OMTVALUE v;
......@@ -85,11 +91,11 @@ verify_snapshot_system(TXN_MANAGER txn_manager) {
assert_zero(r);
live_txns[i] = v;
}
for (i = 0; i < num_live_list_reverse; i++) {
for (i = 0; i < num_referenced_xid_tuples; i++) {
OMTVALUE v;
r = toku_omt_fetch(txn_manager->live_list_reverse, i, &v);
r = toku_omt_fetch(txn_manager->referenced_xids, i, &v);
assert_zero(r);
live_list_reverse[i] = v;
referenced_xid_tuples[i] = v;
}
{
......@@ -99,7 +105,7 @@ verify_snapshot_system(TXN_MANAGER txn_manager) {
invariant(is_txnid_live(txn_manager, snapshot_xid));
TOKUTXN snapshot_txn;
toku_txn_manager_id2txn_unlocked(txn_manager, snapshot_xid, &snapshot_txn);
int num_live_root_txn_list = toku_omt_size(snapshot_txn->live_root_txn_list);
uint32_t num_live_root_txn_list = toku_omt_size(snapshot_txn->live_root_txn_list);
TXNID live_root_txn_list[num_live_root_txn_list];
{
for (j = 0; j < num_live_root_txn_list; j++) {
......@@ -109,41 +115,96 @@ verify_snapshot_system(TXN_MANAGER txn_manager) {
live_root_txn_list[j] = (TXNID)v;
}
}
{
// Only committed entries return a youngest.
TXNID youngest = toku_get_youngest_live_list_txnid_for(
snapshot_xid,
txn_manager->snapshot_txnids,
txn_manager->referenced_xids
);
invariant(youngest == TXNID_NONE);
}
for (j = 0; j < num_live_root_txn_list; j++) {
TXNID live_xid = live_root_txn_list[j];
invariant(live_xid <= snapshot_xid);
TXNID youngest = toku_get_youngest_live_list_txnid_for(
live_xid,
txn_manager->live_list_reverse
live_xid,
txn_manager->snapshot_txnids,
txn_manager->referenced_xids
);
invariant(youngest!=TXNID_NONE);
invariant(youngest>=snapshot_xid);
if (is_txnid_live(txn_manager, live_xid)) {
// Only committed entries return a youngest.
invariant(youngest == TXNID_NONE);
}
else {
invariant(youngest != TXNID_NONE);
// This snapshot reads 'live_xid' so its youngest cannot be older than snapshot_xid.
invariant(youngest >= snapshot_xid);
}
}
}
}
{
//Verify live_list_reverse
for (i = 0; i < num_live_list_reverse; i++) {
XID_PAIR pair = live_list_reverse[i];
invariant(pair->xid1 <= pair->xid2);
// Verify referenced_xids.
for (i = 0; i < num_referenced_xid_tuples; i++) {
struct referenced_xid_tuple *tuple = referenced_xid_tuples[i];
invariant(tuple->begin_id < tuple->end_id);
invariant(tuple->references > 0);
{
//verify pair->xid2 is in snapshot_xids
u_int32_t index;
OMTVALUE v2;
//verify neither tuple->begin_id nor tuple->end_id is in live_txns
r = toku_omt_find_zero(txn_manager->live_txns,
find_by_xid,
(OMTVALUE) tuple->begin_id, NULL, NULL);
invariant(r == DB_NOTFOUND);
r = toku_omt_find_zero(txn_manager->live_txns,
find_by_xid,
(OMTVALUE) tuple->end_id, NULL, NULL);
invariant(r == DB_NOTFOUND);
}
{
//verify neither tuple->begin_id nor tuple->end_id is in snapshot_txnids
r = toku_omt_find_zero(txn_manager->snapshot_txnids,
toku_find_xid_by_xid,
(OMTVALUE) pair->xid2, &v2, &index);
assert_zero(r);
(OMTVALUE) tuple->begin_id, NULL, NULL);
invariant(r == DB_NOTFOUND);
r = toku_omt_find_zero(txn_manager->snapshot_txnids,
toku_find_xid_by_xid,
(OMTVALUE) tuple->end_id, NULL, NULL);
invariant(r == DB_NOTFOUND);
}
for (j = 0; j < num_live_txns; j++) {
TOKUTXN txn = live_txns[j];
if (txn->snapshot_type != TXN_SNAPSHOT_NONE) {
BOOL expect = txn->snapshot_txnid64 >= pair->xid1 &&
txn->snapshot_txnid64 <= pair->xid2;
BOOL found = toku_is_txn_in_live_root_txn_list(txn->live_root_txn_list, pair->xid1);
invariant((expect==FALSE) == (found==FALSE));
{
// Verify number of references is correct
uint32_t refs_found = 0;
for (j = 0; j < num_snapshot_txnids; j++) {
TXNID snapshot_xid = snapshot_txnids[j];
TOKUTXN snapshot_txn;
toku_txn_manager_id2txn_unlocked(txn_manager, snapshot_xid, &snapshot_txn);
if (toku_is_txn_in_live_root_txn_list(snapshot_txn->live_root_txn_list, tuple->begin_id)) {
refs_found++;
}
invariant(!toku_is_txn_in_live_root_txn_list(
snapshot_txn->live_root_txn_list,
tuple->end_id));
}
invariant(refs_found == tuple->references);
}
{
// Verify youngest makes sense.
TXNID youngest = toku_get_youngest_live_list_txnid_for(
tuple->begin_id,
txn_manager->snapshot_txnids,
txn_manager->referenced_xids
);
invariant(youngest != TXNID_NONE);
invariant(youngest > tuple->begin_id);
invariant(youngest < tuple->end_id);
// Youngest must be found, and must be a snapshot txn
r = toku_omt_find_zero(txn_manager->snapshot_txnids,
toku_find_xid_by_xid,
(OMTVALUE) youngest, NULL, NULL);
invariant_zero(r);
}
}
}
......@@ -192,18 +253,17 @@ void toku_txn_manager_init(TXN_MANAGER* txn_managerp) {
int r = 0;
TXN_MANAGER XCALLOC(txn_manager);
toku_mutex_init(&txn_manager->txn_manager_lock, NULL);
r = toku_omt_create(&txn_manager->live_txns);
r = toku_omt_create(&txn_manager->live_txns);
assert_zero(r);
r = toku_omt_create(&txn_manager->live_root_txns);
assert_zero(r);
r = toku_omt_create(&txn_manager->snapshot_txnids);
assert_zero(r);
r = toku_omt_create(&txn_manager->live_list_reverse);
r = toku_omt_create(&txn_manager->referenced_xids);
assert_zero(r);
txn_manager->oldest_living_xid = TXNID_NONE_LIVING;
txn_manager->oldest_living_starttime = 0;
txn_manager->last_xid = 0;
//TODO(yoni): #5062 get this from somewhere
toku_list_init(&txn_manager->prepared_txns);
toku_list_init(&txn_manager->prepared_and_returned_txns);
......@@ -216,15 +276,15 @@ void toku_txn_manager_destroy(TXN_MANAGER txn_manager) {
toku_omt_destroy(&txn_manager->live_txns);
toku_omt_destroy(&txn_manager->live_root_txns);
toku_omt_destroy(&txn_manager->snapshot_txnids);
toku_omt_destroy(&txn_manager->live_list_reverse);
toku_omt_destroy(&txn_manager->referenced_xids);
toku_cond_destroy(&txn_manager->wait_for_unpin_of_txn);
toku_free(txn_manager);
}
static TXNID txn_manager_get_oldest_living_xid_unlocked(
TXN_MANAGER txn_manager,
TXN_MANAGER txn_manager,
time_t * oldest_living_starttime
)
)
{
TXNID rval = 0;
rval = txn_manager->oldest_living_xid;
......@@ -265,54 +325,6 @@ snapshot_txnids_note_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
return r;
}
// If live txn is not in reverse live list, then add it.
// If live txn is in reverse live list, update it by setting second xid in pair to new txn that is being started.
static int
live_list_reverse_note_txn_start_iter(OMTVALUE live_xidv, u_int32_t UU(index), void*txnv) {
TOKUTXN txn = txnv;
TXNID xid = txn->txnid64; // xid of new txn that is being started
TXNID live_xid = (TXNID)live_xidv; // xid on the new txn's live list
OMTVALUE pairv;
XID_PAIR pair;
uint32_t idx;
int r;
OMT reverse = txn->logger->txn_manager->live_list_reverse;
r = toku_omt_find_zero(reverse, toku_find_pair_by_xid, (void *)live_xid, &pairv, &idx);
if (r==0) {
pair = pairv;
invariant(pair->xid1 == live_xid); //sanity check
invariant(pair->xid2 < xid); //Must be older
pair->xid2 = txn->txnid64;
}
else {
invariant(r==DB_NOTFOUND);
//Make new entry
XMALLOC(pair);
pair->xid1 = live_xid;
pair->xid2 = txn->txnid64;
r = toku_omt_insert_at(reverse, pair, idx);
assert_zero(r);
}
return r;
}
// Maintain the reverse live list. The reverse live list is a list of xid pairs. The first xid in the pair
// is a txn that was live when some txn began, and the second xid in the pair is the newest still-live xid to
// have that first xid in its live list. (The first xid may be closed, it only needed to be live when the
// second txn began.)
// When a new txn begins, we need to scan the live list of this new txn. For each live txn, we either
// add it to the reverse live list (if it is not already there), or update to the reverse live list so
// that this new txn is the second xid in the pair associated with the txn in the live list.
static int
live_list_reverse_note_txn_start(TOKUTXN txn) {
int r;
r = toku_omt_iterate(txn->live_root_txn_list, live_list_reverse_note_txn_start_iter, txn);
assert_zero(r);
return r;
}
static TXNID
max_xid(TXNID a, TXNID b) {
return a < b ? b : a;
......@@ -388,21 +400,19 @@ int toku_txn_manager_start_txn(
// 2. if the transaction is creating a snapshot:
// - create a live list for the transaction
// - add the id to the list of snapshot ids
// - make the necessary modifications to the live_list_reverse
//
// The order of operations is important here, and must be taken
// into account when the transaction is closed. The txn is added
// to the live_root_txns first (if it is a root txn). This has the implication
// that a root level snapshot transaction is in its own live list. This fact
// is taken into account when the transaction is closed.
//
// add ancestor information, and maintain global live root txn list
if (parent == NULL) {
//Add txn to list (omt) of live root txns
r = toku_omt_insert_at(
txn_manager->live_root_txns,
(OMTVALUE) txn->txnid64,
txn_manager->live_root_txns,
(OMTVALUE) txn->txnid64,
toku_omt_size(txn_manager->live_root_txns)
); //We know it is the newest one.
assert_zero(r);
......@@ -417,8 +427,6 @@ int toku_txn_manager_start_txn(
assert_zero(r);
r = snapshot_txnids_note_txn(txn_manager, txn);
assert_zero(r);
r = live_list_reverse_note_txn_start(txn);
assert_zero(r);
}
// in this case, it is a child transaction that specified its snapshot to be that
// of the root transaction
......@@ -439,69 +447,84 @@ int toku_txn_manager_start_txn(
return 0;
}
// For each xid on the closing txn's live list, find the corresponding entry in the reverse live list.
// There must be one.
// If the second xid in the pair is not the xid of the closing transaction, then the second xid must be newer
// than the closing txn, and there is nothing to be done (except to assert the invariant).
// If the second xid in the pair is the xid of the closing transaction, then we need to find the next oldest
// txn. If the live_xid is in the live list of the next oldest txn, then set the next oldest txn as the
// second xid in the pair, otherwise delete the entry from the reverse live list.
static int
live_list_reverse_note_txn_end_iter(OMTVALUE live_xidv, u_int32_t UU(index), void*txnv) {
TOKUTXN txn = txnv;
TXNID xid = txn->txnid64; // xid of txn that is closing
TXNID live_xid = (TXNID)live_xidv; // xid on closing txn's live list
OMTVALUE pairv;
XID_PAIR pair;
uint32_t idx;
find_tuple_by_xid (OMTVALUE v, void *xidv) {
struct referenced_xid_tuple *tuple = v;
TXNID xidfind = (TXNID)xidv;
if (tuple->begin_id < xidfind) return -1;
if (tuple->begin_id > xidfind) return +1;
return 0;
}
TXNID
toku_get_youngest_live_list_txnid_for(TXNID xc, OMT snapshot_txnids, OMT referenced_xids) {
OMTVALUE tuplev;
struct referenced_xid_tuple *tuple;
int r;
OMT reverse = txn->logger->txn_manager->live_list_reverse;
r = toku_omt_find_zero(reverse, toku_find_pair_by_xid, (void *)live_xid, &pairv, &idx);
invariant(r==0);
pair = pairv;
invariant(pair->xid1 == live_xid); //sanity check
if (pair->xid2 == xid) {
//There is a record that needs to be either deleted or updated
TXNID olderxid;
OMTVALUE olderv;
uint32_t olderidx;
OMT snapshot = txn->logger->txn_manager->snapshot_txnids;
BOOL should_delete = TRUE;
// find the youngest txn in snapshot that is older than xid
r = toku_omt_find(snapshot, toku_find_xid_by_xid, (OMTVALUE) xid, -1, &olderv, &olderidx);
if (r==0) {
//There is an older txn
olderxid = (TXNID) olderv;
invariant(olderxid < xid);
if (olderxid >= live_xid) {
//older txn is new enough, we need to update.
pair->xid2 = olderxid;
should_delete = FALSE;
}
}
else {
invariant(r==DB_NOTFOUND);
}
if (should_delete) {
//Delete record
toku_free(pair);
r = toku_omt_delete_at(reverse, idx);
invariant(r==0);
}
TXNID rval = TXNID_NONE;
r = toku_omt_find_zero(referenced_xids, find_tuple_by_xid, (OMTVALUE)xc, &tuplev, NULL);
if (r == DB_NOTFOUND) {
goto done;
}
else {
invariant(pair->xid2 > xid);
tuple = tuplev;
TXNID endid = tuple->end_id;
TXNID live;
OMTVALUE livev;
r = toku_omt_find(snapshot_txnids, toku_find_xid_by_xid, (OMTVALUE)endid, -1, &livev, NULL);
if (r == DB_NOTFOUND) {
goto done;
}
return r;
live = (TXNID)livev;
invariant(live < tuple->end_id);
if (live > tuple->begin_id) {
rval = live;
}
done:
return rval;
}
// Iterator callback applied to every xid on a closing snapshot txn's live
// root txn list.
//
// For the given xid, drops one reference from the matching tuple in
// referenced_xids. When the last reference goes away the tuple is removed
// from the OMT and its memory is released.
//
//   live_xidv        - the xid from the live list, stored by value in the OMTVALUE
//   index            - unused
//   referenced_xidsv - the OMT of (struct referenced_xid_tuple *) to update
//
// Always returns 0.
static int
referenced_xids_note_snapshot_txn_end_iter(OMTVALUE live_xidv, u_int32_t UU(index), void *referenced_xidsv) {
    OMT referenced_xids = referenced_xidsv;
    TXNID live_xid = (TXNID)live_xidv; // xid on closing txn's live list
    int r;
    uint32_t idx;
    struct referenced_xid_tuple *tuple;
    OMTVALUE tuplev;

    r = toku_omt_find_zero(
        referenced_xids,
        find_tuple_by_xid,
        (OMTVALUE)live_xid,
        &tuplev,
        &idx);
    if (r == DB_NOTFOUND) {
        // No tuple exists for this xid (tuples are only created when a root
        // txn finishes), so there is no reference to release.
        goto done;
    }
    invariant_zero(r);
    invariant(tuplev != NULL);
    tuple = tuplev;
    invariant(tuple->references > 0);
    if (--tuple->references == 0) {
        r = toku_omt_delete_at(referenced_xids, idx);
        lazy_assert_zero(r);
        // The OMT only stores the pointer; the tuple itself was heap
        // allocated when it was inserted, so it must be freed here or it
        // leaks once the last referencing snapshot ends.
        toku_free(tuple);
    }
done:
    return 0;
}
// When a snapshot txn ends, update referenced_xids. To do that, examine each txn in this (closing) txn's live list.
static int
live_list_reverse_note_txn_end(TOKUTXN txn) {
referenced_xids_note_snapshot_txn_end(TXN_MANAGER mgr, OMT live_root_txn_list) {
int r;
r = toku_omt_iterate(txn->live_root_txn_list, live_list_reverse_note_txn_end_iter, txn);
r = toku_omt_iterate(
live_root_txn_list,
referenced_xids_note_snapshot_txn_end_iter,
mgr->referenced_xids);
invariant(r==0);
return r;
}
......@@ -533,40 +556,57 @@ void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
invariant_zero(r);
}
if (txn->parent==NULL) {
bool is_snapshot = (txn->snapshot_type != TXN_SNAPSHOT_NONE && (txn->parent==NULL || txn->snapshot_type == TXN_SNAPSHOT_CHILD));
uint32_t index_in_snapshot_txnids;
if (is_snapshot) {
OMTVALUE v;
u_int32_t idx;
//Remove txn from list of live root txns
r = toku_omt_find_zero(txn_manager->live_root_txns, toku_find_xid_by_xid, (OMTVALUE)txn->txnid64, &v, &idx);
//Free memory used for snapshot_txnids
r = toku_omt_find_zero(txn_manager->snapshot_txnids, toku_find_xid_by_xid, (OMTVALUE) txn->txnid64, &v, &index_in_snapshot_txnids);
invariant_zero(r);
TXNID xid = (TXNID) v;
invariant(xid == txn->txnid64);
r = toku_omt_delete_at(txn_manager->live_root_txns, idx);
r = toku_omt_delete_at(txn_manager->snapshot_txnids, index_in_snapshot_txnids);
invariant_zero(r);
referenced_xids_note_snapshot_txn_end(txn_manager, txn->live_root_txn_list);
//Free memory used for live root txns local list
invariant(toku_omt_size(txn->live_root_txn_list) > 0);
toku_omt_destroy(&txn->live_root_txn_list);
}
//
// if this txn created a snapshot, make necessary modifications to list of snapshot txnids and live_list_reverse
// the order of operations is important. We first remove the txnid from the list of snapshot txnids. This is
// necessary because root snapshot transactions are in their own live lists. If we do not remove
// the txnid from the snapshot txnid list first, then when we go to make the modifications to
// live_list_reverse, we have trouble. We end up never removing (id, id) from live_list_reverse
//
if (txn->snapshot_type != TXN_SNAPSHOT_NONE && (txn->parent==NULL || txn->snapshot_type == TXN_SNAPSHOT_CHILD)) {
u_int32_t idx;
if (txn->parent==NULL) {
OMTVALUE v;
//Free memory used for snapshot_txnids
r = toku_omt_find_zero(txn_manager->snapshot_txnids, toku_find_xid_by_xid, (OMTVALUE) txn->txnid64, &v, &idx);
u_int32_t idx;
//Remove txn from list of live root txns
r = toku_omt_find_zero(txn_manager->live_root_txns, toku_find_xid_by_xid, (OMTVALUE)txn->txnid64, &v, &idx);
invariant_zero(r);
TXNID xid = (TXNID) v;
invariant(xid == txn->txnid64);
r = toku_omt_delete_at(txn_manager->snapshot_txnids, idx);
r = toku_omt_delete_at(txn_manager->live_root_txns, idx);
invariant_zero(r);
live_list_reverse_note_txn_end(txn);
//Free memory used for live root txns local list
invariant(toku_omt_size(txn->live_root_txn_list) > 0);
toku_omt_destroy(&txn->live_root_txn_list);
if (!is_snapshot) {
// If it's a snapshot, we already calculated index_in_snapshot_txnids.
r = toku_omt_find_zero(txn_manager->snapshot_txnids, toku_find_xid_by_xid, (OMTVALUE) txn->txnid64, NULL, &index_in_snapshot_txnids);
invariant(r == DB_NOTFOUND);
}
uint32_t num_references = toku_omt_size(txn_manager->snapshot_txnids) - index_in_snapshot_txnids;
if (num_references > 0) {
// This transaction exists in a live list of another transaction.
struct referenced_xid_tuple *XMALLOC(tuple);
tuple->begin_id = txn->txnid64;
tuple->end_id = ++txn_manager->last_xid;
tuple->references = num_references;
r = toku_omt_insert(
txn_manager->referenced_xids,
tuple,
find_tuple_by_xid,
(OMTVALUE)txn->txnid64,
NULL);
lazy_assert_zero(r);
}
}
assert(txn_manager->oldest_living_xid <= txn->txnid64);
......@@ -594,20 +634,20 @@ void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
}
void toku_txn_manager_clone_state_for_gc(
TXN_MANAGER txn_manager,
OMT* snapshot_xids,
OMT* live_list_reverse,
TXN_MANAGER txn_manager,
OMT* snapshot_xids,
OMT* referenced_xids,
OMT* live_root_txns
)
)
{
int r = 0;
toku_mutex_lock(&txn_manager->txn_manager_lock);
r = toku_omt_clone_noptr(snapshot_xids,
txn_manager->snapshot_txnids);
assert_zero(r);
r = toku_omt_clone_pool(live_list_reverse,
txn_manager->live_list_reverse,
sizeof(XID_PAIR_S));
r = toku_omt_clone_pool(referenced_xids,
txn_manager->referenced_xids,
sizeof(struct referenced_xid_tuple));
assert_zero(r);
r = toku_omt_clone_noptr(live_root_txns,
txn_manager->live_root_txns);
......@@ -619,7 +659,7 @@ void toku_txn_manager_clone_state_for_gc(
static int
find_by_xid (OMTVALUE v, void *txnidv) {
TOKUTXN txn = v;
TXNID txnidfind = *(TXNID*)txnidv;
TXNID txnidfind = (TXNID)txnidv;
if (txn->txnid64<txnidfind) return -1;
if (txn->txnid64>txnidfind) return +1;
return 0;
......@@ -627,7 +667,7 @@ find_by_xid (OMTVALUE v, void *txnidv) {
void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID txnid, TOKUTXN *result) {
OMTVALUE txnfound;
int r = toku_omt_find_zero(txn_manager->live_txns, find_by_xid, &txnid, &txnfound, NULL);
int r = toku_omt_find_zero(txn_manager->live_txns, find_by_xid, (OMTVALUE)txnid, &txnfound, NULL);
if (r==0) {
TOKUTXN txn = txnfound;
assert(txn->txnid64==txnid);
......
......@@ -47,9 +47,9 @@ int toku_txn_manager_start_txn(
void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_clone_state_for_gc(
TXN_MANAGER txn_manager,
OMT* snapshot_xids,
OMT* live_list_reverse,
TXN_MANAGER txn_manager,
OMT* snapshot_xids,
OMT* referenced_xids,
OMT* live_root_txns
);
......@@ -61,8 +61,8 @@ int toku_txn_manager_get_txn_from_xid (TXN_MANAGER txn_manager, TOKU_XA_XID *xid
u_int32_t toku_txn_manager_num_live_txns(TXN_MANAGER txn_manager);
int toku_txn_manager_iter_over_live_txns(
TXN_MANAGER txn_manager,
int (*f)(OMTVALUE, u_int32_t, void*),
TXN_MANAGER txn_manager,
int (*f)(OMTVALUE, u_int32_t, void*),
void* v
);
......@@ -71,10 +71,10 @@ void toku_txn_manager_note_abort_txn(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_note_commit_txn(TXN_MANAGER txn_manager, TOKUTXN txn);
int toku_txn_manager_recover_txn(
TXN_MANAGER txn_manager,
struct tokulogger_preplist preplist[/*count*/],
long count,
long *retp, /*out*/
TXN_MANAGER txn_manager,
struct tokulogger_preplist preplist[/*count*/],
long count,
long *retp, /*out*/
u_int32_t flags
);
......@@ -91,6 +91,7 @@ TXNID toku_txn_manager_get_last_xid(TXN_MANAGER mgr);
// Test-only function
void toku_txn_manager_increase_last_xid(TXN_MANAGER mgr, uint64_t increment);
TXNID toku_get_youngest_live_list_txnid_for(TXNID xc, OMT snapshot_txnids, OMT referenced_xids);
#if defined(__cplusplus) || defined(__cilkplusplus)
}
#endif
......
......@@ -27,6 +27,7 @@
#include "xids.h"
#include "ft_msg.h"
#include "ule.h"
#include "txn_manager.h"
#include "ule-internal.h"
......@@ -188,36 +189,16 @@ get_next_older_txnid(TXNID xc, OMT omt) {
return xid;
}
// Look up xc in the reverse live list and return the second xid of its pair
// (the youngest snapshot txn that has xc in its live list).
// Returns TXNID_NONE when xc has no entry in live_list_reverse.
TXNID
toku_get_youngest_live_list_txnid_for(TXNID xc, OMT live_list_reverse) {
    OMTVALUE found;
    uint32_t position;
    int r = toku_omt_find_zero(live_list_reverse, toku_find_pair_by_xid, (void *)xc, &found, &position);
    if (r != 0) {
        // The only acceptable failure is "no such entry".
        invariant(r==DB_NOTFOUND);
        return TXNID_NONE;
    }
    XID_PAIR pair = found;
    invariant(pair->xid1 == xc); //sanity check
    return pair->xid2;
}
//
// This function returns TRUE if live transaction TL1 is allowed to read a value committed by
// transaction xc, false otherwise.
//
static BOOL
xid_reads_committed_xid(TXNID tl1, TXNID xc, OMT live_list_reverse) {
xid_reads_committed_xid(TXNID tl1, TXNID xc, OMT snapshot_txnids, OMT referenced_xids) {
BOOL rval;
if (tl1 < xc) rval = FALSE; //cannot read a newer txn
else {
TXNID x = toku_get_youngest_live_list_txnid_for(xc, live_list_reverse);
TXNID x = toku_get_youngest_live_list_txnid_for(xc, snapshot_txnids, referenced_xids);
if (x == TXNID_NONE) rval = TRUE; //Not in ANY live list, tl1 can read it.
else rval = tl1 > x; //Newer than the 'newest one that has it in live list'
// we know tl1 > xc
......@@ -230,7 +211,7 @@ xid_reads_committed_xid(TXNID tl1, TXNID xc, OMT live_list_reverse) {
}
static void
garbage_collection(ULE ule, OMT snapshot_xids, OMT live_list_reverse, OMT live_root_txns) {
garbage_collection(ULE ule, OMT snapshot_xids, OMT referenced_xids, OMT live_root_txns) {
if (ule->num_cuxrs == 1) goto done;
// will fail if too many num_cuxrs
BOOL necessary_static[MAX_TRANSACTION_RECORDS];
......@@ -266,7 +247,7 @@ garbage_collection(ULE ule, OMT snapshot_xids, OMT live_list_reverse, OMT live_r
continue;
}
tl1 = toku_get_youngest_live_list_txnid_for(xc, live_list_reverse);
tl1 = toku_get_youngest_live_list_txnid_for(xc, snapshot_xids, referenced_xids);
if (tl1 == xc) {
// if tl1 == xc, that means xc should be live and show up in
// live_root_txns, which we check above. So, if we get
......@@ -294,7 +275,7 @@ garbage_collection(ULE ule, OMT snapshot_xids, OMT live_list_reverse, OMT live_r
curr_committed_entry--;
while (curr_committed_entry > 0) {
xc = ule->uxrs[curr_committed_entry].xid;
if (xid_reads_committed_xid(tl1, xc, live_list_reverse)) {
if (xid_reads_committed_xid(tl1, xc, snapshot_xids, referenced_xids)) {
break;
}
curr_committed_entry--;
......@@ -387,7 +368,7 @@ apply_msg_to_leafentry(FT_MSG msg, // message to apply to leafentry
// the memory completely, if we removed the leaf entry.
// -- snapshot_xids : we use these in memory transaction ids to
// determine what to garbage collect.
// -- live_list_reverse : list of in memory active transactions.
// -- referenced_xids : committed root transaction ids that are still referenced by some live list.
// NOTE: it is not a good idea to garbage collect a leaf
// entry with only one committed value.
int
......@@ -398,15 +379,15 @@ garbage_collect_leafentry(LEAFENTRY old_leaf_entry,
struct mempool *mp,
void **maybe_free,
OMT snapshot_xids,
OMT live_list_reverse,
OMT referenced_xids,
OMT live_root_txns) {
int r = 0;
ULE_S ule;
le_unpack(&ule, old_leaf_entry);
assert(snapshot_xids);
assert(live_list_reverse);
assert(referenced_xids);
assert(live_root_txns);
garbage_collection(&ule, snapshot_xids, live_list_reverse, live_root_txns);
garbage_collection(&ule, snapshot_xids, referenced_xids, live_root_txns);
r = le_pack(&ule,
new_leaf_entry_memory_size,
new_leaf_entry,
......
......@@ -69,11 +69,9 @@ int garbage_collect_leafentry(LEAFENTRY old_leaf_entry,
struct mempool *mp,
void **maybe_free,
OMT snapshot_xids,
OMT live_list_reverse,
OMT referenced_xids,
OMT live_root_txns);
TXNID toku_get_youngest_live_list_txnid_for(TXNID xc, OMT live_list_reverse);
#if defined(__cplusplus) || defined(__cilkplusplus)
}
#endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment