Commit f12bdc75 authored by John Esmet's avatar John Esmet

Pass down txn manager state to message application, which it can use to

run full garbage collection when a leafentry has > 1 committed entry.
parent f7323d26
......@@ -1065,6 +1065,10 @@ typedef enum {
LE_MAX_PROVISIONAL_XR,
LE_EXPANDED,
LE_MAX_MEMSIZE,
LE_APPLY_GC_BYTES_IN,
LE_APPLY_GC_BYTES_OUT,
LE_NORMAL_GC_BYTES_IN,
LE_NORMAL_GC_BYTES_OUT,
LE_STATUS_NUM_ROWS
} le_status_entry;
......@@ -1196,6 +1200,7 @@ toku_ft_bn_apply_cmd_once (
LEAFENTRY le,
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdonep,
STAT64INFO stats_to_update
);
......@@ -1209,6 +1214,7 @@ toku_ft_bn_apply_cmd (
FT_MSG cmd,
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdone,
STAT64INFO stats_to_update
);
......@@ -1222,6 +1228,7 @@ toku_ft_leaf_apply_cmd (
int target_childnum,
FT_MSG cmd,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
uint64_t *workdone,
STAT64INFO stats_to_update
);
......@@ -1236,6 +1243,7 @@ toku_ft_node_put_cmd (
FT_MSG cmd,
bool is_fresh,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
size_t flow_deltas[],
STAT64INFO stats_to_update
);
......
This diff is collapsed.
......@@ -230,6 +230,7 @@ int toku_testsetup_insert_to_leaf (FT_HANDLE brt, BLOCKNUM blocknum, const char
&cmd,
true,
make_gc_info(true),
nullptr,
zero_flow_deltas,
NULL
);
......
......@@ -2925,7 +2925,7 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
.xids = lbuf->xids,
.u = { .id = { &thekey, &theval } } };
uint64_t workdone=0;
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(true), &workdone, stats_to_update);
toku_ft_bn_apply_cmd_once(BLB(leafnode,0), &cmd, idx, NULL, TXNID_NONE, make_gc_info(true), nullptr, &workdone, stats_to_update);
}
static int write_literal(struct dbout *out, void*data, size_t len) {
......
......@@ -249,6 +249,7 @@ toku_le_apply_msg(FT_MSG msg,
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p);
......
......@@ -291,6 +291,7 @@ void toku_txn_manager_init(TXN_MANAGER* txn_managerp) {
txn_manager->last_xid = 0;
txn_manager->last_xid_seen_for_recover = TXNID_NONE;
txn_manager->last_calculated_oldest_referenced_xid = TXNID_NONE;
*txn_managerp = txn_manager;
}
......@@ -324,6 +325,10 @@ toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager) {
return rval;
}
// Hands back the oldest referenced xid that the txn manager most recently
// cached in last_calculated_oldest_referenced_xid. Nothing is recomputed
// here; the field is simply read from 'txn_manager' and returned.
TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager) {
    const TXNID cached_estimate = txn_manager->last_calculated_oldest_referenced_xid;
    return cached_estimate;
}
int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids);
int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids){
(*referenced_xids)[index] = live_xid->txnid.parent_id64;
......@@ -371,7 +376,7 @@ max_xid(TXNID a, TXNID b) {
return a < b ? b : a;
}
static TXNID get_oldest_referenced_xid_unlocked(TXN_MANAGER txn_manager) {
static void set_oldest_referenced_xid(TXN_MANAGER txn_manager) {
TXNID oldest_referenced_xid = TXNID_MAX;
int r;
if (txn_manager->live_root_ids.size() > 0) {
......@@ -397,8 +402,8 @@ static TXNID get_oldest_referenced_xid_unlocked(TXN_MANAGER txn_manager) {
if (txn_manager->last_xid < oldest_referenced_xid) {
oldest_referenced_xid = txn_manager->last_xid;
}
paranoid_invariant(oldest_referenced_xid != TXNID_MAX);
return oldest_referenced_xid;
invariant(oldest_referenced_xid != TXNID_MAX);
txn_manager->last_calculated_oldest_referenced_xid = oldest_referenced_xid;
}
//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index)
......@@ -672,7 +677,7 @@ void toku_txn_manager_start_txn(
r = txn_manager->live_root_ids.insert_at(txn->txnid.parent_id64, idx);
invariant_zero(r);
}
txn->oldest_referenced_xid = get_oldest_referenced_xid_unlocked(txn_manager);
set_oldest_referenced_xid(txn_manager);
if (needs_snapshot) {
txn_manager_create_snapshot_unlocked(
......@@ -825,7 +830,22 @@ void toku_txn_manager_clone_state_for_gc(
txn_manager_unlock(txn_manager);
}
// Fill in this state object from 'txn_manager', which must not be null.
// The three omt members are populated through
// toku_txn_manager_clone_state_for_gc(), and the manager's cached oldest
// referenced xid is recorded for use by simple garbage collection.
void txn_manager_state::init(TXN_MANAGER txn_manager) {
    invariant_notnull(txn_manager);

    // Have the txn manager clone its gc state into our own members.
    toku_txn_manager_clone_state_for_gc(txn_manager,
                                        &this->snapshot_xids,
                                        &this->referenced_xids,
                                        &this->live_root_txns);

    // Remember the manager's last calculated oldest referenced xid.
    this->oldest_referenced_xid_for_simple_gc =
        txn_manager->last_calculated_oldest_referenced_xid;
}
void txn_manager_state::destroy() {
snapshot_xids.destroy();
referenced_xids.destroy();
live_root_txns.destroy();
}
void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID_PAIR txnid, TOKUTXN *result) {
TOKUTXN txn;
......
......@@ -121,6 +121,28 @@ struct txn_manager {
TXNID last_xid;
TXNID last_xid_seen_for_recover;
TXNID last_calculated_oldest_referenced_xid;
};
// A caller-owned copy of the txn manager state that garbage collection needs.
//
// The constructor leaves every member unset; call init() to populate the
// object from a txn manager, and destroy() to release the three omts.
struct txn_manager_state {
// a snapshot of the txn manager's mvcc state
xid_omt_t snapshot_xids;
rx_omt_t referenced_xids;
xid_omt_t live_root_txns;
// the oldest xid in any live list
//
// suitable for simple garbage collection that cleans up multiple committed
// transaction records into one. not suitable for implicit promotions, which
// must be correct in the face of abort messages - see ftnode->oldest_referenced_xid
TXNID oldest_referenced_xid_for_simple_gc;
// intentionally empty: members are filled in later by init()
txn_manager_state() { }
// populate all four members from the given (non-null) txn manager
void init(TXN_MANAGER txn_manager);
// destroy snapshot_xids, referenced_xids and live_root_txns
void destroy();
private:
// declared private so that outside code cannot copy construct this object
txn_manager_state(txn_manager_state &rhs); // shouldn't need to copy construct
};
......@@ -129,6 +151,8 @@ void toku_txn_manager_destroy(TXN_MANAGER txn_manager);
TXNID toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager);
TXNID toku_txn_manager_get_oldest_referenced_xid_estimate(TXN_MANAGER txn_manager);
void toku_txn_manager_handle_snapshot_create_for_child_txn(
TOKUTXN txn,
TXN_MANAGER txn_manager,
......
......@@ -116,7 +116,7 @@ PATENT RIGHTS GRANT:
#include "ule-internal.h"
#include <util/status.h>
#include <util/scoped_malloc.h>
#include <util/partitioned_counter.h>
#define ULE_DEBUG 0
......@@ -141,6 +141,10 @@ status_init(void) {
STATUS_INIT(LE_MAX_PROVISIONAL_XR, nullptr, UINT64, "max provisional xr", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_EXPANDED, nullptr, UINT64, "expanded", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_MAX_MEMSIZE, nullptr, UINT64, "max memsize", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_APPLY_GC_BYTES_IN, nullptr, PARCOUNT, "size of leafentries before garbage collection (during message application)", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_APPLY_GC_BYTES_OUT, nullptr, PARCOUNT, "size of leafentries after garbage collection (during message application)", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_NORMAL_GC_BYTES_IN, nullptr, PARCOUNT, "size of leafentries before garbage collection (outside message application)", TOKU_ENGINE_STATUS);
STATUS_INIT(LE_NORMAL_GC_BYTES_OUT,nullptr, PARCOUNT, "size of leafentries after garbage collection (outside message application)", TOKU_ENGINE_STATUS);
le_status.initialized = true;
}
#undef STATUS_INIT
......@@ -153,6 +157,14 @@ toku_le_get_status(LE_STATUS statp) {
}
// STATUS_VALUE(x): the plain numeric (.value.num) slot of status row x.
#define STATUS_VALUE(x) le_status.status[x].value.num
// STATUS_INC(x, d): add d to status row x.
// Rows whose type is PARCOUNT are bumped through increment_partitioned_counter()
// on their .value.parcount; every other row gets toku_sync_fetch_and_add() on
// its .value.num.
// NOTE: x is expanded more than once, so pass an expression without side effects.
#define STATUS_INC(x, d) \
do { \
if (le_status.status[x].type == PARCOUNT) { \
increment_partitioned_counter(le_status.status[x].value.parcount, d); \
} else { \
toku_sync_fetch_and_add(&le_status.status[x].value.num, d); \
} \
} while (0)
///////////////////////////////////////////////////////////////////////////////////
......@@ -441,6 +453,18 @@ ule_garbage_collect(ULE ule, const xid_omt_t &snapshot_xids, const rx_omt_t &ref
done:;
}
// Report how many bytes 'ule' would occupy once packed into a leafentry.
//
// Special case: when the ule holds exactly one committed uxr, no provisional
// uxrs, and its innermost uxr is a delete, the reported size is 0.
// Every other ule is sized with le_memsize_from_ule().
static size_t ule_packed_memsize(ULE ule) {
    const bool lone_committed_uxr = (ule->num_cuxrs == 1) && (ule->num_puxrs == 0);
    // Short-circuit keeps the innermost-uxr lookup restricted to the lone-uxr case.
    if (lone_committed_uxr && uxr_is_delete(ule_get_innermost_uxr(ule))) {
        return 0;
    }
    return le_memsize_from_ule(ule);
}
/////////////////////////////////////////////////////////////////////////////////
// This is the big enchilada. (Bring Tums.) Note that this level of abstraction
// has no knowledge of the inner structure of either leafentry or msg. It makes
......@@ -462,6 +486,7 @@ toku_le_apply_msg(FT_MSG msg,
uint32_t idx, // index in data_buffer where leafentry is stored (and should be replaced
TXNID oldest_referenced_xid,
GC_INFO gc_info,
txn_manager_state *txn_state_for_gc,
LEAFENTRY *new_leafentry_p,
int64_t * numbytes_delta_p) { // change in total size of key and val, not including any overhead
paranoid_invariant_notnull(new_leafentry_p);
......@@ -486,7 +511,27 @@ toku_le_apply_msg(FT_MSG msg,
oldnumbytes = ule_get_innermost_numbytes(&ule, keylen);
}
msg_modify_ule(&ule, msg); // modify unpacked leafentry
ule_simple_garbage_collection(&ule, oldest_referenced_xid, gc_info);
// - we may be able to immediately promote the newly-applied outermost provisional uxr
// - either way, run simple gc first, and then full gc if there are still some committed uxrs.
ule_try_promote_provisional_outermost(&ule, oldest_referenced_xid);
ule_simple_garbage_collection(&ule,
txn_state_for_gc != nullptr ?
txn_state_for_gc->oldest_referenced_xid_for_simple_gc :
oldest_referenced_xid,
gc_info);
if (ule.num_cuxrs > 1 && txn_state_for_gc != nullptr) {
size_t size_before_gc = ule_packed_memsize(&ule);
ule_garbage_collect(&ule,
txn_state_for_gc->snapshot_xids,
txn_state_for_gc->referenced_xids,
txn_state_for_gc->live_root_txns
);
size_t size_after_gc = ule_packed_memsize(&ule);
STATUS_INC(LE_APPLY_GC_BYTES_IN, size_before_gc);
STATUS_INC(LE_APPLY_GC_BYTES_OUT, size_after_gc);
}
int rval = le_pack(
&ule, // create packed leafentry
data_buffer,
......@@ -578,7 +623,18 @@ toku_le_garbage_collect(LEAFENTRY old_leaf_entry,
// garbage in leafentries.
TXNID oldest_possible_live_xid = oldest_referenced_xid_known;
ule_try_promote_provisional_outermost(&ule, oldest_possible_live_xid);
ule_garbage_collect(&ule, snapshot_xids, referenced_xids, live_root_txns);
// No need to run simple gc here if we're going straight for full gc.
if (ule.num_cuxrs > 1) {
    // Full gc is only attempted when more than one committed uxr is present.
    // Measure the packed size on both sides of it so the bytes reclaimed can
    // be reported through engine status.
    size_t size_before_gc = ule_packed_memsize(&ule);
    ule_garbage_collect(&ule,
                        snapshot_xids,
                        referenced_xids,
                        live_root_txns);
    size_t size_after_gc = ule_packed_memsize(&ule);
    // This is garbage collection outside of message application, so it is
    // accounted under the NORMAL counters. The APPLY counters are reserved
    // for gc performed while applying a message (toku_le_apply_msg).
    STATUS_INC(LE_NORMAL_GC_BYTES_IN, size_before_gc);
    STATUS_INC(LE_NORMAL_GC_BYTES_OUT, size_after_gc);
}
int r = le_pack(
&ule,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment