Commit 6a8d530b authored by Rich Prohaska, committed by Yoni Fogel

#4771 split locktree.h into public and private .h files refs[t:4771]

git-svn-id: file:///svn/toku/tokudb@42872 c7de825b-a66e-492c-adef-691d508d4ae1
parent 65f8f6e4
......@@ -34,8 +34,8 @@ locktree.$(OEXT) locktree_nooverlap.$(OEXT) $(OBJS): CPPFLAGS+=-I$(TOKUROOT)incl
$(LOCKTREE): $(LOCKTREE_TLOG)
cp $< $@
.PHONY: build check
build: $(LIBRARIES)
.PHONY: build local check
build local: $(LIBRARIES)
locktree_nooverlap.$(OEXT): CPPFLAGS+=-DTOKU_RT_NOOVERLAPS
locktree_nooverlap.$(OEXT): locktree.c $(DEPEND_COMPILE)
......
#if !defined(TOKU_LOCKTREE_INTERNAL_H)
#define TOKU_LOCKTREE_INTERNAL_H
#include <rangetree.h>
#include <lth.h>
#include <rth.h>
#include <idlth.h>
#include <omt.h>
#define TOKU_LT_USE_BORDERWRITE 1
struct __toku_ltm {
/** The maximum number of locks allowed for the environment. */
uint64_t locks_limit;
/** The current number of locks for the environment. */
uint64_t curr_locks;
/** The maximum amount of memory for locks allowed for the environment. */
uint64_t lock_memory_limit;
/** The current amount of memory for locks for the environment. */
uint64_t curr_lock_memory;
/** Status / accountability information */
LTM_STATUS_S status;
/** The list of lock trees it manages. */
toku_lth* lth;
/** List of lock-tree DB mappings. Upon a request for a lock tree given
a DB, if an object for that DB exists in this list, the lock tree
is retrieved from this list; otherwise a new lock tree is created
and the new DB-to-lock-tree mapping is stored here */
toku_idlth* idlth;
/** The panic function */
int (*panic)(DB*, int);
toku_pthread_mutex_t mutex;
bool mutex_locked;
struct timeval lock_wait_time;
};
/** \brief The lock tree structure */
struct __toku_lock_tree {
/** Lock tree manager */
toku_ltm* mgr;
/** The database for which this locktree will be handling locks */
DB* db;
#if TOKU_LT_USE_BORDERWRITE
toku_range_tree* borderwrite; /**< See design document */
#endif
toku_rth* rth; /**< Stores local(read|write)set tables */
/** Whether lock escalation is allowed. */
bool lock_escalation_allowed;
/** Function to retrieve the key compare function from the database. */
toku_dbt_cmp compare_fun;
/** The number of references held by DB instances and transactions to this lock tree*/
uint32_t ref_count;
/** DICTIONARY_ID associated with the lock tree */
DICTIONARY_ID dict_id;
OMT dbs; //The extant dbs using this lock tree.
OMT lock_requests;
toku_rth* txns_to_unlock; // set of txn's that could not release their locks because there was no db for the comparison function
toku_pthread_mutex_t mutex;
bool mutex_locked;
/** A temporary area where we store the results of various find operations on
the range trees that this lock tree owns
Memory ownership:
- tree->buf is an array of toku_range's, which the lt owns
The contents of tree->buf are volatile (this is a buffer space
that we pass around to various functions, and every time we
invoke a new function, its previous contents may become
meaningless)
- tree->buf[i].left, .right are toku_points (ultimately a struct),
also owned by lt. We gave a pointer only to this memory to the
range tree earlier when we inserted a range, but the range tree
does not own it!
- tree->buf[i].{left,right}.key_payload is owned by
the lt, we made copies from the DB at some point
*/
toku_range* buf;
uint32_t buflen; /**< The length of buf */
toku_range* bw_buf;
uint32_t bw_buflen;
toku_range* verify_buf;
uint32_t verify_buflen;
};
toku_range_tree* toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn);
toku_range_tree* toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn);
#include "txnid_set.h"
// internal function that finds all transactions that conflict with a given lock request
// for read lock requests
// conflicts = all transactions in the BWT that conflict with the lock request
// for write lock requests
// conflicts = all transactions in the GRT that conflict with the lock request UNION
// all transactions in the BWT that conflict with the lock request
// adds all of the conflicting transactions to the conflicts transaction set
// returns an error code (0 == success)
int toku_lt_get_lock_request_conflicts(toku_lock_tree *tree, toku_lock_request *lock_request, txnid_set *conflicts);
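As a sketch of how a caller drains the resulting conflicts set (mirroring build_wfg_for_a_lock_request later in this commit), assuming an existing tree and pending lock_request; txnid_set_init and txnid_set_destroy are assumed names here, since only txnid_set_size and txnid_set_get appear in this diff:

    txnid_set conflicts;
    txnid_set_init(&conflicts);                                  // assumed initializer, not shown in this diff
    int r = toku_lt_get_lock_request_conflicts(tree, lock_request, &conflicts);
    if (r == 0) {
        size_t n = txnid_set_size(&conflicts);
        for (size_t i = 0; i < n; i++) {
            TXNID blocker = txnid_set_get(&conflicts, i);        // a transaction that blocks this request
            // e.g. add an edge lock_request->txnid -> blocker to a wait-for graph
        }
    }
    txnid_set_destroy(&conflicts);                               // assumed destructor, not shown in this diff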
// returns the lock request state
toku_lock_request_state toku_lock_request_get_state(toku_lock_request *lock_request);
/**
\brief A 2D BDB-inspired point.
Observe the toku_point, and marvel!
It makes the pair (key, data) into a 1-dimensional point,
on which a total order is defined by toku_lt_point_cmp.
Additionally, we have points at +infty and -infty as
key_payload = (void*) toku_lt_infinity or
key_payload = (void*) toku_lt_neg_infinity
*/
struct __toku_point {
toku_lock_tree* lt; /**< The lock tree, where toku_lt_point_cmp
is defined */
void* key_payload; /**< The key ... */
uint32_t key_len; /**< and its length */
};
#if !defined(__TOKU_POINT)
#define __TOKU_POINT
typedef struct __toku_point toku_point;
#endif
int toku_lt_point_cmp(const toku_point* x, const toku_point* y);
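A minimal sketch of the total order, assuming an existing toku_lock_tree *tree and a DBT *key; the usual negative/zero/positive cmp convention is assumed rather than spelled out in this header, and the sentinel points are taken to be recognized by their key_payload, so their key_len is set to 0 only for illustration:

    toku_point p   = { .lt = tree, .key_payload = key->data,                     .key_len = key->size };
    toku_point neg = { .lt = tree, .key_payload = (void *) toku_lt_neg_infinity, .key_len = 0 };
    toku_point pos = { .lt = tree, .key_payload = (void *) toku_lt_infinity,     .key_len = 0 };
    // every finite point sorts strictly between the -infty and +infty sentinels
    assert(toku_lt_point_cmp(&neg, &p) < 0 && toku_lt_point_cmp(&p, &pos) < 0);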
#endif
......@@ -11,6 +11,7 @@
#include <toku_portability.h>
#include "memory.h"
#include <locktree.h>
#include <locktree-internal.h>
#include <ydb-internal.h>
#include <brt-internal.h>
#include <toku_stdint.h>
......@@ -87,16 +88,17 @@ toku_ltm_get_status(toku_ltm* mgr, LTM_STATUS statp) {
*statp = mgr->status;
}
static inline int lt_panic(toku_lock_tree *tree, int r) {
static inline int
lt_panic(toku_lock_tree *tree, int r) {
return tree->mgr->panic(tree->db, r);
}
// forward defs of lock request tree functions
static void toku_lock_request_tree_init(toku_lock_tree *tree);
static void toku_lock_request_tree_destroy(toku_lock_tree *tree);
static void toku_lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_request);
static void toku_lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_request);
static toku_lock_request *toku_lock_request_tree_find(toku_lock_tree *tree, TXNID id);
static void lock_request_tree_init(toku_lock_tree *tree);
static void lock_request_tree_destroy(toku_lock_tree *tree);
static void lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_request);
static void lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_request);
static toku_lock_request *lock_request_tree_find(toku_lock_tree *tree, TXNID id);
const uint32_t __toku_default_buflen = 2;
......@@ -390,8 +392,8 @@ ltm_incr_lock_memory(toku_ltm *mgr, size_t s) {
(void) __sync_add_and_fetch(&mgr->curr_lock_memory, s);
}
void
toku_ltm_incr_lock_memory(void *extra, size_t s) {
static void
ltm_incr_lock_memory_callback(void *extra, size_t s) {
toku_ltm *mgr = (toku_ltm *) extra;
ltm_incr_lock_memory(mgr, s);
}
......@@ -402,8 +404,8 @@ ltm_decr_lock_memory(toku_ltm *mgr, size_t s) {
(void) __sync_sub_and_fetch(&mgr->curr_lock_memory, s);
}
void
toku_ltm_decr_lock_memory(void *extra, size_t s) {
static void
ltm_decr_lock_memory_callback(void *extra, size_t s) {
toku_ltm *mgr = (toku_ltm *) extra;
ltm_decr_lock_memory(mgr, s);
}
......@@ -503,7 +505,7 @@ lt_selfread(toku_lock_tree* tree, TXNID txn, toku_range_tree** pselfread) {
assert(forest);
if (!forest->self_read) {
r = toku_rt_create(&forest->self_read, toku_lt_point_cmp, lt_txn_cmp, FALSE,
toku_ltm_incr_lock_memory, toku_ltm_decr_lock_memory, tree->mgr);
ltm_incr_lock_memory_callback, ltm_decr_lock_memory_callback, tree->mgr);
if (r != 0)
goto cleanup;
assert(forest->self_read);
......@@ -532,7 +534,7 @@ lt_selfwrite(toku_lock_tree* tree, TXNID txn, toku_range_tree** pselfwrite) {
assert(forest);
if (!forest->self_write) {
r = toku_rt_create(&forest->self_write, toku_lt_point_cmp, lt_txn_cmp, FALSE,
toku_ltm_incr_lock_memory, toku_ltm_decr_lock_memory, tree->mgr);
ltm_incr_lock_memory_callback, ltm_decr_lock_memory_callback, tree->mgr);
if (r != 0)
goto cleanup;
assert(forest->self_write);
......@@ -589,6 +591,8 @@ lt_rt_dominates(toku_lock_tree* tree, toku_interval* query, toku_range_tree* rt,
return 0;
}
#if TOKU_LT_USE_BORDERWRITE
typedef enum {TOKU_NO_CONFLICT, TOKU_MAYBE_CONFLICT, TOKU_YES_CONFLICT} toku_conflict;
/*
......@@ -629,6 +633,7 @@ lt_borderwrite_conflict(toku_lock_tree* tree, TXNID self,
}
return 0;
}
#endif
/*
Determines whether 'query' meets 'rt'.
......@@ -1358,7 +1363,7 @@ toku_lt_create(toku_lock_tree** ptree,
tmp_tree->compare_fun = compare_fun;
tmp_tree->lock_escalation_allowed = TRUE;
r = toku_rt_create(&tmp_tree->borderwrite, toku_lt_point_cmp, lt_txn_cmp, FALSE,
toku_ltm_incr_lock_memory, toku_ltm_decr_lock_memory, mgr);
ltm_incr_lock_memory_callback, ltm_decr_lock_memory_callback, mgr);
if (r != 0)
goto cleanup;
r = toku_rth_create(&tmp_tree->rth);
......@@ -1381,7 +1386,7 @@ toku_lt_create(toku_lock_tree** ptree,
r = toku_omt_create(&tmp_tree->dbs);
if (r != 0)
goto cleanup;
toku_lock_request_tree_init(tmp_tree);
lock_request_tree_init(tmp_tree);
toku_mutex_init(&tmp_tree->mutex, NULL);
tmp_tree->mutex_locked = false;
tmp_tree->ref_count = 1;
......@@ -1421,7 +1426,7 @@ toku_ltm_invalidate_lt(toku_ltm* mgr, DICTIONARY_ID dict_id) {
}
static inline void
toku_lt_set_dict_id(toku_lock_tree* lt, DICTIONARY_ID dict_id) {
lt_set_dict_id(toku_lock_tree* lt, DICTIONARY_ID dict_id) {
assert(lt && dict_id.dictid != DICTIONARY_ID_NONE.dictid);
lt->dict_id = dict_id;
}
......@@ -1458,7 +1463,7 @@ toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, DICTIONARY_ID dict_id, DB
r = toku_lt_create(&tree, mgr, compare_fun);
if (r != 0)
goto cleanup;
toku_lt_set_dict_id(tree, dict_id);
lt_set_dict_id(tree, dict_id);
/* add tree to ltm */
r = ltm_add_lt(mgr, tree);
if (r != 0)
......@@ -1515,7 +1520,7 @@ toku_lt_close(toku_lock_tree* tree) {
}
tree->mgr->STATUS_VALUE(LTM_LT_DESTROY)++;
tree->mgr->STATUS_VALUE(LTM_LT_NUM)--;
toku_lock_request_tree_destroy(tree);
lock_request_tree_destroy(tree);
r = toku_rt_close(tree->borderwrite);
if (!first_error && r != 0)
first_error = r;
......@@ -2050,6 +2055,7 @@ toku_lt_acquire_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* k
return toku_lt_acquire_range_write_lock(tree, db, txn, key, key);
}
#if TOKU_LT_USE_BORDERWRITE
static inline int
sweep_border(toku_lock_tree* tree, toku_range* range) {
assert(tree && range);
......@@ -2153,6 +2159,7 @@ lt_border_delete(toku_lock_tree* tree, toku_range_tree* rt) {
return 0;
}
#endif
static inline int
lt_unlock_txn(toku_lock_tree* tree, TXNID txn) {
......@@ -2246,7 +2253,7 @@ toku_lt_add_ref(toku_lock_tree* tree) {
}
static void
toku_ltm_stop_managing_lt(toku_ltm* mgr, toku_lock_tree* tree) {
ltm_stop_managing_lt(toku_ltm* mgr, toku_lock_tree* tree) {
ltm_mutex_lock(mgr);
ltm_remove_lt(mgr, tree);
toku_lt_map* map = toku_idlth_find(mgr->idlth, tree->dict_id);
......@@ -2266,7 +2273,7 @@ toku_lt_remove_ref(toku_lock_tree* tree) {
r = 0; goto cleanup;
}
assert(tree->dict_id.dictid != DICTIONARY_ID_NONE.dictid);
toku_ltm_stop_managing_lt(tree->mgr, tree);
ltm_stop_managing_lt(tree->mgr, tree);
r = toku_lt_close(tree);
if (r != 0)
goto cleanup;
......@@ -2327,7 +2334,7 @@ toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db) {
}
static void
toku_lock_request_init_wait(toku_lock_request *lock_request) {
lock_request_init_wait(toku_lock_request *lock_request) {
if (!lock_request->wait_initialized) {
int r = toku_pthread_cond_init(&lock_request->wait, NULL); assert_zero(r);
lock_request->wait_initialized = true;
......@@ -2335,7 +2342,7 @@ toku_lock_request_init_wait(toku_lock_request *lock_request) {
}
static void
toku_lock_request_destroy_wait(toku_lock_request *lock_request) {
lock_request_destroy_wait(toku_lock_request *lock_request) {
if (lock_request->wait_initialized) {
int r = toku_pthread_cond_destroy(&lock_request->wait); assert_zero(r);
lock_request->wait_initialized = false;
......@@ -2378,16 +2385,16 @@ toku_lock_request_destroy(toku_lock_request *lock_request) {
if (lock_request->state == LOCK_REQUEST_PENDING) {
toku_lock_tree *tree = lock_request->tree;
lt_mutex_lock(tree);
toku_lock_request_tree_delete(lock_request->tree, lock_request);
lock_request_tree_delete(lock_request->tree, lock_request);
lt_mutex_unlock(tree);
}
toku_lock_request_destroy_wait(lock_request);
lock_request_destroy_wait(lock_request);
toku_free(lock_request->key_left_copy.data);
toku_free(lock_request->key_right_copy.data);
}
static void
toku_lock_request_complete(toku_lock_request *lock_request, int complete_r) {
lock_request_complete(toku_lock_request *lock_request, int complete_r) {
lock_request->state = LOCK_REQUEST_COMPLETE;
lock_request->complete_r = complete_r;
}
......@@ -2395,7 +2402,7 @@ toku_lock_request_complete(toku_lock_request *lock_request, int complete_r) {
static const struct timeval max_timeval = { ~0, 0 };
static int
toku_lock_request_wait_internal(toku_lock_request *lock_request, toku_lock_tree *tree, struct timeval *wait_time, bool tree_locked) {
lock_request_wait(toku_lock_request *lock_request, toku_lock_tree *tree, struct timeval *wait_time, bool tree_locked) {
#if TOKU_LT_DEBUG
if (toku_lt_debug)
printf("%s:%u %lu\n", __FUNCTION__, __LINE__, lock_request->txnid);
......@@ -2411,21 +2418,21 @@ toku_lock_request_wait_internal(toku_lock_request *lock_request, toku_lock_tree
struct timespec ts = { sec + d_sec, d_usec * 1000 };
if (!tree_locked) lt_mutex_lock(tree);
while (lock_request->state == LOCK_REQUEST_PENDING) {
toku_lock_request_init_wait(lock_request);
lock_request_init_wait(lock_request);
tree->mutex_locked = false;
r = pthread_cond_timedwait(&lock_request->wait, &tree->mutex, &ts);
tree->mutex_locked = true;
assert(r == 0 || r == ETIMEDOUT);
if (r == ETIMEDOUT && lock_request->state == LOCK_REQUEST_PENDING) {
toku_lock_request_tree_delete(tree, lock_request);
toku_lock_request_complete(lock_request, DB_LOCK_NOTGRANTED);
lock_request_tree_delete(tree, lock_request);
lock_request_complete(lock_request, DB_LOCK_NOTGRANTED);
}
}
if (!tree_locked) lt_mutex_unlock(tree);
} else {
if (!tree_locked) lt_mutex_lock(tree);
while (lock_request->state == LOCK_REQUEST_PENDING) {
toku_lock_request_init_wait(lock_request);
lock_request_init_wait(lock_request);
tree->mutex_locked = false;
r = toku_pthread_cond_wait(&lock_request->wait, &tree->mutex); assert_zero(r);
tree->mutex_locked = true;
......@@ -2438,16 +2445,16 @@ toku_lock_request_wait_internal(toku_lock_request *lock_request, toku_lock_tree
int
toku_lock_request_wait(toku_lock_request *lock_request, toku_lock_tree *tree, struct timeval *wait_time) {
return toku_lock_request_wait_internal(lock_request, tree, wait_time, false);
return lock_request_wait(lock_request, tree, wait_time, false);
}
int
toku_lock_request_wait_with_default_timeout(toku_lock_request *lock_request, toku_lock_tree *tree) {
return toku_lock_request_wait_internal(lock_request, tree, &tree->mgr->lock_wait_time, false);
return lock_request_wait(lock_request, tree, &tree->mgr->lock_wait_time, false);
}
void
toku_lock_request_wakeup(toku_lock_request *lock_request, toku_lock_tree *tree UU()) {
static void
lock_request_wakeup(toku_lock_request *lock_request, toku_lock_tree *tree UU()) {
if (lock_request->wait_initialized) {
int r = toku_pthread_cond_broadcast(&lock_request->wait); assert_zero(r);
}
......@@ -2456,14 +2463,14 @@ toku_lock_request_wakeup(toku_lock_request *lock_request, toku_lock_tree *tree U
// a lock request tree contains pending lock requests.
// initialize a lock request tree.
static void
toku_lock_request_tree_init(toku_lock_tree *tree) {
lock_request_tree_init(toku_lock_tree *tree) {
int r = toku_omt_create(&tree->lock_requests); assert_zero(r);
}
// destroy a lock request tree.
// the tree must be empty when destroyed.
static void
toku_lock_request_tree_destroy(toku_lock_tree *tree) {
lock_request_tree_destroy(toku_lock_tree *tree) {
assert(toku_omt_size(tree->lock_requests) == 0);
toku_omt_destroy(&tree->lock_requests);
}
......@@ -2481,7 +2488,7 @@ compare_lock_request(OMTVALUE a, void *b) {
// insert a lock request into the tree.
static void
toku_lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_request) {
lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_request) {
lock_request->tree = tree;
int r;
OMTVALUE v;
......@@ -2492,7 +2499,7 @@ toku_lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_requ
// delete a lock request from the tree.
static void
toku_lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_request) {
lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_request) {
int r;
OMTVALUE v;
u_int32_t idx;
......@@ -2504,7 +2511,7 @@ toku_lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_requ
// find a lock request for a given transaction id.
static toku_lock_request *
toku_lock_request_tree_find(toku_lock_tree *tree, TXNID id) {
lock_request_tree_find(toku_lock_tree *tree, TXNID id) {
int r;
OMTVALUE v;
u_int32_t idx;
......@@ -2526,7 +2533,8 @@ copy_dbt(DBT *dest, const DBT *src) {
#if TOKU_LT_DEBUG
#include <ctype.h>
static void print_key(const char *sp, const DBT *k) {
static void
print_key(const char *sp, const DBT *k) {
printf("%s", sp);
if (k == toku_lt_neg_infinity)
printf("-inf");
......@@ -2546,10 +2554,10 @@ static void print_key(const char *sp, const DBT *k) {
}
#endif
static void toku_lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request);
static void lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request);
static int
toku_lock_request_start_locked(toku_lock_request *lock_request, toku_lock_tree *tree, bool copy_keys_if_not_granted, bool do_escalation) {
lock_request_start(toku_lock_request *lock_request, toku_lock_tree *tree, bool copy_keys_if_not_granted, bool do_escalation) {
assert(lock_request->state == LOCK_REQUEST_INIT);
assert(tree->mutex_locked);
int r = 0;
......@@ -2580,14 +2588,14 @@ toku_lock_request_start_locked(toku_lock_request *lock_request, toku_lock_tree *
if (!lt_is_infinite(lock_request->key_right))
lock_request->key_right = &lock_request->key_right_copy;
}
toku_lock_request_tree_insert(tree, lock_request);
lock_request_tree_insert(tree, lock_request);
// check for deadlock
toku_lt_check_deadlock(tree, lock_request);
lt_check_deadlock(tree, lock_request);
if (lock_request->state == LOCK_REQUEST_COMPLETE)
r = lock_request->complete_r;
} else
toku_lock_request_complete(lock_request, r);
lock_request_complete(lock_request, r);
return r;
}
......@@ -2595,23 +2603,23 @@ toku_lock_request_start_locked(toku_lock_request *lock_request, toku_lock_tree *
int
toku_lock_request_start(toku_lock_request *lock_request, toku_lock_tree *tree, bool copy_keys_if_not_granted) {
lt_mutex_lock(tree);
int r = toku_lock_request_start_locked(lock_request, tree, copy_keys_if_not_granted, true);
int r = lock_request_start(lock_request, tree, copy_keys_if_not_granted, true);
lt_mutex_unlock(tree);
return r;
}
static int
toku_lt_acquire_lock_request_with_timeout_locked(toku_lock_tree *tree, toku_lock_request *lock_request, struct timeval *wait_time) {
int r = toku_lock_request_start_locked(lock_request, tree, false, true);
lt_acquire_lock_request_with_timeout_locked(toku_lock_tree *tree, toku_lock_request *lock_request, struct timeval *wait_time) {
int r = lock_request_start(lock_request, tree, false, true);
if (r == DB_LOCK_NOTGRANTED)
r = toku_lock_request_wait_internal(lock_request, tree, wait_time, true);
r = lock_request_wait(lock_request, tree, wait_time, true);
return r;
}
int
toku_lt_acquire_lock_request_with_timeout(toku_lock_tree *tree, toku_lock_request *lock_request, struct timeval *wait_time) {
lt_mutex_lock(tree);
int r = toku_lt_acquire_lock_request_with_timeout_locked(tree, lock_request, wait_time);
int r = lt_acquire_lock_request_with_timeout_locked(tree, lock_request, wait_time);
lt_mutex_unlock(tree);
return r;
}
......@@ -2634,9 +2642,9 @@ lt_retry_lock_requests(toku_lock_tree *tree) {
assert(lock_request->state == LOCK_REQUEST_PENDING);
lock_request->state = LOCK_REQUEST_INIT;
toku_omt_delete_at(tree->lock_requests, i);
r = toku_lock_request_start_locked(lock_request, tree, false, false);
r = lock_request_start(lock_request, tree, false, false);
if (lock_request->state == LOCK_REQUEST_COMPLETE) {
toku_lock_request_wakeup(lock_request, tree);
lock_request_wakeup(lock_request, tree);
} else {
assert(lock_request->state == LOCK_REQUEST_PENDING);
i++;
......@@ -2644,13 +2652,6 @@ lt_retry_lock_requests(toku_lock_tree *tree) {
}
}
void
toku_lt_retry_lock_requests(toku_lock_tree *tree) {
lt_mutex_lock(tree);
lt_retry_lock_requests(tree);
lt_mutex_unlock(tree);
}
#include <stdbool.h>
#include "wfg.h"
......@@ -2665,7 +2666,7 @@ build_wfg_for_a_lock_request(toku_lock_tree *tree, struct wfg *wfg, toku_lock_re
size_t n_conflicts = txnid_set_size(&conflicts);
for (size_t i = 0; i < n_conflicts; i++) {
TXNID b = txnid_set_get(&conflicts, i);
toku_lock_request *b_lock_request = toku_lock_request_tree_find(tree, b);
toku_lock_request *b_lock_request = lock_request_tree_find(tree, b);
if (b_lock_request) {
bool b_exists = wfg_node_exists(wfg, b);
wfg_add_edge(wfg, a_lock_request->txnid, b);
......@@ -2678,7 +2679,7 @@ build_wfg_for_a_lock_request(toku_lock_tree *tree, struct wfg *wfg, toku_lock_re
// check if a given lock request could deadlock with any granted locks.
static void
toku_lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request) {
lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request) {
// init the wfg
struct wfg wfg_static;
struct wfg *wfg = &wfg_static; wfg_init(wfg);
......@@ -2695,9 +2696,9 @@ toku_lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request)
// wakeup T's lock request
if (wfg_exist_cycle_from_txnid(wfg, a_lock_request->txnid)) {
assert(a_lock_request->state == LOCK_REQUEST_PENDING);
toku_lock_request_complete(a_lock_request, DB_LOCK_DEADLOCK);
toku_lock_request_tree_delete(tree, a_lock_request);
toku_lock_request_wakeup(a_lock_request, tree);
lock_request_complete(a_lock_request, DB_LOCK_DEADLOCK);
lock_request_tree_delete(tree, a_lock_request);
lock_request_wakeup(a_lock_request, tree);
}
// destroy the wfg
......
......@@ -6,6 +6,10 @@
#if !defined(TOKU_LOCKTREE_H)
#define TOKU_LOCKTREE_H
#include <stdbool.h>
#include <db.h>
#include <brttypes.h>
/**
\file locktree.h
\brief Lock trees: header and comments
......@@ -17,16 +21,6 @@
each other, due to some system error like failed malloc,
we defer to the db panic handler. Pass in another parameter to do this.
*/
#include <stdbool.h>
#include <db.h>
#include <brttypes.h>
#include <rangetree.h>
#include <lth.h>
#include <rth.h>
#include <idlth.h>
#include <omt.h>
#include "toku_pthread.h"
#include "toku_assert.h"
#if defined(__cplusplus)
extern "C" {
......@@ -53,135 +47,77 @@ typedef struct __toku_lock_tree toku_lock_tree;
typedef struct __toku_lth toku_lth;
#endif
#define TOKU_LT_USE_BORDERWRITE 1
typedef struct __toku_ltm toku_ltm;
/** \brief The lock tree structure */
struct __toku_lock_tree {
/** Lock tree manager */
toku_ltm* mgr;
/** The database for which this locktree will be handling locks */
DB* db;
toku_range_tree* borderwrite; /**< See design document */
toku_rth* rth; /**< Stores local(read|write)set tables */
/** Whether lock escalation is allowed. */
bool lock_escalation_allowed;
/** Function to retrieve the key compare function from the database. */
toku_dbt_cmp compare_fun;
/** The number of references held by DB instances and transactions to this lock tree*/
uint32_t ref_count;
/** DICTIONARY_ID associated with the lock tree */
DICTIONARY_ID dict_id;
OMT dbs; //The extant dbs using this lock tree.
OMT lock_requests;
toku_rth* txns_to_unlock; // set of txn's that could not release their locks because there was no db for the comparison function
toku_pthread_mutex_t mutex;
bool mutex_locked;
/** A temporary area where we store the results of various find operations on
the range trees that this lock tree owns
Memory ownership:
- tree->buf is an array of toku_range's, which the lt owns
The contents of tree->buf are volatile (this is a buffer space
that we pass around to various functions, and every time we
invoke a new function, its previous contents may become
meaningless)
- tree->buf[i].left, .right are toku_points (ultimately a struct),
also owned by lt. We gave a pointer only to this memory to the
range tree earlier when we inserted a range, but the range tree
does not own it!
- tree->buf[i].{left,right}.key_payload is owned by
the lt, we made copies from the DB at some point
*/
toku_range* buf;
uint32_t buflen; /**< The length of buf */
toku_range* bw_buf;
uint32_t bw_buflen;
toku_range* verify_buf;
uint32_t verify_buflen;
};
/* Lock tree manager functions begin here */
typedef enum {
LTM_LOCKS_LIMIT, // number of locks allowed (obsolete)
LTM_LOCKS_CURR, // number of locks in existence
LTM_LOCK_MEMORY_LIMIT, // maximum amount of memory allowed for locks
LTM_LOCK_MEMORY_CURR, // current amount of memory used for locks
LTM_LOCK_ESCALATION_SUCCESSES, // number of times lock escalation succeeded
LTM_LOCK_ESCALATION_FAILURES, // number of times lock escalation failed
LTM_READ_LOCK, // number of times read lock taken successfully
LTM_READ_LOCK_FAIL, // number of times read lock denied
LTM_OUT_OF_READ_LOCKS, // number of times read lock denied for out_of_locks
LTM_WRITE_LOCK, // number of times write lock taken successfully
LTM_WRITE_LOCK_FAIL, // number of times write lock denied
LTM_OUT_OF_WRITE_LOCKS, // number of times write lock denied for out_of_locks
LTM_LT_CREATE, // number of locktrees created
LTM_LT_CREATE_FAIL, // number of locktrees unable to be created
LTM_LT_DESTROY, // number of locktrees destroyed
LTM_LT_NUM, // number of locktrees (should be created - destroyed)
LTM_LT_NUM_MAX, // max number of locktrees that have existed simultaneously
LTM_STATUS_NUM_ROWS
} ltm_status_entry;
/**
Creates a lock tree manager.
\param pmgr A buffer for the new lock tree manager.
\param locks_limit The maximum number of locks.
typedef struct {
BOOL initialized;
TOKU_ENGINE_STATUS_ROW_S status[LTM_STATUS_NUM_ROWS];
} LTM_STATUS_S, *LTM_STATUS;
\return
- 0 on success.
- EINVAL if any pointer parameter is NULL.
- May return other errors due to system calls.
*/
int toku_ltm_create(toku_ltm** pmgr,
uint32_t locks_limit,
uint64_t lock_memory_limit,
int (*panic)(DB*, int));
/** Open the lock tree manager */
int toku_ltm_open(toku_ltm *mgr);
/**
Closes and frees a lock tree manager.
\param mgr The lock tree manager.
struct __toku_ltm {
/** The maximum number of locks allowed for the environment. */
uint64_t locks_limit;
/** The current number of locks for the environment. */
uint64_t curr_locks;
/** The maximum amount of memory for locks allowed for the environment. */
uint64_t lock_memory_limit;
/** The current amount of memory for locks for the environment. */
uint64_t curr_lock_memory;
/** Status / accountability information */
LTM_STATUS_S status;
/** The list of lock trees it manages. */
toku_lth* lth;
/** List of lock-tree DB mappings. Upon a request for a lock tree given
a DB, if an object for that DB exists in this list, the lock tree
is retrieved from this list; otherwise a new lock tree is created
and the new DB-to-lock-tree mapping is stored here */
toku_idlth* idlth;
/** The panic function */
int (*panic)(DB*, int);
toku_pthread_mutex_t mutex;
bool mutex_locked;
struct timeval lock_wait_time;
};
extern const DBT* const toku_lt_infinity; /**< Special value denoting
+infty */
extern const DBT* const toku_lt_neg_infinity; /**< Special value denoting
-infty */
\return
- 0 on success.
- EINVAL if any pointer parameter is NULL.
- May return other errors due to system calls.
*/
int toku_ltm_close(toku_ltm* mgr);
/**
Sets the maximum number of locks on the lock tree manager.
\param mgr The lock tree manager to which to set locks_limit.
\param locks_limit The new maximum number of locks.
\return
- 0 on success.
- EINVAL if tree is NULL or locks_limit is 0
- EDOM if locks_limit is less than the number of locks held by any lock tree
held by the manager
*/
int toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t locks_limit);
\brief A 2D BDB-inspired point.
int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* locks_limit);
Observe the toku_point, and marvel!
It makes the pair (key, data) into a 1-dimensional point,
on which a total order is defined by toku_lt_point_cmp.
Additionally, we have points at +infty and -infty as
key_payload = (void*) toku_lt_infinity or
key_payload = (void*) toku_lt_neg_infinity
*/
struct __toku_point {
toku_lock_tree* lt; /**< The lock tree, where toku_lt_point_cmp
is defined */
void* key_payload; /**< The key ... */
uint32_t key_len; /**< and its length */
};
#if !defined(__TOKU_POINT)
#define __TOKU_POINT
typedef struct __toku_point toku_point;
#endif
int toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t lock_memory_limit);
int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* lock_memory_limit);
// set the default lock timeout. units are milliseconds
void toku_ltm_set_lock_wait_time(toku_ltm *mgr, uint64_t lock_wait_time_msec);
// get the default lock timeout
void toku_ltm_get_lock_wait_time(toku_ltm *mgr, uint64_t *lock_wait_time_msec);
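A usage sketch of the manager API declared above; my_panic stands in for an application-supplied panic callback, and the limits and timeout are arbitrary illustration values:

    toku_ltm *mgr = NULL;
    int r = toku_ltm_create(&mgr, 1000 /*locks_limit*/, 100 * 1024 * 1024 /*lock_memory_limit*/, my_panic);
    assert(r == 0);
    r = toku_ltm_open(mgr);
    assert(r == 0);
    toku_ltm_set_lock_wait_time(mgr, 4000);      // default lock timeout of 4000 milliseconds
    // ... hand out lock trees with toku_ltm_get_lt() and run transactions ...
    r = toku_ltm_close(mgr);
    assert(r == 0);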
/**
Gets a lock tree for a given DB with id dict_id
*/
int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, DICTIONARY_ID dict_id, DB *dbp, toku_dbt_cmp compare_fun);
void toku_ltm_invalidate_lt(toku_ltm* mgr, DICTIONARY_ID dict_id);
extern const DBT* const toku_lt_infinity; /**< Special value denoting +infty */
extern const DBT* const toku_lt_neg_infinity; /**< Special value denoting -infty */
/**
Create a lock tree. Should be called only inside DB->open.
......@@ -371,93 +307,14 @@ int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
*/
int toku_lt_unlock_txn(toku_lock_tree* tree, TXNID txn);
void toku_lt_retry_lock_requests(toku_lock_tree *tree);
void toku_lt_add_ref(toku_lock_tree* tree);
int toku_lt_remove_ref(toku_lock_tree* tree);
void toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db);
toku_range_tree* toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn);
toku_range_tree* toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn);
void toku_lt_verify(toku_lock_tree *tree, DB *db);
int toku_lt_point_cmp(const toku_point* x, const toku_point* y);
/* Lock tree manager functions begin here */
/**
Creates a lock tree manager.
\param pmgr A buffer for the new lock tree manager.
\param locks_limit The maximum number of locks.
\return
- 0 on success.
- EINVAL if any pointer parameter is NULL.
- May return other errors due to system calls.
*/
int toku_ltm_create(toku_ltm** pmgr,
uint32_t locks_limit,
uint64_t lock_memory_limit,
int (*panic)(DB*, int));
/** Open the lock tree manager */
int toku_ltm_open(toku_ltm *mgr);
/**
Closes and frees a lock tree manager.
\param mgr The lock tree manager.
\return
- 0 on success.
- EINVAL if any pointer parameter is NULL.
- May return other errors due to system calls.
*/
int toku_ltm_close(toku_ltm* mgr);
/**
Sets the maximum number of locks on the lock tree manager.
\param mgr The lock tree manager to which to set locks_limit.
\param locks_limit The new maximum number of locks.
\return
- 0 on success.
- EINVAL if tree is NULL or locks_limit is 0
- EDOM if locks_limit is less than the number of locks held by any lock tree
held by the manager
*/
int toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t locks_limit);
int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* locks_limit);
int toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t lock_memory_limit);
int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* lock_memory_limit);
void toku_ltm_get_status(toku_ltm* mgr, LTM_STATUS s);
// set the default lock timeout. units are milliseconds
void toku_ltm_set_lock_wait_time(toku_ltm *mgr, uint64_t lock_wait_time_msec);
// get the default lock timeout
void toku_ltm_get_lock_wait_time(toku_ltm *mgr, uint64_t *lock_wait_time_msec);
/**
Gets a lock tree for a given DB with id dict_id
*/
int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, DICTIONARY_ID dict_id, DB *dbp, toku_dbt_cmp compare_fun);
void toku_ltm_invalidate_lt(toku_ltm* mgr, DICTIONARY_ID dict_id);
void toku_ltm_incr_lock_memory(void *extra, size_t s);
void toku_ltm_decr_lock_memory(void *extra, size_t s);
typedef enum {
LOCK_REQUEST_INIT = 0,
LOCK_REQUEST_PENDING = 1,
......@@ -471,6 +328,19 @@ typedef enum {
LOCK_REQUEST_WRITE = 2,
} toku_lock_type;
#include "toku_pthread.h"
// a lock request contains the db, the key range, the lock type, and the transaction id that describes a potential row range lock.
// the typical use case is:
// - initialize a lock request
// - start to try to acquire the lock
// - do something else
// - wait for the lock request to be resolved on the wait condition variable and a timeout.
// - destroy the lock request
// a lock request is resolved when its state is no longer pending, i.e. when it has been granted, has timed out, or has deadlocked.
// when resolved, the state of the lock request is changed and any waiting threads are awakened.
// this is exposed so that we can allocate these as local variables. don't touch
typedef struct {
DB *db;
TXNID txnid;
......@@ -484,16 +354,6 @@ typedef struct {
bool wait_initialized;
} toku_lock_request;
// a lock request contains the db, the key range, the lock type, and the transaction id that describes a potential row range lock.
// the typical use case is:
// - initialize a lock request
// - start to try to acquire the lock
// - do something else
// - wait for the lock request to be resolved on the wait condition variable and a timeout.
// - destroy the lock request
// a lock request is resolved when its state is no longer pending, i.e. when it has been granted, has timed out, or has deadlocked.
// when resolved, the state of the lock request is changed and any waiting threads are awakened.
// initialize a lock request (default initializer).
void toku_lock_request_default_init(toku_lock_request *lock_request);
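A sketch of the lifecycle described above, assuming an existing toku_lock_tree *tree, DB *db, TXNID txnid, and key DBTs; toku_lock_request_set() stands in for whatever call fills in the db, txnid, key range, and lock type after the default initializer, and is an assumption not shown in this hunk:

    toku_lock_request request;
    toku_lock_request_default_init(&request);
    toku_lock_request_set(&request, db, txnid, &key_left, &key_right, LOCK_REQUEST_WRITE);  // assumed setter
    int r = toku_lock_request_start(&request, tree, true /*copy_keys_if_not_granted*/);
    if (r == DB_LOCK_NOTGRANTED) {
        // do something else, then block until the request is granted, times out, or deadlocks
        r = toku_lock_request_wait_with_default_timeout(&request, tree);
    }
    // r == 0 when granted, DB_LOCK_NOTGRANTED on timeout, DB_LOCK_DEADLOCK on deadlock
    toku_lock_request_destroy(&request);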
......@@ -519,12 +379,6 @@ int toku_lock_request_wait(toku_lock_request *lock_request, toku_lock_tree *tree
int toku_lock_request_wait_with_default_timeout(toku_lock_request *lock_request, toku_lock_tree *tree);
// wakeup any threads that are waiting on a lock request.
void toku_lock_request_wakeup(toku_lock_request *lock_request, toku_lock_tree *tree);
// returns the lock request state
toku_lock_request_state toku_lock_request_get_state(toku_lock_request *lock_request);
// try to acquire a lock described by a lock request. if the lock is granted then return success.
// otherwise wait on the lock request until the lock request is resolved (either granted or
// deadlocks), or the given timer has expired.
......@@ -533,17 +387,33 @@ int toku_lt_acquire_lock_request_with_timeout(toku_lock_tree *tree, toku_lock_re
int toku_lt_acquire_lock_request_with_default_timeout(toku_lock_tree *tree, toku_lock_request *lock_request);
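For an already-initialized request, the same outcome can be had in one call: per the comment above, it starts the request and, if it is not granted immediately, waits until the request is resolved or the timer expires.

    struct timeval wait_time = { 10, 0 };   // cap this particular request at 10 seconds
    int r = toku_lt_acquire_lock_request_with_timeout(tree, &request, &wait_time);
    // or, using the manager-wide default set with toku_ltm_set_lock_wait_time():
    // r = toku_lt_acquire_lock_request_with_default_timeout(tree, &request);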
#include "txnid_set.h"
// internal function that finds all transactions that conflict with a given lock request
// for read lock requests
// conflicts = all transactions in the BWT that conflict with the lock request
// for write lock requests
// conflicts = all transactions in the GRT that conflict with the lock request UNION
// all transactions in the BWT that conflict with the lock request
// adds all of the conflicting transactions to the conflicts transaction set
// returns an error code (0 == success)
int toku_lt_get_lock_request_conflicts(toku_lock_tree *tree, toku_lock_request *lock_request, txnid_set *conflicts);
typedef enum {
LTM_LOCKS_LIMIT, // number of locks allowed (obsolete)
LTM_LOCKS_CURR, // number of locks in existence
LTM_LOCK_MEMORY_LIMIT, // maximum amount of memory allowed for locks
LTM_LOCK_MEMORY_CURR, // current amount of memory used for locks
LTM_LOCK_ESCALATION_SUCCESSES, // number of times lock escalation succeeded
LTM_LOCK_ESCALATION_FAILURES, // number of times lock escalation failed
LTM_READ_LOCK, // number of times read lock taken successfully
LTM_READ_LOCK_FAIL, // number of times read lock denied
LTM_OUT_OF_READ_LOCKS, // number of times read lock denied for out_of_locks
LTM_WRITE_LOCK, // number of times write lock taken successfully
LTM_WRITE_LOCK_FAIL, // number of times write lock denied
LTM_OUT_OF_WRITE_LOCKS, // number of times write lock denied for out_of_locks
LTM_LT_CREATE, // number of locktrees created
LTM_LT_CREATE_FAIL, // number of locktrees unable to be created
LTM_LT_DESTROY, // number of locktrees destroyed
LTM_LT_NUM, // number of locktrees (should be created - destroyed)
LTM_LT_NUM_MAX, // max number of locktrees that have existed simultaneously
LTM_STATUS_NUM_ROWS
} ltm_status_entry;
typedef struct {
BOOL initialized;
TOKU_ENGINE_STATUS_ROW_S status[LTM_STATUS_NUM_ROWS];
} LTM_STATUS_S, *LTM_STATUS;
void toku_ltm_get_status(toku_ltm* mgr, LTM_STATUS s);
#if defined(__cplusplus)
}
......
......@@ -15,7 +15,6 @@
//Defines bool data type.
#include <db.h>
#include <brttypes.h>
#include <locktree.h>
#if defined(__cplusplus)
extern "C" {
......
#include <toku_portability.h>
#include <string.h>
#include <locktree.h>
#include <locktree-internal.h>
#include <db.h>
#include <brttypes.h>
#include <stdlib.h>
......
......@@ -6,12 +6,12 @@
#include <db.h>
#include "ydb-internal.h"
#include "ydb_row_lock.h"
#include "lth.h"
static int
toku_txn_add_lt(DB_TXN* txn, toku_lock_tree* lt) {
int r = ENOSYS;
assert(txn && lt);
toku_mutex_lock(&lt->mgr->mutex);
toku_lth* lth = db_txn_struct_i(txn)->lth;
// we used to initialize the transaction's lth during begin.
// Now we initialize the lth only if the transaction needs the lth, here
......@@ -33,7 +33,6 @@ toku_txn_add_lt(DB_TXN* txn, toku_lock_tree* lt) {
toku_lt_add_ref(lt);
r = 0;
cleanup:
toku_mutex_unlock(&lt->mgr->mutex);
return r;
}
......
......@@ -8,6 +8,7 @@
#include "checkpoint.h"
#include "log_header.h"
#include "ydb_txn.h"
#include "lth.h"
#include <valgrind/helgrind.h>
static int
......