Commit b846739a authored by Rich Prohaska's avatar Rich Prohaska

#69 measure long tail lock tree and cache table stalls

parent 0e689c58
......@@ -558,6 +558,10 @@ private:
PARTITIONED_COUNTER m_size_leaf;
PARTITIONED_COUNTER m_size_rollback;
PARTITIONED_COUNTER m_size_cachepressure;
PARTITIONED_COUNTER m_wait_pressure_count;
PARTITIONED_COUNTER m_wait_pressure_time;
PARTITIONED_COUNTER m_long_wait_pressure_count;
PARTITIONED_COUNTER m_long_wait_pressure_time;
KIBBUTZ m_kibbutz;
......
......@@ -146,6 +146,11 @@ status_init(void) {
STATUS_INIT(CT_CLEANER_EXECUTIONS, CACHETABLE_CLEANER_EXECUTIONS, UINT64, "cleaner executions", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_CLEANER_PERIOD, CACHETABLE_CLEANER_PERIOD, UINT64, "cleaner period", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_CLEANER_ITERATIONS, CACHETABLE_CLEANER_ITERATIONS, UINT64, "cleaner iterations", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_WAIT_PRESSURE_COUNT, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "number of waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_WAIT_PRESSURE_TIME, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_LONG_WAIT_PRESSURE_COUNT, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "number of long waits on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(CT_LONG_WAIT_PRESSURE_TIME, CACHETABLE_WAIT_PRESSURE_COUNT, UINT64, "long time waiting on cache pressure", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
ct_status.initialized = true;
}
#undef STATUS_INIT
......@@ -3652,6 +3657,10 @@ void evictor::init(long _size_limit, pair_list* _pl, KIBBUTZ _kibbutz, uint32_t
m_size_leaf = create_partitioned_counter();
m_size_rollback = create_partitioned_counter();
m_size_cachepressure = create_partitioned_counter();
m_wait_pressure_count = create_partitioned_counter();
m_wait_pressure_time = create_partitioned_counter();
m_long_wait_pressure_count = create_partitioned_counter();
m_long_wait_pressure_time = create_partitioned_counter();
m_pl = _pl;
m_kibbutz = _kibbutz;
......@@ -3706,6 +3715,11 @@ void evictor::destroy() {
destroy_partitioned_counter(m_size_cachepressure);
m_size_cachepressure = NULL;
destroy_partitioned_counter(m_wait_pressure_count); m_wait_pressure_count = NULL;
destroy_partitioned_counter(m_wait_pressure_time); m_wait_pressure_time = NULL;
destroy_partitioned_counter(m_long_wait_pressure_count); m_long_wait_pressure_count = NULL;
destroy_partitioned_counter(m_long_wait_pressure_time); m_long_wait_pressure_time = NULL;
toku_cond_destroy(&m_flow_control_cond);
toku_cond_destroy(&m_ev_thread_cond);
toku_mutex_destroy(&m_ev_thread_lock);
......@@ -4173,13 +4187,21 @@ void evictor::decrease_size_evicting(long size_evicting_estimate) {
// size_evicting is number of bytes queued up to be evicted
//
void evictor::wait_for_cache_pressure_to_subside() {
uint64_t t0 = toku_current_time_microsec();
toku_mutex_lock(&m_ev_thread_lock);
m_num_sleepers++;
this->signal_eviction_thread();
toku_cond_wait(&m_flow_control_cond, &m_ev_thread_lock);
m_num_sleepers--;
toku_mutex_unlock(&m_ev_thread_lock);
uint64_t t1 = toku_current_time_microsec();
increment_partitioned_counter(m_wait_pressure_count, 1);
uint64_t tdelta = t1 - t0;
increment_partitioned_counter(m_wait_pressure_time, tdelta);
if (tdelta > 1000000) {
increment_partitioned_counter(m_long_wait_pressure_count, 1);
increment_partitioned_counter(m_long_wait_pressure_time, tdelta);
}
}
//
......@@ -4267,6 +4289,10 @@ void evictor::fill_engine_status() {
STATUS_VALUE(CT_SIZE_LEAF) = read_partitioned_counter(m_size_leaf);
STATUS_VALUE(CT_SIZE_ROLLBACK) = read_partitioned_counter(m_size_rollback);
STATUS_VALUE(CT_SIZE_CACHEPRESSURE) = read_partitioned_counter(m_size_cachepressure);
STATUS_VALUE(CT_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_wait_pressure_count);
STATUS_VALUE(CT_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_wait_pressure_time);
STATUS_VALUE(CT_LONG_WAIT_PRESSURE_COUNT) = read_partitioned_counter(m_long_wait_pressure_count);
STATUS_VALUE(CT_LONG_WAIT_PRESSURE_TIME) = read_partitioned_counter(m_long_wait_pressure_time);
}
////////////////////////////////////////////////////////////////////////////////
......
......@@ -609,6 +609,10 @@ typedef enum {
CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed
CT_CLEANER_PERIOD,
CT_CLEANER_ITERATIONS, // number of times the cleaner thread runs the cleaner per period
CT_WAIT_PRESSURE_COUNT,
CT_WAIT_PRESSURE_TIME,
CT_LONG_WAIT_PRESSURE_COUNT,
CT_LONG_WAIT_PRESSURE_TIME,
CT_STATUS_NUM_ROWS
} ct_status_entry;
......
......@@ -120,6 +120,10 @@ enum {
LTM_LONG_WAIT_COUNT,
LTM_LONG_WAIT_TIME,
LTM_TIMEOUT_COUNT,
LTM_WAIT_ESCALATION_COUNT,
LTM_WAIT_ESCALATION_TIME,
LTM_LONG_WAIT_ESCALATION_COUNT,
LTM_LONG_WAIT_ESCALATION_TIME,
LTM_STATUS_NUM_ROWS // must be last
};
......@@ -306,6 +310,8 @@ public:
void *extra);
int iterate_pending_lock_requests(lock_request_iterate_callback cb, void *extra);
void escalation_wait(uint64_t t);
private:
static const uint64_t DEFAULT_MAX_LOCK_MEMORY = 64L * 1024 * 1024;
static const uint64_t DEFAULT_LOCK_WAIT_TIME = 0;
......@@ -319,6 +325,10 @@ public:
uint64_t m_escalation_count;
tokutime_t m_escalation_time;
uint64_t m_escalation_latest_result;
uint64_t m_wait_escalation_count;
uint64_t m_wait_escalation_time;
uint64_t m_long_wait_escalation_count;
uint64_t m_long_wait_escalation_time;
struct lt_counters m_lt_counters;
......
......@@ -104,6 +104,10 @@ void locktree::manager::create(lt_create_cb create_cb, lt_destroy_cb destroy_cb,
m_current_lock_memory = 0;
m_escalation_count = 0;
m_escalation_time = 0;
m_wait_escalation_count = 0;
m_wait_escalation_time = 0;
m_long_wait_escalation_count = 0;
m_long_wait_escalation_time = 0;
m_escalation_latest_result = 0;
m_lock_wait_time_ms = DEFAULT_LOCK_WAIT_TIME;
m_mem_tracker.set_manager(this);
......@@ -350,6 +354,7 @@ int locktree::manager::memory_tracker::check_current_lock_constraints(void) {
// mutex and check again. if we're still out of locks, run escalation.
// return an error if we're still out of locks after escalation.
if (out_of_locks()) {
uint64_t t0 = toku_current_time_microsec();
m_mgr->mutex_lock();
if (out_of_locks()) {
m_mgr->run_escalation();
......@@ -357,6 +362,8 @@ int locktree::manager::memory_tracker::check_current_lock_constraints(void) {
r = TOKUDB_OUT_OF_LOCKS;
}
}
uint64_t t1 = toku_current_time_microsec();
m_mgr->escalation_wait(t1 - t0);
m_mgr->mutex_unlock();
}
return r;
......@@ -404,6 +411,15 @@ int locktree::manager::iterate_pending_lock_requests(
return r;
}
void locktree::manager::escalation_wait(uint64_t t) {
m_wait_escalation_count += 1;
m_wait_escalation_time += t;
if (t >= 1000000) {
m_long_wait_escalation_count += 1;
m_long_wait_escalation_time += t;
}
}
#define STATUS_INIT(k,c,t,l,inc) TOKUDB_STATUS_INIT(status, k, c, t, "locktree: " l, inc)
void locktree::manager::status_init(void) {
......@@ -420,10 +436,15 @@ void locktree::manager::status_init(void) {
STATUS_INIT(LTM_WAIT_COUNT, LOCKTREE_WAIT_COUNT, UINT64, "number of wait locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_WAIT_TIME, LOCKTREE_WAIT_TIME, UINT64, "time waiting for locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_COUNT, LOCKTREE_LONG_WAIT_COUNT, UINT64, "number of long wait locks ", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_COUNT, LOCKTREE_LONG_WAIT_COUNT, UINT64, "number of long wait locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_TIME, LOCKTREE_LONG_WAIT_TIME, UINT64, "long time waiting for locks", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_TIMEOUT_COUNT, LOCKTREE_TIMEOUT_COUNT, UINT64, "number of lock timeouts", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_WAIT_ESCALATION_COUNT, LOCKTREE_WAIT_ESCALATION_COUNT, UINT64, "number of waits on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_WAIT_ESCALATION_TIME, LOCKTREE_WAIT_ESCALATION_TIME, UINT64, "time waiting on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_ESCALATION_COUNT, LOCKTREE_LONG_WAIT_ESCALATION_COUNT, UINT64, "number of long waits on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(LTM_LONG_WAIT_ESCALATION_TIME, LOCKTREE_LONG_WAIT_ESCALATION_TIME, UINT64, "long time waiting on lock escalation", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
status.initialized = true;
}
......@@ -441,6 +462,10 @@ void locktree::manager::get_status(LTM_STATUS statp) {
STATUS_VALUE(LTM_ESCALATION_COUNT) = m_escalation_count;
STATUS_VALUE(LTM_ESCALATION_TIME) = m_escalation_time;
STATUS_VALUE(LTM_ESCALATION_LATEST_RESULT) = m_escalation_latest_result;
STATUS_VALUE(LTM_WAIT_ESCALATION_COUNT) = m_wait_escalation_count;
STATUS_VALUE(LTM_WAIT_ESCALATION_TIME) = m_wait_escalation_time;
STATUS_VALUE(LTM_LONG_WAIT_ESCALATION_COUNT) = m_long_wait_escalation_count;
STATUS_VALUE(LTM_LONG_WAIT_ESCALATION_TIME) = m_long_wait_escalation_time;
mutex_lock();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment