Commit 24ca9e75 authored by Bradley C. Kuszmaul's avatar Bradley C. Kuszmaul Committed by Yoni Fogel

Fix #5295, #5292. {{{svn merge -r46285:46297 ../tokudb.5295b}}}

git-svn-id: file:///svn/toku/tokudb@46309 c7de825b-a66e-492c-adef-691d508d4ae1
parent c28b01aa
......@@ -20,6 +20,7 @@
#include "log-internal.h"
#include "kibbutz.h"
#include "background_job_manager.h"
#include "partitioned_counter.h"
///////////////////////////////////////////////////////////////////////////////////
// Engine status
......@@ -30,12 +31,40 @@
// These should be in the cachetable object, but we make them file-wide so that gdb can get them easily.
// They were left here after engine status cleanup (#2949, rather than moved into the status struct)
// so they are still easily available to the debugger and to save lots of typing.
static uint64_t cachetable_miss;
static uint64_t cachetable_misstime; // time spent waiting for disk read
static uint64_t cachetable_puts; // how many times has a newly created node been put into the cachetable?
static uint64_t cachetable_prefetches; // how many times has a block been prefetched into the cachetable?
static uint64_t cachetable_evictions;
static uint64_t cleaner_executions; // number of times the cleaner thread's loop has executed
// If we had constructors and destructors, this would be cleaner. For now, we initialize with setup_cachetable_statistics().
static PARTITIONED_COUNTER cachetable_miss;
static PARTITIONED_COUNTER cachetable_misstime; // time spent waiting for disk read
static PARTITIONED_COUNTER cachetable_puts; // how many times has a newly created node been put into the cachetable?
static PARTITIONED_COUNTER cachetable_prefetches; // how many times has a block been prefetched into the cachetable?
static PARTITIONED_COUNTER cachetable_evictions;
static PARTITIONED_COUNTER cleaner_executions; // number of times the cleaner thread's loop has executed
static bool cachetables_inited = false;
// Effect: Set up the file-wide cachetable statistics by creating one partitioned counter for each of them.
// Requires: The cachetables module is not currently initialized (this is asserted).
void toku_cachetables_init(void) {
// Local shorthand, mirroring the DESTROY macro used by toku_cachetables_destroy().
#define CREATE(x) do { (x) = create_partitioned_counter(); } while (0)
    assert(!cachetables_inited);
    cachetables_inited = true;
    // The counters are created in the same order in which they are declared.
    CREATE(cachetable_miss);
    CREATE(cachetable_misstime);
    CREATE(cachetable_puts);
    CREATE(cachetable_prefetches);
    CREATE(cachetable_evictions);
    CREATE(cleaner_executions);
#undef CREATE
}
// Effect: Destroy every counter created by toku_cachetables_init() and reset each handle to NULL.
// Requires: The cachetables module is currently initialized (this is asserted).
void toku_cachetables_destroy(void) {
// Wrapped in do { } while (0) so that DESTROY(x); is exactly one statement
// wherever it is used (for example as the body of an unbraced if).
#define DESTROY(x) do { destroy_partitioned_counter(x); (x) = NULL; } while (0)
    assert(cachetables_inited);
    cachetables_inited = false;
    DESTROY(cachetable_miss);
    DESTROY(cachetable_misstime);
    DESTROY(cachetable_puts);
    DESTROY(cachetable_prefetches);
    DESTROY(cachetable_evictions);
    DESTROY(cleaner_executions);
#undef DESTROY
}
static CACHETABLE_STATUS_S ct_status;
......@@ -183,10 +212,10 @@ toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS statp) {
if (!ct_status.initialized) {
status_init();
}
STATUS_VALUE(CT_MISS) = cachetable_miss;
STATUS_VALUE(CT_MISSTIME) = cachetable_misstime;
STATUS_VALUE(CT_PUTS) = cachetable_puts;
STATUS_VALUE(CT_PREFETCHES) = cachetable_prefetches;
STATUS_VALUE(CT_MISS) = read_partitioned_counter(cachetable_miss);
STATUS_VALUE(CT_MISSTIME) = read_partitioned_counter(cachetable_misstime);
STATUS_VALUE(CT_PUTS) = read_partitioned_counter(cachetable_puts);
STATUS_VALUE(CT_PREFETCHES) = read_partitioned_counter(cachetable_prefetches);
STATUS_VALUE(CT_SIZE_CURRENT) = ct->size_current;
STATUS_VALUE(CT_SIZE_LIMIT) = ct->size_limit;
STATUS_VALUE(CT_SIZE_WRITING) = ct->size_evicting;
......@@ -194,8 +223,8 @@ toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS statp) {
STATUS_VALUE(CT_SIZE_LEAF) = ct->size_leaf;
STATUS_VALUE(CT_SIZE_ROLLBACK) = ct->size_rollback;
STATUS_VALUE(CT_SIZE_CACHEPRESSURE) = ct->size_cachepressure;
STATUS_VALUE(CT_EVICTIONS) = cachetable_evictions;
STATUS_VALUE(CT_CLEANER_EXECUTIONS) = cleaner_executions;
STATUS_VALUE(CT_EVICTIONS) = read_partitioned_counter(cachetable_evictions);
STATUS_VALUE(CT_CLEANER_EXECUTIONS) = read_partitioned_counter(cleaner_executions);
STATUS_VALUE(CT_CLEANER_PERIOD) = toku_get_cleaner_period_unlocked(ct);
STATUS_VALUE(CT_CLEANER_ITERATIONS) = toku_get_cleaner_iterations_unlocked(ct);
*statp = ct_status;
......@@ -857,7 +886,7 @@ static void cachetable_free_pair(CACHETABLE ct, PAIR p) {
void *write_extraargs = p->write_extraargs;
PAIR_ATTR old_attr = p->attr;
cachetable_evictions++;
increment_partitioned_counter(cachetable_evictions, 1);
cachetable_unlock(ct);
PAIR_ATTR new_attr = p->attr;
// Note that flush_callback is called with write_me false, so the only purpose of this
......@@ -1279,7 +1308,7 @@ static int cachetable_put_internal(
}
}
// flushing could change the table size, but won't change the fullhash
cachetable_puts++;
increment_partitioned_counter(cachetable_puts, 1);
PAIR p = cachetable_insert_at(
ct,
cachefile,
......@@ -1916,8 +1945,8 @@ int toku_cachetable_get_and_pin_with_dep_pairs (
// The pair being fetched will be marked as pending if a checkpoint happens during the
// fetch because begin_checkpoint will mark as pending any pair that is locked even if it is clean.
cachetable_fetch_pair(ct, cachefile, p, fetch_callback, read_extraargs, true);
cachetable_miss++;
cachetable_misstime += get_tnow() - t0;
increment_partitioned_counter(cachetable_miss, 1);
increment_partitioned_counter(cachetable_misstime, get_tnow() - t0);
goto got_value;
}
got_value:
......@@ -2142,8 +2171,8 @@ int toku_cachetable_get_and_pin_nonblocking (
run_unlockers(unlockers); // we hold the ct mutex.
uint64_t t0 = get_tnow();
cachetable_fetch_pair(ct, cf, p, fetch_callback, read_extraargs, false);
cachetable_miss++;
cachetable_misstime += get_tnow() - t0;
increment_partitioned_counter(cachetable_miss, 1);
increment_partitioned_counter(cachetable_misstime, get_tnow() - t0);
cachetable_unlock(ct);
return TOKUDB_TRY_AGAIN;
}
......@@ -2216,7 +2245,7 @@ int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, uint32_t fullhash,
// if not found then create a pair in the READING state and fetch it
if (p == 0) {
cachetable_prefetches++;
increment_partitioned_counter(cachetable_prefetches, 1);
r = bjm_add_background_job(cf->bjm);
assert_zero(r);
p = cachetable_insert_at(
......@@ -3207,7 +3236,7 @@ toku_cleaner_thread (void *cachetable_v)
assert(ct);
uint32_t num_iterations = toku_get_cleaner_iterations(ct);
for (uint32_t i = 0; i < num_iterations; ++i) {
cleaner_executions++;
increment_partitioned_counter(cleaner_executions, 1);
cachetable_lock(ct);
PAIR best_pair = NULL;
int n_seen = 0;
......
......@@ -507,5 +507,11 @@ extern int toku_cachetable_get_checkpointing_user_data_status(void);
int
toku_cleaner_thread (void *cachetable_v);
void toku_cachetables_init (void);
// Effect: Initialize the cachetables module. Call this before calling any other cachetable operations.
void toku_cachetables_destroy (void);
// Effect: Deinitialize the cachetables module. Call this after calling any other cachetable operations to free resources that may have been allocated.
// To use the cachetable module again, call toku_cachetables_init() again.
#endif
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ifndef _DOUBLY_LINKED_LIST_H_
#define _DOUBLY_LINKED_LIST_H_
#ident "$Id: partitioned_counter.cc 46098 2012-07-24 21:58:41Z bkuszmaul $"
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
//******************************************************************************
//
// Overview: A doubly linked list with elements of type T.
// Each element that wants to be put into the list provides a
// LinkedListElement<T> as well as a pointer to the object of type T.
// Typically, the user embeds the linked list element into the object itself,
// for example as
// struct foo {
// toku::LinkedListElement<struct foo *> linked_list_elt;
// ... other elements of foo
// };
// then when inserting foo into a list defined as
// toku::DoublyLinkedList<struct foo *> list_of_foos;
// you write
// struct foo f;
// list_of_foos.insert(&f.linked_list_elt, &f);
//
// Operations: An init() method is provided in place of a constructor (it
// doesn't need to do anything but fill in a field) for the DoublyLinkedList.
// Operations to insert an element and remove it, as well as to pop
// an element out of the list.
// Also a LinkedListElement class is provided with a method to get a
// pointer to the object of type T.
//******************************************************************************
#include <stdbool.h>
//#define BEGIN_TOKUNAMESPACE namespace toku {
//#define END_TOKUNAMESPACE };
#define BEGIN_TOKUNAMESPACE
#define END_TOKUNAMESPACE
BEGIN_TOKUNAMESPACE
template<typename T> class DoublyLinkedList;

// A node of a DoublyLinkedList<T>.  It holds the links to its two neighbors
// and the value of type T that was handed to DoublyLinkedList<T>::insert().
// Only DoublyLinkedList<T>, which is a friend, reads or writes the private fields.
template<typename T> class LinkedListElement {
    friend class DoublyLinkedList<T>;
 public:
    T get_container(void)
    // Effect: Return the value stored in this element.
    {
        return this->container;
    }
 private:
    T container;
    LinkedListElement<T> *prev;
    LinkedListElement<T> *next;
};
// A doubly linked list whose nodes (LinkedListElement<T>) are supplied by the caller.
// The list itself stores only a pointer to its first element.
// There is no constructor: init() must be called before the list is used.
template<typename T> class DoublyLinkedList {
public:
void init (void);
// Effect: Initialize a doubly linked list (to be empty).
void insert(LinkedListElement<T> *ll_elt, T container);
// Effect: Add an item to a linked list, recording container inside ll_elt.
// Implementation note: Push the item to the head of the list.
void remove(LinkedListElement<T> *ll_elt);
// Effect: Remove an item from a linked list.
// Requires: The item is in this list.
// Note: The links stored inside ll_elt itself are left as they were.
bool pop(LinkedListElement<T> **ll_eltp);
// Effect: if the list is empty, return false (and leave *ll_eltp untouched).
// Otherwise return true and set *ll_eltp to the first item, and remove that item from the list.
template<typename extra_t> int iterate(int (*fun)(T container, extra_t extra), extra_t extra);
// Effect: Call fun(e, extra) on every element of the linked list, starting at the head. If ever fun returns nonzero, then quit early and return that value.
// If fun always returns zero, then this function returns zero.
private:
// The head of the list, or NULL when the list is empty.
LinkedListElement<T> *m_first;
};
//******************************************************************************
// DoublyLinkedList implementation starts here.
//******************************************************************************
#include <stddef.h>
// Effect: Make the list empty by clearing its head pointer.
template<typename T>
void DoublyLinkedList<T>::init(void) {
    this->m_first = NULL;
}
// Effect: Store container in ll_elt and link ll_elt in as the new head of the list.
template<typename T>
void DoublyLinkedList<T>::insert(LinkedListElement<T> *ll_elt, T container) {
    LinkedListElement<T> *const former_head = m_first;

    // Fill in the new element: nothing before it, the former head after it.
    ll_elt->prev = NULL;
    ll_elt->next = former_head;
    ll_elt->container = container;

    // Hook the former head (if there was one) back to the new element.
    if (former_head) {
        former_head->prev = ll_elt;
    }
    m_first = ll_elt;
}
// Effect: Unlink ll_elt from the list by connecting its two neighbors to each other.
// The prev and next fields of ll_elt itself are not modified.
template<typename T>
void DoublyLinkedList<T>::remove(LinkedListElement<T> *ll_elt) {
    LinkedListElement<T> *const before = ll_elt->prev;
    LinkedListElement<T> *const after  = ll_elt->next;

    if (before != NULL) {
        before->next = after;
    } else {
        // ll_elt had no predecessor, so its successor becomes the head.
        m_first = after;
    }

    if (after != NULL) {
        after->prev = before;
    }
}
// Effect: If the list is empty, return false without writing *ll_eltp.
//  Otherwise detach the head element, store it in *ll_eltp, and return true.
template<typename T>
bool DoublyLinkedList<T>::pop(LinkedListElement<T> **ll_eltp) {
    LinkedListElement<T> *const head = m_first;
    if (head == NULL) {
        return false;
    }
    assert(head->prev==NULL);

    // The successor (possibly NULL) becomes the new head.
    LinkedListElement<T> *const successor = head->next;
    m_first = successor;
    if (successor != NULL) {
        successor->prev = NULL;
    }

    head->next = NULL;
    *ll_eltp = head;
    return true;
}
// Effect: Walk the list from the head, calling fun(container, extra) for each element.
//  Stop at the first nonzero result and return it; return 0 if every call returned 0.
template<typename T>
template<typename extra_t>
int DoublyLinkedList<T>::iterate(int (*fun)(T container, extra_t extra), extra_t extra) {
    LinkedListElement<T> *cursor = m_first;
    while (cursor != NULL) {
        const int result = fun(cursor->container, extra);
        if (result != 0) {
            return result;
        }
        // The next link is read only after fun has returned, as a for-loop would do.
        cursor = cursor->next;
    }
    return 0;
}
END_TOKUNAMESPACE
#endif
......@@ -127,6 +127,7 @@ basement nodes, bulk fetch, and partial fetch:
#include <ft-flusher.h>
#include <valgrind/helgrind.h>
#include "txn_manager.h"
#include "partitioned_counter.h"
#if defined(HAVE_CILK)
#include <cilk/cilk.h>
......@@ -5498,10 +5499,10 @@ int toku_ft_layer_init(void) {
r = toku_portability_init();
if (r) { goto exit; }
partitioned_counters_init();
toku_checkpoint_init();
toku_ft_serialize_layer_init();
toku_cachetables_init();
toku_mutex_init(&ft_open_close_lock, NULL);
exit:
return r;
......@@ -5509,9 +5510,10 @@ exit:
// Effect: Shut down the ft layer by destroying, one after another, the pieces listed below.
// The calls appear in the reverse of the order in which the visible part of toku_ft_layer_init() sets the same pieces up.
void toku_ft_layer_destroy(void) {
toku_mutex_destroy(&ft_open_close_lock);
toku_cachetables_destroy();
toku_ft_serialize_layer_destroy();
toku_checkpoint_destroy();
// NOTE(review): presumably this must follow toku_cachetables_destroy() because the cachetable statistics are partitioned counters -- confirm.
partitioned_counters_destroy();
//Portability must be cleaned up last
toku_portability_destroy();
}
......
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ifndef _GROWABLE_ARRAY_H_
#define _GROWABLE_ARRAY_H_
#ident "$Id: partitioned_counter.cc 46098 2012-07-24 21:58:41Z bkuszmaul $"
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
//******************************************************************************
//
// Overview: A growable array is a little bit like std::vector except that
// it doesn't have constructors (hence can be used in static constructs, since
// the google style guide says no constructors), and it's a little simpler.
// Operations:
// init and deinit (we don't have constructors and destructors).
// fetch_unchecked to get values out.
// store_unchecked to put values in.
// push to add an element at the end
// get_size to find out the size
// memory_size to find out how much memory the data structure is using.
//
//******************************************************************************
//#define BEGIN_TOKUNAMESPACE namespace toku {
//#define END_TOKUNAMESPACE };
#define BEGIN_TOKUNAMESPACE
#define END_TOKUNAMESPACE
BEGIN_TOKUNAMESPACE
// A growable array of T without constructors or destructors: call init() before
// first use and deinit() when done.  Elements are kept in one contiguous buffer
// whose capacity is doubled whenever push() runs out of room.
template<typename T> class GrowableArray {
 public:
    void init (void)
    // Effect: Initialize the array to contain no elements.
    {
        m_array=NULL;
        m_size=0;
        m_size_limit=0;
    }
    void deinit (void)
    // Effect: Deinitialize the array (freeing any memory it uses, for example).
    //  Afterwards the fields are in the same state that init() leaves them in.
    {
        toku_free(m_array);
        m_array     =NULL;
        m_size      =0;
        m_size_limit=0;
    }
    T fetch_unchecked (size_t i)
    // Effect: Fetch the ith element.  If i is out of range, the system asserts.
    {
        // Enforce the documented contract, in the same way store_unchecked() does.
        assert(i<m_size);
        return m_array[i];
    }
    void store_unchecked (size_t i, T v)
    // Effect: Store v in the ith element.  If i is out of range, the system asserts.
    {
        assert(i<m_size);
        m_array[i]=v;
    }
    void push (T v)
    // Effect: Add v to the end of the array (increasing the size).  The amortized cost of this operation is constant.
    // Implementation hint: Double the size of the array when it gets too big so that the amortized cost stays constant.
    {
        if (m_size>=m_size_limit) {
            if (m_array==NULL) {
                // First allocation: start with room for a single element.
                m_size_limit=1;
            } else {
                m_size_limit*=2;
            }
            // NOTE(review): XREALLOC_N is assumed to resize m_array to m_size_limit elements and not to return on failure -- confirm.
            XREALLOC_N(m_size_limit, m_array);
        }
        m_array[m_size++]=v;
    }
    size_t get_size (void)
    // Effect: Return the number of elements in the array.
    {
        return m_size;
    }
    size_t memory_size(void)
    // Effect: Return the size (in bytes) that the array occupies in memory.  This is really only an estimate.
    //  It counts the object itself plus the allocated capacity (not just the elements in use).
    {
        return sizeof(*this)+sizeof(T)*m_size_limit;
    }
 private:
    T     *m_array;      // The element buffer, or NULL if nothing has been allocated.
    size_t m_size;       // How many elements are in use.
    size_t m_size_limit; // How much space is allocated in array.
};
END_TOKUNAMESPACE
#endif
This diff is collapsed.
......@@ -27,19 +27,74 @@
// destroy_partitioned_counter Destroy it.
// increment_partitioned_counter Increment it. This is the frequent operation.
// read_partitioned_counter Get the current value. This is infrequent.
// See partitioned_counter.cc for the abstraction function and representation invariant.
//
// The google style guide says to avoid using constructors, and it appears that
// constructors may have broken all the tests, because they called
// pthread_key_create before the key was actually created. So the google style
// guide may have some wisdom there...
//
// This version does not use constructors, essentially reverting to the google C++ style guide.
//
#include "fttypes.h"
// The old C interface. This required a bunch of explicit ___attribute__((__destructor__)) functions to remember to destroy counters at the end.
typedef struct partitioned_counter *PARTITIONED_COUNTER;
PARTITIONED_COUNTER create_partitioned_counter(void);
// Effect: Create a counter, initialized to zero.
void destroy_partitioned_counter (PARTITIONED_COUNTER);
void destroy_partitioned_counter(PARTITIONED_COUNTER);
// Effect: Destroy the counter. No operations on that counter are permitted after this.
void increment_partitioned_counter (PARTITIONED_COUNTER, unsigned long amount);
void increment_partitioned_counter(PARTITIONED_COUNTER, uint64_t amount);
// Effect: Increment the counter by amount.
// Requires: No overflows. This is a 64-bit unsigned counter.
unsigned long read_partitioned_counter (PARTITIONED_COUNTER);
uint64_t read_partitioned_counter(PARTITIONED_COUNTER);
// Effect: Return the current value of the counter.
void partitioned_counters_init(void);
// Effect: Initialize any partitioned counters data structures that must be set up before any partitioned counters run.
void partitioned_counters_destroy(void);
// Effect: Destroy any partitioned counters data structures.
#if 0
#include <pthread.h>
#include "fttypes.h"
// Used inside the PARTITIONED_COUNTER.
struct linked_list_head {
struct linked_list_element *first;
};
class PARTITIONED_COUNTER {
public:
PARTITIONED_COUNTER(void);
// Effect: Construct a counter, initialized to zero.
~PARTITIONED_COUNTER(void);
// Effect: Destruct the counter.
void increment(uint64_t amount);
// Effect: Increment the counter by amount. This is a 64-bit unsigned counter, and if you overflow it, you will get overflowed results (that is mod 2^64).
// Requires: Don't use this from a static constructor or destructor.
uint64_t read(void);
// Effect: Read the sum.
// Requires: Don't use this from a static constructor or destructor.
private:
uint64_t _sum_of_dead; // The sum of all thread-local counts from threads that have terminated.
pthread_key_t _key; // The pthread_key which gives us the hook to construct and destruct thread-local storage.
struct linked_list_head _ll_counter_head; // A linked list of all the thread-local information for this counter.
// This function is used to destroy the thread-local part of the state when a thread terminates.
// But it's not the destructor for the local part of the counter, it's a destructor on a "dummy" key just so that we get a notification when a thread ends.
friend void destroy_thread_local_part_of_partitioned_counters (void *);
};
#endif
#endif
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id: test_partitioned_counter.cc 46088 2012-07-24 17:30:28Z bkuszmaul $"
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "test.h"
#include "doubly_linked_list.h"
// Assert that the list has nothing left in it, by checking that a pop fails.
static void check_is_empty (DoublyLinkedList<int> *l) {
    LinkedListElement<int> *unused_elt;
    // The pop is done outside the assert so that it happens even if asserts are compiled out.
    const bool popped = l->pop(&unused_elt);
    assert(!popped);
}
static void test_doubly_linked_list (void) {
DoublyLinkedList<int> l;
l.init();
LinkedListElement<int> e0, e1;
l.insert(&e0, 3);
{
LinkedListElement<int> *re;
bool r = l.pop(&re);
assert(r);
assert(re==&e0);
assert(re->get_container()==3);
}
check_is_empty(&l);
l.insert(&e0, 0);
l.insert(&e1, 1);
{
bool in[2]={true,true};
for (int i=0; i<2; i++) {
LinkedListElement<int> *re;
bool r = l.pop(&re);
assert(r);
int v = re->get_container();
assert(v==0 || v==1);
assert(in[v]);
in[v]=false;
}
}
check_is_empty(&l);
}
const int N=100;
bool in[N];
DoublyLinkedList<int> l;
LinkedListElement<int> elts[N];
// Pick a random index; if that element is not in the list yet, insert it and record that in in[].
static void maybe_insert_random(void) {
    const int candidate = random()%N;
    if (in[candidate]) {
        return;   // Already in the list: nothing to do.
    }
    if (verbose) printf("I%d ", candidate);
    l.insert(&elts[candidate], candidate);
    in[candidate]=true;
}
static bool checked[N];
static int check_count;
// Iteration callback used by check_equal(): records that v was visited.
// Asserts that the extra argument is the expected magic value, that v is a valid
// index, that v has not been visited before, and that v is supposed to be in the list.
// Always returns 0 so that the iteration continues over the whole list.
static int check_is_in(int v, int deadbeef) {
    // Compare (not assign): the magic value arrives converted to int, so compare it as an int.
    assert(deadbeef==(int)0xdeadbeef);
    assert(0<=v && v<N);
    assert(!checked[v]);
    assert(in[v]);
    checked[v]=true;
    check_count++;
    return 0;
}
static int quit_count=0;
// Iteration callback used by check_equal(): counts its calls in quit_count and
// returns check_count (nonzero, which stops the iteration) on call number check_count.
// Returns 0 on every earlier call.
static int quit_early(int v __attribute__((__unused__)), int beefbeef) {
    // Compare (not assign) against the value that check_equal() actually passes, 0xbeefbeef.
    assert(beefbeef==(int)0xbeefbeef);
    quit_count++;
    return (quit_count==check_count) ? check_count : 0;
}
// Verify that the list l holds exactly the elements marked in in[], and that
// iterate() stops early and returns the callback's nonzero value.
static void check_equal(void) {
    // Pass 1: visit everything and compare the visited set against in[].
    check_count=0;
    for (int i=0; i<N; i++) {
        checked[i]=false;
    }
    const int full_walk = l.iterate<int>(check_is_in, 0xdeadbeef);
    assert(full_walk==0);
    for (int i=0; i<N; i++) {
        assert(checked[i]==in[i]);
    }

    // Pass 2 (only if the list is nonempty): quit after a random number of elements.
    if (check_count<=0) {
        return;
    }
    check_count=1+random()%check_count; // quit after 1 or more iterations
    quit_count=0;
    const int early_walk = l.iterate<int>(quit_early, 0xbeefbeef);
    assert(early_walk==check_count);
}
// Randomized test of the global list l against the reference array in[].
// Performs N*N random steps (insert, remove, or pop), calling check_equal() after
// each one, and finally drains the list and checks that nothing is left.
static void test_doubly_linked_list_randomly(void) {
l.init();
for (int i=0; i<N; i++) in[i]=false;
// Seed the list with up to N/2 elements (duplicate picks are skipped by maybe_insert_random).
for (int i=0; i<N/2; i++) maybe_insert_random();
if (verbose) printf("\n");
for (int i=0; i<N*N; i++) {
int x = random()%N;
if (in[x]) {
// x is in the list: with equal odds, either remove x itself or pop the head.
if (random()%2==0) {
if (verbose) printf("%dR%d ", i, x);
l.remove(&elts[x]);
in[x]=false;
} else {
// The popped element is whatever is at the head, which need not be x.
LinkedListElement<int> *re;
bool r = l.pop(&re);
assert(r);
int v = re->get_container();
assert(in[v]);
in[v]=false;
if (verbose) printf("%dP%d ", i, v);
}
} else {
// x is not in the list: insert it.
l.insert(&elts[x], x);
in[x]=true;
if (verbose) printf("%dI%d ", i, x);
}
// After every step the list must agree with in[].
check_equal();
}
if (verbose) printf("\n");
// Drain the list; every popped element must have been marked as present.
LinkedListElement<int> *re;
while (l.pop(&re)) {
int v = re->get_container();
assert(in[v]);
in[v]=false;
if (verbose) printf("P%d ", v);
}
// Once the list is empty, nothing may remain marked as present.
for (int i=0; i<N; i++) assert(!in[i]);
if (verbose) printf("\n");
}
// Test entry point: parse the standard test arguments, run the deterministic
// test once, then run the randomized test four times.  Always returns 0.
int test_main (int argc, const char *argv[]) {
    default_parse_args(argc, argv);
    test_doubly_linked_list();
    int rounds_left = 4;
    while (rounds_left-- > 0) {
        test_doubly_linked_list_randomly();
    }
    return 0;
}
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment