Commit 25102f30 authored by Zardosht Kasheff's avatar Zardosht Kasheff Committed by Yoni Fogel

[t:5097], add fix and test for table corruption issue

git-svn-id: file:///svn/toku/tokudb@44705 c7de825b-a66e-492c-adef-691d508d4ae1
parent 65558fb2
......@@ -2669,6 +2669,11 @@ static void assert_cachefile_is_flushed_and_removed (CACHETABLE ct, CACHEFILE cf
// trying to access the cachefile while this function is executing.
// This implies no client thread will be trying to lock any nodes
// belonging to the cachefile.
//
// This function also assumes that the cachefile is not in the process
// of being used by a checkpoint. If a checkpoint is currently happening,
// it does NOT include this cachefile.
//
static void cachetable_flush_cachefile(CACHETABLE ct, CACHEFILE cf) {
unsigned nfound = 0;
//
......@@ -2695,8 +2700,9 @@ static void cachetable_flush_cachefile(CACHETABLE ct, CACHEFILE cf) {
// find all of the pairs owned by a cachefile and redirect their completion
// to a completion queue. If an unlocked PAIR is dirty, flush and remove
// the PAIR. Locked PAIRs are on either a reader/writer thread (or maybe a
// checkpoint thread?) and therefore will be placed on the completion queue.
// the PAIR. Locked PAIRs are on either a reader/writer thread
// and therefore will be placed on the completion queue.
//
// The assumptions above lead to this reasoning. All pairs belonging to
// this cachefile are either:
// - unlocked
......@@ -2709,11 +2715,6 @@ static void cachetable_flush_cachefile(CACHETABLE ct, CACHEFILE cf) {
// when the function started).
// - Once the writer thread is done with a PAIR, remove it
//
// A question from Zardosht: Is it possible for a checkpoint thread
// to be running, and also trying to get access to a PAIR while
// the PAIR is on the writer thread? Will this cause problems?
// This question is encapsulated in #3941
//
unsigned i;
unsigned num_pairs = 0;
......@@ -2774,16 +2775,6 @@ static void cachetable_flush_cachefile(CACHETABLE ct, CACHEFILE cf) {
// wait for all of the pairs in the work queue to complete
//
// An important assumption here is that none of the PAIRs that we
// pop off the work queue need to be written out to disk. So, it is
// safe to simply call cachetable_maybe_remove_and_free_pair on
// the PAIRs we find. The reason we can make this assumption
// is based on the assumption that upon entry of this function,
// all PAIRs belonging to this cachefile are either idle,
// being processed by a writer thread, or being processed by a kibbutz.
// At this point in the code, kibbutz work is finished and we
// assume the client will not add any more kibbutz work for this cachefile.
//
// If it were possible
// for some thread to change the state of the node before passing
// it off here, and a write to disk were necessary, then the code
......@@ -2795,13 +2786,44 @@ static void cachetable_flush_cachefile(CACHETABLE ct, CACHEFILE cf) {
//This workqueue's mutex is NOT the cachetable lock.
//You must not be holding the cachetable lock during the dequeue.
int r = workqueue_deq(&cq, &wi, 1); assert(r == 0);
//Some other thread owned the lock, but transferred ownership to the thread executing this function
cachetable_lock(ct);
PAIR p = workitem_arg(wi);
p->cq = 0;
//Some other thread owned the lock, but transferred ownership to the thread executing this function
// check some assertions.
// A checkpoint should not be running on this cachefile, so
// the checkpoint_pending bit must be FALSE and
// no other thread should be accessing this PAIR
assert(!p->checkpoint_pending);
// we are only thread using the PAIR
assert(nb_mutex_users(&p->value_nb_mutex) == 1);
assert(nb_mutex_users(&p->disk_nb_mutex) == 0);
assert(!p->cloned_value_data);
// first we remove the PAIR from the cachetable's linked lists
// and hashtable, so we guarantee that no other thread can access
// this PAIR if we release the cachetable lock
cachetable_remove_pair(ct, p);
//
// #5097 found a bug where another thread had a dirty PAIR pinned
// and was trying to run partial eviction. So, when the ownership
// of the lock is transferred here, the PAIR may still be dirty.
// If so, we need to write it to disk.
//
if (p->dirty) {
PAIR_ATTR attr;
cachetable_only_write_locked_data(
ct,
p,
FALSE, // not for a checkpoint, as we assert above
&attr,
FALSE // not a clone
);
}
// now that we are assured that the PAIR has been written to disk,
// we free the PAIR
nb_mutex_unlock(&p->value_nb_mutex); //Release the lock, no one has a pin, per our assumptions above.
BOOL destroyed;
cachetable_maybe_remove_and_free_pair(ct, p, &destroyed);
cachetable_free_pair(ct, p);
}
workqueue_destroy(&cq);
if (cf) {
......
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id: cachetable-simple-verify.c 43762 2012-05-22 16:17:53Z yfogel $"
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#include "includes.h"
#include "test.h"
CACHEFILE f1;
CACHEFILE f2;
BOOL check_flush;
BOOL dirty_flush_called;
BOOL check_pe_callback;
BOOL pe_callback_called;
static int
pe_callback (
void *ftnode_pv __attribute__((__unused__)),
PAIR_ATTR bytes_to_free __attribute__((__unused__)),
PAIR_ATTR* bytes_freed,
void* extraargs __attribute__((__unused__))
)
{
*bytes_freed = make_pair_attr(1);
if (check_pe_callback) {
pe_callback_called = TRUE;
}
usleep(4*1024*1024);
return 0;
}
static void
flush (CACHEFILE f __attribute__((__unused__)),
int UU(fd),
CACHEKEY k __attribute__((__unused__)),
void *v __attribute__((__unused__)),
void **dd __attribute__((__unused__)),
void *e __attribute__((__unused__)),
PAIR_ATTR s __attribute__((__unused__)),
PAIR_ATTR* new_size __attribute__((__unused__)),
BOOL w __attribute__((__unused__)),
BOOL keep __attribute__((__unused__)),
BOOL c __attribute__((__unused__)),
BOOL UU(is_clone)
) {
if (check_flush && w) {
dirty_flush_called = TRUE;
}
}
static void *f2_pin(void *arg) {
int r;
void* v1;
long s1;
CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL);
//
// these booleans for pe_callback just ensure that the
// test is working as we expect it to. We expect the get_and_pin to
// cause a partial eviction of f1's PAIR, reducing its size from 8 to 1
// and we expect that to be enough so that the unpin does not invoke a partial eviction
// This is just to ensure that the bug is being exercised
//
check_pe_callback = TRUE;
r = toku_cachetable_get_and_pin(f2, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL);
assert(r == 0);
assert(pe_callback_called);
pe_callback_called = FALSE;
r = toku_cachetable_unpin(f2, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8));
check_pe_callback = FALSE;
assert(!pe_callback_called);
assert(r == 0);
return arg;
}
static void
cachetable_test (void) {
const int test_limit = 12;
int r;
check_flush = FALSE;
dirty_flush_called = FALSE;
CACHETABLE ct;
r = toku_create_cachetable(&ct, test_limit, ZERO_LSN, NULL_LOGGER); assert(r == 0);
char fname1[] = __SRCFILE__ "test1.dat";
unlink(fname1);
char fname2[] = __SRCFILE__ "test2.dat";
unlink(fname2);
r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO);
assert(r == 0);
r = toku_cachetable_openf(&f2, ct, fname2, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO);
assert(r == 0);
void* v1;
long s1;
CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL);
wc.pe_callback = pe_callback;
wc.flush_callback = flush;
// pin and unpin a node 20 times, just to get clock count up
for (int i = 0; i < 20; i++) {
r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL);
assert(r == 0);
r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8));
assert(r == 0);
}
// at this point, we have a dirty PAIR in the cachetable associated with cachefile f1
// launch a thread that will put another PAIR in the cachetable, and get partial eviction started
toku_pthread_t tid;
r = toku_pthread_create(&tid, NULL, f2_pin, NULL);
assert_zero(r);
usleep(2*1024*1024);
check_flush = TRUE;
r = toku_cachefile_close(&f1, 0, FALSE, ZERO_LSN);
assert(r == 0);
assert(dirty_flush_called);
check_flush = FALSE;
void *ret;
r = toku_pthread_join(tid, &ret);
assert_zero(r);
toku_cachetable_verify(ct);
r = toku_cachefile_close(&f2, 0, FALSE, ZERO_LSN); assert(r == 0);
r = toku_cachetable_close(&ct); lazy_assert_zero(r);
}
int
test_main(int argc, const char *argv[]) {
default_parse_args(argc, argv);
cachetable_test();
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment