Commit 4920ca10 authored by John Esmet's avatar John Esmet Committed by Yoni Fogel

refs #5504 take a step back, this is apparently not trivial


git-svn-id: file:///svn/toku/tokudb@48094 c7de825b-a66e-492c-adef-691d508d4ae1
parent 93fd6342
ROOT = ../
build: ft.builddir
check: ft.checkdir
include $(ROOT)Makefile.include
# -*- Mode: Makefile -*-
.DEFAULT_GOAL= build
TOKUROOT=../
INCLUDEDIRS=-I. -I../include
ifneq ($(COMBINE),0)
COMBINE=1
endif
#TODO: Replace DEPEND_COMPILE with auto-dependency generation.
DEPEND_COMPILE += \
$(wildcard *.h) \
log_header.h \
# keep this line so I can have a \ on the previous line
FT_SO = $(TOKUROOT)lib/libft.$(SOEXT)
FT_A = $(TOKUROOT)lib/libft.$(AEXT)
SKIP_FTRULE=1
include $(TOKUROOT)toku_include/Makefile.include
LDFLAGS+=-L$(TOKUROOT)lib -Wl,-rpath,$(shell pwd)/$(TOKUROOT)lib
LDLIBS+=-lft -l$(LIBTOKUPORTABILITY)
# When debugging, try: valgrind --show-reachable=yes --leak-check=full ./brt-test
BINS_RAW= \
ftdump \
ftdump_static \
tdb_logprint \
tdb-recover \
# Intentionally left blank
# BINS will be defined automatically.
# Top-level entry points.  None of these names is a real file, so they are
# declared phony to keep a stray file called e.g. "build" from masking them.
.PHONY: build default bins libs local

# TEST_FT appears in the prerequisite list of "local" below.  Prerequisite
# lists are expanded as soon as the rule is read, so the variable must already
# have its value at this point; otherwise it silently expands to nothing.
# (Assumes OEXT is provided by the Makefile.include pulled in above.)
TEST_FT = brt-test-helpers.$(OEXT)

# Build this directory's products first, then descend into tests/.
# "&&" (instead of ";") aborts the recipe when the cd fails, so $(MAKE) build
# can never be re-run in this directory by accident.
build default: local
	cd tests && $(MAKE) build

# Local products only: the binaries, the libraries and the test-helper object.
local: bins libs $(TEST_FT);
FT_SOURCES = \
block_allocator \
block_table \
ft_loader-callback \
brt-serialize \
brt-verify \
brt \
brt-cachetable-wrappers \
brt-flusher \
brt-hot-flusher \
ft_msg \
brt-test-helpers \
cachetable \
checkpoint \
dbufio \
fifo \
key \
kibbutz \
leafentry \
le-cursor \
logfilemgr \
logger \
log_code \
log_upgrade \
log_print \
logcursor \
memarena \
mempool \
minicron \
omt \
pqueue \
queue \
recover \
roll \
rollback \
sort \
sub_block \
ule \
threadpool \
txn \
workqueue \
x1764 \
xids \
ybt \
# keep this line so I can have a \ on the previous line
ifneq ($(OS_CHOICE),windows)
FT_SOURCES += trace_mem #Windows does not handle 'empty' file
endif
TEST_FT = brt-test-helpers.$(OEXT)
# Source and per-file object lists, both derived from FT_SOURCES.
FT_C_FILES = $(patsubst %,%.c,$(FT_SOURCES))
FT_O_FILES = $(patsubst %,%.$(OEXT),$(FT_SOURCES))

# Combined build: hand every FT source to the compiler in one invocation and
# produce a single object.
ft.$(OEXT): $(FT_C_FILES) $(DEPEND_COMPILE)
	$(CC) -c $(FT_C_FILES) $(COMBINE_C) $(CPPFLAGS) $(CFLAGS) $(OOUTPUT)$@
brt-serialize.$(OEXT): $(wildcard backwards_*.c)

# Pick the object list the libraries are built from.
# The per-file list assigned above is kept when building under Cygwin, with
# icc, or with COMBINE=0; in every other case the single combined object
# replaces it.  The per-file cases deliberately assign nothing: writing
# "FT_O_FILES = $(FT_O_FILES)" makes a recursively expanded variable refer to
# itself, which make rejects as soon as the variable is expanded.
ifeq ($(CYGWIN),)
ifneq ($(CC),icc)
ifneq ($(COMBINE),0)
# Same spelling as the ft.$(OEXT) rule target above.
FT_O_FILES = ft.$(OEXT)
endif
endif
endif

# These objects are always compiled on their own, never as part of ft.$(OEXT).
FT_O_FILES += ft_loader.$(OEXT) quicklz.$(OEXT) compress.$(OEXT)
ft_loader.$(OEXT): $(DEPEND_COMPILE)
$(FT_O_FILES): VISIBILITY=
$(FT_O_FILES): $(LZMA_H)
$(FT_SO): DISABLE_WARNING += 10237 # Do not complain about -lcilkrts being linked in dynamically, static library not available
$(FT_SO): $(FT_O_FILES) $(LZMA_A)
$(TOKULINKER) $(SHARED) $(SYMBOLS) $(GCOV_FLAGS) $(SKIP_WARNING) $(FT_O_FILES) $(LZMA_A) -o$(FT_SO) $(LINUX_NOSTDLIB) $(LCILKRTS)
$(FT_A): $(FT_O_FILES)
log_code.$(OEXT): log_header.h wbuf.h log-internal.h rbuf.h
# This version runs logformat twice. Make has a quirk here: a pattern rule with two outputs
# runs its recipe only once, but a rule with no % symbols runs it once for each output.
## log_header.h log_code.c: logformat$(BINSUF)
## ./logformat
# So we do it this way
log_code.c: logformat$(BINSUF)
./logformat .
log_print.c log_header.h: log_code.c
test 1 = 1
#Needs to be done manually since it does not include ft.
logformat$(BINSUF): logformat.c $(LIBPORTABILITY_SO)
$(CC) $< $(BIN_FROM_O_FLAGS_NOLIB) $(LDFLAGS) $(ALWAYS_LINK) $(LINK_MUST_BE_LAST) $(LIBPORTABILITY_SO)
ifeq ($(PROF),1)
libs: $(FT_A)
else
libs: $(FT_SO) $(FT_A)
endif
bins: $(BINS)
# Put the benchmarktest_256 first since it takes the longest (and we want to use parallelism in the make)
# Put check_benchmarktest_256 first because it is long-running (and therefore on the critical path, so get it started)
# Run the test suite in tests/ once the binaries are built.
# "check" is a command, not a file, so it is declared phony.
# "&&" (instead of ";") keeps $(MAKE) check from running in this directory
# when the cd fails.
.PHONY: check
check: bins
	cd tests && $(MAKE) check
ifeq ($(PROF),1)
$(BINS): $(FT_A) $(LIBPORTABILITY_A)
else
$(BINS): $(FT_SO) $(LIBPORTABILITY_SO)
endif
foo2:
echo $(BINS)
checko2: SHELL=/bin/bash
checko2:
@shopt -s compat31; if [[ "$(OPTFLAGS)" =~ "-O([2-3x])" ]] ; then \
echo OPTFLAGS=$(OPTFLAGS) ok; \
else \
echo OPTFLAGS=$(OPTFLAGS) bad; exit 1; \
fi
# Cleaning targets are commands, not files.
.PHONY: clean clean-local clean-tests

# Remove everything produced by this directory and by tests/.
clean: clean-local clean-tests

# Delegate to the makefile in tests/.  "&&" (instead of ";") prevents a failed
# cd from turning this into a recursive "$(MAKE) clean" in this directory.
clean-tests:
	cd tests && $(MAKE) clean

# Remove the libraries, the generated logging sources and scratch files.
# NOTE(review): FT is not assigned anywhere in the visible makefile; confirm
# an included file sets it, otherwise the second command removes nothing.
clean-local:
	$(RM) $(TOKUROOT)lib/libft.$(AEXT) $(TOKUROOT)lib/libft.$(SOEXT)
	rm -rf $(FT)
	rm -rf test_oexcl.c.tmp *.ft_handle
	rm -rf log_code.c log_header.h log_print.c logformat
ftdump_static$(BINSUF): DEPEND_LINK = -lpthread -ldl -lz
ftdump_static$(BINSUF): ftdump.$(OEXT) $(FT_A) $(LZMA_A) $(LIBPORTABILITY_A) $(DEPEND_COMPILE) $(DEPEND_LINK)
$(CC) $< $(FT_A) $(LZMA_A) $(LIBPORTABILITY_A) $(BIN_FROM_O_FLAGS_NOLIB) $(ALWAYS_LINK) $(LINK_MUST_BE_LAST)
# After doing (cd ../src/tests;make test_log5.recover), run these. The files should have no differences.
testdump: ftdump$(BINSUF)
./ftdump ../src/tests/dir.test_log5.c.tdb.recover/foo.db > dump.r && ./ftdump ../src/tests/dir.test_log5.c.tdb/foo.db > dump.$(OEXT) && diff dump.$(OEXT) dump.r
foo:
@echo FTLOADER $(FTLOADER)
@echo BDBDIR $(BDBDIR)
......@@ -56,5 +56,6 @@
#include "wbuf.h"
#include <db.h>
#include "tokuconst.h"
#endif
include ft/tests/make.include
FT_BINS = $(patsubst %,ft/%$(BINSUF),$(BINS_RAW))
TEST_FT = ft/brt-test-helpers.$(OEXT)
# This version runs logformat twice. Make has a quirk here: a pattern rule with two outputs
# runs its recipe only once, but a rule with no % symbols runs it once for each output.
## log_header.h log_code.c: logformat$(BINSUF)
## ./logformat
# So we do it this way
ft/log_print.c ft/log_header.h ft/log_code.c: ft/logformat$(BINSUF)
ft/logformat$(BINSUF) ft
.PRECIOUS: ft/log_print.c ft/log_header.h ft/log_code.c
FT_A = ft/ft.$(AEXT)
FT_SOURCES = \
block_allocator \
block_table \
bread \
brt-serialize \
brt-verify \
brt \
ft_msg \
brt-test-helpers \
cachetable \
checkpoint \
fifo \
fingerprint \
key \
leafentry \
leaflock \
logfilemgr \
logger \
log_code \
log_print \
logcursor \
memarena \
mempool \
minicron \
omt \
recover \
roll \
rollback \
ule \
threadpool \
toku_worker \
trace_mem \
txn \
x1764 \
xids \
ybt \
# keep this line so I can have a \ on the previous line
FT_C_FILES = $(patsubst %,ft/%.c,$(FT_SOURCES))
FT_O_FILES = $(patsubst %,ft/%.$(OEXT),$(FT_SOURCES))
$(TEST_FT): ft/log_header.h
$(TEST_FT) $(FT_O_FILES) ft/ft.$(OEXT): CPPFLAGS_DIRECTORY = -Ift -Iinclude
ft/ft.$(OEXT): $(FT_C_FILES) $(DEPEND_COMPILE)
$(CC) -c $(FT_C_FILES) $(COMBINE_C) $(CPPFLAGS) $(CPPFLAGS_DIRECTORY) $(CFLAGS) $(CFLAGS_DIRECTORY) $(OOUTPUT)$@
# Pick the object list the FT library is built from.
# The per-file list assigned above is kept when building under Cygwin, with
# icc, or with COMBINE=0; in every other case the single combined object
# replaces it.  The per-file cases deliberately assign nothing: writing
# "FT_O_FILES = $(FT_O_FILES)" makes a recursively expanded variable refer to
# itself, which make rejects as soon as the variable is expanded.
ifeq ($(CYGWIN),)
ifneq ($(CC),icc)
ifneq ($(COMBINE),0)
# Same spelling as the ft/ft.$(OEXT) rule target above.
FT_O_FILES = ft/ft.$(OEXT)
endif
endif
endif

# The loader is always compiled on its own.  $(OEXT) is used so the name
# matches the ft/ft_loader.$(OEXT) target-specific settings further down.
FT_O_FILES += ft/ft_loader.$(OEXT)
$(FT_O_FILES): VISIBILITY=-fvisibility=default
$(FT_O_FILES): CPPFLAGS_DIRECTORY=-Iinclude -Itoku_include -I$(OS)
$(FT_O_FILES): SHADOW=
$(FT_A): $(FT_O_FILES)
ft/libft.$(SOEXT): $(FT_O_FILES)
ft/libft.$(SOEXT): VISIBILITY=-fvisibility=default
ft/libft.$(SOEXT): LOADLIBES_DIRECTORY=-lz -Llib -ltokuportability
ft/libft.$(SOEXT): CFLAGS_DIRECTORY=-lpthread
ft/ft_loader.$(OEXT): CPPFLAGS_DIRECTORY=-Iinclude
ft/logformat$(BINSUF): CPPFLAGS_DIRECTORY=-Iinclude -Itoku_include -I$(OS)
# Aggregate targets for the ft/ directory.
# ft/build: binaries, libraries, the test-helper object and the tests.
ft/build: ft/bins ft/libs $(TEST_FT) ft/tests/build
# ft/check: run the ft tests.
ft/check: ft/tests/check
ft/bins: $(FT_BINS)
ft/libs: $(FT_A)
# None of these names is a real file; ft/check is included so a stray file of
# that name cannot make the target look up to date.
.PHONY: ft/build ft/check ft/bins ft/libs
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "Copyright (c) 2009-2010 Tokutek Inc. All rights reserved."
#ifndef TOKUCONST_H
#define TOKUCONST_H
#ident "$Id$"
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#endif
......@@ -12,6 +12,7 @@
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "xids.h"
//1 does much slower debugging
#define ULE_DEBUG 0
......
The essential idea of auto-upgrade from FT_LAYOUT_VERSION 12 to 13 is to
take advantage of the similarities between the two versions, and not to
try to create an infrastructure for all future upgrades.
As future layouts are created, upgrade paths, if any, will be crafted to
each particular change.
On startup, the version number of the recovery log is checked. If an
upgrade is needed, then the log is tested for a clean shutdown. If
there is no clean shutdown, then an error is returned. If the log does
end in a clean shutdown, then a new log file is created with the current
version number, starting with an LSN that is one greater than the clean
shutdown.
Once the new log is in place, the persistent environment dictionary is
upgraded, and then normal operation begins.
The startup of a new version of the storage engine might not be crash
safe.
Dictionaries, including the persistent environment and the fileops
directory, are upgraded as they are read into memory from disk.
The brt header is upgraded by
- removing an unused flag
- setting the transaction id to the xid of the clean shutdown
- marking the header as dirty
Each non-leaf node is upgraded by:
- removing an unused flag
- upgrading the version numbers in the node
- marking the node as dirty.
This works because all of the version 12 messages are unchanged
in version 13. The version 12 messages will be applied to the
leafentries using version 13 code.
Each leaf node is upgraded by
- removing an unused flag
- using modified version 12 code to unpack the version 12 packed
leaf entries into version 13 unpacked leaf entries
- repacking the leafentries into a new mempool
- destroying the original mempool (that holds the version 12
node read from disk)
The node is marked as dirty.
Once the brt is open, a FT_OPTIMIZE broadcast message is inserted to
optimize the dictionary.
A schematic overview of how a brt node is deserialized:
toku_deserialize_ftnode_from() { // accepts fd, fills in FTNODE, brt_header
deserialize_ftnode_from_rbuf_versioned() {
deserialize_ftnode_from_rbuf() // accepts rbuf fills in FTNODE
if nonleaf deserialize_ftnode_nonleaf_from_rbuf(){ // rbuf -> FTNODE (no version sensitivity)
if leaf deserialize_ftnode_leaf_from_rbuf() { // calculates node size from leafentry sizes
// leafentry sizes vary with version
if version 12 {
if leaf {
unpack each leafentry into a version 13 ule
pack each version 13 ule into version 13 le
allocate new mempool for version 13 les
destroy old mempool
}
remove unused flag
increment version number
mark dirty
}
}
}
Open issues:
- The brt layer makes some callbacks to the handlerton layer. If
any of the functions change from one version to another, then
the result may not be correct. A version number could be
included in all the function signatures so the callback function
could be aware of what version the caller is expecting.
The callbacks are:
- comparator
- hot index generator
- hot column mutator
Note, ft-internal.h defines struct subtree_estimates which contains field nkeys.
This field is obsolete with the removal of dupsort databases (since it will always
be the same as ndata), but removing it is not worth the trouble.
==========
The changes from version 12 to 13 include (may not be complete list):
- Persistent environment dictionary
- version number
- timestamp of environment creation (database installation)
- history of previous versions
- timestamps for upgrades
- Recovery log
- version number
- new log entries (hotindex, maybe others)
- brt header
- version number
- added field (root_xid_that_created), set to last checkpoint lsn
- deleted flag (built-in comparison function for values)
- brt internal node
- version number
- additional message(s) possible, no upgrade needed beyond changing version number
- brt leafnode
- version number
- new leafentry format
- version 12 leafentry unpack code is preserved
- rollback log
- version number is only change, no upgrade is needed because
rollback logs are not preserved through clean shutdown
Because version 12 and version 13 leafentries are significantly
different, the way leafentries are handled is as follows:
- deserialize_ftnode_leaf_from_rbuf()
- sets up array of pointers to leafentries (to be unpacked later),
these pointers are put into an OMT
- calculates checksum (x1764)
- adjusts ndone byte counter to verify that entire rbuf is read
- deserialize_ftnode_from_rbuf_versioned() calls
deserialize_ftnode_leaf_from_rbuf()
- loop through all leafentries, one at a time:
- unpack version 12 le and repack as version 13 le, each in its own malloc'ed memory
- calculate new fingerprint
- create new block
- allocate new mempool
- copy individual les into new mempool
- destroy individual les
- destroy original mempool
Open issues:
- We need to verify clean shutdown before upgrade.
If shutdown was not clean then we would run recovery, and the
code does not support recovering from an old format version.
- One way to do this is to increase the log version number (either
increment or synchronize with FT_LAYOUT_VERSION).
- Can we just look at the log? needs_recovery(env);
If this mechanism is specific
to the version 12 to 13 upgrade, then that is adequate.
Once the recovery log format changes, then we need a
different mechanism, similar to the 3.x->4.x upgrade
logic in log_upgrade.c.
- How to decide that an upgrade is necessary?
Needed for logic that says:
- If upgrade is necessary, then verify clean shutdown:
If upgrade is necessary (recorded version is old)
and clean shutdown was not done, then exit with
error code.
- tokudb_needs_recovery() is not separate from verification of
clean shutdown. This function indicates if a recovery is
necessary, but it does not verify simple clean shutdown
with just the shutdown log entry. Instead, it looks for
checkpoint begin/checkpoint end. (Also, comment at end
is permitted.)
Proposed solution:
- Decision on whether to perform upgrade is done by examining log version.
- If we need an upgrade:
- If not clean shutdown, then exit with error message, change nothing
on disk.
- If clean shutdown, then create new log by simply creating new log file
(empty, or perhaps with initial comment that says "start of new log").
- Normal log-trimming code will delete old logs. (None of the
locking logic in log_upgrade.c is needed.)
- Log-opening logic needs to be modified to do this. See log file
manager initialization function (and maybe functions it calls),
maybe the log cursor:
- logfilemgr.c: toku_logfilemgr_init()
- Log-trimming logic loops over pairs of file names and LSNs,
deleting old files based on LSN.
- Question: would it help any if the "clean shutdown" log entry
was required to be in a new log file of its own? It would
prevent the creation of an empty log file after "clean shutdown."
It might, but it's probably not worth doing.
Issue of optimize message (to be sent into each dictionary on upgrade)
- FT_COMMIT_BROADCAST_ALL (should be faster executing, always commits everything, was needed for an earlier upgrade attempt)
- FT_OPTIMIZE (better tested, has been used, tests to see if transactions are still live)
After upgrade (after clean shutdown, no running transactions, trees
fully flattened), there is no difference in what these two messages do.
Note, FT_OPTIMIZE requires a clean shutdown if used on upgrade. If used before recovery (which an upgrade
without clean shutdown would do), then it would be wrong because it would appear that all transactions were
completed.
TODO:
- update brt header fields
- original layout version
- version read from disk
- add accountability counters
- capture LSN of clean shutdown, use instead of checkpoint lsn
......@@ -13,7 +13,6 @@
#include <errno.h>
#include <string.h>
#define CRC_INCR
/* When serializing a value, write it into a buffer. */
......
......@@ -22,6 +22,7 @@
#include "x1764.h"
#include "rbuf.h"
#include "wbuf.h"
#include "tokuconst.h"
/* The number of transaction ids stored in the xids structure is
* represented by an 8-bit value. The value 255 is reserved.
......
......@@ -18,6 +18,7 @@
#include <ft/le-cursor.h>
#include "indexer.h"
#include <ft/ft-internal.h>
#include <ft/tokuconst.h>
#include <ft/ft-ops.h>
#include <ft/leafentry.h>
#include <ft/ule.h>
......
......@@ -12,12 +12,14 @@
/*
* The indexer
*/
#include <toku_portability.h>
#include <stdio.h>
#include <string.h>
#include <toku_assert.h>
#include <toku_portability.h>
#include "toku_assert.h"
#include "ydb-internal.h"
#include <ft/le-cursor.h>
#include "indexer.h"
#include <ft/tokuconst.h>
#include <ft/ft-ops.h>
#include <ft/leafentry.h>
#include <ft/ule.h>
......@@ -25,9 +27,6 @@
#include <ft/log-internal.h>
#include <ft/checkpoint.h>
#include "ydb-internal.h"
#include "indexer.h"
///////////////////////////////////////////////////////////////////////////////////
// Engine status
//
......
......@@ -336,7 +336,7 @@ if(BUILD_TESTING)
get_filename_component(base ${bin} NAME_WE)
add_executable(${base}.tdb ${base})
target_link_libraries(${base}.tdb ${LIBTOKUDB} ${LIBTOKUPORTABILITY})
target_link_libraries(${base}.tdb ft ${LIBTOKUDB} ${LIBTOKUPORTABILITY})
set_property(TARGET ${base}.tdb APPEND PROPERTY
COMPILE_DEFINITIONS "ENVDIR=\"dir.${bin}\";USE_TDB;IS_TDB=1;TOKUDB=1")
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "dir.${bin}")
......
......@@ -15,6 +15,7 @@
#include "test.h"
#include <ft/tokuconst.h>
#include <ft/fttypes.h>
#include <ft/omt.h>
#include <ft/leafentry.h>
......@@ -22,6 +23,7 @@
#include <ft/ule-internal.h>
#include <ft/le-cursor.h>
#include "indexer-internal.h"
#include <ft/xids.h>
#include <ft/xids-internal.h>
struct txn {
......
......@@ -10,7 +10,7 @@
#include <memory.h>
#include <sys/stat.h>
#include <db.h>
#include <ft/xids.h>
#include <ft/tokuconst.h>
#define MAX_NEST MAX_NESTED_TRANSACTIONS
......
......@@ -10,7 +10,7 @@
#include <memory.h>
#include <sys/stat.h>
#include <db.h>
#include <ft/xids.h>
#include <ft/tokuconst.h>
#define MAX_NEST MAX_TRANSACTION_RECORDS
#define MAX_SIZE MAX_TRANSACTION_RECORDS
......
......@@ -10,7 +10,7 @@
#include <memory.h>
#include <sys/stat.h>
#include <db.h>
#include <ft/xids.h>
#include <ft/tokuconst.h>
#define MAX_NEST MAX_TRANSACTION_RECORDS
#define MAX_SIZE MAX_TRANSACTION_RECORDS
......
......@@ -10,7 +10,7 @@
#include <memory.h>
#include <sys/stat.h>
#include <db.h>
#include <ft/xids.h>
#include <ft/tokuconst.h>
#define MAX_NEST MAX_TRANSACTION_RECORDS
#define MAX_SIZE MAX_TRANSACTION_RECORDS
......
......@@ -10,7 +10,7 @@
#include <memory.h>
#include <sys/stat.h>
#include <db.h>
#include <ft/xids.h>
#include <ft/tokuconst.h>
#define MAX_NEST MAX_TRANSACTION_RECORDS
#define MAX_SIZE (MAX_TRANSACTION_RECORDS + 1)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment