diff --git a/buildheader/db.h_4_1 b/buildheader/db.h_4_1 index 5fe91d8f73e1e7afd875a96aa49c91913c1da587..444337929ff2dbf8ede85ce611f1359468e63225 100644 --- a/buildheader/db.h_4_1 +++ b/buildheader/db.h_4_1 @@ -116,6 +116,8 @@ typedef enum { #define TOKUDB_OUT_OF_LOCKS -100000 #define TOKUDB_SUCCEEDED_EARLY -100001 #define TOKUDB_DICTIONARY_TOO_OLD -100004 +#define TOKUDB_DICTIONARY_TOO_NEW -100005 +#define TOKUDB_DICTIONARY_NO_HEADER -100006 #define TOKUDB_FOUND_BUT_REJECTED -100002 #define TOKUDB_USER_CALLBACK_ERROR -100003 /* in wrap mode, top-level function txn_begin is renamed, but the field isn't renamed, so we have to hack it here.*/ diff --git a/buildheader/db.h_4_3 b/buildheader/db.h_4_3 index 6455d75af858674f40d0a99172b02acba9ef3868..66a3f341544bde78b5283b121a329fff1d87f713 100644 --- a/buildheader/db.h_4_3 +++ b/buildheader/db.h_4_3 @@ -118,6 +118,8 @@ typedef enum { #define TOKUDB_OUT_OF_LOCKS -100000 #define TOKUDB_SUCCEEDED_EARLY -100001 #define TOKUDB_DICTIONARY_TOO_OLD -100004 +#define TOKUDB_DICTIONARY_TOO_NEW -100005 +#define TOKUDB_DICTIONARY_NO_HEADER -100006 #define TOKUDB_FOUND_BUT_REJECTED -100002 #define TOKUDB_USER_CALLBACK_ERROR -100003 /* in wrap mode, top-level function txn_begin is renamed, but the field isn't renamed, so we have to hack it here.*/ diff --git a/buildheader/db.h_4_4 b/buildheader/db.h_4_4 index 5f554d31d7b24d13048f5be3548f8072368480fe..51e26c91e6bcd9dcb1e3bf383d1fa52ccd4a499d 100644 --- a/buildheader/db.h_4_4 +++ b/buildheader/db.h_4_4 @@ -119,6 +119,8 @@ typedef enum { #define TOKUDB_OUT_OF_LOCKS -100000 #define TOKUDB_SUCCEEDED_EARLY -100001 #define TOKUDB_DICTIONARY_TOO_OLD -100004 +#define TOKUDB_DICTIONARY_TOO_NEW -100005 +#define TOKUDB_DICTIONARY_NO_HEADER -100006 #define TOKUDB_FOUND_BUT_REJECTED -100002 #define TOKUDB_USER_CALLBACK_ERROR -100003 /* in wrap mode, top-level function txn_begin is renamed, but the field isn't renamed, so we have to hack it here.*/ diff --git a/buildheader/db.h_4_5 b/buildheader/db.h_4_5 index 424c207e00f29233d626d38385f84b0f4ee52ad8..fd6ede077bb0072d2edb841eeb5e90fb209ffa88 100644 --- a/buildheader/db.h_4_5 +++ b/buildheader/db.h_4_5 @@ -119,6 +119,8 @@ typedef enum { #define TOKUDB_OUT_OF_LOCKS -100000 #define TOKUDB_SUCCEEDED_EARLY -100001 #define TOKUDB_DICTIONARY_TOO_OLD -100004 +#define TOKUDB_DICTIONARY_TOO_NEW -100005 +#define TOKUDB_DICTIONARY_NO_HEADER -100006 #define TOKUDB_FOUND_BUT_REJECTED -100002 #define TOKUDB_USER_CALLBACK_ERROR -100003 /* in wrap mode, top-level function txn_begin is renamed, but the field isn't renamed, so we have to hack it here.*/ diff --git a/buildheader/db.h_4_6 b/buildheader/db.h_4_6 index d7eb687268784958645e7383c0f4018ac1e931ca..e7705478d5dee2800ddd54bcfe98e71a040d2b87 100644 --- a/buildheader/db.h_4_6 +++ b/buildheader/db.h_4_6 @@ -121,6 +121,8 @@ typedef enum { #define TOKUDB_OUT_OF_LOCKS -100000 #define TOKUDB_SUCCEEDED_EARLY -100001 #define TOKUDB_DICTIONARY_TOO_OLD -100004 +#define TOKUDB_DICTIONARY_TOO_NEW -100005 +#define TOKUDB_DICTIONARY_NO_HEADER -100006 #define TOKUDB_FOUND_BUT_REJECTED -100002 #define TOKUDB_USER_CALLBACK_ERROR -100003 /* in wrap mode, top-level function txn_begin is renamed, but the field isn't renamed, so we have to hack it here.*/ diff --git a/buildheader/make_db_h.c b/buildheader/make_db_h.c index 562088a5f33ebd4f21474830de50f39bc4644cc9..5613e0e483b1e8e14deb5538cf0c4e9689e01e37 100644 --- a/buildheader/make_db_h.c +++ b/buildheader/make_db_h.c @@ -30,11 +30,13 @@ void print_db_notices (void) { #define dodefine(name) printf("#define %s %d\n", #name, name) enum { - TOKUDB_OUT_OF_LOCKS = -100000, - TOKUDB_SUCCEEDED_EARLY = -100001, - TOKUDB_FOUND_BUT_REJECTED = -100002, - TOKUDB_USER_CALLBACK_ERROR = -100003, - TOKUDB_DICTIONARY_TOO_OLD = -100004 + TOKUDB_OUT_OF_LOCKS = -100000, + TOKUDB_SUCCEEDED_EARLY = -100001, + TOKUDB_FOUND_BUT_REJECTED = -100002, + TOKUDB_USER_CALLBACK_ERROR = -100003, + TOKUDB_DICTIONARY_TOO_OLD = -100004, + TOKUDB_DICTIONARY_TOO_NEW = -100005, + TOKUDB_DICTIONARY_NO_HEADER = -100006 }; void print_defines (void) { @@ -141,6 +143,8 @@ void print_defines (void) { dodefine(TOKUDB_OUT_OF_LOCKS); dodefine(TOKUDB_SUCCEEDED_EARLY); dodefine(TOKUDB_DICTIONARY_TOO_OLD); + dodefine(TOKUDB_DICTIONARY_TOO_NEW); + dodefine(TOKUDB_DICTIONARY_NO_HEADER); dodefine(TOKUDB_FOUND_BUT_REJECTED); dodefine(TOKUDB_USER_CALLBACK_ERROR); } diff --git a/buildheader/tdb.h b/buildheader/tdb.h index da77ea871a2feb3f5e309f33031f0101f6c68968..8c8899c3077a34d784b8a56561d6f20cf47f52be 100644 --- a/buildheader/tdb.h +++ b/buildheader/tdb.h @@ -121,6 +121,8 @@ typedef enum { #define TOKUDB_OUT_OF_LOCKS -100000 #define TOKUDB_SUCCEEDED_EARLY -100001 #define TOKUDB_DICTIONARY_TOO_OLD -100004 +#define TOKUDB_DICTIONARY_TOO_NEW -100005 +#define TOKUDB_DICTIONARY_NO_HEADER -100006 #define TOKUDB_FOUND_BUT_REJECTED -100002 #define TOKUDB_USER_CALLBACK_ERROR -100003 /* in wrap mode, top-level function txn_begin is renamed, but the field isn't renamed, so we have to hack it here.*/ diff --git a/include/db.h b/include/db.h index da77ea871a2feb3f5e309f33031f0101f6c68968..8c8899c3077a34d784b8a56561d6f20cf47f52be 100644 --- a/include/db.h +++ b/include/db.h @@ -121,6 +121,8 @@ typedef enum { #define TOKUDB_OUT_OF_LOCKS -100000 #define TOKUDB_SUCCEEDED_EARLY -100001 #define TOKUDB_DICTIONARY_TOO_OLD -100004 +#define TOKUDB_DICTIONARY_TOO_NEW -100005 +#define TOKUDB_DICTIONARY_NO_HEADER -100006 #define TOKUDB_FOUND_BUT_REJECTED -100002 #define TOKUDB_USER_CALLBACK_ERROR -100003 /* in wrap mode, top-level function txn_begin is renamed, but the field isn't renamed, so we have to hack it here.*/ diff --git a/newbrt/brt-serialize.c b/newbrt/brt-serialize.c index 69d427ad3fbed311e2b7a892c07bea71221e17f6..18095569f69ac54634daa55e0e76f8e2eb7fe8b6 100644 --- a/newbrt/brt-serialize.c +++ b/newbrt/brt-serialize.c @@ -970,30 +970,46 @@ void toku_verify_counts (BRTNODE node) { } } -int toku_serialize_brt_header_size (struct brt_header *UU(h)) { - unsigned int size = (+8 // "tokudata" - +4 // size - +4 // version - +8 // byte order verification - +8 // checkpoint_count - +8 // checkpoint_lsn - +4 // tree's nodesize - +8 // translation_size_on_disk - +8 // translation_address_on_disk - +4 // checksum - ); - size+=(+8 // diskoff - +4 // flags - ); +static u_int32_t +serialize_brt_header_min_size (u_int32_t version) { + u_int32_t size; + switch(version) { + case BRT_LAYOUT_VERSION_10: + size = (+8 // "tokudata" + +4 // version + +4 // size + +8 // byte order verification + +8 // checkpoint_count + +8 // checkpoint_lsn + +4 // tree's nodesize + +8 // translation_size_on_disk + +8 // translation_address_on_disk + +4 // checksum + ); + size+=(+8 // diskoff + +4 // flags + ); + break; + default: + assert(FALSE); + } assert(size <= BLOCK_ALLOCATOR_HEADER_RESERVE); return size; } +int toku_serialize_brt_header_size (struct brt_header *h) { + u_int32_t size = serialize_brt_header_min_size(h->layout_version); + //Add any dynamic data. + assert(size <= BLOCK_ALLOCATOR_HEADER_RESERVE); + return size; +} + + int toku_serialize_brt_header_to_wbuf (struct wbuf *wbuf, struct brt_header *h, DISKOFF translation_location_on_disk, DISKOFF translation_size_on_disk) { unsigned int size = toku_serialize_brt_header_size (h); // !!! seems silly to recompute the size when the caller knew it. Do we really need the size? wbuf_literal_bytes(wbuf, "tokudata", 8); - wbuf_network_int (wbuf, size); //MUST be in network order regardless of disk order wbuf_network_int (wbuf, h->layout_version); //MUST be in network order regardless of disk order + wbuf_network_int (wbuf, size); //MUST be in network order regardless of disk order wbuf_literal_bytes(wbuf, &toku_byte_order_host, 8); //Must not translate byte order wbuf_ulonglong(wbuf, h->checkpoint_count); wbuf_LSN (wbuf, h->checkpoint_lsn); @@ -1191,6 +1207,15 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) { struct rbuf rc = *rb; memset(rb, 0, sizeof(*rb)); + //Verification of initial elements. + { + //Check magic number + bytevec magic; + rbuf_literal_bytes(&rc, &magic, 8); + assert(memcmp(magic,"tokudata",8)==0); + } + + struct brt_header *CALLOC(h); if (h==0) return errno; int ret=-1; @@ -1203,12 +1228,16 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) { //version MUST be in network order on disk regardless of disk order h->layout_version = rbuf_network_int(&rc); assert(h->layout_version==BRT_LAYOUT_VERSION_10); + + //Size MUST be in network order regardless of disk order. + u_int32_t size = rbuf_network_int(&rc); + assert(size==rc.size); + bytevec tmp_byte_order_check; rbuf_literal_bytes(&rc, &tmp_byte_order_check, 8); //Must not translate byte order int64_t byte_order_stored = *(int64_t*)tmp_byte_order_check; assert(byte_order_stored == toku_byte_order_host); - assert(h->layout_version==BRT_LAYOUT_VERSION_10); h->checkpoint_count = rbuf_ulonglong(&rc); h->checkpoint_lsn = rbuf_lsn(&rc); h->nodesize = rbuf_int(&rc); @@ -1248,59 +1277,93 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) { return 0; } -//-1 means we can overwrite everything in the file AND the header is useless +//TOKUDB_DICTIONARY_NO_HEADER means we can overwrite everything in the file AND the header is useless static int deserialize_brtheader_from_fd_into_rbuf(int fd, toku_off_t offset, struct rbuf *rb, u_int64_t *checkpoint_count) { int r = 0; - const int prefix_size = 8 + // magic ("tokudata") - 4; // size - char prefix[prefix_size]; + const int64_t prefix_size = 8 + // magic ("tokudata") + 4 + // version + 4; // size + unsigned char prefix[prefix_size]; rb->buf = NULL; int64_t n = pread(fd, prefix, prefix_size, offset); - if (n==0) r = -1; - else if (n<0) r = errno; + if (n==0) r = TOKUDB_DICTIONARY_NO_HEADER; + else if (n<0) {r = errno; assert(r!=0);} else if (n!=prefix_size) r = EINVAL; - else if (memcmp(prefix,"tokudata",8)!=0) { - if ((*(u_int64_t*)&prefix[0]) == 0) r = -1; //Could be a tokudb file but header never written - else r = EINVAL; //Not a tokudb file! Do not use. - } else { - // It's version 7 or later, and the magic looks OK - //Size must be stored in network order regardless of DISK_ORDER - u_int32_t size = toku_ntohl(*(u_int32_t*)(prefix+8)); - rb->size = size; - rb->ndone = prefix_size; - rb->buf = toku_malloc(rb->size); - if (!rb->buf) r = ENOMEM; - else { - n = pread(fd, rb->buf, rb->size, offset); - if (n!=(int64_t)size) r = EINVAL; //Header might be useless (wrong size) or could be an error. - if (r==0) { - //check version (before checksum, since older versions didn't have checksums) - int version = rbuf_network_int(rb); - if (version != BRT_LAYOUT_VERSION_10) r = TOKUDB_DICTIONARY_TOO_OLD; //Cannot use - } - if (r==0) { - u_int32_t calculated_x1764 = x1764_memory(rb->buf, size-4); - u_int32_t stored_x1764 = toku_dtoh32(*(int*)(rb->buf+size-4)); - if (calculated_x1764!=stored_x1764) r = -1; //Header useless - else r = 0; - } - if (r==0) { - //Verify byte order - bytevec tmp_byte_order_check; - rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order - int64_t byte_order_stored = *(int64_t*)tmp_byte_order_check; - if (byte_order_stored != toku_byte_order_host) r = EINVAL; //Cannot use + rb->size = prefix_size; + rb->ndone = 0; + rb->buf = prefix; + { + //Check magic number + bytevec magic; + rbuf_literal_bytes(rb, &magic, 8); + if (memcmp(magic,"tokudata",8)!=0) { + if ((*(u_int64_t*)magic) == 0) r = TOKUDB_DICTIONARY_NO_HEADER; + else r = EINVAL; //Not a tokudb file! Do not use. } - if (r==0) { - *checkpoint_count = rbuf_ulonglong(rb); - //Restart after 'size' - rb->ndone = prefix_size; + } + u_int32_t version = 0; + if (r==0) { + //Version MUST be in network order regardless of disk order. + version = rbuf_network_int(rb); + if (version < BRT_LAYOUT_VERSION_10) r = TOKUDB_DICTIONARY_TOO_OLD; //Cannot use + if (version > BRT_LAYOUT_VERSION_10) r = TOKUDB_DICTIONARY_TOO_NEW; //Cannot use + } + u_int32_t size; + if (r==0) { + const int64_t max_header_size = BLOCK_ALLOCATOR_HEADER_RESERVE; + int64_t min_header_size = serialize_brt_header_min_size(version); + //Size MUST be in network order regardless of disk order. + size = rbuf_network_int(rb); + //If too big, it is corrupt. We would probably notice during checksum + //but may have to do a multi-gigabyte malloc+read to find out. + //If its too small reading rbuf would crash, so verify. + if (size > max_header_size || size < min_header_size) r = TOKUDB_DICTIONARY_NO_HEADER; + } + if (r!=0) { + rb->buf = NULL; //Prevent freeing of 'prefix' + } + if (r==0) { + assert(rb->ndone==prefix_size); + rb->size = size; + rb->buf = toku_xmalloc(rb->size); + } + if (r==0) { + n = pread(fd, rb->buf, rb->size, offset); + if (n==-1) { + r = errno; + assert(r!=0); } + else if (n!=(int64_t)rb->size) r = EINVAL; //Header might be useless (wrong size) or could be a disk read error. } + //It's version 10 or later. Magic looks OK. + //We have an rbuf that represents the header. + //Size is within acceptable bounds. + if (r==0) { + //Verify checksum + u_int32_t calculated_x1764 = x1764_memory(rb->buf, rb->size-4); + u_int32_t stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4)); + if (calculated_x1764!=stored_x1764) r = TOKUDB_DICTIONARY_NO_HEADER; //Header useless + } + if (r==0) { + //Verify byte order + bytevec tmp_byte_order_check; + rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order + int64_t byte_order_stored = *(int64_t*)tmp_byte_order_check; + if (byte_order_stored != toku_byte_order_host) r = TOKUDB_DICTIONARY_NO_HEADER; //Cannot use dictionary + } + if (r==0) { + //Load checkpoint count + *checkpoint_count = rbuf_ulonglong(rb); + //Restart at beginning during regular deserialization + rb->ndone = 0; + } + } + if (r!=0 && rb->buf) { + toku_free(rb->buf); + rb->buf = NULL; } - if (r!=0 && rb->buf) toku_free(rb->buf); return r; } @@ -1325,18 +1388,31 @@ int toku_deserialize_brtheader_from (int fd, struct brt_header **brth) { r1 = deserialize_brtheader_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1); } struct rbuf *rb = NULL; - if (r0==0) rb = &rb_0; - if (r1==0 && (r0!=0 || checkpoint_count_1 > checkpoint_count_0)) rb = &rb_1; + + if (r0!=TOKUDB_DICTIONARY_TOO_NEW && r1!=TOKUDB_DICTIONARY_TOO_NEW) { + if (r0==0) rb = &rb_0; + if (r1==0 && (r0!=0 || checkpoint_count_1 > checkpoint_count_0)) rb = &rb_1; + if (r0==0 && r1==0) assert(checkpoint_count_1 != checkpoint_count_0); + } int r = 0; if (rb==NULL) { - r = r0; - if (r1==TOKUDB_DICTIONARY_TOO_OLD) r = r1; + // We were unable to read either header or at least one is too new. + // Certain errors are higher priority than others. Order of these if/else if is important. + if (r0==TOKUDB_DICTIONARY_TOO_NEW || r1==TOKUDB_DICTIONARY_TOO_NEW) + r = TOKUDB_DICTIONARY_TOO_NEW; + else if (r0==TOKUDB_DICTIONARY_TOO_OLD || r1==TOKUDB_DICTIONARY_TOO_OLD) { + r = TOKUDB_DICTIONARY_TOO_OLD; + } + else if (r0==TOKUDB_DICTIONARY_NO_HEADER || r1==TOKUDB_DICTIONARY_NO_HEADER) { + r = TOKUDB_DICTIONARY_NO_HEADER; + } + else r = r0; //Arbitrarily report the error from the first header. assert(r!=0); } if (r==0) r = deserialize_brtheader(fd, rb, brth); - if (r0==0 && rb_0.buf) toku_free(rb_0.buf); - if (r1==0 && rb_1.buf) toku_free(rb_1.buf); + if (rb_0.buf) toku_free(rb_0.buf); + if (rb_1.buf) toku_free(rb_1.buf); return r; } diff --git a/newbrt/brt.c b/newbrt/brt.c index 708388b459313f68ca013cb53c3655b2ba906ada..125c49771399911da3250ce085761e26b28ef9b2 100644 --- a/newbrt/brt.c +++ b/newbrt/brt.c @@ -2968,7 +2968,7 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, int is_cre } if (is_create) { r = toku_read_brt_header_and_store_in_cachefile(t->cf, &t->h); - if (r==-1) { + if (r==TOKUDB_DICTIONARY_NO_HEADER) { r = toku_brt_alloc_init_header(t); if (r != 0) goto died_after_read_and_pin; } diff --git a/newbrt/recover.c b/newbrt/recover.c index b8de106d90116c9a1291fde77b3b13681b57828b..4343cd311ad509eeedf4e1079ed8af715e1dd14f 100644 --- a/newbrt/recover.c +++ b/newbrt/recover.c @@ -122,7 +122,7 @@ internal_toku_recover_fopen_or_fcreate (int flags, int mode, char *fixedfname, F assert(r==0); brt->cf=cf; r = toku_read_brt_header_and_store_in_cachefile(brt->cf, &brt->h); - if (r==-1) { + if (r==TOKUDB_DICTIONARY_NO_HEADER) { r = toku_brt_alloc_init_header(brt); } toku_recover_note_cachefile(filenum, cf, brt); diff --git a/src/ydb.c b/src/ydb.c index 0b4206d789eaceba566b962bf07579248a0fc66f..93b9a009bf0c69534859ccadc842ca4b15001c57 100644 --- a/src/ydb.c +++ b/src/ydb.c @@ -3243,7 +3243,7 @@ static int toku_db_remove(DB * db, const char *fname, const char *dbname, u_int3 //TODO: Verify DB* db not yet opened //TODO: Verify db file not in use. (all dbs in the file must be unused) r = toku_db_open(db, NULL, fname, dbname, DB_UNKNOWN, 0, S_IRWXU|S_IRWXG|S_IRWXO); - if (r==TOKUDB_DICTIONARY_TOO_OLD) { + if (r==TOKUDB_DICTIONARY_TOO_OLD || r==TOKUDB_DICTIONARY_TOO_NEW || r==TOKUDB_DICTIONARY_NO_HEADER) { need_close = FALSE; goto delete_db_file; }