refs #5710 add counters for the number of bytes read and time spent doing IO,...

refs #5710 add counters for the number of bytes read and time spent doing IO, for the various ways you can do a fetch. improve the name of get_tokutime() and get rid of the now defunct toku_current_time_nanoseconds() git-svn-id: file:///svn/toku/tokudb@50481 c7de825b-a66e-492c-adef-691d508d4ae1

refs #5710 add counters for the number of bytes read and time spent doing IO,...
refs #5710 add counters for the number of bytes read and time spent doing IO, for the various ways you can do a fetch. improve the name of get_tokutime() and get rid of the now defunct toku_current_time_nanoseconds() git-svn-id: file:///svn/toku/tokudb@50481 c7de825b-a66e-492c-adef-691d508d4ae1
3ebbf0cd · John Esmet · Yoni Fogel · 772d2ef8 · 3ebbf0cd · 3ebbf0cd
Commit 3ebbf0cd authored Nov 28, 2012 by John Esmet Committed by Yoni Fogel Apr 17, 2013
5 changed files
--- a/ft/ft-internal.h
+++ b/ft/ft-internal.h
@@ -91,6 +91,9 @@ struct ftnode_fetch_extra {
    // this value will be set during the fetch_callback call by toku_ftnode_fetch_callback or toku_ftnode_pf_req_callback
    // the callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it
    int child_to_read;
+    // Accounting: How many bytes were fetched, and how much time did it take?
+    tokutime_t bytes_read;
+    uint64_t read_time;
 };

 struct toku_fifo_entry_key_msn_heaviside_extra {
@@ -718,6 +721,8 @@ static inline void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h)
    bfe->right_is_pos_infty = false;
    bfe->child_to_read = -1;
    bfe->disable_prefetching = false;
+    bfe->bytes_read = 0;
+    bfe->read_time = 0;
 }

 //
@@ -747,6 +752,8 @@ static inline void fill_bfe_for_subset_read(
    bfe->right_is_pos_infty = right_is_pos_infty;
    bfe->child_to_read = -1;
    bfe->disable_prefetching = disable_prefetching;
+    bfe->bytes_read = 0;
+    bfe->read_time = 0;
 }

 //
@@ -766,6 +773,8 @@ static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) {
    bfe->right_is_pos_infty = false;
    bfe->child_to_read = -1;
    bfe->disable_prefetching = false;
+    bfe->bytes_read = 0;
+    bfe->read_time = 0;
 }

 static inline void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) {
@@ -812,6 +821,8 @@ static inline void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe,
    bfe->right_is_pos_infty = c->right_is_pos_infty;
    bfe->child_to_read = -1;
    bfe->disable_prefetching = c->disable_prefetching;
+    bfe->bytes_read = 0;
+    bfe->read_time = 0;
 }

 struct ancestors {
@@ -973,11 +984,9 @@ typedef enum {
    FT_CREATE_NONLEAF,                         // number of nonleaf nodes created
    FT_DESTROY_LEAF,                           // number of leaf nodes destroyed
    FT_DESTROY_NONLEAF,                        // number of nonleaf nodes destroyed
-    FT_MSG_KEYVAL_BYTES_IN,                    // how many bytes of keyval data ingested by the tree (all tree, no overhead counted)
    FT_MSG_BYTES_IN,                           // how many bytes of messages injected at root (for all trees)
    FT_MSG_BYTES_OUT,                          // how many bytes of messages flushed from h1 nodes to leaves
    FT_MSG_BYTES_CURR,                         // how many bytes of messages currently in trees (estimate)
-    //FT_MSG_BYTES_MAX,                          // how many bytes of messages currently in trees (estimate)
    FT_MSG_NUM,                                // how many messages injected at root
    FT_MSG_NUM_BROADCAST,                      // how many broadcast messages injected at root
    FT_NUM_BASEMENTS_DECOMPRESSED_NORMAL,      // how many basement nodes were decompressed because they were the target of a query
@@ -989,16 +998,38 @@ typedef enum {
    FT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH,
    FT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE,
    FT_NUM_PIVOTS_FETCHED_QUERY,               // how many pivots were fetched for a query
+    FT_BYTES_PIVOTS_FETCHED_QUERY,               // how many bytes of pivots were fetched for a query
+    FT_NANOTIME_PIVOTS_FETCHED_QUERY,               // how much time was spent fetching pivots for a query
    FT_NUM_PIVOTS_FETCHED_PREFETCH,            // ... for a prefetch
+    FT_BYTES_PIVOTS_FETCHED_PREFETCH,            // ... for a prefetch
+    FT_NANOTIME_PIVOTS_FETCHED_PREFETCH,            // ... for a prefetch
    FT_NUM_PIVOTS_FETCHED_WRITE,               // ... for a write
+    FT_BYTES_PIVOTS_FETCHED_WRITE,               // ... for a write
+    FT_NANOTIME_PIVOTS_FETCHED_WRITE,               // ... for a write
    FT_NUM_BASEMENTS_FETCHED_NORMAL,           // how many basement nodes were fetched because they were the target of a query
+    FT_BYTES_BASEMENTS_FETCHED_NORMAL,           // how many bytes of basement nodes were fetched because they were the target of a query
+    FT_NANOTIME_BASEMENTS_FETCHED_NORMAL,           // how much time was spent fetching basement nodes because they were the target of a query
    FT_NUM_BASEMENTS_FETCHED_AGGRESSIVE,       // ... because they were between lc and rc
+    FT_BYTES_BASEMENTS_FETCHED_AGGRESSIVE,       // ... because they were between lc and rc
+    FT_NANOTIME_BASEMENTS_FETCHED_AGGRESSIVE,       // ... because they were between lc and rc
    FT_NUM_BASEMENTS_FETCHED_PREFETCH,
+    FT_BYTES_BASEMENTS_FETCHED_PREFETCH,
+    FT_NANOTIME_BASEMENTS_FETCHED_PREFETCH,
    FT_NUM_BASEMENTS_FETCHED_WRITE,
+    FT_BYTES_BASEMENTS_FETCHED_WRITE,
+    FT_NANOTIME_BASEMENTS_FETCHED_WRITE,
    FT_NUM_MSG_BUFFER_FETCHED_NORMAL,          // how many msg buffers were fetched because they were the target of a query
+    FT_BYTES_MSG_BUFFER_FETCHED_NORMAL,          // how many bytes of msg buffers were fetched because they were the target of a query
+    FT_NANOTIME_MSG_BUFFER_FETCHED_NORMAL,          // how much time was spent fetching msg buffers because they were the target of a query
    FT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE,      // ... because they were between lc and rc
+    FT_BYTES_MSG_BUFFER_FETCHED_AGGRESSIVE,      // ... because they were between lc and rc
+    FT_NANOTIME_MSG_BUFFER_FETCHED_AGGRESSIVE,      // ... because they were between lc and rc
    FT_NUM_MSG_BUFFER_FETCHED_PREFETCH,
+    FT_BYTES_MSG_BUFFER_FETCHED_PREFETCH,
+    FT_NANOTIME_MSG_BUFFER_FETCHED_PREFETCH,
    FT_NUM_MSG_BUFFER_FETCHED_WRITE,
+    FT_BYTES_MSG_BUFFER_FETCHED_WRITE,
+    FT_NANOTIME_MSG_BUFFER_FETCHED_WRITE,
    FT_PRO_NUM_ROOT_SPLIT,
    FT_PRO_NUM_ROOT_H0_INJECT,
    FT_PRO_NUM_ROOT_H1_INJECT,

--- a/ft/ft-ops.cc
+++ b/ft/ft-ops.cc
--- a/ft/ft_node-serialize.cc
+++ b/ft/ft_node-serialize.cc
@@ -1132,7 +1132,7 @@ static const int read_header_heuristic_max = 32*1024;
 #define MIN(a,b) (((a)>(b)) ? (b) : (a))
 #endif

-static void read_ftnode_header_from_fd_into_rbuf_if_small_enough (int fd, BLOCKNUM blocknum, FT h, struct rbuf *rb)
+static void read_ftnode_header_from_fd_into_rbuf_if_small_enough (int fd, BLOCKNUM blocknum, FT h, struct rbuf *rb, struct ftnode_fetch_extra *bfe)
 // Effect: If the header part of the node is small enough, then read it into the rbuf.  The rbuf will be allocated to be big enough in any case.
 {
    DISKOFF offset, size;
@@ -1142,11 +1142,15 @@ static void read_ftnode_header_from_fd_into_rbuf_if_small_enough (int fd, BLOCKN
    rbuf_init(rb, raw_block, read_size);
    {
        // read the block
+        tokutime_t io_t0 = toku_time_now();
        ssize_t rlen = toku_os_pread(fd, raw_block, read_size, offset);
+        tokutime_t io_t1 = toku_time_now();
        assert(rlen>=0);
        rbuf_init(rb, raw_block, rlen);
+        bfe->bytes_read = rlen;
+        bfe->read_time = io_t1 - io_t0;
+        toku_ft_status_update_pivot_fetch_reason(bfe);
    }
-    
 }

 //
@@ -1590,9 +1594,6 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
        goto cleanup;
    }

-    // We got the entire header and node info!
-    toku_ft_status_update_pivot_fetch_reason(bfe);
-
    // Finish reading the compressed sub_block
    bytevec* cp;
    cp = (bytevec*)&sb_node_info.compressed_ptr;
@@ -2409,8 +2410,7 @@ deserialize_ftnode_from_fd(int fd,
                            STAT64INFO info)
 {
    struct rbuf rb = RBUF_INITIALIZER;
-    read_block_from_fd_into_rbuf(fd, blocknum, bfe->h, &rb);
-
+    read_block_from_fd_into_rbuf(fd, blocknum, bfe->h, &rb); 
    int r = deserialize_ftnode_from_rbuf(ftnode, ndd, blocknum, fullhash, bfe, info, &rb, fd);
    if (r != 0) {
        dump_bad_block(rb.buf,rb.size);
@@ -2433,7 +2433,7 @@ toku_deserialize_ftnode_from (int fd,
    toku_trace("deserial start");
    int r = 0;
    struct rbuf rb = RBUF_INITIALIZER;
-    read_ftnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->h, &rb);
+    read_ftnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->h, &rb, bfe);

    r = deserialize_ftnode_header_from_rbuf_if_small_enough(ftnode, ndd, blocknum, fullhash, bfe, &rb, fd);
    if (r != 0) {

--- a/portability/CMakeLists.txt
+++ b/portability/CMakeLists.txt
@@ -15,7 +15,7 @@ add_library(${LIBTOKUPORTABILITY}_static STATIC ${tokuportability_srcs})
 maybe_add_gcov_to_libraries(${LIBTOKUPORTABILITY} ${LIBTOKUPORTABILITY}_static)
 set_property(TARGET ${LIBTOKUPORTABILITY} ${LIBTOKUPORTABILITY}_static APPEND PROPERTY COMPILE_DEFINITIONS _GNU_SOURCE)
 set_target_properties(${LIBTOKUPORTABILITY}_static PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(${LIBTOKUPORTABILITY} LINK_PUBLIC ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})
+target_link_libraries(${LIBTOKUPORTABILITY} LINK_PUBLIC rt ${CMAKE_THREAD_LIBS_INIT} ${EXTRA_SYSTEM_LIBS})

 set_property(SOURCE file memory os_malloc portability toku_assert toku_rwlock APPEND PROPERTY
  COMPILE_DEFINITIONS TOKU_ALLOW_DEPRECATED=1)

--- a/portability/toku_time.h
+++ b/portability/toku_time.h
@@ -82,8 +82,8 @@ typedef uint64_t tokutime_t;             // Time type used in by tokutek timers.
 //
 double tokutime_to_seconds(tokutime_t)  __attribute__((__visibility__("default"))); // Convert tokutime to seconds.

-// Get tokutime.  We want this to be fast, so we expose the implementation as RDTSC.
-static inline tokutime_t get_tokutime (void) {
+// Get the value of tokutime for right now.  We want this to be fast, so we expose the implementation as RDTSC.
+static inline tokutime_t toku_time_now(void) {
    uint32_t lo, hi;
    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
    return (uint64_t)hi << 32 | lo;
@@ -95,11 +95,4 @@ static inline uint64_t toku_current_time_microsec(void) {
    return t.tv_sec * (1UL * 1000 * 1000) + t.tv_usec;
 }

-static inline uint64_t toku_current_time_nanosec(void) {
-    struct timespec t;
-    int r = toku_clock_gettime(CLOCK_REALTIME, &t);
-    assert(r == 0);
-    return t.tv_sec * (1UL * 1000 * 1000 * 1000) + t.tv_nsec;
-}
-
 #endif