Numerous small fixes to index_merge read time estimates code

a8456e68 · sergefp@mysql.com · 50f29b0e · a8456e68 · a8456e68 · a8456e68
Commit a8456e68 authored Dec 20, 2003 by sergefp@mysql.com
7 changed files
--- a/include/my_global.h
+++ b/include/my_global.h
@@ -666,6 +666,17 @@ extern double		my_atof(const char*);
 #define FLT_MAX		((float)3.40282346638528860e+38)
 #endif

+/* Define missing math constants. */
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+#ifndef M_E
+#define M_E 2.7182818284590452354
+#endif
+#ifndef M_LN2
+#define M_LN2 0.69314718055994530942
+#endif
+
 /*
  Max size that must be added to a so that we know Size to make
  adressable obj.

--- a/mysql-test/r/index_merge.result
+++ b/mysql-test/r/index_merge.result
@@ -109,7 +109,7 @@ key1	key2	key3	key4	key5	key6	key7	key8
 explain select * from t0 where 
 (key1 < 3 or key2 < 3) and (key3 < 4 or key4 < 4) and (key5 < 2 or key6 < 2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t0	index_merge	i1,i2,i3,i4,i5,i6	i5,i6	4,4	NULL	4	Using where
+1	SIMPLE	t0	index_merge	i1,i2,i3,i4,i5,i6	i1,i2	4,4	NULL	6	Using where
 explain select * from t0 where 
 (key1 < 3 or key2 < 3) and (key3 < 100);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra

--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -18,10 +18,6 @@

 #include "mysql_priv.h"

-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
 Item *create_func_abs(Item* a)
 {
  return new Item_func_abs(a);

--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -750,7 +750,7 @@ double Item_func_log2::val()
  double value=args[0]->val();
  if ((null_value=(args[0]->null_value || value <= 0.0)))
    return 0.0;
-  return log(value) / log(2.0);
+  return log(value) / M_LN2;
 }

 double Item_func_log10::val()

--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -296,6 +296,9 @@ typedef struct st_qsel_param {
  char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
    max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
  bool quick;				// Don't calulate possible keys
+
+  uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
+  uint imerge_cost_buff_size; /* size of the buffer */
 } PARAM;

 static SEL_TREE * get_mm_parts(PARAM *param,Field *field,
@@ -953,6 +956,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
    param.table=head;
    param.keys=0;
    param.mem_root= &alloc;
+    param.imerge_cost_buff_size= 0;

    thd->no_errors=1;				// Don't warn about NULL
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
@@ -1011,7 +1015,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
 	ha_rows found_records;
 	double found_read_time= read_time;

-        if (!get_quick_select_params(tree, &param, needed_reg, true,
+        if (!get_quick_select_params(tree, &param, needed_reg, false,
                                     &found_read_time, &found_records,
                                     &best_key))
        {
@@ -1254,54 +1258,48 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg,
  */

  /*
-    It may be possible to use different keys for index_merge scans,
-     e.g. for query like 
+    It may be possible to use different keys for index_merge scans, e.g. for
+    query like 
    ...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4)
-    we have to make choice between key1 and key2 for one scan and 
-    between key3,key4 for another.
-    We assume we'll get the best way if we choose the best key read
-    inside each of the conjuncts. Comparison is done without 'using index'.
+    we have to make choice between key1 and key2 for one scan and between
+    key3, key4 for another.
+    We assume we'll get the best if we choose the best key read inside each
+    of the conjuncts.
  */
  for (SEL_TREE **ptree= imerge->trees;
       ptree != imerge->trees_next;
       ptree++)
  {
    SEL_ARG **tree_best_key;
-    uint keynr;
-
    tree_read_time= *read_time;
-    if (get_quick_select_params(*ptree, param, needed_reg, false,
+
+    if (get_quick_select_params(*ptree, param, needed_reg, true,
                                &tree_read_time, &tree_records,
                                &tree_best_key))
    {
      /*
-        Non-'index only' range scan on a one in index_merge key is more 
-        expensive than other available option. The entire index_merge will be
-        more expensive then, too. We continue here only to update SQL_SELECT 
-        members.
+        One of index scans in this index_merge is more expensive than entire
+        table read for another available option. The entire index_merge will 
+        be more expensive then, too. We continue here only to update 
+        SQL_SELECT members.
      */
      imerge_too_expensive= true;
    }

    if (imerge_too_expensive)
      continue;
-
+    
+    uint keynr= param->real_keynr[(tree_best_key-(*ptree)->keys)];
    imerge->best_keys[ptree - imerge->trees]= tree_best_key;
-    keynr= param->real_keynr[(tree_best_key-(*ptree)->keys)];    
+    imerge_cost += tree_read_time;

    if (pk_is_clustered && keynr == param->table->primary_key)
    {
-      /* This is a Clustered PK scan, it will be done without 'index only' */
-      imerge_cost += tree_read_time;
      have_cpk_scan= true;
      cpk_records= tree_records;
    }
    else
-    {
-      /* Non-CPK scan, calculate time to do it using 'index only' */
-      imerge_cost += get_index_only_read_time(param, tree_records,keynr);
      records_for_unique += tree_records;
-    }
  }  
  DBUG_PRINT("info",("index_merge cost of index reads: %g", imerge_cost));

@@ -1359,18 +1357,27 @@ static int get_index_merge_params(PARAM *param, key_map& needed_reg,
  DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g", imerge_cost));

  /* PHASE 3: Add Unique operations cost */
-  double unique_cost= 
-    Unique::get_use_cost(param->mem_root, records_for_unique, 
+  register uint unique_calc_buff_size= 
+    Unique::get_cost_calc_buff_size(records_for_unique, 
+                                    param->table->file->ref_length,
+                                    param->thd->variables.sortbuff_size);
+  if (param->imerge_cost_buff_size < unique_calc_buff_size)
+  {
+    if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root,
+                                                     unique_calc_buff_size)))
+      DBUG_RETURN(1);
+    param->imerge_cost_buff_size= unique_calc_buff_size;
+  }
+
+  imerge_cost += 
+    Unique::get_use_cost(param->imerge_cost_buff, records_for_unique,
                         param->table->file->ref_length,
                         param->thd->variables.sortbuff_size);
-  if (unique_cost < 0.0)
-    DBUG_RETURN(1);

-  imerge_cost += unique_cost;
  DBUG_PRINT("info",("index_merge total cost: %g", imerge_cost));
  if (imerge_cost < *read_time)
  {
-    *read_time=   imerge_cost;    
+    *read_time=   imerge_cost;
    records_for_unique += cpk_records;
    *imerge_rows= min(records_for_unique, param->table->file->records);    
    DBUG_RETURN(0);
@@ -1415,8 +1422,8 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records,
      tree        in         make range select for this SEL_TREE 
      param       in         parameters from test_quick_select
      needed_reg  in/out     other table data needed by this quick_select
-      index_read_can_be_used if false, assume that 'index only' option is not
-                             available.
+      index_read_must_be_used if true, assume 'index only' option will be set 
+                             (except for clustered PK indexes)
      read_time   out        read time estimate
      records     out        # of records estimate
      key_to_read out        SEL_ARG to be used for creating quick select
@@ -1424,16 +1431,17 @@ inline double get_index_only_read_time(PARAM* param, ha_rows records,

 static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
                                   key_map& needed_reg,
-                                   bool index_read_can_be_used,
+                                   bool index_read_must_be_used,
                                   double *read_time, ha_rows *records,
                                   SEL_ARG ***key_to_read)
 {
  int idx;
  int result = 1;
+  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
  /*
-    Note that there may be trees that have type SEL_TREE::KEY but contain 
-    no key reads at all. For example, tree for expression "key1 is not null"
-    where key1 is defined as "not null".
+    Note that there may be trees that have type SEL_TREE::KEY but contain no 
+    key reads at all, e.g. tree for expression "key1 is not null" where key1 
+    is defined as "not null".
  */
  SEL_ARG **key,**end;

@@ -1450,22 +1458,29 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
          (*key)->maybe_flag)
        needed_reg.set_bit(keynr);
      
-      bool read_index_only= index_read_can_be_used? 
-                            param->table->used_keys.is_set(keynr): false;
+      bool read_index_only= index_read_must_be_used? true :
+                            (bool)param->table->used_keys.is_set(keynr);
      found_records=check_quick_select(param, idx, *key);

      if (found_records != HA_POS_ERROR && found_records > 2 &&
          read_index_only &&
-          (param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY))
+          (param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY) &&
+          !(pk_is_clustered && keynr == param->table->primary_key))
      {
        /* We can resolve this by only reading through this key. */
        found_read_time=get_index_only_read_time(param, found_records, keynr);
      }
      else
+      {
+        /* 
+          cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
+          The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
+        */
 	found_read_time= (param->table->file->read_time(keynr,
 						        param->range_count,
 						        found_records)+
 			  (double) found_records / TIME_FOR_COMPARE);
+      }
      if (*read_time > found_read_time && found_records != HA_POS_ERROR)
      {
        *read_time=   found_read_time;

--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -1233,8 +1233,16 @@ public:
  }

  bool get(TABLE *table);
-  static double get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size, 
+  static double get_use_cost(uint *buffer, uint nkeys, uint key_size, 
                             ulong max_in_memory_size);
+  inline static int get_cost_calc_buff_size(ulong nkeys, uint key_size, 
+                                            ulong max_in_memory_size)
+  {
+    register ulong max_elems_in_tree= 
+      (1 + max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
+    return sizeof(uint)*(1 + nkeys/max_elems_in_tree);
+  }
+
  friend int unique_write_to_file(gptr key, element_count count, Unique *unique);
  friend int unique_write_to_ptrs(gptr key, element_count count, Unique *unique);
 };

--- a/sql/uniques.cc
+++ b/sql/uniques.cc
@@ -72,112 +72,161 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
 }


-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
+/*
+  Calculate log2(n!)
+  
+  NOTES
+    Stirling's approximate formula is used:
+          
+      n! ~= sqrt(2*M_PI*n) * (n/M_E)^n 
+   
+    Derivation of formula used for calculations is as follows:

-#ifndef M_E
-#define M_E (exp((double)1.0))
-#endif
+    log2(n!) = log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) =
+    
+      = (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2).
+*/

 inline double log2_n_fact(double x)
 {
-  return (2 * (((x)+1)*log(((x)+1)/M_E) + log(2*M_PI*((x)+1))/2 ) / log(2));
+  return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2;
 }

+
 /*
-  Calculate cost of merge_buffers call.
+  Calculate cost of merge_buffers function call for given sequence of 
+  input stream lengths and store the number of rows in result stream in *last.

-  NOTE
-    See comment near Unique::get_use_cost for cost formula derivation.
+  SYNOPSIS
+    get_merge_buffers_cost()
+      buff_elems  Array of #s of elements in buffers
+      elem_size   Size of element stored in buffer
+      output_buff Pointer to storage for result buffer size
+      first       Pointer to first merged element size
+      last        Pointer to last merged element size
+  
+  RETURN
+    Cost of merge_buffers operation in disk seeks.
+  
+  NOTES
+    It is assumed that no rows are eliminated during merge.
+    The cost is calculated as 
+    
+      cost(read_and_write) + cost(merge_comparisons).
+      
+    All bytes in the sequences is read and written back during merge so cost 
+    of disk io is 2*elem_size*total_buf_elems/IO_SIZE (2 is for read + write)
+     
+    For comparisons cost calculations we assume that all merged sequences have
+    the same length, so each of total_buf_size elements will be added to a sort 
+    heap with (n_buffers-1) elements. This gives the comparison cost:
+
+      total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID;
 */
-static double get_merge_buffers_cost(uint* buff_sizes, uint elem_size, 
-                                     int last, int f,int t)
+
+static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
+                                     uint *output_buff, uint *first,
+                                     uint *last)
 {  
-  uint sum= 0;
-  for (int i=f; i <= t; i++)
-    sum+= buff_sizes[i];
-  buff_sizes[last]= sum;
+  uint total_buf_elems= 0;
+  for (uint *pbuf= first; pbuf <= last; pbuf++)
+    total_buf_elems+= *pbuf;
+  *last= total_buf_elems;
  
-  int n_buffers= t - f + 1;
-  double buf_length= sum*elem_size;
+  int n_buffers= last - first + 1;

-  return (((double)buf_length/(n_buffers+1)) / IO_SIZE) * 2 * n_buffers + 
-     buf_length * log(n_buffers)  / (TIME_FOR_COMPARE_ROWID * log(2.0));
+  /* Using log2(n)=log(n)/log(2) formula */
+  return 2*((double)total_buf_elems*elem_size) / IO_SIZE + 
+     total_buf_elems*log(n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2);
 }

+
 /*
  Calculate cost of merging buffers into one in Unique::get, i.e. calculate
-  how long (in terms of disk seeks) the two call
+  how long (in terms of disk seeks) the two calls
    merge_many_buffs(...); 
    merge_buffers(...); 
  will take.

  SYNOPSIS
    get_merge_many_buffs_cost()
-      alloc         memory pool to use
-      maxbuffer     # of full buffers.
-      max_n_elems   # of elements in first maxbuffer buffers.
-      last_n_elems  # of elements in last buffer.
-      elem_size     size of buffer element.
+      buffer        buffer space for temporary data, at least 
+                    Unique::get_cost_calc_buff_size bytes
+      maxbuffer     # of full buffers
+      max_n_elems   # of elements in first maxbuffer buffers
+      last_n_elems  # of elements in last buffer
+      elem_size     size of buffer element

  NOTES
-    It is assumed that maxbuffer+1 buffers are merged, first maxbuffer buffers
-    contain max_n_elems each, last buffer contains last_n_elems elements.
+    maxbuffer+1 buffers are merged, where first maxbuffer buffers contain 
+    max_n_elems elements each and last buffer contains last_n_elems elements.

    The current implementation does a dumb simulation of merge_many_buffs
-    actions.
+    function actions.
  
  RETURN
-    >=0  Cost of merge in disk seeks.
-    <0   Out of memory.
+    Cost of merge in disk seeks.
 */
-static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
+
+static double get_merge_many_buffs_cost(uint *buffer,
                                        uint maxbuffer, uint max_n_elems,
                                        uint last_n_elems, int elem_size)
 {
  register int i;
  double total_cost= 0.0;
-  int    lastbuff;
-  uint*  buff_sizes;
-  
-  if (!(buff_sizes= (uint*)alloc_root(alloc, sizeof(uint) * (maxbuffer + 1))))
-    return -1.0;
+  uint *buff_elems= buffer; /* #s of elements in each of merged sequences */
+  uint *lastbuff;
+   
+  /* 
+    Set initial state: first maxbuffer sequences contain max_n_elems elements
+    each, last sequence contains last_n_elems elements.
+  */
  for(i = 0; i < (int)maxbuffer; i++)
-    buff_sizes[i]= max_n_elems;
-  
-  buff_sizes[maxbuffer]= last_n_elems;
+    buff_elems[i]= max_n_elems;  
+  buff_elems[maxbuffer]= last_n_elems;

+  /* 
+    Do it exactly as merge_many_buff function does, calling 
+    get_merge_buffers_cost to get cost of merge_buffers.
+  */
  if (maxbuffer >= MERGEBUFF2)
  {
-    /* Simulate merge_many_buff */
    while (maxbuffer >= MERGEBUFF2)
    {
      lastbuff=0;
      for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF)
-        total_cost += get_merge_buffers_cost(buff_sizes, elem_size, 
-                                             lastbuff++, i, i+MERGEBUFF-1);
+        total_cost+=get_merge_buffers_cost(buff_elems, elem_size, lastbuff++,
+                                           buff_elems + i, 
+                                           buff_elems + i + MERGEBUFF-1);
      
-      total_cost += get_merge_buffers_cost(buff_sizes, elem_size, 
-                                           lastbuff++, i, maxbuffer);
+      total_cost+=get_merge_buffers_cost(buff_elems, elem_size, lastbuff++,
+                                         buff_elems + i, 
+                                         buff_elems + maxbuffer);
      maxbuffer= (uint)lastbuff-1;
    }
  }
  
  /* Simulate final merge_buff call. */
-  total_cost += get_merge_buffers_cost(buff_sizes, elem_size, 0, 0, 
-                                       maxbuffer);
+  total_cost += get_merge_buffers_cost(buff_elems, elem_size, buff_elems, 
+                                       buff_elems, buff_elems + maxbuffer);
  return total_cost;
 }


 /*
-  Calclulate cost of using Unique for processing nkeys elements of size 
+  Calculate cost of using Unique for processing nkeys elements of size 
  key_size using max_in_memory_size memory.
+
+  SYNOPSIS
+    Unique::get_use_cost()
+      buffer    space for temporary data, use Unique::get_cost_calc_buff_size
+                to get # bytes needed.
+      nkeys     #of elements in Unique
+      key_size  size of each elements in bytes
+      max_in_memory_size amount of memory Unique will be allowed to use
  
  RETURN
-    >=0  Cost in disk seeks.
-    <0   Out of memory.
+    Cost in disk seeks.
  
  NOTES
    cost(using_unqiue) = 
@@ -190,16 +239,14 @@ static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
      comparisons, where n runs from 1 tree_size (we assume that all added
      elements are different). Together this gives:
    
-      n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!) =
+      n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)
  
-      = 2*ln((N+1)!) / ln(2) = {using Stirling formula} = 
-
-      = 2*( (N+1)*ln((N+1)/e) + (1/2)*ln(2*pi*(N+1)) / ln(2).
-
      then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;

      Total cost of creating trees:
      (n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
+
+      Approximate value of log2(N!) is calculated by log2_n_fact function.
    
    2. Cost of merging.
      If only one tree is created by Unique no merging will be necessary.
@@ -213,7 +260,7 @@ static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
      these will be random seeks.
 */

-double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size, 
+double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size, 
                            ulong max_in_memory_size)
 {
  ulong max_elements_in_tree;
@@ -221,15 +268,16 @@ double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
  int   n_full_trees; /* number of trees in unique - 1 */
  double result;
  
-  max_elements_in_tree= max_in_memory_size / 
-                        ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
+  max_elements_in_tree= 
+    max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
+
  n_full_trees=    nkeys / max_elements_in_tree;
  last_tree_elems= nkeys % max_elements_in_tree;
  
  /* Calculate cost of creating trees */
-  result= log2_n_fact(last_tree_elems);
+  result= 2*log2_n_fact(last_tree_elems + 1.0);
  if (n_full_trees)
-    result+= n_full_trees * log2_n_fact(max_elements_in_tree);
+    result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0);
  result /= TIME_FOR_COMPARE_ROWID;

  DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys,
@@ -241,13 +289,15 @@ double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
  
  /* 
    There is more then one tree and merging is necessary.
-    First, add cost of writing all trees to disk. 
+    First, add cost of writing all trees to disk, assuming that all disk
+    writes are sequential.
  */
-  result += n_full_trees * ceil(key_size*max_elements_in_tree / IO_SIZE);
-  result += ceil(key_size*last_tree_elems / IO_SIZE);
+  result += DISK_SEEK_BASE_COST * n_full_trees * 
+              ceil(key_size*max_elements_in_tree / IO_SIZE);
+  result += DISK_SEEK_BASE_COST * ceil(key_size*last_tree_elems / IO_SIZE);

  /* Cost of merge */
-  double merge_cost= get_merge_many_buffs_cost(alloc, n_full_trees,
+  double merge_cost= get_merge_many_buffs_cost(buffer, n_full_trees,
                                               max_elements_in_tree,
                                               last_tree_elems, key_size);
  if (merge_cost < 0.0)