Commit 802937ad authored by unknown's avatar unknown

Precise read time estimates for index_merge/Unique

parent aa6b0bcb
drop table if exists t0, t1, t2, t3;
drop table if exists t0, t1, t2, t3,t4;
create table t0
key1 int not null,
......@@ -3,7 +3,7 @@
drop table if exists t0, t1, t2, t3;
drop table if exists t0, t1, t2, t3,t4;
# Create and fill a table with simple keys
......@@ -88,9 +88,9 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
FILESORT_INFO table_sort;
don't use table->sort in filesort as it is also used by
QUICK_INDEX_MERGE_SELECT. work with a copy of it and put it back at the
end when index_merge select has finished with it.
Don't use table->sort in filesort as it is also used by
QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end
when index_merge select has finished with it.
memcpy(&table_sort, &table->sort, sizeof(FILESORT_INFO));
table->sort.io_cache= NULL;
......@@ -452,7 +452,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
if (quick_select)
index_merge quick select uses table->sort when retrieving rows, so free
index_merge quick select uses table->sort when retrieving rows, so free
resources it has allocated.
......@@ -167,7 +167,7 @@ class ha_berkeley: public handler
longlong get_auto_increment();
void print_error(int error, myf errflag);
uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; }
bool primary_key_is_clustered_covering() { return true; }
bool primary_key_is_clustered() { return true; }
extern bool berkeley_skip, berkeley_shared_data;
......@@ -2003,7 +2003,8 @@ build_template(
update field->query_id so that the formula
thd->query_id == field->query_id did not work. */
ibool index_contains_field = dict_index_contains_col_or_prefix(index, i);
ibool index_contains_field=
dict_index_contains_col_or_prefix(index, i);
if (templ_type == ROW_MYSQL_REC_FIELDS &&
((prebuilt->read_just_key && !index_contains_field) ||
......@@ -187,7 +187,7 @@ class ha_innobase: public handler
void init_table_handle_for_HANDLER();
longlong get_auto_increment();
uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
bool primary_key_is_clustered_covering() { return true; }
bool primary_key_is_clustered() { return true; }
extern bool innodb_skip;
......@@ -378,10 +378,10 @@ public:
true primary key (if there is one) is clustered key covering all fields
true Primary key (if there is one) is clustered key covering all fields
false otherwise
virtual bool primary_key_is_clustered_covering() { return false; }
virtual bool primary_key_is_clustered() { return false; }
/* Some extern variables used with handlers */
......@@ -118,6 +118,26 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
#define TIME_FOR_COMPARE 5 // 5 compares == one read
Number of comparisons of table rowids equivalent to reading one row from a table.
For sequential disk seeks the cost formula is:
The cost of average seek
#define DISK_SEEK_BASE_COST ((double)0.5)
#define BLOCKS_IN_AVG_SEEK 128
Number of rows in a reference table when referred to through a non-unique key.
This value is only used when we don't know anything about the key
This diff is collapsed.
......@@ -118,11 +118,13 @@ public:
friend void print_quick_sel_range(QUICK_RANGE_SELECT *quick,
const key_map* needed_reg);
friend QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
struct st_table_ref *ref);
QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
struct st_table_ref *ref);
friend bool get_quick_keys(struct st_qsel_param *param,
SEL_ARG *key_tree,char *min_key,uint min_key_flag,
SEL_ARG *key_tree,
char *min_key, uint min_key_flag,
char *max_key, uint max_key_flag);
friend QUICK_RANGE_SELECT *get_quick_select(struct st_qsel_param*,uint idx,
SEL_ARG *key_tree,
......@@ -160,58 +162,62 @@ public:
QUICK_INDEX_MERGE_SELECT - index_merge acces method quick select.
QUICK_INDEX_MERGE_SELECT - index_merge access method quick select.
* QUICK_RANGE_SELECTs to get rows
* Unique class to remove duplicate rows
* QUICK_RANGE_SELECTs to get rows
* Unique class to remove duplicate rows
Current implementation doesn't detect all cases where index_merge could be
used, in particular:
* index_merge will never be used if range scan is possible (even if range
scan is more expensive)
Current implementation doesn't detect all cases where index_merge could
be used, in particular:
* index_merge will never be used if range scan is possible (even if
range scan is more expensive)
* index_merge+'using index' is not supported (this the consequence of the
above restriction)
* index_merge+'using index' is not supported (this is the consequence of
the above restriction)
* If WHERE part contains complex nested AND and OR conditions, some ways to
retrieve rows using index_merge will not be considered. The choice of
read plan may depend on the order of conjuncts/disjuncts in WHERE part of
the query, see comments near SEL_IMERGE::or_sel_tree_with_checks and
imerge_list_or_list function for details.
* If WHERE part contains complex nested AND and OR conditions, some ways
to retrieve rows using index_merge will not be considered. The choice
of read plan may depend on the order of conjuncts/disjuncts in WHERE
part of the query, see comments near imerge_list_or_list and
SEL_IMERGE::or_sel_tree_with_checks functions for details.
* there is no "index_merge_ref" method (but index_merge on non-first table
in join is possible with 'range checked for each record').
* There is no "index_merge_ref" method (but index_merge on non-first
table in join is possible with 'range checked for each record').
See comments around SEL_IMERGE class and test_quick_select for more details.
See comments around SEL_IMERGE class and test_quick_select for more details.
index_merge uses Unique class for duplicates removal. Index merge takes
advantage of clustered covering primary key (CCPK) if the table has one.
The algorithm is as follows:
index_merge uses Unique class for duplicates removal. index_merge takes
advantage of Clustered Primary Key (CPK) if the table has one.
The index_merge algorithm consists of two phases:
prepare() //implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique
activate 'index only';
while(retrieve next row for non-CCPK scan)
Phase 1 (implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique):
if (there is a CCPK scan and row will be retrieved by it)
skip this row;
put rowid into Unique;
activate 'index only';
while(retrieve next row for non-CPK scan)
if (there is a CPK scan and row will be retrieved by it)
skip this row;
put its rowid into Unique;
deactivate 'index only';
deactivate 'index only';
fetch() //implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next calls
retrieve all rows from row pointers stored in Unique;
free Unique;
retrieve all rows for CCPK scan;
Phase 2 (implemented as a sequence of QUICK_INDEX_MERGE_SELECT::get_next calls):
retrieve all rows from row pointers stored in Unique;
free Unique;
retrieve all rows for CPK scan;
......@@ -239,10 +245,10 @@ public:
/* last element in quick_selects list */
QUICK_RANGE_SELECT* last_quick_select;
/* quick select that uses Covering Clustered Primary Key (NULL if none) */
/* quick select that uses clustered primary key (NULL if none) */
QUICK_RANGE_SELECT* pk_quick_select;
/* true if this select is currently doing a CCPK scan */
/* true if this select is currently doing a clustered PK scan */
bool doing_pk_scan;
Unique *unique;
......@@ -98,7 +98,6 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
else if (select && select->quick)
//&& (select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE))
DBUG_PRINT("info",("using rr_quick"));
......@@ -1233,7 +1233,8 @@ public:
bool get(TABLE *table);
static double get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
ulong max_in_memory_size);
friend int unique_write_to_file(gptr key, element_count count, Unique *unique);
friend int unique_write_to_ptrs(gptr key, element_count count, Unique *unique);
......@@ -63,12 +63,194 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
/* If the following fails, the next add will also fail */
my_init_dynamic_array(&file_ptrs, sizeof(BUFFPEK), 16, 16);
If you change the following, change it in get_max_elements function, too.
max_elements= max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+size);
open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE,
#ifndef M_PI
#define M_PI 3.14159265358979323846
#define M_E (exp(1))
inline double log2_n_fact(double x)
return (2 * ( ((x)+1) * log(((x)+1)/M_E) + log(2*M_PI*((x)+1))/2 ) / log(2));
Calculate cost of merge_buffers call.
See comment near Unique::get_use_cost for cost formula derivation.
static double get_merge_buffers_cost(uint* buff_sizes, uint elem_size,
int last, int f,int t)
uint sum= 0;
for (int i=f; i <= t; i++)
sum+= buff_sizes[i];
buff_sizes[last]= sum;
int n_buffers= t - f + 1;
double buf_length= sum*elem_size;
return (((double)buf_length/(n_buffers+1)) / IO_SIZE) * 2 * n_buffers +
buf_length * log(n_buffers) / (TIME_FOR_COMPARE_ROWID * log(2.0));
Calculate cost of merging buffers into one in Unique::get, i.e. calculate
how long (in terms of disk seeks) the two merge calls simulated below
will take.
alloc memory pool to use
maxbuffer # of full buffers.
max_n_elems # of elements in first maxbuffer buffers.
last_n_elems # of elements in last buffer.
elem_size size of buffer element.
It is assumed that maxbuffer+1 buffers are merged, first maxbuffer buffers
contain max_n_elems each, last buffer contains last_n_elems elements.
The current implementation does a dumb simulation of merge_many_buffs
>=0 Cost of merge in disk seeks.
<0 Out of memory.
static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
uint maxbuffer, uint max_n_elems,
uint last_n_elems, int elem_size)
register int i;
double total_cost= 0.0;
int lastbuff;
uint* buff_sizes;
if (!(buff_sizes= (uint*)alloc_root(alloc, sizeof(uint) * (maxbuffer + 1))))
return -1.0;
for(i = 0; i < (int)maxbuffer; i++)
buff_sizes[i]= max_n_elems;
buff_sizes[maxbuffer]= last_n_elems;
if (maxbuffer >= MERGEBUFF2)
/* Simulate merge_many_buff */
while (maxbuffer >= MERGEBUFF2)
for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF)
total_cost += get_merge_buffers_cost(buff_sizes, elem_size,
lastbuff++, i, i+MERGEBUFF-1);
total_cost += get_merge_buffers_cost(buff_sizes, elem_size,
lastbuff++, i, maxbuffer);
maxbuffer= (uint)lastbuff-1;
/* Simulate final merge_buff call. */
total_cost += get_merge_buffers_cost(buff_sizes, elem_size, 0, 0,
return total_cost;
Calculate cost of using Unique for processing nkeys elements of size
key_size using max_in_memory_size memory.
Use cost as # of disk seeks.
cost(using_unique) =
cost(create_trees) + (see #1)
cost(merge) + (see #2)
cost(read_result) (see #3)
1. Cost of trees creation
For each Unique::put operation there will be 2*log2(n+1) element
comparisons, where n runs from 1 to tree_size (we assume that all added
elements are different). Together this gives:
n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!) =
= 2*ln((N+1)!) / ln(2) = {using Stirling formula} =
= 2*( (N+1)*ln((N+1)/e) + (1/2)*ln(2*pi*(N+1)) ) / ln(2).
then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
Total cost of creating trees:
(n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
2. Cost of merging.
If only one tree is created by Unique no merging will be necessary.
Otherwise, we model execution of merge_many_buff function and count
#of merges. (The reason behind this is that number of buffers is small,
while size of buffers is big and we don't want to lose precision with
O(x)-style formula)
3. If only one tree is created by Unique no disk io will happen.
Otherwise, ceil(key_len*n_keys/IO_SIZE) disk seeks are necessary. We assume
these will be random seeks.
double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
ulong max_in_memory_size)
ulong max_elements_in_tree;
ulong last_tree_elems;
int n_full_trees; /* number of trees in unique - 1 */
double result;
max_elements_in_tree= max_in_memory_size /
n_full_trees= nkeys / max_elements_in_tree;
last_tree_elems= nkeys % max_elements_in_tree;
/* Calculate cost of creating trees */
result= log2_n_fact(last_tree_elems);
if (n_full_trees)
result+= n_full_trees * log2_n_fact(max_elements_in_tree);
/* Calculate cost of merging */
if (!n_full_trees)
return result;
/* There is more than one tree and merging is necessary. */
/* Add cost of writing all trees to disk. */
result += n_full_trees * ceil(key_size*max_elements_in_tree / IO_SIZE);
result += ceil(key_size*last_tree_elems / IO_SIZE);
/* Cost of merge */
result += get_merge_many_buffs_cost(alloc, n_full_trees,
last_tree_elems, key_size);
Add cost of reading the resulting sequence, assuming there were no
duplicate elements.
result += ceil((double)key_size*nkeys/IO_SIZE);
return result;
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment