Commit 67c6d511 authored by sergefp@mysql.com's avatar sergefp@mysql.com

Precise read time estimates for index_merge/Unique

parent 20295cf1
drop table if exists t0, t1, t2, t3;
drop table if exists t0, t1, t2, t3,t4;
create table t0
(
key1 int not null,
......
......@@ -3,7 +3,7 @@
#
--disable_warnings
drop table if exists t0, t1, t2, t3;
drop table if exists t0, t1, t2, t3,t4;
--enable_warnings
# Create and fill a table with simple keys
......
......@@ -88,9 +88,9 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
#endif
FILESORT_INFO table_sort;
/*
don't use table->sort in filesort as it is also used by
QUICK_INDEX_MERGE_SELECT. work with a copy of it and put it back at the
end when index_merge select has finished with it.
Don't use table->sort in filesort as it is also used by
QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end
when index_merge select has finished with it.
*/
memcpy(&table_sort, &table->sort, sizeof(FILESORT_INFO));
table->sort.io_cache= NULL;
......@@ -452,7 +452,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
if (quick_select)
{
/*
index_merge quick select uses table->sort when retrieving rows, so free
index_merge quick select uses table->sort when retrieving rows, so free
resoures it has allocated.
*/
end_read_record(&read_record_info);
......
......@@ -167,7 +167,7 @@ class ha_berkeley: public handler
longlong get_auto_increment();
void print_error(int error, myf errflag);
uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; }
bool primary_key_is_clustered_covering() { return true; }
bool primary_key_is_clustered() { return true; }
};
extern bool berkeley_skip, berkeley_shared_data;
......
......@@ -2003,7 +2003,8 @@ build_template(
update field->query_id so that the formula
thd->query_id == field->query_id did not work. */
ibool index_contains_field = dict_index_contains_col_or_prefix(index, i);
ibool index_contains_field=
dict_index_contains_col_or_prefix(index, i);
if (templ_type == ROW_MYSQL_REC_FIELDS &&
((prebuilt->read_just_key && !index_contains_field) ||
......
......@@ -187,7 +187,7 @@ class ha_innobase: public handler
void init_table_handle_for_HANDLER();
longlong get_auto_increment();
uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
bool primary_key_is_clustered_covering() { return true; }
bool primary_key_is_clustered() { return true; }
};
extern bool innodb_skip;
......
......@@ -378,10 +378,10 @@ public:
/*
RETURN
true primary key (if there is one) is clustered key covering all fields
true Primary key (if there is one) is clustered key covering all fields
false otherwise
*/
virtual bool primary_key_is_clustered_covering() { return false; }
virtual bool primary_key_is_clustered() { return false; }
};
/* Some extern variables used with handlers */
......
......@@ -118,6 +118,26 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
*/
#define TIME_FOR_COMPARE 5 // 5 compares == one read
/*
Number of comparisons of table rowids equivalent to reading one row from a
table.
*/
#define TIME_FOR_COMPARE_ROWID (TIME_FOR_COMPARE*2)
/*
For sequential disk seeks the cost formula is:
DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip
The cost of average seek
DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK =1.0.
*/
#define DISK_SEEK_BASE_COST ((double)0.5)
#define BLOCKS_IN_AVG_SEEK 128
#define DISK_SEEK_PROP_COST ((double)0.5/BLOCKS_IN_AVG_SEEK)
/*
Number of rows in a reference table when refereed through a not unique key.
This value is only used when we don't know anything about the key
......
This diff is collapsed.
......@@ -118,11 +118,13 @@ public:
protected:
friend void print_quick_sel_range(QUICK_RANGE_SELECT *quick,
const key_map* needed_reg);
friend QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
struct st_table_ref *ref);
friend
QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
struct st_table_ref *ref);
friend bool get_quick_keys(struct st_qsel_param *param,
QUICK_RANGE_SELECT *quick,KEY_PART *key,
SEL_ARG *key_tree,char *min_key,uint min_key_flag,
SEL_ARG *key_tree,
char *min_key, uint min_key_flag,
char *max_key, uint max_key_flag);
friend QUICK_RANGE_SELECT *get_quick_select(struct st_qsel_param*,uint idx,
SEL_ARG *key_tree,
......@@ -160,58 +162,62 @@ public:
/*
QUICK_INDEX_MERGE_SELECT - index_merge acces method quick select.
QUICK_INDEX_MERGE_SELECT - index_merge access method quick select.
QUICK_INDEX_MERGE_SELECT uses
* QUICK_RANGE_SELECTs to get rows
* Unique class to remove duplicate rows
QUICK_INDEX_MERGE_SELECT uses
* QUICK_RANGE_SELECTs to get rows
* Unique class to remove duplicate rows
INDEX MERGE OPTIMIZER
Current implementation doesn't detect all cases where index_merge could be
used, in particular:
* index_merge will never be used if range scan is possible (even if range
scan is more expensive)
INDEX MERGE OPTIMIZER
Current implementation doesn't detect all cases where index_merge could
be used, in particular:
* index_merge will never be used if range scan is possible (even if
range scan is more expensive)
* index_merge+'using index' is not supported (this the consequence of the
above restriction)
* index_merge+'using index' is not supported (this the consequence of
the above restriction)
* If WHERE part contains complex nested AND and OR conditions, some ways to
retrieve rows using index_merge will not be considered. The choice of
read plan may depend on the order of conjuncts/disjuncts in WHERE part of
the query, see comments near SEL_IMERGE::or_sel_tree_with_checks and
imerge_list_or_list function for details.
* If WHERE part contains complex nested AND and OR conditions, some ways
to retrieve rows using index_merge will not be considered. The choice
of read plan may depend on the order of conjuncts/disjuncts in WHERE
part of the query, see comments near imerge_list_or_list and
SEL_IMERGE::or_sel_tree_with_checks functions for details.
* there is no "index_merge_ref" method (but index_merge on non-first table
in join is possible with 'range checked for each record').
* There is no "index_merge_ref" method (but index_merge on non-first
table in join is possible with 'range checked for each record').
See comments around SEL_IMERGE class and test_quick_select for more details.
See comments around SEL_IMERGE class and test_quick_select for more
details.
ROW RETRIEVAL ALGORITHM
ROW RETRIEVAL ALGORITHM
index_merge uses Unique class for duplicates removal. Index merge takes
advantage of clustered covering primary key (CCPK) if the table has one.
The algorithm is as follows:
index_merge uses Unique class for duplicates removal. index_merge takes
advantage of Clustered Primary Key (CPK) if the table has one.
The index_merge algorithm consists of two phases:
prepare() //implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique
{
activate 'index only';
while(retrieve next row for non-CCPK scan)
Phase 1 (implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique):
prepare()
{
if (there is a CCPK scan and row will be retrieved by it)
skip this row;
else
put rowid into Unique;
activate 'index only';
while(retrieve next row for non-CPK scan)
{
if (there is a CPK scan and row will be retrieved by it)
skip this row;
else
put its rowid into Unique;
}
deactivate 'index only';
}
deactivate 'index only';
}
fetch() //implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next calls
{
retrieve all rows from row pointers stored in Unique;
free Unique;
retrieve all rows for CCPK scan;
}
Phase 2 (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next
calls):
fetch()
{
retrieve all rows from row pointers stored in Unique;
free Unique;
retrieve all rows for CPK scan;
}
*/
class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I
......@@ -239,10 +245,10 @@ public:
/* last element in quick_selects list */
QUICK_RANGE_SELECT* last_quick_select;
/* quick select that uses Covering Clustered Primary Key (NULL if none) */
/* quick select that uses clustered primary key (NULL if none) */
QUICK_RANGE_SELECT* pk_quick_select;
/* true if this select is currently doing a CCPK scan */
/* true if this select is currently doing a clustered PK scan */
bool doing_pk_scan;
Unique *unique;
......
......@@ -98,7 +98,6 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
}
}
else if (select && select->quick)
//&& (select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE))
{
DBUG_PRINT("info",("using rr_quick"));
info->read_record=rr_quick;
......
......@@ -1233,7 +1233,8 @@ public:
}
bool get(TABLE *table);
static double get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
ulong max_in_memory_size);
friend int unique_write_to_file(gptr key, element_count count, Unique *unique);
friend int unique_write_to_ptrs(gptr key, element_count count, Unique *unique);
};
......
......@@ -63,12 +63,194 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
comp_func_fixed_arg);
/* If the following fail's the next add will also fail */
my_init_dynamic_array(&file_ptrs, sizeof(BUFFPEK), 16, 16);
/*
If you change the following, change it in get_max_elements function, too.
*/
max_elements= max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+size);
open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE,
MYF(MY_WME));
}
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
#define M_E (exp(1))
inline double log2_n_fact(double x)
{
return (2 * ( ((x)+1) * log(((x)+1)/M_E) + log(2*M_PI*((x)+1))/2 ) / log(2));
}
/*
Calculate cost of merge_buffers call.
NOTE
See comment near Unique::get_use_cost for cost formula derivation.
*/
static double get_merge_buffers_cost(uint* buff_sizes, uint elem_size,
int last, int f,int t)
{
uint sum= 0;
for (int i=f; i <= t; i++)
sum+= buff_sizes[i];
buff_sizes[last]= sum;
int n_buffers= t - f + 1;
double buf_length= sum*elem_size;
return (((double)buf_length/(n_buffers+1)) / IO_SIZE) * 2 * n_buffers +
buf_length * log(n_buffers) / (TIME_FOR_COMPARE_ROWID * log(2.0));
}
/*
Calculate cost of merging buffers into one in Unique::get, i.e. calculate
how long (in terms of disk seeks) the two call
merge_many_buffs(...);
merge_buffers(...);
will take.
SYNOPSIS
get_merge_many_buffs_cost()
alloc memory pool to use
maxbuffer # of full buffers.
max_n_elems # of elements in first maxbuffer buffers.
last_n_elems # of elements in last buffer.
elem_size size of buffer element.
NOTES
It is assumed that maxbuffer+1 buffers are merged, first maxbuffer buffers
contain max_n_elems each, last buffer contains last_n_elems elements.
The current implementation does a dumb simulation of merge_many_buffs
actions.
RETURN
>=0 Cost of merge in disk seeks.
<0 Out of memory.
*/
static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
uint maxbuffer, uint max_n_elems,
uint last_n_elems, int elem_size)
{
register int i;
double total_cost= 0.0;
int lastbuff;
uint* buff_sizes;
if (!(buff_sizes= (uint*)alloc_root(alloc, sizeof(uint) * (maxbuffer + 1))))
return -1.0;
for(i = 0; i < (int)maxbuffer; i++)
buff_sizes[i]= max_n_elems;
buff_sizes[maxbuffer]= last_n_elems;
if (maxbuffer >= MERGEBUFF2)
{
/* Simulate merge_many_buff */
while (maxbuffer >= MERGEBUFF2)
{
lastbuff=0;
for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF)
total_cost += get_merge_buffers_cost(buff_sizes, elem_size,
lastbuff++, i, i+MERGEBUFF-1);
total_cost += get_merge_buffers_cost(buff_sizes, elem_size,
lastbuff++, i, maxbuffer);
maxbuffer= (uint)lastbuff-1;
}
}
/* Simulate final merge_buff call. */
total_cost += get_merge_buffers_cost(buff_sizes, elem_size, 0, 0,
maxbuffer);
return total_cost;
}
/*
Calclulate cost of using Unique for processing nkeys elements of size
key_size using max_in_memory_size memory.
RETURN
Use cost as # of disk seeks.
NOTES
cost(using_unqiue) =
cost(create_trees) + (see #1)
cost(merge) + (see #2)
cost(read_result) (see #3)
1. Cost of trees creation
For each Unique::put operation there will be 2*log2(n+1) elements
comparisons, where n runs from 1 tree_size (we assume that all added
elements are different). Together this gives:
n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!) =
= 2*ln((N+1)!) / ln(2) = {using Stirling formula} =
= 2*( (N+1)*ln((N+1)/e) + (1/2)*ln(2*pi*(N+1)) / ln(2).
then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
Total cost of creating trees:
(n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
2. Cost of merging.
If only one tree is created by Unique no merging will be necessary.
Otherwise, we model execution of merge_many_buff function and count
#of merges. (The reason behind this is that number of buffers is small,
while size of buffers is big and we don't want to loose precision with
O(x)-style formula)
3. If only one tree is created by Unique no disk io will happen.
Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume
these will be random seeks.
*/
double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
ulong max_in_memory_size)
{
ulong max_elements_in_tree;
ulong last_tree_elems;
int n_full_trees; /* number of trees in unique - 1 */
double result;
max_elements_in_tree= max_in_memory_size /
ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
n_full_trees= nkeys / max_elements_in_tree;
last_tree_elems= nkeys % max_elements_in_tree;
/* Calculate cost of creating trees */
result= log2_n_fact(last_tree_elems);
if (n_full_trees)
result+= n_full_trees * log2_n_fact(max_elements_in_tree);
result /= TIME_FOR_COMPARE_ROWID;
/* Calculate cost of merging */
if (!n_full_trees)
return result;
/* There is more then one tree and merging is necessary. */
/* Add cost of writing all trees to disk. */
result += n_full_trees * ceil(key_size*max_elements_in_tree / IO_SIZE);
result += ceil(key_size*last_tree_elems / IO_SIZE);
/* Cost of merge */
result += get_merge_many_buffs_cost(alloc, n_full_trees,
max_elements_in_tree,
last_tree_elems, key_size);
/*
Add cost of reading the resulting sequence, assuming there were no
duplicate elements.
*/
result += ceil((double)key_size*nkeys/IO_SIZE);
return result;
}
Unique::~Unique()
{
close_cached_file(&file);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment