Commit 90554986 authored by Igor Babaev

Fixed several bugs for mwl #253.

One of them is quite serious: the function table_cond_selectivity used
the TABLE_REF structures for ref/eq_ref access methods as if they had been
filled. In fact, these structures are filled after the best execution plan
has been chosen.

The other bugs happened due to:
- an erroneous attempt to get statistics on the result of materialization
  of a view
- incorrect handling of ranges with no left/right limits when calculating
  selectivity of range conditions on non-indexed columns
- lack of cleanup for some newly introduced fields
 
parent e59e5296
This diff is collapsed.
This diff is collapsed.
......@@ -31,6 +31,119 @@ customer, lineitem, nation, orders, part, partsupp, region, supplier;
--enable_result_log
--enable_query_log
--echo === Q15 ===
create view revenue0 (supplier_no, total_revenue) as
select l_suppkey, sum(l_extendedprice * (1 - l_discount))
from lineitem
where
l_shipdate >= '1995-08-01'
and l_shipdate < date_add('1995-08-01', interval 90 day)
group by l_suppkey;
let $Q15=
select s_suppkey, s_name, s_address, s_phone, total_revenue
from supplier, revenue0
where s_suppkey = supplier_no
and total_revenue = (select max(total_revenue) from revenue0)
order by s_suppkey;
set @save_optimizer_switch=@@optimizer_switch;
set optimizer_switch='index_condition_pushdown=off';
set optimizer_use_condition_selectivity=1;
eval EXPLAIN EXTENDED $Q15;
eval $Q15;
set optimizer_use_condition_selectivity=3;
eval EXPLAIN EXTENDED $Q15;
eval $Q15;
set optimizer_switch=@save_optimizer_switch;
drop view revenue0;
--echo === Q16 ===
let $Q16=
select p_brand, p_type, p_size, count(distinct ps_suppkey) as supplier_cnt
from partsupp, part
where p_partkey = ps_partkey
and p_brand <> 'Brand#11'
and p_type not like 'SMALL POLISHED%'
and p_size in (49, 37, 27, 5, 40, 6, 22, 8)
and ps_suppkey not in (select s_suppkey from supplier
where s_comment like '%Customer%Complaints%')
group by p_brand, p_type, p_size
order by supplier_cnt desc, p_brand, p_type, p_size;
set optimizer_use_condition_selectivity=1;
eval EXPLAIN EXTENDED $Q16;
eval $Q16;
set optimizer_use_condition_selectivity=3;
eval EXPLAIN EXTENDED $Q16;
eval $Q16;
set optimizer_use_condition_selectivity=4;
eval EXPLAIN EXTENDED $Q16;
eval $Q16;
--echo === Q18 ===
let $Q18=
select
c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity)
from customer, orders, lineitem
where
o_orderkey in (select l_orderkey from lineitem
group by l_orderkey having sum(l_quantity) > 250)
and c_custkey = o_custkey
and o_orderkey = l_orderkey
group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
order by o_totalprice desc, o_orderdate;
set optimizer_use_condition_selectivity=1;
eval EXPLAIN EXTENDED $Q18;
eval $Q18;
set optimizer_use_condition_selectivity=3;
eval EXPLAIN EXTENDED $Q18;
eval $Q18;
--echo === Q22 ===
let $Q22=
select cntrycode, count(*) as numcust, sum(c_acctbal) as totacctbal
from (
select substr(c_phone, 1, 2) as cntrycode, c_acctbal
from customer
where
substr(c_phone, 1, 2) in ('10', '20', '14', '19', '11', '28', '25')
and c_acctbal > (select avg(c_acctbal) from customer
where c_acctbal > 0.00
and substr(c_phone, 1, 2) in
('10', '20', '14', '19', '11', '28', '25'))
and not exists (select * from orders where o_custkey = c_custkey)
) as vip
group by cntrycode
order by cntrycode;
set optimizer_use_condition_selectivity=1;
eval EXPLAIN EXTENDED $Q22;
eval $Q22;
set optimizer_use_condition_selectivity=3;
eval EXPLAIN EXTENDED $Q22;
eval $Q22;
--echo === Q20 ===
let $Q20=
select sql_calc_found_rows
s_name, s_address
......@@ -51,11 +164,11 @@ and n_name = 'UNITED STATES'
order by s_name
limit 10;
set optimizer_use_condition_selectivity=1;
eval EXPLAIN EXTENDED $Q20;
eval $Q20;
set optimizer_use_condition_selectivity=3;
eval EXPLAIN EXTENDED $Q20;
eval $Q20;
......@@ -66,7 +179,6 @@ flush table part;
ANALYZE TABLE part PERSISTENT FOR COLUMNS(p_name) INDEXES();
set optimizer_use_condition_selectivity=4;
eval EXPLAIN EXTENDED $Q20;
eval $Q20;
......@@ -79,6 +191,7 @@ ANALYZE TABLE nation PERSISTENT FOR COLUMNS(n_name) INDEXES();
eval EXPLAIN EXTENDED $Q20;
eval $Q20;
DROP DATABASE dbt3_s001;
set histogram_size=@save_histogram_size;
......
......@@ -6878,7 +6878,8 @@ double JOIN::get_examined_rows()
static
double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
table_map rem_tables, TABLE_REF *ref)
table_map rem_tables, uint keyparts,
uint16 *ref_keyuse_steps)
{
double sel= 1.0;
COND_EQUAL *cond_equal= join->cond_equal;
......@@ -6886,15 +6887,15 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
if (!cond_equal || !cond_equal->current_level.elements)
return sel;
Item_equal *item_equal;
List_iterator_fast<Item_equal> it(cond_equal->current_level);
table_map table_bit= s->table->map;
if (!s->keyuse)
if (!s->keyuse)
return sel;
KEY *key_info= s->get_keyinfo_by_key_no(s->ref.key);
Item_equal *item_equal;
List_iterator_fast<Item_equal> it(cond_equal->current_level);
TABLE *table= s->table;
table_map table_bit= table->map;
POSITION *pos= &join->positions[idx];
while ((item_equal= it++))
{
/*
......@@ -6916,17 +6917,25 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
Field *fld= fi.get_curr_field();
if (fld->table->map != table_bit)
continue;
if (ref == 0)
if (pos->key == 0)
adjust_sel= TRUE;
else
{
uint i;
for (i= 0; i < ref->key_parts; i++)
KEYUSE *keyuse= pos->key;
uint key= keyuse->key;
for (i= 0; i < keyparts; i++)
{
if (fld->field_index == key_info->key_part[i].fieldnr - 1)
uint fldno;
if (is_hash_join_key_no(key))
fldno= keyuse->keypart;
else
fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1;
if (fld->field_index == fldno)
break;
}
if (i == ref->key_parts)
if (i == keyparts)
{
/*
Field fld is included in multiple equality item_equal
......@@ -6936,11 +6945,14 @@ double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
equal to fld.
*/
adjust_sel= TRUE;
for (uint j= 0; j < ref->key_parts && adjust_sel; j++)
for (uint j= 0; j < keyparts && adjust_sel; j++)
{
if (ref->items[j]->real_item()->type() == Item::FIELD_ITEM)
if (j > 0)
keyuse+= ref_keyuse_steps[j-1];
Item *ref_item= keyuse->val;
if (ref_item->real_item()->type() == Item::FIELD_ITEM)
{
Item_field *field_item= (Item_field *) (ref->items[j]);
Item_field *field_item= (Item_field *) ref_item;
if (item_equal->contains(field_item->field))
adjust_sel= FALSE;
}
......@@ -6978,33 +6990,70 @@ static
double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
table_map rem_tables)
{
uint16 ref_keyuse_steps[MAX_REF_PARTS - 1];
Field *field;
TABLE *table= s->table;
MY_BITMAP *read_set= table->read_set;
double sel= s->table->cond_selectivity;
double table_records= table->stat_records();
TABLE_REF *ref= s->type == JT_REF || s->type == JT_EQ_REF ? &s->ref : NULL;
POSITION *pos= &join->positions[idx];
uint keyparts= 0;
uint found_part_ref_or_null= 0;
/* Discount the selectivity of the access method used to join table s */
if (s->quick && s->quick->index != MAX_KEY)
{
if (join->positions[idx].key == 0)
if (pos->key == 0)
{
sel*= table->quick_rows[s->quick->index]/table_records;
}
}
else if (ref)
else if (pos->key != 0)
{
/* A ref/ access or hash join is used to join table s */
KEY *key_info= s->get_keyinfo_by_key_no(ref->key);
for (uint i= 0; i < ref->key_parts; i++)
/* A ref/ access or hash join is used to join table */
KEYUSE *keyuse= pos->key;
KEYUSE *prev_ref_keyuse= keyuse;
uint key= keyuse->key;
do
{
if (ref->items[i]->const_item())
if (!(keyuse->used_tables & (rem_tables | table->map)))
{
uint fldno= key_info->key_part[i].fieldnr - 1;
sel*= table->field[fldno]->cond_selectivity;
if (are_tables_local(s, keyuse->val->used_tables()))
{
if (is_hash_join_key_no(key))
{
if (keyparts == keyuse->keypart)
keyparts++;
}
else
{
if (keyparts == keyuse->keypart &&
!(~(keyuse->val->used_tables()) & pos->ref_depend_map) &&
!(found_part_ref_or_null & keyuse->optimize))
{
keyparts++;
found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
}
}
if (keyparts > keyuse->keypart)
{
uint fldno;
if (is_hash_join_key_no(key))
fldno= keyuse->keypart;
else
fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1;
if (keyuse->val->const_item())
sel*= table->field[fldno]->cond_selectivity;
if (keyparts > 1)
{
ref_keyuse_steps[keyparts-2]= keyuse - prev_ref_keyuse;
prev_ref_keyuse= keyuse;
}
}
}
}
}
keyuse++;
} while (keyuse->table == table && keyuse->key == key);
}
for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
......@@ -7024,7 +7073,8 @@ double table_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
}
}
sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables, ref);
sel*= table_multi_eq_cond_selectivity(join, idx, s, rem_tables,
keyparts, ref_keyuse_steps);
return sel;
}
......
......@@ -3303,6 +3303,18 @@ double get_column_avg_frequency(Field * field)
{
double res;
TABLE *table= field->table;
/*
Statistics is shared by table instances and is accessed through
the table share. If table->s->field is not set for 'table', then
no column statistics is available for the table .
*/
if (!table->s->field)
{
res= table->stat_records();
return res;
}
Column_statistics *col_stats= table->s->field[field->field_index]->read_stats;
if (!col_stats)
......@@ -3323,8 +3335,8 @@ double get_column_range_cardinality(Field *field,
if (!col_stats)
res= table->stat_records();
else if (min_endp->length == max_endp->length &&
!memcmp(min_endp->key, max_endp->key, min_endp->length))
else if (min_endp && max_endp && min_endp->length == max_endp->length &&
!memcmp(min_endp->key, max_endp->key, min_endp->length))
{
double avg_frequency= col_stats->get_avg_frequency();
res= avg_frequency;
......@@ -3346,13 +3358,27 @@ double get_column_range_cardinality(Field *field,
{
if (col_stats->min_value && col_stats->max_value)
{
double sel;
store_key_image_to_rec(field, (uchar *) min_endp->key, min_endp->length);
double min_mp_pos= field->middle_point_pos(col_stats->min_value,
col_stats->max_value);
store_key_image_to_rec(field, (uchar *) max_endp->key, max_endp->length);
double max_mp_pos= field->middle_point_pos(col_stats->min_value,
col_stats->max_value);
double sel, min_mp_pos, max_mp_pos;
if (min_endp)
{
store_key_image_to_rec(field, (uchar *) min_endp->key,
min_endp->length);
min_mp_pos= field->middle_point_pos(col_stats->min_value,
col_stats->max_value);
}
else
min_mp_pos= 0.0;
if (max_endp)
{
store_key_image_to_rec(field, (uchar *) max_endp->key,
max_endp->length);
max_mp_pos= field->middle_point_pos(col_stats->min_value,
col_stats->max_value);
}
else
max_mp_pos= 1.0;
Histogram *hist= &col_stats->histogram;
if (hist->get_size() == 0)
sel= (max_mp_pos - min_mp_pos);
......
......@@ -3974,6 +3974,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
file->ha_start_of_new_statement();
reginfo.impossible_range= 0;
created= TRUE;
cond_selectivity= 1.0;
/* Catch wrong handling of the auto_increment_field_not_null. */
DBUG_ASSERT(!auto_increment_field_not_null);
......@@ -3982,6 +3983,11 @@ void TABLE::init(THD *thd, TABLE_LIST *tl)
pos_in_table_list= tl;
clear_column_bitmaps();
for (Field **f_ptr= field ; *f_ptr ; f_ptr++)
{
(*f_ptr)->next_equal_field= NULL;
(*f_ptr)->cond_selectivity= 1.0;
}
DBUG_ASSERT(key_read == 0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment