Commit e3705304 authored by Igor Babaev's avatar Igor Babaev

Take into account the number of null values in any used column when

calculating selectivity of conditions.
parent cb47f0a7
......@@ -6,12 +6,37 @@ select @@session.use_stat_tables;
COMPLEMENTARY
set @save_use_stat_tables=@@use_stat_tables;
set use_stat_tables='preferably';
DROP DATABASE IF EXISTS dbt3_s001;
CREATE DATABASE dbt3_s001;
use dbt3_s001;
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
set @save_histogram_size=@@histogram_size;
set @save_histogram_type=@@histogram_type;
set optimizer_use_condition_selectivity=3;
create table t1 (a int);
insert into t1 values
(9), (3), (2), (NULL), (NULL), (2), (NULL), (1), (5), (NULL);
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select * from mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
test t1 a 1 9 0.4000 4.0000 1.2000 0 NULL NULL
flush table t1;
explain extended
select * from t1 where a is null;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 40.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where isnull(`test`.`t1`.`a`)
explain extended
select * from t1 where a is not null;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 60.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` is not null)
drop table t1;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
DROP DATABASE IF EXISTS dbt3_s001;
CREATE DATABASE dbt3_s001;
use dbt3_s001;
=== Q15 ===
create view revenue0 (supplier_no, total_revenue) as
select l_suppkey, sum(l_extendedprice * (1 - l_discount))
......
......@@ -9,12 +9,37 @@ select @@session.use_stat_tables;
COMPLEMENTARY
set @save_use_stat_tables=@@use_stat_tables;
set use_stat_tables='preferably';
DROP DATABASE IF EXISTS dbt3_s001;
CREATE DATABASE dbt3_s001;
use dbt3_s001;
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
set @save_histogram_size=@@histogram_size;
set @save_histogram_type=@@histogram_type;
set optimizer_use_condition_selectivity=3;
create table t1 (a int);
insert into t1 values
(9), (3), (2), (NULL), (NULL), (2), (NULL), (1), (5), (NULL);
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select * from mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
test t1 a 1 9 0.4000 4.0000 1.2000 0 NULL NULL
flush table t1;
explain extended
select * from t1 where a is null;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 40.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where isnull(`test`.`t1`.`a`)
explain extended
select * from t1 where a is not null;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 10 60.00 Using where
Warnings:
Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where (`test`.`t1`.`a` is not null)
drop table t1;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
DROP DATABASE IF EXISTS dbt3_s001;
CREATE DATABASE dbt3_s001;
use dbt3_s001;
=== Q15 ===
create view revenue0 (supplier_no, total_revenue) as
select l_suppkey, sum(l_extendedprice * (1 - l_discount))
......
......@@ -7,6 +7,34 @@ set @save_use_stat_tables=@@use_stat_tables;
set use_stat_tables='preferably';
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
set @save_histogram_size=@@histogram_size;
set @save_histogram_type=@@histogram_type;
# check that statistics on nulls is used
set optimizer_use_condition_selectivity=3;
create table t1 (a int);
insert into t1 values
(9), (3), (2), (NULL), (NULL), (2), (NULL), (1), (5), (NULL);
analyze table t1;
select * from mysql.column_stats;
flush table t1;
explain extended
select * from t1 where a is null;
explain extended
select * from t1 where a is not null;
drop table t1;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
--disable_warnings
DROP DATABASE IF EXISTS dbt3_s001;
--enable_warnings
......@@ -15,10 +43,6 @@ CREATE DATABASE dbt3_s001;
use dbt3_s001;
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
set @save_histogram_size=@@histogram_size;
set @save_histogram_type=@@histogram_type;
--disable_query_log
--disable_result_log
--disable_warnings
......
......@@ -3342,25 +3342,41 @@ double get_column_range_cardinality(Field *field,
double res;
TABLE *table= field->table;
Column_statistics *col_stats= table->field[field->field_index]->read_stats;
double tab_records= table->stat_records();
if (!col_stats)
res= table->stat_records();
return tab_records;
double col_nulls= tab_records * col_stats->get_nulls_ratio();
double col_non_nulls= tab_records - col_nulls;
if (col_non_nulls < 1)
res= 0;
else if (min_endp && max_endp && min_endp->length == max_endp->length &&
!memcmp(min_endp->key, max_endp->key, min_endp->length))
{
double avg_frequency= col_stats->get_avg_frequency();
res= avg_frequency;
if (avg_frequency > 1.0 + 0.000001 &&
col_stats->min_value && col_stats->max_value)
if (field->null_ptr && min_endp->key[0])
{
Histogram *hist= &col_stats->histogram;
if (hist->get_size() > 0)
/* This is null single point range */
res= col_nulls;
}
else
{
double avg_frequency= col_stats->get_avg_frequency();
res= avg_frequency;
if (avg_frequency > 1.0 + 0.000001 &&
col_stats->min_value && col_stats->max_value)
{
double pos= field->middle_point_pos(col_stats->min_value,
col_stats->max_value);
res= table->stat_records() *
hist->point_selectivity(pos,
avg_frequency / table->stat_records());
Histogram *hist= &col_stats->histogram;
if (hist->get_size() > 0)
{
double pos= field->middle_point_pos(col_stats->min_value,
col_stats->max_value);
res= col_non_nulls *
hist->point_selectivity(pos,
avg_frequency / col_non_nulls);
}
}
}
}
......@@ -3394,10 +3410,10 @@ double get_column_range_cardinality(Field *field,
sel= (max_mp_pos - min_mp_pos);
else
sel= hist->range_selectivity(min_mp_pos, max_mp_pos);
res= table->stat_records() * sel;
res= col_non_nulls * sel;
}
else
res= table->stat_records();
res= col_non_nulls;
}
return res;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment