Commit 9e1ca105 authored by Igor Babaev

Added the type of histogram for mwl #253.

Introduced double precision height-balanced histograms.
parent e59e5296
......@@ -204,6 +204,11 @@ The following options may be given as the first argument:
-?, --help Display this help and exit.
--histogram-size=# Number of bytes used for a histogram. If set to 0, no
histograms are created by ANALYZE.
--histogram-type=name
Specifies type of the histograms created by ANALYZE.
Possible values are: SINGLE_PREC_HB - single precision
height-balanced, DOUBLE_PREC_HB - double precision
height-balanced.
--ignore-builtin-innodb
Disable initialization of builtin InnoDB plugin
--ignore-db-dirs=name
......@@ -932,6 +937,7 @@ general-log FALSE
group-concat-max-len 1024
help TRUE
histogram-size 0
histogram-type SINGLE_PREC_HB
ignore-builtin-innodb FALSE
ignore-db-dirs
init-connect
......
......@@ -11,6 +11,7 @@ CREATE DATABASE dbt3_s001;
use dbt3_s001;
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
set @save_histogram_size=@@histogram_size;
set @save_histogram_type=@@histogram_type;
EXPLAIN EXTENDED select sql_calc_found_rows
s_name, s_address
from supplier, nation
......@@ -60,6 +61,10 @@ order by s_name
limit 10;
s_name s_address
Supplier#000000010 Saygah3gYWMp72i PY
SELECT ((SELECT COUNT(*) FROM part WHERE p_name LIKE 'g%') /
(SELECT COUNT(*) FROM part)) AS sel;
sel
0.0600
set optimizer_use_condition_selectivity=3;
EXPLAIN EXTENDED select sql_calc_found_rows
s_name, s_address
......@@ -111,10 +116,10 @@ limit 10;
s_name s_address
Supplier#000000010 Saygah3gYWMp72i PY
set histogram_size=15;
flush table part;
ANALYZE TABLE part PERSISTENT FOR COLUMNS(p_name) INDEXES();
Table Op Msg_type Msg_text
dbt3_s001.part analyze status Table is already up to date
flush table part;
set optimizer_use_condition_selectivity=4;
EXPLAIN EXTENDED select sql_calc_found_rows
s_name, s_address
......@@ -164,11 +169,66 @@ order by s_name
limit 10;
s_name s_address
Supplier#000000010 Saygah3gYWMp72i PY
set histogram_type='DOUBLE_PREC_HB';
set histogram_size=30;
ANALYZE TABLE part PERSISTENT FOR COLUMNS(p_name) INDEXES();
Table Op Msg_type Msg_text
dbt3_s001.part analyze status Table is already up to date
flush table part;
EXPLAIN EXTENDED select sql_calc_found_rows
s_name, s_address
from supplier, nation
where s_suppkey in (select ps_suppkey from partsupp
where ps_partkey in (select p_partkey from part
where p_name like 'g%')
and ps_availqty >
(select 0.5 * sum(l_quantity)
from lineitem
where l_partkey = ps_partkey
and l_suppkey = ps_suppkey
and l_shipdate >= date('1993-01-01')
and l_shipdate < date('1993-01-01') +
interval '1' year ))
and s_nationkey = n_nationkey
and n_name = 'UNITED STATES'
order by s_name
limit 10;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY nation ALL PRIMARY NULL NULL NULL 25 4.00 Using where; Using temporary; Using filesort
1 PRIMARY supplier ref PRIMARY,i_s_nationkey i_s_nationkey 5 dbt3_s001.nation.n_nationkey 1 100.00
1 PRIMARY part ALL PRIMARY NULL NULL NULL 200 6.25 Using where
1 PRIMARY partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey i_ps_partkey 4 dbt3_s001.part.p_partkey 3 100.00 Using where; FirstMatch(supplier)
4 DEPENDENT SUBQUERY lineitem ref i_l_shipdate,i_l_suppkey_partkey,i_l_partkey,i_l_suppkey i_l_suppkey_partkey 10 dbt3_s001.partsupp.ps_partkey,dbt3_s001.partsupp.ps_suppkey 8 15.14 Using where
Warnings:
Note 1276 Field or reference 'dbt3_s001.partsupp.ps_partkey' of SELECT #4 was resolved in SELECT #2
Note 1276 Field or reference 'dbt3_s001.partsupp.ps_suppkey' of SELECT #4 was resolved in SELECT #2
Note 1003 select sql_calc_found_rows `dbt3_s001`.`supplier`.`s_name` AS `s_name`,`dbt3_s001`.`supplier`.`s_address` AS `s_address` from `dbt3_s001`.`supplier` semi join (`dbt3_s001`.`part` join `dbt3_s001`.`partsupp`) join `dbt3_s001`.`nation` where ((`dbt3_s001`.`partsupp`.`ps_suppkey` = `dbt3_s001`.`supplier`.`s_suppkey`) and (`dbt3_s001`.`partsupp`.`ps_partkey` = `dbt3_s001`.`part`.`p_partkey`) and (`dbt3_s001`.`nation`.`n_name` = 'UNITED STATES') and (`dbt3_s001`.`supplier`.`s_nationkey` = `dbt3_s001`.`nation`.`n_nationkey`) and (`dbt3_s001`.`partsupp`.`ps_availqty` > <expr_cache><`dbt3_s001`.`partsupp`.`ps_partkey`,`dbt3_s001`.`partsupp`.`ps_suppkey`>((select (0.5 * sum(`dbt3_s001`.`lineitem`.`l_quantity`)) from `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`lineitem`.`l_partkey` = `dbt3_s001`.`partsupp`.`ps_partkey`) and (`dbt3_s001`.`lineitem`.`l_suppkey` = `dbt3_s001`.`partsupp`.`ps_suppkey`) and (`dbt3_s001`.`lineitem`.`l_shipDATE` >= <cache>(cast('1993-01-01' as date))) and (`dbt3_s001`.`lineitem`.`l_shipDATE` < <cache>((cast('1993-01-01' as date) + interval '1' year))))))) and (`dbt3_s001`.`part`.`p_name` like 'g%')) order by `dbt3_s001`.`supplier`.`s_name` limit 10
select sql_calc_found_rows
s_name, s_address
from supplier, nation
where s_suppkey in (select ps_suppkey from partsupp
where ps_partkey in (select p_partkey from part
where p_name like 'g%')
and ps_availqty >
(select 0.5 * sum(l_quantity)
from lineitem
where l_partkey = ps_partkey
and l_suppkey = ps_suppkey
and l_shipdate >= date('1993-01-01')
and l_shipdate < date('1993-01-01') +
interval '1' year ))
and s_nationkey = n_nationkey
and n_name = 'UNITED STATES'
order by s_name
limit 10;
s_name s_address
Supplier#000000010 Saygah3gYWMp72i PY
set histogram_type='SINGLE_PREC_HB';
set histogram_size=24;
flush table nation;
ANALYZE TABLE nation PERSISTENT FOR COLUMNS(n_name) INDEXES();
Table Op Msg_type Msg_text
dbt3_s001.nation analyze status Table is already up to date
flush table nation;
EXPLAIN EXTENDED select sql_calc_found_rows
s_name, s_address
from supplier, nation
......@@ -218,6 +278,7 @@ limit 10;
s_name s_address
Supplier#000000010 Saygah3gYWMp72i PY
DROP DATABASE dbt3_s001;
set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
set use_stat_tables=@save_use_stat_tables;
......@@ -14,6 +14,7 @@ CREATE DATABASE dbt3_s001;
use dbt3_s001;
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
set @save_histogram_size=@@histogram_size;
set @save_histogram_type=@@histogram_type;
EXPLAIN EXTENDED select sql_calc_found_rows
s_name, s_address
from supplier, nation
......@@ -63,6 +64,10 @@ order by s_name
limit 10;
s_name s_address
Supplier#000000010 Saygah3gYWMp72i PY
SELECT ((SELECT COUNT(*) FROM part WHERE p_name LIKE 'g%') /
(SELECT COUNT(*) FROM part)) AS sel;
sel
0.0600
set optimizer_use_condition_selectivity=3;
EXPLAIN EXTENDED select sql_calc_found_rows
s_name, s_address
......@@ -114,10 +119,10 @@ limit 10;
s_name s_address
Supplier#000000010 Saygah3gYWMp72i PY
set histogram_size=15;
flush table part;
ANALYZE TABLE part PERSISTENT FOR COLUMNS(p_name) INDEXES();
Table Op Msg_type Msg_text
dbt3_s001.part analyze status OK
flush table part;
set optimizer_use_condition_selectivity=4;
EXPLAIN EXTENDED select sql_calc_found_rows
s_name, s_address
......@@ -168,11 +173,67 @@ order by s_name
limit 10;
s_name s_address
Supplier#000000010 Saygah3gYWMp72i PY
set histogram_type='DOUBLE_PREC_HB';
set histogram_size=30;
ANALYZE TABLE part PERSISTENT FOR COLUMNS(p_name) INDEXES();
Table Op Msg_type Msg_text
dbt3_s001.part analyze status OK
flush table part;
EXPLAIN EXTENDED select sql_calc_found_rows
s_name, s_address
from supplier, nation
where s_suppkey in (select ps_suppkey from partsupp
where ps_partkey in (select p_partkey from part
where p_name like 'g%')
and ps_availqty >
(select 0.5 * sum(l_quantity)
from lineitem
where l_partkey = ps_partkey
and l_suppkey = ps_suppkey
and l_shipdate >= date('1993-01-01')
and l_shipdate < date('1993-01-01') +
interval '1' year ))
and s_nationkey = n_nationkey
and n_name = 'UNITED STATES'
order by s_name
limit 10;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY nation ALL PRIMARY NULL NULL NULL 25 4.00 Using where; Using temporary; Using filesort
1 PRIMARY supplier ref PRIMARY,i_s_nationkey i_s_nationkey 5 dbt3_s001.nation.n_nationkey 1 100.00
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 func 1 100.00
2 MATERIALIZED part ALL PRIMARY NULL NULL NULL 200 6.25 Using where
2 MATERIALIZED partsupp ref PRIMARY,i_ps_partkey,i_ps_suppkey PRIMARY 4 dbt3_s001.part.p_partkey 3 100.00 Using where
4 DEPENDENT SUBQUERY lineitem ref i_l_shipdate,i_l_suppkey_partkey,i_l_partkey,i_l_suppkey i_l_suppkey_partkey 10 dbt3_s001.partsupp.ps_partkey,dbt3_s001.partsupp.ps_suppkey 8 14.37 Using where
Warnings:
Note 1276 Field or reference 'dbt3_s001.partsupp.ps_partkey' of SELECT #4 was resolved in SELECT #2
Note 1276 Field or reference 'dbt3_s001.partsupp.ps_suppkey' of SELECT #4 was resolved in SELECT #2
Note 1003 select sql_calc_found_rows `dbt3_s001`.`supplier`.`s_name` AS `s_name`,`dbt3_s001`.`supplier`.`s_address` AS `s_address` from `dbt3_s001`.`supplier` semi join (`dbt3_s001`.`part` join `dbt3_s001`.`partsupp`) join `dbt3_s001`.`nation` where ((`dbt3_s001`.`partsupp`.`ps_partkey` = `dbt3_s001`.`part`.`p_partkey`) and (`dbt3_s001`.`nation`.`n_name` = 'UNITED STATES') and (`dbt3_s001`.`supplier`.`s_nationkey` = `dbt3_s001`.`nation`.`n_nationkey`) and (`dbt3_s001`.`partsupp`.`ps_availqty` > <expr_cache><`dbt3_s001`.`partsupp`.`ps_partkey`,`dbt3_s001`.`partsupp`.`ps_suppkey`>((select (0.5 * sum(`dbt3_s001`.`lineitem`.`l_quantity`)) from `dbt3_s001`.`lineitem` where ((`dbt3_s001`.`lineitem`.`l_partkey` = `dbt3_s001`.`partsupp`.`ps_partkey`) and (`dbt3_s001`.`lineitem`.`l_suppkey` = `dbt3_s001`.`partsupp`.`ps_suppkey`) and (`dbt3_s001`.`lineitem`.`l_shipDATE` >= <cache>(cast('1993-01-01' as date))) and (`dbt3_s001`.`lineitem`.`l_shipDATE` < <cache>((cast('1993-01-01' as date) + interval '1' year))))))) and (`dbt3_s001`.`part`.`p_name` like 'g%')) order by `dbt3_s001`.`supplier`.`s_name` limit 10
select sql_calc_found_rows
s_name, s_address
from supplier, nation
where s_suppkey in (select ps_suppkey from partsupp
where ps_partkey in (select p_partkey from part
where p_name like 'g%')
and ps_availqty >
(select 0.5 * sum(l_quantity)
from lineitem
where l_partkey = ps_partkey
and l_suppkey = ps_suppkey
and l_shipdate >= date('1993-01-01')
and l_shipdate < date('1993-01-01') +
interval '1' year ))
and s_nationkey = n_nationkey
and n_name = 'UNITED STATES'
order by s_name
limit 10;
s_name s_address
Supplier#000000010 Saygah3gYWMp72i PY
set histogram_type='SINGLE_PREC_HB';
set histogram_size=24;
flush table nation;
ANALYZE TABLE nation PERSISTENT FOR COLUMNS(n_name) INDEXES();
Table Op Msg_type Msg_text
dbt3_s001.nation analyze status OK
flush table nation;
EXPLAIN EXTENDED select sql_calc_found_rows
s_name, s_address
from supplier, nation
......@@ -223,6 +284,7 @@ limit 10;
s_name s_address
Supplier#000000010 Saygah3gYWMp72i PY
DROP DATABASE dbt3_s001;
set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
set use_stat_tables=@save_use_stat_tables;
......
This diff is collapsed.
......@@ -287,6 +287,7 @@ column_stats CREATE TABLE `column_stats` (
`avg_length` decimal(12,4) DEFAULT NULL,
`avg_frequency` decimal(12,4) DEFAULT NULL,
`hist_size` tinyint(3) unsigned DEFAULT NULL,
`hist_type` enum('SINGLE_PREC_HB','DOUBLE_PREC_HB') COLLATE utf8_bin DEFAULT NULL,
`histogram` varbinary(255) DEFAULT NULL,
PRIMARY KEY (`db_name`,`table_name`,`column_name`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_bin COMMENT='Statistics on Columns'
......
......@@ -13,8 +13,9 @@ def mysql column_stats avg_frequency 8 NULL YES decimal NULL NULL 12 4 NULL NULL
def mysql column_stats avg_length 7 NULL YES decimal NULL NULL 12 4 NULL NULL NULL decimal(12,4) select,insert,update,references
def mysql column_stats column_name 3 NULL NO varchar 64 192 NULL NULL NULL utf8 utf8_bin varchar(64) PRI select,insert,update,references
def mysql column_stats db_name 1 NULL NO varchar 64 192 NULL NULL NULL utf8 utf8_bin varchar(64) PRI select,insert,update,references
def mysql column_stats histogram 10 NULL YES varbinary 255 255 NULL NULL NULL NULL NULL varbinary(255) select,insert,update,references
def mysql column_stats histogram 11 NULL YES varbinary 255 255 NULL NULL NULL NULL NULL varbinary(255) select,insert,update,references
def mysql column_stats hist_size 9 NULL YES tinyint NULL NULL 3 0 NULL NULL NULL tinyint(3) unsigned select,insert,update,references
def mysql column_stats hist_type 10 NULL YES enum 14 42 NULL NULL NULL utf8 utf8_bin enum('SINGLE_PREC_HB','DOUBLE_PREC_HB') select,insert,update,references
def mysql column_stats max_value 5 NULL YES varchar 255 765 NULL NULL NULL utf8 utf8_bin varchar(255) select,insert,update,references
def mysql column_stats min_value 4 NULL YES varchar 255 765 NULL NULL NULL utf8 utf8_bin varchar(255) select,insert,update,references
def mysql column_stats nulls_ratio 6 NULL YES decimal NULL NULL 12 4 NULL NULL NULL decimal(12,4) select,insert,update,references
......@@ -332,6 +333,7 @@ NULL mysql column_stats nulls_ratio decimal NULL NULL NULL NULL decimal(12,4)
NULL mysql column_stats avg_length decimal NULL NULL NULL NULL decimal(12,4)
NULL mysql column_stats avg_frequency decimal NULL NULL NULL NULL decimal(12,4)
NULL mysql column_stats hist_size tinyint NULL NULL NULL NULL tinyint(3) unsigned
3.0000 mysql column_stats hist_type enum 14 42 utf8 utf8_bin enum('SINGLE_PREC_HB','DOUBLE_PREC_HB')
1.0000 mysql column_stats histogram varbinary 255 255 NULL NULL varbinary(255)
3.0000 mysql db Host char 60 180 utf8 utf8_bin char(60)
3.0000 mysql db Db char 64 192 utf8 utf8_bin char(64)
......
SET @start_global_value = @@global.histogram_type;
SELECT @start_global_value;
@start_global_value
SINGLE_PREC_HB
SET @start_session_value = @@session.histogram_type;
SELECT @start_session_value;
@start_session_value
SINGLE_PREC_HB
SET @@global.histogram_type = 1;
SET @@global.histogram_type = DEFAULT;
SELECT @@global.histogram_type;
@@global.histogram_type
SINGLE_PREC_HB
SET @@global.histogram_type = 0;
SELECT @@global.histogram_type;
@@global.histogram_type
SINGLE_PREC_HB
SET @@global.histogram_type = 1;
SELECT @@global.histogram_type;
@@global.histogram_type
DOUBLE_PREC_HB
SET @@global.histogram_type = SINGLE_PREC_HB;
SELECT @@global.histogram_type;
@@global.histogram_type
SINGLE_PREC_HB
SET @@global.histogram_type = DOUBLE_PREC_HB;
SELECT @@global.histogram_type;
@@global.histogram_type
DOUBLE_PREC_HB
SET @@session.histogram_type = 0;
SELECT @@session.histogram_type;
@@session.histogram_type
SINGLE_PREC_HB
SET @@session.histogram_type = 1;
SELECT @@session.histogram_type;
@@session.histogram_type
DOUBLE_PREC_HB
SET @@session.histogram_type = SINGLE_PREC_HB;
SELECT @@session.histogram_type;
@@session.histogram_type
SINGLE_PREC_HB
SET @@session.histogram_type = DOUBLE_PREC_HB;
SELECT @@session.histogram_type;
@@session.histogram_type
DOUBLE_PREC_HB
set sql_mode=TRADITIONAL;
SET @@global.histogram_type = 10;
ERROR 42000: Variable 'histogram_type' can't be set to the value of '10'
SET @@global.histogram_type = -1024;
ERROR 42000: Variable 'histogram_type' can't be set to the value of '-1024'
SET @@global.histogram_type = 2.4;
ERROR 42000: Incorrect argument type to variable 'histogram_type'
SET @@global.histogram_type = OFF;
ERROR 42000: Variable 'histogram_type' can't be set to the value of 'OFF'
SET @@session.histogram_type = 10;
ERROR 42000: Variable 'histogram_type' can't be set to the value of '10'
SET @@session.histogram_type = -2;
ERROR 42000: Variable 'histogram_type' can't be set to the value of '-2'
SET @@session.histogram_type = 1.2;
ERROR 42000: Incorrect argument type to variable 'histogram_type'
SET @@session.histogram_type = ON;
ERROR 42000: Variable 'histogram_type' can't be set to the value of 'ON'
SELECT * FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='histogram_type';
VARIABLE_NAME VARIABLE_VALUE
HISTOGRAM_TYPE DOUBLE_PREC_HB
SELECT * FROM INFORMATION_SCHEMA.SESSION_VARIABLES
WHERE VARIABLE_NAME='histogram_type';
VARIABLE_NAME VARIABLE_VALUE
HISTOGRAM_TYPE DOUBLE_PREC_HB
SET @@global.histogram_type = @start_global_value;
SELECT @@global.histogram_type;
@@global.histogram_type
SINGLE_PREC_HB
SET @@session.histogram_type = @start_session_value;
SELECT @@session.histogram_type;
@@session.histogram_type
SINGLE_PREC_HB
set sql_mode='';
--source include/load_sysvars.inc
#############################################################
# Save initial value #
#############################################################
SET @start_global_value = @@global.histogram_type;
SELECT @start_global_value;
SET @start_session_value = @@session.histogram_type;
SELECT @start_session_value;
##############################################################
# Display the DEFAULT value of histogram_type #
##############################################################
SET @@global.histogram_type = 1;
SET @@global.histogram_type = DEFAULT;
SELECT @@global.histogram_type;
#################################################################################
# Change the value of histogram_type to a valid value for GLOBAL Scope #
#################################################################################
SET @@global.histogram_type = 0;
SELECT @@global.histogram_type;
SET @@global.histogram_type = 1;
SELECT @@global.histogram_type;
SET @@global.histogram_type = SINGLE_PREC_HB;
SELECT @@global.histogram_type;
SET @@global.histogram_type = DOUBLE_PREC_HB;
SELECT @@global.histogram_type;
###################################################################################
# Change the value of histogram_type to a valid value for SESSION Scope #
###################################################################################
SET @@session.histogram_type = 0;
SELECT @@session.histogram_type;
SET @@session.histogram_type = 1;
SELECT @@session.histogram_type;
SET @@session.histogram_type = SINGLE_PREC_HB;
SELECT @@session.histogram_type;
SET @@session.histogram_type = DOUBLE_PREC_HB;
SELECT @@session.histogram_type;
####################################################################
# Change the value of histogram_type to an invalid value #
####################################################################
set sql_mode=TRADITIONAL;
--Error ER_WRONG_VALUE_FOR_VAR
SET @@global.histogram_type = 10;
--Error ER_WRONG_VALUE_FOR_VAR
SET @@global.histogram_type = -1024;
--Error ER_WRONG_TYPE_FOR_VAR
SET @@global.histogram_type = 2.4;
--Error ER_WRONG_VALUE_FOR_VAR
SET @@global.histogram_type = OFF;
--Error ER_WRONG_VALUE_FOR_VAR
SET @@session.histogram_type = 10;
--Error ER_WRONG_VALUE_FOR_VAR
SET @@session.histogram_type = -2;
--Error ER_WRONG_TYPE_FOR_VAR
SET @@session.histogram_type = 1.2;
--Error ER_WRONG_VALUE_FOR_VAR
SET @@session.histogram_type = ON;
###############################################################################
# Check if the value in GLOBAL & SESSION Tables matches value in variable #
###############################################################################
SELECT * FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='histogram_type';
SELECT * FROM INFORMATION_SCHEMA.SESSION_VARIABLES
WHERE VARIABLE_NAME='histogram_type';
####################################
# Restore initial value #
####################################
SET @@global.histogram_type = @start_global_value;
SELECT @@global.histogram_type;
SET @@session.histogram_type = @start_session_value;
SELECT @@session.histogram_type;
set sql_mode='';
#####################################################
# END OF histogram_type TESTS #
#####################################################
\ No newline at end of file
......@@ -17,6 +17,7 @@ use dbt3_s001;
set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
set @save_histogram_size=@@histogram_size;
set @save_histogram_type=@@histogram_type;
--disable_query_log
--disable_result_log
......@@ -54,6 +55,9 @@ limit 10;
eval EXPLAIN EXTENDED $Q20;
eval $Q20;
SELECT ((SELECT COUNT(*) FROM part WHERE p_name LIKE 'g%') /
(SELECT COUNT(*) FROM part)) AS sel;
set optimizer_use_condition_selectivity=3;
eval EXPLAIN EXTENDED $Q20;
......@@ -61,26 +65,38 @@ eval $Q20;
set histogram_size=15;
ANALYZE TABLE part PERSISTENT FOR COLUMNS(p_name) INDEXES();
flush table part;
set optimizer_use_condition_selectivity=4;
eval EXPLAIN EXTENDED $Q20;
eval $Q20;
set histogram_type='DOUBLE_PREC_HB';
set histogram_size=30;
ANALYZE TABLE part PERSISTENT FOR COLUMNS(p_name) INDEXES();
set optimizer_use_condition_selectivity=4;
flush table part;
eval EXPLAIN EXTENDED $Q20;
eval $Q20;
set histogram_type='SINGLE_PREC_HB';
set histogram_size=24;
flush table nation;
ANALYZE TABLE nation PERSISTENT FOR COLUMNS(n_name) INDEXES();
flush table nation;
eval EXPLAIN EXTENDED $Q20;
eval $Q20;
DROP DATABASE dbt3_s001;
set histogram_type=@save_histogram_type;
set histogram_size=@save_histogram_size;
set optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
......
......@@ -178,7 +178,21 @@ SELECT db_name, table_name, column_name,
FROM mysql.column_stats;
DELETE FROM mysql.column_stats;
set histogram_size=8;
set histogram_type='DOUBLE_PREC_HB';
ANALYZE TABLE t1;
SELECT db_name, table_name, column_name,
min_value, max_value,
nulls_ratio, avg_frequency,
hist_size, HEX(histogram)
FROM mysql.column_stats;
DELETE FROM mysql.column_stats;
set histogram_size= 0;
set histogram_type=default;
ANALYZE TABLE t1;
......
......@@ -108,6 +108,6 @@ set @had_proxies_priv_table= @@warning_count != 0;
CREATE TABLE IF NOT EXISTS table_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, cardinality bigint(21) unsigned DEFAULT NULL, PRIMARY KEY (db_name,table_name) ) ENGINE=MyISAM CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Tables';
CREATE TABLE IF NOT EXISTS column_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, column_name varchar(64) NOT NULL, min_value varchar(255) DEFAULT NULL, max_value varchar(255) DEFAULT NULL, nulls_ratio decimal(12,4) DEFAULT NULL, avg_length decimal(12,4) DEFAULT NULL, avg_frequency decimal(12,4) DEFAULT NULL, hist_size tinyint unsigned, histogram varbinary(255), PRIMARY KEY (db_name,table_name,column_name) ) ENGINE=MyISAM CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Columns';
CREATE TABLE IF NOT EXISTS column_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, column_name varchar(64) NOT NULL, min_value varchar(255) DEFAULT NULL, max_value varchar(255) DEFAULT NULL, nulls_ratio decimal(12,4) DEFAULT NULL, avg_length decimal(12,4) DEFAULT NULL, avg_frequency decimal(12,4) DEFAULT NULL, hist_size tinyint unsigned, hist_type enum('SINGLE_PREC_HB','DOUBLE_PREC_HB'), histogram varbinary(255), PRIMARY KEY (db_name,table_name,column_name) ) ENGINE=MyISAM CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Columns';
CREATE TABLE IF NOT EXISTS index_stats (db_name varchar(64) NOT NULL, table_name varchar(64) NOT NULL, index_name varchar(64) NOT NULL, prefix_arity int(11) unsigned NOT NULL, avg_frequency decimal(12,4) DEFAULT NULL, PRIMARY KEY (db_name,table_name,index_name,prefix_arity) ) ENGINE=MyISAM CHARACTER SET utf8 COLLATE utf8_bin comment='Statistics on Indexes';
......@@ -502,6 +502,7 @@ typedef struct system_variables
ulong optimizer_use_condition_selectivity;
ulong use_stat_tables;
ulong histogram_size;
ulong histogram_type;
ulong preload_buff_size;
ulong profiling_history_size;
ulong read_buff_size;
......
......@@ -928,6 +928,9 @@ public:
case COLUMN_STAT_HIST_SIZE:
stat_field->store(table_field->collected_stats->histogram.get_size());
break;
case COLUMN_STAT_HIST_TYPE:
stat_field->store(table_field->collected_stats->histogram.get_type());
break;
case COLUMN_STAT_HISTOGRAM:
const char * col_histogram=
(const char *) (table_field->collected_stats->histogram.get_values());
......@@ -971,7 +974,7 @@ public:
char buff[MAX_FIELD_WIDTH];
String val(buff, sizeof(buff), &my_charset_utf8_bin);
for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HIST_SIZE; i++)
for (uint i= COLUMN_STAT_MIN_VALUE; i <= COLUMN_STAT_HIST_TYPE; i++)
{
Field *stat_field= stat_table->field[i];
......@@ -1007,6 +1010,10 @@ public:
case COLUMN_STAT_HIST_SIZE:
table_field->read_stats->histogram.set_size(stat_field->val_int());
break;
case COLUMN_STAT_HIST_TYPE:
Histogram_type hist_type= (Histogram_type) (stat_field->val_int());
table_field->read_stats->histogram.set_type(hist_type);
break;
}
}
}
......@@ -1238,7 +1245,7 @@ class Histogram_builder
Field *min_value;
Field *max_value;
Histogram *histogram;
uint hist_size;
uint hist_width;
double bucket_capacity;
uint curr_bucket;
ulonglong count;
......@@ -1252,8 +1259,8 @@ public:
min_value= col_stats->min_value;
max_value= col_stats->max_value;
histogram= &col_stats->histogram;
hist_size= histogram->get_size();
bucket_capacity= (double) records / (hist_size + 1);
hist_width= histogram->get_width();
bucket_capacity= (double) records / (hist_width + 1);
curr_bucket= 0;
count= 0;
count_distinct= 0;
......@@ -1265,7 +1272,7 @@ public:
{
count_distinct++;
count+= elem_cnt;
if (curr_bucket == hist_size)
if (curr_bucket == hist_width)
return 0;
if (count > bucket_capacity * (curr_bucket + 1))
{
......@@ -1273,7 +1280,7 @@ public:
histogram->set_value(curr_bucket,
column->middle_point_pos(min_value, max_value));
curr_bucket++;
while (curr_bucket != hist_size &&
while (curr_bucket != hist_width &&
count > bucket_capacity * (curr_bucket + 1))
{
histogram->set_prev_value(curr_bucket);
......@@ -1794,6 +1801,7 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
columns++;
}
uint hist_size= thd->variables.histogram_size;
Histogram_type hist_type= (Histogram_type) (thd->variables.histogram_type);
uchar *histogram= NULL;
if (hist_size > 0)
histogram= (uchar *) alloc_root(&table->mem_root, hist_size * columns);
......@@ -1818,6 +1826,7 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
if (bitmap_is_set(table->read_set, (*field_ptr)->field_index))
{
column_stats->histogram.set_size(hist_size);
column_stats->histogram.set_type(hist_type);
column_stats->histogram.set_values(histogram);
histogram+= hist_size;
}
......@@ -2200,6 +2209,7 @@ void Column_statistics_collected::finish(ha_rows rows)
set_not_null(COLUMN_STAT_HIST_SIZE);
if (hist_size && distincts)
{
set_not_null(COLUMN_STAT_HIST_TYPE);
histogram.set_values(count_distinct->get_histogram());
set_not_null(COLUMN_STAT_HISTOGRAM);
}
......
......@@ -24,6 +24,13 @@ enum enum_use_stat_tables_mode
PEFERABLY,
} Use_stat_tables_mode;
/*
  Kinds of height-balanced histograms that ANALYZE can build.
  The numeric values are significant: they are the values of the
  histogram_type system variable (0 and 1) and index the list of
  names {"SINGLE_PREC_HB", "DOUBLE_PREC_HB"} in the same order.
*/
typedef enum enum_histogram_type
{
  SINGLE_PREC_HB= 0,   /* single precision: one byte per histogram entry */
  DOUBLE_PREC_HB= 1    /* double precision: two bytes per histogram entry */
} Histogram_type;
enum enum_stat_tables
{
TABLE_STAT,
......@@ -59,6 +66,7 @@ enum enum_column_stat_col
COLUMN_STAT_AVG_LENGTH,
COLUMN_STAT_AVG_FREQUENCY,
COLUMN_STAT_HIST_SIZE,
COLUMN_STAT_HIST_TYPE,
COLUMN_STAT_HISTOGRAM
};
......@@ -99,46 +107,74 @@ double get_column_range_cardinality(Field *field,
key_range *min_endp,
key_range *max_endp);
#define HIST_FACTOR 255
#define INV_HIST_FACTOR ((double) 1.0 / HIST_FACTOR)
class Histogram
{
private:
Histogram_type type;
uint8 size;
uchar *values;
/*
  Scaling factor used to map a position onto the integer range of one
  histogram entry: 2^8 - 1 for SINGLE_PREC_HB (entries accessed as uint8)
  and 2^16 - 1 for DOUBLE_PREC_HB (entries accessed as uint16).
*/
uint prec_factor()
{
  switch (type) {
  case SINGLE_PREC_HB:
    return ((uint) (1 << 8) - 1);
  case DOUBLE_PREC_HB:
    return ((uint) (1 << 16) - 1);
  }
  /*
    Not reached for a valid type. The type may be set from an integer read
    out of the statistics table, so never flow off the end of a non-void
    function (undefined behavior); 1 keeps divisions by the factor defined.
  */
  return 1;
}
public:
/*
  Number of entries (bucket boundaries) held by the histogram.
  'size' counts bytes, so a double precision histogram, whose entries
  take two bytes each, holds size / 2 entries.
*/
uint get_width()
{
  switch (type) {
  case SINGLE_PREC_HB:
    return size;
  case DOUBLE_PREC_HB:
    return size / 2;
  }
  /*
    Not reached for a valid type. Report an empty histogram rather than
    flowing off the end of a non-void function (undefined behavior) when
    the type holds an unexpected value.
  */
  return 0;
}
private:
uint8 size;
uint8 *values;
/*
  Return the i-th histogram entry as an unsigned integer.
  The entry is read as a uint8 for SINGLE_PREC_HB and as a uint16 for
  DOUBLE_PREC_HB; 'i' is an entry index, not a byte offset.

  NOTE(review): the uint16 access assumes 'values' is suitably aligned for
  two-byte reads and yields entries in host byte order -- confirm this holds
  for every buffer passed to set_values().
*/
uint get_value(uint i)
{
  switch (type) {
  case SINGLE_PREC_HB:
    return (uint) (((uint8 *) values)[i]);
  case DOUBLE_PREC_HB:
    return (uint) (((uint16 *) values)[i]);
  }
  /*
    Not reached for a valid type; avoids flowing off the end of a non-void
    function (undefined behavior) if the type holds an unexpected value.
  */
  return 0;
}
uint find_bucket(double pos, bool first)
{
uint8 val= (uint8) (pos * HIST_FACTOR);
uint val= (uint) (pos * prec_factor());
int lp= 0;
int rp= size - 1;
int i= 0;
for (int d= size / 2 ; d; d= (rp - lp) / 2)
int rp= get_width() - 1;
uint i= 0;
for (int d= get_width() / 2 ; d; d= (rp - lp) / 2)
{
i= lp + d;
if (val == values[i])
if (val == get_value(i))
break;
if (val < values[i])
if (val < get_value(i))
rp= i;
else if (val > values[i + 1])
else if (val > get_value(i + 1))
lp= i + 1;
else
break;
}
if (val == values[i])
if (val == get_value(i))
{
if (first)
{
while(i && val == values[i - 1])
while(i && val == get_value(i - 1))
i--;
}
else
{
while(i + 1 < size && val == values[i + 1])
while(i + 1 < get_width() && val == get_value(i + 1))
i++;
}
}
......@@ -149,24 +185,44 @@ public:
/* Size of the values array in bytes (not the number of entries). */
uint get_size() { return (uint) size; }

/* Precision kind that determines how the values array is interpreted. */
Histogram_type get_type() { return type; }

/* Raw byte buffer holding the histogram entries. */
uchar *get_values() { return (uchar *) values; }

/* Set the size in bytes; the value is narrowed to 8 bits. */
void set_size (ulonglong sz) { size= (uint8) sz; }

void set_type (Histogram_type t) { type= t; }

/*
  Attach the byte buffer that holds the entries. Only this definition is
  kept: the earlier variant casting to uint8 * had the same signature and
  would be a redefinition.
*/
void set_values (uchar *vals) { values= (uchar *) vals; }
/*
  Store 'val' scaled by prec_factor() as the i-th histogram entry, using
  the entry width that corresponds to the histogram type.
  'i' is an entry index, not a byte offset.
  Presumably 'val' is a position in [0, 1] -- TODO confirm against callers;
  larger values would be truncated by the narrowing cast.

  The entry is written only through the type-specific branch. An
  unconditional single-byte store to values[i] must not precede the switch:
  for DOUBLE_PREC_HB byte i belongs to entry i / 2 and would be clobbered.
*/
void set_value(uint i, double val)
{
  switch (type) {
  case SINGLE_PREC_HB:
    ((uint8 *) values)[i]= (uint8) (val * prec_factor());
    return;
  case DOUBLE_PREC_HB:
    ((uint16 *) values)[i]= (uint16) (val * prec_factor());
    return;
  }
}
/*
  Copy entry i-1 into entry i, honoring the entry width of the histogram
  type. The caller must pass i > 0, since entry i-1 is read.

  This is the only definition: a byte-wise one-liner
  (values[i]= values[i-1]) would be a redefinition and would copy half an
  entry for DOUBLE_PREC_HB.
*/
void set_prev_value(uint i)
{
  switch (type) {
  case SINGLE_PREC_HB:
    ((uint8 *) values)[i]= ((uint8 *) values)[i-1];
    return;
  case DOUBLE_PREC_HB:
    ((uint16 *) values)[i]= ((uint16 *) values)[i-1];
    return;
  }
}
double range_selectivity(double min_pos, double max_pos)
{
double sel;
double bucket_sel= 1.0/(size + 1);
double bucket_sel= 1.0/(get_width() + 1);
uint min= find_bucket(min_pos, TRUE);
uint max= find_bucket(max_pos, FALSE);
sel= bucket_sel * (max - min + 1);
......@@ -176,14 +232,14 @@ public:
double point_selectivity(double pos, double avg_sel)
{
double sel;
double bucket_sel= 1.0/(size + 1);
double bucket_sel= 1.0/(get_width() + 1);
uint min= find_bucket(pos, TRUE);
uint max= min;
while (max + 1 < size && values[max + 1] == values[max])
while (max + 1 < get_width() && get_value(max + 1) == get_value(max))
max++;
double width= ((max + 1 == size ? 1.0 : values[max]) -
(min == 0 ? 0.0 : values[min-1])) *
INV_HIST_FACTOR;
double width= ((max + 1 == get_width() ? 1.0 : get_value(max)) -
(min == 0 ? 0.0 : get_value(min-1))) *
((double) 1.0 / prec_factor());
sel= avg_sel * (bucket_sel * (max + 1 - min)) / width;
return sel;
}
......
......@@ -3777,6 +3777,17 @@ static Sys_var_ulong Sys_histogram_size(
SESSION_VAR(histogram_size), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(0, 255), DEFAULT(0), BLOCK_SIZE(1));
/*
  Names accepted for the histogram_type variable, terminated by 0.
  The position of a name is the numeric value of the variable
  (0 - SINGLE_PREC_HB, 1 - DOUBLE_PREC_HB). The session value is cast
  directly to Histogram_type, so the order here has to match that enum.
*/
const char *histogram_types[] =
{"SINGLE_PREC_HB", "DOUBLE_PREC_HB", 0};
/*
  Registers the histogram_type system variable, backed by the
  histogram_type member of the session variables and accepted on the
  command line with a required argument.
  DEFAULT(0) selects the first name in histogram_types, SINGLE_PREC_HB.
*/
static Sys_var_enum Sys_histogram_type(
"histogram_type",
"Specifies type of the histograms created by ANALYZE. "
"Possible values are: "
"SINGLE_PREC_HB - single precision height-balanced, "
"DOUBLE_PREC_HB - double precision height-balanced.",
SESSION_VAR(histogram_type), CMD_LINE(REQUIRED_ARG),
histogram_types, DEFAULT(0));
static Sys_var_mybool Sys_no_thread_alarm(
"debug_no_thread_alarm",
"Disable system thread alarm calls. Disabling it may be useful "
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment