Commit 5f034b7a authored by Rich Prohaska's avatar Rich Prohaska

#261 auto detect index scans to fix perf problem with partitions

parent 94d73529
set default_storage_engine='tokudb';
drop table if exists t,t1,t2,t3;
CREATE TABLE `t` (
`num` int(10) unsigned auto_increment NOT NULL,
`val` varchar(32) DEFAULT NULL,
PRIMARY KEY (`num`)
);
INSERT INTO t values (null,null);
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
SELECT count(*) FROM t;
count(*)
8388608
CREATE TABLE `t1` (
`num` int(10) unsigned NOT NULL,
`val` varchar(32) DEFAULT NULL,
PRIMARY KEY (`num`)
);
CREATE TABLE `t2` (
`num` int(10) unsigned NOT NULL,
`val` varchar(32) DEFAULT NULL,
PRIMARY KEY (`num`)
)
PARTITION BY HASH (num) PARTITIONS 10;
CREATE TABLE `t3` (
`num` int(10) unsigned NOT NULL,
`val` varchar(32) DEFAULT NULL,
PRIMARY KEY (`num`)
)
PARTITION BY RANGE (num)
(PARTITION p0 VALUES LESS THAN (1000000),
PARTITION p1 VALUES LESS THAN (2000000),
PARTITION p2 VALUES LESS THAN (3000000),
PARTITION p3 VALUES LESS THAN (4000000),
PARTITION p4 VALUES LESS THAN (5000000),
PARTITION p5 VALUES LESS THAN (6000000),
PARTITION p6 VALUES LESS THAN (7000000),
PARTITION p7 VALUES LESS THAN (8000000),
PARTITION px VALUES LESS THAN MAXVALUE);
insert into t1 select * from t;
insert into t2 select * from t;
insert into t3 select * from t;
select count(*) from t1;
count(*)
8388608
select count(*) from t2;
count(*)
8388608
1
select count(*) from t3;
count(*)
8388608
1
select count(*) from t1 where num>7000000;
count(*)
1847274
select count(*) from t2 where num>7000000;
count(*)
1847274
1
select count(*) from t3 where num>7000000;
count(*)
1847274
1
drop table if exists t,t1,t2,t3;
# verify that index scans on partitions are not slow
# due to tokudb bulk fetch not being used
# Requires the TokuDB engine (presumably have_tokudb.inc skips the test when it is unavailable - TODO confirm)
source include/have_tokudb.inc;
# Tables created below carry no ENGINE clause, so they pick up this default engine
set default_storage_engine='tokudb';
# Warnings stay disabled until the enable_warnings command just before the final cleanup
disable_warnings;
drop table if exists t,t1,t2,t3;
# Source table: num is an auto-increment primary key, so inserting null lets the server generate it
CREATE TABLE `t` (
`num` int(10) unsigned auto_increment NOT NULL,
`val` varchar(32) DEFAULT NULL,
PRIMARY KEY (`num`)
);
# put 8M rows into t
# one seed row, then 23 self-inserts that each double the row count (2^23 = 8388608 rows)
INSERT INTO t values (null,null);
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
INSERT INTO t SELECT null,null FROM t;
# sanity check on the generated row count
SELECT count(*) FROM t;
# t1: non-partitioned baseline with the same columns and primary key as the partitioned tables
CREATE TABLE `t1` (
`num` int(10) unsigned NOT NULL,
`val` varchar(32) DEFAULT NULL,
PRIMARY KEY (`num`)
);
# t2: same schema, hash partitioned on num into 10 partitions
CREATE TABLE `t2` (
`num` int(10) unsigned NOT NULL,
`val` varchar(32) DEFAULT NULL,
PRIMARY KEY (`num`)
)
PARTITION BY HASH (num) PARTITIONS 10;
# t3: same schema, range partitioned on num in steps of 1000000 (p0..p7) plus a catch-all partition px
CREATE TABLE `t3` (
`num` int(10) unsigned NOT NULL,
`val` varchar(32) DEFAULT NULL,
PRIMARY KEY (`num`)
)
PARTITION BY RANGE (num)
(PARTITION p0 VALUES LESS THAN (1000000),
PARTITION p1 VALUES LESS THAN (2000000),
PARTITION p2 VALUES LESS THAN (3000000),
PARTITION p3 VALUES LESS THAN (4000000),
PARTITION p4 VALUES LESS THAN (5000000),
PARTITION p5 VALUES LESS THAN (6000000),
PARTITION p6 VALUES LESS THAN (7000000),
PARTITION p7 VALUES LESS THAN (8000000),
PARTITION px VALUES LESS THAN MAXVALUE);
# copy every row of t into each table so all three hold identical data
insert into t1 select * from t;
insert into t2 select * from t;
insert into t3 select * from t;
# verify that full index scans on partitioned tables t2 and t3 are comparable to a non-partitioned table t1
# Each query is timed by sampling to_seconds(now()) before and after it
# $s holds the start sample and $t1, $t2, $t3 hold the elapsed time of the query on that table
# baseline: full scan of the non-partitioned table
let $s = `select to_seconds(now())`;
select count(*) from t1;
let $t1 = `select to_seconds(now()) - $s`;
# echo $t1;
# full scan of the hash partitioned table
let $s = `select to_seconds(now())`;
select count(*) from t2;
let $t2 = `select to_seconds(now()) - $s`;
# echo $t2;
# $d is 1 when the partitioned time differs from the baseline by at most the baseline itself
# since elapsed times are not negative this means the partitioned scan took at most twice the baseline
# NOTE(review): if the timer only has one-second resolution, a baseline of 0 passes only when the partitioned time is also 0 - possible flakiness, confirm
let $d = `select abs($t2 - $t1) <= $t1`;
# only the pass or fail flag is echoed, so the recorded result does not depend on actual timings
echo $d;
# full scan of the range partitioned table, compared against the same baseline $t1
let $s = `select to_seconds(now())`;
select count(*) from t3;
let $t3 = `select to_seconds(now()) - $s`;
# echo $t3;
let $d = `select abs($t3 - $t1) <= $t1`;
echo $d;
# repeat the comparison for a range scan on the primary key (num>7000000)
# new baseline: $t1 is overwritten with the range scan time on the non-partitioned table
let $s = `select to_seconds(now())`;
select count(*) from t1 where num>7000000;
let $t1 = `select to_seconds(now()) - $s`;
# echo $t1;
# range scan of the hash partitioned table
let $s = `select to_seconds(now())`;
select count(*) from t2 where num>7000000;
let $t2 = `select to_seconds(now()) - $s`;
# echo $t2;
let $d = `select abs($t2 - $t1) <= $t1`;
echo $d;
# range scan of the range partitioned table
let $s = `select to_seconds(now())`;
select count(*) from t3 where num>7000000;
let $t3 = `select to_seconds(now()) - $s`;
# echo $t3;
let $d = `select abs($t3 - $t1) <= $t1`;
echo $d;
# restore warnings, then remove every table the test created
enable_warnings;
drop table if exists t,t1,t2,t3;
......@@ -4504,6 +4504,7 @@ int ha_tokudb::index_init(uint keynr, bool sorted) {
}
invalidate_bulk_fetch();
doing_bulk_fetch = false;
maybe_index_scan = false;
error = 0;
exit:
TOKUDB_HANDLER_DBUG_RETURN(error);
......@@ -5246,86 +5247,91 @@ cleanup:
}
int ha_tokudb::get_next(uchar* buf, int direction, DBT* key_to_compare, bool do_key_read) {
int error = 0;
uint32_t flags = SET_PRELOCK_FLAG(0);
THD* thd = ha_thd();
tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton);;
bool need_val;
int error = 0;
HANDLE_INVALID_CURSOR();
// we need to read the val of what we retrieve if
// we do NOT have a covering index AND we are using a clustering secondary
// key
need_val = (do_key_read == 0) &&
(tokudb_active_index == primary_key ||
key_is_clustering(&table->key_info[tokudb_active_index])
);
if ((bytes_used_in_range_query_buff - curr_range_query_buff_offset) > 0) {
error = read_data_from_range_query_buff(buf, need_val, do_key_read);
}
else if (icp_went_out_of_range) {
icp_went_out_of_range = false;
error = HA_ERR_END_OF_FILE;
if (maybe_index_scan) {
maybe_index_scan = false;
if (!range_lock_grabbed) {
error = prepare_index_scan();
}
}
else {
invalidate_bulk_fetch();
if (doing_bulk_fetch) {
struct smart_dbt_bf_info bf_info;
bf_info.ha = this;
// you need the val if you have a clustering index and key_read is not 0;
bf_info.direction = direction;
bf_info.thd = ha_thd();
bf_info.need_val = need_val;
bf_info.buf = buf;
bf_info.key_to_compare = key_to_compare;
//
// call c_getf_next with purpose of filling in range_query_buff
//
rows_fetched_using_bulk_fetch = 0;
// it is expected that we can do ICP in the smart_dbt_bf_callback
// as a result, it's possible we don't return any data because
// none of the rows matched the index condition. Therefore, we need
// this while loop. icp_out_of_range will be set if we hit a row that
// the index condition states is out of our range. When that hits,
// we know all the data in the buffer is the last data we will retrieve
while (bytes_used_in_range_query_buff == 0 && !icp_went_out_of_range && error == 0) {
if (direction > 0) {
error = cursor->c_getf_next(cursor, flags, smart_dbt_bf_callback, &bf_info);
} else {
error = cursor->c_getf_prev(cursor, flags, smart_dbt_bf_callback, &bf_info);
}
}
// if there is no data set and we went out of range,
// then there is nothing to return
if (bytes_used_in_range_query_buff == 0 && icp_went_out_of_range) {
icp_went_out_of_range = false;
error = HA_ERR_END_OF_FILE;
}
if (bulk_fetch_iteration < HA_TOKU_BULK_FETCH_ITERATION_MAX) {
bulk_fetch_iteration++;
}
if (!error) {
uint32_t flags = SET_PRELOCK_FLAG(0);
error = handle_cursor_error(error, HA_ERR_END_OF_FILE,tokudb_active_index);
if (error) { goto cleanup; }
//
// now that range_query_buff is filled, read an element
//
// we need to read the val of what we retrieve if
// we do NOT have a covering index AND we are using a clustering secondary
// key
bool need_val = (do_key_read == 0) &&
(tokudb_active_index == primary_key || key_is_clustering(&table->key_info[tokudb_active_index]));
if ((bytes_used_in_range_query_buff - curr_range_query_buff_offset) > 0) {
error = read_data_from_range_query_buff(buf, need_val, do_key_read);
}
else if (icp_went_out_of_range) {
icp_went_out_of_range = false;
error = HA_ERR_END_OF_FILE;
}
else {
struct smart_dbt_info info;
info.ha = this;
info.buf = buf;
info.keynr = tokudb_active_index;
invalidate_bulk_fetch();
if (doing_bulk_fetch) {
struct smart_dbt_bf_info bf_info;
bf_info.ha = this;
// you need the val if you have a clustering index and key_read is not 0;
bf_info.direction = direction;
bf_info.thd = ha_thd();
bf_info.need_val = need_val;
bf_info.buf = buf;
bf_info.key_to_compare = key_to_compare;
//
// call c_getf_next with purpose of filling in range_query_buff
//
rows_fetched_using_bulk_fetch = 0;
// it is expected that we can do ICP in the smart_dbt_bf_callback
// as a result, it's possible we don't return any data because
// none of the rows matched the index condition. Therefore, we need
// this while loop. icp_out_of_range will be set if we hit a row that
// the index condition states is out of our range. When that hits,
// we know all the data in the buffer is the last data we will retrieve
while (bytes_used_in_range_query_buff == 0 && !icp_went_out_of_range && error == 0) {
if (direction > 0) {
error = cursor->c_getf_next(cursor, flags, smart_dbt_bf_callback, &bf_info);
} else {
error = cursor->c_getf_prev(cursor, flags, smart_dbt_bf_callback, &bf_info);
}
}
// if there is no data set and we went out of range,
// then there is nothing to return
if (bytes_used_in_range_query_buff == 0 && icp_went_out_of_range) {
icp_went_out_of_range = false;
error = HA_ERR_END_OF_FILE;
}
if (bulk_fetch_iteration < HA_TOKU_BULK_FETCH_ITERATION_MAX) {
bulk_fetch_iteration++;
}
if (direction > 0) {
error = cursor->c_getf_next(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info);
} else {
error = cursor->c_getf_prev(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info);
error = handle_cursor_error(error, HA_ERR_END_OF_FILE,tokudb_active_index);
if (error) { goto cleanup; }
//
// now that range_query_buff is filled, read an element
//
error = read_data_from_range_query_buff(buf, need_val, do_key_read);
}
else {
struct smart_dbt_info info;
info.ha = this;
info.buf = buf;
info.keynr = tokudb_active_index;
if (direction > 0) {
error = cursor->c_getf_next(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info);
} else {
error = cursor->c_getf_prev(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info);
}
error = handle_cursor_error(error, HA_ERR_END_OF_FILE, tokudb_active_index);
}
error = handle_cursor_error(error, HA_ERR_END_OF_FILE, tokudb_active_index);
}
}
......@@ -5337,12 +5343,15 @@ int ha_tokudb::get_next(uchar* buf, int direction, DBT* key_to_compare, bool do_
// read the full row by doing a point query into the
// main table.
//
if (!error && !do_key_read && (tokudb_active_index != primary_key) && !key_is_clustering(&table->key_info[tokudb_active_index])) {
error = read_full_row(buf);
}
trx->stmt_progress.queried++;
track_progress(thd);
if (!error) {
tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(ha_thd(), tokudb_hton);
trx->stmt_progress.queried++;
track_progress(ha_thd());
}
cleanup:
return error;
}
......@@ -5411,8 +5420,7 @@ int ha_tokudb::index_first(uchar * buf) {
info.buf = buf;
info.keynr = tokudb_active_index;
error = cursor->c_getf_first(cursor, flags,
SMART_DBT_CALLBACK(key_read), &info);
error = cursor->c_getf_first(cursor, flags, SMART_DBT_CALLBACK(key_read), &info);
error = handle_cursor_error(error,HA_ERR_END_OF_FILE,tokudb_active_index);
//
......@@ -5422,9 +5430,11 @@ int ha_tokudb::index_first(uchar * buf) {
if (!error && !key_read && (tokudb_active_index != primary_key) && !key_is_clustering(&table->key_info[tokudb_active_index])) {
error = read_full_row(buf);
}
trx->stmt_progress.queried++;
if (trx) {
trx->stmt_progress.queried++;
}
track_progress(thd);
maybe_index_scan = true;
cleanup:
TOKUDB_HANDLER_DBUG_RETURN(error);
}
......@@ -5454,8 +5464,7 @@ int ha_tokudb::index_last(uchar * buf) {
info.buf = buf;
info.keynr = tokudb_active_index;
error = cursor->c_getf_last(cursor, flags,
SMART_DBT_CALLBACK(key_read), &info);
error = cursor->c_getf_last(cursor, flags, SMART_DBT_CALLBACK(key_read), &info);
error = handle_cursor_error(error,HA_ERR_END_OF_FILE,tokudb_active_index);
//
// still need to get entire contents of the row if operation done on
......@@ -5469,6 +5478,7 @@ int ha_tokudb::index_last(uchar * buf) {
trx->stmt_progress.queried++;
}
track_progress(thd);
maybe_index_scan = true;
cleanup:
TOKUDB_HANDLER_DBUG_RETURN(error);
}
......
......@@ -251,6 +251,7 @@ private:
uint64_t bulk_fetch_iteration;
uint64_t rows_fetched_using_bulk_fetch;
bool doing_bulk_fetch;
bool maybe_index_scan;
//
// buffer used to temporarily store a "packed key"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment