Commit f86c438f authored by Mattias Jonsson's avatar Mattias Jonsson

Bug#48846: Too much time spent in ha_partition::records_in_range if not able to prune

The problem was that ha_partition::records_in_range called
records_in_range on every non-pruned partition, even though
only an estimate is required.

The solution is to call records_in_range on only 1/3 of the
partitions (up to 10) and to estimate the total from this subset.
(The calls continue beyond that subset until a called partition's
records_in_range returns a non-zero value, since a total of 0
would mean that no rows match.)
parent 6947ee37
......@@ -5747,6 +5747,23 @@ const key_map *ha_partition::keys_to_use_for_scanning()
DBUG_RETURN(m_file[0]->keys_to_use_for_scanning());
}
#define MAX_PARTS_FOR_OPTIMIZER_CALLS 10
/*
  Compute the loop parameters used when sampling partitions for
  optimizer cost and row estimates.

  @param[out] first           Result of bitmap_get_first_set() on the
                              used_partitions bitmap (first used partition).
  @param[out] num_used_parts  Number of bits set in used_partitions, i.e.
                              the number of partitions left after pruning.
  @param[out] check_min_num   Number of partitions to call: the smaller of
                              num_used_parts and
                              MAX_PARTS_FOR_OPTIMIZER_CALLS.
*/
void ha_partition::partitions_optimizer_call_preparations(uint *first,
                                                          uint *num_used_parts,
                                                          uint *check_min_num)
{
  *first= bitmap_get_first_set(&(m_part_info->used_partitions));
  *num_used_parts= bitmap_bits_set(&(m_part_info->used_partitions));
  /* Cap the sample size at MAX_PARTS_FOR_OPTIMIZER_CALLS. */
  if (MAX_PARTS_FOR_OPTIMIZER_CALLS < *num_used_parts)
    *check_min_num= MAX_PARTS_FOR_OPTIMIZER_CALLS;
  else
    *check_min_num= *num_used_parts;
}
/*
Return time for a scan of the table
......@@ -5760,43 +5777,67 @@ const key_map *ha_partition::keys_to_use_for_scanning()
double ha_partition::scan_time()
{
  /*
    Estimate the scan cost over the partitions left after pruning.

    scan_time() is called on used partitions only, starting at the first
    used one. Once at least check_min_num partitions have been called and
    the accumulated cost is non-zero, the sum is scaled linearly to all
    used partitions instead of calling the remaining ones.

    Returns the accumulated (possibly extrapolated) cost. The result is
    0.0 when the loop never runs (no used partitions) or when every
    called partition reported 0.
  */
  double scan_time= 0.0;
  uint first, part_id, num_used_parts, check_min_num, partitions_called= 0;
  DBUG_ENTER("ha_partition::scan_time");

  partitions_optimizer_call_preparations(&first, &num_used_parts,
                                         &check_min_num);
  for (part_id= first; partitions_called < num_used_parts; part_id++)
  {
    /* Skip partitions that were pruned away. */
    if (!bitmap_is_set(&(m_part_info->used_partitions), part_id))
      continue;
    scan_time+= m_file[part_id]->scan_time();
    partitions_called++;
    /* A zero sum cannot be extrapolated, so keep sampling in that case. */
    if (partitions_called >= check_min_num && scan_time != 0.0)
    {
      DBUG_RETURN(scan_time *
                  (double) num_used_parts / (double) partitions_called);
    }
  }
  DBUG_RETURN(scan_time);
}
/*
Get time to read
Estimate rows for records_in_range or estimate_rows_upper_bound.
SYNOPSIS
read_time()
index Index number used
ranges Number of ranges
rows Number of rows
RETURN VALUE
time for read
@param is_records_in_range call records_in_range instead of
estimate_rows_upper_bound.
@param inx (only for records_in_range) index to use.
@param min_key (only for records_in_range) start of range.
@param max_key (only for records_in_range) end of range.
DESCRIPTION
This will be optimised later to include whether or not the index can
be used with partitioning. To achieve we need to add another parameter
that specifies how many of the index fields that are bound in the ranges.
Possibly added as a new call to handlers.
@return Number of rows or HA_POS_ERROR.
*/
double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
ha_rows ha_partition::estimate_rows(bool is_records_in_range, uint inx,
key_range *min_key, key_range *max_key)
{
DBUG_ENTER("ha_partition::read_time");
ha_rows rows, estimated_rows= 0;
uint first, part_id, num_used_parts, check_min_num, partitions_called= 0;
DBUG_ENTER("ha_partition::records_in_range");
DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
partitions_optimizer_call_preparations(&first, &num_used_parts, &check_min_num);
for (part_id= first; partitions_called < num_used_parts ; part_id++)
{
if (!bitmap_is_set(&(m_part_info->used_partitions), part_id))
continue;
if (is_records_in_range)
rows= m_file[part_id]->records_in_range(inx, min_key, max_key);
else
rows= m_file[part_id]->estimate_rows_upper_bound();
if (rows == HA_POS_ERROR)
DBUG_RETURN(HA_POS_ERROR);
estimated_rows+= rows;
partitions_called++;
if (partitions_called >= check_min_num && estimated_rows)
{
DBUG_RETURN(estimated_rows * num_used_parts / partitions_called);
}
}
DBUG_RETURN(estimated_rows);
}
/*
Find number of records in a range
......@@ -5824,22 +5865,9 @@ double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
                                       key_range *max_key)
{
  /*
    Delegate to estimate_rows() in records_in_range mode, so that the
    per-partition calls and any HA_POS_ERROR handling live in one place.

    inx, min_key and max_key are passed through unchanged. The return
    value is whatever estimate_rows() returns.
  */
  DBUG_ENTER("ha_partition::records_in_range");
  DBUG_RETURN(estimate_rows(TRUE, inx, min_key, max_key));
}
......@@ -5855,22 +5883,36 @@ ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
ha_rows ha_partition::estimate_rows_upper_bound()
{
  /*
    Delegate to estimate_rows() in estimate_rows_upper_bound mode. The
    index and key range arguments are unused in that mode, hence 0/NULL.

    NOTE(review): estimate_rows() may extrapolate from a subset of the
    used partitions, so the result is an estimate rather than a
    guaranteed upper bound; confirm that callers tolerate this.
  */
  DBUG_ENTER("ha_partition::estimate_rows_upper_bound");
  DBUG_RETURN(estimate_rows(FALSE, 0, NULL, NULL));
}
/*
  Estimate the time needed for a read

  SYNOPSIS
    read_time()
    index                Number of the index used
    ranges               Number of ranges read
    rows                 Number of rows read

  RETURN VALUE
    The read time reported by the first partition's handler (m_file[0])
    for the same arguments.

  DESCRIPTION
    The call is currently forwarded as-is to m_file[0]. A later
    optimisation is meant to take into account whether the index can be
    used with partitioning; that needs an extra parameter telling how many
    of the index fields are bound in the ranges, possibly introduced as a
    new handler call.
*/
double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
{
  DBUG_ENTER("ha_partition::read_time");
  const double first_part_cost= m_file[0]->read_time(index, ranges, rows);
  DBUG_RETURN(first_part_cost);
}
......
......@@ -547,6 +547,18 @@ public:
-------------------------------------------------------------------------
*/
private:
/*
  Helper function to get the minimum number of partitions to use for
  the optimizer hints/cost calls.

  Parameter names are listed in the same order as in the definition and
  at the call sites: first, num_used_parts, check_min_num. All three are
  output parameters.
*/
void partitions_optimizer_call_preparations(uint *first,
                                            uint *num_used_parts,
                                            uint *check_min_num);
/*
  Shared implementation behind records_in_range() and
  estimate_rows_upper_bound(): is_records_in_range selects which of the
  two per-partition calls is made; inx, min_key and max_key are only used
  for records_in_range. Returns a row count or HA_POS_ERROR.
*/
ha_rows estimate_rows(bool is_records_in_range, uint inx,
key_range *min_key, key_range *max_key);
public:
/*
keys_to_use_for_scanning can probably be implemented as the
intersection of all underlying handlers if mixed handlers are used.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment