Commit 28ad1285 authored by Sergei Petrunia

Fix off-by-one error in Histogram_json_hb::find_bucket

parent b1796402
...@@ -4093,12 +4093,12 @@ test.t2 analyze status Engine-independent statistics collected ...@@ -4093,12 +4093,12 @@ test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK test.t2 analyze status OK
explain extended select * from t2 where city = 'Moscow'; explain extended select * from t2 where city = 'Moscow';
id select_type table type possible_keys key key_len ref rows filtered Extra id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 50.00 Using where 1 SIMPLE t2 ALL NULL NULL NULL NULL 101 98.02 Using where
Warnings: Warnings:
Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` = 'Moscow' Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` = 'Moscow'
analyze select * from t2 where city = 'Moscow'; analyze select * from t2 where city = 'Moscow';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 50.00 98.02 Using where 1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 98.02 98.02 Using where
explain extended select * from t2 where city = 'Helsinki'; explain extended select * from t2 where city = 'Helsinki';
id select_type table type possible_keys key key_len ref rows filtered Extra id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 1.98 Using where 1 SIMPLE t2 ALL NULL NULL NULL NULL 101 1.98 Using where
......
...@@ -182,4 +182,3 @@ SET histogram_type= JSON_HB; ...@@ -182,4 +182,3 @@ SET histogram_type= JSON_HB;
ANALYZE TABLE t1 PERSISTENT FOR ALL; ANALYZE TABLE t1 PERSISTENT FOR ALL;
SELECT * FROM t1; SELECT * FROM t1;
drop table t1; drop table t1;
...@@ -483,12 +483,12 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint, ...@@ -483,12 +483,12 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
// If the value is outside of the histogram's range, this will "clip" it to // If the value is outside of the histogram's range, this will "clip" it to
// first or last bucket. // first or last bucket.
int idx= find_bucket(field, key, false); bool equal;
int idx= find_bucket(field, key, &equal);
double sel; double sel;
if (buckets[idx].ndv == 1 && if (buckets[idx].ndv == 1 && !equal)
field->key_cmp((uchar*)buckets[idx].start_value.data(), key))
{ {
// The bucket has a single value and it doesn't match! Use the global // The bucket has a single value and it doesn't match! Use the global
// average. // average.
...@@ -550,7 +550,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp, ...@@ -550,7 +550,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
// Find the leftmost bucket that contains the lookup value. // Find the leftmost bucket that contains the lookup value.
// (If the lookup value is to the left of all buckets, find bucket #0) // (If the lookup value is to the left of all buckets, find bucket #0)
int idx= find_bucket(field, min_key, exclusive_endp); bool equal;
int idx= find_bucket(field, min_key, &equal);
if (equal && exclusive_endp && buckets[idx].ndv==1 &&
idx < (int)buckets.size()-1)
{
/*
The range is "col > $CONST" and we've found a bucket that contains
only the value $CONST. Move to the next bucket.
TODO: what if the last value in the histogram is a popular one?
*/
idx++;
}
double left_fract= get_left_fract(idx); double left_fract= get_left_fract(idx);
double sel= position_in_interval(field, min_key, min_key_len, double sel= position_in_interval(field, min_key, min_key_len,
buckets[idx].start_value, buckets[idx].start_value,
...@@ -573,8 +584,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp, ...@@ -573,8 +584,18 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
max_key++; max_key++;
max_key_len--; max_key_len--;
} }
bool equal;
int idx= find_bucket(field, max_key, &equal);
int idx= find_bucket(field, max_key, inclusive_endp); if (equal && !inclusive_endp && idx > 0)
{
/*
The range is "col < $CONST" and we've found a bucket starting with
$CONST. Move to the previous bucket.
TODO: what if the first value is the popular one?
*/
idx--;
}
double left_fract= get_left_fract(idx); double left_fract= get_left_fract(idx);
double sel= position_in_interval(field, max_key, max_key_len, double sel= position_in_interval(field, max_key, max_key_len,
buckets[idx].start_value, buckets[idx].start_value,
...@@ -616,22 +637,59 @@ void Histogram_json_hb::serialize(Field *field) ...@@ -616,22 +637,59 @@ void Histogram_json_hb::serialize(Field *field)
*/ */
int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val, int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
bool equal_is_less) bool *equal)
{ {
int res;
int low= 0; int low= 0;
int high= (int)buckets.size() - 1; int high= (int)buckets.size() - 1;
*equal= false;
while (low + 1 < high) while (low + 1 < high)
{ {
int middle= (low + high) / 2; int middle= (low + high) / 2;
int res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val); res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val);
if (!res) if (!res)
res= equal_is_less? -1: 1; {
if (res < 0) *equal= true;
return middle;
}
else if (res < 0)
low= middle; low= middle;
else //res > 0 else //res > 0
high= middle; high= middle;
} }
/*
If low and high were assigned a value in the above loop, then they are not
equal to the lookup value:
bucket[low] < lookup_val < bucket[high]
But there are two special cases: low=0 and high=last_bucket. Handle them
below.
*/
if (low == 0)
{
res= field->key_cmp((uchar*)buckets[0].start_value.data(), lookup_val);
if (!res)
*equal= true;
else if (res < 0)
{
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
if (!res)
*equal= true;
if (res >= 0)
low= high;
}
}
else if (high == (int)buckets.size() - 1)
{
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
if (!res)
*equal= true;
if (res >= 0)
low= high;
}
return low; return low;
} }
...@@ -123,6 +123,6 @@ class Histogram_json_hb : public Histogram_base ...@@ -123,6 +123,6 @@ class Histogram_json_hb : public Histogram_base
private: private:
double get_left_fract(int idx); double get_left_fract(int idx);
std::string& get_end_value(int idx); std::string& get_end_value(int idx);
int find_bucket(Field *field, const uchar *lookup_val, bool equal_is_less); int find_bucket(Field *field, const uchar *lookup_val, bool *equal);
}; };
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment