Commit 69f24c23 authored by Michael Okoko, committed by Sergei Petrunia

Use generic Histogram_base class for Histogram_builders

This fixes the incorrect avg_frequency calculation for JSON histograms
by replacing the specific histogram objects with the generic Histogram_base class.

It also restores the get/set size functions, as they are useful in calculating
fields for binary histograms.
Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
parent 21e0f548
...@@ -19,7 +19,7 @@ test.t1 analyze status Engine-independent statistics collected ...@@ -19,7 +19,7 @@ test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK test.t1 analyze status OK
SELECT * FROM mysql.column_stats WHERE table_name='t1'; SELECT * FROM mysql.column_stats WHERE table_name='t1';
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
test t1 a 1 25 0.0000 4.0000 0.0000 10 JSON [ test t1 a 1 25 0.0000 4.0000 1.0000 10 JSON [
"3", "3",
"5", "5",
"7", "7",
...@@ -31,7 +31,7 @@ test t1 a 1 25 0.0000 4.0000 0.0000 10 JSON [ ...@@ -31,7 +31,7 @@ test t1 a 1 25 0.0000 4.0000 0.0000 10 JSON [
"21", "21",
"23" "23"
] ]
test t1 b 1 9 0.0000 1.6400 0.0000 10 JSON [ test t1 b 1 9 0.0000 1.6400 1.0000 10 JSON [
"11", "11",
"13", "13",
"15", "15",
...@@ -43,7 +43,7 @@ test t1 b 1 9 0.0000 1.6400 0.0000 10 JSON [ ...@@ -43,7 +43,7 @@ test t1 b 1 9 0.0000 1.6400 0.0000 10 JSON [
"5", "5",
"7" "7"
] ]
test t1 c 1 9 0.0000 2.0000 0.0000 10 JSON [ test t1 c 1 9 0.0000 2.0000 1.0000 10 JSON [
"11", "11",
"13", "13",
"15", "15",
...@@ -55,7 +55,7 @@ test t1 c 1 9 0.0000 2.0000 0.0000 10 JSON [ ...@@ -55,7 +55,7 @@ test t1 c 1 9 0.0000 2.0000 0.0000 10 JSON [
"5", "5",
"7" "7"
] ]
test t1 d 1 25 0.0000 8.0000 0.0000 10 JSON [ test t1 d 1 25 0.0000 8.0000 1.0000 10 JSON [
"3", "3",
"5", "5",
"7", "7",
...@@ -101,7 +101,7 @@ ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '1'. ...@@ -101,7 +101,7 @@ ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '1'.
UPDATE mysql.column_stats SET histogram='{}' WHERE table_name='t1'; UPDATE mysql.column_stats SET histogram='{}' WHERE table_name='t1';
FLUSH TABLES; FLUSH TABLES;
SELECT * FROM t1; SELECT * FROM t1;
ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '32608'. ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '32641'.
DELETE FROM mysql.column_stats; DELETE FROM mysql.column_stats;
DROP TABLE t1; DROP TABLE t1;
create schema world; create schema world;
......
...@@ -1082,7 +1082,7 @@ class Column_stat: public Stat_table ...@@ -1082,7 +1082,7 @@ class Column_stat: public Stat_table
// Note: this is dumb. the histogram size is stored with the // Note: this is dumb. the histogram size is stored with the
// histogram! // histogram!
stat_field->store(stats->histogram_? stat_field->store(stats->histogram_?
stats->histogram_->get_width() : 0); stats->histogram_->get_size() : 0);
break; break;
case COLUMN_STAT_HIST_TYPE: case COLUMN_STAT_HIST_TYPE:
if (stats->histogram_) if (stats->histogram_)
...@@ -1269,7 +1269,7 @@ bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_ar ...@@ -1269,7 +1269,7 @@ bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_ar
*/ */
void Histogram_binary::serialize(Field *field) void Histogram_binary::serialize(Field *field)
{ {
field->store((char*)get_values(), get_width(), &my_charset_bin); field->store((char*)get_values(), get_size(), &my_charset_bin);
} }
void Histogram_binary::init_for_collection(MEM_ROOT *mem_root, void Histogram_binary::init_for_collection(MEM_ROOT *mem_root,
...@@ -1292,6 +1292,7 @@ void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htyp ...@@ -1292,6 +1292,7 @@ void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htyp
bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg, const uchar *ptr, uint size_arg) bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg, const uchar *ptr, uint size_arg)
{ {
DBUG_ENTER("Histogram_json::parse"); DBUG_ENTER("Histogram_json::parse");
size = (uint8) size_arg;
type = type_arg; type = type_arg;
const char *json = (char *)ptr; const char *json = (char *)ptr;
int vt; int vt;
...@@ -1545,7 +1546,7 @@ double Histogram_json::range_selectivity_new(Field *field, key_range *min_endp, ...@@ -1545,7 +1546,7 @@ double Histogram_json::range_selectivity_new(Field *field, key_range *min_endp,
void Histogram_json::serialize(Field *field) void Histogram_json::serialize(Field *field)
{ {
field->store((char*)values, strlen((char*)values), field->store((char*)get_values(), strlen((char*)get_values()),
&my_charset_bin); &my_charset_bin);
} }
...@@ -1866,13 +1867,13 @@ class Stat_table_write_iter ...@@ -1866,13 +1867,13 @@ class Stat_table_write_iter
class Histogram_builder class Histogram_builder
{ {
private: public:
Field *column; /* table field for which the histogram is built */ Field *column; /* table field for which the histogram is built */
uint col_length; /* size of this field */ uint col_length; /* size of this field */
ha_rows records; /* number of records the histogram is built for */ ha_rows records; /* number of records the histogram is built for */
Field *min_value; /* pointer to the minimal value for the field */ Field *min_value; /* pointer to the minimal value for the field */
Field *max_value; /* pointer to the maximal value for the field */ Field *max_value; /* pointer to the maximal value for the field */
Histogram_binary *histogram; /* the histogram location */ Histogram_base *histogram; /* the histogram location */
uint hist_width; /* the number of points in the histogram */ uint hist_width; /* the number of points in the histogram */
double bucket_capacity; /* number of rows in a bucket of the histogram */ double bucket_capacity; /* number of rows in a bucket of the histogram */
uint curr_bucket; /* number of the current bucket to be built */ uint curr_bucket; /* number of the current bucket to be built */
...@@ -1881,14 +1882,13 @@ class Histogram_builder ...@@ -1881,14 +1882,13 @@ class Histogram_builder
/* number of distinct values that occured only once */ /* number of distinct values that occured only once */
ulonglong count_distinct_single_occurence; ulonglong count_distinct_single_occurence;
public:
Histogram_builder(Field *col, uint col_len, ha_rows rows) Histogram_builder(Field *col, uint col_len, ha_rows rows)
: column(col), col_length(col_len), records(rows) : column(col), col_length(col_len), records(rows)
{ {
Column_statistics *col_stats= col->collected_stats; Column_statistics *col_stats= col->collected_stats;
min_value= col_stats->min_value; min_value= col_stats->min_value;
max_value= col_stats->max_value; max_value= col_stats->max_value;
histogram= dynamic_cast<Histogram_binary *>(col_stats->histogram_); histogram= col_stats->histogram_;
hist_width= histogram->get_width(); hist_width= histogram->get_width();
bucket_capacity= (double) records / (hist_width + 1); bucket_capacity= (double) records / (hist_width + 1);
curr_bucket= 0; curr_bucket= 0;
...@@ -1918,13 +1918,13 @@ class Histogram_builder ...@@ -1918,13 +1918,13 @@ class Histogram_builder
if (count > bucket_capacity * (curr_bucket + 1)) if (count > bucket_capacity * (curr_bucket + 1))
{ {
column->store_field_value((uchar *) elem, col_length); column->store_field_value((uchar *) elem, col_length);
histogram->set_value(curr_bucket, ((Histogram_binary *)histogram)->set_value(curr_bucket,
column->pos_in_interval(min_value, max_value)); column->pos_in_interval(min_value, max_value));
curr_bucket++; curr_bucket++;
while (curr_bucket != hist_width && while (curr_bucket != hist_width &&
count > bucket_capacity * (curr_bucket + 1)) count > bucket_capacity * (curr_bucket + 1))
{ {
histogram->set_prev_value(curr_bucket); ((Histogram_binary *)histogram)->set_prev_value(curr_bucket);
curr_bucket++; curr_bucket++;
} }
} }
...@@ -1934,35 +1934,13 @@ class Histogram_builder ...@@ -1934,35 +1934,13 @@ class Histogram_builder
class Histogram_builder_json : public Histogram_builder class Histogram_builder_json : public Histogram_builder
{ {
Field *column; /* table field for which the histogram is built */ std::vector<std::string> bucket_bounds;
uint col_length; /* size of this field */
ha_rows records; /* number of records the histogram is built for */
Field *min_value; /* pointer to the minimal value for the field */
Field *max_value; /* pointer to the maximal value for the field */
Histogram_json *histogram; /* the histogram location */
uint hist_width; /* the number of points in the histogram */
double bucket_capacity; /* number of rows in a bucket of the histogram */
uint curr_bucket; /* number of the current bucket to be built */
ulonglong count; /* number of values retrieved */
ulonglong count_distinct; /* number of distinct values retrieved */
/* number of distinct values that occured only once */
ulonglong count_distinct_single_occurence;
std::vector<std::string> bucket_bounds = {};
public: public:
Histogram_builder_json(Field *col, uint col_len, ha_rows rows) Histogram_builder_json(Field *col, uint col_len, ha_rows rows)
: column(col), col_length(col_len), records(rows) : Histogram_builder(col, col_len, rows)
{ {
Column_statistics *col_stats= col->collected_stats; bucket_bounds = {};
min_value= col_stats->min_value;
max_value= col_stats->max_value;
histogram= dynamic_cast<Histogram_json *>(col_stats->histogram_);
hist_width= histogram->get_width();
bucket_capacity= (double) records / (hist_width + 1);
curr_bucket= 0;
count= 0;
count_distinct= 0;
count_distinct_single_occurence= 0;
} }
~Histogram_builder_json() override = default; ~Histogram_builder_json() override = default;
...@@ -1995,7 +1973,7 @@ class Histogram_builder_json : public Histogram_builder ...@@ -1995,7 +1973,7 @@ class Histogram_builder_json : public Histogram_builder
writer->end_array(); writer->end_array();
histogram->set_size(bucket_bounds.size()); histogram->set_size(bucket_bounds.size());
Binary_string *json_string = (Binary_string *) writer->output.get_string(); Binary_string *json_string = (Binary_string *) writer->output.get_string();
histogram->set_values((uchar *) json_string->c_ptr()); ((Histogram_json *)histogram)->set_values((uchar *) json_string->c_ptr());
} }
}; };
......
...@@ -179,7 +179,11 @@ class Histogram_base : public Sql_alloc ...@@ -179,7 +179,11 @@ class Histogram_base : public Sql_alloc
return 1.0; return 1.0;
}; };
virtual ~Histogram_base(){} // Legacy: return the size of the histogram on disk.
// This will be stored in mysql.column_stats.hist_size column.
// Newer, JSON-based histograms may return 0.
virtual uint get_size()=0;
virtual ~Histogram_base()= default;
}; };
class Histogram_binary : public Histogram_base class Histogram_binary : public Histogram_base
...@@ -283,7 +287,9 @@ class Histogram_binary : public Histogram_base ...@@ -283,7 +287,9 @@ class Histogram_binary : public Histogram_base
void set_values (uchar *vals) override { values= (uchar *) vals; } void set_values (uchar *vals) override { values= (uchar *) vals; }
void set_size (ulonglong sz) override { size= (uint8) sz; } void set_size (ulonglong sz) override { size= (uint8) sz; }
bool is_available() override { return get_width() > 0 && get_values(); } uint get_size() override {return (uint)size;}
bool is_available() override { return get_size() > 0 && get_values(); }
/* /*
This function checks that histograms should be usable only when This function checks that histograms should be usable only when
...@@ -385,6 +391,10 @@ class Histogram_json : public Histogram_base ...@@ -385,6 +391,10 @@ class Histogram_json : public Histogram_base
void set_size (ulonglong sz) override {size = (uint8) sz; } void set_size (ulonglong sz) override {size = (uint8) sz; }
uint get_size() override {
return size;
}
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override; void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override;
bool is_available() override {return get_width() > 0 /*&& get_values()*/; } bool is_available() override {return get_width() > 0 /*&& get_values()*/; }
...@@ -395,9 +405,9 @@ class Histogram_json : public Histogram_base ...@@ -395,9 +405,9 @@ class Histogram_json : public Histogram_base
is_available(); is_available();
} }
void set_values (uchar *vals) override { values= vals; } void set_values (uchar *vals) override { values= (uchar *) vals; }
uchar *get_values() override { return values; } uchar *get_values() override { return (uchar *) values; }
double range_selectivity(double min_pos, double max_pos) override {return 0.1;} double range_selectivity(double min_pos, double max_pos) override {return 0.1;}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment