Commit e15a83c0 authored by Jan Lindström

Better comments part 2 with proof and simplified implementation.

Thanks to Daniel Black.
parent 43054872
...@@ -3844,7 +3844,7 @@ btr_estimate_number_of_different_key_vals( ...@@ -3844,7 +3844,7 @@ btr_estimate_number_of_different_key_vals(
ib_uint64_t* n_diff; ib_uint64_t* n_diff;
ib_uint64_t* n_not_null; ib_uint64_t* n_not_null;
ibool stats_null_not_equal; ibool stats_null_not_equal;
ullint n_sample_pages; /* number of pages to sample */ ullint n_sample_pages = 1; /* number of pages to sample */
ulint not_empty_flag = 0; ulint not_empty_flag = 0;
ulint total_external_size = 0; ulint total_external_size = 0;
ulint i; ulint i;
...@@ -3897,8 +3897,6 @@ btr_estimate_number_of_different_key_vals( ...@@ -3897,8 +3897,6 @@ btr_estimate_number_of_different_key_vals(
if (srv_stats_transient_sample_pages > index->stat_index_size) { if (srv_stats_transient_sample_pages > index->stat_index_size) {
if (index->stat_index_size > 0) { if (index->stat_index_size > 0) {
n_sample_pages = index->stat_index_size; n_sample_pages = index->stat_index_size;
} else {
n_sample_pages = 1;
} }
} else { } else {
n_sample_pages = srv_stats_transient_sample_pages; n_sample_pages = srv_stats_transient_sample_pages;
...@@ -3906,17 +3904,44 @@ btr_estimate_number_of_different_key_vals( ...@@ -3906,17 +3904,44 @@ btr_estimate_number_of_different_key_vals(
} else { } else {
/* New logarithmic number of pages that are estimated. /* New logarithmic number of pages that are estimated.
Number of pages estimated should be between 1 and Number of pages estimated should be between 1 and
index->stat_index_size. We pick index->stat_index_size index->stat_index_size.
as maximum and log2(index->stat_index_size)*sr_stats_transient_sample_pages
if between range as minimum.*/ If we have only 0 or 1 index pages then we can only take 1
if (index->stat_index_size > 0) { sample. We have already initialized n_sample_pages to 1.
n_sample_pages = ut_min(index->stat_index_size,
ut_max(ut_min(srv_stats_transient_sample_pages, So taking index size as I and sample as S and log2(I)*S as L
index->stat_index_size),
log2(index->stat_index_size) requirement 1) we want the out limit of the expression to not exceed I;
*srv_stats_transient_sample_pages)); requirement 2) we want the ideal pages to be at least S;
} else { so the current expression is min(I, max( min(S,I), L))
n_sample_pages = 1;
looking for simplifications:
case 1: assume S < I
min(I, max( min(S,I), L)) -> min(I , max( S, L))
but since L=log2(I)*S and log2(I) >= 1 (I >= 2 on this path), L >= S always, so max(S,L) = L.
so we have: min(I , L)
case 2: assume I <= S
min(I, max( min(S,I), L)) -> min(I, max( I, L))
case 2a: L > I
min(I, max( I, L)) -> min(I, L) -> I
case 2b: when L < I
min(I, max( I, L)) -> min(I, I ) -> I
so taking all case2 paths is I, our expression is:
n_pages = S < I? min(I,L) : I
*/
if (index->stat_index_size > 1) {
n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) ?
ut_min(index->stat_index_size,
log2(index->stat_index_size)*srv_stats_transient_sample_pages)
: index->stat_index_size;
} }
} }
......
...@@ -4035,7 +4035,7 @@ btr_estimate_number_of_different_key_vals( ...@@ -4035,7 +4035,7 @@ btr_estimate_number_of_different_key_vals(
ib_uint64_t* n_diff; ib_uint64_t* n_diff;
ib_uint64_t* n_not_null; ib_uint64_t* n_not_null;
ibool stats_null_not_equal; ibool stats_null_not_equal;
ullint n_sample_pages; /* number of pages to sample */ ullint n_sample_pages=1; /* number of pages to sample */
ulint not_empty_flag = 0; ulint not_empty_flag = 0;
ulint total_external_size = 0; ulint total_external_size = 0;
ulint i; ulint i;
...@@ -4088,8 +4088,6 @@ btr_estimate_number_of_different_key_vals( ...@@ -4088,8 +4088,6 @@ btr_estimate_number_of_different_key_vals(
if (srv_stats_transient_sample_pages > index->stat_index_size) { if (srv_stats_transient_sample_pages > index->stat_index_size) {
if (index->stat_index_size > 0) { if (index->stat_index_size > 0) {
n_sample_pages = index->stat_index_size; n_sample_pages = index->stat_index_size;
} else {
n_sample_pages = 1;
} }
} else { } else {
n_sample_pages = srv_stats_transient_sample_pages; n_sample_pages = srv_stats_transient_sample_pages;
...@@ -4097,18 +4095,44 @@ btr_estimate_number_of_different_key_vals( ...@@ -4097,18 +4095,44 @@ btr_estimate_number_of_different_key_vals(
} else { } else {
/* New logarithmic number of pages that are estimated. /* New logarithmic number of pages that are estimated.
Number of pages estimated should be between 1 and Number of pages estimated should be between 1 and
index->stat_index_size. We pick index->stat_index_size index->stat_index_size.
as maximum and log2(index->stat_index_size)*sr_stats_transient_sample_pages
if between range as minimum.*/ If we have only 0 or 1 index pages then we can only take 1
sample. We have already initialized n_sample_pages to 1.
if (index->stat_index_size > 0) {
n_sample_pages = ut_min(index->stat_index_size, So taking index size as I and sample as S and log2(I)*S as L
ut_max(ut_min(srv_stats_transient_sample_pages,
index->stat_index_size), requirement 1) we want the out limit of the expression to not exceed I;
log2(index->stat_index_size) requirement 2) we want the ideal pages to be at least S;
*srv_stats_transient_sample_pages)); so the current expression is min(I, max( min(S,I), L))
} else {
n_sample_pages = 1; looking for simplifications:
case 1: assume S < I
min(I, max( min(S,I), L)) -> min(I , max( S, L))
but since L=log2(I)*S and log2(I) >= 1 (I >= 2 on this path), L >= S always, so max(S,L) = L.
so we have: min(I , L)
case 2: assume I <= S
min(I, max( min(S,I), L)) -> min(I, max( I, L))
case 2a: L > I
min(I, max( I, L)) -> min(I, L) -> I
case 2b: when L < I
min(I, max( I, L)) -> min(I, I ) -> I
so taking all case2 paths is I, our expression is:
n_pages = S < I? min(I,L) : I
*/
if (index->stat_index_size > 1) {
n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) ?
ut_min(index->stat_index_size,
log2(index->stat_index_size)*srv_stats_transient_sample_pages)
: index->stat_index_size;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment