Commit ea3ea9ed authored by sergefp@mysql.com's avatar sergefp@mysql.com

Merge spetrunia@bk-internal.mysql.com:/home/bk/mysql-4.1

into mysql.com:/home/psergey/mysql-4.1-nulls-stats-r2
parents 7510c454 e3f623d6
......@@ -319,6 +319,8 @@ enum ha_base_keytype {
#define SEARCH_NULL_ARE_EQUAL 32768 /* NULL in keys are equal */
#define SEARCH_NULL_ARE_NOT_EQUAL 65536 /* NULL in keys are not equal */
#define SEARCH_RETURN_B_POS (65536*2) /* see ha_key_cmp for description */
/* bits in opt_flag */
#define QUICK_USED 1
#define READ_CACHE_USED 2
......
......@@ -63,4 +63,6 @@ extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
register uchar *b, uint key_length, uint nextflag,
uint *diff_pos);
extern HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a);
#endif /* _my_handler_h */
......@@ -322,7 +322,9 @@ typedef enum
/* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */
MI_STATS_METHOD_NULLS_NOT_EQUAL,
/* Treat NULLs as equal when collecting statistics (like 4.0 did) */
MI_STATS_METHOD_NULLS_EQUAL
MI_STATS_METHOD_NULLS_EQUAL,
/* Ignore NULLs - count only tuples without NULLs in the index components */
MI_STATS_METHOD_IGNORE_NULLS
} enum_mi_stats_method;
typedef struct st_mi_check_param
......@@ -349,7 +351,14 @@ typedef struct st_mi_check_param
int tmpfile_createflag;
myf myf_rw;
IO_CACHE read_cache;
/*
The next two are used to collect statistics, see update_key_parts for
description.
*/
ulonglong unique_count[MI_MAX_KEY_SEG+1];
ulonglong notnull_count[MI_MAX_KEY_SEG+1];
ha_checksum key_crc[MI_MAX_POSSIBLE_KEY];
ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY];
void *thd;
......@@ -409,7 +418,8 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
my_bool repair);
int update_state_info(MI_CHECK *param, MI_INFO *info,uint update);
void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
ulonglong *unique, ulonglong records);
ulonglong *unique, ulonglong *notnull,
ulonglong records);
int filecopy(MI_CHECK *param, File to,File from,my_off_t start,
my_off_t length, const char *type);
int movepoint(MI_INFO *info,byte *record,my_off_t oldpos,
......
This diff is collapsed.
......@@ -339,7 +339,8 @@ static struct my_option my_long_options[] =
REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"stats_method", OPT_STATS_METHOD,
"Specifies how index statistics collection code should threat NULLs. "
"Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).",
"Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), "
"\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".",
(gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
......@@ -451,6 +452,10 @@ static void usage(void)
-a, --analyze Analyze distribution of keys. Will make some joins in\n\
MySQL faster. You can check the calculated distribution\n\
by using '--description --verbose table_name'.\n\
--stats_method=name Specifies how index statistics collection code should\n\
threat NULLs. Possible values of name are \"nulls_unequal\"\n\
(default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\
\"nulls_ignored\".\n\
-d, --description Prints some information about table.\n\
-A, --set-auto-increment[=value]\n\
Force auto_increment to start at this or higher value\n\
......@@ -472,7 +477,7 @@ static void usage(void)
#include <help_end.h>
const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal",
NullS};
"nulls_ignored", NullS};
TYPELIB myisam_stats_method_typelib= {
array_elements(myisam_stats_method_names) - 1, "",
myisam_stats_method_names, NULL};
......@@ -699,14 +704,25 @@ get_one_option(int optid,
case OPT_STATS_METHOD:
{
int method;
enum_mi_stats_method method_conv;
myisam_stats_method_str= argument;
if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
{
fprintf(stderr, "Invalid value of stats_method: %s.\n", argument);
exit(1);
}
check_param.stats_method= test(method-1)? MI_STATS_METHOD_NULLS_EQUAL :
MI_STATS_METHOD_NULLS_NOT_EQUAL;
/*
  Translate the 1-based index returned by find_type() into a statistics
  method. The case labels must follow the order of
  myisam_stats_method_names: "nulls_unequal", "nulls_equal",
  "nulls_ignored".
*/
switch (method-1) {
case 1:
  method_conv= MI_STATS_METHOD_NULLS_EQUAL;
  break;
case 2:
  method_conv= MI_STATS_METHOD_IGNORE_NULLS;
  break;
case 0:
default:
  /*
    "nulls_unequal" is the documented default. The default label also
    guarantees method_conv is never read uninitialized.
  */
  method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL;
  break;
}
check_param.stats_method= method_conv;
break;
}
#ifdef DEBUG /* Only useful if debugging */
......
......@@ -297,7 +297,14 @@ typedef struct st_mi_sort_param
pthread_t thr;
IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
DYNAMIC_ARRAY buffpek;
/*
The next two are used to collect statistics, see update_key_parts for
description.
*/
ulonglong unique[MI_MAX_KEY_SEG+1];
ulonglong notnull[MI_MAX_KEY_SEG+1];
my_off_t pos,max_pos,filepos,start_recpos;
uint key, key_length,real_key_length,sortbuff_size;
uint maxbuffers, keys, find_length, sort_keys_length;
......
......@@ -481,8 +481,12 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
{
share->state.key_map|=(ulonglong) 1 << sinfo->key;
if (param->testflag & T_STATISTICS)
update_key_parts(sinfo->keyinfo, rec_per_key_part,
sinfo->unique, (ulonglong) info->state->records);
update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique,
param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
sinfo->notnull: NULL,
(ulonglong) info->state->records);
if (!sinfo->buffpek.elements)
{
if (param->testflag & T_VERBOSE)
......
......@@ -670,3 +670,35 @@ show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 10 NULL NULL YES BTREE
drop table t1;
set myisam_stats_method=nulls_ignored;
show variables like 'myisam_stats_method';
Variable_name Value
myisam_stats_method nulls_ignored
create table t1 (
a char(3), b char(4), c char(5), d char(6),
key(a,b,c,d)
);
insert into t1 values ('bcd','def1', NULL, 'zz');
insert into t1 values ('bcd','def2', NULL, 'zz');
insert into t1 values ('bce','def1', 'yuu', NULL);
insert into t1 values ('bce','def2', NULL, 'quux');
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 2 NULL NULL YES BTREE
t1 1 a 2 b A 4 NULL NULL YES BTREE
t1 1 a 3 c A 4 NULL NULL YES BTREE
t1 1 a 4 d A 4 NULL NULL YES BTREE
delete from t1;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 0 NULL NULL YES BTREE
t1 1 a 2 b A 0 NULL NULL YES BTREE
t1 1 a 3 c A 0 NULL NULL YES BTREE
t1 1 a 4 d A 0 NULL NULL YES BTREE
set myisam_stats_method=DEFAULT;
......@@ -637,4 +637,23 @@ show index from t1;
drop table t1;
# WL#2609, CSC#XXXX: MyISAM
set myisam_stats_method=nulls_ignored;
show variables like 'myisam_stats_method';
create table t1 (
a char(3), b char(4), c char(5), d char(6),
key(a,b,c,d)
);
insert into t1 values ('bcd','def1', NULL, 'zz');
insert into t1 values ('bcd','def2', NULL, 'zz');
insert into t1 values ('bce','def1', 'yuu', NULL);
insert into t1 values ('bce','def2', NULL, 'quux');
analyze table t1;
show index from t1;
delete from t1;
analyze table t1;
show index from t1;
set myisam_stats_method=DEFAULT;
# End of 4.1 tests
......@@ -75,7 +75,7 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length,
SYNOPSIS
ha_key_cmp()
keyseg Key segments of key to compare
keyseg Array of key segments of key to compare
a First key to compare, in format from _mi_pack_key()
This is normally key specified by user
b Second key to compare. This is always from a row
......@@ -84,10 +84,32 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length,
next_flag How keys should be compared
If bit SEARCH_FIND is not set the keys includes the row
position and this should also be compared
diff_pos OUT Number of first keypart where values differ, counting
from one.
DESCRIPTION
If SEARCH_RETURN_B_POS flag is set, diff_pos must point to array of 2
values, first value has the meaning as described in parameter
description above, the second value is:
diff_pos[1] OUT (b + diff_pos[1]) points to first value in tuple b
that is different from corresponding value in tuple a.
EXAMPLES
Example1: if the function is called for tuples
('aaa','bbb') and ('eee','fff'), then
diff_pos[0] = 1 (as 'aaa' != 'eee')
diff_pos[1] = 0 (offset from beginning of tuple b to 'eee' keypart).
Example2: if the index function is called for tuples
('aaa','bbb') and ('aaa','fff'),
diff_pos[0] = 2 (as 'aaa' == 'aaa' but 'bbb' != 'fff')
diff_pos[1] = 3 (offset from beginning of tuple b to 'fff' keypart,
here we assume that first key part is CHAR(3) NOT NULL)
NOTES
Number-keys can't be split
RETURN VALUES
<0 If a < b
0 If a == b
......@@ -107,6 +129,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
float f_1,f_2;
double d_1,d_2;
uint next_key_length;
uchar *orig_b= b;
*diff_pos=0;
for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++)
......@@ -115,6 +138,9 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
uint piks=! (keyseg->flag & HA_NO_SORT);
(*diff_pos)++;
if (nextflag & SEARCH_RETURN_B_POS)
diff_pos[1]= (uint)(b - orig_b);
/* Handle NULL part */
if (keyseg->null_bit)
{
......@@ -448,3 +474,84 @@ end:
}
return 0;
} /* ha_key_cmp */
/*
  Locate the first NULL value in a tuple of index-suffix values

  SYNOPSIS
    ha_find_null()
      keyseg     Array of key parts describing the key suffix
      a          Values tuple for that key suffix

  DESCRIPTION
    Walks the key parts and the values tuple in parallel, stepping over
    each stored value, until a nullable key part whose marker byte is zero
    is met or the terminating key part is reached.

    TODO Consider optimizing this function or its use so we don't search
    for NULL values in completely NOT NULL index suffixes.

  RETURN
    The first key part whose value in the tuple is NULL, or the terminating
    key part (keyseg->type == HA_KEYTYPE_END) if the tuple holds no NULLs.
*/

HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a)
{
  while ((enum ha_base_keytype) keyseg->type != HA_KEYTYPE_END)
  {
    uchar *value_end;

    /* A nullable key part starts with a marker byte; zero means NULL */
    if (keyseg->null_bit && !*a++)
      return keyseg;

    /* End of the value when it occupies the full key part length */
    value_end= a + keyseg->length;

    switch ((enum ha_base_keytype) keyseg->type) {
    case HA_KEYTYPE_TEXT:
    case HA_KEYTYPE_BINARY:
      if (!(keyseg->flag & HA_SPACE_PACK))
      {
        a= value_end;
        break;
      }
      /* Space-packed values are skipped exactly like the VAR* types */
      /* fall through */
    case HA_KEYTYPE_VARTEXT:
    case HA_KEYTYPE_VARBINARY:
    {
      int stored_length;
      get_key_length(stored_length, a);
      a+= stored_length;
      break;
    }
    case HA_KEYTYPE_NUM:
      if (keyseg->flag & HA_SPACE_PACK)
      {
        /* One length byte, followed by that many bytes of data */
        int packed_length= *a++;
        a+= packed_length;
      }
      else
        a= value_end;
      break;
    case HA_KEYTYPE_INT8:
    case HA_KEYTYPE_SHORT_INT:
    case HA_KEYTYPE_USHORT_INT:
    case HA_KEYTYPE_LONG_INT:
    case HA_KEYTYPE_ULONG_INT:
    case HA_KEYTYPE_INT24:
    case HA_KEYTYPE_UINT24:
#ifdef HAVE_LONG_LONG
    case HA_KEYTYPE_LONGLONG:
    case HA_KEYTYPE_ULONGLONG:
#endif
    case HA_KEYTYPE_FLOAT:
    case HA_KEYTYPE_DOUBLE:
      /* Fixed-size value */
      a= value_end;
      break;
    }
    keyseg++;
  }
  return keyseg;
}
......@@ -40,7 +40,7 @@ TYPELIB myisam_recover_typelib= {array_elements(myisam_recover_names)-1,"",
myisam_recover_names, NULL};
const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal",
NullS};
"nulls_ignored", NullS};
TYPELIB myisam_stats_method_typelib= {
array_elements(myisam_stats_method_names) - 1, "",
myisam_stats_method_names, NULL};
......
......@@ -5281,7 +5281,8 @@ The minimum value for this variable is 4096.",
GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ~0L, 0, 1, 0},
{"myisam_stats_method", OPT_MYISAM_STATS_METHOD,
"Specifies how MyISAM index statistics collection code should threat NULLs. "
"Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).",
"Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), "
"\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".",
(gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"net_buffer_length", OPT_NET_BUFFER_LENGTH,
......@@ -6475,16 +6476,26 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
}
case OPT_MYISAM_STATS_METHOD:
{
myisam_stats_method_str= argument;
int method;
ulong method_conv;
myisam_stats_method_str= argument;
if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
{
fprintf(stderr, "Invalid value of myisam_stats_method: %s.\n", argument);
exit(1);
}
global_system_variables.myisam_stats_method=
test(method-1)? MI_STATS_METHOD_NULLS_EQUAL :
MI_STATS_METHOD_NULLS_NOT_EQUAL;
/*
  Translate the 1-based index returned by find_type() into a statistics
  method. The case labels must follow the order of
  myisam_stats_method_names: "nulls_unequal", "nulls_equal",
  "nulls_ignored".
*/
switch (method-1) {
case 1:
  method_conv= MI_STATS_METHOD_NULLS_EQUAL;
  break;
case 2:
  method_conv= MI_STATS_METHOD_IGNORE_NULLS;
  break;
case 0:
default:
  /*
    "nulls_unequal" is the documented default. The default label also
    guarantees method_conv is never read uninitialized.
  */
  method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL;
  break;
}
global_system_variables.myisam_stats_method= method_conv;
break;
}
case OPT_SQL_MODE:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment