fixes/optimizations for count(distinct)

more extensive testing of count(distinct)
parent c706bf40
n1
1
2
NULL
count(distinct n1)
2
n2
11
12
13
NULL
count(distinct n2)
3
s
one
two
NULL
count(distinct s)
2
vs
eleven
twevle
thirteen
NULL
count(distinct vs)
3
t
eleven
twelve
foo
bar
NULL
count(distinct t)
4
n1 n2
1 11
2 11
2 12
2 13
NULL 13
2 NULL
count(distinct n1,n2)
4
n1 s
1 one
2 two
NULL two
2 NULL
count(distinct n1,s)
2
s n1 vs
one 1 eleven
two 2 eleven
two 2 twevle
two 2 thirteen
two NULL thirteen
NULL 2 thirteen
two 2 NULL
count(distinct s,n1,vs)
4
s t
one eleven
two eleven
two twelve
two foo
two bar
NULL bar
two NULL
count(distinct s,t)
5
count(distinct n1) count(distinct n2)
2 3
# Test of count(distinct ...) on int, char, varchar and text columns,
# both one column at a time and with several columns in one count(distinct).
#
# Every "select distinct ..." is paired with the matching
# "select count(distinct ...)" so the count can be checked against the
# listed rows. The select statements are kept verbatim because their text
# is what shows up as the column header in the recorded result.
#
# NOTE(review): the "select distinct" queries have no order by, so the row
# order in the recorded result depends on how the server executes them.

# Make the script rerunnable even if an earlier run left t1 behind.
drop table if exists t1;
create table t1(n1 int, n2 int, s char(20), vs varchar(20), t text);

# Fixture: repeated rows to exercise duplicate elimination, plus one row
# with NULL in each column in turn (count(distinct) must ignore those).
# NOTE: the 'twevle' spelling in vs also appears in the recorded expected
# output; change both together or not at all.
insert into t1 (n1, n2, s, vs, t) values
(1,11, 'one','eleven', 'eleven'),
(1,11, 'one','eleven', 'eleven'),
(2,11, 'two','eleven', 'eleven'),
(2,12, 'two','twevle', 'twelve'),
(2,13, 'two','thirteen', 'foo'),
(2,13, 'two','thirteen', 'foo'),
(2,13, 'two','thirteen', 'bar'),
(NULL,13, 'two','thirteen', 'bar'),
(2,NULL, 'two','thirteen', 'bar'),
(2,13, NULL,'thirteen', 'bar'),
(2,13, 'two',NULL, 'bar'),
(2,13, 'two','thirteen', NULL);

# Single-column cases, one per column type.
select distinct n1 from t1;
select count(distinct n1) from t1;
select distinct n2 from t1;
select count(distinct n2) from t1;
select distinct s from t1;
select count(distinct s) from t1;
select distinct vs from t1;
select count(distinct vs) from t1;
select distinct t from t1;
select count(distinct t) from t1;

# Multi-column cases: a combination is counted only when none of its
# columns is NULL, so the count can be lower than the number of rows
# returned by the matching "select distinct".
select distinct n1,n2 from t1;
select count(distinct n1,n2) from t1;
select distinct n1,s from t1;
select count(distinct n1,s) from t1;
select distinct s,n1,vs from t1;
select count(distinct s,n1,vs) from t1;
select distinct s,t from t1;
select count(distinct s,t) from t1;

# Two independent count(distinct) aggregates in the same query.
select count(distinct n1), count(distinct n2) from t1;

# Clean up so the fixture does not leak into whatever runs next.
drop table t1;
......@@ -810,12 +810,13 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2)
for(; field < field_end; ++field)
{
int res;
int len = (*field)->field_length;
Field* f = *field;
int len = f->field_length;
switch((*field)->type())
{
case FIELD_TYPE_STRING:
case FIELD_TYPE_VAR_STRING:
res = my_sortcmp(key1, key2, len);
res = f->key_cmp(key1, key2);
break;
default:
res = memcmp(key1, key2, len);
......@@ -879,20 +880,22 @@ bool Item_sum_count_distinct::setup(THD *thd)
// to use a simpler key compare method that can take advantage
// of not having to worry about other fields
{
switch(table->field[0]->type())
Field* field = table->field[0];
switch(field->type())
{
// if we have a string, we must take care of charsets
// and case sensitivity
case FIELD_TYPE_STRING:
case FIELD_TYPE_VAR_STRING:
compare_key = (qsort_cmp2)simple_str_key_cmp;
compare_key = (qsort_cmp2)(field->binary() ? simple_raw_key_cmp:
simple_str_key_cmp);
break;
default: // since at this point we cannot have blobs
// anything else can be compared with memcmp
compare_key = (qsort_cmp2)simple_raw_key_cmp;
break;
}
cmp_arg = (void*)(key_len = table->field[0]->field_length);
cmp_arg = (void*)(key_len = field->field_length);
rec_offset = 1;
}
else // too bad, cannot cheat - there is more than one field
......@@ -908,7 +911,8 @@ bool Item_sum_count_distinct::setup(THD *thd)
rec_offset = table->reclength - key_len;
}
init_tree(&tree, 0, key_len, compare_key, 0, 0);
init_tree(&tree, min(max_heap_table_size, sortbuff_size/16),
key_len, compare_key, 0, 0);
tree.cmp_arg = cmp_arg;
use_tree = 1;
}
......@@ -919,11 +923,14 @@ bool Item_sum_count_distinct::setup(THD *thd)
/*
  Throw away the distinct values collected so far and start over.

  Exactly one backing store is cleared, selected by use_tree:
    - use_tree set:  the in-memory tree is emptied with delete_tree()
    - otherwise:     all rows of the temporary table are deleted through
                     its handler

  The table handler is only touched on the non-tree path; clearing it
  unconditionally as well would repeat the work already done in the else
  branch and would hit the table even when the tree is the active store.
*/
void Item_sum_count_distinct::reset()
{
  if (use_tree)
    delete_tree(&tree);
  else
  {
    // NOTE(review): presumably caching has to be off while the rows are
    // deleted and write caching is re-enabled for the inserts that
    // follow -- confirm against the handler documentation.
    table->file->extra(HA_EXTRA_NO_CACHE);
    table->file->delete_all_rows();
    table->file->extra(HA_EXTRA_WRITE_CACHE);
  }
  // Result of add() is deliberately ignored here.
  (void) add();
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment