fixes/optimizations for count(distinct)

more extensive testing of count(distinct)

fixes/optimizations for count(distinct)
more extensive testing of count(distinct)
b2977103 · sasha@mysql.sashanet.com · c706bf40 · b2977103 · b2977103 · b2977103
Commit b2977103 authored May 12, 2001 by sasha@mysql.sashanet.com
Showing with 130 additions and 9 deletions

mysql-test/r/count_distinct2.result +72 -0

mysql-test/t/count_distinct2.test +42 -0

sql/item_sum.cc +16 -9

No files found.
--- a/mysql-test/r/count_distinct2.result
+++ b/mysql-test/r/count_distinct2.result
+n1
+1
+2
+NULL
+count(distinct n1)
+2
+n2
+11
+12
+13
+NULL
+count(distinct n2)
+3
+s
+one
+two
+NULL
+count(distinct s)
+2
+vs
+eleven
+twevle
+thirteen
+NULL
+count(distinct vs)
+3
+t
+eleven
+twelve
+foo
+bar
+NULL
+count(distinct t)
+4
+n1	n2
+1	11
+2	11
+2	12
+2	13
+NULL	13
+2	NULL
+count(distinct n1,n2)
+4
+n1	s
+1	one
+2	two
+NULL	two
+2	NULL
+count(distinct n1,s)
+2
+s	n1	vs
+one	1	eleven
+two	2	eleven
+two	2	twevle
+two	2	thirteen
+two	NULL	thirteen
+NULL	2	thirteen
+two	2	NULL
+count(distinct s,n1,vs)
+4
+s	t
+one	eleven
+two	eleven
+two	twelve
+two	foo
+two	bar
+NULL	bar
+two	NULL
+count(distinct s,t)
+5
+count(distinct n1)	count(distinct n2)
+2	3
--- a/mysql-test/t/count_distinct2.test
+++ b/mysql-test/t/count_distinct2.test
+create table t1(n1 int, n2 int, s char(20), vs varchar(20), t text);
+insert into t1 values (1,11, 'one','eleven', 'eleven'),
+ (1,11, 'one','eleven', 'eleven'),
+ (2,11, 'two','eleven', 'eleven'),
+ (2,12, 'two','twevle', 'twelve'),
+ (2,13, 'two','thirteen', 'foo'),
+ (2,13, 'two','thirteen', 'foo'),
+ (2,13, 'two','thirteen', 'bar'),
+ (NULL,13, 'two','thirteen', 'bar'),
+ (2,NULL, 'two','thirteen', 'bar'),
+ (2,13, NULL,'thirteen', 'bar'),
+ (2,13, 'two',NULL, 'bar'),
+ (2,13, 'two','thirteen', NULL);
+
+select distinct n1 from t1;
+select count(distinct n1) from t1;
+
+select distinct n2 from t1;
+select count(distinct n2) from t1;
+
+select distinct s from t1;
+select count(distinct s) from t1;
+
+select distinct vs from t1;
+select count(distinct vs) from t1;
+
+select distinct t from t1;
+select count(distinct t) from t1;
+
+select distinct n1,n2 from t1;
+select count(distinct n1,n2) from t1;
+
+select distinct n1,s from t1;
+select count(distinct n1,s) from t1;
+
+select distinct s,n1,vs from t1;
+select count(distinct s,n1,vs) from t1;
+
+select distinct s,t from t1;
+select count(distinct s,t) from t1;
+
+select count(distinct n1), count(distinct n2) from t1;
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -810,12 +810,13 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2)
  for(; field < field_end; ++field)
    {
      int res;
-      int len = (*field)->field_length;
+      Field* f = *field;
+      int len = f->field_length;
      switch((*field)->type())
 	{
 	case FIELD_TYPE_STRING:
 	case FIELD_TYPE_VAR_STRING:
-	  res = my_sortcmp(key1, key2, len);
+	  res = f->key_cmp(key1, key2);
 	  break;
 	default:
 	  res = memcmp(key1, key2, len);
@@ -879,20 +880,22 @@ bool Item_sum_count_distinct::setup(THD *thd)
 	// to use a simpler key compare method that can take advantage
 	// of not having to worry about other fields
 	{
-	  switch(table->field[0]->type())
+	  Field* field = table->field[0];
+	  switch(field->type())
 	    {
 	      // if we have a string, we must take care of charsets
 	      // and case sensitivity
 	    case FIELD_TYPE_STRING:
 	    case FIELD_TYPE_VAR_STRING:
-	      compare_key = (qsort_cmp2)simple_str_key_cmp;
+	      compare_key = (qsort_cmp2)(field->binary() ? simple_raw_key_cmp:
+					 simple_str_key_cmp);
 	      break;
 	    default: // since at this point we cannot have blobs
 	      // anything else can be compared with memcmp
 	      compare_key = (qsort_cmp2)simple_raw_key_cmp;
 	      break;
 	    }
-	  cmp_arg = (void*)(key_len = table->field[0]->field_length);
+	  cmp_arg = (void*)(key_len = field->field_length);
 	  rec_offset = 1;
 	}
      else // too bad, cannot cheat - there is more than one field
@@ -908,7 +911,8 @@ bool Item_sum_count_distinct::setup(THD *thd)
 	  rec_offset = table->reclength - key_len;
 	}

-      init_tree(&tree, 0, key_len, compare_key, 0, 0);
+      init_tree(&tree, min(max_heap_table_size, sortbuff_size/16),
+		key_len, compare_key, 0, 0);
      tree.cmp_arg = cmp_arg;
      use_tree = 1;
    }
@@ -919,11 +923,14 @@ bool Item_sum_count_distinct::setup(THD *thd)

 void Item_sum_count_distinct::reset()
 {
-  table->file->extra(HA_EXTRA_NO_CACHE);
-  table->file->delete_all_rows();
-  table->file->extra(HA_EXTRA_WRITE_CACHE);
  if(use_tree)
    delete_tree(&tree);
+  else
+    {
+      table->file->extra(HA_EXTRA_NO_CACHE);
+      table->file->delete_all_rows();
+      table->file->extra(HA_EXTRA_WRITE_CACHE);
+    }
  (void) add();
 }