MDEV-6989 BINARY and COLLATE xxx_bin comparisions are not used for optimization in some cases

75d65b5f · Alexander Barkov · bd210585 · 75d65b5f · 75d65b5f · 75d65b5f
Commit 75d65b5f authored Mar 13, 2015 by Alexander Barkov
8 changed files
--- a/mysql-test/r/range.result
+++ b/mysql-test/r/range.result
@@ -618,10 +618,10 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	ref	a	a	11	const	2	Using index condition
 explain select * from t1 where a=binary 'aaa';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	a	a	11	NULL	2	Using index condition
+1	SIMPLE	t1	ref	a	a	11	const	2	Using index condition
 explain select * from t1 where a='aaa' collate latin1_bin;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	a	a	11	NULL	2	Using index condition
+1	SIMPLE	t1	ref	a	a	11	const	2	Using index condition
 explain select * from t1 where a='aaa' collate latin1_german1_ci;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	ALL	a	NULL	NULL	NULL	9	Using where

--- a/mysql-test/r/range_mrr_icp.result
+++ b/mysql-test/r/range_mrr_icp.result
@@ -620,10 +620,10 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	ref	a	a	11	const	2	Using index condition
 explain select * from t1 where a=binary 'aaa';
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	a	a	11	NULL	2	Using index condition; Rowid-ordered scan
+1	SIMPLE	t1	ref	a	a	11	const	2	Using index condition
 explain select * from t1 where a='aaa' collate latin1_bin;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	a	a	11	NULL	2	Using index condition; Rowid-ordered scan
+1	SIMPLE	t1	ref	a	a	11	const	2	Using index condition
 explain select * from t1 where a='aaa' collate latin1_german1_ci;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	ALL	a	NULL	NULL	NULL	9	Using where

--- a/mysql-test/r/type_varchar.result
+++ b/mysql-test/r/type_varchar.result
@@ -549,5 +549,65 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	index	PRIMARY	PRIMARY	22	NULL	2	Using where; Using index
 DROP TABLE IF EXISTS t1,t2;
 #
+# MDEV-6989 BINARY and COLLATE xxx_bin comparisions are not used for optimization in some cases
+#
+CREATE TABLE t1 (c1 VARCHAR(20) CHARACTER SET latin1, PRIMARY KEY(c1));
+INSERT INTO t1 VALUES ('a'),('b'),('c'),('d');
+SELECT * FROM t1 WHERE c1=BINARY 'a';
+c1
+a
+EXPLAIN SELECT * FROM t1 WHERE c1=BINARY 'a';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	const	PRIMARY	PRIMARY	22	const	1	Using index
+SELECT * FROM t1 WHERE c1=_latin1'a' COLLATE latin1_bin;
+c1
+a
+EXPLAIN SELECT * FROM t1 WHERE c1=_latin1'a' COLLATE latin1_bin;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	const	PRIMARY	PRIMARY	22	const	1	Using index
+DROP TABLE t1;
+CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_bin);
+INSERT INTO t1 VALUES ('a');
+CREATE TABLE t2 (c1 VARCHAR(10) CHARACTER SET latin1, PRIMARY KEY(c1));
+INSERT INTO t2 VALUES ('a'),('b');
+SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
+c1	c1
+a	a
+EXPLAIN SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	system	NULL	NULL	NULL	NULL	1	
+1	SIMPLE	t2	const	PRIMARY	PRIMARY	12	const	1	Using index
+ALTER TABLE t1 MODIFY c1 VARBINARY(10);
+SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
+c1	c1
+a	a
+EXPLAIN SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	system	NULL	NULL	NULL	NULL	1	
+1	SIMPLE	t2	const	PRIMARY	PRIMARY	12	const	1	Using index
+DROP TABLE t1, t2;
+CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_bin);
+INSERT INTO t1 VALUES ('a'),('c');
+CREATE TABLE t2 (c1 VARCHAR(10) CHARACTER SET latin1, PRIMARY KEY(c1));
+INSERT INTO t2 VALUES ('a'),('b');
+SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
+c1
+a
+c
+# t2 should be eliminated
+EXPLAIN SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	2	
+ALTER TABLE t1 MODIFY c1 VARBINARY(10);
+SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
+c1
+a
+c
+# t2 should be eliminated
+EXPLAIN SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	2	
+DROP TABLE t1,t2;
+#
 # End of 10.0 tests
 #
--- a/mysql-test/t/type_varchar.test
+++ b/mysql-test/t/type_varchar.test
@@ -240,6 +240,42 @@ SELECT t1.* FROM t1 LEFT JOIN t2 ON t1.c1=t2.c1;
 EXPLAIN SELECT t1.* FROM t1 LEFT JOIN t2 ON t1.c1=t2.c1;
 DROP TABLE IF EXISTS t1,t2;

+
+--echo #
+--echo # MDEV-6989 BINARY and COLLATE xxx_bin comparisions are not used for optimization in some cases
+--echo #
+CREATE TABLE t1 (c1 VARCHAR(20) CHARACTER SET latin1, PRIMARY KEY(c1));
+INSERT INTO t1 VALUES ('a'),('b'),('c'),('d');
+SELECT * FROM t1 WHERE c1=BINARY 'a';
+EXPLAIN SELECT * FROM t1 WHERE c1=BINARY 'a';
+SELECT * FROM t1 WHERE c1=_latin1'a' COLLATE latin1_bin;
+EXPLAIN SELECT * FROM t1 WHERE c1=_latin1'a' COLLATE latin1_bin;
+DROP TABLE t1;
+
+CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_bin);
+INSERT INTO t1 VALUES ('a');
+CREATE TABLE t2 (c1 VARCHAR(10) CHARACTER SET latin1, PRIMARY KEY(c1));
+INSERT INTO t2 VALUES ('a'),('b');
+SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
+EXPLAIN SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
+ALTER TABLE t1 MODIFY c1 VARBINARY(10);
+SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
+EXPLAIN SELECT * FROM t1, t2 WHERE t1.c1=t2.c1;
+DROP TABLE t1, t2;
+
+CREATE TABLE t1 (c1 VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_bin);
+INSERT INTO t1 VALUES ('a'),('c');
+CREATE TABLE t2 (c1 VARCHAR(10) CHARACTER SET latin1, PRIMARY KEY(c1));
+INSERT INTO t2 VALUES ('a'),('b');
+SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
+--echo # t2 should be eliminated
+EXPLAIN SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
+ALTER TABLE t1 MODIFY c1 VARBINARY(10);
+SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
+--echo # t2 should be eliminated
+EXPLAIN SELECT t1.* FROM t1 LEFT JOIN t2 USING (c1);
+DROP TABLE t1,t2;
+
 --echo #
 --echo # End of 10.0 tests
 --echo #
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -1246,11 +1246,23 @@ double Field::pos_in_interval_val_str(Field *min, Field *max, uint data_offset)
 }


+/*
+  This handles all numeric and BIT data types.
+*/ 
+bool Field::can_optimize_keypart_ref(const Item_func *cond,
+                                     const Item *item) const
+{
+  DBUG_ASSERT(cmp_type() != STRING_RESULT);
+  DBUG_ASSERT(cmp_type() != TIME_RESULT);
+  return item->cmp_type() != TIME_RESULT;
+}
+
+
 /*
  This handles all numeric and BIT data types.
 */ 
 bool Field::can_optimize_group_min_max(const Item_bool_func2 *cond,
-                                       const Item *const_item)
+                                       const Item *const_item) const
 {
  DBUG_ASSERT(cmp_type() != STRING_RESULT);
  DBUG_ASSERT(cmp_type() != TIME_RESULT);
@@ -5270,8 +5282,15 @@ my_decimal *Field_temporal::val_decimal(my_decimal *d)
 }


+bool Field_temporal::can_optimize_keypart_ref(const Item_func *cond,
+                                              const Item *value) const
+{
+  return true; // Field is of TIME_RESULT, which supersedes everything else.
+}
+
+
 bool Field_temporal::can_optimize_group_min_max(const Item_bool_func2 *cond,
-                                                const Item *const_item)
+                                                const Item *const_item) const
 {
  return true; // Field is of TIME_RESULT, which supersedes everything else.
 }
@@ -6468,15 +6487,50 @@ uint32 Field_longstr::max_data_length() const
 }


-bool Field_longstr::can_optimize_group_min_max(const Item_bool_func2 *cond,
-                                               const Item *const_item)
+bool
+Field_longstr::cmp_to_string_with_same_collation(const Item_func *cond,
+                                                 const Item *item) const
 {
-  // Can't use indexes when comparing a string to a number or a date
-  if (const_item->cmp_type() != STRING_RESULT)
-    return false;
+  return item->cmp_type() == STRING_RESULT &&
+         charset() == cond->compare_collation();
+}

-  // Don't use an index when comparing strings of different collations.
-  return charset() ==  cond->compare_collation();
+
+bool
+Field_longstr::cmp_to_string_with_stricter_collation(const Item_func *cond,
+                                                     const Item *item) const
+{
+  return item->cmp_type() == STRING_RESULT &&
+         (charset() == cond->compare_collation() ||
+          cond->compare_collation()->state & MY_CS_BINSORT);
+}
+
+
+bool Field_longstr::can_optimize_keypart_ref(const Item_func *cond,
+                                             const Item *item) const
+{
+  DBUG_ASSERT(cmp_type() == STRING_RESULT);
+  return cmp_to_string_with_stricter_collation(cond, item);
+}
+
+
+bool Field_longstr::can_optimize_hash_join(const Item_func *cond,
+                                           const Item *item) const
+{
+  DBUG_ASSERT(cmp_type() == STRING_RESULT);
+  return cmp_to_string_with_same_collation(cond, item);
+}
+
+
+bool Field_longstr::can_optimize_group_min_max(const Item_bool_func2 *cond,
+                                               const Item *const_item) const
+{
+  /*
+    Can't use indexes when comparing a string to a number or a date
+    Don't use an index when comparing strings of different collations.
+  */
+  DBUG_ASSERT(cmp_type() == STRING_RESULT);
+  return cmp_to_string_with_same_collation(cond, const_item);
 }


@@ -8489,6 +8543,31 @@ uint Field_num::is_equal(Create_field *new_field)
 }


+bool Field_enum::can_optimize_keypart_ref(const Item_func *cond,
+                                          const Item *item) const
+{
+  DBUG_ASSERT(cmp_type() == INT_RESULT);
+  DBUG_ASSERT(result_type() == STRING_RESULT);
+
+  switch (item->cmp_type())
+  {
+  case TIME_RESULT:
+    return false;
+  case INT_RESULT:
+  case DECIMAL_RESULT:
+  case REAL_RESULT:
+    return true;
+  case STRING_RESULT:
+    return charset() == ((Item_func*)cond)->compare_collation();
+  case IMPOSSIBLE_RESULT:
+  case ROW_RESULT:
+    DBUG_ASSERT(0);
+    break;
+  }
+  return false;
+}
+
+
 /*
  Bit field.


--- a/sql/field.h
+++ b/sql/field.h
@@ -39,6 +39,7 @@ class Relay_log_info;
 class Field;
 class Column_statistics;
 class Column_statistics_collected;
+class Item_func;
 class Item_bool_func2;

 enum enum_check_fields
@@ -964,9 +965,21 @@ public:
    return (double) 0.5; 
  }

+  virtual bool can_optimize_keypart_ref(const Item_func *cond,
+                                        const Item *item) const;
+  virtual bool can_optimize_hash_join(const Item_func *cond,
+                                      const Item *item) const
+  {
+    return can_optimize_keypart_ref(cond, item);
+  }
  virtual bool can_optimize_group_min_max(const Item_bool_func2 *cond,
-                                          const Item *const_item);
-
+                                          const Item *const_item) const;
+  bool can_optimize_outer_join_table_elimination(const Item_func *cond,
+                                                 const Item *item) const
+  {
+    // Exactly the same rules with REF access
+    return can_optimize_keypart_ref(cond, item);
+  }
  friend int cre_myisam(char * name, register TABLE *form, uint options,
 			ulonglong auto_increment_value);
  friend class Copy_field;
@@ -1147,6 +1160,10 @@ protected:
    return report_if_important_data(copier->source_end_pos(),
                                    end, count_spaces);
  }
+  bool cmp_to_string_with_same_collation(const Item_func *cond,
+                                         const Item *item) const;
+  bool cmp_to_string_with_stricter_collation(const Item_func *cond,
+                                             const Item *item) const;
 public:
  Field_longstr(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg,
                uchar null_bit_arg, utype unireg_check_arg,
@@ -1158,8 +1175,10 @@ public:
  int store_decimal(const my_decimal *d);
  uint32 max_data_length() const;
  bool match_collation_to_optimize_range() const { return true; }
+  bool can_optimize_keypart_ref(const Item_func *cond, const Item *item) const;
+  bool can_optimize_hash_join(const Item_func *cond, const Item *item) const;
  bool can_optimize_group_min_max(const Item_bool_func2 *cond,
-                                  const Item *const_item);
+                                  const Item *const_item) const;
 };

 /* base class for float and double and decimal (old one) */
@@ -1587,8 +1606,13 @@ public:
  uint size_of() const { return sizeof(*this); }
  uint32 max_display_length() { return 4; }
  void move_field_offset(my_ptrdiff_t ptr_diff) {}
+  bool can_optimize_keypart_ref(const Item_func *cond, const Item *item) const
+  {
+    DBUG_ASSERT(0);
+    return false;
+  }
  bool can_optimize_group_min_max(const Item_bool_func2 *cond,
-                                  const Item *const_item)
+                                  const Item *const_item) const
  {
    DBUG_ASSERT(0);
    return false;
@@ -1625,8 +1649,9 @@ public:
  {
    return pos_in_interval_val_real(min, max);
  }
+  bool can_optimize_keypart_ref(const Item_func *cond, const Item *item) const;
  bool can_optimize_group_min_max(const Item_bool_func2 *cond,
-                                  const Item *const_item);
+                                  const Item *const_item) const;
 };


@@ -2664,8 +2689,9 @@ public:
  virtual const uchar *unpack(uchar *to, const uchar *from,
                              const uchar *from_end, uint param_data);

+  bool can_optimize_keypart_ref(const Item_func *cond, const Item *item) const;
  bool can_optimize_group_min_max(const Item_bool_func2 *cond,
-                                  const Item *const_item)
+                                  const Item *const_item) const
  {
    /*
      Can't use GROUP_MIN_MAX optimization for ENUM and SET,

--- a/sql/opt_table_elimination.cc
+++ b/sql/opt_table_elimination.cc
@@ -1486,28 +1486,8 @@ void check_equality(Dep_analysis_context *ctx, Dep_module_expr **eq_mod,
      left->real_item()->type() == Item::FIELD_ITEM)
  {
    Field *field= ((Item_field*)left->real_item())->field;
-    if (right->cmp_type() == TIME_RESULT && field->cmp_type() != TIME_RESULT)
+    if (!field->can_optimize_outer_join_table_elimination(cond, right))
      return;
-    if (field->result_type() == STRING_RESULT)
-    {
-      if (right->result_type() != STRING_RESULT)
-      {
-        if (field->cmp_type() != right->result_type())
-          return;
-      }
-      else
-      {
-        /*
-          We can't assume there's a functional dependency if the effective
-          collation of the operation differ from the field collation.
-        */
-        if ((field->cmp_type() == STRING_RESULT ||
-            field->real_type() == MYSQL_TYPE_ENUM ||
-            field->real_type() == MYSQL_TYPE_SET) &&
-            field->charset() != cond->compare_collation())
-          return;
-      }
-    }
    Dep_value_field *field_val;
    if ((field_val= ctx->get_field_value(field)))
      add_module_expr(ctx, eq_mod, and_level, field_val, right, NULL);

--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -4124,6 +4124,7 @@ error:
 /// Used when finding key fields
 typedef struct key_field_t {
  Field		*field;
+  Item_func     *cond;
  Item		*val;			///< May be empty if diff constant
  uint		level;
  uint		optimize;
@@ -4466,46 +4467,6 @@ add_key_field(JOIN *join,
      }
      if (!eq_func) // eq_func is NEVER true when num_values > 1
        return;
-
-      if ((*value)->cmp_type() == TIME_RESULT &&
-          field->cmp_type() != TIME_RESULT)
-        return;
-
-      /*
-        Note, for ITEM/ENUM columns:
-        - field->cmp_type() returns INT_RESULT
-        - field->result_type() returns STRING_RESULT
-        - field->type() returns MYSQL_TYPE_STRING
-
-        Using field->real_type() to detect ENUM/SET,
-        as they need a special handling:
-        - Conditions between a ENUM/SET filter and a TIME expression
-          cannot be optimized. They were filtered out in the previous if block.
-        - It's Ok to use ref access for an ENUM/SET field compared to an
-          INT/REAL/DECIMAL expression.
-        - It's Ok to use ref for an ENUM/SET field compared to a STRING
-          expression if the collation of the field and the collation of
-          the condition match.
-      */
-      if ((field->real_type() == MYSQL_TYPE_ENUM ||
-           field->real_type() == MYSQL_TYPE_SET) &&
-          (*value)->cmp_type () == STRING_RESULT &&
-          field->charset() != cond->compare_collation())
-        return;
-
-      /*
-	We can't use indexes when comparing a string index to a
-	number or two strings if the effective collation
-        of the operation differ from the field collation.
-       */
-
-      if (field->cmp_type() == STRING_RESULT)
-      {
-        if ((*value)->cmp_type() != STRING_RESULT)
-            return;
-        if (field->charset() != cond->compare_collation())
-          return;
-      }
    }
  }
  /*
@@ -4517,6 +4478,7 @@ add_key_field(JOIN *join,
  (*key_fields)->field=		field;
  (*key_fields)->eq_func=	eq_func;
  (*key_fields)->val=		*value;
+  (*key_fields)->cond=          cond;
  (*key_fields)->level=         and_level;
  (*key_fields)->optimize=      optimize;
  /*
@@ -4958,7 +4920,8 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field)
      uint key_parts= form->actual_n_key_parts(keyinfo);
      for (uint part=0 ; part <  key_parts ; part++)
      {
-	if (field->eq(form->key_info[key].key_part[part].field))
+        if (field->eq(form->key_info[key].key_part[part].field) &&
+            field->can_optimize_keypart_ref(key_field->cond, key_field->val))
 	{
          if (add_keyuse(keyuse_array, key_field, key, part))
            return TRUE;
@@ -4969,6 +4932,8 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field)
        (key_field->optimize & KEY_OPTIMIZE_EQ) &&
        key_field->val->used_tables())
    {
+      if (!field->can_optimize_hash_join(key_field->cond, key_field->val))
+        return false;      
      /* 
        If a key use is extracted from an equi-join predicate then it is
        added not only as a key use for every index whose component can