BUG#31159 - fulltext search on ucs2 column crashes server

ucs2 doesn't provide the ctype array required by fulltext. The crash happens because fulltext attempts to use an uninitialized ctype array. Fixed by converting ucs2 fields to a compatible utf8 analogue. include/my_sys.h: Added a function to find a compatible character set with a ctype array available. Currently used by fulltext search to find a compatible substitute for ucs2 collations. mysql-test/r/ctype_ucs.result: A test case for BUG#31159. mysql-test/t/ctype_ucs.test: A test case for BUG#31159. mysys/charset.c: Added a function to find a compatible character set with a ctype array available. Currently used by fulltext search to find a compatible substitute for ucs2 collations. sql/item_func.cc: Convert ucs2 fields to utf8. Fulltext requires a ctype array, but ucs2 doesn't provide it.

BUG#31159 - fulltext search on ucs2 column crashes server
ucs2 doesn't provide the ctype array required by fulltext. The crash happens because fulltext attempts to use an uninitialized ctype array. Fixed by converting ucs2 fields to a compatible utf8 analogue. include/my_sys.h: Added a function to find a compatible character set with a ctype array available. Currently used by fulltext search to find a compatible substitute for ucs2 collations. mysql-test/r/ctype_ucs.result: A test case for BUG#31159. mysql-test/t/ctype_ucs.test: A test case for BUG#31159. mysys/charset.c: Added a function to find a compatible character set with a ctype array available. Currently used by fulltext search to find a compatible substitute for ucs2 collations. sql/item_func.cc: Convert ucs2 fields to utf8. Fulltext requires a ctype array, but ucs2 doesn't provide it.
660d6626 · unknown · d244374f · 660d6626 · 660d6626 · 660d6626
Commit 660d6626 authored Oct 24, 2007 by unknown
5 changed files
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -784,6 +784,8 @@ extern CHARSET_INFO *get_charset(uint cs_number, myf flags);
 extern CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags);
 extern CHARSET_INFO *get_charset_by_csname(const char *cs_name,
 					   uint cs_flags, myf my_flags);
+extern CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO
+                                                       *original_cs);
 extern void free_charsets(void);
 extern char *get_charsets_dir(char *buf);
 extern my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2);

--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@@ -803,4 +803,10 @@ quote(name)
 ????????
 ????????????????
 drop table bug20536;
+CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci);
+INSERT INTO t1 VALUES('abcd');
+SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE);
+a
+abcd
+DROP TABLE t1;
 End of 4.1 tests
--- a/mysql-test/t/ctype_ucs.test
+++ b/mysql-test/t/ctype_ucs.test
@@ -535,4 +535,12 @@ select quote(name) from bug20536;

 drop table bug20536;

+#
+# BUG#31159 - fulltext search on ucs2 column crashes server
+#
+CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci);
+INSERT INTO t1 VALUES('abcd');
+SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE);
+DROP TABLE t1;
+
 --echo End of 4.1 tests
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -673,3 +673,43 @@ CHARSET_INFO *fs_character_set()
  return fs_cset_cache;
 }
 #endif
+
+
+/**
+  @brief Find a compatible character set that provides a ctype array.
+
+  @param[in] original_cs Original character set (dereferenced unchecked)
+
+  @note Only "ucs2" is handled; the candidate is collation number + 64:
+    128 my_charset_ucs2_general_uca      ->192 my_charset_utf8_general_uca_ci
+    129 my_charset_ucs2_icelandic_uca_ci ->193 my_charset_utf8_icelandic_uca_ci
+    130 my_charset_ucs2_latvian_uca_ci   ->194 my_charset_utf8_latvian_uca_ci
+    131 my_charset_ucs2_romanian_uca_ci  ->195 my_charset_utf8_romanian_uca_ci
+    132 my_charset_ucs2_slovenian_uca_ci ->196 my_charset_utf8_slovenian_uca_ci
+    133 my_charset_ucs2_polish_uca_ci    ->197 my_charset_utf8_polish_uca_ci
+    134 my_charset_ucs2_estonian_uca_ci  ->198 my_charset_utf8_estonian_uca_ci
+    135 my_charset_ucs2_spanish_uca_ci   ->199 my_charset_utf8_spanish_uca_ci
+    136 my_charset_ucs2_swedish_uca_ci   ->200 my_charset_utf8_swedish_uca_ci
+    137 my_charset_ucs2_turkish_uca_ci   ->201 my_charset_utf8_turkish_uca_ci
+    138 my_charset_ucs2_czech_uca_ci     ->202 my_charset_utf8_czech_uca_ci
+    139 my_charset_ucs2_danish_uca_ci    ->203 my_charset_utf8_danish_uca_ci
+    140 my_charset_ucs2_lithuanian_uca_ci->204 my_charset_utf8_lithuanian_uca_ci
+    141 my_charset_ucs2_slovak_uca_ci    ->205 my_charset_utf8_slovak_uca_ci
+    142 my_charset_ucs2_spanish2_uca_ci  ->206 my_charset_utf8_spanish2_uca_ci
+    143 my_charset_ucs2_roman_uca_ci     ->207 my_charset_utf8_roman_uca_ci
+    144 my_charset_ucs2_persian_uca_ci   ->208 my_charset_utf8_persian_uca_ci
+
+  @return Compatible character set, or NULL if no candidate qualifies.
+*/
+
+CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO *original_cs)
+{
+  CHARSET_INFO *compatible_cs= 0;
+  DBUG_ENTER("get_compatible_charset_with_ctype");
+  if (!strcmp(original_cs->csname, "ucs2") &&
+      (compatible_cs= get_charset(original_cs->number + 64, MYF(0))) &&
+      (!compatible_cs->ctype ||
+       strcmp(original_cs->name + 4, compatible_cs->name + 4)))
+    compatible_cs= 0; /* reject: no ctype, or names differ past 4-char prefix */
+  DBUG_RETURN(compatible_cs);
+}
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -3135,13 +3135,44 @@ bool Item_func_match::fix_fields(THD *thd, TABLE_LIST *tlist, Item **ref)
    my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH");
    return 1;
  }
-  table=((Item_field *)item)->field->table;
+  /*
+    With prepared statements Item_func_match::fix_fields is called twice.
+    When it is called first time we have original item tree here and add
+    conversion layer for character sets that do not have ctype array a few
+    lines below. When it is called second time, we already have conversion
+    layer in item tree.
+  */
+  table= (item->type() == Item::FIELD_ITEM) ?
+         ((Item_field *)item)->field->table :
+         ((Item_field *)((Item_func_conv *)item)->key_item())->field->table;
  if (!(table->file->table_flags() & HA_CAN_FULLTEXT))
  {
    my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0));
    return 1;
  }
  table->fulltext_searched=1;
+  /* A workaround for ucs2 character set */
+  if (!args[1]->collation.collation->ctype)
+  {
+    CHARSET_INFO *compatible_cs=
+      get_compatible_charset_with_ctype(args[1]->collation.collation);
+    bool rc= 1;
+    if (compatible_cs)
+    {
+      Item_string *conv_item= new Item_string("", 0, compatible_cs,
+                                              DERIVATION_EXPLICIT);
+      item= args[0];
+      args[0]= conv_item;
+      rc= agg_item_charsets(cmp_collation, func_name(), args, arg_count,
+                            MY_COLL_ALLOW_SUPERSET_CONV |
+                            MY_COLL_ALLOW_COERCIBLE_CONV |
+                            MY_COLL_DISALLOW_NONE);
+      args[0]= item;
+    }
+    else
+      my_error(ER_WRONG_ARGUMENTS, MYF(0), "MATCH");
+    return rc;
+  }
  return agg_arg_collations_for_comparison(cmp_collation, args+1, arg_count-1);
 }