Commit 660d6626 authored by unknown's avatar unknown

BUG#31159 - fulltext search on ucs2 column crashes server

ucs2 doesn't provide required by fulltext ctype array. Crash
happens because fulltext attempts to use unitialized ctype
array.

Fixed by converting ucs2 fields to compatible utf8 analogue.


include/my_sys.h:
  Added a function to find compatible character set with ctype array
  available. Currently used by fulltext search to find compatible
  substitute for ucs2 collations.
mysql-test/r/ctype_ucs.result:
  A test case for BUG#31159.
mysql-test/t/ctype_ucs.test:
  A test case for BUG#31159.
mysys/charset.c:
  Added a function to find compatible character set with ctype array
  available. Currently used by fulltext search to find compatible
  substitute for ucs2 collations.
sql/item_func.cc:
  Convert ucs2 fields to utf8. Fulltext requires ctype array, but
  ucs2 doesn't provide it.
parent d244374f
......@@ -784,6 +784,8 @@ extern CHARSET_INFO *get_charset(uint cs_number, myf flags);
extern CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags);
extern CHARSET_INFO *get_charset_by_csname(const char *cs_name,
uint cs_flags, myf my_flags);
extern CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO
*original_cs);
extern void free_charsets(void);
extern char *get_charsets_dir(char *buf);
extern my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2);
......
......@@ -803,4 +803,10 @@ quote(name)
????????
????????????????
drop table bug20536;
CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci);
INSERT INTO t1 VALUES('abcd');
SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE);
a
abcd
DROP TABLE t1;
End of 4.1 tests
......@@ -535,4 +535,12 @@ select quote(name) from bug20536;
drop table bug20536;
#
# BUG#31159 - fulltext search on ucs2 column crashes server
#
CREATE TABLE t1(a TEXT CHARSET ucs2 COLLATE ucs2_unicode_ci);
INSERT INTO t1 VALUES('abcd');
SELECT * FROM t1 WHERE MATCH(a) AGAINST ('+abcd' IN BOOLEAN MODE);
DROP TABLE t1;
--echo End of 4.1 tests
......@@ -673,3 +673,43 @@ CHARSET_INFO *fs_character_set()
return fs_cset_cache;
}
#endif
/**
@brief Find compatible character set with ctype.
@param[in] original_cs Original character set
@note
128 my_charset_ucs2_general_uca ->192 my_charset_utf8_general_uca_ci
129 my_charset_ucs2_icelandic_uca_ci ->193 my_charset_utf8_icelandic_uca_ci
130 my_charset_ucs2_latvian_uca_ci ->194 my_charset_utf8_latvian_uca_ci
131 my_charset_ucs2_romanian_uca_ci ->195 my_charset_utf8_romanian_uca_ci
132 my_charset_ucs2_slovenian_uca_ci ->196 my_charset_utf8_slovenian_uca_ci
133 my_charset_ucs2_polish_uca_ci ->197 my_charset_utf8_polish_uca_ci
134 my_charset_ucs2_estonian_uca_ci ->198 my_charset_utf8_estonian_uca_ci
135 my_charset_ucs2_spanish_uca_ci ->199 my_charset_utf8_spanish_uca_ci
136 my_charset_ucs2_swedish_uca_ci ->200 my_charset_utf8_swedish_uca_ci
137 my_charset_ucs2_turkish_uca_ci ->201 my_charset_utf8_turkish_uca_ci
138 my_charset_ucs2_czech_uca_ci ->202 my_charset_utf8_czech_uca_ci
139 my_charset_ucs2_danish_uca_ci ->203 my_charset_utf8_danish_uca_ci
140 my_charset_ucs2_lithuanian_uca_ci->204 my_charset_utf8_lithuanian_uca_ci
141 my_charset_ucs2_slovak_uca_ci ->205 my_charset_utf8_slovak_uca_ci
142 my_charset_ucs2_spanish2_uca_ci ->206 my_charset_utf8_spanish2_uca_ci
143 my_charset_ucs2_roman_uca_ci ->207 my_charset_utf8_roman_uca_ci
144 my_charset_ucs2_persian_uca_ci ->208 my_charset_utf8_persian_uca_ci
@return Compatible character set or NULL.
*/
CHARSET_INFO *get_compatible_charset_with_ctype(CHARSET_INFO *original_cs)
{
CHARSET_INFO *compatible_cs= 0;
DBUG_ENTER("get_compatible_charset_with_ctype");
if (!strcmp(original_cs->csname, "ucs2") &&
(compatible_cs= get_charset(original_cs->number + 64, MYF(0))) &&
(!compatible_cs->ctype ||
strcmp(original_cs->name + 4, compatible_cs->name + 4)))
compatible_cs= 0;
DBUG_RETURN(compatible_cs);
}
......@@ -3135,13 +3135,44 @@ bool Item_func_match::fix_fields(THD *thd, TABLE_LIST *tlist, Item **ref)
my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH");
return 1;
}
table=((Item_field *)item)->field->table;
/*
With prepared statements Item_func_match::fix_fields is called twice.
When it is called first time we have original item tree here and add
conversion layer for character sets that do not have ctype array a few
lines below. When it is called second time, we already have conversion
layer in item tree.
*/
table= (item->type() == Item::FIELD_ITEM) ?
((Item_field *)item)->field->table :
((Item_field *)((Item_func_conv *)item)->key_item())->field->table;
if (!(table->file->table_flags() & HA_CAN_FULLTEXT))
{
my_error(ER_TABLE_CANT_HANDLE_FT, MYF(0));
return 1;
}
table->fulltext_searched=1;
/* A workaround for ucs2 character set */
if (!args[1]->collation.collation->ctype)
{
CHARSET_INFO *compatible_cs=
get_compatible_charset_with_ctype(args[1]->collation.collation);
bool rc= 1;
if (compatible_cs)
{
Item_string *conv_item= new Item_string("", 0, compatible_cs,
DERIVATION_EXPLICIT);
item= args[0];
args[0]= conv_item;
rc= agg_item_charsets(cmp_collation, func_name(), args, arg_count,
MY_COLL_ALLOW_SUPERSET_CONV |
MY_COLL_ALLOW_COERCIBLE_CONV |
MY_COLL_DISALLOW_NONE);
args[0]= item;
}
else
my_error(ER_WRONG_ARGUMENTS, MYF(0), "MATCH");
return rc;
}
return agg_arg_collations_for_comparison(cmp_collation, args+1, arg_count-1);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment