Commit 64b19133 authored by unknown's avatar unknown

Bug#31081 server crash in regexp function

Problem: The "regex" library written by Henry Spencer
does not support tricky character sets like UCS2.
Fix: convert tricky character sets to UTF8 before calling
regex functions.


mysql-test/r/ctype_uca.result:
  Adding tests
mysql-test/r/ctype_ucs.result:
  Adding tests
mysql-test/r/ctype_utf8.result:
  Adding tests
mysql-test/r/func_regexp.result:
  Adding tests
mysql-test/t/ctype_uca.test:
  Adding tests
mysql-test/t/ctype_ucs.test:
  Adding tests
mysql-test/t/ctype_utf8.test:
  Adding tests
mysql-test/t/func_regexp.test:
  Adding tests
sql/item_cmpfunc.cc:
  - Adding new method Item_func_regex::regcomp()
  to share more code between fix_fields() and val_int()
  - Adding conversion from ASCII-incompatible charsets like UCS2
  to UTF8, because the "regexp" does not support these charsets
  - Additional optimization: calculate flags for regcomp only
    once in fix_fields, instead of every regcomp()
sql/item_cmpfunc.h:
  Adding prototypes for new members and methods
mysql-test/include/ctype_regex.inc:
  New BitKeeper file ``mysql-test/include/ctype_regex.inc''
  
  Moving common regular expression tests into a separate
  file and uncluding it into func_regexp and into many ctype_xxx tests.
parent 4e331363
#
# To test a desired collation, set session.collation_connection to
# this collation before including this file
#
--disable_warnings
drop table if exists t1;
--enable_warnings
#
# Create a table with two varchar(64) null-able column,
# using current values of
# @@character_set_connection and @@collation_connection.
#
create table t1 as
select repeat(' ', 64) as s1, repeat(' ',64) as s2
union
select null, null;
show create table t1;
delete from t1;
insert into t1 values('aaa','aaa');
insert into t1 values('aaa|qqq','qqq');
insert into t1 values('gheis','^[^a-dXYZ]+$');
insert into t1 values('aab','^aa?b');
insert into t1 values('Baaan','^Ba*n');
insert into t1 values('aaa','qqq|aaa');
insert into t1 values('qqq','qqq|aaa');
insert into t1 values('bbb','qqq|aaa');
insert into t1 values('bbb','qqq');
insert into t1 values('aaa','aba');
insert into t1 values(null,'abc');
insert into t1 values('def',null);
insert into t1 values(null,null);
insert into t1 values('ghi','ghi[');
select HIGH_PRIORITY s1 regexp s2 from t1;
drop table t1;
......@@ -2754,4 +2754,49 @@ a
c
ch
drop table t1;
set collation_connection=ucs2_unicode_ci;
drop table if exists t1;
create table t1 as
select repeat(' ', 64) as s1, repeat(' ',64) as s2
union
select null, null;
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`s1` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL,
`s2` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
delete from t1;
insert into t1 values('aaa','aaa');
insert into t1 values('aaa|qqq','qqq');
insert into t1 values('gheis','^[^a-dXYZ]+$');
insert into t1 values('aab','^aa?b');
insert into t1 values('Baaan','^Ba*n');
insert into t1 values('aaa','qqq|aaa');
insert into t1 values('qqq','qqq|aaa');
insert into t1 values('bbb','qqq|aaa');
insert into t1 values('bbb','qqq');
insert into t1 values('aaa','aba');
insert into t1 values(null,'abc');
insert into t1 values('def',null);
insert into t1 values(null,null);
insert into t1 values('ghi','ghi[');
select HIGH_PRIORITY s1 regexp s2 from t1;
s1 regexp s2
1
1
1
1
1
1
1
0
0
0
NULL
NULL
NULL
NULL
drop table t1;
set names utf8;
End for 5.0 tests
......@@ -922,4 +922,49 @@ ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_gen
select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '='
drop table t1;
set collation_connection=ucs2_general_ci;
drop table if exists t1;
create table t1 as
select repeat(' ', 64) as s1, repeat(' ',64) as s2
union
select null, null;
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`s1` varchar(64) character set ucs2 default NULL,
`s2` varchar(64) character set ucs2 default NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
delete from t1;
insert into t1 values('aaa','aaa');
insert into t1 values('aaa|qqq','qqq');
insert into t1 values('gheis','^[^a-dXYZ]+$');
insert into t1 values('aab','^aa?b');
insert into t1 values('Baaan','^Ba*n');
insert into t1 values('aaa','qqq|aaa');
insert into t1 values('qqq','qqq|aaa');
insert into t1 values('bbb','qqq|aaa');
insert into t1 values('bbb','qqq');
insert into t1 values('aaa','aba');
insert into t1 values(null,'abc');
insert into t1 values('def',null);
insert into t1 values(null,null);
insert into t1 values('ghi','ghi[');
select HIGH_PRIORITY s1 regexp s2 from t1;
s1 regexp s2
1
1
1
1
1
1
1
0
0
0
NULL
NULL
NULL
NULL
drop table t1;
set names latin1;
End of 5.0 tests
......@@ -267,6 +267,51 @@ b
select * from t1 where a = 'b' and a != 'b';
a
drop table t1;
set collation_connection=utf8_general_ci;
drop table if exists t1;
create table t1 as
select repeat(' ', 64) as s1, repeat(' ',64) as s2
union
select null, null;
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`s1` varchar(64) character set utf8 default NULL,
`s2` varchar(64) character set utf8 default NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
delete from t1;
insert into t1 values('aaa','aaa');
insert into t1 values('aaa|qqq','qqq');
insert into t1 values('gheis','^[^a-dXYZ]+$');
insert into t1 values('aab','^aa?b');
insert into t1 values('Baaan','^Ba*n');
insert into t1 values('aaa','qqq|aaa');
insert into t1 values('qqq','qqq|aaa');
insert into t1 values('bbb','qqq|aaa');
insert into t1 values('bbb','qqq');
insert into t1 values('aaa','aba');
insert into t1 values(null,'abc');
insert into t1 values('def',null);
insert into t1 values(null,null);
insert into t1 values('ghi','ghi[');
select HIGH_PRIORITY s1 regexp s2 from t1;
s1 regexp s2
1
1
1
1
1
1
1
0
0
0
NULL
NULL
NULL
NULL
drop table t1;
set names utf8;
set names utf8;
select 'вася' rlike '[[:<:]]вася[[:>:]]';
'вася' rlike '[[:<:]]вася[[:>:]]'
......
drop table if exists t1;
create table t1 (s1 char(64),s2 char(64));
set names latin1;
drop table if exists t1;
create table t1 as
select repeat(' ', 64) as s1, repeat(' ',64) as s2
union
select null, null;
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`s1` varchar(64) default NULL,
`s2` varchar(64) default NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
delete from t1;
insert into t1 values('aaa','aaa');
insert into t1 values('aaa|qqq','qqq');
insert into t1 values('gheis','^[^a-dXYZ]+$');
......
......@@ -538,4 +538,8 @@ alter table t1 convert to character set ucs2 collate ucs2_czech_ci;
select * from t1 where a like 'c%';
drop table t1;
set collation_connection=ucs2_unicode_ci;
--source include/ctype_regex.inc
set names utf8;
-- echo End for 5.0 tests
......@@ -651,4 +651,8 @@ select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062);
select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
drop table t1;
set collation_connection=ucs2_general_ci;
--source include/ctype_regex.inc
set names latin1;
--echo End of 5.0 tests
......@@ -185,6 +185,13 @@ select * from t1 where a = 'b' and a = 'b';
select * from t1 where a = 'b' and a != 'b';
drop table t1;
#
# Testing regexp
#
set collation_connection=utf8_general_ci;
--source include/ctype_regex.inc
set names utf8;
#
# Bug #3928 regexp [[:>:]] and UTF-8
#
......
......@@ -6,28 +6,9 @@
drop table if exists t1;
--enable_warnings
create table t1 (s1 char(64),s2 char(64));
set names latin1;
--source include/ctype_regex.inc
insert into t1 values('aaa','aaa');
insert into t1 values('aaa|qqq','qqq');
insert into t1 values('gheis','^[^a-dXYZ]+$');
insert into t1 values('aab','^aa?b');
insert into t1 values('Baaan','^Ba*n');
insert into t1 values('aaa','qqq|aaa');
insert into t1 values('qqq','qqq|aaa');
insert into t1 values('bbb','qqq|aaa');
insert into t1 values('bbb','qqq');
insert into t1 values('aaa','aba');
insert into t1 values(null,'abc');
insert into t1 values('def',null);
insert into t1 values(null,null);
insert into t1 values('ghi','ghi[');
select HIGH_PRIORITY s1 regexp s2 from t1;
drop table t1;
#
# This test a bug in regexp on Alpha
......
......@@ -4225,6 +4225,51 @@ void Item_func_like::cleanup()
#ifdef USE_REGEX
bool
Item_func_regex::regcomp(bool send_error)
{
char buff[MAX_FIELD_WIDTH];
String tmp(buff,sizeof(buff),&my_charset_bin);
String *res= args[1]->val_str(&tmp);
int error;
if (args[1]->null_value)
return TRUE;
if (regex_compiled)
{
if (!stringcmp(res, &prev_regexp))
return FALSE;
prev_regexp.copy(*res);
my_regfree(&preg);
regex_compiled= 0;
}
if (cmp_collation.collation != regex_lib_charset)
{
/* Convert UCS2 strings to UTF8 */
uint dummy_errors;
if (conv.copy(res->ptr(), res->length(), res->charset(),
regex_lib_charset, &dummy_errors))
return TRUE;
res= &conv;
}
if ((error= my_regcomp(&preg, res->c_ptr(),
regex_lib_flags, regex_lib_charset)))
{
if (send_error)
{
(void) my_regerror(error, &preg, buff, sizeof(buff));
my_error(ER_REGEXP_ERROR, MYF(0), buff);
}
return TRUE;
}
regex_compiled= 1;
return FALSE;
}
bool
Item_func_regex::fix_fields(THD *thd, Item **ref)
{
......@@ -4241,34 +4286,33 @@ Item_func_regex::fix_fields(THD *thd, Item **ref)
if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV, 1))
return TRUE;
regex_lib_flags= (cmp_collation.collation->state &
(MY_CS_BINSORT | MY_CS_CSSORT)) ?
REG_EXTENDED | REG_NOSUB :
REG_EXTENDED | REG_NOSUB | REG_ICASE;
/*
If the case of UCS2 and other non-ASCII character sets,
we will convert patterns and strings to UTF8.
*/
regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ?
&my_charset_utf8_general_ci :
cmp_collation.collation;
used_tables_cache=args[0]->used_tables() | args[1]->used_tables();
not_null_tables_cache= (args[0]->not_null_tables() |
args[1]->not_null_tables());
const_item_cache=args[0]->const_item() && args[1]->const_item();
if (!regex_compiled && args[1]->const_item())
{
char buff[MAX_FIELD_WIDTH];
String tmp(buff,sizeof(buff),&my_charset_bin);
String *res=args[1]->val_str(&tmp);
if (args[1]->null_value)
{ // Will always return NULL
maybe_null=1;
return FALSE;
}
int error;
if ((error= my_regcomp(&preg,res->c_ptr(),
((cmp_collation.collation->state &
(MY_CS_BINSORT | MY_CS_CSSORT)) ?
REG_EXTENDED | REG_NOSUB :
REG_EXTENDED | REG_NOSUB | REG_ICASE),
cmp_collation.collation)))
{
(void) my_regerror(error,&preg,buff,sizeof(buff));
my_error(ER_REGEXP_ERROR, MYF(0), buff);
if (regcomp(TRUE))
return TRUE;
}
regex_compiled=regex_is_const=1;
maybe_null=args[0]->maybe_null;
regex_is_const= 1;
maybe_null= args[0]->maybe_null;
}
else
maybe_null=1;
......@@ -4281,47 +4325,25 @@ longlong Item_func_regex::val_int()
{
DBUG_ASSERT(fixed == 1);
char buff[MAX_FIELD_WIDTH];
String *res, tmp(buff,sizeof(buff),&my_charset_bin);
String tmp(buff,sizeof(buff),&my_charset_bin);
String *res= args[0]->val_str(&tmp);
res=args[0]->val_str(&tmp);
if (args[0]->null_value)
{
null_value=1;
if ((null_value= (args[0]->null_value ||
(!regex_is_const && regcomp(FALSE)))))
return 0;
}
if (!regex_is_const)
{
char buff2[MAX_FIELD_WIDTH];
String *res2, tmp2(buff2,sizeof(buff2),&my_charset_bin);
res2= args[1]->val_str(&tmp2);
if (args[1]->null_value)
if (cmp_collation.collation != regex_lib_charset)
{
/* Convert UCS2 strings to UTF8 */
uint dummy_errors;
if (conv.copy(res->ptr(), res->length(), res->charset(),
regex_lib_charset, &dummy_errors))
{
null_value=1;
null_value= 1;
return 0;
}
if (!regex_compiled || stringcmp(res2,&prev_regexp))
{
prev_regexp.copy(*res2);
if (regex_compiled)
{
my_regfree(&preg);
regex_compiled=0;
}
if (my_regcomp(&preg,res2->c_ptr_safe(),
((cmp_collation.collation->state &
(MY_CS_BINSORT | MY_CS_CSSORT)) ?
REG_EXTENDED | REG_NOSUB :
REG_EXTENDED | REG_NOSUB | REG_ICASE),
cmp_collation.collation))
{
null_value=1;
return 0;
}
regex_compiled=1;
}
res= &conv;
}
null_value=0;
return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1;
}
......
......@@ -1313,6 +1313,10 @@ class Item_func_regex :public Item_bool_func
bool regex_is_const;
String prev_regexp;
DTCollation cmp_collation;
CHARSET_INFO *regex_lib_charset;
int regex_lib_flags;
String conv;
bool regcomp(bool send_error);
public:
Item_func_regex(Item *a,Item *b) :Item_bool_func(a,b),
regex_compiled(0),regex_is_const(0) {}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment