diff --git a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result index 7e4a03e96ccc2e745c08552bb394928a751934ec..906817955132f3c81224d7b292855471632de17c 100644 --- a/mysql-test/r/ctype_uca.result +++ b/mysql-test/r/ctype_uca.result @@ -1,5 +1,35 @@ DROP TABLE IF EXISTS t1; set names utf8; +set collation_connection=utf8_unicode_ci; +select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; +'a' = 'a' 'a' = 'a ' 'a ' = 'a' +1 1 1 +select 'a\t' = 'a' , 'a\t' < 'a' , 'a\t' > 'a'; +'a\t' = 'a' 'a\t' < 'a' 'a\t' > 'a' +0 1 0 +select 'a\t' = 'a ', 'a\t' < 'a ', 'a\t' > 'a '; +'a\t' = 'a ' 'a\t' < 'a ' 'a\t' > 'a ' +0 1 0 +select 'a' = 'a\t', 'a' < 'a\t', 'a' > 'a\t'; +'a' = 'a\t' 'a' < 'a\t' 'a' > 'a\t' +0 0 1 +select 'a ' = 'a\t', 'a ' < 'a\t', 'a ' > 'a\t'; +'a ' = 'a\t' 'a ' < 'a\t' 'a ' > 'a\t' +0 0 1 +select 'a a' > 'a', 'a \t' < 'a'; +'a a' > 'a' 'a \t' < 'a' +1 1 +CREATE TABLE t ( +c char(20) NOT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +INSERT INTO t VALUES ('a'),('ab'),('aba'); +ALTER TABLE t ADD INDEX (c); +SELECT c FROM t WHERE c LIKE 'a%'; +c +a +ab +aba +DROP TABLE t; create table t1 (c1 char(10) character set utf8 collate utf8_bin); insert into t1 values ('A'),('a'); insert into t1 values ('B'),('b'); diff --git a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test index cbb2bd7ba4bcf4c7a24fdbd2573e34c7e162d857..708a31d637e7ab7e26372fbf16a46872fe3cc0a7 100644 --- a/mysql-test/t/ctype_uca.test +++ b/mysql-test/t/ctype_uca.test @@ -7,8 +7,35 @@ DROP TABLE IF EXISTS t1; # # Test Unicode collations. 
# - set names utf8; + +# +# Check trailing spaces +# +set collation_connection=utf8_unicode_ci; + +select 'a' = 'a', 'a' = 'a ', 'a ' = 'a'; + +select 'a\t' = 'a' , 'a\t' < 'a' , 'a\t' > 'a'; +select 'a\t' = 'a ', 'a\t' < 'a ', 'a\t' > 'a '; + +select 'a' = 'a\t', 'a' < 'a\t', 'a' > 'a\t'; +select 'a ' = 'a\t', 'a ' < 'a\t', 'a ' > 'a\t'; + +select 'a a' > 'a', 'a \t' < 'a'; + +# +# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters +# +CREATE TABLE t ( + c char(20) NOT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +INSERT INTO t VALUES ('a'),('ab'),('aba'); +ALTER TABLE t ADD INDEX (c); +SELECT c FROM t WHERE c LIKE 'a%'; +#should find 3 rows but only found 2 +DROP TABLE t; + create table t1 (c1 char(10) character set utf8 collate utf8_bin); # diff --git a/strings/CHARSET_INFO.txt b/strings/CHARSET_INFO.txt index 883000e7ade27fb5d3e1eb524266b6dd626cc085..f7a10f9588016802c65fecced500a55d619e0366 100644 --- a/strings/CHARSET_INFO.txt +++ b/strings/CHARSET_INFO.txt @@ -74,7 +74,16 @@ Conversion tables ctype - pointer to array[257] of "type of characters" bit mask for each chatacter, e.g. if a character is a digit or a letter or a separator, etc. - to_lower - pointer to arrat[256] used in LCASE() + + Monty 2004-10-21: + If you look at the macros, we use ctype[(char)+1]. + ctype[0] is traditionally in most ctype libraries + reserved for EOF (-1). The idea is that you can use + the result from fgetc() directly with ctype[]. As + we have to be compatible with external ctype[] versions, + it's better to do it the same way as they do... 
+ + to_lower - pointer to array[256] used in LCASE() to_upper - pointer to array[256] used in UCASE() sort_order - pointer to array[256] used for strings comparison @@ -137,7 +146,7 @@ following set of functions: Multibyte routines ------------------ ismbchar() - detects if the given string is a multibyte sequence -mbcharlen() - retuturns length of multibyte sequence starting with +mbcharlen() - returns length of multibyte sequence starting with the given character numchars() - returns number of characters in the given string, e.g. in SQL function CHAR_LENGTH(). diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 446fc821337391b2e125ae0445db38b42992dfb6..4763a7b7e1bb63161bd1317364de6a43aca8067f 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -7052,6 +7052,28 @@ static int my_strnncoll_uca(CHARSET_INFO *cs, NOTES: Works exactly the same with my_strnncoll_uca(), but ignores trailing spaces. + + In the while() comparison these situations are possible: + 1. (s_res>0) and (t_res>0) and (s_res == t_res) + Weights are the same so far, continue comparison + 2. (s_res>0) and (t_res>0) and (s_res!=t_res) + A difference has been found, return. + 3. (s_res>0) and (t_res<0) + We have reached the end of the second string, or found + an illegal multibyte sequence in the second string. + Compare the first string to an infinite array of + space characters until difference is found, or until + the end of the first string. + 4. (s_res<0) and (t_res>0) + We have reached the end of the first string, or found + an illegal multibyte sequence in the first string. + Compare the second string to an infinite array of + space characters until difference is found or until + the end of the second string. + 5. (s_res<0) and (t_res<0) + Both scanners returned -1. It means we have reached + the end-of-string or illegal-sequence in both strings + at the same time. Return 0, strings are equal. 
RETURN Difference between two strings, according to the collation: @@ -7070,9 +7092,6 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs, int s_res; int t_res; - slen= cs->cset->lengthsp(cs, (char*) s, slen); - tlen= cs->cset->lengthsp(cs, (char*) t, tlen); - scanner_handler->init(&sscanner, cs, s, slen); scanner_handler->init(&tscanner, cs, t, tlen); @@ -7080,6 +7099,37 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs, { s_res= scanner_handler->next(&sscanner); t_res= scanner_handler->next(&tscanner); + + if (s_res > 0 && t_res < 0) + { + /* Calculate weight for SPACE character */ + t_res= cs->sort_order_big[0][0x20 * cs->sort_order[0]]; + + /* compare the first string to spaces */ + do + { + if (s_res != t_res) + return (s_res - t_res); + s_res= scanner_handler->next(&sscanner); + } while (s_res > 0); + return 0; + } + + if (s_res < 0 && t_res > 0) + { + /* Calculate weight for SPACE character */ + s_res= cs->sort_order_big[0][0x20 * cs->sort_order[0]]; + + /* compare the second string to spaces */ + do + { + if (s_res != t_res) + return (s_res - t_res); + t_res= scanner_handler->next(&tscanner); + } while (t_res > 0); + return 0; + } + } while ( s_res == t_res && s_res >0); return ( s_res - t_res );