Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mariadb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
mariadb
Commits
f685ae07
Commit
f685ae07
authored
Mar 30, 2007
by
bar@mysql.com
Browse files
Options
Browse Files
Download
Plain Diff
Merge abarkov@bk-internal.mysql.com:/home/bk/mysql-5.0-rpl
into mysql.com:/home/bar/mysql-5.0.b22638
parents
75668471
4b3826ba
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
158 additions
and
29 deletions
+158
-29
mysql-test/r/ctype_ucs.result
mysql-test/r/ctype_ucs.result
+18
-0
mysql-test/r/ctype_utf8.result
mysql-test/r/ctype_utf8.result
+12
-0
mysql-test/t/ctype_ucs.test
mysql-test/t/ctype_ucs.test
+14
-0
mysql-test/t/ctype_utf8.test
mysql-test/t/ctype_utf8.test
+8
-0
sql/item_strfunc.cc
sql/item_strfunc.cc
+106
-29
No files found.
mysql-test/r/ctype_ucs.result
View file @
f685ae07
...
...
@@ -839,6 +839,24 @@ lily
river
drop table t1;
deallocate prepare stmt;
set names latin1;
set character_set_connection=ucs2;
select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb');
soundex('') soundex('he') soundex('hello all folks') soundex('#3556 in bugdb')
H000 H4142 I51231
select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb'));
hex(soundex('')) hex(soundex('he')) hex(soundex('hello all folks')) hex(soundex('#3556 in bugdb'))
0048003000300030 00480034003100340032 004900350031003200330031
select 'mood' sounds like 'mud';
'mood' sounds like 'mud'
1
select hex(soundex(_ucs2 0x041004110412));
hex(soundex(_ucs2 0x041004110412))
0410003000300030
select hex(soundex(_ucs2 0x00BF00C0));
hex(soundex(_ucs2 0x00BF00C0))
00C0003000300030
set names latin1;
create table t1(a blob, b text charset utf8, c text charset ucs2);
select data_type, character_octet_length, character_maximum_length
from information_schema.columns where table_name='t1';
...
...
mysql-test/r/ctype_utf8.result
View file @
f685ae07
...
...
@@ -854,6 +854,18 @@ select * from t1 where soundex(a) = soundex('test');
id a
1 Test
drop table t1;
select soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB);
soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB)
阅000
select hex(soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB));
hex(soundex(_utf8 0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB))
E99885303030
select soundex(_utf8 0xD091D092D093);
soundex(_utf8 0xD091D092D093)
Б000
select hex(soundex(_utf8 0xD091D092D093));
hex(soundex(_utf8 0xD091D092D093))
D091303030
SET collation_connection='utf8_general_ci';
create table t1 select repeat('a',4000) a;
delete from t1;
...
...
mysql-test/t/ctype_ucs.test
View file @
f685ae07
...
...
@@ -572,6 +572,20 @@ select utext from t1 where utext like '%%';
drop
table
t1
;
deallocate
prepare
stmt
;
#
# Bug#22638 SOUNDEX broken for international characters
#
set
names
latin1
;
set
character_set_connection
=
ucs2
;
select
soundex
(
''
),
soundex
(
'he'
),
soundex
(
'hello all folks'
),
soundex
(
'#3556 in bugdb'
);
select
hex
(
soundex
(
''
)),
hex
(
soundex
(
'he'
)),
hex
(
soundex
(
'hello all folks'
)),
hex
(
soundex
(
'#3556 in bugdb'
));
select
'mood'
sounds
like
'mud'
;
# Cyrillic A, BE, VE
select
hex
(
soundex
(
_ucs2
0x041004110412
));
# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered a letter
select
hex
(
soundex
(
_ucs2
0x00BF00C0
));
set
names
latin1
;
#
# Bug #14290: character_maximum_length for text fields
#
...
...
mysql-test/t/ctype_utf8.test
View file @
f685ae07
...
...
@@ -702,6 +702,14 @@ select * from t1 where soundex(a) = soundex('TEST');
select
*
from
t1
where
soundex
(
a
)
=
soundex
(
'test'
);
drop
table
t1
;
#
# Bug#22638 SOUNDEX broken for international characters
#
select
soundex
(
_utf8
0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB
);
select
hex
(
soundex
(
_utf8
0xE99885E8A788E99A8FE697B6E69BB4E696B0E79A84E696B0E997BB
));
select
soundex
(
_utf8
0xD091D092D093
);
select
hex
(
soundex
(
_utf8
0xD091D092D093
));
SET
collation_connection
=
'utf8_general_ci'
;
--
source
include
/
ctype_filesort
.
inc
...
...
sql/item_strfunc.cc
View file @
f685ae07
...
...
@@ -1805,7 +1805,8 @@ void Item_func_soundex::fix_length_and_dec()
{
collation
.
set
(
args
[
0
]
->
collation
);
max_length
=
args
[
0
]
->
max_length
;
set_if_bigger
(
max_length
,
4
);
set_if_bigger
(
max_length
,
4
*
collation
.
collation
->
mbminlen
);
tmp_value
.
set_charset
(
collation
.
collation
);
}
...
...
@@ -1815,14 +1816,15 @@ void Item_func_soundex::fix_length_and_dec()
else return 0
*/
static
char
soundex_toupper
(
char
ch
)
/*
  Upper-case a single Basic Latin letter for SOUNDEX.

  Only 'a'..'z' are folded to 'A'..'Z'. Every other code (digits,
  punctuation, already upper-case letters, codes outside Basic Latin)
  is returned unchanged.
*/
static int soundex_toupper(int ch)
{
  if (ch < 'a' || ch > 'z')
    return ch;                      // Not a lower-case Basic Latin letter
  return ch - ('a' - 'A');          // Shift into the upper-case range
}
static
char
get_scode
(
char
*
ptr
)
static
char
get_scode
(
int
wc
)
{
uchar
ch
=
soundex_toupper
(
*
ptr
);
int
ch
=
soundex_toupper
(
wc
);
if
(
ch
<
'A'
||
ch
>
'Z'
)
{
// Treat extended alpha (country spec)
...
...
@@ -1832,46 +1834,121 @@ static char get_scode(char *ptr)
}
/*
  Tell whether a Unicode code point counts as a letter for SOUNDEX.

  Accepted:
  - Basic Latin letters a..z and A..Z;
  - every code point at or above U+00C0 ("LATIN CAPITAL LETTER A WITH
    GRAVE", the first letter after 'z').

  Code points between 'z' and U+00C0 are controls and punctuation,
  so they are rejected.
*/
static bool my_uni_isalpha(int wc)
{
  if (wc >= 0xC0)
    return true;                    // Everything from U+00C0 upwards
  if (wc >= 'a' && wc <= 'z')
    return true;                    // Lower-case Basic Latin
  return wc >= 'A' && wc <= 'Z';    // Upper-case Basic Latin
}
String
*
Item_func_soundex
::
val_str
(
String
*
str
)
{
DBUG_ASSERT
(
fixed
==
1
);
String
*
res
=
args
[
0
]
->
val_str
(
str
);
char
last_ch
,
ch
;
CHARSET_INFO
*
cs
=
collation
.
collation
;
my_wc_t
wc
;
uint
nchars
;
int
rc
;
if
((
null_value
=
args
[
0
]
->
null_value
))
if
((
null_value
=
args
[
0
]
->
null_value
))
return
0
;
/* purecov: inspected */
if
(
tmp_value
.
alloc
(
max
(
res
->
length
(),
4
)))
if
(
tmp_value
.
alloc
(
max
(
res
->
length
(),
4
*
cs
->
mbminlen
)))
return
str
;
/* purecov: inspected */
char
*
to
=
(
char
*
)
tmp_value
.
ptr
();
char
*
from
=
(
char
*
)
res
->
ptr
(),
*
end
=
from
+
res
->
length
();
tmp_value
.
set_charset
(
cs
);
while
(
from
!=
end
&&
!
my_isalpha
(
cs
,
*
from
))
// Skip pre-space
from
++
;
/* purecov: inspected */
if
(
from
==
end
)
return
&
my_empty_string
;
// No alpha characters.
*
to
++
=
soundex_toupper
(
*
from
);
// Copy first letter
last_ch
=
get_scode
(
from
);
// code of the first letter
// for the first 'double-letter check.
// Loop on input letters until
// end of input (null) or output
// letter code count = 3
for
(
from
++
;
from
<
end
;
from
++
)
{
if
(
!
my_isalpha
(
cs
,
*
from
))
char
*
to_end
=
to
+
tmp_value
.
alloced_length
();
char
*
from
=
(
char
*
)
res
->
ptr
(),
*
end
=
from
+
res
->
length
();
for
(
;
;
)
/* Skip pre-space */
{
if
((
rc
=
cs
->
cset
->
mb_wc
(
cs
,
&
wc
,
(
uchar
*
)
from
,
(
uchar
*
)
end
))
<=
0
)
return
&
my_empty_string
;
/* EOL or invalid byte sequence */
if
(
rc
==
1
&&
cs
->
ctype
)
{
/* Single byte letter found */
if
(
my_isalpha
(
cs
,
*
from
))
{
last_ch
=
get_scode
(
*
from
);
// Code of the first letter
*
to
++=
soundex_toupper
(
*
from
++
);
// Copy first letter
break
;
}
from
++
;
}
else
{
from
+=
rc
;
if
(
my_uni_isalpha
(
wc
))
{
/* Multibyte letter found */
wc
=
soundex_toupper
(
wc
);
last_ch
=
get_scode
(
wc
);
// Code of the first letter
if
((
rc
=
cs
->
cset
->
wc_mb
(
cs
,
wc
,
(
uchar
*
)
to
,
(
uchar
*
)
to_end
))
<=
0
)
{
/* Extra safety - should not really happen */
DBUG_ASSERT
(
false
);
return
&
my_empty_string
;
}
to
+=
rc
;
break
;
}
}
}
/*
last_ch is now set to the first 'double-letter' check.
loop on input letters until end of input
*/
for
(
nchars
=
1
;
;
)
{
if
((
rc
=
cs
->
cset
->
mb_wc
(
cs
,
&
wc
,
(
uchar
*
)
from
,
(
uchar
*
)
end
))
<=
0
)
break
;
/* EOL or invalid byte sequence */
if
(
rc
==
1
&&
cs
->
ctype
)
{
if
(
!
my_isalpha
(
cs
,
*
from
++
))
continue
;
ch
=
get_scode
(
from
);
}
else
{
from
+=
rc
;
if
(
!
my_uni_isalpha
(
wc
))
continue
;
}
ch
=
get_scode
(
wc
);
if
((
ch
!=
'0'
)
&&
(
ch
!=
last_ch
))
// if not skipped or double
{
*
to
++
=
ch
;
// letter, copy to output
last_ch
=
ch
;
// save code of last input letter
// letter, copy to output
if
((
rc
=
cs
->
cset
->
wc_mb
(
cs
,
(
my_wc_t
)
ch
,
(
uchar
*
)
to
,
(
uchar
*
)
to_end
))
<=
0
)
{
// Extra safety - should not really happen
DBUG_ASSERT
(
false
);
break
;
}
to
+=
rc
;
nchars
++
;
last_ch
=
ch
;
// save code of last input letter
}
// for next double-letter check
}
for
(
end
=
(
char
*
)
tmp_value
.
ptr
()
+
4
;
to
<
end
;
to
++
)
*
to
=
'0'
;
*
to
=
0
;
// end string
/* Pad up to 4 characters with DIGIT ZERO, if the string is shorter */
if
(
nchars
<
4
)
{
uint
nbytes
=
(
4
-
nchars
)
*
cs
->
mbminlen
;
cs
->
cset
->
fill
(
cs
,
to
,
nbytes
,
'0'
);
to
+=
nbytes
;
}
tmp_value
.
length
((
uint
)
(
to
-
tmp_value
.
ptr
()));
return
&
tmp_value
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment