Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mariadb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
mariadb
Commits
396bbdbe
Commit
396bbdbe
authored
Mar 29, 2005
by
bar@mysql.com
Browse files
Options
Browse Files
Download
Plain Diff
Merge abarkov@bk-internal.mysql.com:/home/bk/mysql-4.1
into mysql.com:/usr/home/bar/mysql-4.1
parents
28a02c9b
2d1e9862
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
172 additions
and
0 deletions
+172
-0
mysys/charset-def.c
mysys/charset-def.c
+6
-0
strings/ctype-utf8.c
strings/ctype-utf8.c
+166
-0
No files found.
mysys/charset-def.c
View file @
396bbdbe
...
...
@@ -62,6 +62,9 @@ extern CHARSET_INFO my_charset_utf8_slovak_uca_ci;
extern
CHARSET_INFO
my_charset_utf8_spanish2_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_roman_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_persian_uca_ci
;
#ifdef HAVE_CYBOZU_COLLATION
extern
CHARSET_INFO
my_charset_utf8_general_cs
;
#endif
#endif
#endif
/* HAVE_UCA_COLLATIONS */
...
...
@@ -146,6 +149,9 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
#ifdef HAVE_CHARSET_utf8
add_compiled_collation
(
&
my_charset_utf8_general_ci
);
add_compiled_collation
(
&
my_charset_utf8_bin
);
#ifdef HAVE_CYBOZU_COLLATION
add_compiled_collation
(
&
my_charset_utf8_general_cs
);
#endif
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation
(
&
my_charset_utf8_general_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_icelandic_uca_ci
);
...
...
strings/ctype-utf8.c
View file @
396bbdbe
...
...
@@ -2380,6 +2380,172 @@ CHARSET_INFO my_charset_utf8_bin=
&
my_collation_mb_bin_handler
};
#ifdef HAVE_CYBOZU_COLLATION
/*
* These functions basically do the same as their originals, except
* that they return 0 only when the two Unicode strings being compared
* are strictly identical in a case-sensitive way. See the "save_diff"
* local variable for what they actually do.
*/
/*
  Compare two utf8 strings for the utf8_general_cs collation.

  SYNOPSIS
    my_strnncoll_utf8_cs()
    cs            Character set passed through to my_utf8_uni()
    s, slen       First string and its length in bytes
    t, tlen       Second string and its length in bytes
    t_is_prefix   If set, the result after the main loop is (t - te):
                  0 when all of t was consumed, negative otherwise

  NOTES
    Characters are decoded one at a time with my_utf8_uni() and compared
    by the `sort` value found in uni_plane[] (or by the raw code when the
    page has no table). The difference between the first pair of raw
    codes that are not identical is remembered in save_diff and is used
    as the result only when everything else compares equal.

  RETURN
    <0, 0 or >0. When either string contains a sequence my_utf8_uni()
    rejects, the difference between the current bytes is returned.
*/
static int my_strnncoll_utf8_cs(CHARSET_INFO *cs,
                                const uchar *s, uint slen,
                                const uchar *t, uint tlen,
                                my_bool t_is_prefix)
{
  const uchar *se= s + slen;
  const uchar *te= t + tlen;
  my_wc_t s_wc, t_wc;
  int save_diff= 0;                     /* first difference of raw codes */
  int rest_diff;

  while (s < se && t < te)
  {
    int s_bytes= my_utf8_uni(cs, &s_wc, s, se);
    int t_bytes= my_utf8_uni(cs, &t_wc, t, te);
    int page;

    if (s_bytes <= 0 || t_bytes <= 0)
    {
      /* Incorrect string, compare by char value */
      return ((int) s[0] - (int) t[0]);
    }

    /* Remember only the first raw difference */
    if (!save_diff)
      save_diff= ((int) s_wc) - ((int) t_wc);

    /* Replace each code by its sort value when the page has a table */
    page= (s_wc >> 8) & 0xFF;
    if (uni_plane[page])
      s_wc= uni_plane[page][s_wc & 0xFF].sort;

    page= (t_wc >> 8) & 0xFF;
    if (uni_plane[page])
      t_wc= uni_plane[page][t_wc & 0xFF].sort;

    if (s_wc != t_wc)
      return ((int) s_wc) - ((int) t_wc);

    s+= s_bytes;
    t+= t_bytes;
  }

  if (t_is_prefix)
    return (int) (t - te);

  /* Unconsumed bytes decide first; the saved raw difference breaks ties */
  rest_diff= (int) ((se - s) - (te - t));
  return rest_diff ? rest_diff : save_diff;
}
/*
  Compare two utf8 strings for the utf8_general_cs collation,
  treating trailing spaces as insignificant.

  SYNOPSIS
    my_strnncollsp_utf8_cs()
    cs        Character set passed through to my_utf8_uni()
    s, slen   First string and its length in bytes
    t, tlen   Second string and its length in bytes

  NOTES
    The common part is compared character by character using the `sort`
    value from uni_plane[] (or the raw code when the page has no table).
    The difference between the first pair of raw codes that are not
    identical is kept in save_diff and returned only when the strings
    are otherwise equal.

    If one string has bytes left over, they are compared against spaces.
    The result for the first non-space byte is (byte - ' '), negated when
    the leftover bytes belong to t. A plain negation is used instead of
    the "^ -1" trick, because the latter yields ~x == -x - 1, which is 0
    for a tail byte of 0x1F and made unequal strings compare as equal.

  RETURN
    <0, 0 or >0. When either string contains a sequence my_utf8_uni()
    rejects, the difference between the current bytes is returned.
*/
static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
                                  const uchar *s, uint slen,
                                  const uchar *t, uint tlen)
{
  int s_res, t_res;
  my_wc_t s_wc, t_wc;
  const uchar *se= s + slen;
  const uchar *te= t + tlen;
  int save_diff= 0;                     /* first difference of raw codes */

  while (s < se && t < te)
  {
    int plane;

    s_res= my_utf8_uni(cs, &s_wc, s, se);
    t_res= my_utf8_uni(cs, &t_wc, t, te);

    if (s_res <= 0 || t_res <= 0)
    {
      /* Incorrect string, compare by char value */
      return ((int) s[0] - (int) t[0]);
    }

    if (save_diff == 0)
    {
      save_diff= ((int) s_wc) - ((int) t_wc);
    }

    plane= (s_wc >> 8) & 0xFF;
    s_wc= uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
    plane= (t_wc >> 8) & 0xFF;
    t_wc= uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;

    if (s_wc != t_wc)
    {
      return ((int) s_wc) - ((int) t_wc);
    }

    s+= s_res;
    t+= t_res;
  }

  /* From here on slen and tlen hold the number of unconsumed bytes */
  slen= (uint) (se - s);
  tlen= (uint) (te - t);

  if (slen != tlen)
  {
    int tail_is_t= 0;

    if (slen < tlen)
    {
      /* Scan the tail of t instead, and remember to flip the sign */
      s= t;
      se= te;
      tail_is_t= 1;
    }
    /*
      This following loop uses the fact that in UTF-8
      all multibyte characters are greater than space,
      and all multibyte head characters are greater than
      space. It means if we meet a character greater
      than space, it always means that the longer string
      is greater. So we can reuse the same loop from the
      8bit version, without having to process full multibyte
      sequences.
    */
    for ( ; s < se; s++)
    {
      if (*s != ' ')
      {
        int tail_diff= (int) *s - (int) ' ';
        return tail_is_t ? -tail_diff : tail_diff;
      }
    }
  }
  return save_diff;
}
/*
  Collation handler referenced by my_charset_utf8_general_cs.
  Only the two comparison entries are specific to this collation
  (my_strnncoll_utf8_cs and my_strnncollsp_utf8_cs from this file);
  the remaining entries are existing utf8 / multi-byte / simple routines.
*/
static MY_COLLATION_HANDLER my_collation_cs_handler=
{
  NULL,                         /* init */
  my_strnncoll_utf8_cs,
  my_strnncollsp_utf8_cs,
  my_strnxfrm_utf8,
  my_like_range_simple,
  my_wildcmp_mb,
  my_strcasecmp_utf8,
  my_instr_mb,
  my_hash_sort_utf8
};
/*
  Compiled-in description of the "utf8_general_cs" collation of the
  "utf8" character set. It uses the common utf8 charset handler and
  my_collation_cs_handler for comparisons.
*/
CHARSET_INFO my_charset_utf8_general_cs=
{
  254, 0, 0,                            /* number           */
  MY_CS_COMPILED | MY_CS_UNICODE,       /* state            */
  "utf8",                               /* cs name          */
  "utf8_general_cs",                    /* name             */
  "",                                   /* comment          */
  NULL,                                 /* tailoring        */
  ctype_utf8,                           /* ctype            */
  to_lower_utf8,                        /* to_lower         */
  to_upper_utf8,                        /* to_upper         */
  to_upper_utf8,                        /* sort_order       */
  NULL,                                 /* contractions     */
  NULL,                                 /* sort_order_big   */
  NULL,                                 /* tab_to_uni       */
  NULL,                                 /* tab_from_uni     */
  NULL,                                 /* state_map        */
  NULL,                                 /* ident_map        */
  1,                                    /* strxfrm_multiply */
  1,                                    /* mbminlen         */
  3,                                    /* mbmaxlen         */
  0,                                    /* min_sort_char    */
  255,                                  /* max_sort_char    */
  &my_charset_utf8_handler,
  &my_collation_cs_handler
};
#endif
/* Cybozu Hack */
#ifdef MY_TEST_UTF8
#include <stdio.h>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment