Commit 44a446dc authored by bar@mysql.com's avatar bar@mysql.com

ctype-gbk.c:

  Bug #11987
  mysql will truncate the text when the text contain GBK char:"0xA3A0" and "0xA1"
  Allow to store and retrieve even unassigned GBK codes.
  Like we did in Big5 earlier.
have_gbk.inc, have_gbk.require, ctype_gbk.result, ctype_gbk.test:
  new file
parent a7e5fa4d
-- require r/have_gbk.require
disable_query_log;
show collation like "gbk_chinese_ci";
enable_query_log;
drop table if exists t1;
SET @test_character_set= 'gbk';
SET @test_collation= 'gbk_chinese_ci';
SET @safe_character_set_server= @@character_set_server;
SET @safe_collation_server= @@collation_server;
SET character_set_server= @test_character_set;
SET collation_server= @test_collation;
CREATE DATABASE d1;
USE d1;
CREATE TABLE t1 (c CHAR(10), KEY(c));
SHOW FULL COLUMNS FROM t1;
Field Type Collation Null Key Default Extra Privileges Comment
c char(10) gbk_chinese_ci YES MUL NULL
INSERT INTO t1 VALUES ('aaa'),('aaaa'),('aaaaa');
SELECT c as want3results FROM t1 WHERE c LIKE 'aaa%';
want3results
aaa
aaaa
aaaaa
DROP TABLE t1;
CREATE TABLE t1 (c1 varchar(15), KEY c1 (c1(2)));
SHOW FULL COLUMNS FROM t1;
Field Type Collation Null Key Default Extra Privileges Comment
c1 varchar(15) gbk_chinese_ci YES MUL NULL
INSERT INTO t1 VALUES ('location'),('loberge'),('lotre'),('boabab');
SELECT c1 as want3results from t1 where c1 like 'l%';
want3results
location
loberge
lotre
SELECT c1 as want3results from t1 where c1 like 'lo%';
want3results
location
loberge
lotre
SELECT c1 as want1result from t1 where c1 like 'loc%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'loca%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'locat%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'locati%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'locatio%';
want1result
location
SELECT c1 as want1result from t1 where c1 like 'location%';
want1result
location
DROP TABLE t1;
DROP DATABASE d1;
USE test;
SET character_set_server= @safe_character_set_server;
SET collation_server= @safe_collation_server;
SET NAMES gbk;
SET collation_connection='gbk_chinese_ci';
create table t1 select repeat('a',4000) a;
delete from t1;
insert into t1 values ('a'), ('a '), ('a\t');
select collation(a),hex(a) from t1 order by a;
collation(a) hex(a)
gbk_chinese_ci 6109
gbk_chinese_ci 61
gbk_chinese_ci 6120
drop table t1;
create table t1 engine=innodb select repeat('a',50) as c1;
alter table t1 add index(c1(5));
insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
select collation(c1) from t1 limit 1;
collation(c1)
gbk_chinese_ci
select c1 from t1 where c1 like 'abcdef%' order by c1;
c1
abcdefg
select c1 from t1 where c1 like 'abcde1%' order by c1;
c1
abcde100
abcde110
abcde111
select c1 from t1 where c1 like 'abcde11%' order by c1;
c1
abcde110
abcde111
select c1 from t1 where c1 like 'abcde111%' order by c1;
c1
abcde111
drop table t1;
SET collation_connection='gbk_bin';
create table t1 select repeat('a',4000) a;
delete from t1;
insert into t1 values ('a'), ('a '), ('a\t');
select collation(a),hex(a) from t1 order by a;
collation(a) hex(a)
gbk_bin 6109
gbk_bin 61
gbk_bin 6120
drop table t1;
create table t1 engine=innodb select repeat('a',50) as c1;
alter table t1 add index(c1(5));
insert into t1 values ('abcdefg'),('abcde100'),('abcde110'),('abcde111');
select collation(c1) from t1 limit 1;
collation(c1)
gbk_bin
select c1 from t1 where c1 like 'abcdef%' order by c1;
c1
abcdefg
select c1 from t1 where c1 like 'abcde1%' order by c1;
c1
abcde100
abcde110
abcde111
select c1 from t1 where c1 like 'abcde11%' order by c1;
c1
abcde110
abcde111
select c1 from t1 where c1 like 'abcde111%' order by c1;
c1
abcde111
drop table t1;
SET NAMES gbk;
CREATE TABLE t1 (a text) character set gbk;
INSERT INTO t1 VALUES (0xA3A0),(0xA1A1);
SELECT hex(a) FROM t1 ORDER BY a;
hex(a)
A1A1
A3A0
DROP TABLE t1;
Collation Charset Id Default Compiled Sortlen
gbk_chinese_ci gbk 28 Yes Yes 1
-- source include/have_gbk.inc
#
# Tests with the gbk character set
#
--disable_warnings
drop table if exists t1;
--enable_warnings
SET @test_character_set= 'gbk';
SET @test_collation= 'gbk_chinese_ci';
-- source include/ctype_common.inc
SET NAMES gbk;
SET collation_connection='gbk_chinese_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_innodb_like.inc
SET collation_connection='gbk_bin';
-- source include/ctype_filesort.inc
-- source include/ctype_innodb_like.inc
#
# Bug#11987 mysql will truncate the text when
# the text contain GBK char:"0xA3A0" and "0xA1"
#
SET NAMES gbk;
CREATE TABLE t1 (a text) character set gbk;
INSERT INTO t1 VALUES (0xA3A0),(0xA1A1);
SELECT hex(a) FROM t1 ORDER BY a;
DROP TABLE t1;
...@@ -9925,6 +9925,43 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)), ...@@ -9925,6 +9925,43 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
} }
/*
Returns well formed length of a GBK string.
*/
static
uint my_well_formed_len_gbk(CHARSET_INFO *cs __attribute__((unused)),
const char *b, const char *e,
uint pos, int *error)
{
const char *b0= b;
const char *emb= e - 1; /* Last possible end of an MB character */
*error= 0;
while (pos-- && b < e)
{
if ((uchar) b[0] < 128)
{
/* Single byte ascii character */
b++;
}
else if ((b < emb) && isgbkcode((uchar)*b, (uchar)b[1]))
{
/* Double byte character */
b+= 2;
}
else
{
/* Wrong byte sequence */
*error= 1;
break;
}
}
return b - b0;
}
static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_COLLATION_HANDLER my_collation_ci_handler =
{ {
NULL, /* init */ NULL, /* init */
...@@ -9945,7 +9982,7 @@ static MY_CHARSET_HANDLER my_charset_handler= ...@@ -9945,7 +9982,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_gbk, mbcharlen_gbk,
my_numchars_mb, my_numchars_mb,
my_charpos_mb, my_charpos_mb,
my_well_formed_len_mb, my_well_formed_len_gbk,
my_lengthsp_8bit, my_lengthsp_8bit,
my_numcells_8bit, my_numcells_8bit,
my_mb_wc_gbk, my_mb_wc_gbk,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment