Bug#18908: ERROR 1406 (22001): Data too long for column :: using utf8

Problem: The error message was too confusing when a value could not be
converted between the string and column character sets on INSERT and UPDATE.
Fix: produce a better error message, instead of "Data too long",
in such cases.
Additional changes: Added "DROP TABLE IF EXISTS" to several
tests to make them safe against failures in previous tests.
parent bbf84d6b
...@@ -2,3 +2,4 @@ ...@@ -2,3 +2,4 @@
44ec850ac2k4y2Omgr92GiWPBAVKGQ 44ec850ac2k4y2Omgr92GiWPBAVKGQ
44edb86b1iE5knJ97MbliK_3lCiAXA 44edb86b1iE5knJ97MbliK_3lCiAXA
44f33f3aj5KW5qweQeekY1LU0E9ZCg 44f33f3aj5KW5qweQeekY1LU0E9ZCg
452c6c6dAjuNghfc1ObZ_UQ5SCl85g
...@@ -2,6 +2,10 @@ ...@@ -2,6 +2,10 @@
# Test for strict-mode autoincrement # Test for strict-mode autoincrement
# #
--disable_warnings
drop table if exists t1;
--enable_warnings
set @org_mode=@@sql_mode; set @org_mode=@@sql_mode;
eval create table t1 eval create table t1
( (
......
...@@ -171,8 +171,8 @@ create table t1 (a char(10) character set koi8r, b text character set koi8r); ...@@ -171,8 +171,8 @@ create table t1 (a char(10) character set koi8r, b text character set koi8r);
insert into t1 values ('test','test'); insert into t1 values ('test','test');
insert into t1 values ('',''); insert into t1 values ('','');
Warnings: Warnings:
Warning 1265 Data truncated for column 'a' at row 1 Warning 1366 Incorrect string value: '\xCA\xC3\xD5\xCB' for column 'a' at row 1
Warning 1265 Data truncated for column 'b' at row 1 Warning 1366 Incorrect string value: '\xCA\xC3\xD5\xCB' for column 'b' at row 1
drop table t1; drop table t1;
set names koi8r; set names koi8r;
create table t1 (a char(10) character set cp1251); create table t1 (a char(10) character set cp1251);
......
...@@ -197,7 +197,7 @@ drop table t1; ...@@ -197,7 +197,7 @@ drop table t1;
create table t1 (s1 char(10) character set utf8); create table t1 (s1 char(10) character set utf8);
insert into t1 values (0x41FF); insert into t1 values (0x41FF);
Warnings: Warnings:
Warning 1265 Data truncated for column 's1' at row 1 Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1; select hex(s1) from t1;
hex(s1) hex(s1)
41 41
...@@ -205,7 +205,7 @@ drop table t1; ...@@ -205,7 +205,7 @@ drop table t1;
create table t1 (s1 varchar(10) character set utf8); create table t1 (s1 varchar(10) character set utf8);
insert into t1 values (0x41FF); insert into t1 values (0x41FF);
Warnings: Warnings:
Warning 1265 Data truncated for column 's1' at row 1 Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1; select hex(s1) from t1;
hex(s1) hex(s1)
41 41
...@@ -213,7 +213,7 @@ drop table t1; ...@@ -213,7 +213,7 @@ drop table t1;
create table t1 (s1 text character set utf8); create table t1 (s1 text character set utf8);
insert into t1 values (0x41FF); insert into t1 values (0x41FF);
Warnings: Warnings:
Warning 1265 Data truncated for column 's1' at row 1 Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1; select hex(s1) from t1;
hex(s1) hex(s1)
41 41
......
...@@ -372,10 +372,10 @@ t collation(t) FORMAT(MATCH t AGAINST ('Osnabruck'),6) ...@@ -372,10 +372,10 @@ t collation(t) FORMAT(MATCH t AGAINST ('Osnabruck'),6)
aus Osnabrck utf8_general_ci 1.591140 aus Osnabrck utf8_general_ci 1.591140
alter table t1 modify t varchar(200) collate latin1_german2_ci not null; alter table t1 modify t varchar(200) collate latin1_german2_ci not null;
Warnings: Warnings:
Warning 1265 Data truncated for column 't' at row 3 Warning 1366 Incorrect string value: '\xD0\xAD\xD1\x82\xD0\xBE...' for column 't' at row 3
Warning 1265 Data truncated for column 't' at row 4 Warning 1366 Incorrect string value: '\xD0\x9E\xD1\x82\xD0\xBB...' for column 't' at row 4
Warning 1265 Data truncated for column 't' at row 5 Warning 1366 Incorrect string value: '\xD0\x9D\xD0\xB5 \xD0...' for column 't' at row 5
Warning 1265 Data truncated for column 't' at row 6 Warning 1366 Incorrect string value: '\xD0\xB8 \xD0\xB1\xD1...' for column 't' at row 6
SELECT t, collation(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrck'); SELECT t, collation(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrck');
t collation(t) t collation(t)
aus Osnabrck latin1_german2_ci aus Osnabrck latin1_german2_ci
......
drop table if exists t1;
set @org_mode=@@sql_mode; set @org_mode=@@sql_mode;
create table t1 create table t1
( (
......
drop table if exists t1;
set @org_mode=@@sql_mode; set @org_mode=@@sql_mode;
create table t1 create table t1
( (
......
drop table if exists t1;
set @org_mode=@@sql_mode; set @org_mode=@@sql_mode;
create table t1 create table t1
( (
......
drop table if exists t1;
set @org_mode=@@sql_mode; set @org_mode=@@sql_mode;
create table t1 create table t1
( (
......
drop table if exists t1;
set @org_mode=@@sql_mode; set @org_mode=@@sql_mode;
create table t1 create table t1
( (
......
This diff is collapsed.
...@@ -854,6 +854,162 @@ outp: ...@@ -854,6 +854,162 @@ outp:
} }
/*
  Copy a string,
  with optional character set conversion,
  with optional left padding (for binary -> UCS2 conversion)

  SYNOPSIS
    well_formed_copy_nchars()
    to_cs                    Character set of "to" string
    to                       Store result here
    to_length                Maximum length of "to" string, in bytes
    from_cs                  Character set of "from" string
    from                     Copy from here
    from_length              Length of "from" string, in bytes
    nchars                   Copy not more than nchars characters
    well_formed_error_pos    Return position where "from" is not well formed,
                             or NULL otherwise.
    cannot_convert_error_pos Return position where a non-convertible
                             character was met, or NULL otherwise.
    from_end_pos             Return position where scanning of "from"
                             string stopped.

  NOTES
    When either character set is binary, or both character sets are the
    same, bytes are copied without conversion and
    cannot_convert_error_pos is always set to NULL.

    Otherwise every character is converted through its wide-character code.
    Malformed input bytes and characters that have no mapping are replaced
    with '?', and the position of the first such problem is reported.

    from_end_pos never points past a character that was not stored in "to":
    if the destination runs out of room in the middle of the conversion
    loop, scanning is rewound to the start of the character that did not
    fit, so that callers can detect the truncation.

  RETURN
    number of bytes copied to 'to'
*/

uint32
well_formed_copy_nchars(CHARSET_INFO *to_cs,
                        char *to, uint to_length,
                        CHARSET_INFO *from_cs,
                        const char *from, uint from_length,
                        uint nchars,
                        const char **well_formed_error_pos,
                        const char **cannot_convert_error_pos,
                        const char **from_end_pos)
{
  uint res;

  if ((to_cs == &my_charset_bin) ||
      (from_cs == &my_charset_bin) ||
      (to_cs == from_cs) ||
      my_charset_same(from_cs, to_cs))
  {
    /* No conversion is needed: plain (possibly padded) byte copy */
    if (to_length < to_cs->mbminlen || !nchars)
    {
      /* Not even one character fits, or nothing was requested */
      *from_end_pos= from;
      *cannot_convert_error_pos= NULL;
      *well_formed_error_pos= NULL;
      return 0;
    }

    if (to_cs == &my_charset_bin)
    {
      /* Binary destination: any byte sequence is acceptable */
      res= min(min(nchars, to_length), from_length);
      memmove(to, from, res);
      *from_end_pos= from + res;
      *well_formed_error_pos= NULL;
      *cannot_convert_error_pos= NULL;
    }
    else
    {
      int well_formed_error;
      uint from_offset= from_length % to_cs->mbminlen;
      /* Bytes already stored in "to" for a left-padded leading character */
      uint padded_prefix_length= 0;

      if (from_offset && (from_cs == &my_charset_bin))
      {
        /*
          Copying from BINARY to UCS2 needs to prepend zeros sometimes:
          INSERT INTO t1 (ucs2_column) VALUES (0x01);
          0x01 -> 0x0001
        */
        uint pad_length= to_cs->mbminlen - from_offset;
        bzero(to, pad_length);
        memmove(to + pad_length, from, from_offset);
        nchars--;
        from+= from_offset;
        from_length-= from_offset;
        to+= to_cs->mbminlen;
        to_length-= to_cs->mbminlen;
        padded_prefix_length= to_cs->mbminlen;
      }

      set_if_smaller(from_length, to_length);
      res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
                                        nchars, &well_formed_error);
      memmove(to, from, res);
      *from_end_pos= from + res;
      *well_formed_error_pos= well_formed_error ? from + res : NULL;
      *cannot_convert_error_pos= NULL;
      /*
        Count the padded leading character only when it was really written.
        Adding it whenever from_offset is non-zero would report more bytes
        than were stored if the source is not binary.
      */
      res+= padded_prefix_length;
    }
  }
  else
  {
    /* Character-by-character conversion through wide-character codes */
    int cnvres;
    my_wc_t wc;
    int (*mb_wc)(struct charset_info_st *, my_wc_t *,
                 const uchar *, const uchar *)= from_cs->cset->mb_wc;
    int (*wc_mb)(struct charset_info_st *, my_wc_t,
                 uchar *s, uchar *e)= to_cs->cset->wc_mb;
    const uchar *from_end= (const uchar*) from + from_length;
    uchar *to_end= (uchar*) to + to_length;
    char *to_start= to;

    *well_formed_error_pos= NULL;
    *cannot_convert_error_pos= NULL;

    for ( ; nchars; nchars--)
    {
      /* Start of the current character, needed for error reporting/rewind */
      const char *from_prev= from;

      if ((cnvres= (*mb_wc)(from_cs, &wc, (const uchar*) from, from_end)) > 0)
        from+= cnvres;
      else if (cnvres == MY_CS_ILSEQ)
      {
        /* Malformed byte: remember the first one, skip it, store '?' */
        if (!*well_formed_error_pos)
          *well_formed_error_pos= from;
        from++;
        wc= '?';
      }
      else if (cnvres > MY_CS_TOOSMALL)
      {
        /*
          A correct multibyte sequence detected
          But it doesn't have Unicode mapping.
        */
        if (!*cannot_convert_error_pos)
          *cannot_convert_error_pos= from;
        from+= (-cnvres);
        wc= '?';
      }
      else
        break;  // Not enough characters

outp:
      if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
        to+= cnvres;
      else if (cnvres == MY_CS_ILUNI && wc != '?')
      {
        /* No representation in to_cs: retry the store with '?' */
        if (!*cannot_convert_error_pos)
          *cannot_convert_error_pos= from_prev;
        wc= '?';
        goto outp;
      }
      else
      {
        /*
          The character could not be stored (e.g. "to" is full).
          Rewind, so that from_end_pos does not claim that a character
          which was never written has been consumed.
        */
        from= from_prev;
        break;
      }
    }
    *from_end_pos= from;
    res= to - to_start;
  }
  return (uint32) res;
}
void String::print(String *str) void String::print(String *str)
{ {
char *st= (char*)Ptr, *end= st+str_length; char *st= (char*)Ptr, *end= st+str_length;
......
...@@ -32,6 +32,14 @@ String *copy_if_not_alloced(String *a,String *b,uint32 arg_length); ...@@ -32,6 +32,14 @@ String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
const char *from, uint32 from_length, const char *from, uint32 from_length,
CHARSET_INFO *from_cs, uint *errors); CHARSET_INFO *from_cs, uint *errors);
/*
  Copy at most nchars characters from "from" to "to", converting between
  from_cs and to_cs when the character sets differ.
  Returns the number of bytes written to "to".
  On return, *well_formed_error_pos is the position of the first malformed
  input (or NULL), *cannot_convert_error_pos is the position of the first
  character that could not be converted (or NULL), and *from_end_pos is
  the position where scanning of "from" stopped.
*/
uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
const char *from, uint from_length,
uint nchars,
const char **well_formed_error_pos,
const char **cannot_convert_error_pos,
const char **from_end_pos);
class String class String
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment