Commit 00c1c19e authored by unknown's avatar unknown

Bug#18908: ERROR 1406 (22001): Data too long for column :: using utf8

Problem: The error message was confusing when a string could not be
converted between its own character set and the column character set
on INSERT and UPDATE.
Fix: Produce a more specific error message instead of "Data too long"
in such cases.
Additional changes: Adding "DROP TABLE IF EXISTS" into several
tests to be safe against failures in previous tests. 


mysql-test/include/strict_autoinc.inc:
  Adding DROP TABLE to be safe against previous tests failure.
mysql-test/r/ctype_recoding.result:
  Fixing test results
mysql-test/r/ctype_utf8.result:
  Fixing test results
mysql-test/r/fulltext.result:
  Fixing test results
mysql-test/r/strict_autoinc_1myisam.result:
  Adding DROP TABLE to be safe against previous tests failure.
mysql-test/r/strict_autoinc_2innodb.result:
  Adding DROP TABLE to be safe against previous tests failure.
mysql-test/r/strict_autoinc_3heap.result:
  Adding DROP TABLE to be safe against previous tests failure.
mysql-test/r/strict_autoinc_4bdb.result:
  Adding DROP TABLE to be safe against previous tests failure.
mysql-test/r/strict_autoinc_5ndb.result:
  Adding DROP TABLE to be safe against previous tests failure.
sql/field.cc:
  - producing better error messages than
    "DATA TRUNCATED" or "DATA TOO LONG" (in strict mode)
    in case of "not well formed source" and
    "cannot convert to field character set"
  - Performance improvements: copying directly to
    the target, instead of using an intermediate
    String.
  - Moving duplicate code into report_data_too_long() function.
sql/sql_string.cc:
  Adding a new function that converts a string between character sets
  while copying no more than "nchars" characters - a helper function for
  Field_string::store(), Field_varstring::store() and Field_blob::store().
sql/sql_string.h:
  Adding new function prototype.
parent 2bfeecca
......@@ -2,3 +2,4 @@
44ec850ac2k4y2Omgr92GiWPBAVKGQ
44edb86b1iE5knJ97MbliK_3lCiAXA
44f33f3aj5KW5qweQeekY1LU0E9ZCg
452c6c6dAjuNghfc1ObZ_UQ5SCl85g
......@@ -2,6 +2,10 @@
# Test for strict-mode autoincrement
#
--disable_warnings
drop table if exists t1;
--enable_warnings
set @org_mode=@@sql_mode;
eval create table t1
(
......
......@@ -171,8 +171,8 @@ create table t1 (a char(10) character set koi8r, b text character set koi8r);
insert into t1 values ('test','test');
insert into t1 values ('','');
Warnings:
Warning 1265 Data truncated for column 'a' at row 1
Warning 1265 Data truncated for column 'b' at row 1
Warning 1366 Incorrect string value: '\xCA\xC3\xD5\xCB' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xCA\xC3\xD5\xCB' for column 'b' at row 1
drop table t1;
set names koi8r;
create table t1 (a char(10) character set cp1251);
......
......@@ -197,7 +197,7 @@ drop table t1;
create table t1 (s1 char(10) character set utf8);
insert into t1 values (0x41FF);
Warnings:
Warning 1265 Data truncated for column 's1' at row 1
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
......@@ -205,7 +205,7 @@ drop table t1;
create table t1 (s1 varchar(10) character set utf8);
insert into t1 values (0x41FF);
Warnings:
Warning 1265 Data truncated for column 's1' at row 1
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
......@@ -213,7 +213,7 @@ drop table t1;
create table t1 (s1 text character set utf8);
insert into t1 values (0x41FF);
Warnings:
Warning 1265 Data truncated for column 's1' at row 1
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
......
......@@ -372,10 +372,10 @@ t collation(t) FORMAT(MATCH t AGAINST ('Osnabruck'),6)
aus Osnabrck utf8_general_ci 1.591140
alter table t1 modify t varchar(200) collate latin1_german2_ci not null;
Warnings:
Warning 1265 Data truncated for column 't' at row 3
Warning 1265 Data truncated for column 't' at row 4
Warning 1265 Data truncated for column 't' at row 5
Warning 1265 Data truncated for column 't' at row 6
Warning 1366 Incorrect string value: '\xD0\xAD\xD1\x82\xD0\xBE...' for column 't' at row 3
Warning 1366 Incorrect string value: '\xD0\x9E\xD1\x82\xD0\xBB...' for column 't' at row 4
Warning 1366 Incorrect string value: '\xD0\x9D\xD0\xB5 \xD0...' for column 't' at row 5
Warning 1366 Incorrect string value: '\xD0\xB8 \xD0\xB1\xD1...' for column 't' at row 6
SELECT t, collation(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrck');
t collation(t)
aus Osnabrck latin1_german2_ci
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
......@@ -5799,37 +5799,148 @@ void Field_datetime::sql_type(String &res) const
** A string may be varchar or binary
****************************************************************************/
/*
  Report a "not well formed" or "cannot convert" problem
  after a character string has been stored into a field.

  SYNOPSIS
    check_string_copy_error()
    field                    - Field the string was stored into
    well_formed_error_pos    - where malformed data was first met,
                               or NULL
    cannot_convert_error_pos - where a non-convertible character was
                               first met, or NULL
    end                      - end of the source string

  NOTES
    Both cases are reported with the same code,
    ER_TRUNCATED_WRONG_VALUE_FOR_FIELD:
      "Incorrect string value: 'xxx' for column 't' at row 1"
    The well-formedness position takes precedence when both are set.
    At most six source bytes, starting at the offending position, are
    quoted.  Bytes outside 0x20..0x7F are written as \xHH, and "..."
    is appended when the source continues past the quoted part.
    The condition is raised as an error when abort_on_warning is set
    for the current thread, as a warning otherwise.

  RETURN
    FALSE - Neither position is set; nothing was reported
    TRUE  - A problem was reported
*/
static bool
check_string_copy_error(Field_str *field,
                        const char *well_formed_error_pos,
                        const char *cannot_convert_error_pos,
                        const char *end)
{
  char quoted[64];
  char *out= quoted;
  const char *cur= well_formed_error_pos ? well_formed_error_pos :
                                           cannot_convert_error_pos;

  if (!cur)
    return FALSE;

  /* Quote no more than six bytes of the offending data. */
  const char *quote_end= (end > cur + 6) ? cur + 6 : end;

  while (cur < quote_end)
  {
    unsigned char ch= (unsigned char) *cur++;
    if (ch < 0x20 || ch > 0x7F)
    {
      /* Outside the directly displayed range: emit as \xHH */
      *out++= '\\';
      *out++= 'x';
      *out++= _dig_vec_upper[ch >> 4];
      *out++= _dig_vec_upper[ch & 15];
    }
    else
      *out++= (char) ch;
  }

  /* Mark that the source continues beyond the quoted fragment. */
  if (end > quote_end)
  {
    *out++= '.';
    *out++= '.';
    *out++= '.';
  }
  *out= '\0';

  push_warning_printf(field->table->in_use,
                      field->table->in_use->abort_on_warning ?
                      MYSQL_ERROR::WARN_LEVEL_ERROR :
                      MYSQL_ERROR::WARN_LEVEL_WARN,
                      ER_TRUNCATED_WRONG_VALUE_FOR_FIELD,
                      ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
                      "string", quoted, field->field_name,
                      (ulong) field->table->in_use->row_count);
  return TRUE;
}
/*
  Raise the truncation condition for a character string
  that was too long for the field it was stored into.

  SYNOPSIS
    report_data_too_long()
    field - Field the string was stored into

  NOTES
    When abort_on_warning is set for the current thread the condition
    is raised as an error with ER_DATA_TOO_LONG; otherwise it is raised
    as a warning with WARN_DATA_TRUNCATED.

  RETURN
    N/A
*/
inline void
report_data_too_long(Field_str *field)
{
  if (!field->table->in_use->abort_on_warning)
  {
    field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
    return;
  }
  field->set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1);
}
/*
  Tell whether the range [str, strend) holds data that must not be
  silently dropped: any byte at all for the binary character set,
  or anything left after the leading run of spaces for other
  character sets.

  SYNOPSIS
    test_if_important_data()
    cs     - Character set of the string
    str    - Start of the data to test
    strend - End of the data to test

  RETURN
    FALSE - The range holds no important data
    TRUE  - The range holds some important data
*/
static bool
test_if_important_data(CHARSET_INFO *cs, const char *str, const char *strend)
{
  /* Every byte of a binary string counts. */
  if (cs == &my_charset_bin)
    return (str < strend);

  /* Otherwise skip the spaces the range starts with and see what is left. */
  const char *after_spaces= str + cs->cset->scan(cs, str, strend,
                                                 MY_SEQ_SPACES);
  return (after_spaces < strend);
}
/* Copy a string and fill with space */
int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
{
int error= 0, well_formed_error;
uint32 not_used;
char buff[STRING_BUFFER_USUAL_SIZE];
String tmpstr(buff,sizeof(buff), &my_charset_bin);
uint copy_length;
const char *well_formed_error_pos;
const char *cannot_convert_error_pos;
const char *from_end_pos;
/* See the comment for Field_long::store(long long) */
DBUG_ASSERT(table->in_use == current_thd);
/* Convert character set if necessary */
if (String::needs_conversion(length, cs, field_charset, &not_used))
{
uint conv_errors;
tmpstr.copy(from, length, cs, field_charset, &conv_errors);
from= tmpstr.ptr();
length= tmpstr.length();
if (conv_errors)
error= 2;
}
/* Make sure we don't break a multibyte sequence or copy malformed data. */
copy_length= field_charset->cset->well_formed_len(field_charset,
from,from+length,
field_length/
field_charset->mbmaxlen,
&well_formed_error);
memmove(ptr, from, copy_length);
copy_length= well_formed_copy_nchars(field_charset,
ptr, field_length,
cs, from, length,
field_length / field_charset->mbmaxlen,
&well_formed_error_pos,
&cannot_convert_error_pos,
&from_end_pos);
/* Append spaces if the string was shorter than the field. */
if (copy_length < field_length)
......@@ -5837,32 +5948,23 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
field_length-copy_length,
field_charset->pad_char);
if (check_string_copy_error(this, well_formed_error_pos,
cannot_convert_error_pos, from + length))
return 2;
/*
Check if we lost any important data (anything in a binary string,
or any non-space in others).
*/
if ((copy_length < length) && table->in_use->count_cuted_fields)
if ((from_end_pos < from + length) && table->in_use->count_cuted_fields)
{
if (binary())
error= 2;
else
if (test_if_important_data(field_charset, from_end_pos, from + length))
{
const char *end=from+length;
from+= copy_length;
from+= field_charset->cset->scan(field_charset, from, end,
MY_SEQ_SPACES);
if (from != end)
error= 2;
report_data_too_long(this);
return 2;
}
}
if (error)
{
if (table->in_use->abort_on_warning)
set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1);
else
set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
}
return error;
return 0;
}
......@@ -6195,58 +6297,35 @@ Field *Field_string::new_field(MEM_ROOT *root, struct st_table *new_table,
int Field_varstring::store(const char *from,uint length,CHARSET_INFO *cs)
{
uint32 not_used, copy_length;
char buff[STRING_BUFFER_USUAL_SIZE];
String tmpstr(buff,sizeof(buff), &my_charset_bin);
int error_code= 0, well_formed_error;
enum MYSQL_ERROR::enum_warning_level level= MYSQL_ERROR::WARN_LEVEL_WARN;
uint copy_length;
const char *well_formed_error_pos;
const char *cannot_convert_error_pos;
const char *from_end_pos;
copy_length= well_formed_copy_nchars(field_charset,
ptr + length_bytes, field_length,
cs, from, length,
field_length / field_charset->mbmaxlen,
&well_formed_error_pos,
&cannot_convert_error_pos,
&from_end_pos);
/* Convert character set if necessary */
if (String::needs_conversion(length, cs, field_charset, &not_used))
{
uint conv_errors;
tmpstr.copy(from, length, cs, field_charset, &conv_errors);
from= tmpstr.ptr();
length= tmpstr.length();
if (conv_errors)
error_code= WARN_DATA_TRUNCATED;
}
/*
Make sure we don't break a multibyte sequence
as well as don't copy a malformed data.
*/
copy_length= field_charset->cset->well_formed_len(field_charset,
from,from+length,
field_length/
field_charset->mbmaxlen,
&well_formed_error);
memmove(ptr + length_bytes, from, copy_length);
if (length_bytes == 1)
*ptr= (uchar) copy_length;
else
int2store(ptr, copy_length);
if (check_string_copy_error(this, well_formed_error_pos,
cannot_convert_error_pos, from + length))
return 2;
// Check if we lost something other than just trailing spaces
if ((copy_length < length) && table->in_use->count_cuted_fields &&
!error_code)
{
if (!binary())
{
const char *end= from + length;
from+= copy_length;
from+= field_charset->cset->scan(field_charset, from, end, MY_SEQ_SPACES);
/* If we lost only spaces then produce a NOTE, not a WARNING */
if (from == end)
level= MYSQL_ERROR::WARN_LEVEL_NOTE;
}
error_code= WARN_DATA_TRUNCATED;
}
if (error_code)
if ((from_end_pos < from + length) && table->in_use->count_cuted_fields)
{
if (level == MYSQL_ERROR::WARN_LEVEL_WARN &&
table->in_use->abort_on_warning)
error_code= ER_DATA_TOO_LONG;
set_warning(level, error_code, 1);
if (test_if_important_data(field_charset, from_end_pos, from + length))
report_data_too_long(this);
else /* If we lost only spaces then produce a NOTE, not a WARNING */
set_warning(MYSQL_ERROR::WARN_LEVEL_NOTE, WARN_DATA_TRUNCATED, 1);
return 2;
}
return 0;
......@@ -6828,68 +6907,70 @@ void Field_blob::put_length(char *pos, uint32 length)
int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs)
{
int error= 0, well_formed_error;
uint copy_length, new_length;
const char *well_formed_error_pos;
const char *cannot_convert_error_pos;
const char *from_end_pos, *tmp;
char buff[STRING_BUFFER_USUAL_SIZE];
String tmpstr(buff,sizeof(buff), &my_charset_bin);
if (!length)
{
bzero(ptr,Field_blob::pack_length());
return 0;
}
else
{
bool was_conversion;
char buff[STRING_BUFFER_USUAL_SIZE];
String tmpstr(buff,sizeof(buff), &my_charset_bin);
uint copy_length;
uint32 not_used;
/* Convert character set if necessary */
if ((was_conversion= String::needs_conversion(length, cs, field_charset,
&not_used)))
{
uint conv_errors;
if (tmpstr.copy(from, length, cs, field_charset, &conv_errors))
{
/* Fatal OOM error */
bzero(ptr,Field_blob::pack_length());
return -1;
}
from= tmpstr.ptr();
length= tmpstr.length();
if (conv_errors)
error= 2;
}
copy_length= max_data_length();
/*
copy_length is OK as last argument to well_formed_len as this is never
used to limit the length of the data. The cut of long data is done with
the 'min()' call below.
*/
copy_length= field_charset->cset->well_formed_len(field_charset,
from,from +
min(length, copy_length),
copy_length,
&well_formed_error);
if (copy_length < length)
error= 2;
Field_blob::store_length(copy_length);
if (was_conversion || table->copy_blobs || copy_length <= MAX_FIELD_WIDTH)
{ // Must make a copy
if (from != value.ptr()) // For valgrind
{
value.copy(from,copy_length,charset());
from=value.ptr();
}
if (from == value.ptr())
{
uint32 dummy_offset;
if (!String::needs_conversion(length, cs, field_charset, &dummy_offset))
{
Field_blob::store_length(length);
bmove(ptr+packlength,(char*) &from,sizeof(char*));
return 0;
}
bmove(ptr+packlength,(char*) &from,sizeof(char*));
if (tmpstr.copy(from, length, cs))
goto oom_error;
from= tmpstr.ptr();
}
if (error)
new_length= min(max_data_length(), field_charset->mbmaxlen * length);
if (value.alloc(new_length))
goto oom_error;
/*
"length" is OK as "nchars" argument to well_formed_copy_nchars as this
is never used to limit the length of the data. The cut of long data
is done with the new_length value.
*/
copy_length= well_formed_copy_nchars(field_charset,
(char*) value.ptr(), new_length,
cs, from, length,
length,
&well_formed_error_pos,
&cannot_convert_error_pos,
&from_end_pos);
Field_blob::store_length(copy_length);
tmp= value.ptr();
bmove(ptr+packlength,(char*) &tmp,sizeof(char*));
if (check_string_copy_error(this, well_formed_error_pos,
cannot_convert_error_pos, from + length))
return 2;
if (copy_length < length)
{
if (table->in_use->abort_on_warning)
set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1);
else
set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
report_data_too_long(this);
return 2;
}
return 0;
oom_error:
/* Fatal OOM error */
bzero(ptr,Field_blob::pack_length());
return -1;
}
......
......@@ -854,6 +854,162 @@ outp:
}
/*
  Copy a string,
  with optional character set conversion,
  with optional left padding (for binary -> UCS2 conversion)

  SYNOPSIS
    well_formed_copy_nchars()
    to                       Store result here
    to_length                Maximum length of "to" string, in bytes
    to_cs                    Character set of "to" string
    from                     Copy from here
    from_length              Length of "from" string, in bytes
    from_cs                  Character set of "from" string
    nchars                   Copy not more than nchars characters
    well_formed_error_pos    Return position where "from" is not well
                             formed, or NULL otherwise.
    cannot_convert_error_pos Return position where a non-convertible
                             character was met, or NULL otherwise.
    from_end_pos             Return position where scanning of the "from"
                             string stopped. Everything before this
                             position is represented in "to"; everything
                             from it onwards was not copied.

  NOTES
    When either character set is binary, or both character sets are the
    same, the bytes are copied without conversion (only the well formed
    prefix is copied unless the target is binary).
    Otherwise the string is converted character by character; characters
    that are malformed or cannot be converted are replaced with '?' and
    the first such position is returned through the error pointers.

  RETURN
    Number of bytes written to "to"
*/
uint32
well_formed_copy_nchars(CHARSET_INFO *to_cs,
                        char *to, uint to_length,
                        CHARSET_INFO *from_cs,
                        const char *from, uint from_length,
                        uint nchars,
                        const char **well_formed_error_pos,
                        const char **cannot_convert_error_pos,
                        const char **from_end_pos)
{
  uint res;

  if ((to_cs == &my_charset_bin) ||
      (from_cs == &my_charset_bin) ||
      (to_cs == from_cs) ||
      my_charset_same(from_cs, to_cs))
  {
    /* No conversion needed: plain copy of (a prefix of) the bytes. */
    if (to_length < to_cs->mbminlen || !nchars)
    {
      /* No room for even one character, or nothing was requested. */
      *from_end_pos= from;
      *cannot_convert_error_pos= NULL;
      *well_formed_error_pos= NULL;
      return 0;
    }

    if (to_cs == &my_charset_bin)
    {
      /* Binary target: one character is one byte, any byte is valid. */
      res= min(min(nchars, to_length), from_length);
      memmove(to, from, res);
      *from_end_pos= from + res;
      *well_formed_error_pos= NULL;
      *cannot_convert_error_pos= NULL;
    }
    else
    {
      int well_formed_error;
      uint from_offset;
      /* Bytes of "to" taken by the zero-padded leading character, if any */
      uint padded_length= 0;

      if ((from_offset= (from_length % to_cs->mbminlen)) &&
          (from_cs == &my_charset_bin))
      {
        /*
          Copying from BINARY to UCS2 needs to prepend zeros sometimes:
          INSERT INTO t1 (ucs2_column) VALUES (0x01);
          0x01 -> 0x0001
        */
        uint pad_length= to_cs->mbminlen - from_offset;
        bzero(to, pad_length);
        memmove(to + pad_length, from, from_offset);
        nchars--;
        from+= from_offset;
        from_length-= from_offset;
        to+= to_cs->mbminlen;
        to_length-= to_cs->mbminlen;
        padded_length= to_cs->mbminlen;
      }

      set_if_smaller(from_length, to_length);
      res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
                                        nchars, &well_formed_error);
      memmove(to, from, res);
      *from_end_pos= from + res;
      *well_formed_error_pos= well_formed_error ? from + res : NULL;
      *cannot_convert_error_pos= NULL;
      /*
        Account for the padded character only when it was really written.
        A non-binary source whose length is not a multiple of mbminlen
        also has a non-zero from_offset but gets no padding, so keying on
        from_offset here would report more bytes than were stored.
      */
      res+= padded_length;
    }
  }
  else
  {
    /* Different character sets: convert through Unicode, char by char. */
    int cnvres;
    my_wc_t wc;
    int (*mb_wc)(struct charset_info_st *, my_wc_t *,
                 const uchar *, const uchar *)= from_cs->cset->mb_wc;
    int (*wc_mb)(struct charset_info_st *, my_wc_t,
                 uchar *s, uchar *e)= to_cs->cset->wc_mb;
    const uchar *from_end= (const uchar*) from + from_length;
    uchar *to_end= (uchar*) to + to_length;
    char *to_start= to;
    *well_formed_error_pos= NULL;
    *cannot_convert_error_pos= NULL;

    for ( ; nchars; nchars--)
    {
      const char *from_prev= from;
      if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
        from+= cnvres;
      else if (cnvres == MY_CS_ILSEQ)
      {
        /* Malformed byte: remember the first one, store '?' instead. */
        if (!*well_formed_error_pos)
          *well_formed_error_pos= from;
        from++;
        wc= '?';
      }
      else if (cnvres > MY_CS_TOOSMALL)
      {
        /*
          A correct multibyte sequence detected
          But it doesn't have Unicode mapping.
        */
        if (!*cannot_convert_error_pos)
          *cannot_convert_error_pos= from;
        from+= (-cnvres);
        wc= '?';
      }
      else
        break;  // Not enough characters

outp:
      if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
        to+= cnvres;
      else if (cnvres == MY_CS_ILUNI && wc != '?')
      {
        /* No mapping in the target character set: retry with '?'. */
        if (!*cannot_convert_error_pos)
          *cannot_convert_error_pos= from_prev;
        wc= '?';
        goto outp;
      }
      else
      {
        /*
          The character could not be stored (typically "to" is full).
          Step back so that from_end_pos does not claim a character
          that never reached "to"; otherwise callers comparing
          from_end_pos with the end of the source miss the truncation.
        */
        from= from_prev;
        break;
      }
    }
    *from_end_pos= from;
    res= to - to_start;
  }
  return (uint32) res;
}
void String::print(String *str)
{
char *st= (char*)Ptr, *end= st+str_length;
......
......@@ -32,6 +32,14 @@ String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
const char *from, uint32 from_length,
CHARSET_INFO *from_cs, uint *errors);
uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
const char *from, uint from_length,
uint nchars,
const char **well_formed_error_pos,
const char **cannot_convert_error_pos,
const char **from_end_pos);
class String
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment