Bug#18908: ERROR 1406 (22001): Data too long for column :: using utf8

Problem: The error message was too confusing when a string could not be converted between the string's and the column's character sets on INSERT and UPDATE. Fix: produce a better error message instead of "Data too long" in such cases. Additional changes: added "DROP TABLE IF EXISTS" to several tests to make them safe against failures in previous tests.

Bug#18908: ERROR 1406 (22001): Data too long for column :: using utf8
Problem: The error message was too confusing when a string could not be converted between the string's and the column's character sets on INSERT and UPDATE. Fix: produce a better error message instead of "Data too long" in such cases. Additional changes: added "DROP TABLE IF EXISTS" to several tests to make them safe against failures in previous tests.
0e3a1eec · bar@mysql.com/bar.intranet.mysql.r18.ru · bbf84d6b · 0e3a1eec · 0e3a1eec · 0e3a1eec
Commit 0e3a1eec authored Oct 30, 2006 by bar@mysql.com/bar.intranet.mysql.r18.ru
13 changed files
--- a/BitKeeper/etc/collapsed
+++ b/BitKeeper/etc/collapsed
@@ -2,3 +2,4 @@
 44ec850ac2k4y2Omgr92GiWPBAVKGQ
 44edb86b1iE5knJ97MbliK_3lCiAXA
 44f33f3aj5KW5qweQeekY1LU0E9ZCg
+452c6c6dAjuNghfc1ObZ_UQ5SCl85g
--- a/mysql-test/include/strict_autoinc.inc
+++ b/mysql-test/include/strict_autoinc.inc
@@ -2,6 +2,10 @@
 # Test for strict-mode autoincrement
 #
+--disable_warnings
+drop table if exists t1;
+--enable_warnings
 set @org_mode=@@sql_mode;
 eval create table t1
 (

--- a/mysql-test/r/ctype_recoding.result
+++ b/mysql-test/r/ctype_recoding.result
@@ -171,8 +171,8 @@ create table t1 (a char(10) character set koi8r, b text character set koi8r);
 insert into t1 values ('test','test');
 insert into t1 values ('','');
 Warnings:
-Warning	1265	Data truncated for column 'a' at row 1
+Warning	1366	Incorrect string value: '\xCA\xC3\xD5\xCB' for column 'a' at row 1
-Warning	1265	Data truncated for column 'b' at row 1
+Warning	1366	Incorrect string value: '\xCA\xC3\xD5\xCB' for column 'b' at row 1
 drop table t1;
 set names koi8r;
 create table t1 (a char(10) character set cp1251);

--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@@ -197,7 +197,7 @@ drop table t1;
 create table t1 (s1 char(10) character set utf8);
 insert into t1 values (0x41FF);
 Warnings:
-Warning	1265	Data truncated for column 's1' at row 1
+Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
 41
@@ -205,7 +205,7 @@ drop table t1;
 create table t1 (s1 varchar(10) character set utf8);
 insert into t1 values (0x41FF);
 Warnings:
-Warning	1265	Data truncated for column 's1' at row 1
+Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
 41
@@ -213,7 +213,7 @@ drop table t1;
 create table t1 (s1 text character set utf8);
 insert into t1 values (0x41FF);
 Warnings:
-Warning	1265	Data truncated for column 's1' at row 1
+Warning	1366	Incorrect string value: '\xFF' for column 's1' at row 1
 select hex(s1) from t1;
 hex(s1)
 41

--- a/mysql-test/r/fulltext.result
+++ b/mysql-test/r/fulltext.result
@@ -372,10 +372,10 @@ t	collation(t)	FORMAT(MATCH t AGAINST ('Osnabruck'),6)
 aus Osnabrck	utf8_general_ci	1.591140
 alter table t1 modify t varchar(200) collate latin1_german2_ci not null;
 Warnings:
-Warning	1265	Data truncated for column 't' at row 3
+Warning	1366	Incorrect string value: '\xD0\xAD\xD1\x82\xD0\xBE...' for column 't' at row 3
-Warning	1265	Data truncated for column 't' at row 4
+Warning	1366	Incorrect string value: '\xD0\x9E\xD1\x82\xD0\xBB...' for column 't' at row 4
-Warning	1265	Data truncated for column 't' at row 5
+Warning	1366	Incorrect string value: '\xD0\x9D\xD0\xB5 \xD0...' for column 't' at row 5
-Warning	1265	Data truncated for column 't' at row 6
+Warning	1366	Incorrect string value: '\xD0\xB8 \xD0\xB1\xD1...' for column 't' at row 6
 SELECT t, collation(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrck');
 t	collation(t)
 aus Osnabrck	latin1_german2_ci

--- a/mysql-test/r/strict_autoinc_1myisam.result
+++ b/mysql-test/r/strict_autoinc_1myisam.result
+drop table if exists t1;
 set @org_mode=@@sql_mode;
 create table t1
 (

--- a/mysql-test/r/strict_autoinc_2innodb.result
+++ b/mysql-test/r/strict_autoinc_2innodb.result
+drop table if exists t1;
 set @org_mode=@@sql_mode;
 create table t1
 (

--- a/mysql-test/r/strict_autoinc_3heap.result
+++ b/mysql-test/r/strict_autoinc_3heap.result
+drop table if exists t1;
 set @org_mode=@@sql_mode;
 create table t1
 (

--- a/mysql-test/r/strict_autoinc_4bdb.result
+++ b/mysql-test/r/strict_autoinc_4bdb.result
+drop table if exists t1;
 set @org_mode=@@sql_mode;
 create table t1
 (

--- a/mysql-test/r/strict_autoinc_5ndb.result
+++ b/mysql-test/r/strict_autoinc_5ndb.result
+drop table if exists t1;
 set @org_mode=@@sql_mode;
 create table t1
 (

--- a/sql/field.cc
+++ b/sql/field.cc
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -854,6 +854,162 @@ outp:
 }
+/*
+  Copy a string,
+  with optional character set conversion,
+  with optional left padding (for binary -> UCS2 conversion)
+  SYNOPSIS
+    well_formed_copy_nchars()
+    to_cs                    Character set of "to" string
+    to                       Store result here
+    to_length                Maximum length of "to" string, in bytes
+    from_cs                  Character set of "from" string
+    from                     Copy from here
+    from_length              Length of "from" string, in bytes
+    nchars                   Copy not more than nchars characters
+    well_formed_error_pos    Return position where "from" is not well formed,
+                             or NULL otherwise.
+    cannot_convert_error_pos Return position where a non-convertible
+                             character was met, or NULL otherwise.
+    from_end_pos             Return position where scanning of "from"
+                             string stopped.
+  NOTES
+    All three output pointers are written unconditionally, so the caller
+    must pass valid (non-NULL) addresses for them.
+    If either character set is my_charset_bin, or both are the same
+    (equal pointers or my_charset_same()), bytes are copied without
+    conversion; when "to" is binary, nchars limits the number of bytes.
+    Otherwise every character is read with from_cs->cset->mb_wc into a
+    my_wc_t and written with to_cs->cset->wc_mb. Ill-formed or
+    non-convertible characters are replaced with '?', and only the
+    first position of each kind of error is reported.
+  RETURN
+    number of bytes copied to 'to'
+*/
+uint32
+well_formed_copy_nchars(CHARSET_INFO *to_cs,
+                        char *to, uint to_length,
+                        CHARSET_INFO *from_cs,
+                        const char *from, uint from_length,
+                        uint nchars,
+                        const char **well_formed_error_pos,
+                        const char **cannot_convert_error_pos,
+                        const char **from_end_pos)
+{
+  uint res;
+  if ((to_cs == &my_charset_bin) || 
+      (from_cs == &my_charset_bin) ||
+      (to_cs == from_cs) ||
+      my_charset_same(from_cs, to_cs))
+  {
+    if (to_length < to_cs->mbminlen || !nchars)  // nothing can be stored
+    {
+      *from_end_pos= from;
+      *cannot_convert_error_pos= NULL;
+      *well_formed_error_pos= NULL;
+      return 0;
+    }
+    if (to_cs == &my_charset_bin)
+    {
+      res= min(min(nchars, to_length), from_length);  // nchars is used as a byte limit here
+      memmove(to, from, res);
+      *from_end_pos= from + res;
+      *well_formed_error_pos= NULL;
+      *cannot_convert_error_pos= NULL;
+    }
+    else
+    {
+      int well_formed_error;
+      uint from_offset;
+      if ((from_offset= (from_length % to_cs->mbminlen)) &&
+          (from_cs == &my_charset_bin))
+      {
+        /*
+          Copying from BINARY to UCS2 needs to prepend zeros sometimes:
+          INSERT INTO t1 (ucs2_column) VALUES (0x01);
+          0x01 -> 0x0001
+          The leading from_offset bytes of "from" are right-aligned in one
+          to_cs->mbminlen-sized unit, preceded by pad_length zero bytes.
+        */
+        uint pad_length= to_cs->mbminlen - from_offset;
+        bzero(to, pad_length);
+        memmove(to + pad_length, from, from_offset);
+        nchars--;  // the padded unit counts as one character
+        from+= from_offset;
+        from_length-= from_offset;
+        to+= to_cs->mbminlen;
+        to_length-= to_cs->mbminlen;
+      }
+      set_if_smaller(from_length, to_length);
+      res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
+                                        nchars, &well_formed_error);
+      memmove(to, from, res);
+      *from_end_pos= from + res;
+      *well_formed_error_pos= well_formed_error ? from + res : NULL;
+      *cannot_convert_error_pos= NULL;
+      if (from_offset)  // NOTE(review): also non-zero when from_cs is not binary and no padding was written - confirm
+        res+= to_cs->mbminlen;
+    }
+  }
+  else
+  {
+    int cnvres;
+    my_wc_t wc;
+    int (*mb_wc)(struct charset_info_st *, my_wc_t *,
+                 const uchar *, const uchar *)= from_cs->cset->mb_wc;
+    int (*wc_mb)(struct charset_info_st *, my_wc_t,
+                 uchar *s, uchar *e)= to_cs->cset->wc_mb;
+    const uchar *from_end= (const uchar*) from + from_length;
+    uchar *to_end= (uchar*) to + to_length;
+    char *to_start= to;
+    *well_formed_error_pos= NULL;
+    *cannot_convert_error_pos= NULL;
+    for ( ; nchars; nchars--)
+    {
+      const char *from_prev= from;  // start of the character being converted
+      if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
+        from+= cnvres;
+      else if (cnvres == MY_CS_ILSEQ)
+      {
+        if (!*well_formed_error_pos)  // report only the first ill-formed position
+          *well_formed_error_pos= from;
+        from++;
+        wc= '?';
+      }
+      else if (cnvres > MY_CS_TOOSMALL)
+      {
+        /*
+          A correct multibyte sequence detected
+          But it doesn't have Unicode mapping.
+          -cnvres is used as the number of bytes to skip in "from".
+        */
+        if (!*cannot_convert_error_pos)
+          *cannot_convert_error_pos= from;
+        from+= (-cnvres);
+        wc= '?';
+      }
+      else
+        break;  // Not enough characters
+outp:
+      if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
+        to+= cnvres;
+      else if (cnvres == MY_CS_ILUNI && wc != '?')
+      {
+        if (!*cannot_convert_error_pos)
+          *cannot_convert_error_pos= from_prev;
+        wc= '?';
+        goto outp;  // retry with '?'; the wc != '?' test above prevents an endless loop
+      }
+      else
+        break;  // NOTE(review): "from" is already past this unstored character, so from_end_pos points after it - confirm
+    }
+    *from_end_pos= from;
+    res= to - to_start;
+  }
+  return (uint32) res;
+}
 void String::print(String *str)
 {
  char *st= (char*)Ptr, *end= st+str_length;

--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -32,6 +32,14 @@ String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
 uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
 			const char *from, uint32 from_length,
 			CHARSET_INFO *from_cs, uint *errors);
+uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs,
+                               char *to, uint to_length,
+                               CHARSET_INFO *from_cs,
+                               const char *from, uint from_length,
+                               uint nchars,                           /* max characters to copy */
+                               const char **well_formed_error_pos,    /* out */
+                               const char **cannot_convert_error_pos, /* out */
+                               const char **from_end_pos);            /* out */
 class String
 {