Bug#45803 Inaccurate estimates for partial key values with IBMDB2I

Some collations were causing IBMDB2I to report inaccurate key range estimations to the optimizer for LIKE clauses that select substrings. This can be seen by running EXPLAIN. This problem primarily affects multi-byte and Unicode character sets. This patch involves substantial changes to several modules. There are a number of problems with the character set and collation handling. These problems have been or are being fixed, and a comprehensive test has been included that should provide much better coverage than there was before. This test is enabled only for IBM i 6.1, because that version has support for the greatest number of collations.

Bug#45803 Inaccurate estimates for partial key values with IBMDB2I
Some collations were causing IBMDB2I to report inaccurate key range estimations to the optimizer for LIKE clauses that select substrings. This can be seen by running EXPLAIN. This problem primarily affects multi-byte and Unicode character sets. This patch involves substantial changes to several modules. There are a number of problems with the character set and collation handling. These problems have been or are being fixed, and a comprehensive test has been included that should provide much better coverage than there was before. This test is enabled only for IBM i 6.1, because that version has support for the greatest number of collations.
148141fa · V Narayanan · 743fc577 · 148141fa · 148141fa · 148141fa
Commit 148141fa authored Jul 06, 2009 by V Narayanan
6 changed files
--- a/mysql-test/suite/ibmdb2i/r/ibmdb2i_collations.result
+++ b/mysql-test/suite/ibmdb2i/r/ibmdb2i_collations.result
--- a/mysql-test/suite/ibmdb2i/t/ibmdb2i_collations.test
+++ b/mysql-test/suite/ibmdb2i/t/ibmdb2i_collations.test
+source suite/ibmdb2i/include/have_ibmdb2i.inc;
+source suite/ibmdb2i/include/have_i61.inc;
+--disable_warnings
+drop table if exists t1, ffd, fd;
+--enable_warnings
+
+--disable_abort_on_error
+--error 0,255
+exec system "DLTF QGPL/FFDOUT" > /dev/null;
+--error 0,255
+exec system "DLTF QGPL/FDOUT" > /dev/null;
+--enable_abort_on_error
+let $count= query_get_value(select count(*) from information_schema.COLLATIONS where COLLATION_NAME <> "binary", count(*),1);
+
+while ($count)
+{
+  let $collation = query_get_value(select COLLATION_NAME from information_schema.COLLATIONS where COLLATION_NAME <> "binary" order by COLLATION_NAME desc, COLLATION_NAME, $count);
+  error 0,1005,2504,2028;
+  eval CREATE TABLE t1 ($collation integer, c char(10), v varchar(20), index(c), index(v)) collate $collation engine=ibmdb2i;
+  if (!$mysql_errno)
+  {
+      insert into t1 (c,v) values ("abc","def"),("abcd", "def"),("abcde","defg"),("aaaa","bbbb");
+      insert into t1 select * from t1;
+      explain select c,v from t1 force index(c) where c like "ab%";
+      explain select c,v from t1 force index(v) where v like "de%";
+      drop table t1;
+      eval create table t1 ($collation char(10) primary key) collate $collation engine=ibmdb2i;
+      system system "DSPFFD FILE(\"test\"/\"t1\") OUTPUT(*OUTFILE) OUTFILE(QGPL/FFDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+      system system "DSPFD FILE(\"test\"/\"t1\") TYPE(*SEQ) OUTPUT(*OUTFILE) OUTFILE(QGPL/FDOUT) OUTMBR(*FIRST *ADD)" > /dev/null;
+      drop table t1;
+  }
+  dec $count;
+}
+
+create table ffd (WHCHD1 CHAR(20), WHCSID decimal(5,0)) engine=ibmdb2i;
+system system "CPYF FROMFILE(QGPL/FFDOUT) TOFILE(\"test\"/\"ffd\") mbropt(*replace) fmtopt(*drop *map)"  > /dev/null;
+create table fd (SQSSEQ CHAR(10)) engine=ibmdb2i;
+system system "CPYF FROMFILE(QGPL/FDOUT) TOFILE(\"test\"/\"fd\") mbropt(*replace) fmtopt(*drop *map)" > /dev/null;
+create temporary table intermed (row integer key auto_increment, cs char(30), ccsid integer);
+insert into intermed (cs, ccsid) select * from ffd;
+create temporary table intermed2 (row integer key auto_increment, srtseq char(10));
+insert into intermed2 (srtseq) select * from fd;
+select ccsid, cs, srtseq from intermed inner join intermed2 on intermed.row = intermed2.row;
+drop table ffd, fd;
--- a/storage/ibmdb2i/db2i_conversion.cc
+++ b/storage/ibmdb2i/db2i_conversion.cc
@@ -137,7 +137,9 @@ int ha_ibmdb2i::convertFieldChars(enum_conversionDirection direction,
                                  char* output, 
                                  size_t ilen, 
                                  size_t olen, 
-                                  size_t* outDataLen)
+                                  size_t* outDataLen,
+                                  bool tacitErrors,
+                                  size_t* substChars)
 {
  DBUG_PRINT("ha_ibmdb2i::convertFieldChars",("Direction: %d; length = %d", direction, ilen));
  
@@ -157,27 +159,27 @@ int ha_ibmdb2i::convertFieldChars(enum_conversionDirection direction,
  size_t initOLen= olen;
  size_t substitutedChars = 0;
  int rc = iconv(conversion, (char**)&input, &ilen, &output, &olen, &substitutedChars );
+  if (outDataLen) *outDataLen = initOLen - olen;
+  if (substChars) *substChars = substitutedChars;
  if (unlikely(rc < 0))
  {
    int er = errno;
    if (er == EILSEQ)
    {
-      getErrTxt(DB2I_ERR_ILL_CHAR, table->field[fieldID]->field_name);
+      if (!tacitErrors) getErrTxt(DB2I_ERR_ILL_CHAR, table->field[fieldID]->field_name);
      return (DB2I_ERR_ILL_CHAR);
    }
    else
    {
-      getErrTxt(DB2I_ERR_ICONV,er);
+      if (!tacitErrors) getErrTxt(DB2I_ERR_ICONV,er);
      return (DB2I_ERR_ICONV);
    }
  }
-  if (unlikely(substitutedChars))
+  if (unlikely(substitutedChars) && (!tacitErrors))
  {
    warning(ha_thd(), DB2I_ERR_SUB_CHARS, table->field[fieldID]->field_name);
  }

-  if (outDataLen) *outDataLen = initOLen - olen;
-
  return (0);
 }

@@ -555,12 +557,12 @@ int ha_ibmdb2i::getFieldTypeMapping(Field* field,
                return 1;
              if (fieldCharSet->mbmaxlen > 1)
              {
-                if (strncmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")) == 0 ) // UCS2
+                if (memcmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")-1) == 0 ) // UCS2
                {
                  sprintf(stringBuildBuffer, "GRAPHIC(%d)", max(fieldLength / fieldCharSet->mbmaxlen, 1)); // Number of characters
                  db2Ccsid = 13488;
                }
-                else if (strncmp(fieldCharSet->name, "utf8_", sizeof("utf8_")) == 0 &&
+                else if (memcmp(fieldCharSet->name, "utf8_", sizeof("utf8_")-1) == 0 &&
                         strcmp(fieldCharSet->name, "utf8_general_ci") != 0) 
                {
                  sprintf(stringBuildBuffer, "CHAR(%d)", max(fieldLength, 1)); // Number of bytes
@@ -584,12 +586,12 @@ int ha_ibmdb2i::getFieldTypeMapping(Field* field,
              {
                if (fieldCharSet->mbmaxlen > 1)
                {
-                  if (strncmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")) == 0 ) // UCS2
+                  if (memcmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")-1) == 0 ) // UCS2
                  {
                    sprintf(stringBuildBuffer, "VARGRAPHIC(%d)", max(fieldLength / fieldCharSet->mbmaxlen, 1)); // Number of characters
                    db2Ccsid = 13488;
                  }
-                  else if (strncmp(fieldCharSet->name, "utf8_", sizeof("utf8_")) == 0 &&
+                  else if (memcmp(fieldCharSet->name, "utf8_", sizeof("utf8_")-1) == 0 &&
                           strcmp(fieldCharSet->name, "utf8_general_ci") != 0) 
                  {
                    sprintf(stringBuildBuffer, "VARCHAR(%d)", max(fieldLength, 1)); // Number of bytes
@@ -611,12 +613,12 @@ int ha_ibmdb2i::getFieldTypeMapping(Field* field,
              {
                if (fieldCharSet->mbmaxlen > 1)
                {
-                  if (strncmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")) == 0 ) // UCS2
+                  if (memcmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")-1) == 0 ) // UCS2
                  {
                    sprintf(stringBuildBuffer, "LONG VARGRAPHIC ");
                    db2Ccsid = 13488;
                  }
-                  else if (strncmp(fieldCharSet->name, "utf8_", sizeof("utf8_")) == 0 &&
+                  else if (memcmp(fieldCharSet->name, "utf8_", sizeof("utf8_")-1) == 0 &&
                           strcmp(fieldCharSet->name, "utf8_general_ci") != 0) 
                  {
                    sprintf(stringBuildBuffer, "LONG VARCHAR ");
@@ -639,12 +641,12 @@ int ha_ibmdb2i::getFieldTypeMapping(Field* field,

                if (fieldCharSet->mbmaxlen > 1)
                {
-                  if (strncmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")) == 0 ) // UCS2
+                  if (memcmp(fieldCharSet->name, "ucs2_", sizeof("ucs2_")-1) == 0 ) // UCS2
                  {
                    sprintf(stringBuildBuffer, "DBCLOB(%d)", max(fieldLength / fieldCharSet->mbmaxlen, 1)); // Number of characters
                    db2Ccsid = 13488;
                  }
-                  else if (strncmp(fieldCharSet->name, "utf8_", sizeof("utf8_")) == 0 &&
+                  else if (memcmp(fieldCharSet->name, "utf8_", sizeof("utf8_")-1) == 0 &&
                           strcmp(fieldCharSet->name, "utf8_general_ci") != 0) 
                  {
                    sprintf(stringBuildBuffer, "CLOB(%d)", max(fieldLength, 1)); // Number of bytes
@@ -671,11 +673,15 @@ int ha_ibmdb2i::getFieldTypeMapping(Field* field,
                return rtnCode;
            }
            
+            if (db2Ccsid != 1208 &&
+                db2Ccsid != 13488)
+            {
              // Check whether there is a character conversion available.
              iconv_t temp;
              int32 rc = getConversion(toDB2, fieldCharSet, db2Ccsid, temp);
              if (unlikely(rc))
                return rc;
+            }
            
            sprintf(stringBuildBuffer, " CCSID %d ", db2Ccsid);
            mapping.append(stringBuildBuffer);

--- a/storage/ibmdb2i/db2i_myconv.h
+++ b/storage/ibmdb2i/db2i_myconv.h
@@ -220,6 +220,7 @@ INTERN  size_t	        myconv_dmap(myconv_t    cd,
      } else {
        *pOut=dmapS2S[*pIn];
        if (*pOut == 0x00) {
+          errno=EILSEQ;  /* 116 */
          *outBytesLeft-=(*inBytesLeft-inLen);
          *inBytesLeft=inLen;
          *outBuf=pOut;

--- a/storage/ibmdb2i/db2i_rir.cc
+++ b/storage/ibmdb2i/db2i_rir.cc
--- a/storage/ibmdb2i/ha_ibmdb2i.h
+++ b/storage/ibmdb2i/ha_ibmdb2i.h
@@ -383,7 +383,15 @@ private:
  int32 prepareWriteBufferForLobs();
  uint32 adjustLobBuffersForRead();
  bool lobFieldsRequested();
-  int convertFieldChars(enum_conversionDirection direction, uint16 fieldID, const char* input, char* output, size_t ilen, size_t olen, size_t* outDataLen);
+  int convertFieldChars(enum_conversionDirection direction, 
+                        uint16 fieldID, 
+                        const char* input, 
+                        char* output, 
+                        size_t ilen, 
+                        size_t olen, 
+                        size_t* outDataLen,
+                        bool tacitErrors=FALSE,
+                        size_t* substChars=NULL);

  /**
    Fast integer log2 function