Merge kboortz@bk-internal.mysql.com:/home/bk/mysql-4.1

into mysql.com:/Users/kent/mysql/bk/mysql-4.1-perl

Merge kboortz@bk-internal.mysql.com:/home/bk/mysql-4.1
into mysql.com:/Users/kent/mysql/bk/mysql-4.1-perl
2c546db3 · unknown · 346d0241 · 9b6d32d6 · 2c546db3 · 2c546db3
Commit 2c546db3 authored Mar 29, 2005 by unknown
Show whitespace changes
Inline Side-by-side

Showing with 293 additions and 14 deletions

mysys/charset-def.c mysys/charset-def.c +6 -0

mysys/default.c mysys/default.c +121 -14

strings/ctype-utf8.c strings/ctype-utf8.c +166 -0

No files found.
--- a/mysys/charset-def.c
+++ b/mysys/charset-def.c
@@ -62,6 +62,9 @@ extern CHARSET_INFO my_charset_utf8_slovak_uca_ci;
 extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci;
 extern CHARSET_INFO my_charset_utf8_roman_uca_ci;
 extern CHARSET_INFO my_charset_utf8_persian_uca_ci;
+#ifdef HAVE_CYBOZU_COLLATION
+extern CHARSET_INFO my_charset_utf8_general_cs;
+#endif
 #endif

 #endif /* HAVE_UCA_COLLATIONS */
@@ -146,6 +149,9 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
 #ifdef HAVE_CHARSET_utf8
  add_compiled_collation(&my_charset_utf8_general_ci);
  add_compiled_collation(&my_charset_utf8_bin);
+#ifdef HAVE_CYBOZU_COLLATION
+  add_compiled_collation(&my_charset_utf8_general_cs);
+#endif
 #ifdef HAVE_UCA_COLLATIONS
  add_compiled_collation(&my_charset_utf8_general_uca_ci);
  add_compiled_collation(&my_charset_utf8_icelandic_uca_ci);

--- a/mysys/default.c
+++ b/mysys/default.c
@@ -73,7 +73,7 @@ static int search_default_file(DYNAMIC_ARRAY *args,MEM_ROOT *alloc,
 static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc,
 					const char *dir, const char *ext,
 					const char *config_file,
-					TYPELIB *group);
+					TYPELIB *group, int recursion_level);

 static char *remove_end_comment(char *ptr);

@@ -194,7 +194,7 @@ int load_defaults(const char *conf_file, const char **groups,
  {
    if ((error= search_default_file_with_ext(&args, &alloc, "", "",
 					     forced_default_file,
-					     &group)) < 0)
+					     &group, 0)) < 0)
      goto err;
    if (error > 0)
    {
@@ -311,7 +311,7 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc,
  {
    int error;
    if ((error= search_default_file_with_ext(args, alloc, dir, *ext,
-					     config_file, group)) < 0)
+					     config_file, group, 0)) < 0)
      return error;
  }
  return 0;
@@ -326,9 +326,11 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc,
    args			Store pointer to found options here
    alloc			Allocate strings in this object
    dir				directory to read
-    config_file			Name of configuration file
    ext				Extension for configuration file
+    config_file                 Name of configuration file
    group			groups to read
+    recursion_level             current nesting depth of "!include" and
+                                "!includedir" processing; callers pass 0
+                                for a file not reached through a directive

  RETURN
    0   Success
@@ -340,12 +342,18 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc,
 static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc,
 					const char *dir, const char *ext,
 					const char *config_file,
-					TYPELIB *group)
+					TYPELIB *group, int recursion_level)
 {
-  char name[FN_REFLEN+10],buff[4096],*ptr,*end,*value,*tmp;
+  char name[FN_REFLEN + 10], buff[4096], *ptr, *end, *value, *tmp, **tmp_ext;
+  static const char includedir_keyword[]= "includedir";
+  static const char include_keyword[]= "include";
+  const int max_recursion_level= 10;
  FILE *fp;
-  uint line=0;
-  my_bool read_values=0,found_group=0;
+  uint line= 0;
+  my_bool read_values= 0, found_group= 0;
+  uint i;
+  MY_DIR *search_dir;
+  FILEINFO *search_file;

  if ((dir ? strlen(dir) : 0 )+strlen(config_file) >= FN_REFLEN-3)
    return 0;					/* Ignore wrong paths */
@@ -374,22 +382,121 @@ static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc,
    if ((stat_info.st_mode & S_IWOTH) &&
 	(stat_info.st_mode & S_IFMT) == S_IFREG)
    {
-      fprintf(stderr, "warning: World-writeable config file %s is ignored\n",
+      fprintf(stderr, "warning: World-writable config file %s is ignored\n",
              name);
      return 0;
    }
  }
 #endif
-  if (!(fp = my_fopen(fn_format(name,name,"","",4),O_RDONLY,MYF(0))))
+  if (!(fp= my_fopen(fn_format(name, name, "", "", 4), O_RDONLY, MYF(0))))
    return 0;					/* Ignore wrong files */

-  while (fgets(buff,sizeof(buff)-1,fp))
+  while (fgets(buff, sizeof(buff) - 1, fp))
  {
    line++;
    /* Ignore comment and empty lines */
-    for (ptr=buff ; my_isspace(&my_charset_latin1,*ptr) ; ptr++ ) ;
+    for (ptr= buff; my_isspace(&my_charset_latin1, *ptr); ptr++)
+    {}
+
    if (*ptr == '#' || *ptr == ';' || !*ptr)
      continue;
+
+    /* Configuration File Directives */
+    if ((*ptr == '!') && (recursion_level < max_recursion_level))
+    {
+      /* skip over `!' and following whitespace */
+      for (++ptr; my_isspace(&my_charset_latin1, ptr[0]); ptr++)
+      {}
+
+      if ((!strncmp(ptr, includedir_keyword, sizeof(includedir_keyword) - 1))
+         && my_isspace(&my_charset_latin1, ptr[sizeof(includedir_keyword) - 1]))
+      {
+        /* skip over "includedir" and following whitespace */
+        for (ptr+= sizeof(includedir_keyword) - 1;
+            my_isspace(&my_charset_latin1, ptr[0]); ptr++)
+        {}
+
+        /* trim trailing whitespace from directory name */
+        end= ptr + strlen(ptr) - 1;
+        /*
+          This would work fine even if no whitespaces are met
+          since fgets() stores the newline character in the buffer
+        */
+        for (; my_isspace(&my_charset_latin1, *(end - 1)); end--)
+        {}
+        end[0]= 0;
+
+        /* print error msg if there is nothing after !includedir directive */
+        if (end == ptr)
+        {
+          fprintf(stderr,
+                  "error: Wrong !includedir directive in config "
+                  "file: %s at line %d\n",
+                  name,line);
+          goto err;
+        }
+
+        if (!(search_dir= my_dir(ptr, MYF(MY_WME))))
+          goto err;
+
+        for (i= 0; i < (uint) search_dir->number_off_files; i++)
+        {
+          search_file= search_dir->dir_entry + i;
+          ext= fn_ext(search_file->name);
+
+          /* check extension */
+          for (tmp_ext= (char**) f_extensions; *tmp_ext; *tmp_ext++)
+          {
+            if (!strcmp(ext, *tmp_ext))
+              break;
+          }
+
+          if (*tmp_ext)
+          {
+            if (!(tmp= alloc_root(alloc, 2 + strlen(search_file->name)
+                                          + strlen(ptr))))
+              goto err;
+
+            fn_format(tmp, search_file->name, ptr, "",
+                      MY_UNPACK_FILENAME | MY_SAFE_PATH);
+
+            search_default_file_with_ext(args, alloc, "", "", tmp, group,
+                                         recursion_level + 1);
+          }
+        }
+
+        my_dirend(search_dir);
+      }
+      else if ((!strncmp(ptr, include_keyword, sizeof(include_keyword) - 1))
+          && my_isspace(&my_charset_latin1, ptr[sizeof(include_keyword) - 1]))
+      {
+        /* skip over `include' and following whitespace */
+        for (ptr+= sizeof(include_keyword) - 1;
+            my_isspace(&my_charset_latin1, ptr[0]); ptr++)
+        {}
+
+        /* trim trailing whitespace from filename */
+        end= ptr + strlen(ptr) - 1;
+        for (; my_isspace(&my_charset_latin1, *(end - 1)) ; end--)
+        {}
+        end[0]= 0;
+
+        if (end == ptr)
+        {
+          fprintf(stderr,
+                  "error: Wrong !include directive in config "
+                  "file: %s at line %d\n",
+                  name,line);
+          goto err;
+        }
+
+        search_default_file_with_ext(args, alloc, "", "", ptr, group,
+                                     recursion_level + 1);
+      }
+
+      continue;
+    }
+
    if (*ptr == '[')				/* Group name */
    {
      found_group=1;

--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2380,6 +2380,172 @@ CHARSET_INFO my_charset_utf8_bin=
    &my_collation_mb_bin_handler
 };

+#ifdef HAVE_CYBOZU_COLLATION
+
+/*
+ * Case-sensitive variants of the utf8 comparison functions.
+ *
+ * These functions basically do the same as their originals, except
+ * that they return 0 only when the two compared unicode strings are
+ * strictly the same in a case-sensitive way.  Characters are compared
+ * through their uni_plane "sort" weights first; the "save_diff" local
+ * variable keeps the first non-zero difference between the decoded
+ * code points (taken before the weights are applied) and is returned
+ * when neither the weights nor the lengths give a difference.
+ *
+ * my_strnncoll_utf8_cs
+ *   cs           charset, passed through to my_utf8_uni()
+ *   s, slen      first string and its length in bytes
+ *   t, tlen      second string and its length in bytes
+ *   t_is_prefix  when set, the final result is t-te (0 once all of t
+ *                has been consumed) and save_diff is not consulted
+ *
+ * Returns the difference of the first pair of differing sort weights,
+ * or the difference of the current bytes if either string cannot be
+ * decoded at the current position; otherwise the difference of the
+ * remaining byte counts, or save_diff when those are equal.
+ */
+
+static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, 
+                                const uchar *s, uint slen,
+                                const uchar *t, uint tlen,
+                                my_bool t_is_prefix)
+{
+  int s_res,t_res;                  /* results of my_utf8_uni() */
+  my_wc_t s_wc,t_wc;                /* decoded characters */
+  const uchar *se=s+slen;           /* end of s */
+  const uchar *te=t+tlen;           /* end of t */
+  int save_diff = 0;                /* first non-zero code point diff */
+  int diff;
+
+  while ( s < se && t < te )
+  {
+    int plane;
+    s_res=my_utf8_uni(cs,&s_wc, s, se);
+    t_res=my_utf8_uni(cs,&t_wc, t, te);
+    
+    if ( s_res <= 0 || t_res <= 0 )

+    {
+      /*
+        Incorrect string (my_utf8_uni() returned a value <= 0 for
+        s or t): compare by the byte values at the current positions.
+      */
+      return ((int)s[0]-(int)t[0]); 
+    }
+    
+    if ( save_diff == 0 )           /* keep only the first difference */
+    {
+      save_diff = ((int)s_wc) - ((int)t_wc);
+    }
+    plane=(s_wc>>8) & 0xFF;
+    s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
+    plane=(t_wc>>8) & 0xFF;
+    t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
+    if ( s_wc != t_wc )
+    {
+      return  ((int) s_wc) - ((int) t_wc);
+    }
+    
+    s+=s_res;
+    t+=t_res;
+  }
+  diff = ( (se-s) - (te-t) );       /* difference of unread byte counts */
+  return t_is_prefix ? t-te : ((diff == 0) ? save_diff : diff);
+}
+/*
+ * Case-sensitive utf8 comparison that does not let trailing spaces
+ * of the longer string make a difference.
+ *
+ *   cs           charset, passed through to my_utf8_uni()
+ *   s, slen      first string and its length in bytes
+ *   t, tlen      second string and its length in bytes
+ *
+ * The common part is compared through the uni_plane "sort" weights,
+ * while save_diff keeps the first non-zero difference between the
+ * decoded code points.  If the remaining tail of the longer string
+ * consists of spaces only (or there is no tail), save_diff is
+ * returned, so 0 means the compared parts are identical including
+ * letter case.
+ */
+static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, 
+                                  const uchar *s, uint slen,
+                                  const uchar *t, uint tlen)
+{
+  int s_res,t_res;                  /* results of my_utf8_uni() */
+  my_wc_t s_wc,t_wc;                /* decoded characters */
+  const uchar *se= s+slen;          /* end of s */
+  const uchar *te= t+tlen;          /* end of t */
+  int save_diff = 0;                /* first non-zero code point diff */
+  
+  while ( s < se && t < te )
+  {
+    int plane;
+    s_res=my_utf8_uni(cs,&s_wc, s, se);
+    t_res=my_utf8_uni(cs,&t_wc, t, te);
+    
+    if ( s_res <= 0 || t_res <= 0 )
+    {
+      /*
+        Incorrect string (my_utf8_uni() returned a value <= 0 for
+        s or t): compare by the byte values at the current positions.
+      */
+      return ((int)s[0]-(int)t[0]); 
+    }
+    
+    if ( save_diff == 0 )           /* keep only the first difference */
+    {
+      save_diff = ((int)s_wc) - ((int)t_wc);
+    }
+    plane=(s_wc>>8) & 0xFF;
+    s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
+    plane=(t_wc>>8) & 0xFF;
+    t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
+    if ( s_wc != t_wc )
+    {
+      return  ((int) s_wc) - ((int) t_wc);
+    }
+    
+    s+=s_res;
+    t+=t_res;
+  }
+  
+  slen= se-s;                       /* unread bytes left in s */
+  tlen= te-t;                       /* unread bytes left in t */
+  
+  if (slen != tlen)
+  {
+    int swap= 0;                    /* -1 when t is the longer string */
+    if (slen < tlen)
+    {
+      slen= tlen;
+      s= t;
+      se= te;
+      swap= -1;
+    }
+    /*
+      The following loop uses the fact that in UTF-8
+      all multibyte characters are greater than space,
+      and all multibyte head characters are greater than
+      space. It means if we meet a character greater
+      than space, it always means that the longer string
+      is greater. So we can reuse the same loop from the
+      8bit version, without having to process full multibyte
+      sequences.
+
+      The first non-space byte of the tail decides the result:
+      (byte - ' ') is returned as is when s is the longer string
+      and XOR-ed with -1 (bitwise inverted) when t is the longer one.
+    */
+    for ( ; s < se; s++)
+    {
+      if (*s != ' ')
+        return ((int)*s -  (int) ' ') ^ swap;
+    }
+  }
+  return save_diff;
+}
+/*
+  Collation handler referenced by my_charset_utf8_general_cs.  It plugs
+  in the two case-sensitive comparison functions defined above; the
+  remaining entries are functions that are not part of this diff.
+*/
+static MY_COLLATION_HANDLER my_collation_cs_handler =
+{
+    NULL,		/* init */
+    my_strnncoll_utf8_cs,	/* case-sensitive comparison */
+    my_strnncollsp_utf8_cs,	/* same, trailing spaces of the tail ignored */
+    my_strnxfrm_utf8,
+    my_like_range_simple,
+    my_wildcmp_mb,
+    my_strcasecmp_utf8,
+    my_instr_mb,
+    my_hash_sort_utf8
+};
+/*
+  Compiled-in "utf8_general_cs" collation of the "utf8" character set,
+  listed under number 254.  It uses my_charset_utf8_handler as the
+  charset handler and my_collation_cs_handler for comparisons.
+  NOTE(review): the two zeros following 254 carry no label of their
+  own here -- confirm their meaning against the CHARSET_INFO
+  declaration.
+*/
+CHARSET_INFO my_charset_utf8_general_cs=
+{
+    254,0,0,		/* number       */
+    MY_CS_COMPILED|MY_CS_UNICODE,	/* state  */
+    "utf8",		/* cs name      */
+    "utf8_general_cs",	/* name         */
+    "",			/* comment      */
+    NULL,		/* tailoring    */
+    ctype_utf8,		/* ctype        */
+    to_lower_utf8,	/* to_lower     */
+    to_upper_utf8,	/* to_upper     */
+    to_upper_utf8,	/* sort_order   */
+    NULL,		/* contractions */
+    NULL,		/* sort_order_big*/
+    NULL,		/* tab_to_uni   */
+    NULL,		/* tab_from_uni */
+    NULL,		/* state_map    */
+    NULL,		/* ident_map    */
+    1,			/* strxfrm_multiply */
+    1,			/* mbminlen     */
+    3,			/* mbmaxlen     */
+    0,			/* min_sort_char */
+    255,		/* max_sort_char */
+    &my_charset_utf8_handler,
+    &my_collation_cs_handler
+};
+#endif	/* Cybozu Hack */
+

 #ifdef MY_TEST_UTF8
 #include <stdio.h>