Commit fc09f13c authored by unknown

--ft_stopword_file command-line option

parent 30a35bcf
...@@ -46,6 +46,7 @@ struct st_ft_info ...@@ -46,6 +46,7 @@ struct st_ft_info
}; };
#endif #endif
extern const char *ft_stopword_file;
extern const char *ft_precompiled_stopwords[]; extern const char *ft_precompiled_stopwords[];
extern ulong ft_min_word_len; extern ulong ft_min_word_len;
...@@ -53,7 +54,7 @@ extern ulong ft_max_word_len; ...@@ -53,7 +54,7 @@ extern ulong ft_max_word_len;
extern ulong ft_max_word_len_for_sort; extern ulong ft_max_word_len_for_sort;
extern const char *ft_boolean_syntax; extern const char *ft_boolean_syntax;
int ft_init_stopwords(const char **); int ft_init_stopwords(void);
void ft_free_stopwords(void); void ft_free_stopwords(void);
#define FT_NL 0 #define FT_NL 0
......
...@@ -67,6 +67,7 @@ FT_INFO *ft_init_search(uint mode, void *info, uint keynr, ...@@ -67,6 +67,7 @@ FT_INFO *ft_init_search(uint mode, void *info, uint keynr,
query, query_len, presort); query, query_len, presort);
} }
const char *ft_stopword_file = 0;
const char *ft_precompiled_stopwords[] = { const char *ft_precompiled_stopwords[] = {
#ifdef COMPILE_STOPWORDS_IN #ifdef COMPILE_STOPWORDS_IN
......
...@@ -33,27 +33,74 @@ static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)), ...@@ -33,27 +33,74 @@ static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)),
(uchar *)w2->pos,w2->len,0); (uchar *)w2->pos,w2->len,0);
} }
int ft_init_stopwords(const char **sws) static void FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action,
void *arg __attribute__((unused)))
{ {
FT_STOPWORD sw; if (action == free_free && ft_stopword_file)
my_free(w->pos, MYF(0));
}
static int ft_add_stopword(const char *w)
{
FT_STOPWORD sw;
return !w ||
(((sw.len= (uint) strlen(sw.pos=w)) >= ft_min_word_len) &&
(tree_insert(stopwords3, &sw, 0)==NULL));
}
if(!stopwords3) int ft_init_stopwords()
{
if (!stopwords3)
{ {
if(!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) return -1; if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0))))
init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,0, return -1;
NULL, NULL); init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,
0, (tree_element_free)&FT_STOPWORD_free, NULL);
} }
if(!sws) return 0; if (ft_stopword_file)
{
File fd;
my_off_t len;
byte *buffer, *start, *end;
FT_WORD w;
int err=-1;
if (!*ft_stopword_file)
return 0;
for(;*sws;sws++) if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1)
return -1;
len=my_seek(fd, 0L, MY_SEEK_END, MYF(0));
my_seek(fd, 0L, MY_SEEK_SET, MYF(0));
if (!(start=buffer=my_malloc(len+1, MYF(MY_WME))))
{ {
if( (sw.len= (uint) strlen(sw.pos=*sws)) < ft_min_word_len) continue; my_close(fd, MYF(MY_WME));
if(!tree_insert(stopwords3, &sw, 0)) return -1;
}
len=my_read(fd, buffer, len, MYF(MY_WME));
end=start+len;
while (ft_simple_get_word(&start, end, &w))
{ {
delete_tree(stopwords3); /* purecov: inspected */ if (ft_add_stopword(my_strdup_with_length(w.pos, w.len, MYF(0))))
return -1; /* purecov: inspected */ goto err1;
}
err=0;
err1:
my_free(buffer, MYF(0));
err0:
my_close(fd, MYF(MY_WME));
return err;
}
else
{
/* compatibility mode: to be removed */
char **sws=ft_precompiled_stopwords;
for (;*sws;sws++)
{
if (ft_add_stopword(*sws))
return -1;
} }
} }
return 0; return 0;
......
...@@ -860,7 +860,7 @@ static int myisamchk(MI_CHECK *param, my_string filename) ...@@ -860,7 +860,7 @@ static int myisamchk(MI_CHECK *param, my_string filename)
else else
{ {
if (share->fulltext_index) if (share->fulltext_index)
ft_init_stopwords(ft_precompiled_stopwords); /* SerG */ ft_init_stopwords();
if (!(param->testflag & T_READONLY)) if (!(param->testflag & T_READONLY))
lock_type = F_WRLCK; /* table is changed */ lock_type = F_WRLCK; /* table is changed */
......
...@@ -2072,8 +2072,8 @@ int main(int argc, char **argv) ...@@ -2072,8 +2072,8 @@ int main(int argc, char **argv)
#endif #endif
if (opt_myisam_log) if (opt_myisam_log)
(void) mi_log( 1 ); (void) mi_log(1);
ft_init_stopwords(ft_precompiled_stopwords); ft_init_stopwords();
#ifdef __WIN__ #ifdef __WIN__
if (!opt_console) if (!opt_console)
...@@ -2929,7 +2929,7 @@ enum options { ...@@ -2929,7 +2929,7 @@ enum options {
OPT_CONNECT_TIMEOUT, OPT_DELAYED_INSERT_TIMEOUT, OPT_CONNECT_TIMEOUT, OPT_DELAYED_INSERT_TIMEOUT,
OPT_DELAYED_INSERT_LIMIT, OPT_DELAYED_QUEUE_SIZE, OPT_DELAYED_INSERT_LIMIT, OPT_DELAYED_QUEUE_SIZE,
OPT_FLUSH_TIME, OPT_FT_MIN_WORD_LEN, OPT_FLUSH_TIME, OPT_FT_MIN_WORD_LEN,
OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT, OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT, OPT_FT_STOPWORD_FILE,
OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE, OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE,
OPT_KEY_BUFFER_SIZE, OPT_LONG_QUERY_TIME, OPT_KEY_BUFFER_SIZE, OPT_LONG_QUERY_TIME,
OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET, OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET,
...@@ -3415,7 +3415,8 @@ struct my_option my_long_options[] = ...@@ -3415,7 +3415,8 @@ struct my_option my_long_options[] =
(gptr*) &max_system_variables.log_warnings, 0, GET_BOOL, NO_ARG, 0, 0, 0, (gptr*) &max_system_variables.log_warnings, 0, GET_BOOL, NO_ARG, 0, 0, 0,
0, 0, 0}, 0, 0, 0},
{ "back_log", OPT_BACK_LOG, { "back_log", OPT_BACK_LOG,
"The number of outstanding connection requests MySQL can have. This comes into play when the main MySQL thread gets very many connection requests in a very short time.", (gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG, "The number of outstanding connection requests MySQL can have. This comes into play when the main MySQL thread gets very many connection requests in a very short time.",
(gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG,
REQUIRED_ARG, 50, 1, 65535, 0, 1, 0 }, REQUIRED_ARG, 50, 1, 65535, 0, 1, 0 },
#ifdef HAVE_BERKELEY_DB #ifdef HAVE_BERKELEY_DB
{ "bdb_cache_size", OPT_BDB_CACHE_SIZE, { "bdb_cache_size", OPT_BDB_CACHE_SIZE,
...@@ -3468,9 +3469,13 @@ struct my_option my_long_options[] = ...@@ -3468,9 +3469,13 @@ struct my_option my_long_options[] =
(gptr*) &ft_max_word_len, (gptr*) &ft_max_word_len, 0, GET_ULONG, (gptr*) &ft_max_word_len, (gptr*) &ft_max_word_len, 0, GET_ULONG,
REQUIRED_ARG, HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1, 0}, REQUIRED_ARG, HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1, 0},
{ "ft_max_word_len_for_sort", OPT_FT_MAX_WORD_LEN_FOR_SORT, { "ft_max_word_len_for_sort", OPT_FT_MAX_WORD_LEN_FOR_SORT,
"Undocumented", (gptr*) &ft_max_word_len_for_sort, "The maximum length of the word for repair_by_sorting. Longer words are included the slow way. The lower this value, the more words will be put in one sort bucket.",
(gptr*) &ft_max_word_len_for_sort, 0, GET_ULONG, REQUIRED_ARG, 20, 4, (gptr*) &ft_max_word_len_for_sort, (gptr*) &ft_max_word_len_for_sort, 0, GET_ULONG,
HA_FT_MAXLEN, 0, 1, 0}, REQUIRED_ARG, 20, 4, HA_FT_MAXLEN, 0, 1, 0},
{ "ft_stopword_file", OPT_FT_STOPWORD_FILE,
"Use stopwords from this file instead of built-in list.",
(gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR,
REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
#ifdef HAVE_INNOBASE_DB #ifdef HAVE_INNOBASE_DB
{"innodb_mirrored_log_groups", OPT_INNODB_MIRRORED_LOG_GROUPS, {"innodb_mirrored_log_groups", OPT_INNODB_MIRRORED_LOG_GROUPS,
"Number of identical copies of log groups we keep for the database. Currently this should be set to 1.", "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment