Commit 589ac1cf authored by Sergey Vojtovich's avatar Sergey Vojtovich

BUG#37245 - Full text search problem

Certain boolean mode queries with truncation operator did
not return matching records and calculate relevancy
incorrectly.

myisam/ft_boolean_search.c:
  Sort ftb->list in ascending order. This helps to fix binary
  search in ft_boolean_find_relevance() without rewriting it
  much.
  
  Fixed binary search in ft_boolean_find_relevance(), so it finds
  right-most element in an array.
  
  Fixed that ft_boolean_find_relevance() didn't return match for
  words with truncation operator in case query has other non-
  matching words.
mysql-test/r/fulltext.result:
  A test case for BUG#37245.
mysql-test/t/fulltext.test:
  A test case for BUG#37245.
parent 78119b2d
......@@ -122,11 +122,11 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
{
/* ORDER BY word DESC, ndepth DESC */
int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1,
(uchar*) (*a)->word+1,(*a)->len-1,0,0);
/* ORDER BY word, ndepth */
int i= mi_compare_text(cs, (uchar*) (*a)->word + 1, (*a)->len - 1,
(uchar*) (*b)->word + 1, (*b)->len - 1, 0, 0);
if (!i)
i=CMP_NUM((*b)->ndepth,(*a)->ndepth);
i= CMP_NUM((*a)->ndepth, (*b)->ndepth);
return i;
}
......@@ -674,23 +674,49 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
(byte *) end, &word, TRUE))
{
int a, b, c;
/*
Find right-most element in the array of query words matching this
word from a document.
*/
for (a=0, b=ftb->queue.elements, c=(a+b)/2; b-a>1; c=(a+b)/2)
{
ftbw=ftb->list[c];
if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len,
(uchar*) ftbw->word+1, ftbw->len-1,
(my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0) >0)
(my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0) < 0)
b=c;
else
a=c;
}
/*
If there were no words with truncation operator, we iterate to the
beginning of an array until array element is equal to the word from
a document. This is done mainly because the same word may be
mentioned twice (or more) in the query.
In case query has words with truncation operator we must iterate
to the beginning of the array. There may be non-matching query words
between matching word with truncation operator and the right-most
matching element. E.g., if we're looking for 'aaa15' in an array of
'aaa1* aaa14 aaa15 aaa16'.
Worse of that there still may be match even if the binary search
above didn't find matching element. E.g., if we're looking for
'aaa15' in an array of 'aaa1* aaa14 aaa16'. The binary search will
stop at 'aaa16'.
*/
for (; c>=0; c--)
{
ftbw=ftb->list[c];
if (mi_compare_text(ftb->charset, (uchar*) word.pos, word.len,
(uchar*) ftbw->word+1,ftbw->len-1,
(my_bool) (ftbw->flags&FTB_FLAG_TRUNC),0))
{
if (ftb->with_scan & FTB_FLAG_TRUNC)
continue;
else
break;
}
if (ftbw->docid[1] == docid)
continue;
ftbw->docid[1]=docid;
......
......@@ -497,3 +497,12 @@ WHERE MATCH(a) AGAINST('test' IN BOOLEAN MODE) AND b=1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref b b 5 const 4 Using where
DROP TABLE t1;
CREATE TABLE t1(a CHAR(10));
INSERT INTO t1 VALUES('aaa15');
SELECT MATCH(a) AGAINST('aaa1* aaa14 aaa16' IN BOOLEAN MODE) FROM t1;
MATCH(a) AGAINST('aaa1* aaa14 aaa16' IN BOOLEAN MODE)
1
SELECT MATCH(a) AGAINST('aaa1* aaa14 aaa15 aaa16' IN BOOLEAN MODE) FROM t1;
MATCH(a) AGAINST('aaa1* aaa14 aaa15 aaa16' IN BOOLEAN MODE)
2
DROP TABLE t1;
......@@ -423,3 +423,12 @@ EXPLAIN SELECT * FROM t1 FORCE INDEX(b)
WHERE MATCH(a) AGAINST('test' IN BOOLEAN MODE) AND b=1;
DROP TABLE t1;
#
# BUG#37245 - Full text search problem
#
CREATE TABLE t1(a CHAR(10));
INSERT INTO t1 VALUES('aaa15');
SELECT MATCH(a) AGAINST('aaa1* aaa14 aaa16' IN BOOLEAN MODE) FROM t1;
SELECT MATCH(a) AGAINST('aaa1* aaa14 aaa15 aaa16' IN BOOLEAN MODE) FROM t1;
DROP TABLE t1;
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment