Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mariadb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
mariadb
Commits
3cc69d5a
Commit
3cc69d5a
authored
Apr 18, 2002
by
serg@serg.mysql.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
phrase search
parent
08384a3a
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
77 additions
and
27 deletions
+77
-27
myisam/ft_boolean_search.c
myisam/ft_boolean_search.c
+57
-22
myisam/ft_parser.c
myisam/ft_parser.c
+11
-3
myisam/ftdefs.h
myisam/ftdefs.h
+4
-1
mysql-test/r/fulltext.result
mysql-test/r/fulltext.result
+3
-0
mysql-test/t/fulltext.test
mysql-test/t/fulltext.test
+2
-1
No files found.
myisam/ft_boolean_search.c
View file @
3cc69d5a
...
...
@@ -59,6 +59,7 @@ static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */
typedef
struct
st_ftb_expr
FTB_EXPR
;
struct
st_ftb_expr
{
FTB_EXPR
*
up
;
byte
*
quot
,
*
qend
;
float
weight
;
uint
flags
;
my_off_t
docid
[
2
];
/* for index search and for scan */
...
...
@@ -113,7 +114,7 @@ int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
}
void
_ftb_parse_query
(
FTB
*
ftb
,
byte
**
start
,
byte
*
end
,
FTB_EXPR
*
up
,
uint
depth
)
FTB_EXPR
*
up
,
uint
depth
)
{
byte
res
;
FTB_PARAM
param
;
...
...
@@ -126,16 +127,17 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
return
;
param
.
prev
=
' '
;
param
.
quot
=
up
->
quot
;
while
((
res
=
ft_get_word
(
start
,
end
,
&
w
,
&
param
)))
{
int
r
=
param
.
plusminus
;
int
r
=
param
.
plusminus
;
float
weight
=
(
float
)
(
param
.
pmsign
?
nwghts
:
wghts
)[(
r
>
5
)
?
5
:
((
r
<-
5
)
?-
5
:
r
)];
switch
(
res
)
{
case
1
:
/* word found */
ftbw
=
(
FTB_WORD
*
)
alloc_root
(
&
ftb
->
mem_root
,
sizeof
(
FTB_WORD
)
+
(
param
.
trunc
?
MI_MAX_KEY_BUFF
:
w
.
len
+
extra
));
sizeof
(
FTB_WORD
)
+
(
param
.
trunc
?
MI_MAX_KEY_BUFF
:
w
.
len
+
extra
));
ftbw
->
len
=
w
.
len
+
1
;
ftbw
->
flags
=
0
;
if
(
param
.
yesno
>
0
)
ftbw
->
flags
|=
FTB_FLAG_YES
;
...
...
@@ -149,7 +151,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
ftbw
->
word
[
0
]
=
w
.
len
;
if
(
param
.
yesno
>
0
)
up
->
ythresh
++
;
queue_insert
(
&
ftb
->
queue
,
(
byte
*
)
ftbw
);
ftb
->
with_scan
|=
param
.
trunc
;
ftb
->
with_scan
|=
(
param
.
trunc
&
FTB_FLAG_TRUNC
)
;
break
;
case
2
:
/* left bracket */
ftbe
=
(
FTB_EXPR
*
)
alloc_root
(
&
ftb
->
mem_root
,
sizeof
(
FTB_EXPR
));
...
...
@@ -160,10 +162,12 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
ftbe
->
up
=
up
;
ftbe
->
ythresh
=
ftbe
->
yweaks
=
0
;
ftbe
->
docid
[
0
]
=
ftbe
->
docid
[
1
]
=
HA_POS_ERROR
;
if
((
ftbe
->
quot
=
param
.
quot
))
ftb
->
with_scan
|=
2
;
if
(
param
.
yesno
>
0
)
up
->
ythresh
++
;
_ftb_parse_query
(
ftb
,
start
,
end
,
ftbe
,
depth
+
1
);
break
;
case
3
:
/* right bracket */
if
(
up
->
quot
)
up
->
qend
=
param
.
quot
;
return
;
}
}
...
...
@@ -209,7 +213,7 @@ void _ftb_init_index_search(FT_INFO *ftb)
ftbw
->
len
-
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
word
+
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
len
-
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
0
);
0
);
}
if
(
r
)
/* not found */
{
...
...
@@ -260,7 +264,7 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
ftbe
->
weight
=
1
;
ftbe
->
flags
=
FTB_FLAG_YES
;
ftbe
->
nos
=
1
;
ftbe
->
up
=
0
;
ftbe
->
quot
=
ftbe
->
up
=
0
;
ftbe
->
ythresh
=
ftbe
->
yweaks
=
0
;
ftbe
->
docid
[
0
]
=
ftbe
->
docid
[
1
]
=
HA_POS_ERROR
;
ftb
->
root
=
ftbe
;
...
...
@@ -270,16 +274,39 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
memcpy
(
ftb
->
list
,
ftb
->
queue
.
root
+
1
,
sizeof
(
FTB_WORD
*
)
*
ftb
->
queue
.
elements
);
qsort2
(
ftb
->
list
,
ftb
->
queue
.
elements
,
sizeof
(
FTB_WORD
*
),
(
qsort2_cmp
)
FTB_WORD_cmp_list
,
ftb
->
charset
);
if
(
ftb
->
queue
.
elements
<
2
)
ftb
->
with_scan
=
0
;
if
(
ftb
->
queue
.
elements
<
2
)
ftb
->
with_scan
&=
~
FTB_FLAG_TRUNC
;
ftb
->
state
=
READY
;
return
ftb
;
}
void
_ftb_climb_the_tree
(
FTB_WORD
*
ftbw
,
uint
mode
)
/*
  Case-insensitive "does the haystack contain the needle" test.

  SYNOPSIS
    _ftb_strstr()
    s0, e0   haystack: the bytes in [s0, e0)
    s1, e1   needle:   the bytes in [s1, e1)
    cs       character set; every byte is folded through cs->to_upper[]
             before it is compared

  RETURN
    1   the needle occurs somewhere in the haystack
    0   otherwise

  NOTES
    An empty needle (s1 >= e1) is contained in every haystack, so 1 is
    returned without reading *s1.

    Every possible start position is tried and the needle is re-read from
    its beginning each time.  A failed partial match therefore neither
    skips an overlapping candidate ("aabc" contains "abc") nor is reported
    as a hit when only the final needle byte differs ("abx" vs "abc").

    NOTE(review): e1 is treated as exclusive (one past the last needle
    byte).  Confirm that callers pass the end of the phrase that way.
*/
int _ftb_strstr(const byte *s0, const byte *e0,
                const byte *s1, const byte *e1,
                CHARSET_INFO *cs)
{
  const byte *h;
  const byte *p;

  if (s1 >= e1)
    return 1;                           /* empty needle always matches */

  /* Only consider start positions that leave room for the whole needle */
  for ( ; e0 - s0 >= e1 - s1; s0++)
  {
    for (h= s0, p= s1; p < e1; h++, p++)
    {
      /*
        Cast before indexing: if byte is a signed char type, a byte
        >= 0x80 would otherwise produce a negative table index.
      */
      if (cs->to_upper[(unsigned char) *h] !=
          cs->to_upper[(unsigned char) *p])
        break;
    }
    if (p == e1)
      return 1;                         /* whole needle matched at s0 */
  }
  return 0;
}
void
_ftb_climb_the_tree
(
FTB
*
ftb
,
FTB_WORD
*
ftbw
,
FT_SEG_ITERATOR
*
ftsi_orig
)
{
FT_SEG_ITERATOR
ftsi
;
FTB_EXPR
*
ftbe
;
float
weight
=
ftbw
->
weight
;
int
yn
=
ftbw
->
flags
,
ythresh
;
int
yn
=
ftbw
->
flags
,
ythresh
,
mode
=
(
ftsi_orig
!=
0
)
;
my_off_t
curdoc
=
ftbw
->
docid
[
mode
];
for
(
ftbe
=
ftbw
->
up
;
ftbe
;
ftbe
=
ftbe
->
up
)
...
...
@@ -300,6 +327,20 @@ void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode)
{
yn
=
ftbe
->
flags
;
weight
=
ftbe
->
cur_weight
*
ftbe
->
weight
;
if
(
mode
&&
ftbe
->
quot
)
{
int
not_found
=
1
;
memcpy
(
&
ftsi
,
ftsi_orig
,
sizeof
(
ftsi
));
while
(
_mi_ft_segiterator
(
&
ftsi
)
&&
not_found
)
{
if
(
!
ftsi
.
pos
)
continue
;
not_found
=
!
_ftb_strstr
(
ftsi
.
pos
,
ftsi
.
pos
+
ftsi
.
len
,
ftbe
->
quot
,
ftbe
->
qend
,
ftb
->
charset
);
}
if
(
not_found
)
break
;
}
/* ftbe->quot */
}
else
break
;
...
...
@@ -356,7 +397,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
{
while
(
curdoc
==
(
ftbw
=
(
FTB_WORD
*
)
queue_top
(
&
ftb
->
queue
))
->
docid
[
0
])
{
_ftb_climb_the_tree
(
ftb
w
,
0
);
_ftb_climb_the_tree
(
ftb
,
ftbw
,
0
);
/* update queue */
r
=
_mi_search
(
info
,
keyinfo
,
(
uchar
*
)
ftbw
->
word
,
USE_WHOLE_KEY
,
...
...
@@ -367,7 +408,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
info
->
lastkey
+
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
len
-
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
word
+
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
len
-
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
ftbw
->
len
-
(
ftbw
->
flags
&
FTB_FLAG_TRUNC
),
0
);
}
if
(
r
)
/* not found */
...
...
@@ -414,7 +455,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
FT_WORD
word
;
FTB_WORD
*
ftbw
;
FTB_EXPR
*
ftbe
;
FT_SEG_ITERATOR
ftsi
;
FT_SEG_ITERATOR
ftsi
,
ftsi2
;
const
byte
*
end
;
my_off_t
docid
=
ftb
->
info
->
lastpos
;
...
...
@@ -423,17 +464,11 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
if
(
!
ftb
->
queue
.
elements
)
return
0
;
#if NOT_USED
if
(
ftb
->
state
==
READY
||
ftb
->
state
==
INDEX_DONE
)
ftb
->
state
=
SCAN
;
else
if
(
ftb
->
state
!=
SCAN
)
return
-
3
.
0
;
#endif
if
(
ftb
->
keynr
==
NO_SUCH_KEY
)
_mi_ft_segiterator_dummy_init
(
record
,
length
,
&
ftsi
);
else
_mi_ft_segiterator_init
(
ftb
->
info
,
ftb
->
keynr
,
record
,
&
ftsi
);
memcpy
(
&
ftsi2
,
&
ftsi
,
sizeof
(
ftsi
));
while
(
_mi_ft_segiterator
(
&
ftsi
))
{
...
...
@@ -464,7 +499,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
if
(
ftbw
->
docid
[
1
]
==
docid
)
continue
;
ftbw
->
docid
[
1
]
=
docid
;
_ftb_climb_the_tree
(
ftb
w
,
1
);
_ftb_climb_the_tree
(
ftb
,
ftbw
,
&
ftsi2
);
}
}
}
...
...
myisam/ft_parser.c
View file @
3cc69d5a
...
...
@@ -133,13 +133,20 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param)
for
(;
doc
<
end
;
doc
++
)
{
if
(
true_word_char
(
*
doc
))
break
;
if
(
*
doc
==
FTB_LBR
||
*
doc
==
FTB_RBR
)
if
(
*
doc
==
FTB_RQUOT
&&
param
->
quot
)
{
param
->
quot
=
doc
-
1
;
*
start
=
doc
+
1
;
return
3
;
/* FTB_RBR */
}
if
((
*
doc
==
FTB_LBR
||
*
doc
==
FTB_RBR
||
*
doc
==
FTB_LQUOT
)
&&
!
param
->
quot
)
{
/* param->prev=' '; */
*
start
=
doc
+
1
;
if
(
*
doc
==
FTB_LQUOT
)
param
->
quot
=*
start
;
return
(
*
doc
==
FTB_RBR
)
+
2
;
}
if
(
param
->
prev
==
' '
)
if
(
param
->
prev
==
' '
&&
!
param
->
quot
)
{
if
(
*
doc
==
FTB_YES
)
{
param
->
yesno
=+
1
;
continue
;
}
else
if
(
*
doc
==
FTB_EGAL
)
{
param
->
yesno
=
0
;
continue
;
}
else
...
...
@@ -149,7 +156,8 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param)
if
(
*
doc
==
FTB_NEG
)
{
param
->
pmsign
=!
param
->
pmsign
;
continue
;
}
}
param
->
prev
=*
doc
;
param
->
yesno
=
param
->
plusminus
=
param
->
pmsign
=
0
;
param
->
yesno
=
(
param
->
quot
!=
0
);
param
->
plusminus
=
param
->
pmsign
=
0
;
}
mwc
=
0
;
...
...
myisam/ftdefs.h
View file @
3cc69d5a
...
...
@@ -95,6 +95,8 @@ extern ulong collstat;
#define FTB_RBR (ft_boolean_syntax[6])
#define FTB_NEG (ft_boolean_syntax[7])
#define FTB_TRUNC (ft_boolean_syntax[8])
#define FTB_LQUOT (ft_boolean_syntax[10])
#define FTB_RQUOT (ft_boolean_syntax[11])
typedef
struct
st_ft_word
{
byte
*
pos
;
...
...
@@ -111,6 +113,7 @@ typedef struct st_ftb_param {
int
plusminus
;
bool
pmsign
;
bool
trunc
;
byte
*
quot
;
}
FTB_PARAM
;
int
is_stopword
(
char
*
word
,
uint
len
);
...
...
@@ -132,7 +135,7 @@ uint _mi_ft_segiterator(FT_SEG_ITERATOR *);
void
ft_parse_init
(
TREE
*
,
CHARSET_INFO
*
);
int
ft_parse
(
TREE
*
,
byte
*
,
int
);
FT_WORD
*
ft_linearize
(
/*MI_INFO *, uint, byte *, */
TREE
*
);
FT_WORD
*
ft_linearize
(
TREE
*
);
FT_WORD
*
_mi_ft_parserecord
(
MI_INFO
*
,
uint
,
byte
*
,
const
byte
*
);
uint
_mi_ft_parse
(
TREE
*
parsed
,
MI_INFO
*
info
,
uint
keynr
,
const
byte
*
record
);
...
...
mysql-test/r/fulltext.result
View file @
3cc69d5a
...
...
@@ -67,6 +67,9 @@ Full-text indexes are called collections 1
Only MyISAM tables support collections 2
Function MATCH ... AGAINST() is used to do a search 0
Full-text search in MySQL implements vector space model 0
select * from t1 where MATCH a,b AGAINST ('"Now sUPPort"' IN BOOLEAN MODE);
a b
MySQL has now support for full-text search
select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE);
a b
Full-text search in MySQL implements vector space model
...
...
mysql-test/t/fulltext.test
View file @
3cc69d5a
...
...
@@ -20,7 +20,6 @@ select * from t1 where MATCH(a,b) AGAINST ("indexes collections");
# UNION of fulltext's
select
*
from
t1
where
MATCH
(
a
,
b
)
AGAINST
(
"collections"
)
UNION
ALL
select
*
from
t1
where
MATCH
(
a
,
b
)
AGAINST
(
"indexes"
);
# boolean search
select
*
from
t1
where
MATCH
(
a
,
b
)
AGAINST
(
"support -collections"
IN
BOOLEAN
MODE
);
...
...
@@ -34,6 +33,8 @@ select * from t1 where MATCH(a,b) AGAINST("+search -(support vector)" IN BOOLEAN
select
*
,
MATCH
(
a
,
b
)
AGAINST
(
"support collections"
IN
BOOLEAN
MODE
)
as
x
from
t1
;
select
*
,
MATCH
(
a
,
b
)
AGAINST
(
"collections support"
IN
BOOLEAN
MODE
)
as
x
from
t1
;
select
*
from
t1
where
MATCH
a
,
b
AGAINST
(
'"Now sUPPort"'
IN
BOOLEAN
MODE
);
# boolean w/o index:
select
*
from
t1
where
MATCH
a
AGAINST
(
"search"
IN
BOOLEAN
MODE
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment