Commit 162e7996 authored by Michal Čihař

Implement search using Whoosh

parent 5a284615
......@@ -3,6 +3,8 @@ from django.conf import settings
from lang.models import Language
from whoosh import qparser
from util import is_plural, split_plural, join_plural, msg_checksum
import trans.search
......@@ -178,21 +180,27 @@ class UnitManager(models.Manager):
self.add_to_source_index(unit.checksum, unit.source, unit.context, writer_source)
self.add_to_target_index(unit.checksum, unit.target, writer_target)
def search(self, query, source = True, translation = True):
# Stem-based search over units: normalizes the query into stemmed words,
# looks up matching rows and returns the units with those primary keys.
#
# query is either a string (str/unicode), which is split into words by
# self.separate_words, or a list/tuple of words; anything else raises
# TypeError.
#
# NOTE(review): the source and translation parameters are not read anywhere
# in the visible body.
# NOTE(review): Unit is imported below but not referenced in the visible
# body.
from trans.models import Unit
if isinstance(query, str) or isinstance(query, unicode):
# split the string into a list of search terms
query = self.separate_words(query)
elif not isinstance(query, list) and not isinstance(query, tuple):
raise TypeError("search must be called with a string or a list")
# the stemmer is configurable through settings.SEARCH_STEMMER
p = settings.SEARCH_STEMMER()
# lowercase and stem each word, skipping empty strings
stemmed_query = [p.stem(s.lower()) for s in query if s != '']
# get a row from the db for each matching word
# NOTE(review): language is neither a parameter of this method nor assigned
# in the visible body - confirm where it is expected to come from.
rows = self.__get_match_rows(stemmed_query, language)
# an empty list of rows means no match, so return an empty queryset
if rows == []:
return self.none()
return self.filter(pk__in = rows)
def search(self, query, source = True, context = True, translation = True):
    '''
    Performs full text search for units using the Whoosh indexes.

    query       -- query string, parsed by the Whoosh query parser
    source      -- search the "source" field of the source index
    context     -- search the "context" field of the source index
    translation -- search the "target" field of the target index for the
                   language of the units in this set

    Returns the units whose checksum matched in at least one of the
    selected fields. The result is empty when nothing matched or when no
    field was selected.
    '''
    # A set, because one unit can match in several fields.
    checksums = set()

    def collect(searcher, field, schema):
        # Parse the query against a single field and remember the
        # checksums stored with the matching documents.
        parsed = qparser.QueryParser(field, schema).parse(query)
        for doc in searcher.docs_for_query(parsed):
            checksums.add(searcher.stored_fields(doc)['checksum'])

    if source or context:
        with trans.search.get_source_searcher() as searcher:
            if source:
                collect(searcher, 'source', trans.search.SourceSchema())
            if context:
                collect(searcher, 'context', trans.search.SourceSchema())

    if translation:
        # The target index is selected by the language of a sample unit.
        # Fetch it only when needed, and tolerate an empty unit set
        # instead of failing with IndexError.
        try:
            sample = self.all()[0]
        except IndexError:
            sample = None
        if sample is not None:
            language_code = sample.translation.language.code
            with trans.search.get_target_searcher(language_code) as searcher:
                collect(searcher, 'target', trans.search.TargetSchema())

    return self.filter(checksum__in = list(checksums))
......@@ -10,7 +10,7 @@ from django.conf import settings
from whoosh import index
from whoosh.writing import BufferedWriter
class TranslationSchema(SchemaClass):
# Whoosh schema for the target (translation) indexes; create_target_index
# uses it for the 'target-<lang>' index.
class TargetSchema(SchemaClass):
# checksum is stored with the document so that it can be read back from the
# stored fields of a search hit
checksum = ID(stored = True)
# text of the translation
target = TEXT
......@@ -29,7 +29,7 @@ def create_source_index():
def create_target_index(lang):
ix_target = index.create_in(
settings.WHOOSH_INDEX,
schema = TranslationSchema,
schema = TargetSchema,
indexname = 'target-%s' % lang
)
......@@ -69,8 +69,8 @@ def get_target_writer(lang, buffered = True):
if not buffered:
return get_target_index(lang).writer()
if not hasattr(get_target_writer, 'target_writer'):
get_target_index.target_writer = {}
if not lang in get_target_index.target_writer:
get_target_writer.target_writer = {}
if not lang in get_target_writer.target_writer:
get_target_writer.target_writer[lang] = BufferedWriter(get_target_index(lang))
return get_target_writer.target_writer[lang]
......
......@@ -302,7 +302,7 @@ def translate(request, project, subproject, lang):
query |= Q(context = search_query)
units = units.filter(query)
else:
units = obj.unit_set.search(search_query, search_source, search _target)
units = obj.unit_set.search(search_query, search_source, search_context, search_target)
if direction == 'stay':
units = units.filter(position = pos)
elif direction == 'back':
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment