Commit adf568cc authored by Michal Čihař

Limit search options

parent 963801ed
...@@ -205,18 +205,19 @@ class UnitManager(models.Manager): ...@@ -205,18 +205,19 @@ class UnitManager(models.Manager):
return self.filter(checksum__in = ret) return self.filter(checksum__in = ret)
def similar(self, unit): def similar(self, unit):
import whoosh.classify
ret = set() ret = set()
with trans.search.get_source_searcher() as searcher: with trans.search.get_source_searcher() as searcher:
# Extract up to 10 terms from the source # Extract up to 10 terms from the source
terms = [t[0] for t in searcher.key_terms_from_text('source', unit.source, numterms = 10)] terms = [t[0] for t in searcher.key_terms_from_text('source', unit.source, numterms = 10)]
cnt = len(terms) cnt = len(terms)
# Try to find 10 similar string, remove up to 5 words # Try to find 10 similar string, remove up to 4 words
while len(ret) < 10 and cnt > 0 and len(terms) - cnt < 5: while len(ret) < 10 and cnt > 0 and len(terms) - cnt < 4:
for search in itertools.combinations(terms, cnt): for search in itertools.combinations(terms, cnt):
ret = ret.union(self.search(' '.join(search), True, False, False, True)) ret = ret.union(self.search(' '.join(search), True, False, False, True))
cnt -= 1 cnt -= 1
ret.remove(unit.checksum)
return self.filter( return self.filter(
translation__subproject__project = unit.translation.subproject.project, translation__subproject__project = unit.translation.subproject.project,
translation__language = unit.translation.language, translation__language = unit.translation.language,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment