Commit 16306348 authored by Michal Čihař

Catch errors from Whoosh analyzers

Some analyzers (especially language-specific ones) do not properly handle
Unicode strings, so catch the resulting error and report it instead of failing.
Signed-off-by: Michal Čihař <michal@cihar.com>
parent 6b86e1cf
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# #
import sys
from django.db import models from django.db import models
from django.db.models import Q from django.db.models import Q
from django.utils.encoding import force_unicode from django.utils.encoding import force_unicode
...@@ -25,6 +26,7 @@ from weblate.lang.models import Language ...@@ -25,6 +26,7 @@ from weblate.lang.models import Language
from weblate.trans.formats import AutoFormat, StringIOMode from weblate.trans.formats import AutoFormat, StringIOMode
from weblate.trans.models.project import Project from weblate.trans.models.project import Project
from translate.storage.csvl10n import csvfile from translate.storage.csvl10n import csvfile
from weblate.trans.util import report_error
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
from whoosh.analysis import ( from whoosh.analysis import (
LanguageAnalyzer, StandardAnalyzer, StemmingAnalyzer, NgramAnalyzer LanguageAnalyzer, StandardAnalyzer, StemmingAnalyzer, NgramAnalyzer
...@@ -151,9 +153,13 @@ class DictionaryManager(models.Manager): ...@@ -151,9 +153,13 @@ class DictionaryManager(models.Manager):
# Extract words from all plurals and from context # Extract words from all plurals and from context
for text in unit.get_source_plurals() + [unit.context]: for text in unit.get_source_plurals() + [unit.context]:
for analyzer in analyzers: for analyzer in analyzers:
# Some Whoosh analyzers break on unicode
try:
words = words.union( words = words.union(
[token.text for token in analyzer(force_unicode(text))] [token.text for token in analyzer(force_unicode(text))]
) )
except UnicodeDecodeError as error:
report_error(error, sys.exc_info())
# Grab all words in the dictionary # Grab all words in the dictionary
dictionary = self.filter( dictionary = self.filter(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment