Commit 16306348 authored by Michal Čihař

Catch errors from Whoosh analyzers

Some analyzers (especially language-specific ones) do not properly handle
Unicode strings, so let's catch this error and report it instead of failing.
Signed-off-by: Michal Čihař <michal@cihar.com>
parent 6b86e1cf
......@@ -18,6 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import sys
from django.db import models
from django.db.models import Q
from django.utils.encoding import force_unicode
......@@ -25,6 +26,7 @@ from weblate.lang.models import Language
from weblate.trans.formats import AutoFormat, StringIOMode
from weblate.trans.models.project import Project
from translate.storage.csvl10n import csvfile
from weblate.trans.util import report_error
from django.core.urlresolvers import reverse
from whoosh.analysis import (
LanguageAnalyzer, StandardAnalyzer, StemmingAnalyzer, NgramAnalyzer
......@@ -151,9 +153,13 @@ class DictionaryManager(models.Manager):
# Extract words from all plurals and from context
for text in unit.get_source_plurals() + [unit.context]:
for analyzer in analyzers:
words = words.union(
[token.text for token in analyzer(force_unicode(text))]
)
# Some Whoosh analyzers break on unicode
try:
words = words.union(
[token.text for token in analyzer(force_unicode(text))]
)
except UnicodeDecodeError as error:
report_error(error, sys.exc_info())
# Grab all words in the dictionary
dictionary = self.filter(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment