Commit c4acc2bd authored by Barry Warsaw

GNUTranslations:

    __init__(): Removed since we no longer need the coerce flag.
    Message ids and strings are now always coerced to Unicode, /if/
    the catalog specified a charset parameter.

    gettext(), ngettext(): Since the message strings are Unicodes in
    the catalog, coerce back to encoded 8-bit strings on return.

    ugettext(), ungettext(): Coerce the message ids to Unicode when
    there's no entry for the id in the catalog.

Minor code cleanups; use booleans where appropriate.
parent edb155fd
...@@ -73,17 +73,15 @@ def test(condition, true, false): ...@@ -73,17 +73,15 @@ def test(condition, true, false):
def c2py(plural): def c2py(plural):
""" """Gets a C expression as used in PO files for plural forms and returns a
Gets a C expression as used in PO files for plural forms and Python lambda function that implements an equivalent expression.
returns a Python lambda function that implements an equivalent
expression.
""" """
# Security check, allow only the "n" identifier # Security check, allow only the "n" identifier
from StringIO import StringIO from StringIO import StringIO
import token, tokenize import token, tokenize
tokens = tokenize.generate_tokens(StringIO(plural).readline) tokens = tokenize.generate_tokens(StringIO(plural).readline)
try: try:
danger = [ x for x in tokens if x[0] == token.NAME and x[1] != 'n' ] danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
except tokenize.TokenError: except tokenize.TokenError:
raise ValueError, \ raise ValueError, \
'plural forms expression error, maybe unbalanced parenthesis' 'plural forms expression error, maybe unbalanced parenthesis'
...@@ -218,7 +216,7 @@ class NullTranslations: ...@@ -218,7 +216,7 @@ class NullTranslations:
def charset(self): def charset(self):
return self._charset return self._charset
def install(self, unicode=0): def install(self, unicode=False):
import __builtin__ import __builtin__
__builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
...@@ -228,12 +226,6 @@ class GNUTranslations(NullTranslations): ...@@ -228,12 +226,6 @@ class GNUTranslations(NullTranslations):
LE_MAGIC = 0x950412deL LE_MAGIC = 0x950412deL
BE_MAGIC = 0xde120495L BE_MAGIC = 0xde120495L
def __init__(self, fp=None, coerce=False):
# Set this attribute before calling the base class constructor, since
# the latter calls _parse() which depends on self._coerce.
self._coerce = coerce
NullTranslations.__init__(self, fp)
def _parse(self, fp): def _parse(self, fp):
"""Override this method to support alternative .mo formats.""" """Override this method to support alternative .mo formats."""
unpack = struct.unpack unpack = struct.unpack
...@@ -281,21 +273,28 @@ class GNUTranslations(NullTranslations): ...@@ -281,21 +273,28 @@ class GNUTranslations(NullTranslations):
self._charset = v.split('charset=')[1] self._charset = v.split('charset=')[1]
elif k == 'plural-forms': elif k == 'plural-forms':
v = v.split(';') v = v.split(';')
## nplurals = v[0].split('nplurals=')[1]
## nplurals = int(nplurals.strip())
plural = v[1].split('plural=')[1] plural = v[1].split('plural=')[1]
self.plural = c2py(plural) self.plural = c2py(plural)
# Note: we unconditionally convert both msgids and msgstrs to
# Unicode using the character encoding specified in the charset
# parameter of the Content-Type header. The gettext documentation
# strongly encourages msgids to be us-ascii, but some applications
# require alternative encodings (e.g. Zope's ZCML and ZPT). For
# traditional gettext applications, the msgid conversion will
# cause no problems since us-ascii should always be a subset of
# the charset encoding. We may want to fall back to 8-bit msgids
# if the Unicode conversion fails.
if msg.find('\x00') >= 0: if msg.find('\x00') >= 0:
# Plural forms # Plural forms
msgid1, msgid2 = msg.split('\x00') msgid1, msgid2 = msg.split('\x00')
tmsg = tmsg.split('\x00') tmsg = tmsg.split('\x00')
if self._coerce: if self._charset:
msgid1 = unicode(msgid1, self._charset) msgid1 = unicode(msgid1, self._charset)
tmsg = [unicode(x, self._charset) for x in tmsg] tmsg = [unicode(x, self._charset) for x in tmsg]
for i in range(len(tmsg)): for i in range(len(tmsg)):
catalog[(msgid1, i)] = tmsg[i] catalog[(msgid1, i)] = tmsg[i]
else: else:
if self._coerce: if self._charset:
msg = unicode(msg, self._charset) msg = unicode(msg, self._charset)
tmsg = unicode(tmsg, self._charset) tmsg = unicode(tmsg, self._charset)
catalog[msg] = tmsg catalog[msg] = tmsg
...@@ -304,16 +303,23 @@ class GNUTranslations(NullTranslations): ...@@ -304,16 +303,23 @@ class GNUTranslations(NullTranslations):
transidx += 8 transidx += 8
def gettext(self, message): def gettext(self, message):
try: missing = object()
return self._catalog[message] tmsg = self._catalog.get(message, missing)
except KeyError: if tmsg is missing:
if self._fallback: if self._fallback:
return self._fallback.gettext(message) return self._fallback.gettext(message)
return message return message
# Encode the Unicode tmsg back to an 8-bit string, if possible
if self._charset:
return tmsg.encode(self._charset)
return tmsg
def ngettext(self, msgid1, msgid2, n): def ngettext(self, msgid1, msgid2, n):
try: try:
return self._catalog[(msgid1, self.plural(n))] tmsg = self._catalog[(msgid1, self.plural(n))]
if self._charset:
return tmsg.encode(self._charset)
return tmsg
except KeyError: except KeyError:
if self._fallback: if self._fallback:
return self._fallback.ngettext(msgid1, msgid2, n) return self._fallback.ngettext(msgid1, msgid2, n)
...@@ -328,10 +334,7 @@ class GNUTranslations(NullTranslations): ...@@ -328,10 +334,7 @@ class GNUTranslations(NullTranslations):
if tmsg is missing: if tmsg is missing:
if self._fallback: if self._fallback:
return self._fallback.ugettext(message) return self._fallback.ugettext(message)
tmsg = message return unicode(message)
if not self._coerce:
return unicode(tmsg, self._charset)
# The msgstr is already coerced to Unicode
return tmsg return tmsg
def ungettext(self, msgid1, msgid2, n): def ungettext(self, msgid1, msgid2, n):
...@@ -341,12 +344,9 @@ class GNUTranslations(NullTranslations): ...@@ -341,12 +344,9 @@ class GNUTranslations(NullTranslations):
if self._fallback: if self._fallback:
return self._fallback.ungettext(msgid1, msgid2, n) return self._fallback.ungettext(msgid1, msgid2, n)
if n == 1: if n == 1:
tmsg = msgid1 tmsg = unicode(msgid1)
else: else:
tmsg = msgid2 tmsg = unicode(msgid2)
if not self._coerce:
return unicode(tmsg, self._charset)
# The msgstr is already coerced to Unicode
return tmsg return tmsg
...@@ -392,11 +392,11 @@ def find(domain, localedir=None, languages=None, all=0): ...@@ -392,11 +392,11 @@ def find(domain, localedir=None, languages=None, all=0):
_translations = {} _translations = {}
def translation(domain, localedir=None, languages=None, def translation(domain, localedir=None, languages=None,
class_=None, fallback=0): class_=None, fallback=False):
if class_ is None: if class_ is None:
class_ = GNUTranslations class_ = GNUTranslations
mofiles = find(domain, localedir, languages, all=1) mofiles = find(domain, localedir, languages, all=1)
if len(mofiles)==0: if not mofiles:
if fallback: if fallback:
return NullTranslations() return NullTranslations()
raise IOError(ENOENT, 'No translation file found for domain', domain) raise IOError(ENOENT, 'No translation file found for domain', domain)
...@@ -419,8 +419,8 @@ def translation(domain, localedir=None, languages=None, ...@@ -419,8 +419,8 @@ def translation(domain, localedir=None, languages=None,
return result return result
def install(domain, localedir=None, unicode=0): def install(domain, localedir=None, unicode=False):
translation(domain, localedir, fallback=1).install(unicode) translation(domain, localedir, fallback=True).install(unicode)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment