GNUTranslations:

__init__(): Removed since we no longer need the coerce flag. Message ids and strings are now always coerced to Unicode, /if/ the catalog specified a charset parameter. gettext(), ngettext(): Since the message strings are Unicodes in the catalog, coerce back to encoded 8-bit strings on return. ugettext(), ungettext(): Coerce the message ids to Unicode when there's no entry for the id in the catalog. Minor code cleanups; use booleans where appropriate.

GNUTranslations:
__init__(): Removed since we no longer need the coerce flag. Message ids and strings are now always coerced to Unicode, /if/ the catalog specified a charset parameter. gettext(), ngettext(): Since the message strings are Unicodes in the catalog, coerce back to encoded 8-bit strings on return. ugettext(), ungettext(): Coerce the message ids to Unicode when there's no entry for the id in the catalog. Minor code cleanups; use booleans where appropriate.
c4acc2bd · Barry Warsaw · edb155fd · c4acc2bd
Commit c4acc2bd authored Apr 24, 2003 by Barry Warsaw
Hide whitespace changes
Inline Side-by-side

Showing with 33 additions and 33 deletions

Lib/gettext.py Lib/gettext.py +33 -33

No files found.
--- a/Lib/gettext.py
+++ b/Lib/gettext.py
@@ -73,17 +73,15 @@ def test(condition, true, false):
 def c2py(plural):
-    """
+    """Gets a C expression as used in PO files for plural forms and returns a
-    Gets a C expression as used in PO files for plural forms and
+    Python lambda function that implements an equivalent expression.
-    returns a Python lambda function that implements an equivalent
-    expression.
    """
    # Security check, allow only the "n" identifier
    from StringIO import StringIO
    import token, tokenize
    tokens = tokenize.generate_tokens(StringIO(plural).readline)
    try:
-        danger = [ x for x in tokens if x[0] == token.NAME and x[1] != 'n' ]
+        danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
    except tokenize.TokenError:
        raise ValueError, \
              'plural forms expression error, maybe unbalanced parenthesis'
@@ -218,7 +216,7 @@ class NullTranslations:
    def charset(self):
        return self._charset
-    def install(self, unicode=0):
+    def install(self, unicode=False):
        import __builtin__
        __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
@@ -228,12 +226,6 @@ class GNUTranslations(NullTranslations):
    LE_MAGIC = 0x950412deL
    BE_MAGIC = 0xde120495L
-    def __init__(self, fp=None, coerce=False):
-        # Set this attribute before calling the base class constructor, since
-        # the latter calls _parse() which depends on self._coerce.
-        self._coerce = coerce
-        NullTranslations.__init__(self, fp)
    def _parse(self, fp):
        """Override this method to support alternative .mo formats."""
        unpack = struct.unpack
@@ -281,21 +273,28 @@ class GNUTranslations(NullTranslations):
                        self._charset = v.split('charset=')[1]
                    elif k == 'plural-forms':
                        v = v.split(';')
-##                        nplurals = v[0].split('nplurals=')[1]
-##                        nplurals = int(nplurals.strip())
                        plural = v[1].split('plural=')[1]
                        self.plural = c2py(plural)
+            # Note: we unconditionally convert both msgids and msgstrs to
+            # Unicode using the character encoding specified in the charset
+            # parameter of the Content-Type header.  The gettext documentation
+            # strongly encourages msgids to be us-ascii, but some applications
+            # require alternative encodings (e.g. Zope's ZCML and ZPT).  For
+            # traditional gettext applications, the msgid conversion will
+            # cause no problems since us-ascii should always be a subset of
+            # the charset encoding.  We may want to fall back to 8-bit msgids
+            # if the Unicode conversion fails.
            if msg.find('\x00') >= 0:
                # Plural forms
                msgid1, msgid2 = msg.split('\x00')
                tmsg = tmsg.split('\x00')
-                if self._coerce:
+                if self._charset:
                    msgid1 = unicode(msgid1, self._charset)
                    tmsg = [unicode(x, self._charset) for x in tmsg]
                for i in range(len(tmsg)):
                    catalog[(msgid1, i)] = tmsg[i]
            else:
-                if self._coerce:
+                if self._charset:
                    msg = unicode(msg, self._charset)
                    tmsg = unicode(tmsg, self._charset)
                catalog[msg] = tmsg
@@ -304,16 +303,23 @@ class GNUTranslations(NullTranslations):
            transidx += 8
    def gettext(self, message):
-        try:
+        missing = object()
-            return self._catalog[message]
+        tmsg = self._catalog.get(message, missing)
-        except KeyError:
+        if tmsg is missing:
            if self._fallback:
                return self._fallback.gettext(message)
            return message
+        # Encode the Unicode tmsg back to an 8-bit string, if possible
+        if self._charset:
+            return tmsg.encode(self._charset)
+        return tmsg
    def ngettext(self, msgid1, msgid2, n):
        try:
-            return self._catalog[(msgid1, self.plural(n))]
+            tmsg = self._catalog[(msgid1, self.plural(n))]
+            if self._charset:
+                return tmsg.encode(self._charset)
+            return tmsg
        except KeyError:
            if self._fallback:
                return self._fallback.ngettext(msgid1, msgid2, n)
@@ -328,10 +334,7 @@ class GNUTranslations(NullTranslations):
        if tmsg is missing:
            if self._fallback:
                return self._fallback.ugettext(message)
-            tmsg = message
+            return unicode(message)
-        if not self._coerce:
-            return unicode(tmsg, self._charset)
-        # The msgstr is already coerced to Unicode
        return tmsg
    def ungettext(self, msgid1, msgid2, n):
@@ -341,12 +344,9 @@ class GNUTranslations(NullTranslations):
            if self._fallback:
                return self._fallback.ungettext(msgid1, msgid2, n)
            if n == 1:
-                tmsg = msgid1
+                tmsg = unicode(msgid1)
            else:
-                tmsg = msgid2
+                tmsg = unicode(msgid2)
-        if not self._coerce:
-            return unicode(tmsg, self._charset)
-        # The msgstr is already coerced to Unicode
        return tmsg
@@ -392,11 +392,11 @@ def find(domain, localedir=None, languages=None, all=0):
 _translations = {}
 def translation(domain, localedir=None, languages=None,
-                class_=None, fallback=0):
+                class_=None, fallback=False):
    if class_ is None:
        class_ = GNUTranslations
    mofiles = find(domain, localedir, languages, all=1)
-    if len(mofiles)==0:
+    if not mofiles:
        if fallback:
            return NullTranslations()
        raise IOError(ENOENT, 'No translation file found for domain', domain)
@@ -419,8 +419,8 @@ def translation(domain, localedir=None, languages=None,
    return result
-def install(domain, localedir=None, unicode=0):
+def install(domain, localedir=None, unicode=False):
-    translation(domain, localedir, fallback=1).install(unicode)
+    translation(domain, localedir, fallback=True).install(unicode)