bpo-29755: Fixed the lgettext() family of functions in the gettext module. (#2266)

They now always return bytes. Updated the gettext documentation.

bpo-29755: Fixed the lgettext() family of functions in the gettext module. (#2266)
They now always return bytes. Updated the gettext documentation.
26cb4657 · Serhiy Storchaka · GitHub · 8457706e · 26cb4657 · 26cb4657
Commit 26cb4657 authored Jun 20, 2017 by Serhiy Storchaka Committed by GitHub Jun 20, 2017
Showing with 229 additions and 107 deletions

Doc/library/gettext.rst Doc/library/gettext.rst +80 -81

Lib/gettext.py Lib/gettext.py +23 -17

Lib/test/test_gettext.py Lib/test/test_gettext.py +123 -9

Misc/NEWS Misc/NEWS +3 -0

No files found.
--- a/Doc/library/gettext.rst
+++ b/Doc/library/gettext.rst
--- a/Lib/gettext.py
+++ b/Lib/gettext.py
@@ -279,7 +279,9 @@ class NullTranslations:
    def lgettext(self, message):
        if self._fallback:
            return self._fallback.lgettext(message)
-        return message
+        if self._output_charset:
+            return message.encode(self._output_charset)
+        return message.encode(locale.getpreferredencoding())

    def ngettext(self, msgid1, msgid2, n):
        if self._fallback:
@@ -293,9 +295,12 @@ class NullTranslations:
        if self._fallback:
            return self._fallback.lngettext(msgid1, msgid2, n)
        if n == 1:
-            return msgid1
+            tmsg = msgid1
        else:
-            return msgid2
+            tmsg = msgid2
+        if self._output_charset:
+            return tmsg.encode(self._output_charset)
+        return tmsg.encode(locale.getpreferredencoding())

    def info(self):
        return self._info
@@ -377,7 +382,7 @@ class GNUTranslations(NullTranslations):
            if mlen == 0:
                # Catalog description
                lastk = None
-                for b_item in tmsg.split('\n'.encode("ascii")):
+                for b_item in tmsg.split(b'\n'):
                    item = b_item.decode().strip()
                    if not item:
                        continue
@@ -425,7 +430,7 @@ class GNUTranslations(NullTranslations):
        if tmsg is missing:
            if self._fallback:
                return self._fallback.lgettext(message)
-            return message
+            tmsg = message
        if self._output_charset:
            return tmsg.encode(self._output_charset)
        return tmsg.encode(locale.getpreferredencoding())
@@ -433,16 +438,16 @@ class GNUTranslations(NullTranslations):
    def lngettext(self, msgid1, msgid2, n):
        try:
            tmsg = self._catalog[(msgid1, self.plural(n))]
-            if self._output_charset:
-                return tmsg.encode(self._output_charset)
-            return tmsg.encode(locale.getpreferredencoding())
        except KeyError:
            if self._fallback:
                return self._fallback.lngettext(msgid1, msgid2, n)
            if n == 1:
-                return msgid1
+                tmsg = msgid1
            else:
-                return msgid2
+                tmsg = msgid2
+        if self._output_charset:
+            return tmsg.encode(self._output_charset)
+        return tmsg.encode(locale.getpreferredencoding())

    def gettext(self, message):
        missing = object()
@@ -582,11 +587,11 @@ def dgettext(domain, message):
    return t.gettext(message)

 def ldgettext(domain, message):
+    codeset = _localecodesets.get(domain)
    try:
-        t = translation(domain, _localedirs.get(domain, None),
-                        codeset=_localecodesets.get(domain))
+        t = translation(domain, _localedirs.get(domain, None), codeset=codeset)
    except OSError:
-        return message
+        return message.encode(codeset or locale.getpreferredencoding())
    return t.lgettext(message)

 def dngettext(domain, msgid1, msgid2, n):
@@ -601,14 +606,15 @@ def dngettext(domain, msgid1, msgid2, n):
    return t.ngettext(msgid1, msgid2, n)

 def ldngettext(domain, msgid1, msgid2, n):
+    codeset = _localecodesets.get(domain)
    try:
-        t = translation(domain, _localedirs.get(domain, None),
-                        codeset=_localecodesets.get(domain))
+        t = translation(domain, _localedirs.get(domain, None), codeset=codeset)
    except OSError:
        if n == 1:
-            return msgid1
+            tmsg = msgid1
        else:
-            return msgid2
+            tmsg = msgid2
+        return tmsg.encode(codeset or locale.getpreferredencoding())
    return t.lngettext(msgid1, msgid2, n)

 def gettext(message):

--- a/Lib/test/test_gettext.py
+++ b/Lib/test/test_gettext.py
 import os
 import base64
 import gettext
+import locale
 import unittest

 from test import support
@@ -455,6 +456,122 @@ class PluralFormsTestCase(GettextBaseTest):
        self.assertRaises(TypeError, f, object())


+class LGettextTestCase(GettextBaseTest):
+    def setUp(self):
+        GettextBaseTest.setUp(self)
+        self.mofile = MOFILE
+
+    def test_lgettext(self):
+        lgettext = gettext.lgettext
+        ldgettext = gettext.ldgettext
+        self.assertEqual(lgettext('mullusk'), b'bacon')
+        self.assertEqual(lgettext('spam'), b'spam')
+        self.assertEqual(ldgettext('gettext', 'mullusk'), b'bacon')
+        self.assertEqual(ldgettext('gettext', 'spam'), b'spam')
+
+    def test_lgettext_2(self):
+        with open(self.mofile, 'rb') as fp:
+            t = gettext.GNUTranslations(fp)
+        lgettext = t.lgettext
+        self.assertEqual(lgettext('mullusk'), b'bacon')
+        self.assertEqual(lgettext('spam'), b'spam')
+
+    def test_lgettext_bind_textdomain_codeset(self):
+        lgettext = gettext.lgettext
+        ldgettext = gettext.ldgettext
+        saved_codeset = gettext.bind_textdomain_codeset('gettext')
+        try:
+            gettext.bind_textdomain_codeset('gettext', 'utf-16')
+            self.assertEqual(lgettext('mullusk'), 'bacon'.encode('utf-16'))
+            self.assertEqual(lgettext('spam'), 'spam'.encode('utf-16'))
+            self.assertEqual(ldgettext('gettext', 'mullusk'), 'bacon'.encode('utf-16'))
+            self.assertEqual(ldgettext('gettext', 'spam'), 'spam'.encode('utf-16'))
+        finally:
+            del gettext._localecodesets['gettext']
+            gettext.bind_textdomain_codeset('gettext', saved_codeset)
+
+    def test_lgettext_output_encoding(self):
+        with open(self.mofile, 'rb') as fp:
+            t = gettext.GNUTranslations(fp)
+        lgettext = t.lgettext
+        t.set_output_charset('utf-16')
+        self.assertEqual(lgettext('mullusk'), 'bacon'.encode('utf-16'))
+        self.assertEqual(lgettext('spam'), 'spam'.encode('utf-16'))
+
+    def test_lngettext(self):
+        lngettext = gettext.lngettext
+        ldngettext = gettext.ldngettext
+        x = lngettext('There is %s file', 'There are %s files', 1)
+        self.assertEqual(x, b'Hay %s fichero')
+        x = lngettext('There is %s file', 'There are %s files', 2)
+        self.assertEqual(x, b'Hay %s ficheros')
+        x = lngettext('There is %s directory', 'There are %s directories', 1)
+        self.assertEqual(x, b'There is %s directory')
+        x = lngettext('There is %s directory', 'There are %s directories', 2)
+        self.assertEqual(x, b'There are %s directories')
+        x = ldngettext('gettext', 'There is %s file', 'There are %s files', 1)
+        self.assertEqual(x, b'Hay %s fichero')
+        x = ldngettext('gettext', 'There is %s file', 'There are %s files', 2)
+        self.assertEqual(x, b'Hay %s ficheros')
+        x = ldngettext('gettext', 'There is %s directory', 'There are %s directories', 1)
+        self.assertEqual(x, b'There is %s directory')
+        x = ldngettext('gettext', 'There is %s directory', 'There are %s directories', 2)
+        self.assertEqual(x, b'There are %s directories')
+
+    def test_lngettext_2(self):
+        with open(self.mofile, 'rb') as fp:
+            t = gettext.GNUTranslations(fp)
+        lngettext = t.lngettext
+        x = lngettext('There is %s file', 'There are %s files', 1)
+        self.assertEqual(x, b'Hay %s fichero')
+        x = lngettext('There is %s file', 'There are %s files', 2)
+        self.assertEqual(x, b'Hay %s ficheros')
+        x = lngettext('There is %s directory', 'There are %s directories', 1)
+        self.assertEqual(x, b'There is %s directory')
+        x = lngettext('There is %s directory', 'There are %s directories', 2)
+        self.assertEqual(x, b'There are %s directories')
+
+    def test_lngettext_bind_textdomain_codeset(self):
+        lngettext = gettext.lngettext
+        ldngettext = gettext.ldngettext
+        saved_codeset = gettext.bind_textdomain_codeset('gettext')
+        try:
+            gettext.bind_textdomain_codeset('gettext', 'utf-16')
+            x = lngettext('There is %s file', 'There are %s files', 1)
+            self.assertEqual(x, 'Hay %s fichero'.encode('utf-16'))
+            x = lngettext('There is %s file', 'There are %s files', 2)
+            self.assertEqual(x, 'Hay %s ficheros'.encode('utf-16'))
+            x = lngettext('There is %s directory', 'There are %s directories', 1)
+            self.assertEqual(x, 'There is %s directory'.encode('utf-16'))
+            x = lngettext('There is %s directory', 'There are %s directories', 2)
+            self.assertEqual(x, 'There are %s directories'.encode('utf-16'))
+            x = ldngettext('gettext', 'There is %s file', 'There are %s files', 1)
+            self.assertEqual(x, 'Hay %s fichero'.encode('utf-16'))
+            x = ldngettext('gettext', 'There is %s file', 'There are %s files', 2)
+            self.assertEqual(x, 'Hay %s ficheros'.encode('utf-16'))
+            x = ldngettext('gettext', 'There is %s directory', 'There are %s directories', 1)
+            self.assertEqual(x, 'There is %s directory'.encode('utf-16'))
+            x = ldngettext('gettext', 'There is %s directory', 'There are %s directories', 2)
+            self.assertEqual(x, 'There are %s directories'.encode('utf-16'))
+        finally:
+            del gettext._localecodesets['gettext']
+            gettext.bind_textdomain_codeset('gettext', saved_codeset)
+
+    def test_lngettext_output_encoding(self):
+        with open(self.mofile, 'rb') as fp:
+            t = gettext.GNUTranslations(fp)
+        lngettext = t.lngettext
+        t.set_output_charset('utf-16')
+        x = lngettext('There is %s file', 'There are %s files', 1)
+        self.assertEqual(x, 'Hay %s fichero'.encode('utf-16'))
+        x = lngettext('There is %s file', 'There are %s files', 2)
+        self.assertEqual(x, 'Hay %s ficheros'.encode('utf-16'))
+        x = lngettext('There is %s directory', 'There are %s directories', 1)
+        self.assertEqual(x, 'There is %s directory'.encode('utf-16'))
+        x = lngettext('There is %s directory', 'There are %s directories', 2)
+        self.assertEqual(x, 'There are %s directories'.encode('utf-16'))
+
+
 class GNUTranslationParsingTest(GettextBaseTest):
    def test_plural_form_error_issue17898(self):
        with open(MOFILE, 'wb') as fp:
@@ -472,13 +589,10 @@ class UnicodeTranslationsTest(GettextBaseTest):
        self._ = self.t.gettext

    def test_unicode_msgid(self):
-        unless = self.assertTrue
-        unless(isinstance(self._(''), str))
-        unless(isinstance(self._(''), str))
+        self.assertIsInstance(self._(''), str)

    def test_unicode_msgstr(self):
-        eq = self.assertEqual
-        eq(self._('ab\xde'), '\xa4yz')
+        self.assertEqual(self._('ab\xde'), '\xa4yz')


 class WeirdMetadataTest(GettextBaseTest):
@@ -547,7 +661,7 @@ if __name__ == '__main__':
 # The original version was automatically generated from the sources with
 # pygettext. Later it was manually modified to add plural forms support.

-'''
+b'''
 # Dummy translation for the Python test_gettext.py module.
 # Copyright (C) 2001 Python Software Foundation
 # Barry Warsaw <barry@python.org>, 2000.
@@ -607,7 +721,7 @@ msgstr[1] "Hay %s ficheros"
 # Here's the second example po file example, used to generate the UMO_DATA
 # containing utf-8 encoded Unicode strings

-'''
+b'''
 # Dummy translation for the Python test_gettext.py module.
 # Copyright (C) 2001 Python Software Foundation
 # Barry Warsaw <barry@python.org>, 2000.
@@ -630,7 +744,7 @@ msgstr "\xc2\xa4yz"

 # Here's the third example po file, used to generate MMO_DATA

-'''
+b'''
 msgid ""
 msgstr ""
 "Project-Id-Version: No Project 0.0\n"
@@ -649,7 +763,7 @@ msgstr ""
 # messages.po, used for bug 17898
 #

-'''
+b'''
 # test file for http://bugs.python.org/issue17898
 msgid ""
 msgstr ""

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -368,6 +368,9 @@ Extension Modules
 Library
 -------

+- bpo-29755: Fixed the lgettext() family of functions in the gettext module.
+  They now always return bytes.
+
 - [Security] bpo-30500: Fix urllib.parse.splithost() to correctly parse
  fragments. For example, ``splithost('//127.0.0.1#@evil.com/')`` now
  correctly returns the ``127.0.0.1`` host, instead of treating ``@evil.com``