Commit 58e5c092 authored by Martin v. Löwis's avatar Martin v. Löwis

Patch #633547: Support plural forms. Do TODOs in test suite.

parent 4ef0a445
......@@ -69,6 +69,32 @@ Like \function{gettext()}, but look the message up in the specified
\var{domain}.
\end{funcdesc}
\begin{funcdesc}{ngettext}{singular, plural, n}
Like \function{gettext()}, but consider plural forms. If a translation
is found, apply the plural formula to \var{n}, and return the
resulting message (some languages have more than two plural forms).
If no translation is found, return \var{singular} if \var{n} is 1;
return \var{plural} otherwise.
The plural formula is taken from the catalog header. It is a C or
Python expression that has a free variable n; the expression evaluates
to the index of the plural in the catalog. See the GNU gettext
documentation for the precise syntax to be used in .po files, and the
formulas for a variety of languages.
\versionadded{2.3}
\end{funcdesc}
\begin{funcdesc}{dngettext}{domain, singular, plural, n}
Like \function{ngettext()}, but look the message up in the specified
\var{domain}.
\versionadded{2.3}
\end{funcdesc}
Note that GNU \program{gettext} also defines a \function{dcgettext()}
method, but this was deemed not useful and so it is currently
unimplemented.
......@@ -207,6 +233,21 @@ Otherwise, return the translated message as a Unicode string.
Overridden in derived classes.
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{ngettext}{singular, plural, n}
If a fallback has been set, forward \method{ngettext} to the fallback.
Otherwise, return the translated message. Overridden in derived classes.
\versionadded{2.3}
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{ungettext}{singular, plural, n}
If a fallback has been set, forward \method{ungettext} to the fallback.
Otherwise, return the translated message as a Unicode string.
Overridden in derived classes.
\versionadded{2.3}
\end{methoddesc}
\begin{methoddesc}[NullTranslations]{info}{}
Return the ``protected'' \member{_info} variable.
\end{methoddesc}
......@@ -263,6 +304,9 @@ returns a Unicode string by passing both the translated message string
and the value of the ``protected'' \member{_charset} variable to the
builtin \function{unicode()} function.
To facilitate plural forms, the methods \method{ngettext} and
\method{ungettext} are overridden as well.
\subsubsection{Solaris message catalog support}
The Solaris operating system defines its own binary
......@@ -534,6 +578,7 @@ this module:
\begin{itemize}
\item Peter Funk
\item James Henstridge
\item Juan David Ib\'a\~nez Palomar
\item Marc-Andr\'e Lemburg
\item Martin von L\"owis
\item Fran\c cois Pinard
......
......@@ -32,6 +32,8 @@ internationalized, to the local language and cultural habits.
# Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
# module.
#
# J. David Ibanez implemented plural forms.
#
# TODO:
# - Lazy loading of .mo files. Currently the entire catalog is loaded into
# memory, but that's probably bad for large translated programs. Instead,
......@@ -43,18 +45,76 @@ internationalized, to the local language and cultural habits.
# - Support Solaris .mo file formats. Unfortunately, we've been unable to
# find this format documented anywhere.
import os
import sys
import struct
import copy
import copy, os, re, struct, sys
from errno import ENOENT
# Public API of the module.  ngettext and dngettext are the plural-aware
# counterparts of gettext and dgettext and are exported alongside them so
# that ``from gettext import *`` picks them up as well.
__all__ = ["bindtextdomain", "textdomain", "gettext", "dgettext",
           "ngettext", "dngettext",
           "find", "translation", "install", "Catalog"]
_default_localedir = os.path.join(sys.prefix, 'share', 'locale')
def test(condition, true, false):
    """
    Implements the C expression:

      condition ? true : false

    Required to correctly interpret plural forms: c2py() rewrites every
    "a ? b : c" in a plural formula into a call "test(a, b, c)".

    Note that, being an ordinary function, both `true` and `false` have
    already been evaluated by the caller; only the selection between the
    two values depends on `condition`.
    """
    if condition:
        return true
    else:
        return false
def c2py(plural):
    """
    Gets a C expression as used in PO files for plural forms and
    returns a Python lambda function that implements an equivalent
    expression.

    plural -- the C expression as a string; its only free variable may
              be "n".

    Returns a one-argument function mapping n to int(<expression>).
    The generated code calls test() for every "a ? b : c", so test()
    must be visible in this module's globals when the function is called.

    Raises ValueError if the expression uses any identifier other than
    "n", or if it contains a ")" without a matching "(".
    """
    # StringIO lives in different modules depending on the Python version.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO
    import token
    import tokenize

    # Security check, allow only the "n" identifier.  The expression is
    # passed to eval() below, so it must be checked before any of the
    # rewriting steps introduce the names "and", "or", "not" and "test".
    # NOTE(review): this only filters identifiers; confirm that is enough
    # for catalogs coming from untrusted sources.
    tokens = tokenize.generate_tokens(StringIO(plural).readline)
    danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
    if danger:
        raise ValueError('dangerous expression')

    # Replace some C operators by their Python equivalents
    plural = plural.replace('&&', ' and ')
    plural = plural.replace('||', ' or ')
    # A "!" that is not the start of "!=" is logical negation.  The
    # lookahead leaves the following character in place, so "!n" becomes
    # " not n" rather than losing the operand.
    plural = re.sub(r'!(?!=)', ' not ', plural)

    # Regular expression and replacement function used to transform
    # "a?b:c" to "test(a,b,c)".
    expr = re.compile(r'(.*?)\?(.*?):(.*)')
    def repl(x):
        # The else-branch may itself be a chained conditional, so it is
        # rewritten recursively.
        return "test(%s, %s, %s)" % (x.group(1), x.group(2),
                                     expr.sub(repl, x.group(3)))

    # Code to transform the plural expression, taking care of parentheses:
    # every "(" opens a new fragment on the stack, every ")" rewrites the
    # innermost fragment and appends it to the enclosing one.
    stack = ['']
    for c in plural:
        if c == '(':
            stack.append('')
        elif c == ')':
            # Only the outermost fragment is left, so there is no "("
            # that this ")" could close.
            if len(stack) == 1:
                raise ValueError('unbalanced parenthesis in plural form')
            s = expr.sub(repl, stack.pop())
            stack[-1] += '(%s)' % s
        else:
            stack[-1] += c
    plural = expr.sub(repl, stack.pop())

    return eval('lambda n: int(%s)' % plural)
def _expand_lang(locale):
from locale import normalize
......@@ -121,11 +181,27 @@ class NullTranslations:
return self._fallback.gettext(message)
return message
def ngettext(self, msgid1, msgid2, n):
    """Plural-aware lookup for a catalog that holds no translations.

    Delegates to the fallback when one is set; otherwise returns
    msgid1 if n equals 1 and msgid2 for every other n.
    """
    fallback = self._fallback
    if not fallback:
        if n == 1:
            return msgid1
        return msgid2
    return fallback.ngettext(msgid1, msgid2, n)
def ugettext(self, message):
    """Return message as a Unicode string, or defer to the fallback.

    When a fallback is set its ugettext() result is returned unchanged;
    otherwise message is passed through unicode().
    """
    fallback = self._fallback
    if not fallback:
        return unicode(message)
    return fallback.ugettext(message)
def ungettext(self, msgid1, msgid2, n):
    """Unicode variant of ngettext() for a catalog without translations.

    Delegates to the fallback when one is set; otherwise picks msgid1
    if n equals 1, msgid2 for every other n, and returns the choice
    converted with unicode().
    """
    fallback = self._fallback
    if fallback:
        return fallback.ungettext(msgid1, msgid2, n)
    if n == 1:
        chosen = msgid1
    else:
        chosen = msgid2
    return unicode(chosen)
def info(self):
    """Return the "protected" _info attribute of this instance."""
    return self._info
......@@ -169,8 +245,16 @@ class GNUTranslations(NullTranslations):
tlen, toff = unpack(ii, buf[transidx:transidx+8])
tend = toff + tlen
if mend < buflen and tend < buflen:
msg = buf[moff:mend]
tmsg = buf[toff:tend]
catalog[buf[moff:mend]] = tmsg
if msg.find('\x00') >= 0:
# Plural forms
msgid1, msgid2 = msg.split('\x00')
tmsg = tmsg.split('\x00')
for i in range(len(tmsg)):
catalog[(msgid1, i)] = tmsg[i]
else:
catalog[msg] = tmsg
else:
raise IOError(0, 'File is corrupt', filename)
# See if we're looking at GNU .mo conventions for metadata
......@@ -186,6 +270,12 @@ class GNUTranslations(NullTranslations):
self._info[k] = v
if k == 'content-type':
self._charset = v.split('charset=')[1]
elif k == 'plural-forms':
v = v.split(';')
## nplurals = v[0].split('nplurals=')[1]
## nplurals = int(nplurals.strip())
plural = v[1].split('plural=')[1]
self.plural = c2py(plural)
# advance to next entry in the seek tables
masteridx += 8
transidx += 8
......@@ -198,6 +288,19 @@ class GNUTranslations(NullTranslations):
return self._fallback.gettext(message)
return message
def ngettext(self, msgid1, msgid2, n):
    """Return the translated plural form of msgid1 appropriate for n.

    Plural entries are stored in the catalog under (msgid1, index)
    keys, where index is the value of self.plural(n).  If no such
    entry exists the fallback is consulted; without a fallback msgid1
    is returned when n equals 1 and msgid2 otherwise.
    """
    # NOTE(review): the header parser assigns self.plural only when it
    # sees a plural-forms entry -- confirm a default exists elsewhere.
    try:
        key = (msgid1, self.plural(n))
        return self._catalog[key]
    except KeyError:
        pass
    fallback = self._fallback
    if fallback:
        return fallback.ngettext(msgid1, msgid2, n)
    if n == 1:
        return msgid1
    return msgid2
def ugettext(self, message):
try:
tmsg = self._catalog[message]
......@@ -208,6 +311,18 @@ class GNUTranslations(NullTranslations):
return unicode(tmsg, self._charset)
def ungettext(self, msgid1, msgid2, n):
    """Unicode variant of ngettext().

    Looks up the (msgid1, self.plural(n)) entry of the catalog.  When
    it is missing, the fallback's ungettext() result is returned as is;
    without a fallback msgid1 (n equal to 1) or msgid2 (any other n) is
    used.  The selected byte string is decoded with self._charset.
    """
    try:
        tmsg = self._catalog[(msgid1, self.plural(n))]
    except KeyError:
        fallback = self._fallback
        if fallback:
            return fallback.ungettext(msgid1, msgid2, n)
        if n == 1:
            tmsg = msgid1
        else:
            tmsg = msgid2
    return unicode(tmsg, self._charset)
# Locate a .mo file using the gettext strategy
def find(domain, localedir=None, languages=None, all=0):
......@@ -311,10 +426,25 @@ def dgettext(domain, message):
return t.gettext(message)
def dngettext(domain, msgid1, msgid2, n):
    """Like ngettext(), but look the message up in the given domain.

    The catalog is obtained from translation() using the locale
    directory registered for the domain, if any.  If that raises
    IOError, msgid1 is returned when n equals 1 and msgid2 otherwise.
    """
    try:
        catalog = translation(domain, _localedirs.get(domain, None))
    except IOError:
        if n == 1:
            return msgid1
        return msgid2
    else:
        return catalog.ngettext(msgid1, msgid2, n)
def gettext(message):
    """Look message up in the current default domain via dgettext()."""
    return dgettext(_current_domain, message)
def ngettext(msgid1, msgid2, n):
    """Plural-aware lookup in the current default domain via dngettext()."""
    return dngettext(_current_domain, msgid1, msgid2, n)
# dcgettext() has been deemed unnecessary and is not implemented.
# James Henstridge's Catalog constructor from GNOME gettext. Documented usage
......
test_gettext
test api 1
installing gettext
albatross
bacon
Throatwobbler Mangrove
wink wink
albatross
bacon
Throatwobbler Mangrove
wink wink
albatross
bacon
Throatwobbler Mangrove
wink wink
albatross
bacon
Throatwobbler Mangrove
wink wink
Guvf zbqhyr cebivqrf vagreangvbanyvmngvba naq ybpnyvmngvba
fhccbeg sbe lbhe Clguba cebtenzf ol cebivqvat na vagresnpr gb gur TAH
trggrkg zrffntr pngnybt yvoenel.
wink wink
bacon
test api 2
True
gettext
albatross
bacon
Throatwobbler Mangrove
wink wink
albatross
bacon
Throatwobbler Mangrove
wink wink
albatross
bacon
Throatwobbler Mangrove
wink wink
albatross
bacon
Throatwobbler Mangrove
wink wink
Guvf zbqhyr cebivqrf vagreangvbanyvmngvba naq ybpnyvmngvba
fhccbeg sbe lbhe Clguba cebtenzf ol cebivqvat na vagresnpr gb gur TAH
trggrkg zrffntr pngnybt yvoenel.
This diff is collapsed.
......@@ -251,6 +251,7 @@ Michael Hudson
Jim Hugunin
Greg Humphreys
Jeremy Hylton
Juan David Ibáñez Palomar
Tony Ingraldi
John Interrante
Ben Jackson
......
......@@ -577,7 +577,8 @@ Library
- gettext.translation has an optional fallback argument, and
gettext.find an optional all argument. Translations will now fall back
on a per-message basis.
on a per-message basis. The module supports plural forms, by means
of gettext.[d]ngettext and Translation.[u]ngettext.
- distutils bdist commands now offer a --skip-build option.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment