Commit b89316fd authored by Georg Brandl

Patch #1180296: improve locale string formatting functions

parent 9d6da3e2
...@@ -61,7 +61,7 @@ locale.setlocale(locale.LC_ALL, '') ...@@ -61,7 +61,7 @@ locale.setlocale(locale.LC_ALL, '')
Returns the database of the local conventions as a dictionary. Returns the database of the local conventions as a dictionary.
This dictionary has the following strings as keys: This dictionary has the following strings as keys:
\begin{tableiii}{l|l|p{3in}}{constant}{Key}{Category}{Meaning} \begin{tableiii}{l|l|p{3in}}{constant}{Category}{Key}{Meaning}
\lineiii{LC_NUMERIC}{\code{'decimal_point'}} \lineiii{LC_NUMERIC}{\code{'decimal_point'}}
{Decimal point character.} {Decimal point character.}
\lineiii{}{\code{'grouping'}} \lineiii{}{\code{'grouping'}}
...@@ -76,8 +76,20 @@ locale.setlocale(locale.LC_ALL, '') ...@@ -76,8 +76,20 @@ locale.setlocale(locale.LC_ALL, '')
{International currency symbol.} {International currency symbol.}
\lineiii{}{\code{'currency_symbol'}} \lineiii{}{\code{'currency_symbol'}}
{Local currency symbol.} {Local currency symbol.}
\lineiii{}{\code{'p_cs_precedes/n_cs_precedes'}}
{Whether the currency symbol precedes the value (for positive and
negative values, respectively).}
\lineiii{}{\code{'p_sep_by_space/n_sep_by_space'}}
{Whether the currency symbol is separated from the value
by a space (for positive and negative values, respectively).}
\lineiii{}{\code{'mon_decimal_point'}} \lineiii{}{\code{'mon_decimal_point'}}
{Decimal point used for monetary values.} {Decimal point used for monetary values.}
\lineiii{}{\code{'frac_digits'}}
{Number of fractional digits used in local formatting
of monetary values.}
\lineiii{}{\code{'int_frac_digits'}}
{Number of fractional digits used in international
formatting of monetary values.}
\lineiii{}{\code{'mon_thousands_sep'}} \lineiii{}{\code{'mon_thousands_sep'}}
{Group separator used for monetary values.} {Group separator used for monetary values.}
\lineiii{}{\code{'mon_grouping'}} \lineiii{}{\code{'mon_grouping'}}
...@@ -87,13 +99,12 @@ locale.setlocale(locale.LC_ALL, '') ...@@ -87,13 +99,12 @@ locale.setlocale(locale.LC_ALL, '')
{Symbol used to annotate a positive monetary value.} {Symbol used to annotate a positive monetary value.}
\lineiii{}{\code{'negative_sign'}} \lineiii{}{\code{'negative_sign'}}
{Symbol used to annotate a negative monetary value.} {Symbol used to annotate a negative monetary value.}
\lineiii{}{\code{'frac_digits'}} \lineiii{}{\code{'p_sign_posn/n_sign_posn'}}
{Number of fractional digits used in local formatting {The position of the sign (for positive resp. negative values), see below.}
of monetary values.}
\lineiii{}{\code{'int_frac_digits'}}
{Number of fractional digits used in international
formatting of monetary values.}
\end{tableiii} \end{tableiii}
All numeric values can be set to \constant{CHAR_MAX} to indicate that
there is no value specified in this locale.
The possible values for \code{'p_sign_posn'} and The possible values for \code{'p_sign_posn'} and
\code{'n_sign_posn'} are given below. \code{'n_sign_posn'} are given below.
...@@ -104,7 +115,7 @@ locale.setlocale(locale.LC_ALL, '') ...@@ -104,7 +115,7 @@ locale.setlocale(locale.LC_ALL, '')
\lineii{2}{The sign should follow the value and currency symbol.} \lineii{2}{The sign should follow the value and currency symbol.}
\lineii{3}{The sign should immediately precede the value.} \lineii{3}{The sign should immediately precede the value.}
\lineii{4}{The sign should immediately follow the value.} \lineii{4}{The sign should immediately follow the value.}
\lineii{\constant{LC_MAX}}{Nothing is specified in this locale.} \lineii{\constant{CHAR_MAX}}{Nothing is specified in this locale.}
\end{tableii} \end{tableii}
\end{funcdesc} \end{funcdesc}
...@@ -206,12 +217,44 @@ for which symbolic constants are available in the locale module. ...@@ -206,12 +217,44 @@ for which symbolic constants are available in the locale module.
strings. strings.
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{format}{format, val\optional{, grouping}} \begin{funcdesc}{format}{format, val\optional{, grouping\optional{, monetary}}}
Formats a number \var{val} according to the current Formats a number \var{val} according to the current
\constant{LC_NUMERIC} setting. The format follows the conventions \constant{LC_NUMERIC} setting. The format follows the conventions
of the \code{\%} operator. For floating point values, the decimal of the \code{\%} operator. For floating point values, the decimal
point is modified if appropriate. If \var{grouping} is true, also point is modified if appropriate. If \var{grouping} is true, also
takes the grouping into account. takes the grouping into account.
If \var{monetary} is true, the conversion uses the monetary thousands
separator and grouping strings.
Please note that this function will only work for exactly one \%char
specifier. For whole format strings, use \function{format_string()}.
\versionchanged[Added the \var{monetary} parameter]{2.5}
\end{funcdesc}
\begin{funcdesc}{format_string}{format, val\optional{, grouping}}
Processes formatting specifiers as in \code{format \% val},
but takes the current locale settings into account.
\versionadded{2.5}
\end{funcdesc}
\begin{funcdesc}{currency}{val\optional{, symbol\optional{, grouping\optional{, international}}}}
Formats a number \var{val} according to the current \constant{LC_MONETARY}
settings.
The returned string includes the currency symbol if \var{symbol} is true,
which is the default.
If \var{grouping} is true (which is not the default), the digits of the
value are grouped.
If \var{international} is true (which is not the default), the international
currency symbol is used.
Note that this function will not work with the `C' locale, so you have to set
a locale via \function{setlocale()} first.
\versionadded{2.5}
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{str}{float} \begin{funcdesc}{str}{float}
......
...@@ -88,13 +88,16 @@ except ImportError: ...@@ -88,13 +88,16 @@ except ImportError:
### Number formatting APIs ### Number formatting APIs
# Author: Martin von Loewis # Author: Martin von Loewis
# improved by Georg Brandl
#perform the grouping from right to left #perform the grouping from right to left
def _group(s): def _group(s, monetary=False):
conv=localeconv() conv = localeconv()
grouping=conv['grouping'] thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
if not grouping:return (s, 0) grouping = conv[monetary and 'mon_grouping' or 'grouping']
result="" if not grouping:
return (s, 0)
result = ""
seps = 0 seps = 0
spaces = "" spaces = ""
if s[-1] == ' ': if s[-1] == ' ':
...@@ -103,63 +106,142 @@ def _group(s): ...@@ -103,63 +106,142 @@ def _group(s):
s = s[:sp] s = s[:sp]
while s and grouping: while s and grouping:
# if grouping is -1, we are done # if grouping is -1, we are done
if grouping[0]==CHAR_MAX: if grouping[0] == CHAR_MAX:
break break
# 0: re-use last group ad infinitum # 0: re-use last group ad infinitum
elif grouping[0]!=0: elif grouping[0] != 0:
#process last group #process last group
group=grouping[0] group = grouping[0]
grouping=grouping[1:] grouping = grouping[1:]
if result: if result:
result=s[-group:]+conv['thousands_sep']+result result = s[-group:] + thousands_sep + result
seps += 1 seps += 1
else: else:
result=s[-group:] result = s[-group:]
s=s[:-group] s = s[:-group]
if s and s[-1] not in "0123456789": if s and s[-1] not in "0123456789":
# the leading string is only spaces and signs # the leading string is only spaces and signs
return s+result+spaces,seps return s + result + spaces, seps
if not result: if not result:
return s+spaces,seps return s + spaces, seps
if s: if s:
result=s+conv['thousands_sep']+result result = s + thousands_sep + result
seps += 1 seps += 1
return result+spaces,seps return result + spaces, seps
def format(f,val,grouping=0): def format(percent, value, grouping=False, monetary=False, *additional):
"""Formats a value in the same way that the % formatting would use, """Returns the locale-aware substitution of a %? specifier
(percent).
additional is for format strings which contain one or more
'*' modifiers."""
# this is only for one-percent-specifier strings and this should be checked
if percent[0] != '%':
raise ValueError("format() must be given exactly one %char "
"format specifier")
if additional:
formatted = percent % ((value,) + additional)
else:
formatted = percent % value
# floats and decimal ints need special action!
if percent[-1] in 'eEfFgG':
seps = 0
parts = formatted.split('.')
if grouping:
parts[0], seps = _group(parts[0], monetary=monetary)
decimal_point = localeconv()[monetary and 'mon_decimal_point'
or 'decimal_point']
formatted = decimal_point.join(parts)
while seps:
sp = formatted.find(' ')
if sp == -1: break
formatted = formatted[:sp] + formatted[sp+1:]
seps -= 1
elif percent[-1] in 'diu':
if grouping:
formatted = _group(formatted, monetary=monetary)[0]
return formatted
import re, operator
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
def format_string(f, val, grouping=False):
"""Formats a string in the same way that the % formatting would use,
but takes the current locale into account. but takes the current locale into account.
Grouping is applied if the third parameter is true.""" Grouping is applied if the third parameter is true."""
result = f % val percents = list(_percent_re.finditer(f))
fields = result.split(".") new_f = _percent_re.sub('%s', f)
seps = 0
if grouping: if isinstance(val, tuple):
fields[0],seps=_group(fields[0]) new_val = list(val)
if len(fields)==2: i = 0
result = fields[0]+localeconv()['decimal_point']+fields[1] for perc in percents:
elif len(fields)==1: starcount = perc.group('modifiers').count('*')
result = fields[0] new_val[i] = format(perc.group(), new_val[i], grouping, False, *new_val[i+1:i+1+starcount])
del new_val[i+1:i+1+starcount]
i += (1 + starcount)
val = tuple(new_val)
elif operator.isMappingType(val):
for perc in percents:
key = perc.group("key")
val[key] = format(perc.group(), val[key], grouping)
else: else:
raise Error, "Too many decimal points in result string" # val is a single value
val = format(percents[0].group(), val, grouping)
return new_f % val
def currency(val, symbol=True, grouping=False, international=False):
"""Formats val according to the currency settings
in the current locale."""
conv = localeconv()
while seps: # check for illegal values
# If the number was formatted for a specific width, then it digits = conv[international and 'int_frac_digits' or 'frac_digits']
# might have been filled with spaces to the left or right. If if digits == 127:
# so, kill as much spaces as there where separators. raise ValueError("Currency formatting is not possible using "
# Leading zeroes as fillers are not yet dealt with, as it is "the 'C' locale.")
# not clear how they should interact with grouping.
sp = result.find(" ") s = format('%%.%if' % digits, abs(val), grouping, monetary=True)
if sp==-1:break # '<' and '>' are markers if the sign must be inserted between symbol and value
result = result[:sp]+result[sp+1:] s = '<' + s + '>'
seps -= 1
if symbol:
smb = conv[international and 'int_curr_symbol' or 'currency_symbol']
precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes']
separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space']
if precedes:
s = smb + (separated and ' ' or '') + s
else:
s = s + (separated and ' ' or '') + smb
sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn']
sign = conv[val<0 and 'negative_sign' or 'positive_sign']
if sign_pos == 0:
s = '(' + s + ')'
elif sign_pos == 1:
s = sign + s
elif sign_pos == 2:
s = s + sign
elif sign_pos == 3:
s = s.replace('<', sign)
elif sign_pos == 4:
s = s.replace('>', sign)
else:
# the default if nothing specified;
# this should be the most fitting sign position
s = sign + s
return result return s.replace('<', '').replace('>', '')
def str(val): def str(val):
"""Convert float to integer, taking the locale into account.""" """Convert float to integer, taking the locale into account."""
return format("%.12g",val) return format("%.12g", val)
def atof(string,func=float): def atof(string, func=float):
"Parses a string as a float according to the locale settings." "Parses a string as a float according to the locale settings."
#First, get rid of the grouping #First, get rid of the grouping
ts = localeconv()['thousands_sep'] ts = localeconv()['thousands_sep']
...@@ -179,10 +261,10 @@ def atoi(str): ...@@ -179,10 +261,10 @@ def atoi(str):
def _test(): def _test():
setlocale(LC_ALL, "") setlocale(LC_ALL, "")
#do grouping #do grouping
s1=format("%d", 123456789,1) s1 = format("%d", 123456789,1)
print s1, "is", atoi(s1) print s1, "is", atoi(s1)
#standard formatting #standard formatting
s1=str(3.14) s1 = str(3.14)
print s1, "is", atof(s1) print s1, "is", atof(s1)
### Locale name aliasing engine ### Locale name aliasing engine
......
...@@ -20,14 +20,14 @@ for tloc in tlocs: ...@@ -20,14 +20,14 @@ for tloc in tlocs:
else: else:
raise ImportError, "test locale not supported (tried %s)"%(', '.join(tlocs)) raise ImportError, "test locale not supported (tried %s)"%(', '.join(tlocs))
def testformat(formatstr, value, grouping = 0, output=None): def testformat(formatstr, value, grouping = 0, output=None, func=locale.format):
if verbose: if verbose:
if output: if output:
print "%s %% %s =? %s ..." %\ print "%s %% %s =? %s ..." %\
(repr(formatstr), repr(value), repr(output)), (repr(formatstr), repr(value), repr(output)),
else: else:
print "%s %% %s works? ..." % (repr(formatstr), repr(value)), print "%s %% %s works? ..." % (repr(formatstr), repr(value)),
result = locale.format(formatstr, value, grouping = grouping) result = func(formatstr, value, grouping = grouping)
if output and result != output: if output and result != output:
if verbose: if verbose:
print 'no' print 'no'
...@@ -49,6 +49,27 @@ try: ...@@ -49,6 +49,27 @@ try:
testformat("%-10.f", 4200, grouping=1, output='4%s200 ' % sep) testformat("%-10.f", 4200, grouping=1, output='4%s200 ' % sep)
# Invoke getpreferredencoding to make sure it does not cause exceptions, # Invoke getpreferredencoding to make sure it does not cause exceptions,
locale.getpreferredencoding() locale.getpreferredencoding()
# === Test format() with more complex formatting strings
# test if grouping is independent from other characters in formatting string
testformat("One million is %i", 1000000, grouping=1, output='One million is 1,000,000',
func=locale.format_string)
testformat("One million is %i", 1000000, grouping=1, output='One million is 1,000,000',
func=locale.format_string)
# test dots in formatting string
testformat(".%f.", 1000.0, output='.1000.000000.', func=locale.format_string)
# test floats
testformat("--> %10.2f", 1000.0, grouping=1, output='--> 1,000.00',
func=locale.format_string)
# test asterisk formats
testformat("%10.*f", (2, 1000.0), grouping=0, output=' 1000.00',
func=locale.format_string)
testformat("%*.*f", (10, 2, 1000.0), grouping=1, output=' 1,000.00',
func=locale.format_string)
# test more-in-one
testformat("int %i float %.2f str %s", (1000, 1000.0, 'str'), grouping=1,
output='int 1,000 float 1,000.00 str str', func=locale.format_string)
finally: finally:
locale.setlocale(locale.LC_NUMERIC, oldlocale) locale.setlocale(locale.LC_NUMERIC, oldlocale)
......
...@@ -45,6 +45,10 @@ Extension Modules ...@@ -45,6 +45,10 @@ Extension Modules
Library Library
------- -------
- Patch #1180296: Two new functions were added to the locale module:
format_string() to get the effect of "format % items" but locale-aware,
and currency() to format a monetary number with currency sign.
- Patch #1486962: Several bugs in the turtle Tk demo module were fixed - Patch #1486962: Several bugs in the turtle Tk demo module were fixed
and several features added, such as speed and geometry control. and several features added, such as speed and geometry control.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment