Implementation of PEP 3101, Advanced String Formatting.

Known issues: The string.Formatter class, as discussed in the PEP, is incomplete. Error handling needs to conform to the PEP. Need to fix this warning that I introduced in Python/formatter_unicode.c: "Objects/stringlib/unicodedefs.h:26: warning: `STRINGLIB_CMP' defined but not used". Need to make sure sign formatting is correct; more tests are needed. Need to remove '()' sign formatting, left over from an earlier version of the PEP.

Implementation of PEP 3101, Advanced String Formatting.
Known issues: The string.Formatter class, as discussed in the PEP, is incomplete. Error handling needs to conform to the PEP. Need to fix this warning that I introduced in Python/formatter_unicode.c: "Objects/stringlib/unicodedefs.h:26: warning: `STRINGLIB_CMP' defined but not used". Need to make sure sign formatting is correct; more tests are needed. Need to remove '()' sign formatting, left over from an earlier version of the PEP.
8c663263 · Eric Smith · e4dc3248 · 8c663263 · 8c663263 · 8c663263
Commit 8c663263 authored Aug 25, 2007 by Eric Smith
22 changed files
--- a/Include/formatter_unicode.h
+++ b/Include/formatter_unicode.h
+PyObject *
+unicode_unicode__format__(PyObject *self, PyObject *args);
+
+PyObject *
+unicode_long__format__(PyObject *self, PyObject *args);
+
+PyObject *
+unicode_float__format__(PyObject *self, PyObject *args);
+
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -1437,6 +1437,11 @@ PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
    const Py_UNICODE *s, Py_UNICODE c
    );

+PyObject *
+_unicodeformatter_iterator(PyObject *str);
+PyObject *
+_unicodeformatter_lookup(PyObject *field_name, PyObject *args,
+                         PyObject *kwargs);

 #ifdef __cplusplus
 }

--- a/Lib/string.py
+++ b/Lib/string.py
@@ -189,3 +189,42 @@ class Template(metaclass=_TemplateMetaclass):
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
+
+
+
+########################################################################
+# the Formatter class
+# see PEP 3101 for details and purpose of this class
+
+# The hard parts are reused from the C implementation.  They're
+# exposed here via the sys module.  sys was chosen because it's always
+# available and doesn't have to be dynamically loaded.
+
+# The parser is implemented in sys._formatter_parser.
+# The "object lookup" is implemented in sys._formatter_lookup
+
+from sys import _formatter_parser, _formatter_lookup
+
+class Formatter:
+    def format(self, format_string, *args, **kwargs):
+        return self.vformat(format_string, args, kwargs)
+
+    def vformat(self, format_string, args, kwargs):
+        result = []
+        for (is_markup, literal, field_name, format_spec, conversion) in \
+                _formatter_parser(format_string):
+            if is_markup:
+                # find the object
+                index, name, obj = _formatter_lookup(field_name, args, kwargs)
+            else:
+                result.append(literal)
+        return ''.join(result)
+
+    def get_value(self, key, args, kwargs):
+        pass
+
+    def check_unused_args(self, used_args, args, kwargs):
+        pass
+
+    def format_field(self, value, format_spec):
+        pass
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -517,6 +517,32 @@ class BuiltinTest(unittest.TestCase):
        self.assertAlmostEqual(float(Foo3(21)), 42.)
        self.assertRaises(TypeError, float, Foo4(42))

+    def test_format(self):
+        class A:
+            def __init__(self, x):
+                self.x = x
+            def __format__(self, format_spec):
+                return str(self.x) + format_spec
+
+        # class that returns a bad type from __format__
+        class H:
+            def __format__(self, format_spec):
+                return 1.0
+
+        self.assertEqual(format(3, ''), '3')
+        self.assertEqual(format(A(3), 'spec'), '3spec')
+
+        # for builtin types, format(x, "") == str(x)
+        self.assertEqual(format(17**13, ""), str(17**13))
+        self.assertEqual(format(1.0, ""), str(1.0))
+        self.assertEqual(format(3.1415e104, ""), str(3.1415e104))
+        self.assertEqual(format(-3.1415e104, ""), str(-3.1415e104))
+        self.assertEqual(format(3.1415e-104, ""), str(3.1415e-104))
+        self.assertEqual(format(-3.1415e-104, ""), str(-3.1415e-104))
+        self.assertEqual(format(object, ""), str(object))
+
+        #self.assertRaises(TypeError, format, H(), "")
+
    def test_getattr(self):
        import sys
        self.assert_(getattr(sys, 'stdout') is sys.stdout)

--- a/Lib/test/test_descrtut.py
+++ b/Lib/test/test_descrtut.py
@@ -173,6 +173,7 @@ You can get the information from the list type:
     '__delslice__',
     '__doc__',
     '__eq__',
+     '__format__',
     '__ge__',
     '__getattribute__',
     '__getitem__',

--- a/Lib/test/test_float.py
+++ b/Lib/test/test_float.py
@@ -114,12 +114,44 @@ class IEEEFormatTestCase(unittest.TestCase):
            self.assertEquals(pos_pos(), neg_pos())
            self.assertEquals(pos_neg(), neg_neg())

+class FormatTestCase(unittest.TestCase):
+    def testFormat(self):
+        # these should be rewritten to use both format(x, spec) and
+        # x.__format__(spec)
+
+        self.assertEqual(format(0.0, 'f'), '0.000000')
+
+        # the default is 'g', except for empty format spec
+        self.assertEqual(format(0.0, ''), '0.0')
+        self.assertEqual(format(0.01, ''), '0.01')
+        self.assertEqual(format(0.01, 'g'), '0.01')
+
+        self.assertEqual(format(0, 'f'), '0.000000')
+
+        self.assertEqual(format(1.0, 'f'), '1.000000')
+        self.assertEqual(format(1, 'f'), '1.000000')
+
+        self.assertEqual(format(-1.0, 'f'), '-1.000000')
+        self.assertEqual(format(-1, 'f'), '-1.000000')
+
+        self.assertEqual(format( 1.0, ' f'), ' 1.000000')
+        self.assertEqual(format(-1.0, ' f'), '-1.000000')
+        self.assertEqual(format( 1.0, '+f'), '+1.000000')
+        self.assertEqual(format(-1.0, '+f'), '-1.000000')
+
+        # % formatting
+        self.assertEqual(format(-1.0, '%'), '-100.000000%')
+
+        # conversion to string should fail
+        self.assertRaises(ValueError, format, 3.0, "s")
+

 def test_main():
    test_support.run_unittest(
        FormatFunctionsTestCase,
        UnknownFormatTestCase,
-        IEEEFormatTestCase)
+        IEEEFormatTestCase,
+        FormatTestCase)

 if __name__ == '__main__':
    test_main()
--- a/Lib/test/test_long.py
+++ b/Lib/test/test_long.py
@@ -493,6 +493,50 @@ class LongTest(unittest.TestCase):
                eq(x > y, Rcmp > 0, Frm("%r > %r %d", x, y, Rcmp))
                eq(x >= y, Rcmp >= 0, Frm("%r >= %r %d", x, y, Rcmp))

+    def test_format(self):
+        self.assertEqual(format(123456789, 'd'), '123456789')
+        self.assertEqual(format(123456789, 'd'), '123456789')
+
+        # hex
+        self.assertEqual(format(3, "x"), "3")
+        self.assertEqual(format(3, "X"), "3")
+        self.assertEqual(format(1234, "x"), "4d2")
+        self.assertEqual(format(-1234, "x"), "-4d2")
+        self.assertEqual(format(1234, "8x"), "     4d2")
+# XXX fix       self.assertEqual(format(-1234, "8x"), "    -4d2")
+        self.assertEqual(format(1234, "x"), "4d2")
+        self.assertEqual(format(-1234, "x"), "-4d2")
+        self.assertEqual(format(-3, "x"), "-3")
+        self.assertEqual(format(-3, "X"), "-3")
+        self.assertEqual(format(int('be', 16), "x"), "be")
+        self.assertEqual(format(int('be', 16), "X"), "BE")
+        self.assertEqual(format(-int('be', 16), "x"), "-be")
+        self.assertEqual(format(-int('be', 16), "X"), "-BE")
+
+        # octal
+        self.assertEqual(format(3, "b"), "11")
+        self.assertEqual(format(-3, "b"), "-11")
+        self.assertEqual(format(1234, "b"), "10011010010")
+        self.assertEqual(format(-1234, "b"), "-10011010010")
+        self.assertEqual(format(1234, "-b"), "10011010010")
+        self.assertEqual(format(-1234, "-b"), "-10011010010")
+        self.assertEqual(format(1234, " b"), " 10011010010")
+        self.assertEqual(format(-1234, " b"), "-10011010010")
+        self.assertEqual(format(1234, "+b"), "+10011010010")
+        self.assertEqual(format(-1234, "+b"), "-10011010010")
+
+        # conversion to float
+        self.assertEqual(format(0, 'f'), '0.000000')
+
+        # make sure these are errors
+        self.assertRaises(ValueError, format, 3, "1.3")  # precision disallowed
+        return
+        self.assertRaises(ValueError, format, 3, "+c")   # sign not allowed
+                                                         # with 'c'
+        self.assertRaises(ValueError, format, 3, "R")    # bogus format type
+        # conversion to string should fail
+        self.assertRaises(ValueError, format, 3, "s")
+
 def test_main():
    test_support.run_unittest(LongTest)


--- a/Lib/test/test_string.py
+++ b/Lib/test/test_string.py
@@ -15,6 +15,14 @@ class ModuleTest(unittest.TestCase):
        string.punctuation
        string.printable

+    def test_formatter(self):
+        fmt = string.Formatter()
+        self.assertEqual(fmt.format("foo"), "foo")
+
+        # Formatter not working yet for lookups
+        #self.assertEqual(fmt.format("foo{0}", "bar"), "foobar")
+
+
    def test_maketrans(self):
        transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'


--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -357,6 +357,218 @@ class UnicodeTest(

        self.assertRaises(TypeError, "abc".__contains__)

+    def test_format(self):
+        self.assertEqual(''.format(), '')
+        self.assertEqual('a'.format(), 'a')
+        self.assertEqual('ab'.format(), 'ab')
+        self.assertEqual('a{{'.format(), 'a{')
+        self.assertEqual('a}}'.format(), 'a}')
+        self.assertEqual('{{b'.format(), '{b')
+        self.assertEqual('}}b'.format(), '}b')
+        self.assertEqual('a{{b'.format(), 'a{b')
+
+        # examples from the PEP:
+        import datetime
+        self.assertEqual("My name is {0}".format('Fred'), "My name is Fred")
+        self.assertEqual("My name is {0[name]}".format(dict(name='Fred')),
+                         "My name is Fred")
+        self.assertEqual("My name is {0} :-{{}}".format('Fred'),
+                         "My name is Fred :-{}")
+
+        d = datetime.date(2007, 8, 18)
+        self.assertEqual("The year is {0.year}".format(d),
+                         "The year is 2007")
+
+        #"{0!r:20}".format("Hello")
+
+        # classes we'll use for testing
+        class C:
+            def __init__(self, x=100):
+                self._x = x
+            def __format__(self, spec):
+                return spec
+
+        class D:
+            def __init__(self, x):
+                self.x = x
+            def __format__(self, spec):
+                return str(self.x)
+
+        # class with __str__, but no __format__
+        class E:
+            def __init__(self, x):
+                self.x = x
+            def __str__(self):
+                return 'E(' + self.x + ')'
+
+        # class with __repr__, but no __format__ or __str__
+        class F:
+            def __init__(self, x):
+                self.x = x
+            def __repr__(self):
+                return 'F(' + self.x + ')'
+
+        # class with __format__ that forwards to string, for some format_spec's
+        class G:
+            def __init__(self, x):
+                self.x = x
+            def __str__(self):
+                return "string is " + self.x
+            def __format__(self, format_spec):
+                if format_spec == 'd':
+                    return 'G(' + self.x + ')'
+                return object.__format__(self, format_spec)
+
+        # class that returns a bad type from __format__
+        class H:
+            def __format__(self, format_spec):
+                return 1.0
+
+
+        self.assertEqual(''.format(), '')
+        self.assertEqual('abc'.format(), 'abc')
+        self.assertEqual('{0}'.format('abc'), 'abc')
+        self.assertEqual('{0:}'.format('abc'), 'abc')
+#        self.assertEqual('{ 0 }'.format('abc'), 'abc')
+        self.assertEqual('X{0}'.format('abc'), 'Xabc')
+        self.assertEqual('{0}X'.format('abc'), 'abcX')
+        self.assertEqual('X{0}Y'.format('abc'), 'XabcY')
+        self.assertEqual('{1}'.format(1, 'abc'), 'abc')
+        self.assertEqual('X{1}'.format(1, 'abc'), 'Xabc')
+        self.assertEqual('{1}X'.format(1, 'abc'), 'abcX')
+        self.assertEqual('X{1}Y'.format(1, 'abc'), 'XabcY')
+        self.assertEqual('{0}'.format(-15), '-15')
+        self.assertEqual('{0}{1}'.format(-15, 'abc'), '-15abc')
+        self.assertEqual('{0}X{1}'.format(-15, 'abc'), '-15Xabc')
+        self.assertEqual('{{'.format(), '{')
+        self.assertEqual('}}'.format(), '}')
+        self.assertEqual('{{}}'.format(), '{}')
+        self.assertEqual('{{x}}'.format(), '{x}')
+        self.assertEqual('{{{0}}}'.format(123), '{123}')
+        self.assertEqual('{{{{0}}}}'.format(), '{{0}}')
+        self.assertEqual('}}{{'.format(), '}{')
+        self.assertEqual('}}x{{'.format(), '}x{')
+
+        self.assertEqual('{foo._x}'.format(foo=C(20)), '20')
+        self.assertEqual('{1}{0}'.format(D(10), D(20)), '2010')
+        self.assertEqual('{0._x.x}'.format(C(D('abc'))), 'abc')
+        self.assertEqual('{0[0]}'.format(['abc', 'def']), 'abc')
+        self.assertEqual('{0[1]}'.format(['abc', 'def']), 'def')
+        self.assertEqual('{0[1][0]}'.format(['abc', ['def']]), 'def')
+        self.assertEqual('{0[1][0].x}'.format(['abc', [D('def')]]), 'def')
+
+        # I'm not sure if this should work, or if it's a problem if it does work
+        #'{0[_{foo}]}'.format({'_FOO': 'abc'}, foo='FOO')
+        #('{0[{foo}{bar}]}'.format({'FOOBAR': 'abc'}, foo='FOO', bar='BAR')
+
+        # format specifiers for built in types
+
+        # strings
+        self.assertEqual('{0:.3s}'.format('abc'), 'abc')
+        self.assertEqual('{0:.3s}'.format('ab'), 'ab')
+        self.assertEqual('{0:.3s}'.format('abcdef'), 'abc')
+        self.assertEqual('{0:.0s}'.format('abcdef'), '')
+        self.assertEqual('{0:3.3s}'.format('abc'), 'abc')
+        self.assertEqual('{0:2.3s}'.format('abc'), 'abc')
+        self.assertEqual('{0:2.2s}'.format('abc'), 'ab')
+        self.assertEqual('{0:3.2s}'.format('abc'), 'ab ')
+        self.assertEqual('{0:x<0s}'.format('result'), 'result')
+        self.assertEqual('{0:x<5s}'.format('result'), 'result')
+        self.assertEqual('{0:x<6s}'.format('result'), 'result')
+        self.assertEqual('{0:x<7s}'.format('result'), 'resultx')
+        self.assertEqual('{0:x<8s}'.format('result'), 'resultxx')
+        self.assertEqual('{0: <7s}'.format('result'), 'result ')
+        self.assertEqual('{0:<7s}'.format('result'), 'result ')
+        self.assertEqual('{0:>7s}'.format('result'), ' result')
+        self.assertEqual('{0:>8s}'.format('result'), '  result')
+        self.assertEqual('{0:^8s}'.format('result'), ' result ')
+        self.assertEqual('{0:^9s}'.format('result'), ' result  ')
+        self.assertEqual('{0:^10s}'.format('result'), '  result  ')
+        self.assertEqual('{0:10000}'.format('a'), 'a' + ' ' * 9999)
+        self.assertEqual('{0:10000}'.format(''), ' ' * 10000)
+        self.assertEqual('{0:10000000}'.format(''), ' ' * 10000000)
+
+        # format specifiers for user defined type
+        self.assertEqual('{0:abc}'.format(C()), 'abc')
+
+        # !r and !s coercions
+        self.assertEqual('{0!s}'.format('Hello'), 'Hello')
+        self.assertEqual('{0!s:}'.format('Hello'), 'Hello')
+        self.assertEqual('{0!s:15}'.format('Hello'), 'Hello          ')
+        self.assertEqual('{0!s:15s}'.format('Hello'), 'Hello          ')
+        self.assertEqual('{0!r}'.format('Hello'), "'Hello'")
+        self.assertEqual('{0!r:}'.format('Hello'), "'Hello'")
+        self.assertEqual('{0!r}'.format(F('Hello')), 'F(Hello)')
+
+        # XXX should pass, but currently don't
+        # format(object, "")
+
+        # test fallback to object.__format__
+        self.assertEqual('{0}'.format({}), '{}')
+        self.assertEqual('{0}'.format([]), '[]')
+        self.assertEqual('{0}'.format([1]), '[1]')
+        self.assertEqual('{0}'.format(E('data')), 'E(data)')
+        self.assertEqual('{0:^10}'.format(E('data')), ' E(data)  ')
+        self.assertEqual('{0:^10s}'.format(E('data')), ' E(data)  ')
+        self.assertEqual('{0:d}'.format(G('data')), 'G(data)')
+        self.assertEqual('{0:>15s}'.format(G('data')), ' string is data')
+        self.assertEqual('{0!s}'.format(G('data')), 'string is data')
+
+        # string format specifiers
+        self.assertEqual('{0:}'.format('a'), 'a')
+
+        # computed format specifiers
+        self.assertEqual("{0:.{1}}".format('hello world', 5), 'hello')
+        self.assertEqual("{0:.{1}s}".format('hello world', 5), 'hello')
+        self.assertEqual("{0:.{precision}s}".format('hello world', precision=5), 'hello')
+        self.assertEqual("{0:{width}.{precision}s}".format('hello world', width=10, precision=5), 'hello     ')
+        self.assertEqual("{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), 'hello     ')
+
+        # test various errors
+        self.assertRaises(ValueError, '{'.format)
+        self.assertRaises(ValueError, '}'.format)
+        self.assertRaises(ValueError, 'a{'.format)
+        self.assertRaises(ValueError, 'a}'.format)
+        self.assertRaises(ValueError, '{a'.format)
+        self.assertRaises(ValueError, '}a'.format)
+        self.assertRaises(ValueError, '{0}'.format)
+        self.assertRaises(ValueError, '{1}'.format, 'abc')
+        self.assertRaises(ValueError, '{x}'.format)
+        self.assertRaises(ValueError, "}{".format)
+        self.assertRaises(ValueError, "{".format)
+        self.assertRaises(ValueError, "}".format)
+        self.assertRaises(ValueError, "abc{0:{}".format)
+        self.assertRaises(ValueError, "{0".format)
+        self.assertRaises(ValueError, "{0.[]}".format)
+        self.assertRaises(ValueError, "{0[0}".format)
+        self.assertRaises(ValueError, "{0[0:foo}".format)
+        self.assertRaises(ValueError, "{c]}".format)
+        self.assertRaises(ValueError, "{{ {{{0}}".format)
+        self.assertRaises(ValueError, "{0}}".format)
+        self.assertRaises(ValueError, "{foo}".format, bar=3)
+        self.assertRaises(ValueError, "{0!x}".format, 3)
+        self.assertRaises(ValueError, "{0!}".format)
+        self.assertRaises(ValueError, "{0!rs}".format)
+        self.assertRaises(ValueError, "{!}".format)
+        self.assertRaises(ValueError, "{:}".format)
+        self.assertRaises(ValueError, "{}".format)
+
+        # can't have a replacement on the field name portion
+        self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)
+
+        # exceed maximum recursion depth
+        self.assertRaises(ValueError, "{0:{1:{2}}}".format, 'abc', 's', '')
+        self.assertRaises(ValueError, "{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
+                          0, 1, 2, 3, 4, 5, 6, 7)
+
+        # string format spec errors
+        self.assertRaises(ValueError, "{0:-s}".format, '')
+        self.assertRaises(ValueError, format, "", "-")
+        self.assertRaises(ValueError, "{0:=s}".format, '')
+
+        # check that __format__ returns a string
+        #self.assertRaises(TypeError, "{0}".format, H())
+
    def test_formatting(self):
        string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
        # Testing Unicode formatting strings...

--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -271,6 +271,7 @@ PYTHON_OBJS=	\
 		Python/traceback.o \
 		Python/getopt.o \
 		Python/pystrtod.o \
+		Python/formatter_unicode.o \
 		Python/$(DYNLOADFILE) \
 		$(MACHDEP_OBJS) \
 		$(THREADOBJ)
@@ -503,6 +504,19 @@ Python/importdl.o: $(srcdir)/Python/importdl.c
 Objects/unicodectype.o:	$(srcdir)/Objects/unicodectype.c \
 				$(srcdir)/Objects/unicodetype_db.h

+Objects/unicodeobject.o: $(srcdir)/Objects/unicodeobject.c \
+				$(srcdir)/Objects/stringlib/string_format.h \
+	                        $(srcdir)/Objects/stringlib/unicodedefs.h \
+	                        $(srcdir)/Objects/stringlib/fastsearch.h \
+	                        $(srcdir)/Objects/stringlib/count.h \
+	                        $(srcdir)/Objects/stringlib/find.h \
+	                        $(srcdir)/Objects/stringlib/partition.h
+
+Python/formatter_unicode.o: $(srcdir)/Python/formatter_unicode.c \
+	                        $(srcdir)/Objects/stringlib/formatter.h
+
+
+
 ############################################################################
 # Header files

@@ -527,6 +541,7 @@ PYTHON_HEADERS= \
 		Include/genobject.h \
 		Include/fileobject.h \
 		Include/floatobject.h \
+		Include/formatter_unicode.h \
 		Include/funcobject.h \
 		Include/import.h \
 		Include/intobject.h \

--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -6,6 +6,8 @@

 #include "Python.h"

+#include "formatter_unicode.h"
+
 #include <ctype.h>

 #if !defined(__STDC__)
@@ -1015,6 +1017,21 @@ float_getzero(PyObject *v, void *closure)
 	return PyFloat_FromDouble(0.0);
 }

+static PyObject *
+float__format__(PyObject *self, PyObject *args)
+{
+    /* Implements float.__format__: forwards self and args unchanged to
+       unicode_float__format__ and returns its result.
+
+       When back porting this to 2.6, check the type of the format_spec
+       and call either unicode_float__format__ or its string
+       counterpart (presumably string_float__format__ -- not defined
+       in this change; confirm the name when back porting). */
+    return unicode_float__format__(self, args);
+}
+
+PyDoc_STRVAR(float__format__doc,
+"float.__format__(format_spec) -> string\n"
+"\n"
+"Formats the float according to format_spec.");
+
+
 static PyMethodDef float_methods[] = {
  	{"conjugate",	(PyCFunction)float_float,	METH_NOARGS,
 	 "Returns self, the complex conjugate of any float."},
@@ -1028,6 +1045,8 @@ static PyMethodDef float_methods[] = {
 	 METH_O|METH_CLASS,		float_getformat_doc},
 	{"__setformat__",	(PyCFunction)float_setformat,	
 	 METH_VARARGS|METH_CLASS,	float_setformat_doc},
+        {"__format__",          (PyCFunction)float__format__,
+         METH_VARARGS,                  float__format__doc},
 	{NULL,		NULL}		/* sentinel */
 };


--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -5,6 +5,8 @@
 #include "Python.h"
 #include "longintrepr.h"

+#include "formatter_unicode.h"
+
 #include <ctype.h>

 long
@@ -3592,6 +3594,16 @@ long_getN(PyLongObject *v, void *context) {
 	return PyLong_FromLong((intptr_t)context);
 }

+static PyObject *
+long__format__(PyObject *self, PyObject *args)
+{
+    /* Implements the "__format__" method of the long type: forwards
+       self and args unchanged to unicode_long__format__ and returns
+       its result.
+
+       When back porting this to 2.6, check the type of the format_spec
+       and call either unicode_long__format__ or
+       string_long__format__ */
+    return unicode_long__format__(self, args);
+}
+
+
 static PyObject *
 long_round(PyObject *self, PyObject *args)
 {
@@ -3632,6 +3644,7 @@ static PyMethodDef long_methods[] = {
         "Rounding an Integral returns itself.\n"
 	 "Rounding with an ndigits arguments defers to float.__round__."},
 	{"__getnewargs__",	(PyCFunction)long_getnewargs,	METH_NOARGS},
+        {"__format__", (PyCFunction)long__format__, METH_VARARGS},
 	{NULL,		NULL}		/* sentinel */
 };


--- a/Objects/stringlib/formatter.h
+++ b/Objects/stringlib/formatter.h
--- a/Objects/stringlib/string_format.h
+++ b/Objects/stringlib/string_format.h
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
+#ifndef STRINGLIB_STRINGDEFS_H
+#define STRINGLIB_STRINGDEFS_H
+
+/* this is sort of a hack.  there's at least one place (formatting
+   floats) where some stringlib code takes a different path if it's
+   compiled as unicode. */
+#define STRINGLIB_IS_UNICODE     0
+
+#define STRINGLIB_CHAR           char
+#define STRINGLIB_TYPE_NAME      "string"
+#define STRINGLIB_EMPTY          string_empty
+#define STRINGLIB_ISDECIMAL(x)   ((x >= '0') && (x <= '9'))
+#define STRINGLIB_TODECIMAL(x)   (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
+#define STRINGLIB_FILL           memset
+#define STRINGLIB_STR            PyString_AS_STRING
+#define STRINGLIB_LEN            PyString_GET_SIZE
+#define STRINGLIB_NEW            PyString_FromStringAndSize
+#define STRINGLIB_RESIZE         _PyString_Resize
+#define STRINGLIB_CHECK          PyString_Check
+#define STRINGLIB_CMP            memcmp
+#define STRINGLIB_TOSTR          PyObject_Str
+
+#endif /* !STRINGLIB_STRINGDEFS_H */
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
+#ifndef STRINGLIB_UNICODEDEFS_H
+#define STRINGLIB_UNICODEDEFS_H
+
+/* this is sort of a hack.  there's at least one place (formatting
+   floats) where some stringlib code takes a different path if it's
+   compiled as unicode. */
+#define STRINGLIB_IS_UNICODE     1
+
+#define STRINGLIB_CHAR           Py_UNICODE
+#define STRINGLIB_TYPE_NAME      "unicode"
+#define STRINGLIB_EMPTY          unicode_empty
+#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
+#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL
+#define STRINGLIB_TOUPPER        Py_UNICODE_TOUPPER
+#define STRINGLIB_TOLOWER        Py_UNICODE_TOLOWER
+#define STRINGLIB_FILL           Py_UNICODE_FILL
+#define STRINGLIB_STR            PyUnicode_AS_UNICODE
+#define STRINGLIB_LEN            PyUnicode_GET_SIZE
+#define STRINGLIB_NEW            PyUnicode_FromUnicode
+#define STRINGLIB_RESIZE         PyUnicode_Resize
+#define STRINGLIB_CHECK          PyUnicode_Check
+#define STRINGLIB_TOSTR          PyObject_Unicode
+
+Py_LOCAL_INLINE(int)
+STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
+{
+    if (str[0] != other[0])
+        return 1;
+    return memcmp((void*) str, (void*) other, len * sizeof(Py_UNICODE));
+}
+
+#endif /* !STRINGLIB_UNICODEDEFS_H */
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -2933,11 +2933,52 @@ object_reduce_ex(PyObject *self, PyObject *args)
 	return _common_reduce(self, proto);
 }

+
+/*
+   Default implementation of object.__format__.
+
+   From PEP 3101, this code implements:
+
+   class object:
+       def __format__(self, format_spec):
+           return format(str(self), format_spec)
+
+   args must be a one-element tuple holding format_spec, which must be
+   a unicode object; otherwise a TypeError ("Unicode object required")
+   is set and NULL is returned.  On success, returns whatever the
+   __format__ method of the unicode form of self returns.  If the
+   conversion to unicode, the attribute lookup, or the call fails,
+   NULL is returned.
+*/
+static PyObject *
+object_format(PyObject *self, PyObject *args)
+{
+        PyObject *format_spec;
+        PyObject *self_as_str = NULL;
+        PyObject *result = NULL;
+        PyObject *format_meth = NULL;
+
+        if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
+                return NULL;
+        if (!PyUnicode_Check(format_spec)) {
+                PyErr_SetString(PyExc_TypeError, "Unicode object required");
+                return NULL;
+        }
+
+        self_as_str = PyObject_Unicode(self);
+        if (self_as_str != NULL) {
+                /* find the __format__ method of the unicode form of self */
+                format_meth = PyObject_GetAttrString(self_as_str, "__format__");
+                if (format_meth != NULL) {
+                       /* and call it with format_spec as its only argument */
+                        result = PyObject_CallFunctionObjArgs(format_meth, format_spec, NULL);
+                }
+        }
+
+        Py_XDECREF(self_as_str);   /* either temporary may still be NULL here */
+        Py_XDECREF(format_meth);
+
+        return result;
+}
+
 static PyMethodDef object_methods[] = {
 	{"__reduce_ex__", object_reduce_ex, METH_VARARGS,
 	 PyDoc_STR("helper for pickle")},
 	{"__reduce__", object_reduce, METH_VARARGS,
 	 PyDoc_STR("helper for pickle")},
+        {"__format__", object_format, METH_VARARGS,
+         PyDoc_STR("default object formatter")},
 	{0}
 };


--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -45,6 +45,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 #include "unicodeobject.h"
 #include "ucnhash.h"

+#include "formatter_unicode.h"
+
 #ifdef MS_WINDOWS
 #include <windows.h>
 #endif
@@ -5009,21 +5011,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,

 /* --- Helpers ------------------------------------------------------------ */

-#define STRINGLIB_CHAR Py_UNICODE
-
-#define STRINGLIB_LEN PyUnicode_GET_SIZE
-#define STRINGLIB_NEW PyUnicode_FromUnicode
-#define STRINGLIB_STR PyUnicode_AS_UNICODE
-
-Py_LOCAL_INLINE(int)
-STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
-{
-    if (str[0] != other[0])
-        return 1;
-    return memcmp((void*) str, (void*) other, len * sizeof(Py_UNICODE));
-}
-
-#define STRINGLIB_EMPTY unicode_empty
+#include "stringlib/unicodedefs.h"

 #include "stringlib/fastsearch.h"

@@ -7964,6 +7952,33 @@ unicode_endswith(PyUnicodeObject *self,
    return PyBool_FromLong(result);
 }

+#include "stringlib/string_format.h"
+
+PyDoc_STRVAR(format__doc__,
+"S.format(*args, **kwargs) -> unicode\n\
+\n\
+");
+
+static PyObject *
+unicode_format(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* this calls into stringlib/string_format.h because it can be
+       included for either string or unicode.  this is needed for
+       python 2.6. */
+    return do_string_format(self, args, kwds);
+}
+
+
+PyDoc_STRVAR(p_format__doc__,
+"S.__format__(format_spec) -> unicode\n\
+\n\
+");
+
+static PyObject *
+unicode__format__(PyObject *self, PyObject *args)
+{
+    return unicode_unicode__format__(self, args);
+}


 static PyObject *
@@ -8019,6 +8034,8 @@ static PyMethodDef unicode_methods[] = {
    {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
    {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__},
    {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
+    {"format", (PyCFunction) unicode_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
+    {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
 #if 0
    {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
 #endif
@@ -9124,6 +9141,205 @@ void _Py_ReleaseInternedUnicodeStrings(void)
 }


+/********************* Formatter Iterator ************************/
+
+/* this is used to implement string.Formatter.vparse().  it exists so
+   Formatter can share code with the built in unicode.format()
+   method */
+
+/* Instance layout of the formatter iterator: the string being parsed
+   plus the state of the markup parser that walks over it. */
+typedef struct {
+	PyObject_HEAD
+
+        /* we know this to be a unicode object, but since we just keep
+           it around to keep the object alive, having it as PyObject
+           is okay */
+        PyObject *str;
+
+        /* parser state; set up by _unicodeformatter_iterator from
+           str's character buffer and length */
+        MarkupIterator it_markup;
+} formatteriterobject;
+
+/* tp_dealloc slot: stop GC tracking of the iterator, release its
+   reference to the string it was parsing, then free the iterator
+   through the GC allocator. */
+static void
+formatteriter_dealloc(formatteriterobject *self)
+{
+	_PyObject_GC_UNTRACK(self);
+	Py_XDECREF(self->str);
+	PyObject_GC_Del(self);
+}
+
+/* tp_iternext slot: produce the next parsed element of the string.
+
+   returns a new reference to a tuple:
+   (is_markup, literal, field_name, format_spec, conversion)
+   if is_markup == True:
+        literal is None
+        field_name is the string before the ':'
+        format_spec is the string after the ':'
+        conversion is either None, or the string after the '!'
+   if is_markup == False:
+        literal is the literal string
+        field_name is None
+        format_spec is None
+        conversion is None
+
+   returns NULL when MarkupIterator_next reports an error (the
+   exception has already been set by then), when it reports the end
+   of the string, or when building the result objects fails.
+*/
+static PyObject *
+formatteriter_next(formatteriterobject *it)
+{
+        SubString literal;
+        SubString field_name;
+        SubString format_spec;
+        Py_UNICODE conversion;
+        int is_markup;
+        int format_spec_needs_expanding;
+        PyObject *is_markup_bool = NULL;
+        PyObject *literal_str = NULL;
+        PyObject *field_name_str = NULL;
+        PyObject *format_spec_str = NULL;
+        PyObject *conversion_str = NULL;
+        /* named "tuple" rather than "result" so that it cannot hide
+           the integer status code tested by the assert below */
+        PyObject *tuple = NULL;
+        int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal,
+                                         &field_name, &format_spec, &conversion,
+                                         &format_spec_needs_expanding);
+
+        /* all of the SubString objects point into it->str, so no
+           memory management needs to be done on them */
+
+        if (result == 0) {
+                /* error has already been set */
+                return NULL;
+        }
+        if (result == 1) {
+                /* end of iterator */
+                return NULL;
+        }
+        assert(result == 2);
+
+        is_markup_bool = PyBool_FromLong(is_markup);
+        if (!is_markup_bool)
+                goto done;
+
+        if (is_markup) {
+                /* field_name, format_spec, and conversion are
+                   returned */
+                literal_str = Py_None;
+                Py_INCREF(literal_str);
+
+                field_name_str = SubString_new_object(&field_name);
+                if (field_name_str == NULL)
+                        goto done;
+
+                format_spec_str = SubString_new_object(&format_spec);
+                if (format_spec_str == NULL)
+                        goto done;
+
+                /* if the conversion is not specified, return
+                   a None, otherwise create a one length
+                   string with the conversion character */
+                if (conversion == '\0') {
+                        conversion_str = Py_None;
+                        Py_INCREF(conversion_str);
+                } else
+                        conversion_str = PyUnicode_FromUnicode(&conversion,
+                                                               1);
+                if (conversion_str == NULL)
+                        goto done;
+        } else {
+                /* only literal is returned */
+                literal_str = SubString_new_object(&literal);
+                if (literal_str == NULL)
+                        goto done;
+
+                field_name_str = Py_None;
+                format_spec_str = Py_None;
+                conversion_str = Py_None;
+
+                Py_INCREF(field_name_str);
+                Py_INCREF(format_spec_str);
+                Py_INCREF(conversion_str);
+        }
+
+        /* return a tuple of values; on failure tuple stays NULL */
+        tuple = PyTuple_Pack(5, is_markup_bool, literal_str,
+                             field_name_str, format_spec_str,
+                             conversion_str);
+
+done:
+        /* PyTuple_Pack takes its own reference to every item, so the
+           references held here must be released on success as well
+           as on failure */
+        Py_XDECREF(is_markup_bool);
+        Py_XDECREF(literal_str);
+        Py_XDECREF(field_name_str);
+        Py_XDECREF(format_spec_str);
+        Py_XDECREF(conversion_str);
+        return tuple;
+}
+
+/* the iterator has no methods of its own; only the sentinel entry */
+static PyMethodDef formatteriter_methods[] = {
+ 	{NULL,		NULL}		/* sentinel */
+};
+
+/* tp_traverse slot: report the one object reference an iterator
+   holds.  Instances are allocated with PyObject_GC_New and tracked by
+   the collector, so the type must carry Py_TPFLAGS_HAVE_GC and supply
+   this function; without it the collector would call a NULL slot. */
+static int
+formatteriter_traverse(formatteriterobject *it, visitproc visit, void *arg)
+{
+	Py_VISIT(it->str);
+	return 0;
+}
+
+/* type object for the iterator returned by _unicodeformatter_iterator */
+PyTypeObject PyFormatterIter_Type = {
+	PyVarObject_HEAD_INIT(&PyType_Type, 0)
+	"formatteriterator",			/* tp_name */
+	sizeof(formatteriterobject),		/* tp_basicsize */
+	0,					/* tp_itemsize */
+	/* methods */
+	(destructor)formatteriter_dealloc,	/* tp_dealloc */
+	0,					/* tp_print */
+	0,					/* tp_getattr */
+	0,					/* tp_setattr */
+	0,					/* tp_compare */
+	0,					/* tp_repr */
+	0,					/* tp_as_number */
+	0,					/* tp_as_sequence */
+	0,					/* tp_as_mapping */
+	0,					/* tp_hash */
+	0,					/* tp_call */
+	0,					/* tp_str */
+	PyObject_GenericGetAttr,		/* tp_getattro */
+	0,					/* tp_setattro */
+	0,					/* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
+	0,					/* tp_doc */
+	(traverseproc)formatteriter_traverse,	/* tp_traverse */
+	0,					/* tp_clear */
+	0,					/* tp_richcompare */
+	0,					/* tp_weaklistoffset */
+	PyObject_SelfIter,			/* tp_iter */
+	(iternextfunc)formatteriter_next,	/* tp_iternext */
+	formatteriter_methods,			/* tp_methods */
+	0,					/* tp_members */
+};
+
+/* Create a formatter iterator over str.  The argument is used with
+   the PyUnicode_* accessors without being checked here.  Returns a
+   new reference, or NULL if the iterator could not be allocated. */
+PyObject *
+_unicodeformatter_iterator(PyObject *str)
+{
+	formatteriterobject *iter = PyObject_GC_New(formatteriterobject,
+						    &PyFormatterIter_Type);
+
+	if (iter == NULL)
+		return NULL;
+
+	/* the iterator keeps its own reference to str for as long as
+	   it lives */
+	Py_INCREF(str);
+	iter->str = str;
+
+	/* point the contained MarkupIterator at str's characters */
+	MarkupIterator_init(&iter->it_markup,
+			    PyUnicode_AS_UNICODE(str),
+			    PyUnicode_GET_SIZE(str));
+
+	_PyObject_GC_TRACK(iter);
+	return (PyObject *)iter;
+}
+
+/* Placeholder for the field-name lookup used by string.Formatter; no
+   lookup is implemented yet.  Always fails with NotImplementedError:
+   returning NULL without an exception set would be reported to the
+   caller as a SystemError instead of a meaningful error. */
+PyObject *
+_unicodeformatter_lookup(PyObject *field_name, PyObject *args,
+                         PyObject *kwargs)
+{
+        PyErr_SetString(PyExc_NotImplementedError,
+                        "_formatter_lookup is not implemented");
+        return NULL;
+}
+
+
 /********************* Unicode Iterator **************************/

 typedef struct {

--- a/Python/Python-ast.c
+++ b/Python/Python-ast.c
@@ -2,7 +2,7 @@


 /*
-   __version__ 56266.
+   __version__ .

   This module must be committed separately after each AST grammar change;
   The __version__ number is set to the revision number of the commit
@@ -3179,7 +3179,7 @@ init_ast(void)
        if (PyDict_SetItemString(d, "AST", (PyObject*)AST_type) < 0) return;
        if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)
                return;
-        if (PyModule_AddStringConstant(m, "__version__", "56266") < 0)
+        if (PyModule_AddStringConstant(m, "__version__", "") < 0)
                return;
        if (PyDict_SetItemString(d, "mod", (PyObject*)mod_type) < 0) return;
        if (PyDict_SetItemString(d, "Module", (PyObject*)Module_type) < 0)

--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -275,6 +275,61 @@ for which the predicate (a Boolean function) returns true.\n\
 If the predicate is None, 'lambda x: bool(x)' is assumed.\n\
 (This is identical to itertools.ifilter().)");

+/* format(value, format_spec): look up __format__ on the type of value
+   and call it with (value, format_spec).
+
+   Returns a new reference to the unicode result.  Returns NULL with
+   an exception set if the arguments cannot be parsed, the type cannot
+   be initialized, the type has no __format__ (TypeError), the call
+   itself fails, or the call returns something that is not a unicode
+   object (TypeError). */
+static PyObject *
+builtin_format(PyObject *self, PyObject *args)
+{
+        static PyObject * format_str = NULL;
+        PyObject *value;
+        PyObject *spec;
+        PyObject *meth;
+        PyObject *result;
+
+        /* Initialize cached value */
+        if (format_str == NULL) {
+                /* Initialize static variable needed by _PyType_Lookup */
+                format_str = PyUnicode_FromString("__format__");
+                if (format_str == NULL)
+                        return NULL;
+        }
+
+        if (!PyArg_ParseTuple(args, "OO:format", &value, &spec))
+                return NULL;
+
+        /* Make sure the type is initialized.  float gets initialized late */
+        if (Py_Type(value)->tp_dict == NULL)
+                if (PyType_Ready(Py_Type(value)) < 0)
+                        return NULL;
+
+        /* Find the (unbound!) __format__ method (a borrowed reference) */
+        meth = _PyType_Lookup(Py_Type(value), format_str);
+        if (meth == NULL) {
+                PyErr_Format(PyExc_TypeError,
+                             "Type %.100s doesn't define __format__",
+                             Py_Type(value)->tp_name);
+                return NULL;
+        }
+
+        /* And call it, binding it to the value */
+        result = PyObject_CallFunctionObjArgs(meth, value, spec, NULL);
+
+        /* the call returns NULL when __format__ raised; the type
+           check below must not be handed a NULL pointer */
+        if (result == NULL)
+                return NULL;
+
+        if (!PyUnicode_Check(result)) {
+                PyErr_SetString(PyExc_TypeError,
+                                "__format__ method did not return string");
+                Py_DECREF(result);
+                return NULL;
+        }
+
+        return result;
+}
+
+
+PyDoc_STRVAR(format_doc,
+"format(value, format_spec) -> string\n\
+\n\
+Returns value.__format__(format_spec).");

 static PyObject *
 builtin_chr8(PyObject *self, PyObject *args)
@@ -1676,6 +1731,7 @@ static PyMethodDef builtin_methods[] = {
 	{"eval",	builtin_eval,       METH_VARARGS, eval_doc},
 	{"exec",        builtin_exec,       METH_VARARGS, exec_doc},
 	{"filter",	builtin_filter,     METH_VARARGS, filter_doc},
+ 	{"format",	builtin_format,     METH_VARARGS, format_doc},
 	{"getattr",	builtin_getattr,    METH_VARARGS, getattr_doc},
 	{"globals",	(PyCFunction)builtin_globals,    METH_NOARGS, globals_doc},
 	{"hasattr",	builtin_hasattr,    METH_VARARGS, hasattr_doc},

--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
+/* implements the unicode (as opposed to string) version of the
+   built-in formatters for string, int, float.  that is, the versions
+   of int.__format__, etc., that take and return unicode objects */
+
+#include "Python.h"
+#include "formatter_unicode.h"
+
+#include "../Objects/stringlib/unicodedefs.h"
+
+/* these names match the prototypes declared in formatter_unicode.h.
+   NOTE(review): presumably stringlib/formatter.h defines its
+   formatting functions under the FORMAT_* names, so including it
+   after these defines produces the unicode versions -- confirm
+   against formatter.h */
+#define FORMAT_STRING unicode_unicode__format__
+#define FORMAT_LONG   unicode_long__format__
+#define FORMAT_FLOAT  unicode_float__format__
+#include "../Objects/stringlib/formatter.h"
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -660,6 +660,54 @@ sys_current_frames(PyObject *self, PyObject *noargs)
 	return _PyThread_CurrentFrames();
 }

+/* sys_formatter_iterator is used to implement
+   string.Formatter.vformat.  it parses a string and returns tuples
+   describing the parsed elements.  see unicodeobject.c's
+   _unicodeformatter_iterator for details.
+
+   the function is registered in sys_methods as "_formatter_parser",
+   so that is the name used in the argument parsing and in the error
+   message below.
+
+   takes exactly one argument, which must be a unicode object;
+   anything else raises TypeError.  returns the new iterator, or NULL
+   with an exception set. */
+static PyObject *
+sys_formatter_iterator(PyObject *self, PyObject *args)
+{
+        /* in 2.6, check type and dispatch to unicode or string
+           accordingly */
+        PyObject *str;
+
+        if (!PyArg_ParseTuple(args, "O:_formatter_parser", &str))
+                return NULL;
+
+        if (!PyUnicode_Check(str)) {
+                PyErr_SetString(PyExc_TypeError,
+                                "_formatter_parser expects unicode object");
+                return NULL;
+        }
+
+        return _unicodeformatter_iterator(str);
+}
+
+/* sys_formatter_lookup is used to implement string.Formatter.vformat.
+   it takes a PEP 3101 "field name", args, and kwargs, checks that the
+   field name is a unicode object (TypeError otherwise), and passes
+   all three on to unicodeobject.c's _unicodeformatter_lookup, whose
+   result it returns unchanged.  the intended result is a tuple
+   (index, name, object); see _unicodeformatter_lookup for details */
+static PyObject *
+sys_formatter_lookup(PyObject *self, PyObject *args)
+{
+        PyObject *field_name;
+        PyObject *positional;
+        PyObject *keywords;
+        int parsed = PyArg_ParseTuple(args, "OOO:_formatter_lookup",
+                                      &field_name, &positional, &keywords);
+
+        if (!parsed)
+                return NULL;
+
+        if (PyUnicode_Check(field_name))
+                return _unicodeformatter_lookup(field_name, positional,
+                                                keywords);
+
+        PyErr_SetString(PyExc_TypeError,
+                        "_formatter_lookup expects unicode object");
+        return NULL;
+}
+
+
 PyDoc_STRVAR(call_tracing_doc,
 "call_tracing(func, args) -> object\n\
 \n\
@@ -724,6 +772,8 @@ static PyMethodDef sys_methods[] = {
 	 callstats_doc},
 	{"_current_frames", sys_current_frames, METH_NOARGS,
 	 current_frames_doc},
+        {"_formatter_parser", sys_formatter_iterator, METH_VARARGS},
+        {"_formatter_lookup", sys_formatter_lookup, METH_VARARGS},
 	{"displayhook",	sys_displayhook, METH_O, displayhook_doc},
 	{"exc_info",	sys_exc_info, METH_NOARGS, exc_info_doc},
 	{"excepthook",	sys_excepthook, METH_VARARGS, excepthook_doc},