Issue #3163: The struct module gets new format characters 'n' and 'N' supporting C integer types `ssize_t` and `size_t`, respectively.

Issue #3163: The struct module gets new format characters 'n' and 'N'
supporting C integer types `ssize_t` and `size_t`, respectively.
45d9c91d · Antoine Pitrou · 15a66cf1 · 45d9c91d · 45d9c91d · 45d9c91d
Commit 45d9c91d authored Oct 06, 2011 by Antoine Pitrou
Showing with 150 additions and 30 deletions

Doc/library/struct.rst +17 -4

Lib/test/test_struct.py +41 -25

Misc/NEWS +3 -0

Modules/_struct.c +89 -1

--- a/Doc/library/struct.rst
+++ b/Doc/library/struct.rst
@@ -187,17 +187,24 @@ platform-dependent.
 | ``Q``  | :c:type:`unsigned long   | integer            | 8              | \(2), \(3) |
 |        | long`                    |                    |                |            |
 +--------+--------------------------+--------------------+----------------+------------+
-| ``f``  | :c:type:`float`          | float              | 4              | \(4)       |
+| ``n``  | :c:type:`ssize_t`        | integer            |                | \(4)       |
 +--------+--------------------------+--------------------+----------------+------------+
-| ``d``  | :c:type:`double`         | float              | 8              | \(4)       |
+| ``N``  | :c:type:`size_t`         | integer            |                | \(4)       |
++--------+--------------------------+--------------------+----------------+------------+
+| ``f``  | :c:type:`float`          | float              | 4              | \(5)       |
++--------+--------------------------+--------------------+----------------+------------+
+| ``d``  | :c:type:`double`         | float              | 8              | \(5)       |
 +--------+--------------------------+--------------------+----------------+------------+
 | ``s``  | :c:type:`char[]`         | bytes              |                |            |
 +--------+--------------------------+--------------------+----------------+------------+
 | ``p``  | :c:type:`char[]`         | bytes              |                |            |
 +--------+--------------------------+--------------------+----------------+------------+
-| ``P``  | :c:type:`void \*`        | integer            |                | \(5)       |
+| ``P``  | :c:type:`void \*`        | integer            |                | \(6)       |
 +--------+--------------------------+--------------------+----------------+------------+
+.. versionchanged:: 3.3
+   Added support for the ``'n'`` and ``'N'`` formats.
 Notes:
 (1)
@@ -219,11 +226,17 @@ Notes:
      Use of the :meth:`__index__` method for non-integers is new in 3.2.
 (4)
+   The ``'n'`` and ``'N'`` conversion codes are only available for the native
+   size (selected as the default or with the ``'@'`` byte order character).
+   For the standard size, you can use whichever of the other integer formats
+   fits your application.
+(5)
   For the ``'f'`` and ``'d'`` conversion codes, the packed representation uses
   the IEEE 754 binary32 (for ``'f'``) or binary64 (for ``'d'``) format,
   regardless of the floating-point format used by the platform.
-(5)
+(6)
   The ``'P'`` format character is only available for the native byte ordering
   (selected as the default or with the ``'@'`` byte order character). The byte
   order character ``'='`` chooses to use little- or big-endian ordering based

--- a/Lib/test/test_struct.py
+++ b/Lib/test/test_struct.py
@@ -8,9 +8,19 @@ from test.support import run_unittest
 ISBIGENDIAN = sys.byteorder == "big"
 IS32BIT = sys.maxsize == 0x7fffffff
-integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q'
+integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q', 'n', 'N'
 byteorders = '', '@', '=', '<', '>', '!'
+def iter_integer_formats(byteorders=byteorders):
+    for code in integer_codes:
+        for byteorder in byteorders:
+            if (byteorder in ('', '@') and code in ('q', 'Q') and
+                not HAVE_LONG_LONG):
+                continue
+            if (byteorder not in ('', '@') and code in ('n', 'N')):
+                continue
+            yield code, byteorder
 # Native 'q' packing isn't available on systems that don't have the C
 # long long type.
 try:
@@ -141,14 +151,13 @@ class StructTest(unittest.TestCase):
            }
        # standard integer sizes
-        for code in integer_codes:
+        for code, byteorder in iter_integer_formats(('=', '<', '>', '!')):
-            for byteorder in '=', '<', '>', '!':
+            format = byteorder+code
-                format = byteorder+code
+            size = struct.calcsize(format)
-                size = struct.calcsize(format)
+            self.assertEqual(size, expected_size[code])
-                self.assertEqual(size, expected_size[code])
        # native integer sizes
-        native_pairs = 'bB', 'hH', 'iI', 'lL'
+        native_pairs = 'bB', 'hH', 'iI', 'lL', 'nN'
        if HAVE_LONG_LONG:
            native_pairs += 'qQ',
        for format_pair in native_pairs:
@@ -166,9 +175,11 @@ class StructTest(unittest.TestCase):
        if HAVE_LONG_LONG:
            self.assertLessEqual(8, struct.calcsize('q'))
            self.assertLessEqual(struct.calcsize('l'), struct.calcsize('q'))
+        self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('i'))
+        self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('P'))
    def test_integers(self):
-        # Integer tests (bBhHiIlLqQ).
+        # Integer tests (bBhHiIlLqQnN).
        import binascii
        class IntTester(unittest.TestCase):
@@ -182,11 +193,11 @@ class StructTest(unittest.TestCase):
                                     self.byteorder)
                self.bytesize = struct.calcsize(format)
                self.bitsize = self.bytesize * 8
-                if self.code in tuple('bhilq'):
+                if self.code in tuple('bhilqn'):
                    self.signed = True
                    self.min_value = -(2**(self.bitsize-1))
                    self.max_value = 2**(self.bitsize-1) - 1
-                elif self.code in tuple('BHILQ'):
+                elif self.code in tuple('BHILQN'):
                    self.signed = False
                    self.min_value = 0
                    self.max_value = 2**self.bitsize - 1
@@ -316,14 +327,23 @@ class StructTest(unittest.TestCase):
                                      struct.pack, self.format,
                                      obj)
-        for code in integer_codes:
+        for code, byteorder in iter_integer_formats():
-            for byteorder in byteorders:
+            format = byteorder+code
-                if (byteorder in ('', '@') and code in ('q', 'Q') and
+            t = IntTester(format)
-                    not HAVE_LONG_LONG):
+            t.run()
-                    continue
+    def test_nN_code(self):
+        # n and N don't exist in standard sizes
+        def assertStructError(func, *args, **kwargs):
+            with self.assertRaises(struct.error) as cm:
+                func(*args, **kwargs)
+            self.assertIn("bad char in struct format", str(cm.exception))
+        for code in 'nN':
+            for byteorder in ('=', '<', '>', '!'):
                format = byteorder+code
-                t = IntTester(format)
+                assertStructError(struct.calcsize, format)
-                t.run()
+                assertStructError(struct.pack, format, 0)
+                assertStructError(struct.unpack, format, b"")
    def test_p_code(self):
        # Test p ("Pascal string") code.
@@ -377,14 +397,10 @@ class StructTest(unittest.TestCase):
        self.assertRaises(OverflowError, struct.pack, ">f", big)
    def test_1530559(self):
-        for byteorder in '', '@', '=', '<', '>', '!':
+        for code, byteorder in iter_integer_formats():
-            for code in integer_codes:
+            format = byteorder + code
-                if (byteorder in ('', '@') and code in ('q', 'Q') and
+            self.assertRaises(struct.error, struct.pack, format, 1.0)
-                    not HAVE_LONG_LONG):
+            self.assertRaises(struct.error, struct.pack, format, 1.5)
-                    continue
-                format = byteorder + code
-                self.assertRaises(struct.error, struct.pack, format, 1.0)
-                self.assertRaises(struct.error, struct.pack, format, 1.5)
        self.assertRaises(struct.error, struct.pack, 'P', 1.0)
        self.assertRaises(struct.error, struct.pack, 'P', 1.5)

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -294,6 +294,9 @@ Core and Builtins
 Library
 -------
+- Issue #3163: The struct module gets new format characters 'n' and 'N'
+  supporting C integer types ``ssize_t`` and ``size_t``, respectively.
 - Issue #13099: Fix sqlite3.Cursor.lastrowid under a Turkish locale.
  Reported and diagnosed by Thomas Kluyver.

--- a/Modules/_struct.c
+++ b/Modules/_struct.c
@@ -58,6 +58,7 @@ typedef struct { char c; long x; } st_long;
 typedef struct { char c; float x; } st_float;
 typedef struct { char c; double x; } st_double;
 typedef struct { char c; void *x; } st_void_p;
+typedef struct { char c; size_t x; } st_size_t;
 #define SHORT_ALIGN (sizeof(st_short) - sizeof(short))
 #define INT_ALIGN (sizeof(st_int) - sizeof(int))
@@ -65,6 +66,7 @@ typedef struct { char c; void *x; } st_void_p;
 #define FLOAT_ALIGN (sizeof(st_float) - sizeof(float))
 #define DOUBLE_ALIGN (sizeof(st_double) - sizeof(double))
 #define VOID_P_ALIGN (sizeof(st_void_p) - sizeof(void *))
+#define SIZE_T_ALIGN (sizeof(st_size_t) - sizeof(size_t))
 /* We can't support q and Q in native mode unless the compiler does;
   in std mode, they're 8 bytes on all platforms. */
@@ -213,6 +215,52 @@ get_ulonglong(PyObject *v, unsigned PY_LONG_LONG *p)
 #endif
+/* Same, but handling Py_ssize_t */
+static int
+get_ssize_t(PyObject *v, Py_ssize_t *p)
+{
+    Py_ssize_t x;
+    v = get_pylong(v);
+    if (v == NULL)
+        return -1;
+    assert(PyLong_Check(v));
+    x = PyLong_AsSsize_t(v);
+    Py_DECREF(v);
+    if (x == (Py_ssize_t)-1 && PyErr_Occurred()) {
+        if (PyErr_ExceptionMatches(PyExc_OverflowError))
+            PyErr_SetString(StructError,
+                            "argument out of range");
+        return -1;
+    }
+    *p = x;
+    return 0;
+}
+/* Same, but handling size_t */
+static int
+get_size_t(PyObject *v, size_t *p)
+{
+    size_t x;
+    v = get_pylong(v);
+    if (v == NULL)
+        return -1;
+    assert(PyLong_Check(v));
+    x = PyLong_AsSize_t(v);
+    Py_DECREF(v);
+    if (x == (size_t)-1 && PyErr_Occurred()) {
+        if (PyErr_ExceptionMatches(PyExc_OverflowError))
+            PyErr_SetString(StructError,
+                            "argument out of range");
+        return -1;
+    }
+    *p = x;
+    return 0;
+}
 #define RANGE_ERROR(x, f, flag, mask) return _range_error(f, flag)
@@ -369,6 +417,23 @@ nu_ulong(const char *p, const formatdef *f)
    return PyLong_FromUnsignedLong(x);
 }
+static PyObject *
+nu_ssize_t(const char *p, const formatdef *f)
+{
+    Py_ssize_t x;
+    memcpy((char *)&x, p, sizeof x);
+    return PyLong_FromSsize_t(x);
+}
+static PyObject *
+nu_size_t(const char *p, const formatdef *f)
+{
+    size_t x;
+    memcpy((char *)&x, p, sizeof x);
+    return PyLong_FromSize_t(x);
+}
 /* Native mode doesn't support q or Q unless the platform C supports
   long long (or, on Windows, __int64). */
@@ -558,6 +623,26 @@ np_ulong(char *p, PyObject *v, const formatdef *f)
    return 0;
 }
+static int
+np_ssize_t(char *p, PyObject *v, const formatdef *f)
+{
+    Py_ssize_t x;
+    if (get_ssize_t(v, &x) < 0)
+        return -1;
+    memcpy(p, (char *)&x, sizeof x);
+    return 0;
+}
+static int
+np_size_t(char *p, PyObject *v, const formatdef *f)
+{
+    size_t x;
+    if (get_size_t(v, &x) < 0)
+        return -1;
+    memcpy(p, (char *)&x, sizeof x);
+    return 0;
+}
 #ifdef HAVE_LONG_LONG
 static int
@@ -651,6 +736,8 @@ static formatdef native_table[] = {
    {'I',       sizeof(int),    INT_ALIGN,      nu_uint,        np_uint},
    {'l',       sizeof(long),   LONG_ALIGN,     nu_long,        np_long},
    {'L',       sizeof(long),   LONG_ALIGN,     nu_ulong,       np_ulong},
+    {'n',       sizeof(size_t), SIZE_T_ALIGN,   nu_ssize_t,     np_ssize_t},
+    {'N',       sizeof(size_t), SIZE_T_ALIGN,   nu_size_t,      np_size_t},
 #ifdef HAVE_LONG_LONG
    {'q',       sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_longlong, np_longlong},
    {'Q',       sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_ulonglong,np_ulonglong},
@@ -1951,7 +2038,8 @@ these can be preceded by a decimal repeat count:\n\
  l:long; L:unsigned long; f:float; d:double.\n\
 Special cases (preceding decimal count indicates length):\n\
  s:string (array of char); p: pascal string (with count byte).\n\
-Special case (only available in native format):\n\
+Special cases (only available in native format):\n\
+  n:ssize_t; N:size_t;\n\
  P:an integer type that is wide enough to hold a pointer.\n\
 Special case (not in native mode unless 'long long' in platform C):\n\
  q:long long; Q:unsigned long long\n\