Commit 45d9c91d authored by Antoine Pitrou

Issue #3163: The struct module gets new format characters 'n' and 'N' supporting C integer types `ssize_t` and `size_t`, respectively.
parent 15a66cf1
...@@ -187,17 +187,24 @@ platform-dependent. ...@@ -187,17 +187,24 @@ platform-dependent.
| ``Q`` | :c:type:`unsigned long | integer | 8 | \(2), \(3) | | ``Q`` | :c:type:`unsigned long | integer | 8 | \(2), \(3) |
| | long` | | | | | | long` | | | |
+--------+--------------------------+--------------------+----------------+------------+ +--------+--------------------------+--------------------+----------------+------------+
| ``f`` | :c:type:`float` | float | 4 | \(4) | | ``n`` | :c:type:`ssize_t` | integer | | \(4) |
+--------+--------------------------+--------------------+----------------+------------+ +--------+--------------------------+--------------------+----------------+------------+
| ``d`` | :c:type:`double` | float | 8 | \(4) | | ``N`` | :c:type:`size_t` | integer | | \(4) |
+--------+--------------------------+--------------------+----------------+------------+
| ``f`` | :c:type:`float` | float | 4 | \(5) |
+--------+--------------------------+--------------------+----------------+------------+
| ``d`` | :c:type:`double` | float | 8 | \(5) |
+--------+--------------------------+--------------------+----------------+------------+ +--------+--------------------------+--------------------+----------------+------------+
| ``s`` | :c:type:`char[]` | bytes | | | | ``s`` | :c:type:`char[]` | bytes | | |
+--------+--------------------------+--------------------+----------------+------------+ +--------+--------------------------+--------------------+----------------+------------+
| ``p`` | :c:type:`char[]` | bytes | | | | ``p`` | :c:type:`char[]` | bytes | | |
+--------+--------------------------+--------------------+----------------+------------+ +--------+--------------------------+--------------------+----------------+------------+
| ``P`` | :c:type:`void \*` | integer | | \(5) | | ``P`` | :c:type:`void \*` | integer | | \(6) |
+--------+--------------------------+--------------------+----------------+------------+ +--------+--------------------------+--------------------+----------------+------------+
.. versionchanged:: 3.3
Added support for the ``'n'`` and ``'N'`` formats.
Notes: Notes:
(1) (1)
...@@ -219,11 +226,17 @@ Notes: ...@@ -219,11 +226,17 @@ Notes:
Use of the :meth:`__index__` method for non-integers is new in 3.2. Use of the :meth:`__index__` method for non-integers is new in 3.2.
(4) (4)
The ``'n'`` and ``'N'`` conversion codes are only available for the native
size (selected as the default or with the ``'@'`` byte order character).
For the standard size, you can use whichever of the other integer formats
fits your application.
(5)
For the ``'f'`` and ``'d'`` conversion codes, the packed representation uses For the ``'f'`` and ``'d'`` conversion codes, the packed representation uses
the IEEE 754 binary32 (for ``'f'``) or binary64 (for ``'d'``) format, the IEEE 754 binary32 (for ``'f'``) or binary64 (for ``'d'``) format,
regardless of the floating-point format used by the platform. regardless of the floating-point format used by the platform.
(5) (6)
The ``'P'`` format character is only available for the native byte ordering The ``'P'`` format character is only available for the native byte ordering
(selected as the default or with the ``'@'`` byte order character). The byte (selected as the default or with the ``'@'`` byte order character). The byte
order character ``'='`` chooses to use little- or big-endian ordering based order character ``'='`` chooses to use little- or big-endian ordering based
......
...@@ -8,9 +8,19 @@ from test.support import run_unittest ...@@ -8,9 +8,19 @@ from test.support import run_unittest
ISBIGENDIAN = sys.byteorder == "big" ISBIGENDIAN = sys.byteorder == "big"
IS32BIT = sys.maxsize == 0x7fffffff IS32BIT = sys.maxsize == 0x7fffffff
integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q' integer_codes = 'b', 'B', 'h', 'H', 'i', 'I', 'l', 'L', 'q', 'Q', 'n', 'N'
byteorders = '', '@', '=', '<', '>', '!' byteorders = '', '@', '=', '<', '>', '!'
def iter_integer_formats(byteorders=byteorders):
    """Yield each usable (code, byteorder) pair of integer format characters.

    Every code in ``integer_codes`` is combined with every entry of
    *byteorders*, except for combinations that are skipped:

    * native 'q'/'Q' (byte order '' or '@') when HAVE_LONG_LONG is false;
    * 'n'/'N' with any non-native byte order ('=', '<', '>', '!').
    """
    native_orders = ('', '@')
    for code in integer_codes:
        for byteorder in byteorders:
            if byteorder in native_orders:
                # Native 'q'/'Q' need C long long support.
                if code in ('q', 'Q') and not HAVE_LONG_LONG:
                    continue
            elif code in ('n', 'N'):
                # 'n'/'N' only exist in native mode.
                continue
            yield code, byteorder
# Native 'q' packing isn't available on systems that don't have the C # Native 'q' packing isn't available on systems that don't have the C
# long long type. # long long type.
try: try:
...@@ -141,14 +151,13 @@ class StructTest(unittest.TestCase): ...@@ -141,14 +151,13 @@ class StructTest(unittest.TestCase):
} }
# standard integer sizes # standard integer sizes
for code in integer_codes: for code, byteorder in iter_integer_formats(('=', '<', '>', '!')):
for byteorder in '=', '<', '>', '!': format = byteorder+code
format = byteorder+code size = struct.calcsize(format)
size = struct.calcsize(format) self.assertEqual(size, expected_size[code])
self.assertEqual(size, expected_size[code])
# native integer sizes # native integer sizes
native_pairs = 'bB', 'hH', 'iI', 'lL' native_pairs = 'bB', 'hH', 'iI', 'lL', 'nN'
if HAVE_LONG_LONG: if HAVE_LONG_LONG:
native_pairs += 'qQ', native_pairs += 'qQ',
for format_pair in native_pairs: for format_pair in native_pairs:
...@@ -166,9 +175,11 @@ class StructTest(unittest.TestCase): ...@@ -166,9 +175,11 @@ class StructTest(unittest.TestCase):
if HAVE_LONG_LONG: if HAVE_LONG_LONG:
self.assertLessEqual(8, struct.calcsize('q')) self.assertLessEqual(8, struct.calcsize('q'))
self.assertLessEqual(struct.calcsize('l'), struct.calcsize('q')) self.assertLessEqual(struct.calcsize('l'), struct.calcsize('q'))
self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('i'))
self.assertGreaterEqual(struct.calcsize('n'), struct.calcsize('P'))
def test_integers(self): def test_integers(self):
# Integer tests (bBhHiIlLqQ). # Integer tests (bBhHiIlLqQnN).
import binascii import binascii
class IntTester(unittest.TestCase): class IntTester(unittest.TestCase):
...@@ -182,11 +193,11 @@ class StructTest(unittest.TestCase): ...@@ -182,11 +193,11 @@ class StructTest(unittest.TestCase):
self.byteorder) self.byteorder)
self.bytesize = struct.calcsize(format) self.bytesize = struct.calcsize(format)
self.bitsize = self.bytesize * 8 self.bitsize = self.bytesize * 8
if self.code in tuple('bhilq'): if self.code in tuple('bhilqn'):
self.signed = True self.signed = True
self.min_value = -(2**(self.bitsize-1)) self.min_value = -(2**(self.bitsize-1))
self.max_value = 2**(self.bitsize-1) - 1 self.max_value = 2**(self.bitsize-1) - 1
elif self.code in tuple('BHILQ'): elif self.code in tuple('BHILQN'):
self.signed = False self.signed = False
self.min_value = 0 self.min_value = 0
self.max_value = 2**self.bitsize - 1 self.max_value = 2**self.bitsize - 1
...@@ -316,14 +327,23 @@ class StructTest(unittest.TestCase): ...@@ -316,14 +327,23 @@ class StructTest(unittest.TestCase):
struct.pack, self.format, struct.pack, self.format,
obj) obj)
for code in integer_codes: for code, byteorder in iter_integer_formats():
for byteorder in byteorders: format = byteorder+code
if (byteorder in ('', '@') and code in ('q', 'Q') and t = IntTester(format)
not HAVE_LONG_LONG): t.run()
continue
def test_nN_code(self):
# n and N don't exist in standard sizes
def assertStructError(func, *args, **kwargs):
with self.assertRaises(struct.error) as cm:
func(*args, **kwargs)
self.assertIn("bad char in struct format", str(cm.exception))
for code in 'nN':
for byteorder in ('=', '<', '>', '!'):
format = byteorder+code format = byteorder+code
t = IntTester(format) assertStructError(struct.calcsize, format)
t.run() assertStructError(struct.pack, format, 0)
assertStructError(struct.unpack, format, b"")
def test_p_code(self): def test_p_code(self):
# Test p ("Pascal string") code. # Test p ("Pascal string") code.
...@@ -377,14 +397,10 @@ class StructTest(unittest.TestCase): ...@@ -377,14 +397,10 @@ class StructTest(unittest.TestCase):
self.assertRaises(OverflowError, struct.pack, ">f", big) self.assertRaises(OverflowError, struct.pack, ">f", big)
def test_1530559(self): def test_1530559(self):
for byteorder in '', '@', '=', '<', '>', '!': for code, byteorder in iter_integer_formats():
for code in integer_codes: format = byteorder + code
if (byteorder in ('', '@') and code in ('q', 'Q') and self.assertRaises(struct.error, struct.pack, format, 1.0)
not HAVE_LONG_LONG): self.assertRaises(struct.error, struct.pack, format, 1.5)
continue
format = byteorder + code
self.assertRaises(struct.error, struct.pack, format, 1.0)
self.assertRaises(struct.error, struct.pack, format, 1.5)
self.assertRaises(struct.error, struct.pack, 'P', 1.0) self.assertRaises(struct.error, struct.pack, 'P', 1.0)
self.assertRaises(struct.error, struct.pack, 'P', 1.5) self.assertRaises(struct.error, struct.pack, 'P', 1.5)
......
...@@ -294,6 +294,9 @@ Core and Builtins ...@@ -294,6 +294,9 @@ Core and Builtins
Library Library
------- -------
- Issue #3163: The struct module gets new format characters 'n' and 'N'
supporting C integer types ``ssize_t`` and ``size_t``, respectively.
- Issue #13099: Fix sqlite3.Cursor.lastrowid under a Turkish locale. - Issue #13099: Fix sqlite3.Cursor.lastrowid under a Turkish locale.
Reported and diagnosed by Thomas Kluyver. Reported and diagnosed by Thomas Kluyver.
......
...@@ -58,6 +58,7 @@ typedef struct { char c; long x; } st_long; ...@@ -58,6 +58,7 @@ typedef struct { char c; long x; } st_long;
typedef struct { char c; float x; } st_float; typedef struct { char c; float x; } st_float;
typedef struct { char c; double x; } st_double; typedef struct { char c; double x; } st_double;
typedef struct { char c; void *x; } st_void_p; typedef struct { char c; void *x; } st_void_p;
typedef struct { char c; size_t x; } st_size_t;
#define SHORT_ALIGN (sizeof(st_short) - sizeof(short)) #define SHORT_ALIGN (sizeof(st_short) - sizeof(short))
#define INT_ALIGN (sizeof(st_int) - sizeof(int)) #define INT_ALIGN (sizeof(st_int) - sizeof(int))
...@@ -65,6 +66,7 @@ typedef struct { char c; void *x; } st_void_p; ...@@ -65,6 +66,7 @@ typedef struct { char c; void *x; } st_void_p;
#define FLOAT_ALIGN (sizeof(st_float) - sizeof(float)) #define FLOAT_ALIGN (sizeof(st_float) - sizeof(float))
#define DOUBLE_ALIGN (sizeof(st_double) - sizeof(double)) #define DOUBLE_ALIGN (sizeof(st_double) - sizeof(double))
#define VOID_P_ALIGN (sizeof(st_void_p) - sizeof(void *)) #define VOID_P_ALIGN (sizeof(st_void_p) - sizeof(void *))
#define SIZE_T_ALIGN (sizeof(st_size_t) - sizeof(size_t))
/* We can't support q and Q in native mode unless the compiler does; /* We can't support q and Q in native mode unless the compiler does;
in std mode, they're 8 bytes on all platforms. */ in std mode, they're 8 bytes on all platforms. */
...@@ -213,6 +215,52 @@ get_ulonglong(PyObject *v, unsigned PY_LONG_LONG *p) ...@@ -213,6 +215,52 @@ get_ulonglong(PyObject *v, unsigned PY_LONG_LONG *p)
#endif #endif
/* Convert the Python object v to a C Py_ssize_t and store it in *p.

   v is first passed through get_pylong(); the object it returns is
   released here once the value has been extracted.

   Returns 0 on success.  Returns -1 on failure: either get_pylong()
   returned NULL, or PyLong_AsSsize_t() reported an error.  An
   OverflowError is replaced by StructError("argument out of range");
   any other pending exception is left as is. */

static int
get_ssize_t(PyObject *v, Py_ssize_t *p)
{
    PyObject *as_long;
    Py_ssize_t value;

    as_long = get_pylong(v);
    if (as_long == NULL)
        return -1;
    assert(PyLong_Check(as_long));
    value = PyLong_AsSsize_t(as_long);
    Py_DECREF(as_long);

    /* -1 is a legitimate value; it only signals an error when an
       exception is pending as well. */
    if (value != (Py_ssize_t)-1 || !PyErr_Occurred()) {
        *p = value;
        return 0;
    }

    if (PyErr_ExceptionMatches(PyExc_OverflowError))
        PyErr_SetString(StructError,
                        "argument out of range");
    return -1;
}
/* Convert the Python object v to a C size_t and store it in *p.

   v is first passed through get_pylong(); the object it returns is
   released here once the value has been extracted.

   Returns 0 on success.  Returns -1 on failure: either get_pylong()
   returned NULL, or PyLong_AsSize_t() reported an error.  An
   OverflowError is replaced by StructError("argument out of range");
   any other pending exception is left as is. */

static int
get_size_t(PyObject *v, size_t *p)
{
    PyObject *as_long;
    size_t value;

    as_long = get_pylong(v);
    if (as_long == NULL)
        return -1;
    assert(PyLong_Check(as_long));
    value = PyLong_AsSize_t(as_long);
    Py_DECREF(as_long);

    /* (size_t)-1 is a legitimate value; it only signals an error when
       an exception is pending as well. */
    if (value != (size_t)-1 || !PyErr_Occurred()) {
        *p = value;
        return 0;
    }

    if (PyErr_ExceptionMatches(PyExc_OverflowError))
        PyErr_SetString(StructError,
                        "argument out of range");
    return -1;
}
#define RANGE_ERROR(x, f, flag, mask) return _range_error(f, flag) #define RANGE_ERROR(x, f, flag, mask) return _range_error(f, flag)
...@@ -369,6 +417,23 @@ nu_ulong(const char *p, const formatdef *f) ...@@ -369,6 +417,23 @@ nu_ulong(const char *p, const formatdef *f)
return PyLong_FromUnsignedLong(x); return PyLong_FromUnsignedLong(x);
} }
/* Native unpack for 'n': read sizeof(Py_ssize_t) bytes from p and return
   them as a Python int.  memcpy is used instead of a pointer cast, so p
   is never dereferenced as a Py_ssize_t.  The formatdef argument is
   unused. */
static PyObject *
nu_ssize_t(const char *p, const formatdef *f)
{
    Py_ssize_t value;

    memcpy(&value, p, sizeof(value));
    return PyLong_FromSsize_t(value);
}
/* Native unpack for 'N': read sizeof(size_t) bytes from p and return
   them as a Python int.  memcpy is used instead of a pointer cast, so p
   is never dereferenced as a size_t.  The formatdef argument is
   unused. */
static PyObject *
nu_size_t(const char *p, const formatdef *f)
{
    size_t value;

    memcpy(&value, p, sizeof(value));
    return PyLong_FromSize_t(value);
}
/* Native mode doesn't support q or Q unless the platform C supports /* Native mode doesn't support q or Q unless the platform C supports
long long (or, on Windows, __int64). */ long long (or, on Windows, __int64). */
...@@ -558,6 +623,26 @@ np_ulong(char *p, PyObject *v, const formatdef *f) ...@@ -558,6 +623,26 @@ np_ulong(char *p, PyObject *v, const formatdef *f)
return 0; return 0;
} }
/* Native pack for 'n': convert v with get_ssize_t() and copy the
   resulting sizeof(Py_ssize_t) bytes to p.  Returns 0 on success, or -1
   when the conversion fails (nothing is written to p in that case).
   The formatdef argument is unused. */
static int
np_ssize_t(char *p, PyObject *v, const formatdef *f)
{
    Py_ssize_t value;

    if (get_ssize_t(v, &value) >= 0) {
        memcpy(p, &value, sizeof(value));
        return 0;
    }
    return -1;
}
/* Native pack for 'N': convert v with get_size_t() and copy the
   resulting sizeof(size_t) bytes to p.  Returns 0 on success, or -1
   when the conversion fails (nothing is written to p in that case).
   The formatdef argument is unused. */
static int
np_size_t(char *p, PyObject *v, const formatdef *f)
{
    size_t value;

    if (get_size_t(v, &value) >= 0) {
        memcpy(p, &value, sizeof(value));
        return 0;
    }
    return -1;
}
#ifdef HAVE_LONG_LONG #ifdef HAVE_LONG_LONG
static int static int
...@@ -651,6 +736,8 @@ static formatdef native_table[] = { ...@@ -651,6 +736,8 @@ static formatdef native_table[] = {
{'I', sizeof(int), INT_ALIGN, nu_uint, np_uint}, {'I', sizeof(int), INT_ALIGN, nu_uint, np_uint},
{'l', sizeof(long), LONG_ALIGN, nu_long, np_long}, {'l', sizeof(long), LONG_ALIGN, nu_long, np_long},
{'L', sizeof(long), LONG_ALIGN, nu_ulong, np_ulong}, {'L', sizeof(long), LONG_ALIGN, nu_ulong, np_ulong},
{'n', sizeof(size_t), SIZE_T_ALIGN, nu_ssize_t, np_ssize_t},
{'N', sizeof(size_t), SIZE_T_ALIGN, nu_size_t, np_size_t},
#ifdef HAVE_LONG_LONG #ifdef HAVE_LONG_LONG
{'q', sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_longlong, np_longlong}, {'q', sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_longlong, np_longlong},
{'Q', sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_ulonglong,np_ulonglong}, {'Q', sizeof(PY_LONG_LONG), LONG_LONG_ALIGN, nu_ulonglong,np_ulonglong},
...@@ -1951,7 +2038,8 @@ these can be preceded by a decimal repeat count:\n\ ...@@ -1951,7 +2038,8 @@ these can be preceded by a decimal repeat count:\n\
l:long; L:unsigned long; f:float; d:double.\n\ l:long; L:unsigned long; f:float; d:double.\n\
Special cases (preceding decimal count indicates length):\n\ Special cases (preceding decimal count indicates length):\n\
s:string (array of char); p: pascal string (with count byte).\n\ s:string (array of char); p: pascal string (with count byte).\n\
Special case (only available in native format):\n\ Special cases (only available in native format):\n\
n:ssize_t; N:size_t;\n\
P:an integer type that is wide enough to hold a pointer.\n\ P:an integer type that is wide enough to hold a pointer.\n\
Special case (not in native mode unless 'long long' in platform C):\n\ Special case (not in native mode unless 'long long' in platform C):\n\
q:long long; Q:unsigned long long\n\ q:long long; Q:unsigned long long\n\
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment