*: It is not safe to use multiply.reduce() - it overflows

e.g.

    In [1]: multiply.reduce((1<<30, 1<<30, 1<<30))
    Out[1]: 0

instead of

    In [2]: (1<<30) * (1<<30) * (1<<30)
    Out[2]: 1237940039285380274899124224

    In [3]: 1<<90
    Out[3]: 1237940039285380274899124224

Also, multiply.reduce returns int64 instead of a Python int:

    In [4]: type( multiply.reduce([1,2,3]) )
    Out[4]: numpy.int64

which also leads to overflow-related problems: if we further compute with this value and other integers and the result exceeds int64, it becomes float:

    In [5]: idx0_stop = 18446744073709551615
    In [6]: stride0 = numpy.int64(1)
    In [7]: byte0_stop = idx0_stop * stride0
    In [8]: byte0_stop
    Out[8]: 1.8446744073709552e+19

and then it becomes a real problem for BigArray.__getitem__():

    wendelin.core/bigarray/__init__.py:326: RuntimeWarning: overflow encountered in long_scalars
      page0_min = min(byte0_start, byte0_stop+byte0_stride) // pagesize # TODO -> fileh.pagesize

and then

    > vma0 = self._fileh.mmap(page0_min, page0_max-page0_min+1)
    E TypeError: integer argument expected, got float

~~~~

So just avoid multiply.reduce() and implement our own mul() properly, the same way sum() is built into Python, and we avoid overflow-related problems.

*: It is not safe to use multiply.reduce() - it overflows
e.g.

    In [1]: multiply.reduce((1<<30, 1<<30, 1<<30))
    Out[1]: 0

instead of

    In [2]: (1<<30) * (1<<30) * (1<<30)
    Out[2]: 1237940039285380274899124224

    In [3]: 1<<90
    Out[3]: 1237940039285380274899124224

Also, multiply.reduce returns int64 instead of a Python int:

    In [4]: type( multiply.reduce([1,2,3]) )
    Out[4]: numpy.int64

which also leads to overflow-related problems: if we further compute with this value and other integers and the result exceeds int64, it becomes float:

    In [5]: idx0_stop = 18446744073709551615
    In [6]: stride0 = numpy.int64(1)
    In [7]: byte0_stop = idx0_stop * stride0
    In [8]: byte0_stop
    Out[8]: 1.8446744073709552e+19

and then it becomes a real problem for BigArray.__getitem__():

    wendelin.core/bigarray/__init__.py:326: RuntimeWarning: overflow encountered in long_scalars
      page0_min = min(byte0_start, byte0_stop+byte0_stride) // pagesize # TODO -> fileh.pagesize

and then

    > vma0 = self._fileh.mmap(page0_min, page0_max-page0_min+1)
    E TypeError: integer argument expected, got float

~~~~

So just avoid multiply.reduce() and implement our own mul() properly, the same way sum() is built into Python, and we avoid overflow-related problems.
73926487 · Kirill Smelkov · d59b15a3 · 73926487 · 73926487 · 73926487
Commit 73926487 authored Jun 02, 2015 by Kirill Smelkov
5 changed files
--- a/bigarray/__init__.py
+++ b/bigarray/__init__.py
@@ -35,6 +35,7 @@ of physical RAM.
 """

 from __future__ import print_function
+from wendelin.lib.calc import mul
 from numpy import ndarray, dtype, multiply, sign, newaxis


@@ -82,8 +83,7 @@ class BigArray(object):

        # shape, dtype -> ._stridev
        # TODO take dtype.alignment into account ?
-        # NOTE (1,) so that multiply.reduce return 1 (not 1.0) for []
-        self._stridev = tuple( multiply.reduce((1,) + shape[i+1:]) * self._dtype.itemsize  \
+        self._stridev = tuple( mul(shape[i+1:]) * self._dtype.itemsize  \
                                    for i in range(len(shape)) )


@@ -110,7 +110,7 @@ class BigArray(object):

    @property
    def size(self):
-        return multiply.reduce(self._shape)
+        return mul(self._shape)

    def __len__(self):
        # lengths of the first axis

--- a/bigarray/tests/test_basic.py
+++ b/bigarray/tests/test_basic.py
@@ -19,6 +19,7 @@
 from wendelin.bigarray import BigArray
 from wendelin.bigfile import BigFile
 from wendelin.lib.mem import memcpy
+from wendelin.lib.calc import mul
 from numpy import ndarray, dtype, int32, uint32, uint8, all, zeros, arange, \
        multiply, array_equal, asarray

@@ -288,7 +289,7 @@ def test_bigarray_indexing_Nd():
    # test data - all items are unique - so we can check array by content
    # NOTE +PS so that BigFile_Data has no problem loading last blk
    #      (else data slice will be smaller than buf)
-    data  = arange(multiply.reduce(shape) + PS, dtype=uint32)
+    data  = arange(mul(shape) + PS, dtype=uint32)

    # synthetic bigfile that only loads data from numpy array
    class BigFile_Data_RO(BigFile_Data):
@@ -299,7 +300,7 @@ def test_bigarray_indexing_Nd():
    fh = f.fileh_open()

    A  = BigArray(shape, uint32, fh)                    # bigarray with test data and shape
-    A_ = data[:multiply.reduce(shape)].reshape(shape)   # ndarray  ----//----
+    A_ = data[:mul(shape)].reshape(shape)               # ndarray  ----//----

    # AA[key] -> A[key], A_[key]
    AA = DoubleGet(A, A_)

--- a/lib/calc.py
+++ b/lib/calc.py
+# -*- coding: utf-8 -*-
+# Wendelin.core | Calculation helpers
+# Copyright (C) 2015  Nexedi SA and Contributors.
+#                     Kirill Smelkov <kirr@nexedi.com>
+#
+# This program is free software: you can Use, Study, Modify and Redistribute
+# it under the terms of the GNU General Public License version 3, or (at your
+# option) any later version, as published by the Free Software Foundation.
+#
+# You can also Link and Combine this program with other software covered by
+# the terms of any of the Open Source Initiative approved licenses and Convey
+# the resulting work. Corresponding source of such a combination shall include
+# the source code for all other software used.
+#
+# This program is distributed WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See COPYING file for full licensing terms.
+
+
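+# mul returns `start` multiplied by every item of `args`.
+# It is like numpy's multiply.reduce() but does not overflow: the product is
+# accumulated with plain `*`, so Python integers keep their arbitrary precision.
+# ( in other words `mul` is for * the same as `sum` is for + )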
+def mul(args, start=1):
+    v = start
+    for x in args:
+        v *= x
+    return v
--- a/lib/tests/__init__.py
+++ b/lib/tests/__init__.py
--- a/lib/tests/test_calc.py
+++ b/lib/tests/test_calc.py
+# -*- coding: utf-8 -*-
+# Wendelin.core.calc | Tests
+# Copyright (C) 2015  Nexedi SA and Contributors.
+#                     Kirill Smelkov <kirr@nexedi.com>
+#
+# This program is free software: you can Use, Study, Modify and Redistribute
+# it under the terms of the GNU General Public License version 3, or (at your
+# option) any later version, as published by the Free Software Foundation.
+#
+# You can also Link and Combine this program with other software covered by
+# the terms of any of the Open Source Initiative approved licenses and Convey
+# the resulting work. Corresponding source of such a combination shall include
+# the source code for all other software used.
+#
+# This program is distributed WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See COPYING file for full licensing terms.
+from wendelin.lib.calc import mul
+
+def test_mul():
+    assert mul([1]) == 1
+    assert mul([1], 5) == 5
+    assert mul([0], 5) == 0
+    assert mul([1,2,3,4,5]) == 120
+    assert mul([1,2,3,4,5,6]) == 720
+
+    # check it does not overflow
+    assert mul([1<<30, 1<<30, 1<<30]) == 1<<90