Commit 73926487 by Kirill Smelkov

### *: It is not safe to use multiply.reduce() - it overflows

```e.g.

In [1]: multiply.reduce((1<<30, 1<<30, 1<<30))
Out[1]: 0

In [2]: (1<<30) * (1<<30) * (1<<30)
Out[2]: 1237940039285380274899124224

In [3]: 1<<90
Out[3]: 1237940039285380274899124224

also multiply.reduce returns int64, instead of python int:

In [4]: type( multiply.reduce([1,2,3]) )
Out[4]: numpy.int64

which also leads to overflow-related problems if we further compute with
this value and other integers and the result exceeds int64 - it becomes
float:

In [5]: idx0_stop = 18446744073709551615

In [6]: stride0   = numpy.int64(1)

In [7]: byte0_stop = idx0_stop * stride0

In [8]: byte0_stop
Out[8]: 1.8446744073709552e+19

and then it becomes a real problem for BigArray.__getitem__()

wendelin.core/bigarray/__init__.py:326: RuntimeWarning: overflow encountered in long_scalars
page0_min  = min(byte0_start, byte0_stop+byte0_stride) // pagesize # TODO -> fileh.pagesize

and then

>           vma0 = self._fileh.mmap(page0_min, page0_max-page0_min+1)
E           TypeError: integer argument expected, got float

~~~~

So just avoid multiply.reduce() and do our own mul() properly, the same
way sum() is built into python, and we avoid overflow-related
problems.```
parent d59b15a3
 ... ... @@ -35,6 +35,7 @@ of physical RAM. """ from __future__ import print_function from wendelin.lib.calc import mul from numpy import ndarray, dtype, multiply, sign, newaxis ... ... @@ -82,8 +83,7 @@ class BigArray(object): # shape, dtype -> ._stridev # TODO take dtype.alignment into account ? # NOTE (1,) so that multiply.reduce return 1 (not 1.0) for [] self._stridev = tuple( multiply.reduce((1,) + shape[i+1:]) * self._dtype.itemsize \ self._stridev = tuple( mul(shape[i+1:]) * self._dtype.itemsize \ for i in range(len(shape)) ) ... ... @@ -110,7 +110,7 @@ class BigArray(object): @property def size(self): return multiply.reduce(self._shape) return mul(self._shape) def __len__(self): # lengths of the first axis ... ...
 ... ... @@ -19,6 +19,7 @@ from wendelin.bigarray import BigArray from wendelin.bigfile import BigFile from wendelin.lib.mem import memcpy from wendelin.lib.calc import mul from numpy import ndarray, dtype, int32, uint32, uint8, all, zeros, arange, \ multiply, array_equal, asarray ... ... @@ -288,7 +289,7 @@ def test_bigarray_indexing_Nd(): # test data - all items are unique - so we can check array by content # NOTE +PS so that BigFile_Data has no problem loading last blk # (else data slice will be smaller than buf) data = arange(multiply.reduce(shape) + PS, dtype=uint32) data = arange(mul(shape) + PS, dtype=uint32) # synthetic bigfile that only loads data from numpy array class BigFile_Data_RO(BigFile_Data): ... ... @@ -299,7 +300,7 @@ def test_bigarray_indexing_Nd(): fh = f.fileh_open() A = BigArray(shape, uint32, fh) # bigarray with test data and shape A_ = data[:multiply.reduce(shape)].reshape(shape) # ndarray ----//---- A_ = data[:mul(shape)].reshape(shape) # ndarray ----//---- # AA[key] -> A[key], A_[key] AA = DoubleGet(A, A_) ... ...
lib/calc.py 0 → 100644
 # -*- coding: utf-8 -*-
# Wendelin.core | Calculation helpers
# Copyright (C) 2015 Nexedi SA and Contributors.
# Kirill Smelkov
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Open Source Initiative approved licenses and Convey
# the resulting work. Corresponding source of such a combination shall include
# the source code for all other software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.

from numbers import Integral


def _pyint(x):
    """Return integer-like `x` as a Python int; return anything else as is.

    Fixed-width integer scalars (e.g. numpy.int64) silently wrap around when
    multiplied with each other, so they are widened to arbitrary-precision
    Python integers first. bool is deliberately left alone.
    """
    if isinstance(x, Integral) and not isinstance(x, bool):
        return int(x)
    return x


# like multiply.reduce() but does not overflow
# ( in other words `mul` is for * the same as `sum` is for + )
def mul(args, start=1):
    """Return `start` multiplied by every item of `args`.

    `args` is any iterable of factors; `start` is the initial value and is
    what is returned for empty `args`.

    Integer factors, including fixed-width ones such as numpy.int64, are
    multiplied as Python integers, so the product never overflows.
    Non-integer factors (floats, ...) are multiplied unchanged.

    `start` is never modified in place, the same way sum() leaves its
    `start` untouched.
    """
    v = _pyint(start)
    for x in args:
        # `v = v * x`, not `v *= x`: in-place multiply would mutate a mutable
        # `start` (list, ndarray, ...) that belongs to the caller.
        v = v * _pyint(x)
    return v
 # -*- coding: utf-8 -*-
# Wendelin.core.calc | Tests
# Copyright (C) 2015 Nexedi SA and Contributors.
# Kirill Smelkov
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Open Source Initiative approved licenses and Convey
# the resulting work. Corresponding source of such a combination shall include
# the source code for all other software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
from wendelin.lib.calc import mul


def test_mul():
    """Check mul() on empty, single, multi-item and very large inputs."""
    # empty input yields `start` (e.g. the product over an empty shape tail)
    assert mul([]) == 1
    assert mul((), 5) == 5

    assert mul([1]) == 1
    assert mul([1], 5) == 5
    assert mul([0], 5) == 0
    assert mul([1,2,3,4,5]) == 120
    assert mul([1,2,3,4,5,6]) == 720

    # check it does not overflow
    assert mul([1<<30, 1<<30, 1<<30]) == 1<<90
