Commit 73926487 authored by Kirill Smelkov's avatar Kirill Smelkov

*: It is not safe to use multiply.reduce() - it overflows

e.g.

    In [1]: multiply.reduce((1<<30, 1<<30, 1<<30))
    Out[1]: 0

instead of

    In [2]: (1<<30) * (1<<30) * (1<<30)
    Out[2]: 1237940039285380274899124224

    In [3]: 1<<90
    Out[3]: 1237940039285380274899124224

also multiply.reduce returns int64, instead of python int:

    In [4]: type( multiply.reduce([1,2,3]) )
    Out[4]: numpy.int64

which also leads to overflow-related problems if we further compute with
this value and other integers and the result exceeds int64 - it becomes
float:

    In [5]: idx0_stop = 18446744073709551615

    In [6]: stride0   = numpy.int64(1)

    In [7]: byte0_stop = idx0_stop * stride0

    In [8]: byte0_stop
    Out[8]: 1.8446744073709552e+19

and then it becomes a real problem for BigArray.__getitem__()

    wendelin.core/bigarray/__init__.py:326: RuntimeWarning: overflow encountered in long_scalars
      page0_min  = min(byte0_start, byte0_stop+byte0_stride) // pagesize # TODO -> fileh.pagesize

and then

    >           vma0 = self._fileh.mmap(page0_min, page0_max-page0_min+1)
    E           TypeError: integer argument expected, got float

~~~~

So just avoid multiply.reduce() and do our own mul() properly, the same
way sum() is built into python, and so we avoid overflow-related
problems.
parent d59b15a3
...@@ -35,6 +35,7 @@ of physical RAM. ...@@ -35,6 +35,7 @@ of physical RAM.
""" """
from __future__ import print_function from __future__ import print_function
from wendelin.lib.calc import mul
from numpy import ndarray, dtype, multiply, sign, newaxis from numpy import ndarray, dtype, multiply, sign, newaxis
...@@ -82,8 +83,7 @@ class BigArray(object): ...@@ -82,8 +83,7 @@ class BigArray(object):
# shape, dtype -> ._stridev # shape, dtype -> ._stridev
# TODO take dtype.alignment into account ? # TODO take dtype.alignment into account ?
# NOTE (1,) so that multiply.reduce return 1 (not 1.0) for [] self._stridev = tuple( mul(shape[i+1:]) * self._dtype.itemsize \
self._stridev = tuple( multiply.reduce((1,) + shape[i+1:]) * self._dtype.itemsize \
for i in range(len(shape)) ) for i in range(len(shape)) )
...@@ -110,7 +110,7 @@ class BigArray(object): ...@@ -110,7 +110,7 @@ class BigArray(object):
@property @property
def size(self): def size(self):
return multiply.reduce(self._shape) return mul(self._shape)
def __len__(self): def __len__(self):
# lengths of the first axis # lengths of the first axis
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
from wendelin.bigarray import BigArray from wendelin.bigarray import BigArray
from wendelin.bigfile import BigFile from wendelin.bigfile import BigFile
from wendelin.lib.mem import memcpy from wendelin.lib.mem import memcpy
from wendelin.lib.calc import mul
from numpy import ndarray, dtype, int32, uint32, uint8, all, zeros, arange, \ from numpy import ndarray, dtype, int32, uint32, uint8, all, zeros, arange, \
multiply, array_equal, asarray multiply, array_equal, asarray
...@@ -288,7 +289,7 @@ def test_bigarray_indexing_Nd(): ...@@ -288,7 +289,7 @@ def test_bigarray_indexing_Nd():
# test data - all items are unique - so we can check array by content # test data - all items are unique - so we can check array by content
# NOTE +PS so that BigFile_Data has no problem loading last blk # NOTE +PS so that BigFile_Data has no problem loading last blk
# (else data slice will be smaller than buf) # (else data slice will be smaller than buf)
data = arange(multiply.reduce(shape) + PS, dtype=uint32) data = arange(mul(shape) + PS, dtype=uint32)
# synthetic bigfile that only loads data from numpy array # synthetic bigfile that only loads data from numpy array
class BigFile_Data_RO(BigFile_Data): class BigFile_Data_RO(BigFile_Data):
...@@ -299,7 +300,7 @@ def test_bigarray_indexing_Nd(): ...@@ -299,7 +300,7 @@ def test_bigarray_indexing_Nd():
fh = f.fileh_open() fh = f.fileh_open()
A = BigArray(shape, uint32, fh) # bigarray with test data and shape A = BigArray(shape, uint32, fh) # bigarray with test data and shape
A_ = data[:multiply.reduce(shape)].reshape(shape) # ndarray ----//---- A_ = data[:mul(shape)].reshape(shape) # ndarray ----//----
# AA[key] -> A[key], A_[key] # AA[key] -> A[key], A_[key]
AA = DoubleGet(A, A_) AA = DoubleGet(A, A_)
......
# -*- coding: utf-8 -*-
# Wendelin.core | Calculation helpers
# Copyright (C) 2015 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Open Source Initiative approved licenses and Convey
# the resulting work. Corresponding source of such a combination shall include
# the source code for all other software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# like multiply.reduce() but does not overflow
# ( in other words `mul` is for * the same as `sum` is for + )
def mul(args, start=1):
    """Return `start` multiplied by every item of `args`, left to right.

    `mul` is for `*` what the builtin `sum` is for `+`.

    args  - iterable of factors; it is traversed exactly once.
    start - initial value of the product; it is returned as is when
            `args` is empty.

    The product is computed with the plain `*` operator of the operands,
    so with Python integers the result is an arbitrary-precision integer
    and does not wrap around the way numpy's multiply.reduce() does.
    """
    v = start
    for x in args:
        # NOTE `v = v * x`, not `v *= x`: the in-place form would modify
        # caller's `start` object if it is mutable (e.g. list or ndarray).
        v = v * x
    return v
# -*- coding: utf-8 -*-
# Wendelin.core.calc | Tests
# Copyright (C) 2015 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Open Source Initiative approved licenses and Convey
# the resulting work. Corresponding source of such a combination shall include
# the source code for all other software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
from wendelin.lib.calc import mul
def test_mul():
    # empty product is `start` (1 by default) - BigArray relies on this when
    # it computes the stride of the last axis as mul(shape[i+1:]) with
    # nothing left after the slice.
    assert mul([]) == 1
    assert mul(()) == 1
    assert mul([], 5) == 5

    assert mul([1]) == 1
    assert mul([1], 5) == 5
    assert mul([0], 5) == 0
    assert mul([1,2,3,4,5]) == 120
    assert mul([1,2,3,4,5,6]) == 720

    # shapes are tuples; any other iterable should work too
    assert mul((2,3,4)) == 24
    assert mul(iter([2,3,4])) == 24

    # check it does not overflow
    assert mul([1<<30, 1<<30, 1<<30]) == 1<<90
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment