Commit 73926487 authored by Kirill Smelkov's avatar Kirill Smelkov

*: It is not safe to use multiply.reduce() - it overflows

e.g.

    In [1]: multiply.reduce((1<<30, 1<<30, 1<<30))
    Out[1]: 0

instead of

    In [2]: (1<<30) * (1<<30) * (1<<30)
    Out[2]: 1237940039285380274899124224

    In [3]: 1<<90
    Out[3]: 1237940039285380274899124224

also multiply.reduce returns int64, instead of python int:

    In [4]: type( multiply.reduce([1,2,3]) )
    Out[4]: numpy.int64

which also leads to overflow-related problems: if we further compute with
this value and other integers and the result exceeds int64, it becomes a
float:

    In [5]: idx0_stop = 18446744073709551615

    In [6]: stride0   = numpy.int64(1)

    In [7]: byte0_stop = idx0_stop * stride0

    In [8]: byte0_stop
    Out[8]: 1.8446744073709552e+19

and then it becomes a real problem for BigArray.__getitem__()

    wendelin.core/bigarray/__init__.py:326: RuntimeWarning: overflow encountered in long_scalars
      page0_min  = min(byte0_start, byte0_stop+byte0_stride) // pagesize # TODO -> fileh.pagesize

and then

    >           vma0 = self._fileh.mmap(page0_min, page0_max-page0_min+1)
    E           TypeError: integer argument expected, got float

~~~~

So just avoid multiply.reduce() and do our own mul() properly, the same
way sum() is built into Python, and we avoid overflow-related
problems.
parent d59b15a3
......@@ -35,6 +35,7 @@ of physical RAM.
"""
from __future__ import print_function
from wendelin.lib.calc import mul
from numpy import ndarray, dtype, multiply, sign, newaxis
......@@ -82,8 +83,7 @@ class BigArray(object):
# shape, dtype -> ._stridev
# TODO take dtype.alignment into account ?
# NOTE (1,) so that multiply.reduce return 1 (not 1.0) for []
self._stridev = tuple( multiply.reduce((1,) + shape[i+1:]) * self._dtype.itemsize \
self._stridev = tuple( mul(shape[i+1:]) * self._dtype.itemsize \
for i in range(len(shape)) )
......@@ -110,7 +110,7 @@ class BigArray(object):
@property
def size(self):
return multiply.reduce(self._shape)
return mul(self._shape)
def __len__(self):
# lengths of the first axis
......
......@@ -19,6 +19,7 @@
from wendelin.bigarray import BigArray
from wendelin.bigfile import BigFile
from wendelin.lib.mem import memcpy
from wendelin.lib.calc import mul
from numpy import ndarray, dtype, int32, uint32, uint8, all, zeros, arange, \
multiply, array_equal, asarray
......@@ -288,7 +289,7 @@ def test_bigarray_indexing_Nd():
# test data - all items are unique - so we can check array by content
# NOTE +PS so that BigFile_Data has no problem loading last blk
# (else data slice will be smaller than buf)
data = arange(multiply.reduce(shape) + PS, dtype=uint32)
data = arange(mul(shape) + PS, dtype=uint32)
# synthetic bigfile that only loads data from numpy array
class BigFile_Data_RO(BigFile_Data):
......@@ -299,7 +300,7 @@ def test_bigarray_indexing_Nd():
fh = f.fileh_open()
A = BigArray(shape, uint32, fh) # bigarray with test data and shape
A_ = data[:multiply.reduce(shape)].reshape(shape) # ndarray ----//----
A_ = data[:mul(shape)].reshape(shape) # ndarray ----//----
# AA[key] -> A[key], A_[key]
AA = DoubleGet(A, A_)
......
# -*- coding: utf-8 -*-
# Wendelin.core | Calculation helpers
# Copyright (C) 2015 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Open Source Initiative approved licenses and Convey
# the resulting work. Corresponding source of such a combination shall include
# the source code for all other software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# like multiply.reduce() but does not overflow
# ( in other words `mul` is for * the same as `sum` is for + )
def mul(args, start=1):
    """Return `start` multiplied by every item of `args`, left to right.

    `mul` is for `*` what the builtin `sum` is for `+`.  The product is
    computed with the plain `*` operator of the operands, so for Python
    integers the result is an arbitrary-precision integer.

    args  -- iterable of factors; may be empty.
    start -- initial value of the product (default 1); returned as is when
             `args` is empty.  It is never modified.
    """
    v = start
    for x in args:
        # NOTE `v = v * x`, not `v *= x`: the in-place form would call
        # `__imul__` on the object passed as `start` and thus mutate the
        # caller's value when it is mutable (e.g. a list or an ndarray).
        v = v * x
    return v
# -*- coding: utf-8 -*-
# Wendelin.core.calc | Tests
# Copyright (C) 2015 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Open Source Initiative approved licenses and Convey
# the resulting work. Corresponding source of such a combination shall include
# the source code for all other software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
from wendelin.lib.calc import mul
def test_mul():
    """Check mul() on small products, explicit start values and big ints."""
    # (positional arguments for mul, expected product)
    cases = (
        (([1],),                1),
        (([1], 5),              5),
        (([0], 5),              0),
        (([1, 2, 3, 4, 5],),    120),
        (([1, 2, 3, 4, 5, 6],), 720),
    )
    for call_args, expected in cases:
        assert mul(*call_args) == expected

    # the product of three 2^30 factors must be exactly 2^90 - no overflow
    big = 1 << 30
    assert mul([big, big, big]) == 1 << 90
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment