Commit d63792e7 authored by Douglas's avatar Douglas

added comment in the numpy.memmap tool section

It explains that the part of the code that actually writes the data file used by NumPy's memmap was run only once, to ensure there was absolutely no cache on the Python side during the cold-cache runs.
parent 6dea2f02
......@@ -218,12 +218,15 @@ def process_data(root, big_array, big_index, columns, tool):
message_list.append('numpy in memory result: %s' % result)
if tool == 'numpy.memmap':
import os.path as path
filename = path.join('/tmp', 'numpy.dat')
write_fp = np.memmap(filename, dtype=schema, mode='w+', shape=(1430394,))
with timer('time to write numpy memmap', message_list):
write_fp[:] = row
write_fp.flush()
# The commented-out code below was run only once, to write the array
# for NumPy's memmap tool and to avoid any kind of caching on the Python side.
#
# import os.path as path
# filename = path.join('/tmp', 'numpy.dat')
# write_fp = np.memmap(filename, dtype=schema, mode='w+', shape=(1430394,))
# with timer('time to write numpy memmap', message_list):
# write_fp[:] = row
# write_fp.flush()
with timer(tool, message_list):
read_fp = np.memmap(filename, dtype=schema, mode='r', shape=(1430394,))
array = np.ndarray((1430394,), schema, buffer=read_fp)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment