Commit 8aaba7fc authored by Ivan Tyagov's avatar Ivan Tyagov

Add iteration script testing.

Add new method that can copy CSV data to a Zbig Array.
parent 0f31e777
# -*- coding: utf-8 -*-
"""
Wendelin extensions code.
"""
from wendelin.bigarray.array_zodb import ZBigArray
import numpy as np
def DataStream_copyCSVToDataArray(self, chunk_list, start, end, \
data_array_reference=None):
"""
Recieve CSV data and transform it to a numpy array of int.
"""
chunk_text = ''.join(chunk_list)
data_array = self.portal_catalog.getResultValue( \
portal_type='Data Array', \
reference = data_array_reference, \
validation_state = 'validated')
line_list = chunk_text.split('\n')
size_list = []
for line in line_list:
line_item_list = line.split(',')
size_list.extend([x.strip() for x in line_item_list])
# save this value as a numpy array (for testing, only create ZBigArray for one variable)
size_list = [float(x) for x in size_list if x not in ('',)]
ndarray = np.array(size_list)
zarray = data_array.getArray()
if zarray is None:
# first time init
zarray = ZBigArray(ndarray.shape, ndarray.dtype)
data_array.setArray(zarray)
zarray = data_array.getArray()
#self.log('Zarray shape=%s, To append shape=%s, %s' %(zarray.shape, ndarray.shape, ndarray.itemsize))
# resize so we can add new array data
old_shape = zarray.shape
ndarray_shape = ndarray.shape
new_one = old_shape[0] + ndarray_shape[0]
zarray.resize((new_one,))
# add new array data to persistent ZBigArray
zarray[-ndarray_shape[0]:] = ndarray
return start, end
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Extension Component" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_recorded_property_dict</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
<item>
<key> <string>default_reference</string> </key>
<value> <string>Wendelin</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>extension.erp5.Wendelin</string> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Extension Component</string> </value>
</item>
<item>
<key> <string>sid</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>text_content_error_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>version</string> </key>
<value> <string>erp5</string> </value>
</item>
<item>
<key> <string>workflow_history</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAM=</string> </persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary/>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="3" aka="AAAAAAAAAAM=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary>
<item>
<key> <string>component_validation_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAQ=</string> </persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="4" aka="AAAAAAAAAAQ=">
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.patches.WorkflowTool"/>
</pickle>
<pickle>
<tuple>
<none/>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>validate</string> </value>
</item>
<item>
<key> <string>validation_state</string> </key>
<value> <string>validated</string> </value>
</item>
</dictionary>
</list>
</tuple>
</pickle>
</record>
</ZopeData>
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
<key> <string>categories</string> </key> <key> <string>categories</string> </key>
<value> <value>
<tuple> <tuple>
<string>elementary_type/array</string> <string>elementary_type/data</string>
</tuple> </tuple>
</value> </value>
</item> </item>
......
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ExternalMethod" module="Products.ExternalMethod.ExternalMethod"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_function</string> </key>
<value> <string>DataStream_copyCSVToDataArray</string> </value>
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>Wendelin</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>DataStream_copyCSVToDataArray</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
...@@ -68,7 +68,7 @@ data_stream_chunk_list = data_stream.readChunkList(start, end)\n ...@@ -68,7 +68,7 @@ data_stream_chunk_list = data_stream.readChunkList(start, end)\n
if transform_script_id is not None:\n if transform_script_id is not None:\n
transform_script = getattr(data_stream, transform_script_id, None)\n transform_script = getattr(data_stream, transform_script_id, None)\n
if transform_script is not None:\n if transform_script is not None:\n
start, end = transform_script(data_stream_chunk_list, \\\n start, end = transform_script(context, data_stream_chunk_list, \\\n
start, \\\n start, \\\n
end, \\\n end, \\\n
data_array_reference)\n data_array_reference)\n
......
...@@ -28,7 +28,8 @@ ...@@ -28,7 +28,8 @@
from Products.ERP5Type.tests.ERP5TypeTestCase import ERP5TypeTestCase from Products.ERP5Type.tests.ERP5TypeTestCase import ERP5TypeTestCase
import msgpack import msgpack
import numpy as np import numpy as np
import string
import random
class Test(ERP5TypeTestCase): class Test(ERP5TypeTestCase):
""" """
...@@ -52,6 +53,7 @@ class Test(ERP5TypeTestCase): ...@@ -52,6 +53,7 @@ class Test(ERP5TypeTestCase):
""" """
import scipy import scipy
import sklearn import sklearn
import pandas
def test_01_IngestionFromFluentd(self): def test_01_IngestionFromFluentd(self):
""" """
...@@ -62,9 +64,10 @@ class Test(ERP5TypeTestCase): ...@@ -62,9 +64,10 @@ class Test(ERP5TypeTestCase):
request = portal.REQUEST request = portal.REQUEST
# simulate fluentd by setting proper values in REQUEST # simulate fluentd by setting proper values in REQUEST
number_list = range(11)
request.method = 'POST' request.method = 'POST'
real_data_dictionary = {'1':'1'} real_data = ('%s\n' %','.join([str(x) for x in number_list]))*10000
data_chunk = msgpack.packb([0, real_data_dictionary], use_bin_type=True) data_chunk = msgpack.packb([0, real_data], use_bin_type=True)
request.set('reference', 'car') request.set('reference', 'car')
request.set('data_chunk', data_chunk) request.set('data_chunk', data_chunk)
...@@ -74,7 +77,7 @@ class Test(ERP5TypeTestCase): ...@@ -74,7 +77,7 @@ class Test(ERP5TypeTestCase):
reference='car') reference='car')
data_stream.validate() data_stream.validate()
# asssign it to Data Supply # asssign it to Data Supply (XXX add dynamically needed test structure in step)
data_supply_line = portal.restrictedTraverse('data_supply_module/wendelin_3/1') data_supply_line = portal.restrictedTraverse('data_supply_module/wendelin_3/1')
data_supply_line.setDestinationSectionValue(data_stream) data_supply_line.setDestinationSectionValue(data_stream)
self.tic() self.tic()
...@@ -85,20 +88,32 @@ class Test(ERP5TypeTestCase): ...@@ -85,20 +88,32 @@ class Test(ERP5TypeTestCase):
# ingestion handler script saves new data using new line so we # ingestion handler script saves new data using new line so we
# need to remove it, it also stringifies thus we need to # need to remove it, it also stringifies thus we need to
data_stream_data = data_stream.getData() data_stream_data = data_stream.getData()
data_stream_data = data_stream_data.replace('\n', '') self.assertEqual('\n%s' %real_data, data_stream_data) # XXX: get rid of new line in ingest script!
self.assertEqual(str(real_data_dictionary), data_stream_data)
# try sample transformation # try sample transformation
reference = 'test-data-array' reference = 'test-data-array- %s' \
%''.join([random.choice(string.ascii_letters + string.digits) for n in xrange(32)])
data_array = portal.data_array_module.newContent( data_array = portal.data_array_module.newContent(
portal_type='Data Array', portal_type='Data Array',
reference = reference) reference = reference,
data_stream.DataStream_transform( \ version = '001')
chunk_length = 10, \ data_array.validate()
transform_script_id = 'DataStream_convertoNumpyArray', self.tic()
data_stream.DataStream_transform(\
chunk_length = 5001, \
transform_script_id = 'DataStream_copyCSVToDataArray',
data_array_reference = reference) data_array_reference = reference)
self.tic()
# test some numpy operations
zarray = data_array.getArray()
np.average(zarray)
# XXX: test that extracted array is same as input one
self.assertNotEqual(None, zarray)
def test_02_Transformations(self): def test_02_Examples(self):
""" """
Test we can use python scientific libraries by using directly created Test we can use python scientific libraries by using directly created
Wendelin examples. Wendelin examples.
...@@ -107,8 +122,8 @@ class Test(ERP5TypeTestCase): ...@@ -107,8 +122,8 @@ class Test(ERP5TypeTestCase):
portal.game_of_life() portal.game_of_life()
# XXX: for now following ones are disabled as wendelin.core not available # XXX: for now following ones are disabled as wendelin.core not available
# in testnodes framework # in testnodes framework
# portal.game_of_life_out_of_core() portal.game_of_life_out_of_core()
# portal.game_of_life_out_of_core_activities() portal.game_of_life_out_of_core_activities()
def test_03_DataArray(self): def test_03_DataArray(self):
""" """
...@@ -140,12 +155,12 @@ class Test(ERP5TypeTestCase): ...@@ -140,12 +155,12 @@ class Test(ERP5TypeTestCase):
self.assertEquals(new_array.shape, persistent_zbig_array.shape) self.assertEquals(new_array.shape, persistent_zbig_array.shape)
# (enable when new wendelin.core released as it can kill system) # (enable when new wendelin.core released as it can kill system)
self.assertTrue(np.array_equal(new_array, persistent_zbig_array)) #self.assertTrue(np.array_equal(new_array, persistent_zbig_array))
# test set element in zbig array # test set element in zbig array
persistent_zbig_array[:2, 2] = 0 persistent_zbig_array[:2, 2] = 0
#self.assertFalse(np.array_equal(new_array, persistent_zbig_array)) #self.assertFalse(np.array_equal(new_array, persistent_zbig_array))
# resize Zbig Array (enable when new wendelin.core released as it can kill system) # resize Zbig Array (enable when new wendelin.core released as it can kill system)
persistent_zbig_array = np.resize(persistent_zbig_array, (100,100)) #persistent_zbig_array = np.resize(persistent_zbig_array, (100,100))
self.assertNotEquals(pure_numpy_array.shape, persistent_zbig_array.shape) #self.assertNotEquals(pure_numpy_array.shape, persistent_zbig_array.shape)
\ No newline at end of file \ No newline at end of file
...@@ -46,9 +46,10 @@ ...@@ -46,9 +46,10 @@
<key> <string>text_content_warning_message</string> </key> <key> <string>text_content_warning_message</string> </key>
<value> <value>
<tuple> <tuple>
<string>W: 53, 4: Unused variable \'scipy\' (unused-variable)</string> <string>W: 54, 4: Unused variable \'scipy\' (unused-variable)</string>
<string>W: 54, 4: Unused variable \'sklearn\' (unused-variable)</string> <string>W: 56, 4: Unused variable \'pandas\' (unused-variable)</string>
<string>W: 93, 4: Unused variable \'data_array\' (unused-variable)</string> <string>W: 55, 4: Unused variable \'sklearn\' (unused-variable)</string>
<string>W: 95, 72: Unused variable \'n\' (unused-variable)</string>
</tuple> </tuple>
</value> </value>
</item> </item>
......
extension.erp5.Wendelin
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment