Commit 43f6f999 authored by Ivan Tyagov's avatar Ivan Tyagov

Roque ebulk optimizations

See merge request !81
parents f814135b 83b0372a
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ActionInformation" module="Products.CMFCore.ActionInformation"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>action</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
<item>
<key> <string>categories</string> </key>
<value>
<tuple>
<string>action_type/object_view</string>
</tuple>
</value>
</item>
<item>
<key> <string>category</string> </key>
<value> <string>object_view</string> </value>
</item>
<item>
<key> <string>condition</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>icon</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>data_streams</string> </value>
</item>
<item>
<key> <string>permissions</string> </key>
<value>
<tuple>
<string>View</string>
</tuple>
</value>
</item>
<item>
<key> <string>priority</string> </key>
<value> <float>3.0</float> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Data Streams</string> </value>
</item>
<item>
<key> <string>visible</string> </key>
<value> <int>1</int> </value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
<global name="Expression" module="Products.CMFCore.Expression"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>text</string> </key>
<value> <string>string:${object_url}/DataSet_viewDataStreams</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
"""
Get count of Data Streams for context Data set.
"""
return context.ERP5Site_getDataStreamCount(data_set_reference=context.getReference())['result']
...@@ -54,7 +54,7 @@ ...@@ -54,7 +54,7 @@
</item> </item>
<item> <item>
<key> <string>id</string> </key> <key> <string>id</string> </key>
<value> <string>DataSet_getDataStreamList</string> </value> <value> <string>DataSet_getDataStreamCount</string> </value>
</item> </item>
</dictionary> </dictionary>
</pickle> </pickle>
......
"""
This script will return all Data streams for Data set
"""
catalog_kw = {'portal_type': 'Data Ingestion Line',
'aggregate_uid': data_set_uid,
'limit': limit,
}
data_ingestion_line_list = context.portal_catalog(**catalog_kw)
if data_ingestion_line_list:
data_ingestion_uid_list = [x.uid for x in data_ingestion_line_list]
catalog_kw = {'portal_type': 'Data Stream',
'aggregate__related__uid': data_ingestion_uid_list,
'validation_state':'validated',
'select_list': ['reference', 'relative_url', 'versioning.size', 'versioning.version'],
}
return context.getPortalObject().portal_catalog(**catalog_kw)
return data_ingestion_line_list
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>data_set_uid, limit=[]</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>DataSet_getDataStreamList</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
"""
Get list of Data Streams for context Data set.
"""
data_set_uid = context.getUid()
return context.DataSet_getDataStreamList(data_set_uid, limit)
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>list_start=0, list_lines=15, limit=1000, **kw</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>DataSet_getDataStreamRowList</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ERP5 Form" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_objects</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>action</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>action_title</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>edit_order</string> </key>
<value>
<list/>
</value>
</item>
<item>
<key> <string>encoding</string> </key>
<value> <string>UTF-8</string> </value>
</item>
<item>
<key> <string>enctype</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>group_list</string> </key>
<value>
<list>
<string>left</string>
<string>right</string>
<string>center</string>
<string>bottom</string>
<string>hidden</string>
</list>
</value>
</item>
<item>
<key> <string>groups</string> </key>
<value>
<dictionary>
<item>
<key> <string>bottom</string> </key>
<value>
<list>
<string>listbox</string>
</list>
</value>
</item>
<item>
<key> <string>center</string> </key>
<value>
<list/>
</value>
</item>
<item>
<key> <string>hidden</string> </key>
<value>
<list/>
</value>
</item>
<item>
<key> <string>left</string> </key>
<value>
<list>
<string>my_title</string>
<string>my_count_label</string>
</list>
</value>
</item>
<item>
<key> <string>right</string> </key>
<value>
<list/>
</value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>DataSet_viewDataStreams</string> </value>
</item>
<item>
<key> <string>method</string> </key>
<value> <string>POST</string> </value>
</item>
<item>
<key> <string>name</string> </key>
<value> <string>DataSet_viewDataStreams</string> </value>
</item>
<item>
<key> <string>pt</string> </key>
<value> <string>form_view</string> </value>
</item>
<item>
<key> <string>row_length</string> </key>
<value> <int>4</int> </value>
</item>
<item>
<key> <string>stored_encoding</string> </key>
<value> <string>UTF-8</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>View Data Streams</string> </value>
</item>
<item>
<key> <string>unicode_mode</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>update_action</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>update_action_title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ProxyField" module="Products.ERP5Form.ProxyField"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>delegated_list</string> </key>
<value>
<list>
<string>columns</string>
<string>editable</string>
<string>list_method</string>
<string>title</string>
<string>url_columns</string>
</list>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>listbox</string> </value>
</item>
<item>
<key> <string>message_values</string> </key>
<value>
<dictionary>
<item>
<key> <string>external_validator_failed</string> </key>
<value> <string>The input failed the external validator.</string> </value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>overrides</string> </key>
<value>
<dictionary>
<item>
<key> <string>field_id</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>form_id</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>target</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>tales</string> </key>
<value>
<dictionary>
<item>
<key> <string>editable</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>field_id</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>form_id</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>list_method</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>target</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>url_columns</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>values</string> </key>
<value>
<dictionary>
<item>
<key> <string>columns</string> </key>
<value>
<list>
<tuple>
<string>title</string>
<string>Title</string>
</tuple>
<tuple>
<string>reference</string>
<string>Reference</string>
</tuple>
<tuple>
<string>size</string>
<string>Size</string>
</tuple>
</list>
</value>
</item>
<item>
<key> <string>editable</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>field_id</string> </key>
<value> <string>my_view_mode_listbox</string> </value>
</item>
<item>
<key> <string>form_id</string> </key>
<value> <string>Base_viewFieldLibrary</string> </value>
</item>
<item>
<key> <string>list_method</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
<item>
<key> <string>target</string> </key>
<value> <string>Click to edit the target</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>View Data Streams</string> </value>
</item>
<item>
<key> <string>url_columns</string> </key>
<value>
<list/>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
<global name="Method" module="Products.Formulator.MethodField"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>method_name</string> </key>
<value> <string>DataSet_getDataStreamRowList</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="LabelField" module="Products.Formulator.StandardFields"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>id</string> </key>
<value> <string>my_count_label</string> </value>
</item>
<item>
<key> <string>message_values</string> </key>
<value>
<dictionary/>
</value>
</item>
<item>
<key> <string>overrides</string> </key>
<value>
<dictionary>
<item>
<key> <string>css_class</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>default</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>editable</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>enabled</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>extra</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>hidden</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>tales</string> </key>
<value>
<dictionary>
<item>
<key> <string>css_class</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>default</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
<item>
<key> <string>description</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>editable</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>enabled</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>extra</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>hidden</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>values</string> </key>
<value>
<dictionary>
<item>
<key> <string>css_class</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>default</string> </key>
<value> <string>lala</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>editable</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>enabled</string> </key>
<value> <int>1</int> </value>
</item>
<item>
<key> <string>extra</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>hidden</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Total Data Streams Count</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
<global name="TALESMethod" module="Products.Formulator.TALESField"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_text</string> </key>
<value> <string>python: context.DataSet_getDataStreamCount()</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ProxyField" module="Products.ERP5Form.ProxyField"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>delegated_list</string> </key>
<value>
<list>
<string>editable</string>
</list>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>my_title</string> </value>
</item>
<item>
<key> <string>message_values</string> </key>
<value>
<dictionary>
<item>
<key> <string>external_validator_failed</string> </key>
<value> <string>The input failed the external validator.</string> </value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>overrides</string> </key>
<value>
<dictionary>
<item>
<key> <string>field_id</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>form_id</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>target</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>tales</string> </key>
<value>
<dictionary>
<item>
<key> <string>field_id</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>form_id</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>target</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</value>
</item>
<item>
<key> <string>values</string> </key>
<value>
<dictionary>
<item>
<key> <string>editable</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>field_id</string> </key>
<value> <string>my_view_mode_title</string> </value>
</item>
<item>
<key> <string>form_id</string> </key>
<value> <string>Base_viewFieldLibrary</string> </value>
</item>
<item>
<key> <string>target</string> </key>
<value> <string>Click to edit the target</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
...@@ -42,6 +42,7 @@ Data Release Module | view ...@@ -42,6 +42,7 @@ Data Release Module | view
Data Release | predicate Data Release | predicate
Data Release | view Data Release | view
Data Set Module | view Data Set Module | view
Data Set | data_streams
Data Set | predicate Data Set | predicate
Data Set | view Data Set | view
Data Stream Module | view Data Stream Module | view
......
"""
Script to check that a data set is properly uploaded
to Wendelin Data Lake.
How to use it: create a file_system_checksum file containing md5sum
values of all dataset files uploaded with the following format:
Format of is the same as md5sum's output:
<md5_sum> <filename.extension>
It can be generated in the original data set folder outside wendelin by doing md5sum * > output.txt
"""
import os.path
data = str(context.file_system_checksum).strip()
lines = data.split("\n")
print "Total files = ", len(lines)
print
check_result = True
for line in lines[:]:
md5_checksum = line[:32].strip()
full_filename = line[32:].strip()
filename, extension = os.path.splitext(full_filename)
extension = extension[1:]
reference = "%s/%s/%s" %(data_set_reference, filename, extension)
catalog_kw = {"portal_type": "Data Stream",
"reference": reference}
data_stream = context.portal_catalog.getResultValue(**catalog_kw)
if data_stream is None:
print "[NOT FOUND]", reference
check_result = False
else:
is_upload_ok = (data_stream.getVersion()==md5_checksum)
print md5_checksum, filename, data_stream is not None, is_upload_ok
if not is_upload_ok:
check_result = False
print
if check_result:
print "[OK] Data set correctly uploaded"
else:
print "[ERROR] Data set was not correctly uploaded"
return printed
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>data_set_reference=None</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>DataSet_checkMd5DataStreamList</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
"""
Return list of Data Streams belonging to a Date Set.
Data Ingestion line aggregates both Data Set and Data Stream.
Note: This code is quite computationally costly (for Data Streams having thousands of iles) as it needs to:
1. Query MariaDB to find ingestion lines
2. Read from ZODB both Data Ingestion Lines and Data Streams (which itself can be big too)
"""
data_ingestion_line_list = context.portal_catalog(
portal_type = "Data Ingestion Line",
aggregate_uid = context.getUid())
return [x.getAggregateValue(portal_type = "Data Stream") \
for x in data_ingestion_line_list]
"""
This script is called from ebulk client to get count of Data Streams for a Data set.
"""
from erp5.component.module.Log import log
portal = context.getPortalObject()
try:
data_set = portal.data_set_module.get(data_set_reference)
if data_set is None or data_set.getReference().endswith("_invalid"):
return { "status_code": 0, "result": 0 }
except Exception as e:
log("Unauthorized access to getDataStreamList: " + str(e))
return { "status_code": 1, "error_message": "401 - Unauthorized access. Please check your user credentials and try again." }
data_set_uid = data_set.getUid()
data_stream_list = context.DataSet_getDataStreamList(data_set_uid)
return { "status_code": 0, "result": len(data_stream_list) }
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>data_set_reference</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>ERP5Site_getDataStreamCount</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
""" """
This script is called from ebulk client to get list of Data Streams for a Data set. This script is called from ebulk client to get list of Data Streams for a Data set.
""" """
import json import json
from erp5.component.module.Log import log from erp5.component.module.Log import log
limit=[]
if batch_size:
limit=[offset, batch_size]
portal = context.getPortalObject() portal = context.getPortalObject()
try: try:
data_set = portal.data_set_module.get(data_set_reference) data_set = portal.data_set_module.get(data_set_reference)
if data_set is None or portal.ERP5Site_checkReferenceInvalidated(data_set): if data_set is None or data_set.getReference().endswith("_invalid"):
return { "status_code": 0, "result": [] } return json.dumps({ "status_code": 0, "result": [] })
except Exception as e: # fails because unauthorized access except Exception as e: # fails because unauthorized access
log("Unauthorized access to getDataStreamList: " + str(e)) log("Unauthorized access to getDataStreamList: " + str(e))
return { "status_code": 1, "error_message": "401 - Unauthorized access. Please check your user credentials and try again." } return json.dumps({ "status_code": 1, "error_message": "401 - Unauthorized access. Please check your user credentials and try again." })
data_set_uid = data_set.getUid()
data_stream_list = context.DataSet_getDataStreamList(data_set_uid, limit)
data_stream_dict = {} data_stream_dict = {}
for stream in data_set.DataSet_getDataStreamList(): for stream_brain in data_stream_list:
if stream and not portal.ERP5Site_checkReferenceInvalidated(stream) and stream.getValidationState() != "draft": reference = stream_brain.reference
data_stream_info_dict = { 'id': 'data_stream_module/'+stream.getId(), version = stream_brain.version
'size': stream.getSize(), size = stream_brain.size
'hash': stream.getVersion() } data_stream_id = stream_brain.relative_url
if stream.getReference() in data_stream_dict: data_stream_info_dict = {'id': data_stream_id,
data_stream_dict[stream.getReference()]['data-stream-list'].append(data_stream_info_dict) 'size': size,
data_stream_dict[stream.getReference()]['large-hash'] = data_stream_dict[stream.getReference()]['large-hash'] + str(stream.getVersion()) 'hash': version}
data_stream_dict[stream.getReference()]['full-size'] = int(data_stream_dict[stream.getReference()]['full-size']) + int(stream.getSize()) if reference in data_stream_dict:
else: data_stream_dict[reference]['data-stream-list'].append(data_stream_info_dict)
data_stream_dict[stream.getReference()] = { 'data-stream-list': [data_stream_info_dict], data_stream_dict[reference]['large-hash'] = data_stream_dict[reference]['large-hash'] + str(version)
'id': 'data_stream_module/'+stream.getId(), data_stream_dict[reference]['full-size'] = int(data_stream_dict[reference]['full-size']) + int(size)
'reference': stream.getReference(), else:
'large-hash': stream.getVersion(), data_stream_dict[reference] = { 'data-stream-list': [data_stream_info_dict],
'full-size': stream.getSize() } 'id': data_stream_id,
'reference': reference,
'large-hash': version,
'full-size': size}
result_dict = { 'status_code': 0, 'result': data_stream_dict.values()} result_dict = { 'status_code': 0, 'result': data_stream_dict.values()}
return json.dumps(result_dict) return json.dumps(result_dict)
...@@ -50,7 +50,7 @@ ...@@ -50,7 +50,7 @@
</item> </item>
<item> <item>
<key> <string>_params</string> </key> <key> <string>_params</string> </key>
<value> <string>data_set_reference</string> </value> <value> <string>data_set_reference, offset=0, batch_size=0, get_count=False</string> </value>
</item> </item>
<item> <item>
<key> <string>id</string> </key> <key> <string>id</string> </key>
......
...@@ -334,4 +334,31 @@ class TestDataIngestion(SecurityTestCase): ...@@ -334,4 +334,31 @@ class TestDataIngestion(SecurityTestCase):
self.portal.portal_alarms.wendelin_handle_analysis.Alarm_handleAnalysis() self.portal.portal_alarms.wendelin_handle_analysis.Alarm_handleAnalysis()
self.tic() self.tic()
def test_10_checkDataSetDataStreamRelation(self):
  """
  Data Set and its Data Streams are related through the corresponding Data Ingestion Lines
  """
  # ingest a couple of files, the second one under a "-2" suffixed reference
  first_reference = self.getRandomReference()
  ingestion_list = (
    ("some-data-1", first_reference),
    ("some-data-2", first_reference + "-2"),
  )
  for data_chunk, reference in ingestion_list:
    self.ingest(data_chunk, reference, self.CSV, self.SINGLE_INGESTION_END)
    time.sleep(1)
    self.tic()

  # get corresponding Data Streams by searching via Data Ingestion Lines of the Data Set
  data_set = self.portal.data_set_module.get(self.REF_DATASET)
  ingestion_line_result = self.portal.portal_catalog(
    portal_type='Data Ingestion Line',
    aggregate_uid=data_set.getUid())
  ingestion_line_uid_list = [line.getUid() for line in ingestion_line_result]
  data_stream_result = self.portal.portal_catalog(
    portal_type='Data Stream',
    aggregate__related__uid=ingestion_line_uid_list,
    select_list=['reference', 'relative_url', 'versioning.size', 'versioning.version'])
  expected_data_stream_list = [brain.getObject() for brain in data_stream_result]

  # assert that the list from the search is the same as DataSet_getDataStreamList
  self.assertSameSet(expected_data_stream_list, data_set.DataSet_getDataStreamList())
# XXX: new test which simulates download / upload of Data Set and increase DS version # XXX: new test which simulates download / upload of Data Set and increase DS version
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment