Commit 85a02394 authored by Ivan Tyagov

Index directly the relation between a Data Stream and its logical set (in this...

Index directly the relation between a Data Stream and its logical set (in the case of the default Wendelin model, a Data Set).
parent 2e328301
REPLACE INTO
data_stream (uid, size, version)
data_stream (uid, set_uid, size, version)
VALUES
<dtml-in prefix="loop" expr="_.range(_.len(uid))">
(
<dtml-sqlvar expr="uid[loop_item]" type="int">,
<dtml-sqlvar expr="DataStream_getSetUid[loop_item]" type="int" optional>,
<dtml-sqlvar expr="getSize[loop_item]" type="string" optional>,
<dtml-sqlvar expr="getVersion[loop_item]" type="string" optional>
)
......
......@@ -14,7 +14,8 @@
<key> <string>arguments_src</string> </key>
<value> <string>uid\n
getSize\n
getVersion</string> </value>
getVersion\n
DataStream_getSetUid</string> </value>
</item>
<item>
<key> <string>cache_time_</string> </key>
......
......@@ -4,7 +4,8 @@
#
# Catalog table holding one row per indexed Data Stream, keyed by `uid`.
# `set_uid` and `size` are nullable because the insert that fills this table
# binds both values as optional.
# `set_uid` receives the value computed by DataStream_getSetUid.
CREATE TABLE `data_stream` (
  `uid` BIGINT UNSIGNED NOT NULL,
  `set_uid` BIGINT UNSIGNED,
  `size` BIGINT SIGNED,
  `version` varchar(30) default '',
  PRIMARY KEY (`uid`)
) ENGINE=InnoDB;
\ No newline at end of file
"""
Each Data Stream can be grouped in a logical set with a dedicated portal type which represents the set.
For ebulk this portal type is "Data Set" but in theory it can be anything. Thus name of script is intentionally using 'Set'.
This script works for the default ingestion model used by ebulk:
- Data Ingestion
- Data Ingestion Line
(aggregate) -> Data Stream
(aggregate) -> Date Set
Note: for this to work we must working an index relation between Data Ingestion Line and Data Stream!
"""
data_stream = context.getObject()
data_ingestion_line = data_stream.portal_catalog.getResultValue(
portal_type = "Data Ingestion Line",
aggregate_uid = context.getObject().getUid())
#context.log("DS=%s , DI=%s" %(data_stream, data_ingestion_line))
if data_ingestion_line is not None:
#context.log(data_ingestion_line.getRelativeUrl())
data_set = data_ingestion_line.getAggregateValue(portal_type = "Data Set")
if data_set is not None:
#context.log("set_uid=%s" %data_set.getUid())
return data_set.getUid()
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>DataStream_getSetUid</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment