Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
W
wendelin
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Léo-Paul Géneau
wendelin
Commits
41b01fb5
Commit
41b01fb5
authored
May 26, 2023
by
Ivan Tyagov
Browse files
Options
Browse Files
Download
Plain Diff
Changes in 'erp5_wendelin_data_lake_ingestion' ...
See merge request
nexedi/wendelin!138
parents
a7ed9e96
b8f6c3d0
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
2 additions
and
172 deletions
+2
-172
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/DataLake_stopIngestionList.py
...ins/erp5_wendelin_data_lake/DataLake_stopIngestionList.py
+0
-108
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/DataLake_stopIngestionList.xml
...ns/erp5_wendelin_data_lake/DataLake_stopIngestionList.xml
+0
-62
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDescriptorHTMLContent.xml
..._wendelin_data_lake/ERP5Site_getDescriptorHTMLContent.xml
+1
-1
bt5/erp5_wendelin_data_lake_ui/PathTemplateItem/web_page_module/fif_gadget_erp5_page_file_js.js
...plateItem/web_page_module/fif_gadget_erp5_page_file_js.js
+1
-1
No files found.
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/DataLake_stopIngestionList.py
deleted
100644 → 0
View file @
a7ed9e96
from
erp5.component.module.Log
import
log
from
Products.ZSQLCatalog.SQLCatalog
import
Query
import
hashlib
CHUNK_SIZE
=
200000
def
getHash
(
data_stream
):
hash_md5
=
hashlib
.
md5
()
data_stream_chunk
=
None
n_chunk
=
0
chunk_size
=
CHUNK_SIZE
while
True
:
start_offset
=
n_chunk
*
chunk_size
end_offset
=
n_chunk
*
chunk_size
+
chunk_size
try
:
data_stream_chunk
=
''
.
join
(
data_stream
.
readChunkList
(
start_offset
,
end_offset
))
except
Exception
:
# data stream is empty
data_stream_chunk
=
""
hash_md5
.
update
(
data_stream_chunk
)
if
data_stream_chunk
==
""
:
break
n_chunk
+=
1
return
hash_md5
.
hexdigest
()
def
isInterruptedAbandonedSplitIngestion
(
reference
):
from
DateTime
import
DateTime
day_hours
=
1.0
/
24
/
60
*
60
*
24
# started split data ingestions for reference
catalog_kw
=
{
'portal_type'
:
'Data Ingestion'
,
'simulation_state'
:
'started'
,
'reference'
:
reference
}
invalidate
=
True
for
data_ingestion
in
portal_catalog
(
**
catalog_kw
):
# check that all related ingestions are old (more than 24 hours)
if
(
DateTime
()
-
data_ingestion
.
getCreationDate
())
<
day_hours
:
invalidate
=
False
return
invalidate
portal
=
context
.
getPortalObject
()
portal_catalog
=
portal
.
portal_catalog
reference_end_single
=
portal
.
ERP5Site_getIngestionReferenceDictionary
()[
"single_end_suffix"
]
reference_first_split
=
portal
.
ERP5Site_getIngestionReferenceDictionary
()[
"split_first_suffix"
]
reference_end_split
=
portal
.
ERP5Site_getIngestionReferenceDictionary
()[
"split_end_suffix"
]
# stop single started ingestion (not split files)
for
data_ingestion
in
portal_catalog
(
portal_type
=
"Data Ingestion"
,
simulation_state
=
"started"
,
id
=
"%"
+
reference_end_single
):
if
not
portal
.
ERP5Site_checkReferenceInvalidated
(
data_ingestion
):
related_split_ingestions
=
portal_catalog
(
portal_type
=
"Data Ingestion"
,
reference
=
data_ingestion
.
getReference
())
if
len
(
related_split_ingestions
)
==
1
:
data_stream
=
portal_catalog
.
getResultValue
(
portal_type
=
'Data Stream'
,
reference
=
data_ingestion
.
getReference
())
if
data_stream
is
not
None
:
hash_value
=
getHash
(
data_stream
)
data_stream
.
setVersion
(
hash_value
)
if
data_stream
.
getValidationState
()
!=
"validated"
:
data_stream
.
validate
()
if
data_ingestion
.
getSimulationState
()
==
"started"
:
data_ingestion
.
stop
()
# append split ingestions
for
data_ingestion
in
portal_catalog
(
portal_type
=
"Data Ingestion"
,
simulation_state
=
"started"
,
id
=
"%"
+
reference_first_split
):
if
not
portal
.
ERP5Site_checkReferenceInvalidated
(
data_ingestion
):
if
isInterruptedAbandonedSplitIngestion
(
data_ingestion
.
getReference
()):
portal
.
ERP5Site_invalidateSplitIngestions
(
data_ingestion
.
getReference
(),
success
=
False
)
else
:
try
:
last_data_stream_id
=
""
query
=
Query
(
portal_type
=
"Data Stream"
,
reference
=
data_ingestion
.
getReference
(),
validation_state
=
"draft"
)
result_list
=
portal_catalog
(
query
=
query
,
sort_on
=
((
'creation_date'
,
'ascending'
),))
full_data_stream
=
None
for
data_stream
in
result_list
:
log
(
''
.
join
([
"Data stream for split ingestion: "
,
data_stream
.
getId
()]))
if
data_stream
.
getId
()
==
data_ingestion
.
getId
():
log
(
"It is base data stream"
)
full_data_stream
=
data_stream
else
:
log
(
"It is not base data stream, it is a part"
)
if
full_data_stream
!=
None
:
log
(
"appending content to base data stream..."
)
full_data_stream
.
appendData
(
data_stream
.
getData
())
last_data_stream_id
=
data_stream
.
getId
()
portal
.
data_stream_module
.
deleteContent
(
data_stream
.
getId
())
if
last_data_stream_id
.
endswith
(
reference_end_split
):
portal
.
ERP5Site_invalidateSplitIngestions
(
data_ingestion
.
getReference
(),
success
=
True
)
full_data_stream_hash
=
getHash
(
full_data_stream
)
full_data_stream
.
setVersion
(
full_data_stream_hash
)
if
full_data_stream
.
getValidationState
()
!=
"validated"
:
full_data_stream
.
validate
()
related_split_ingestions
=
portal_catalog
(
portal_type
=
"Data Ingestion"
,
simulation_state
=
"started"
,
reference
=
data_ingestion
.
getReference
())
for
ingestion
in
related_split_ingestions
:
if
ingestion
.
getId
()
==
full_data_stream
.
getId
():
if
ingestion
.
getSimulationState
()
==
"started"
:
ingestion
.
stop
()
else
:
portal
.
ERP5Site_invalidateReference
(
ingestion
)
ingestion
.
deliver
()
except
Exception
as
e
:
context
.
logEntry
(
"ERROR appending split data streams for ingestion: %s - reference: %s."
%
(
data_ingestion
.
getId
(),
data_ingestion
.
getReference
()))
context
.
logEntry
(
e
)
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/DataLake_stopIngestionList.xml
deleted
100644 → 0
View file @
a7ed9e96
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"PythonScript"
module=
"Products.PythonScripts.PythonScript"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
_bind_names
</string>
</key>
<value>
<object>
<klass>
<global
name=
"_reconstructor"
module=
"copy_reg"
/>
</klass>
<tuple>
<global
name=
"NameAssignments"
module=
"Shared.DC.Scripts.Bindings"
/>
<global
name=
"object"
module=
"__builtin__"
/>
<none/>
</tuple>
<state>
<dictionary>
<item>
<key>
<string>
_asgns
</string>
</key>
<value>
<dictionary>
<item>
<key>
<string>
name_container
</string>
</key>
<value>
<string>
container
</string>
</value>
</item>
<item>
<key>
<string>
name_context
</string>
</key>
<value>
<string>
context
</string>
</value>
</item>
<item>
<key>
<string>
name_m_self
</string>
</key>
<value>
<string>
script
</string>
</value>
</item>
<item>
<key>
<string>
name_subpath
</string>
</key>
<value>
<string>
traverse_subpath
</string>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key>
<string>
_params
</string>
</key>
<value>
<string></string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
DataLake_stopIngestionList
</string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5_getDescriptorHTMLContent.xml
→
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5
Site
_getDescriptorHTMLContent.xml
View file @
41b01fb5
...
@@ -54,7 +54,7 @@
...
@@ -54,7 +54,7 @@
</item>
</item>
<item>
<item>
<key>
<string>
id
</string>
</key>
<key>
<string>
id
</string>
</key>
<value>
<string>
ERP5_getDescriptorHTMLContent
</string>
</value>
<value>
<string>
ERP5
Site
_getDescriptorHTMLContent
</string>
</value>
</item>
</item>
</dictionary>
</dictionary>
</pickle>
</pickle>
...
...
bt5/erp5_wendelin_data_lake_ui/PathTemplateItem/web_page_module/fif_gadget_erp5_page_file_js.js
View file @
41b01fb5
...
@@ -119,7 +119,7 @@
...
@@ -119,7 +119,7 @@
});
});
})
})
.
declareMethod
(
"
getDescriptorContent
"
,
function
(
descriptorReference
)
{
.
declareMethod
(
"
getDescriptorContent
"
,
function
(
descriptorReference
)
{
var
url
=
"
/ERP5_getDescriptorHTMLContent?reference=
"
+
descriptorReference
,
var
url
=
"
/ERP5
Site
_getDescriptorHTMLContent?reference=
"
+
descriptorReference
,
xmlHttp
=
new
XMLHttpRequest
();
xmlHttp
=
new
XMLHttpRequest
();
try
{
try
{
xmlHttp
.
open
(
"
GET
"
,
url
,
false
);
xmlHttp
.
open
(
"
GET
"
,
url
,
false
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment