Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
W
wendelin
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Léo-Paul Géneau
wendelin
Commits
fee9c313
Commit
fee9c313
authored
Feb 24, 2021
by
Roque
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
erp5_wendelin_data_lake_ingestion: refactor get data stream script
queries - new script to get data set number of files
parent
5256a3bc
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
110 additions
and
20 deletions
+110
-20
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.py
...ns/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.py
+19
-0
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.xml
...s/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.xml
+62
-0
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.py
...ins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.py
+28
-19
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.xml
...ns/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.xml
+1
-1
No files found.
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.py
0 → 100644
View file @
fee9c313
"""
This script is called from ebulk client to get count of Data Streams for a Data set.
"""
from
erp5.component.module.Log
import
log
portal
=
context
.
getPortalObject
()
try
:
data_set
=
portal
.
data_set_module
.
get
(
data_set_reference
)
if
data_set
is
None
or
data_set
.
getReference
().
endswith
(
"_invalid"
):
return
{
"status_code"
:
0
,
"result"
:
0
}
except
Exception
as
e
:
log
(
"Unauthorized access to getDataStreamList: "
+
str
(
e
))
return
{
"status_code"
:
1
,
"error_message"
:
"401 - Unauthorized access. Please check your user credentials and try again."
}
data_set_uid
=
data_set
.
getUid
()
data_stream_list
=
context
.
DataSet_getDataStreamList
(
data_set_uid
)
return
{
"status_code"
:
0
,
"result"
:
len
(
data_stream_list
)
}
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.xml
0 → 100644
View file @
fee9c313
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"PythonScript"
module=
"Products.PythonScripts.PythonScript"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
Script_magic
</string>
</key>
<value>
<int>
3
</int>
</value>
</item>
<item>
<key>
<string>
_bind_names
</string>
</key>
<value>
<object>
<klass>
<global
name=
"NameAssignments"
module=
"Shared.DC.Scripts.Bindings"
/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key>
<string>
_asgns
</string>
</key>
<value>
<dictionary>
<item>
<key>
<string>
name_container
</string>
</key>
<value>
<string>
container
</string>
</value>
</item>
<item>
<key>
<string>
name_context
</string>
</key>
<value>
<string>
context
</string>
</value>
</item>
<item>
<key>
<string>
name_m_self
</string>
</key>
<value>
<string>
script
</string>
</value>
</item>
<item>
<key>
<string>
name_subpath
</string>
</key>
<value>
<string>
traverse_subpath
</string>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key>
<string>
_params
</string>
</key>
<value>
<string>
data_set_reference
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
ERP5Site_getDataStreamCount
</string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.py
View file @
fee9c313
"""
This script is called from ebulk client to get list of Data Streams for a Data set.
"""
import
json
from
erp5.component.module.Log
import
log
limit
=
[]
if
batch_size
:
limit
=
[
offset
,
batch_size
]
portal
=
context
.
getPortalObject
()
try
:
data_set
=
portal
.
data_set_module
.
get
(
data_set_reference
)
if
data_set
is
None
or
portal
.
ERP5Site_checkReferenceInvalidated
(
data_set
):
return
{
"status_code"
:
0
,
"result"
:
[]
}
if
data_set
is
None
or
data_set
.
getReference
().
endswith
(
"_invalid"
):
return
json
.
dumps
({
"status_code"
:
0
,
"result"
:
[]
})
except
Exception
as
e
:
# fails because unauthorized access
log
(
"Unauthorized access to getDataStreamList: "
+
str
(
e
))
return
{
"status_code"
:
1
,
"error_message"
:
"401 - Unauthorized access. Please check your user credentials and try again."
}
return
json
.
dumps
({
"status_code"
:
1
,
"error_message"
:
"401 - Unauthorized access. Please check your user credentials and try again."
})
data_set_uid
=
data_set
.
getUid
()
data_stream_list
=
context
.
DataSet_getDataStreamList
(
data_set_uid
,
limit
)
data_stream_dict
=
{}
for
stream
in
data_set
.
DataSet_getDataStreamList
():
if
stream
and
not
portal
.
ERP5Site_checkReferenceInvalidated
(
stream
)
and
stream
.
getValidationState
()
!=
"draft"
:
data_stream_info_dict
=
{
'id'
:
'data_stream_module/'
+
stream
.
getId
(),
'size'
:
stream
.
getSize
(),
'hash'
:
stream
.
getVersion
()
}
if
stream
.
getReference
()
in
data_stream_dict
:
data_stream_dict
[
stream
.
getReference
()][
'data-stream-list'
].
append
(
data_stream_info_dict
)
data_stream_dict
[
stream
.
getReference
()][
'large-hash'
]
=
data_stream_dict
[
stream
.
getReference
()][
'large-hash'
]
+
str
(
stream
.
getVersion
())
data_stream_dict
[
stream
.
getReference
()][
'full-size'
]
=
int
(
data_stream_dict
[
stream
.
getReference
()][
'full-size'
])
+
int
(
stream
.
getSize
())
else
:
data_stream_dict
[
stream
.
getReference
()]
=
{
'data-stream-list'
:
[
data_stream_info_dict
],
'id'
:
'data_stream_module/'
+
stream
.
getId
(),
'reference'
:
stream
.
getReference
(),
'large-hash'
:
stream
.
getVersion
(),
'full-size'
:
stream
.
getSize
()
}
for
stream_brain
in
data_stream_list
:
reference
=
stream_brain
.
reference
version
=
stream_brain
.
version
size
=
stream_brain
.
size
data_stream_id
=
stream_brain
.
relative_url
data_stream_info_dict
=
{
'id'
:
data_stream_id
,
'size'
:
size
,
'hash'
:
version
}
if
reference
in
data_stream_dict
:
data_stream_dict
[
reference
][
'data-stream-list'
].
append
(
data_stream_info_dict
)
data_stream_dict
[
reference
][
'large-hash'
]
=
data_stream_dict
[
reference
][
'large-hash'
]
+
str
(
version
)
data_stream_dict
[
reference
][
'full-size'
]
=
int
(
data_stream_dict
[
reference
][
'full-size'
])
+
int
(
size
)
else
:
data_stream_dict
[
reference
]
=
{
'data-stream-list'
:
[
data_stream_info_dict
],
'id'
:
data_stream_id
,
'reference'
:
reference
,
'large-hash'
:
version
,
'full-size'
:
size
}
result_dict
=
{
'status_code'
:
0
,
'result'
:
data_stream_dict
.
values
()}
return
json
.
dumps
(
result_dict
)
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.xml
View file @
fee9c313
...
...
@@ -50,7 +50,7 @@
</item>
<item>
<key>
<string>
_params
</string>
</key>
<value>
<string>
data_set_reference
</string>
</value>
<value>
<string>
data_set_reference
, offset=0, batch_size=0, get_count=False
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment