Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
W
wendelin
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Boxiang Sun
wendelin
Commits
fee9c313
Commit
fee9c313
authored
Feb 24, 2021
by
Roque
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
erp5_wendelin_data_lake_ingestion: refactor get data stream script
queries - new script to get data set number of files
parent
5256a3bc
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
110 additions
and
20 deletions
+110
-20
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.py
...ns/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.py
+19
-0
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.xml
...s/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.xml
+62
-0
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.py
...ins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.py
+28
-19
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.xml
...ns/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.xml
+1
-1
No files found.
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.py
0 → 100644
View file @
fee9c313
"""
This script is called from ebulk client to get count of Data Streams for a Data set.
"""
from
erp5.component.module.Log
import
log
portal
=
context
.
getPortalObject
()
try
:
data_set
=
portal
.
data_set_module
.
get
(
data_set_reference
)
if
data_set
is
None
or
data_set
.
getReference
().
endswith
(
"_invalid"
):
return
{
"status_code"
:
0
,
"result"
:
0
}
except
Exception
as
e
:
log
(
"Unauthorized access to getDataStreamList: "
+
str
(
e
))
return
{
"status_code"
:
1
,
"error_message"
:
"401 - Unauthorized access. Please check your user credentials and try again."
}
data_set_uid
=
data_set
.
getUid
()
data_stream_list
=
context
.
DataSet_getDataStreamList
(
data_set_uid
)
return
{
"status_code"
:
0
,
"result"
:
len
(
data_stream_list
)
}
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.xml
0 → 100644
View file @
fee9c313
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"PythonScript"
module=
"Products.PythonScripts.PythonScript"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
Script_magic
</string>
</key>
<value>
<int>
3
</int>
</value>
</item>
<item>
<key>
<string>
_bind_names
</string>
</key>
<value>
<object>
<klass>
<global
name=
"NameAssignments"
module=
"Shared.DC.Scripts.Bindings"
/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key>
<string>
_asgns
</string>
</key>
<value>
<dictionary>
<item>
<key>
<string>
name_container
</string>
</key>
<value>
<string>
container
</string>
</value>
</item>
<item>
<key>
<string>
name_context
</string>
</key>
<value>
<string>
context
</string>
</value>
</item>
<item>
<key>
<string>
name_m_self
</string>
</key>
<value>
<string>
script
</string>
</value>
</item>
<item>
<key>
<string>
name_subpath
</string>
</key>
<value>
<string>
traverse_subpath
</string>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key>
<string>
_params
</string>
</key>
<value>
<string>
data_set_reference
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
ERP5Site_getDataStreamCount
</string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.py
View file @
fee9c313
"""
This script is called from ebulk client to get list of Data Streams for a Data set.
"""
import
json
from
erp5.component.module.Log
import
log
limit
=
[]
if
batch_size
:
limit
=
[
offset
,
batch_size
]
portal
=
context
.
getPortalObject
()
try
:
data_set
=
portal
.
data_set_module
.
get
(
data_set_reference
)
if
data_set
is
None
or
portal
.
ERP5Site_checkReferenceInvalidated
(
data_set
):
return
{
"status_code"
:
0
,
"result"
:
[]
}
if
data_set
is
None
or
data_set
.
getReference
().
endswith
(
"_invalid"
):
return
json
.
dumps
({
"status_code"
:
0
,
"result"
:
[]
})
except
Exception
as
e
:
# fails because unauthorized access
log
(
"Unauthorized access to getDataStreamList: "
+
str
(
e
))
return
{
"status_code"
:
1
,
"error_message"
:
"401 - Unauthorized access. Please check your user credentials and try again."
}
return
json
.
dumps
({
"status_code"
:
1
,
"error_message"
:
"401 - Unauthorized access. Please check your user credentials and try again."
})
data_set_uid
=
data_set
.
getUid
()
data_stream_list
=
context
.
DataSet_getDataStreamList
(
data_set_uid
,
limit
)
data_stream_dict
=
{}
for
stream
in
data_set
.
DataSet_getDataStreamList
():
if
stream
and
not
portal
.
ERP5Site_checkReferenceInvalidated
(
stream
)
and
stream
.
getValidationState
()
!=
"draft"
:
data_stream_info_dict
=
{
'id'
:
'data_stream_module/'
+
stream
.
getId
(),
'size'
:
stream
.
getSize
(),
'hash'
:
stream
.
getVersion
()
}
if
stream
.
getReference
()
in
data_stream_dict
:
data_stream_dict
[
stream
.
getReference
()][
'data-stream-list'
].
append
(
data_stream_info_dict
)
data_stream_dict
[
stream
.
getReference
()][
'large-hash'
]
=
data_stream_dict
[
stream
.
getReference
()][
'large-hash'
]
+
str
(
stream
.
getVersion
())
data_stream_dict
[
stream
.
getReference
()][
'full-size'
]
=
int
(
data_stream_dict
[
stream
.
getReference
()][
'full-size'
])
+
int
(
stream
.
getSize
())
for
stream_brain
in
data_stream_list
:
reference
=
stream_brain
.
reference
version
=
stream_brain
.
version
size
=
stream_brain
.
size
data_stream_id
=
stream_brain
.
relative_url
data_stream_info_dict
=
{
'id'
:
data_stream_id
,
'size'
:
size
,
'hash'
:
version
}
if
reference
in
data_stream_dict
:
data_stream_dict
[
reference
][
'data-stream-list'
].
append
(
data_stream_info_dict
)
data_stream_dict
[
reference
][
'large-hash'
]
=
data_stream_dict
[
reference
][
'large-hash'
]
+
str
(
version
)
data_stream_dict
[
reference
][
'full-size'
]
=
int
(
data_stream_dict
[
reference
][
'full-size'
])
+
int
(
size
)
else
:
data_stream_dict
[
stream
.
getReference
()
]
=
{
'data-stream-list'
:
[
data_stream_info_dict
],
'id'
:
'data_stream_module/'
+
stream
.
getId
()
,
'reference'
:
stream
.
getReference
()
,
'large-hash'
:
stream
.
getVersion
()
,
'full-size'
:
stream
.
getSize
()
}
data_stream_dict
[
reference
]
=
{
'data-stream-list'
:
[
data_stream_info_dict
],
'id'
:
data_stream_id
,
'reference'
:
reference
,
'large-hash'
:
version
,
'full-size'
:
size
}
result_dict
=
{
'status_code'
:
0
,
'result'
:
data_stream_dict
.
values
()}
return
json
.
dumps
(
result_dict
)
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.xml
View file @
fee9c313
...
...
@@ -50,7 +50,7 @@
</item>
<item>
<key>
<string>
_params
</string>
</key>
<value>
<string>
data_set_reference
</string>
</value>
<value>
<string>
data_set_reference
, offset=0, batch_size=0, get_count=False
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment