Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
W
wendelin
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Léo-Paul Géneau
wendelin
Commits
bd164f63
Commit
bd164f63
authored
Jun 29, 2020
by
Roque
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
erp5_wendelin_data_lake_ingestion: new test for consistency state alarm
parent
a7d7eff7
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
51 additions
and
14 deletions
+51
-14
bt5/erp5_wendelin_data_lake_ingestion/TestTemplateItem/portal_components/test.erp5.testDataLakeIngestion.py
...Item/portal_components/test.erp5.testDataLakeIngestion.py
+49
-12
bt5/erp5_wendelin_data_lake_ingestion/TestTemplateItem/portal_components/test.erp5.testDataLakeIngestion.xml
...tem/portal_components/test.erp5.testDataLakeIngestion.xml
+2
-2
No files found.
bt5/erp5_wendelin_data_lake_ingestion/TestTemplateItem/portal_components/test.erp5.testDataLakeIngestion.py
View file @
bd164f63
...
...
@@ -19,7 +19,7 @@ class TestDataIngestion(SecurityTestCase):
SIZE_HASH
=
REFERENCE_SEPARATOR
+
"fake-size"
+
REFERENCE_SEPARATOR
+
"fake-hash"
SINGLE_INGESTION_END
=
REFERENCE_SEPARATOR
CHUNK_SIZE_CSV
=
25
REF_PREFIX
=
"fake-supplier"
+
REFERENCE_SEPARATOR
+
"fake-dataset"
+
REFERENCE_SEPARATOR
REF_PREFIX
=
"fake-supplier"
+
REFERENCE_SEPARATOR
REF_SUPPLIER_PREFIX
=
"fake-supplier"
+
REFERENCE_SEPARATOR
INVALID
=
"_invalid"
...
...
@@ -36,10 +36,12 @@ class TestDataIngestion(SecurityTestCase):
random_string
=
''
.
join
([
random
.
choice
(
string
.
ascii_letters
+
string
.
digits
)
for
_
in
xrange
(
10
)])
return
'UNIT-TEST-'
+
random_string
def
getIngestionReference
(
self
,
reference
,
extension
,
randomize_ingestion_reference
=
False
):
def
getIngestionReference
(
self
,
reference
,
extension
,
randomize_ingestion_reference
=
False
,
data_set_reference
=
False
):
if
not
data_set_reference
:
data_set_reference
=
"fake-dataset"
if
not
randomize_ingestion_reference
:
# return hard coded which results in one Data Set and multiple Data Streams (in context of test)
return
self
.
REF_PREFIX
+
reference
+
extension
return
self
.
REF_PREFIX
+
data_set_reference
+
self
.
REFERENCE_SEPARATOR
+
reference
+
extension
else
:
# create random one
random_string
=
self
.
getRandomReference
()
...
...
@@ -88,15 +90,15 @@ class TestDataIngestion(SecurityTestCase):
ingestion_policy
.
ingest
()
self
.
tic
()
def
ingest
(
self
,
data_chunk
,
reference
,
extension
,
eof
,
randomize_ingestion_reference
=
False
):
ingestion_reference
=
self
.
getIngestionReference
(
reference
,
extension
,
randomize_ingestion_reference
)
def
ingest
(
self
,
data_chunk
,
reference
,
extension
,
eof
,
randomize_ingestion_reference
=
False
,
data_set_reference
=
False
):
ingestion_reference
=
self
.
getIngestionReference
(
reference
,
extension
,
randomize_ingestion_reference
,
data_set_reference
)
# use default ebulk policy
ingestion_policy
=
self
.
portal
.
portal_ingestion_policies
.
default_ebulk
self
.
ingestRequest
(
ingestion_reference
,
eof
,
data_chunk
,
ingestion_policy
)
_
,
ingestion_reference
=
self
.
sanitizeReference
(
ingestion_reference
)
return
ingestion_reference
def
stepIngest
(
self
,
extension
,
delimiter
,
randomize_ingestion_reference
=
False
):
def
stepIngest
(
self
,
extension
,
delimiter
,
randomize_ingestion_reference
=
False
,
data_set_reference
=
False
):
file_name
=
"file_name.csv"
reference
=
self
.
getRandomReference
()
array
=
[[
random
.
random
()
for
i
in
range
(
self
.
CHUNK_SIZE_CSV
+
10
)]
for
j
in
range
(
self
.
CHUNK_SIZE_CSV
+
10
)]
...
...
@@ -111,7 +113,8 @@ class TestDataIngestion(SecurityTestCase):
chunk
.
append
(
line
)
else
:
break
ingestion_reference
=
self
.
ingest
(
data_chunk
,
reference
,
extension
,
self
.
SINGLE_INGESTION_END
,
randomize_ingestion_reference
=
randomize_ingestion_reference
)
ingestion_reference
=
self
.
ingest
(
data_chunk
,
reference
,
extension
,
self
.
SINGLE_INGESTION_END
,
randomize_ingestion_reference
=
randomize_ingestion_reference
,
data_set_reference
=
data_set_reference
)
if
os
.
path
.
exists
(
file_name
):
os
.
remove
(
file_name
)
...
...
@@ -129,17 +132,18 @@ class TestDataIngestion(SecurityTestCase):
data_stream_data
=
data_stream
.
getData
()
self
.
assertEqual
(
data_chunk
,
data_stream_data
)
# check Data Set is validated and Data Stream is published
self
.
assertEqual
(
'validated'
,
data_set
.
getValidationState
())
self
.
assertEqual
(
'validated'
,
data_stream
.
getValidationState
())
return
data_set
,
[
data_stream
]
def
test_01_DefaultEbulkIngestion
(
self
):
"""
Test default ingestion with ebulk too.
"""
self
.
stepIngest
(
self
.
CSV
,
","
)
data_set
,
data_stream_list
=
self
.
stepIngest
(
self
.
CSV
,
","
,
randomize_ingestion_reference
=
True
)
# check Data Set and Data Stream is validated
self
.
assertEqual
(
'validated'
,
data_set
.
getValidationState
())
self
.
assertSameSet
([
'validated'
for
x
in
data_stream_list
],
[
x
.
getValidationState
()
for
x
in
data_stream_list
])
def
test_02_DefaultSplitIngestion
(
self
):
"""
...
...
@@ -225,4 +229,37 @@ class TestDataIngestion(SecurityTestCase):
self
.
assertSameSet
([
'invalidated'
for
x
in
data_stream_list
],
[
x
.
getValidationState
()
for
x
in
data_stream_list
])
def
test_05_StateConsistencyAlarm
(
self
):
"""
Test alarm that checks (and fixes) Data set - Data stream state consistency
"""
data_set_reference
=
"consistency-dataset-"
+
self
.
getRandomReference
()
data_set
,
data_stream_list
=
self
.
stepIngest
(
self
.
CSV
,
","
,
randomize_ingestion_reference
=
False
,
data_set_reference
=
data_set_reference
)
self
.
tic
()
first_file_stream
=
data_stream_list
[
0
]
# publish Data Set (all Data Streams are published automatically)
data_set
.
publish
()
self
.
tic
()
# ingest new file
data_set
,
data_stream_list
=
self
.
stepIngest
(
self
.
CSV
,
","
)
self
.
tic
()
second_file_stream
=
data_stream_list
[
0
]
# second ingested file stream is in validated state (inconsistent with whole data set state)
self
.
assertEqual
(
data_set
.
getValidationState
(),
'published'
)
self
.
assertEqual
(
first_file_stream
.
getValidationState
(),
'published'
)
self
.
assertEqual
(
second_file_stream
.
getValidationState
(),
'validated'
)
# call explicitly alarm to fix the state inconsistency
self
.
portal
.
portal_alarms
.
wendelin_check_datastream_consistency
.
Alarm_checkDataStreamStateConsistency
()
self
.
tic
()
# check states where updated (all published)
self
.
assertEqual
(
data_set
.
getValidationState
(),
'published'
)
self
.
assertEqual
(
first_file_stream
.
getValidationState
(),
'published'
)
self
.
assertEqual
(
second_file_stream
.
getValidationState
(),
'published'
)
# XXX: new test which simulates download / upload of Data Set and increase DS version
\ No newline at end of file
bt5/erp5_wendelin_data_lake_ingestion/TestTemplateItem/portal_components/test.erp5.testDataLakeIngestion.xml
View file @
bd164f63
...
...
@@ -46,8 +46,8 @@
<key>
<string>
text_content_warning_message
</string>
</key>
<value>
<tuple>
<string>
W:10
2
, 34: Unused variable \'i\' (unused-variable)
</string>
<string>
W:10
2
, 76: Unused variable \'j\' (unused-variable)
</string>
<string>
W:10
4
, 34: Unused variable \'i\' (unused-variable)
</string>
<string>
W:10
4
, 76: Unused variable \'j\' (unused-variable)
</string>
</tuple>
</value>
</item>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment