Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
ed7546b5
Commit
ed7546b5
authored
Aug 07, 2020
by
Mark Lapierre
Committed by
Ramya Authappan
Aug 07, 2020
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add e2e test of distributed reads from unhealthy node
parent
932f5927
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
94 additions
and
30 deletions
+94
-30
qa/qa/service/praefect_manager.rb
qa/qa/service/praefect_manager.rb
+60
-23
qa/qa/specs/features/api/3_create/repository/distributed_reads_spec.rb
...eatures/api/3_create/repository/distributed_reads_spec.rb
+34
-7
No files found.
qa/qa/service/praefect_manager.rb
View file @
ed7546b5
...
...
@@ -27,6 +27,7 @@ module QA
end
def
replicated?
(
project_id
)
Support
::
Retrier
.
retry_until
(
raise_on_failure:
false
)
do
replicas
=
wait_until_shell_command
(
%(docker exec gitlab-gitaly-ha bash -c 'gitlab-rake "gitlab:praefect:replicas[#{project_id}]"')
)
do
|
line
|
QA
::
Runtime
::
Logger
.
debug
(
line
.
chomp
)
# The output of the rake task looks something like this:
...
...
@@ -35,11 +36,13 @@ module QA
# ----------------------------------------------------------------------------------------------------------------------------------------------------------------
# gitaly_cluster-3aff1f2bd14e6c98 | 23c4422629234d62b62adacafd0a33a8364e8619 | 23c4422629234d62b62adacafd0a33a8364e8619 | 23c4422629234d62b62adacafd0a33a8364e8619
#
break
line
if
line
.
start_with?
(
"gitaly_cluster"
)
break
line
if
line
.
start_with?
(
'gitaly_cluster'
)
break
nil
if
line
.
include?
(
'Something went wrong when getting replicas'
)
end
# We want to know if the checksums are identical
replicas
.
split
(
'|'
).
map
(
&
:strip
)[
1
..
3
].
uniq
.
one?
replicas
&
.
split
(
'|'
)
&
.
map
(
&
:strip
)
&
.
slice
(
1
..
3
)
&
.
uniq
&
.
one?
end
end
def
start_primary_node
...
...
@@ -54,6 +57,14 @@ module QA
stop_node
(
@praefect
)
end
# Stops the node whose name is held in @secondary_node by delegating to
# stop_node.
def stop_secondary_node
  stop_node(@secondary_node)
end
# Starts the node whose name is held in @secondary_node by delegating to
# start_node.
def start_secondary_node
  start_node(@secondary_node)
end
# Runs `docker start` for the given container through the shell helper.
#
# @param name [String] container name interpolated into the command
def start_node(name)
  shell("docker start #{name}")
end
...
...
@@ -120,6 +131,18 @@ module QA
result
[
'data'
][
'result'
].
map
{
|
result
|
{
node:
result
[
'metric'
][
'storage'
],
value:
result
[
'value'
][
1
].
to_i
}
}
end
# Counts the replication_queue rows whose state is 'ready' or 'in_progress'.
#
# The SQL is wrapped by sql_to_docker_exec_cmd and run through the shell
# helper; every yielded output line is collected.
#
# @return [Integer] the third output line converted with to_i
def replication_queue_incomplete_count
  output_lines = []
  query = "select count(*) from replication_queue where state = 'ready' or state = 'in_progress';"

  shell(sql_to_docker_exec_cmd(query)) { |line| output_lines << line }

  # The result looks like:
  #   count
  #   -----
  #       1
  # so the number is on the third line.
  output_lines[2].to_i
end
def
replication_queue_lock_count
result
=
[]
shell
sql_to_docker_exec_cmd
(
"select count(*) from replication_queue_lock where acquired = 't';"
)
do
|
line
|
...
...
@@ -276,6 +299,22 @@ module QA
end
end
# Waits for a health check failure to be logged for the node held in
# @secondary_node; see wait_for_health_check_failure.
def wait_for_secondary_node_health_check_failure
  wait_for_health_check_failure(@secondary_node)
end
# Polls the last line of the Praefect log (inside the @praefect container)
# for a record of a failed health check ping against the given storage.
#
# The block handed to wait_until_shell_command is truthy only when the line
# parses as JSON with msg 'error when pinging healthcheck' and a 'storage'
# equal to +node+. NOTE(review): presumably wait_until_shell_command re-runs
# the command until the block is truthy - confirm against its definition.
#
# @param node [String] storage name compared with the log entry's 'storage'
def wait_for_health_check_failure(node)
  QA::Runtime::Logger.info("Waiting for Praefect to record a health check failure on #{node}")

  tail_command = "docker exec #{@praefect} bash -c 'tail -n 1 /var/log/gitlab/praefect/current'"

  wait_until_shell_command(tail_command) do |line|
    QA::Runtime::Logger.debug(line.chomp)

    begin
      entry = JSON.parse(line)
      entry['msg'] == 'error when pinging healthcheck' && entry['storage'] == node
    rescue JSON::ParserError
      # Ignore lines that can't be parsed as JSON; they count as "no match".
      nil
    end
  end
end
def
wait_for_gitaly_check
Support
::
Waiter
.
repeat_until
(
max_attempts:
3
)
do
storage_ok
=
false
...
...
@@ -292,35 +331,33 @@ module QA
end
end
# Runs the gitlab_shell check rake task inside the @gitlab container and
# waits, via wait_until_shell_command_matches, for output matching the
# "Checking GitLab Shell ... Finished" pattern.
def wait_for_gitlab_shell_check
  check_command = "docker exec #{@gitlab} bash -c 'gitlab-rake gitlab:gitlab_shell:check'"

  wait_until_shell_command_matches(check_command, /Checking GitLab Shell ... Finished/)
end
# Waits until there is an increase in the number of reads for
# any node compared to the number of reads provided. If a node
# has no pre-read data, consider it to have had zero reads.
#
# Nodes are matched by name through value_for_node rather than by array
# position, so the comparison does not depend on both lists having the
# same length or ordering.
#
# @param pre_read_data [Array<Hash>] entries with :node and :value keys
#   captured before the reads were issued
def wait_for_read_count_change(pre_read_data)
  Support::Waiter.wait_until(sleep_interval: 5) do
    query_read_distribution.any? do |data|
      data[:value] > value_for_node(pre_read_data, data[:node])
    end
  end
end
# Looks up the :value recorded for a node in a list of read-distribution
# entries, such as the data the spec obtains from query_read_distribution.
#
# @param data [Array<Hash>] entries with :node and :value keys
# @param node [String] storage name to look up
# @return the matching entry's :value, or 0 when no entry matches
def value_for_node(data, node)
  # A node without an entry is treated as having had zero reads. The
  # fallback must be returned directly: indexing an Integer fallback with
  # [:value] raises TypeError instead of yielding 0.
  entry = data.find { |item| item[:node] == node }
  entry ? entry[:value] : 0
end
# Runs the readiness checks in sequence so that later steps only start once
# GitLab and Praefect can communicate reliably: Praefect itself, the SQL
# ping, the storage nodes, and finally the Gitaly check.
def wait_for_reliable_connection
  QA::Runtime::Logger.info('Wait until GitLab and Praefect can communicate reliably')
  wait_for_praefect
  wait_for_sql_ping
  wait_for_storage_nodes
  wait_for_gitaly_check
end
# Waits until the replication queue has no incomplete jobs and the
# project's replicas report identical checksums.
#
# The queue is checked first so that replicated? is only consulted once no
# 'ready' or 'in_progress' replication jobs remain.
#
# @param project_id the project passed through to replicated?
def wait_for_replication(project_id)
  Support::Waiter.wait_until(sleep_interval: 1) do
    replication_queue_incomplete_count.zero? && replicated?(project_id)
  end
end
private
...
...
qa/qa/specs/features/api/3_create/repository/distributed_reads_spec.rb
View file @
ed7546b5
...
...
@@ -29,19 +29,46 @@ module QA
pre_read_data
=
praefect_manager
.
query_read_distribution
QA
::
Runtime
::
Logger
.
info
(
'Fetching commits from the repository'
)
Parallel
.
each
((
1
..
number_of_reads
))
do
|
index
|
Resource
::
Repository
::
Commit
.
fabricate_via_api!
do
|
commits
|
commits
.
project
=
project
end
end
Parallel
.
each
((
1
..
number_of_reads
))
{
project
.
commits
}
praefect_manager
.
wait_for_read_count_change
(
pre_read_data
)
aggregate_failures
"each gitaly node"
do
praefect_manager
.
query_read_distribution
.
each_with_index
do
|
data
,
index
|
expect
(
data
[
:value
])
.
to
be
>
pre_read_data
[
index
][
:value
],
"Read counts did not differ for node
#{
pre_read_data
[
index
][
:node
]
}
"
.
to
be
>
praefect_manager
.
value_for_node
(
pre_read_data
,
data
[
:node
]),
"Read counts did not differ for node
#{
data
[
:node
]
}
"
end
end
end
context 'when a node is unhealthy' do
  # Take the secondary node down and wait for Praefect to log the failed
  # health check before the example issues any reads.
  before do
    praefect_manager.stop_secondary_node
    praefect_manager.wait_for_secondary_node_health_check_failure
  end

  after do
    # Leave the cluster in a suitable state for subsequent tests
    praefect_manager.start_secondary_node
    praefect_manager.wait_for_health_check_all_nodes
    praefect_manager.wait_for_reliable_connection
  end

  it 'does not read from the unhealthy node' do
    reads_before = praefect_manager.query_read_distribution

    QA::Runtime::Logger.info('Fetching commits from the repository')

    Parallel.each(1..number_of_reads) { project.commits }

    praefect_manager.wait_for_read_count_change(reads_before)

    reads_after = praefect_manager.query_read_distribution

    # gitaly1 and gitaly3 must have served reads; gitaly2's count must be
    # unchanged (presumably gitaly2 is the stopped secondary - confirm
    # against the manager's @secondary_node).
    aggregate_failures "each gitaly node" do
      expect(praefect_manager.value_for_node(reads_after, 'gitaly1'))
        .to be > praefect_manager.value_for_node(reads_before, 'gitaly1')
      expect(praefect_manager.value_for_node(reads_after, 'gitaly2'))
        .to eq praefect_manager.value_for_node(reads_before, 'gitaly2')
      expect(praefect_manager.value_for_node(reads_after, 'gitaly3'))
        .to be > praefect_manager.value_for_node(reads_before, 'gitaly3')
    end
  end
end
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment