Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
1a4916aa
Commit
1a4916aa
authored
Jan 08, 2021
by
Mikolaj Wawrzyniak
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix flaky utils usage data spec
parent
03576ca8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
38 additions
and
45 deletions
+38
-45
spec/lib/gitlab/utils/usage_data_spec.rb
spec/lib/gitlab/utils/usage_data_spec.rb
+38
-45
No files found.
spec/lib/gitlab/utils/usage_data_spec.rb
View file @
1a4916aa
...
@@ -58,76 +58,69 @@ RSpec.describe Gitlab::Utils::UsageData do
...
@@ -58,76 +58,69 @@ RSpec.describe Gitlab::Utils::UsageData do
expect
(
described_class
.
estimate_batch_distinct_count
(
relation
,
'column'
)).
to
eq
(
5
)
expect
(
described_class
.
estimate_batch_distinct_count
(
relation
,
'column'
)).
to
eq
(
5
)
end
end
context
'quasi integration test for different counting parameters'
,
quarantine:
{
issue:
'https://gitlab.com/gitlab-org/gitlab/-/issues/296169'
}
do
context
'quasi integration test for different counting parameters'
do
let_it_be
(
:user
)
{
create
(
:user
,
email:
'email1@domain.com'
)
}
# HyperLogLog http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf algorithm
let_it_be
(
:another_user
)
{
create
(
:user
,
email:
'email2@domain.com'
)
}
# used in estimate_batch_distinct_count produce probabilistic
# estimations of unique values present in dataset, because of that its results
let
(
:model
)
{
Issue
}
# are always off by some small factor from real value. However for given
let
(
:column
)
{
:author_id
}
# dataset it provide consistent and deterministic result. In the following context
# analyzed sets consist of values:
context
'different distribution of relation records'
do
# build_needs set: ['1', '2', '3', '4', '5']
[
10
,
100
,
100_000
].
each
do
|
spread
|
# ci_build set ['a', 'b']
context
"records are spread within
#{
spread
}
"
do
# with them, current implementation is expected to consistently report
before
do
# 5.217656147118495 and 2.0809220082170614 values
ids
=
(
1
..
spread
).
to_a
.
sample
(
10
)
# This test suite is expected to assure, that HyperLogLog implementation
create_list
(
:issue
,
10
).
each_with_index
do
|
issue
,
i
|
# behaves consistently between changes made to other parts of codebase.
issue
.
id
=
ids
[
i
]
# In case of fine tuning or changes to HyperLogLog algorithm implementation
end
# one should run in depth analysis of accuracy with supplementary rake tasks
end
# currently under implementation at https://gitlab.com/gitlab-org/gitlab/-/merge_requests/51118
# and adjust used values in this context accordingly.
it
'counts table'
do
let_it_be
(
:build
)
{
create
(
:ci_build
,
name:
'a'
)
}
expect
(
described_class
.
estimate_batch_distinct_count
(
model
)).
to
be_within
(
error_rate
).
percent_of
(
10
)
let_it_be
(
:another_build
)
{
create
(
:ci_build
,
name:
'b'
)
}
end
end
let
(
:model
)
{
Ci
::
BuildNeed
}
end
let
(
:column
)
{
:name
}
end
let
(
:build_needs_estimated_cardinality
)
{
5.217656147118495
}
let
(
:ci_builds_estimated_cardinality
)
{
2.0809220082170614
}
context
'different counting parameters'
do
context
'different counting parameters'
do
before_all
do
before_all
do
create_list
(
:issue
,
3
,
author:
user
)
1
.
upto
(
3
)
{
|
i
|
create
(
:ci_build_need
,
name:
i
,
build:
build
)
}
create_list
(
:issue
,
2
,
author:
another_user
)
4
.
upto
(
5
)
{
|
i
|
create
(
:ci_build_need
,
name:
i
,
build:
another_build
)
}
end
it
'counts table'
do
expect
(
described_class
.
estimate_batch_distinct_count
(
model
)).
to
be_within
(
error_rate
).
percent_of
(
5
)
end
it
'counts with column field'
do
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
column
)).
to
be_within
(
error_rate
).
percent_of
(
2
)
end
end
it
'counts with
:id field
'
do
it
'counts with
symbol passed in column argument
'
do
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
:id
)).
to
be_within
(
error_rate
).
percent_of
(
5
)
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
column
)).
to
eq
(
build_needs_estimated_cardinality
)
end
end
it
'counts with
"id" field
'
do
it
'counts with
string passed in column argument
'
do
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
"id"
)).
to
be_within
(
error_rate
).
percent_of
(
5
)
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
column
.
to_s
)).
to
eq
(
build_needs_estimated_cardinality
)
end
end
it
'counts with table.column
field
'
do
it
'counts with table.column
passed in column argument
'
do
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
"
#{
model
.
table_name
}
.
#{
column
}
"
)).
to
be_within
(
error_rate
).
percent_of
(
2
)
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
"
#{
model
.
table_name
}
.
#{
column
}
"
)).
to
eq
(
build_needs_estimated_cardinality
)
end
end
it
'counts with Arel
column
'
do
it
'counts with Arel
passed in column argument
'
do
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
model
.
arel_table
[
column
])).
to
be_within
(
error_rate
).
percent_of
(
2
)
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
model
.
arel_table
[
column
])).
to
eq
(
build_needs_estimated_cardinality
)
end
end
it
'counts over joined relations'
do
it
'counts over joined relations'
do
expect
(
described_class
.
estimate_batch_distinct_count
(
model
.
joins
(
:
author
),
"users.email"
)).
to
be_within
(
error_rate
).
percent_of
(
2
)
expect
(
described_class
.
estimate_batch_distinct_count
(
model
.
joins
(
:
build
),
"ci_builds.name"
)).
to
eq
(
ci_builds_estimated_cardinality
)
end
end
it
'counts with :column field with batch_size of 50K'
do
it
'counts with :column field with batch_size of 50K'
do
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
column
,
batch_size:
50_000
)).
to
be_within
(
error_rate
).
percent_of
(
2
)
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
column
,
batch_size:
50_000
)).
to
eq
(
build_needs_estimated_cardinality
)
end
end
it
'counts with different number of batches and aggregates total result'
do
it
'counts with different number of batches and aggregates total result'
do
stub_const
(
'Gitlab::Database::PostgresHll::BatchDistinctCounter::MIN_REQUIRED_BATCH_SIZE'
,
0
)
stub_const
(
'Gitlab::Database::PostgresHll::BatchDistinctCounter::MIN_REQUIRED_BATCH_SIZE'
,
0
)
[
1
,
2
,
4
,
5
,
6
].
each
{
|
i
|
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
batch_size:
i
)).
to
be_within
(
error_rate
).
percent_of
(
5
)
}
[
1
,
2
,
4
,
5
,
6
].
each
{
|
i
|
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
column
,
batch_size:
i
)).
to
eq
(
build_needs_estimated_cardinality
)
}
end
end
it
'counts with a start and finish'
do
it
'counts with a start and finish'
do
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
column
,
start:
model
.
minimum
(
:id
),
finish:
model
.
maximum
(
:id
))).
to
be_within
(
error_rate
).
percent_of
(
2
)
expect
(
described_class
.
estimate_batch_distinct_count
(
model
,
column
,
start:
model
.
minimum
(
:id
),
finish:
model
.
maximum
(
:id
))).
to
eq
(
build_needs_estimated_cardinality
)
end
end
end
end
end
end
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment