Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Léo-Paul Géneau
gitlab-ce
Commits
335ee79a
Commit
335ee79a
authored
Feb 15, 2018
by
Tiago Botelho
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Refactors median code to work with both single and multiple projects
parent
3f31da9c
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
236 additions
and
88 deletions
+236
-88
app/models/cycle_analytics.rb
app/models/cycle_analytics.rb
+2
-2
app/serializers/analytics_stage_entity.rb
app/serializers/analytics_stage_entity.rb
+3
-1
lib/gitlab/cycle_analytics/base_query.rb
lib/gitlab/cycle_analytics/base_query.rb
+2
-2
lib/gitlab/cycle_analytics/base_stage.rb
lib/gitlab/cycle_analytics/base_stage.rb
+6
-2
lib/gitlab/cycle_analytics/usage_data.rb
lib/gitlab/cycle_analytics/usage_data.rb
+26
-22
lib/gitlab/database/median.rb
lib/gitlab/database/median.rb
+100
-40
lib/gitlab/usage_data.rb
lib/gitlab/usage_data.rb
+1
-6
spec/features/cycle_analytics_spec.rb
spec/features/cycle_analytics_spec.rb
+2
-12
spec/lib/gitlab/cycle_analytics/usage_data_spec.rb
spec/lib/gitlab/cycle_analytics/usage_data_spec.rb
+36
-0
spec/lib/gitlab/database/median_spec.rb
spec/lib/gitlab/database/median_spec.rb
+15
-0
spec/models/cycle_analytics_spec.rb
spec/models/cycle_analytics_spec.rb
+30
-0
spec/support/cycle_analytics_helpers.rb
spec/support/cycle_analytics_helpers.rb
+12
-0
spec/support/cycle_analytics_helpers/test_generation.rb
spec/support/cycle_analytics_helpers/test_generation.rb
+1
-1
No files found.
app/models/cycle_analytics.rb
View file @
335ee79a
...
...
@@ -7,8 +7,8 @@ class CycleAnalytics
end
def
all_medians_per_stage
STAGES
.
each_with_object
({})
do
|
stage_name
,
hsh
|
hsh
[
stage_name
]
=
self
[
stage_name
].
median
STAGES
.
each_with_object
({})
do
|
stage_name
,
medians_per_stage
|
medians_per_stage
[
stage_name
]
=
self
[
stage_name
].
median
end
end
...
...
app/serializers/analytics_stage_entity.rb
View file @
335ee79a
...
...
@@ -7,6 +7,8 @@ class AnalyticsStageEntity < Grape::Entity
expose
:description
expose
:median
,
as: :value
do
|
stage
|
distance_of_time_in_words
(
stage
.
median
)
if
stage
.
median
&&
!
(
stage
.
median
.
blank?
||
stage
.
median
.
zero?
)
if
stage
.
median
&&
!
(
stage
.
median
.
nil?
||
stage
.
median
.
zero?
)
distance_of_time_in_words
(
stage
.
median
)
end
end
end
lib/gitlab/cycle_analytics/base_query.rb
View file @
335ee79a
...
...
@@ -8,14 +8,14 @@ module Gitlab
private
def
base_query
@base_query
||=
stage_query
(
[
@project
.
id
]
)
# rubocop:disable Gitlab/ModuleWithInstanceVariables
@base_query
||=
stage_query
(
@project
.
id
)
# rubocop:disable Gitlab/ModuleWithInstanceVariables
end
def
stage_query
(
project_ids
)
query
=
mr_closing_issues_table
.
join
(
issue_table
).
on
(
issue_table
[
:id
].
eq
(
mr_closing_issues_table
[
:issue_id
]))
.
join
(
issue_metrics_table
).
on
(
issue_table
[
:id
].
eq
(
issue_metrics_table
[
:issue_id
]))
.
project
(
issue_table
[
:project_id
].
as
(
"project_id"
))
.
where
(
issue_table
[
:project_id
].
in
(
project_ids
))
.
where
(
issue_table
[
:project_id
].
in
(
Array
(
project_ids
)
))
.
where
(
issue_table
[
:created_at
].
gteq
(
@options
[
:from
]))
# rubocop:disable Gitlab/ModuleWithInstanceVariables
# Load merge_requests
...
...
lib/gitlab/cycle_analytics/base_stage.rb
View file @
335ee79a
...
...
@@ -31,8 +31,12 @@ module Gitlab
interval_query
=
Arel
::
Nodes
::
As
.
new
(
cte_table
,
subtract_datetimes
(
stage_query
(
project_ids
),
start_time_attrs
,
end_time_attrs
,
name
.
to_s
))
median_datetimes
(
cte_table
,
interval_query
,
name
,
:project_id
)
&
.
each
do
|
project_id
,
median
|
loader
.
call
(
project_id
,
median
)
if
project_ids
.
size
==
1
loader
.
call
(
@project
.
id
,
median_datetime
(
cte_table
,
interval_query
,
name
))
else
median_datetimes
(
cte_table
,
interval_query
,
name
,
:project_id
)
&
.
each
do
|
project_id
,
median
|
loader
.
call
(
project_id
,
median
)
end
end
end
end
...
...
lib/gitlab/cycle_analytics/usage_data.rb
View file @
335ee79a
...
...
@@ -5,27 +5,21 @@ module Gitlab
attr_reader
:projects
,
:options
def
initialize
(
projects
,
options
)
@projects
=
projects
@options
=
options
def
initialize
@projects
=
Project
.
sorted_by_activity
.
limit
(
PROJECTS_LIMIT
)
@options
=
{
from:
7
.
days
.
ago
}
end
def
to_json
total
=
0
values
=
{}
medians_per_stage
.
each
do
|
stage_name
,
medians
|
medians
=
medians
.
map
(
&
:presence
).
compact
values
=
medians_per_stage
.
each_with_object
({})
do
|
(
stage_name
,
medians
),
hsh
|
calculations
=
stage_values
(
medians
)
stage_values
=
{
average:
calc_average
(
medians
),
sd:
standard_deviation
(
medians
),
missing:
projects
.
length
-
medians
.
length
}
total
+=
stage_values
.
values
.
compact
.
sum
values
[
stage_name
]
=
stage_values
end
total
+=
calculations
.
values
.
compact
.
sum
hsh
[
stage_name
]
=
calculations
end
values
[
:total
]
=
total
...
...
@@ -43,26 +37,36 @@ module Gitlab
end
end
def
stage_values
(
medians
)
medians
=
medians
.
map
(
&
:presence
).
compact
average
=
calc_average
(
medians
)
{
average:
average
,
sd:
standard_deviation
(
medians
,
average
),
missing:
projects
.
length
-
medians
.
length
}
end
def
calc_average
(
values
)
return
if
values
.
empty?
(
values
.
sum
/
values
.
length
).
to_i
end
def
sample_variance
(
values
)
def
standard_deviation
(
values
,
average
)
Math
.
sqrt
(
sample_variance
(
values
,
average
)).
to_i
end
def
sample_variance
(
values
,
average
)
return
0
if
values
.
length
<=
1
avg
=
calc_average
(
values
)
sum
=
values
.
inject
(
0
)
do
|
acc
,
val
|
acc
+
(
val
-
av
g
)
**
2
acc
+
(
val
-
av
erage
)
**
2
end
sum
/
(
values
.
length
-
1
)
end
def
standard_deviation
(
values
)
Math
.
sqrt
(
sample_variance
(
values
)).
to_i
end
end
end
end
lib/gitlab/database/median.rb
View file @
335ee79a
...
...
@@ -2,32 +2,34 @@
module
Gitlab
module
Database
module
Median
def
median_datetime
(
arel_table
,
query_so_far
,
column_sym
)
extract_median
(
execute_queries
(
arel_table
,
query_so_far
,
column_sym
)).
presence
end
def
median_datetimes
(
arel_table
,
query_so_far
,
column_sym
,
partition_column
)
median_queries
=
if
Gitlab
::
Database
.
postgresql?
pg_median_datetime_sql
(
arel_table
,
query_so_far
,
column_sym
,
partition_column
)
elsif
Gitlab
::
Database
.
mysql?
mysql_median_datetime_sql
(
arel_table
,
query_so_far
,
column_sym
)
end
results
=
Array
.
wrap
(
median_queries
).
map
do
|
query
|
ActiveRecord
::
Base
.
connection
.
execute
(
query
)
end
extract_medians
(
results
).
presence
extract_medians
(
execute_queries
(
arel_table
,
query_so_far
,
column_sym
,
partition_column
)).
presence
end
def
extract_median
s
(
results
)
def
extract_median
(
results
)
result
=
results
.
compact
.
first
if
Gitlab
::
Database
.
postgresql?
result
.
values
.
map
do
|
id
,
median
|
[
id
.
to_i
,
median
&
.
to_f
]
end
.
to_h
result
=
result
.
first
.
presence
result
[
'median'
]
&
.
to_f
if
result
elsif
Gitlab
::
Database
.
mysql?
result
.
to_a
.
flatten
.
first
end
end
def
extract_medians
(
results
)
return
{}
if
Gitlab
::
Database
.
mysql?
results
.
compact
.
first
.
values
.
map
do
|
id
,
median
|
[
id
.
to_i
,
median
&
.
to_f
]
end
.
to_h
end
def
mysql_median_datetime_sql
(
arel_table
,
query_so_far
,
column_sym
)
query
=
arel_table
.
from
(
arel_table
.
project
(
Arel
.
sql
(
'*'
)).
order
(
arel_table
[
column_sym
]).
as
(
arel_table
.
table_name
))
...
...
@@ -53,7 +55,7 @@ module Gitlab
]
end
def
pg_median_datetime_sql
(
arel_table
,
query_so_far
,
column_sym
,
partition_column
)
def
pg_median_datetime_sql
(
arel_table
,
query_so_far
,
column_sym
,
partition_column
=
nil
)
# Create a CTE with the column we're operating on, row number (after sorting by the column
# we're operating on), and count of the table we're operating on (duplicated across) all rows
# of the CTE. For example, if we're looking to find the median of the `projects.star_count`
...
...
@@ -64,49 +66,107 @@ module Gitlab
# 5 | 1 | 3
# 9 | 2 | 3
# 15 | 3 | 3
#
# If a partition column is used we will do the same operation but for separate partitions,
# when that happens the CTE might look like this:
#
# project_id | star_count | row_id | ct
# ------------+------------+--------+----
# 1 | 5 | 1 | 2
# 1 | 9 | 2 | 2
# 2 | 10 | 1 | 3
# 2 | 15 | 2 | 3
# 2 | 20 | 3 | 3
cte_table
=
Arel
::
Table
.
new
(
"ordered_records"
)
cte
=
Arel
::
Nodes
::
As
.
new
(
cte_table
,
arel_table
.
project
(
arel_table
[
partition_column
],
arel_table
[
column_sym
].
as
(
column_sym
.
to_s
),
Arel
::
Nodes
::
Over
.
new
(
Arel
::
Nodes
::
NamedFunction
.
new
(
"rank"
,
[]),
Arel
::
Nodes
::
Window
.
new
.
partition
(
arel_table
[
partition_column
])
.
order
(
arel_table
[
column_sym
])).
as
(
'row_id'
),
arel_table
.
from
(
arel_table
.
alias
)
.
project
(
"COUNT(*)"
)
.
where
(
arel_table
[
partition_column
].
eq
(
arel_table
.
alias
[
partition_column
])).
as
(
'ct'
))
.
arel_table
.
project
(
*
rank_rows
(
arel_table
,
column_sym
,
partition_column
))
.
# Disallow negative values
where
(
arel_table
[
column_sym
].
gteq
(
zero_interval
)))
# From the CTE, select either the middle row or the middle two rows (this is accomplished
# by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
# selected rows, and this is the median value.
cte_table
.
project
(
cte_table
[
partition_column
])
.
project
(
average
([
extract_epoch
(
cte_table
[
column_sym
])],
"median"
))
.
where
(
Arel
::
Nodes
::
Between
.
new
(
cte_table
[
:row_id
],
Arel
::
Nodes
::
And
.
new
(
[(
cte_table
[
:ct
]
/
Arel
.
sql
(
'2.0'
)),
(
cte_table
[
:ct
]
/
Arel
.
sql
(
'2.0'
)
+
1
)]
result
=
cte_table
.
project
(
*
median_projections
(
cte_table
,
column_sym
,
partition_column
))
.
where
(
Arel
::
Nodes
::
Between
.
new
(
cte_table
[
:row_id
],
Arel
::
Nodes
::
And
.
new
(
[(
cte_table
[
:ct
]
/
Arel
.
sql
(
'2.0'
)),
(
cte_table
[
:ct
]
/
Arel
.
sql
(
'2.0'
)
+
1
)]
)
)
)
)
.
with
(
query_so_far
,
cte
)
.
group
(
cte_table
[
partition_column
])
.
order
(
cte_table
[
partition_column
])
.
to_sql
.
with
(
query_so_far
,
cte
)
result
.
group
(
cte_table
[
partition_column
]).
order
(
cte_table
[
partition_column
])
if
partition_column
result
.
to_sql
end
private
def
median_queries
(
arel_table
,
query_so_far
,
column_sym
,
partition_column
=
nil
)
if
Gitlab
::
Database
.
postgresql?
pg_median_datetime_sql
(
arel_table
,
query_so_far
,
column_sym
,
partition_column
)
elsif
Gitlab
::
Database
.
mysql?
mysql_median_datetime_sql
(
arel_table
,
query_so_far
,
column_sym
)
end
end
def
execute_queries
(
arel_table
,
query_so_far
,
column_sym
,
partition_column
=
nil
)
queries
=
median_queries
(
arel_table
,
query_so_far
,
column_sym
,
partition_column
)
Array
.
wrap
(
queries
).
map
{
|
query
|
ActiveRecord
::
Base
.
connection
.
execute
(
query
)
}
end
def
average
(
args
,
as
)
Arel
::
Nodes
::
NamedFunction
.
new
(
"AVG"
,
args
,
as
)
end
def
rank_rows
(
arel_table
,
column_sym
,
partition_column
)
column_row
=
arel_table
[
column_sym
].
as
(
column_sym
.
to_s
)
if
partition_column
partition_row
=
arel_table
[
partition_column
]
row_id
=
Arel
::
Nodes
::
Over
.
new
(
Arel
::
Nodes
::
NamedFunction
.
new
(
'rank'
,
[]),
Arel
::
Nodes
::
Window
.
new
.
partition
(
arel_table
[
partition_column
])
.
order
(
arel_table
[
column_sym
])
).
as
(
'row_id'
)
count
=
arel_table
.
from
(
arel_table
.
alias
)
.
project
(
'COUNT(*)'
)
.
where
(
arel_table
[
partition_column
].
eq
(
arel_table
.
alias
[
partition_column
]))
.
as
(
'ct'
)
[
partition_row
,
column_row
,
row_id
,
count
]
else
row_id
=
Arel
::
Nodes
::
Over
.
new
(
Arel
::
Nodes
::
NamedFunction
.
new
(
'row_number'
,
[]),
Arel
::
Nodes
::
Window
.
new
.
order
(
arel_table
[
column_sym
])
).
as
(
'row_id'
)
count
=
arel_table
.
project
(
"COUNT(1)"
).
as
(
'ct'
)
[
column_row
,
row_id
,
count
]
end
end
def
median_projections
(
table
,
column_sym
,
partition_column
)
if
partition_column
[
table
[
partition_column
],
average
([
extract_epoch
(
table
[
column_sym
])],
"median"
)]
else
[
average
([
extract_epoch
(
table
[
column_sym
])],
"median"
)]
end
end
def
extract_epoch
(
arel_attribute
)
Arel
.
sql
(
%Q{EXTRACT(EPOCH FROM "
#{
arel_attribute
.
relation
.
name
}
"."
#{
arel_attribute
.
name
}
")}
)
end
...
...
lib/gitlab/usage_data.rb
View file @
335ee79a
...
...
@@ -73,12 +73,7 @@ module Gitlab
end
def
cycle_analytics_usage_data
# We only want to generate this data for instances that use PostgreSQL
return
{}
if
Gitlab
::
Database
.
mysql?
projects
=
Project
.
sorted_by_activity
.
limit
(
Gitlab
::
CycleAnalytics
::
UsageData
::
PROJECTS_LIMIT
)
Gitlab
::
CycleAnalytics
::
UsageData
.
new
(
projects
,
{
from:
7
.
days
.
ago
}).
to_json
Gitlab
::
CycleAnalytics
::
UsageData
.
new
.
to_json
end
def
features_usage_data
...
...
spec/features/cycle_analytics_spec.rb
View file @
335ee79a
...
...
@@ -41,7 +41,7 @@ feature 'Cycle Analytics', :js do
allow_any_instance_of
(
Gitlab
::
ReferenceExtractor
).
to
receive
(
:issues
).
and_return
([
issue
])
project
.
add_master
(
user
)
create_cycle
@build
=
create_cycle
(
user
,
project
,
issue
,
mr
,
milestone
,
pipeline
)
deploy_master
sign_in
(
user
)
...
...
@@ -117,7 +117,7 @@ feature 'Cycle Analytics', :js do
project
.
add_guest
(
guest
)
allow_any_instance_of
(
Gitlab
::
ReferenceExtractor
).
to
receive
(
:issues
).
and_return
([
issue
])
create_cycle
create_cycle
(
user
,
project
,
issue
,
mr
,
milestone
,
pipeline
)
deploy_master
sign_in
(
guest
)
...
...
@@ -166,16 +166,6 @@ feature 'Cycle Analytics', :js do
expect
(
find
(
'.stage-events'
)).
to
have_content
(
"!
#{
mr
.
iid
}
"
)
end
def
create_cycle
issue
.
update
(
milestone:
milestone
)
pipeline
.
run
@build
=
create
(
:ci_build
,
pipeline:
pipeline
,
status: :success
,
author:
user
)
merge_merge_requests_closing_issue
(
issue
)
ProcessCommitWorker
.
new
.
perform
(
project
.
id
,
user
.
id
,
mr
.
commits
.
last
.
to_hash
)
end
def
click_stage
(
stage_name
)
find
(
'.stage-nav li'
,
text:
stage_name
).
click
wait_for_requests
...
...
spec/lib/gitlab/cycle_analytics/usage_data_spec.rb
0 → 100644
View file @
335ee79a
require
'spec_helper'
describe
Gitlab
::
CycleAnalytics
::
UsageData
do
let
(
:project
)
{
create
(
:project
,
:repository
)
}
let
(
:user
)
{
create
(
:user
,
:admin
)
}
let
(
:issue
)
{
create
(
:issue
,
project:
project
,
created_at:
2
.
days
.
ago
)
}
let
(
:milestone
)
{
create
(
:milestone
,
project:
project
)
}
let
(
:mr
)
{
create_merge_request_closing_issue
(
issue
,
commit_message:
"References
#{
issue
.
to_reference
}
"
)
}
let
(
:pipeline
)
{
create
(
:ci_empty_pipeline
,
status:
'created'
,
project:
project
,
ref:
mr
.
source_branch
,
sha:
mr
.
source_branch_sha
,
head_pipeline_of:
mr
)
}
subject
{
described_class
.
new
([
project
])
}
describe
'#to_json'
do
before
do
allow_any_instance_of
(
Gitlab
::
ReferenceExtractor
).
to
receive
(
:issues
).
and_return
([
issue
])
create_cycle
(
user
,
project
,
issue
,
mr
,
milestone
,
pipeline
)
deploy_master
end
it
'returns the aggregated usage data of every selected project'
do
result
=
subject
.
to_json
avg_cycle_analytics
=
result
[
:avg_cycle_analytics
]
expect
(
result
).
to
have_key
(
:avg_cycle_analytics
)
CycleAnalytics
::
STAGES
.
each
do
|
stage_name
|
stage_values
=
avg_cycle_analytics
[
stage_name
]
expect
(
avg_cycle_analytics
).
to
have_key
(
stage_name
)
expect
(
stage_values
).
to
have_key
(
:average
)
expect
(
stage_values
).
to
have_key
(
:sd
)
expect
(
stage_values
).
to
have_key
(
:missing
)
end
end
end
end
spec/lib/gitlab/database/median_spec.rb
0 → 100644
View file @
335ee79a
require
'spec_helper'
describe
Gitlab
::
Database
::
Median
do
describe
'#extract_medians'
do
context
'when using MySQL'
do
it
'returns an empty hash'
do
values
=
[[
"1"
,
"1000"
]]
allow
(
Gitlab
::
Database
).
to
receive
(
:mysql?
).
and_return
(
true
)
expect
(
described_class
.
new
.
extract_median
(
values
)).
eq
({})
end
end
end
end
spec/models/cycle_analytics_spec.rb
0 → 100644
View file @
335ee79a
require
'spec_helper'
describe
CycleAnalytics
do
let
(
:project
)
{
create
(
:project
,
:repository
)
}
let
(
:from_date
)
{
10
.
days
.
ago
}
let
(
:user
)
{
create
(
:user
,
:admin
)
}
let
(
:issue
)
{
create
(
:issue
,
project:
project
,
created_at:
2
.
days
.
ago
)
}
let
(
:milestone
)
{
create
(
:milestone
,
project:
project
)
}
let
(
:mr
)
{
create_merge_request_closing_issue
(
issue
,
commit_message:
"References
#{
issue
.
to_reference
}
"
)
}
let
(
:pipeline
)
{
create
(
:ci_empty_pipeline
,
status:
'created'
,
project:
project
,
ref:
mr
.
source_branch
,
sha:
mr
.
source_branch_sha
,
head_pipeline_of:
mr
)
}
subject
{
described_class
.
new
(
project
,
from:
from_date
)
}
describe
'#all_medians_per_stage'
do
before
do
allow_any_instance_of
(
Gitlab
::
ReferenceExtractor
).
to
receive
(
:issues
).
and_return
([
issue
])
create_cycle
(
user
,
project
,
issue
,
mr
,
milestone
,
pipeline
)
deploy_master
end
it
'returns every median for each stage for a specific project'
do
values
=
described_class
::
STAGES
.
each_with_object
({})
do
|
stage_name
,
hsh
|
hsh
[
stage_name
]
=
subject
[
stage_name
].
median
.
presence
end
expect
(
subject
.
all_medians_per_stage
).
to
eq
(
values
)
end
end
end
spec/support/cycle_analytics_helpers.rb
View file @
335ee79a
...
...
@@ -26,6 +26,18 @@ module CycleAnalyticsHelpers
ref:
'refs/heads/master'
).
execute
end
def
create_cycle
(
user
,
project
,
issue
,
mr
,
milestone
,
pipeline
)
issue
.
update
(
milestone:
milestone
)
pipeline
.
run
ci_build
=
create
(
:ci_build
,
pipeline:
pipeline
,
status: :success
,
author:
user
)
merge_merge_requests_closing_issue
(
issue
)
ProcessCommitWorker
.
new
.
perform
(
project
.
id
,
user
.
id
,
mr
.
commits
.
last
.
to_hash
)
ci_build
end
def
create_merge_request_closing_issue
(
issue
,
message:
nil
,
source_branch:
nil
,
commit_message:
'commit message'
)
if
!
source_branch
||
project
.
repository
.
commit
(
source_branch
).
blank?
source_branch
=
generate
(
:branch
)
...
...
spec/support/cycle_analytics_helpers/test_generation.rb
View file @
335ee79a
...
...
@@ -50,7 +50,7 @@ module CycleAnalyticsHelpers
end
median_time_difference
=
time_differences
.
sort
[
2
]
expect
(
subject
[
phase
].
median
.
presence
).
to
be_within
(
5
).
of
(
median_time_difference
)
expect
(
subject
[
phase
].
median
).
to
be_within
(
5
).
of
(
median_time_difference
)
end
context
"when the data belongs to another project"
do
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment