Commit 307a96f5 authored by Kassio Borges's avatar Kassio Borges

GithubImporter: Count objects imported using github bulk import

Github Labels, Releases and Milestones uses a different approach to be
imported. These objects were not being counted, or logged, in the same
way of the rest of Github objects.

Related to: https://gitlab.com/gitlab-org/gitlab/-/issues/335199
MR: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/65773
parent 7678d206
---
name: import_redis_increment_by
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/65773
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/336226
milestone: '14.1'
type: development
group: group::import
default_enabled: false
......@@ -57,7 +57,7 @@ module Gitlab
# Sets a cache key to the given value.
#
# key - The cache key to write.
# raw_key - The cache key to write.
# value - The value to set.
# timeout - The time after which the cache key should expire.
def self.write(raw_key, value, timeout: TIMEOUT)
......@@ -73,7 +73,7 @@ module Gitlab
# Increment the integer value of a key by one.
# Sets the value to zero if missing before incrementing
#
# key - The cache key to increment.
# raw_key - The cache key to increment.
# timeout - The time after which the cache key should expire.
# @return - the incremented value
def self.increment(raw_key, timeout: TIMEOUT)
......@@ -85,6 +85,22 @@ module Gitlab
end
end
# Increment the integer value of a key by the given value.
# Sets the value to zero if missing before incrementing
#
# raw_key - The cache key to increment.
# value - The value to increment the key
# timeout - The time after which the cache key should expire.
# @return - the incremented value
def self.increment_by(raw_key, value, timeout: TIMEOUT)
key = cache_key_for(raw_key)
Redis::Cache.with do |redis|
redis.incrby(key, value)
redis.expire(key, timeout)
end
end
# Adds a value to a set.
#
# raw_key - The key of the set to add the value to.
......
......@@ -3,23 +3,60 @@
module Gitlab
module GithubImport
module BulkImporting
attr_reader :project, :client
# project - An instance of `Project`.
# client - An instance of `Gitlab::GithubImport::Client`.
def initialize(project, client)
@project = project
@client = client
end
# Builds and returns an Array of objects to bulk insert into the
# database.
#
# enum - An Enumerable that returns the objects to turn into database
# rows.
def build_database_rows(enum)
enum.each_with_object([]) do |(object, _), rows|
rows << build(object) unless already_imported?(object)
rows = enum.each_with_object([]) do |(object, _), result|
result << build(object) unless already_imported?(object)
end
log_and_increment_counter(rows.size, :fetched)
rows
end
# Bulk inserts the given rows into the database.
def bulk_insert(model, rows, batch_size: 100)
rows.each_slice(batch_size) do |slice|
Gitlab::Database.bulk_insert(model.table_name, slice) # rubocop:disable Gitlab/BulkInsert
log_and_increment_counter(slice.size, :imported)
end
end
def object_type
raise NotImplementedError
end
private
def log_and_increment_counter(value, operation)
Gitlab::Import::Logger.info(
import_source: :github,
project_id: project.id,
importer: self.class.name,
message: "#{value} #{object_type.to_s.pluralize} #{operation}"
)
Gitlab::GithubImport::ObjectCounter.increment(
project,
object_type,
operation,
value: value
)
end
end
end
end
......@@ -6,15 +6,9 @@ module Gitlab
class LabelsImporter
include BulkImporting
attr_reader :project, :client, :existing_labels
# project - An instance of `Project`.
# client - An instance of `Gitlab::GithubImport::Client`.
# rubocop: disable CodeReuse/ActiveRecord
def initialize(project, client)
@project = project
@client = client
@existing_labels = project.labels.pluck(:title).to_set
def existing_labels
@existing_labels ||= project.labels.pluck(:title).to_set
end
# rubocop: enable CodeReuse/ActiveRecord
......@@ -51,6 +45,10 @@ module Gitlab
def each_label
client.labels(project.import_source)
end
def object_type
:label
end
end
end
end
......
......@@ -6,15 +6,9 @@ module Gitlab
class MilestonesImporter
include BulkImporting
attr_reader :project, :client, :existing_milestones
# project - An instance of `Project`
# client - An instance of `Gitlab::GithubImport::Client`
# rubocop: disable CodeReuse/ActiveRecord
def initialize(project, client)
@project = project
@client = client
@existing_milestones = project.milestones.pluck(:iid).to_set
def existing_milestones
@existing_milestones ||= project.milestones.pluck(:iid).to_set
end
# rubocop: enable CodeReuse/ActiveRecord
......@@ -55,6 +49,10 @@ module Gitlab
def each_milestone
client.milestones(project.import_source, state: 'all')
end
def object_type
:milestone
end
end
end
end
......
......@@ -6,15 +6,9 @@ module Gitlab
class ReleasesImporter
include BulkImporting
attr_reader :project, :client, :existing_tags
# project - An instance of `Project`
# client - An instance of `Gitlab::GithubImport::Client`
# rubocop: disable CodeReuse/ActiveRecord
def initialize(project, client)
@project = project
@client = client
@existing_tags = project.releases.pluck(:tag).to_set
def existing_tags
@existing_tags ||= project.releases.pluck(:tag).to_set
end
# rubocop: enable CodeReuse/ActiveRecord
......@@ -50,6 +44,10 @@ module Gitlab
def description_for(release)
release.body.presence || "Release for tag #{release.tag_name}"
end
def object_type
:release
end
end
end
end
......
......@@ -14,11 +14,16 @@ module Gitlab
CACHING = Gitlab::Cache::Import::Caching
class << self
def increment(project, object_type, operation)
# Increments the project and the global counters if the given value is >= 1
def increment(project, object_type, operation, value: 1)
integer = value.to_i
return if integer <= 0
validate_operation!(operation)
increment_project_counter(project, object_type, operation)
increment_global_counter(object_type, operation)
increment_project_counter(project, object_type, operation, integer)
increment_global_counter(object_type, operation, integer)
end
def summary(project)
......@@ -41,7 +46,7 @@ module Gitlab
# and it's used to report the health of the Github Importer
# in the Grafana Dashboard
# https://dashboards.gitlab.net/d/2zgM_rImz/github-importer?orgId=1
def increment_global_counter(object_type, operation)
def increment_global_counter(object_type, operation, value)
key = GLOBAL_COUNTER_KEY % {
operation: operation,
object_type: object_type
......@@ -51,18 +56,26 @@ module Gitlab
object_type: object_type.to_s.humanize
}
Gitlab::Metrics.counter(key.to_sym, description).increment
Gitlab::Metrics.counter(key.to_sym, description).increment(by: value)
end
# Project counters are short lived, in Redis,
# and it's used to report how successful a project
# import was with the #summary method.
def increment_project_counter(project, object_type, operation)
counter_key = PROJECT_COUNTER_KEY % { project: project.id, operation: operation, object_type: object_type }
def increment_project_counter(project, object_type, operation, value)
counter_key = PROJECT_COUNTER_KEY % {
project: project.id,
operation: operation,
object_type: object_type
}
add_counter_to_list(project, operation, counter_key)
CACHING.increment(counter_key)
if Feature.disabled?(:import_redis_increment_by, default_enabled: :yaml)
CACHING.increment(counter_key)
else
CACHING.increment_by(counter_key, value)
end
end
def add_counter_to_list(project, operation, key)
......@@ -75,7 +88,7 @@ module Gitlab
def validate_operation!(operation)
unless operation.to_s.presence_in(OPERATIONS)
raise ArgumentError, "Operation must be #{OPERATIONS.join(' or ')}"
raise ArgumentError, "operation must be #{OPERATIONS.join(' or ')}"
end
end
end
......
......@@ -3,8 +3,20 @@
require 'spec_helper'
RSpec.describe Gitlab::GithubImport::BulkImporting do
let(:importer) do
Class.new { include(Gitlab::GithubImport::BulkImporting) }.new
let(:project) { instance_double(Project, id: 1) }
let(:importer) { MyImporter.new(project, double) }
let(:importer_class) do
Class.new do
include Gitlab::GithubImport::BulkImporting
def object_type
:object_type
end
end
end
before do
stub_const 'MyImporter', importer_class
end
describe '#build_database_rows' do
......@@ -21,6 +33,24 @@ RSpec.describe Gitlab::GithubImport::BulkImporting do
.with(object)
.and_return(false)
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
import_source: :github,
project_id: 1,
importer: 'MyImporter',
message: '1 object_types fetched'
)
expect(Gitlab::GithubImport::ObjectCounter)
.to receive(:increment)
.with(
project,
:object_type,
:fetched,
value: 1
)
enum = [[object, 1]].to_enum
expect(importer.build_database_rows(enum)).to eq([{ title: 'Foo' }])
......@@ -37,6 +67,24 @@ RSpec.describe Gitlab::GithubImport::BulkImporting do
.with(object)
.and_return(true)
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
import_source: :github,
project_id: 1,
importer: 'MyImporter',
message: '0 object_types fetched'
)
expect(Gitlab::GithubImport::ObjectCounter)
.to receive(:increment)
.with(
project,
:object_type,
:fetched,
value: 0
)
enum = [[object, 1]].to_enum
expect(importer.build_database_rows(enum)).to be_empty
......@@ -48,6 +96,26 @@ RSpec.describe Gitlab::GithubImport::BulkImporting do
rows = [{ title: 'Foo' }] * 10
model = double(:model, table_name: 'kittens')
expect(Gitlab::Import::Logger)
.to receive(:info)
.twice
.with(
import_source: :github,
project_id: 1,
importer: 'MyImporter',
message: '5 object_types imported'
)
expect(Gitlab::GithubImport::ObjectCounter)
.to receive(:increment)
.twice
.with(
project,
:object_type,
:imported,
value: 5
)
expect(Gitlab::Database)
.to receive(:bulk_insert)
.ordered
......
......@@ -7,7 +7,7 @@ RSpec.describe Gitlab::GithubImport::ObjectCounter, :clean_gitlab_redis_cache do
it 'validates the operation being incremented' do
expect { described_class.increment(project, :issue, :unknown) }
.to raise_error(ArgumentError, 'Operation must be fetched or imported')
.to raise_error(ArgumentError, 'operation must be fetched or imported')
end
it 'increments the counter and saves the key to be listed in the summary later' do
......@@ -33,4 +33,20 @@ RSpec.describe Gitlab::GithubImport::ObjectCounter, :clean_gitlab_redis_cache do
'imported' => { 'issue' => 2 }
})
end
it 'does not increment the counter if the given value is <= 0' do
expect(Gitlab::Metrics)
.not_to receive(:counter)
expect(Gitlab::Metrics)
.not_to receive(:counter)
described_class.increment(project, :issue, :fetched, value: 0)
described_class.increment(project, :issue, :imported, value: nil)
expect(described_class.summary(project)).to eq({
'fetched' => {},
'imported' => {}
})
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment