Commit f66f98f6 authored by Simon Schrottner's avatar Simon Schrottner

Add Caching to BitBucket Server Import for pull requests

The BitBucket Server Import is enhanced with Caching functionality,
this ensures, that already imported Pull Requests will not be imported
or attempted to be imported again. This is crucial as the
SidekiqMemoryKiller can interrupt the process and it will start from
scratch again.

relates to https://gitlab.com/gitlab-org/gitlab/-/issues/23586
parent 7729d4da
---
title: Add Caching to BitBucket Server Import for pull requests
merge_request: 45790
author: Simon Schrottner
type: performance
...@@ -4,11 +4,14 @@ module Gitlab ...@@ -4,11 +4,14 @@ module Gitlab
module BitbucketServerImport module BitbucketServerImport
class Importer class Importer
attr_reader :recover_missing_commits attr_reader :recover_missing_commits
attr_reader :project, :project_key, :repository_slug, :client, :errors, :users attr_reader :project, :project_key, :repository_slug, :client, :errors, :users, :already_imported_cache_key
attr_accessor :logger attr_accessor :logger
REMOTE_NAME = 'bitbucket_server' REMOTE_NAME = 'bitbucket_server'
BATCH_SIZE = 100 BATCH_SIZE = 100
# The base cache key to use for tracking already imported objects.
ALREADY_IMPORTED_CACHE_KEY =
'bitbucket_server-importer/already-imported/%{project}/%{collection}'
TempBranch = Struct.new(:name, :sha) TempBranch = Struct.new(:name, :sha)
...@@ -36,6 +39,12 @@ module Gitlab ...@@ -36,6 +39,12 @@ module Gitlab
@users = {} @users = {}
@temp_branches = [] @temp_branches = []
@logger = Gitlab::Import::Logger.build @logger = Gitlab::Import::Logger.build
@already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY %
{ project: project.id, collection: collection_method }
end
def collection_method
:pull_requests
end end
def execute def execute
...@@ -47,6 +56,7 @@ module Gitlab ...@@ -47,6 +56,7 @@ module Gitlab
log_info(stage: "complete") log_info(stage: "complete")
Gitlab::Cache::Import::Caching.expire(already_imported_cache_key, 15.minutes.to_i)
true true
end end
...@@ -158,6 +168,7 @@ module Gitlab ...@@ -158,6 +168,7 @@ module Gitlab
# on the remote server. Then we have to issue a `git fetch` to download these # on the remote server. Then we have to issue a `git fetch` to download these
# branches. # branches.
def import_pull_requests def import_pull_requests
log_info(stage: 'import_pull_requests', message: 'starting')
pull_requests = client.pull_requests(project_key, repository_slug).to_a pull_requests = client.pull_requests(project_key, repository_slug).to_a
# Creating branches on the server and fetching the newly-created branches # Creating branches on the server and fetching the newly-created branches
...@@ -167,7 +178,11 @@ module Gitlab ...@@ -167,7 +178,11 @@ module Gitlab
restore_branches(batch) if recover_missing_commits restore_branches(batch) if recover_missing_commits
batch.each do |pull_request| batch.each do |pull_request|
import_bitbucket_pull_request(pull_request) if already_imported?(pull_request)
log_info(stage: 'import_pull_requests', message: 'already imported', iid: pull_request.iid)
else
import_bitbucket_pull_request(pull_request)
end
rescue StandardError => e rescue StandardError => e
Gitlab::ErrorTracking.log_exception( Gitlab::ErrorTracking.log_exception(
e, e,
...@@ -180,6 +195,19 @@ module Gitlab ...@@ -180,6 +195,19 @@ module Gitlab
end end
end end
# Returns true if the given object has already been imported, false
# otherwise.
#
# object - The object to check.
def already_imported?(pull_request)
Gitlab::Cache::Import::Caching.set_includes?(already_imported_cache_key, pull_request.iid)
end
# Marks the given object as "already imported".
def mark_as_imported(pull_request)
Gitlab::Cache::Import::Caching.set_add(already_imported_cache_key, pull_request.iid)
end
def delete_temp_branches def delete_temp_branches
@temp_branches.each do |branch| @temp_branches.each do |branch|
client.delete_branch(project_key, repository_slug, branch.name, branch.sha) client.delete_branch(project_key, repository_slug, branch.name, branch.sha)
...@@ -227,6 +255,7 @@ module Gitlab ...@@ -227,6 +255,7 @@ module Gitlab
end end
log_info(stage: 'import_bitbucket_pull_requests', message: 'finished', iid: pull_request.iid) log_info(stage: 'import_bitbucket_pull_requests', message: 'finished', iid: pull_request.iid)
mark_as_imported(pull_request)
end end
def import_pull_request_comments(pull_request, merge_request) def import_pull_request_comments(pull_request, merge_request)
......
...@@ -115,6 +115,12 @@ RSpec.describe Gitlab::BitbucketServerImport::Importer do ...@@ -115,6 +115,12 @@ RSpec.describe Gitlab::BitbucketServerImport::Importer do
allow(subject.client).to receive(:pull_requests).and_return([pull_request]) allow(subject.client).to receive(:pull_requests).and_return([pull_request])
end end
# As we are using Caching with redis, it is best to clean the cache after each test run, else we need to wait for
# the expiration by the importer
after do
Gitlab::Cache::Import::Caching.expire(subject.already_imported_cache_key, 0)
end
it 'imports merge event' do it 'imports merge event' do
expect(subject.client).to receive(:activities).and_return([merge_event]) expect(subject.client).to receive(:activities).and_return([merge_event])
...@@ -463,6 +469,47 @@ RSpec.describe Gitlab::BitbucketServerImport::Importer do ...@@ -463,6 +469,47 @@ RSpec.describe Gitlab::BitbucketServerImport::Importer do
subject.execute subject.execute
end end
describe 'import pull requests with caching' do
let(:pull_request_already_imported) do
instance_double(
BitbucketServer::Representation::PullRequest,
iid: 11)
end
let(:pull_request_to_be_imported) do
instance_double(
BitbucketServer::Representation::PullRequest,
iid: 12,
source_branch_sha: sample.commits.last,
source_branch_name: Gitlab::Git::BRANCH_REF_PREFIX + sample.source_branch,
target_branch_sha: sample.commits.first,
target_branch_name: Gitlab::Git::BRANCH_REF_PREFIX + sample.target_branch,
title: 'This is a title',
description: 'This is a test pull request',
state: 'merged',
author: 'Test Author',
author_email: pull_request_author.email,
author_username: pull_request_author.username,
created_at: Time.now,
updated_at: Time.now,
raw: {},
merged?: true)
end
before do
Gitlab::Cache::Import::Caching.set_add(subject.already_imported_cache_key, pull_request_already_imported.iid)
allow(subject.client).to receive(:pull_requests).and_return([pull_request_to_be_imported, pull_request_already_imported])
end
it 'only imports one Merge Request, as the other on is in the cache' do
expect(subject.client).to receive(:activities).and_return([merge_event])
expect { subject.execute }.to change { MergeRequest.count }.by(1)
expect(Gitlab::Cache::Import::Caching.set_includes?(subject.already_imported_cache_key, pull_request_already_imported.iid)).to eq(true)
expect(Gitlab::Cache::Import::Caching.set_includes?(subject.already_imported_cache_key, pull_request_to_be_imported.iid)).to eq(true)
end
end
end end
describe 'inaccessible branches' do describe 'inaccessible branches' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment