Commit f66f98f6 authored by Simon Schrottner's avatar Simon Schrottner

Add Caching to BitBucket Server Import for pull requests

The BitBucket Server Import is enhanced with Caching functionality,
this ensures, that already imported Pull Requests will not be imported
or attempted to be imported again. This is crucial as the
SidekiqMemoryKiller can interrupt the process and it will start from
scratch again.

relates to https://gitlab.com/gitlab-org/gitlab/-/issues/23586
parent 7729d4da
---
title: Add Caching to BitBucket Server Import for pull requests
merge_request: 45790
author: Simon Schrottner
type: performance
......@@ -4,11 +4,14 @@ module Gitlab
module BitbucketServerImport
class Importer
attr_reader :recover_missing_commits
attr_reader :project, :project_key, :repository_slug, :client, :errors, :users
attr_reader :project, :project_key, :repository_slug, :client, :errors, :users, :already_imported_cache_key
attr_accessor :logger
REMOTE_NAME = 'bitbucket_server'
BATCH_SIZE = 100
# The base cache key to use for tracking already imported objects.
ALREADY_IMPORTED_CACHE_KEY =
'bitbucket_server-importer/already-imported/%{project}/%{collection}'
TempBranch = Struct.new(:name, :sha)
......@@ -36,6 +39,12 @@ module Gitlab
@users = {}
@temp_branches = []
@logger = Gitlab::Import::Logger.build
@already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY %
{ project: project.id, collection: collection_method }
end
def collection_method
:pull_requests
end
def execute
......@@ -47,6 +56,7 @@ module Gitlab
log_info(stage: "complete")
Gitlab::Cache::Import::Caching.expire(already_imported_cache_key, 15.minutes.to_i)
true
end
......@@ -158,6 +168,7 @@ module Gitlab
# on the remote server. Then we have to issue a `git fetch` to download these
# branches.
def import_pull_requests
log_info(stage: 'import_pull_requests', message: 'starting')
pull_requests = client.pull_requests(project_key, repository_slug).to_a
# Creating branches on the server and fetching the newly-created branches
......@@ -167,7 +178,11 @@ module Gitlab
restore_branches(batch) if recover_missing_commits
batch.each do |pull_request|
if already_imported?(pull_request)
log_info(stage: 'import_pull_requests', message: 'already imported', iid: pull_request.iid)
else
import_bitbucket_pull_request(pull_request)
end
rescue StandardError => e
Gitlab::ErrorTracking.log_exception(
e,
......@@ -180,6 +195,19 @@ module Gitlab
end
end
# Returns true if the given object has already been imported, false
# otherwise.
#
# object - The object to check.
def already_imported?(pull_request)
Gitlab::Cache::Import::Caching.set_includes?(already_imported_cache_key, pull_request.iid)
end
# Marks the given object as "already imported".
def mark_as_imported(pull_request)
Gitlab::Cache::Import::Caching.set_add(already_imported_cache_key, pull_request.iid)
end
def delete_temp_branches
@temp_branches.each do |branch|
client.delete_branch(project_key, repository_slug, branch.name, branch.sha)
......@@ -227,6 +255,7 @@ module Gitlab
end
log_info(stage: 'import_bitbucket_pull_requests', message: 'finished', iid: pull_request.iid)
mark_as_imported(pull_request)
end
def import_pull_request_comments(pull_request, merge_request)
......
......@@ -115,6 +115,12 @@ RSpec.describe Gitlab::BitbucketServerImport::Importer do
allow(subject.client).to receive(:pull_requests).and_return([pull_request])
end
# As we are using Caching with redis, it is best to clean the cache after each test run, else we need to wait for
# the expiration by the importer
after do
Gitlab::Cache::Import::Caching.expire(subject.already_imported_cache_key, 0)
end
it 'imports merge event' do
expect(subject.client).to receive(:activities).and_return([merge_event])
......@@ -463,6 +469,47 @@ RSpec.describe Gitlab::BitbucketServerImport::Importer do
subject.execute
end
describe 'import pull requests with caching' do
let(:pull_request_already_imported) do
instance_double(
BitbucketServer::Representation::PullRequest,
iid: 11)
end
let(:pull_request_to_be_imported) do
instance_double(
BitbucketServer::Representation::PullRequest,
iid: 12,
source_branch_sha: sample.commits.last,
source_branch_name: Gitlab::Git::BRANCH_REF_PREFIX + sample.source_branch,
target_branch_sha: sample.commits.first,
target_branch_name: Gitlab::Git::BRANCH_REF_PREFIX + sample.target_branch,
title: 'This is a title',
description: 'This is a test pull request',
state: 'merged',
author: 'Test Author',
author_email: pull_request_author.email,
author_username: pull_request_author.username,
created_at: Time.now,
updated_at: Time.now,
raw: {},
merged?: true)
end
before do
Gitlab::Cache::Import::Caching.set_add(subject.already_imported_cache_key, pull_request_already_imported.iid)
allow(subject.client).to receive(:pull_requests).and_return([pull_request_to_be_imported, pull_request_already_imported])
end
it 'only imports one Merge Request, as the other on is in the cache' do
expect(subject.client).to receive(:activities).and_return([merge_event])
expect { subject.execute }.to change { MergeRequest.count }.by(1)
expect(Gitlab::Cache::Import::Caching.set_includes?(subject.already_imported_cache_key, pull_request_already_imported.iid)).to eq(true)
expect(Gitlab::Cache::Import::Caching.set_includes?(subject.already_imported_cache_key, pull_request_to_be_imported.iid)).to eq(true)
end
end
end
describe 'inaccessible branches' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment