Commit 825a9fd7 authored by Nick Thomas's avatar Nick Thomas Committed by Igor Drozdov

Speed up pull mirroring by asking Gitaly if tags have changed

Currently, pull mirroring will fetch the full list of tags for a
repository twice - before and after the pull - and use the diff between
them to work out which new tags have been added.

This seems unavoidable with the current output of `git fetch`, but we
*can* skip doing the second call (and cache invalidation) if there have
been no tag changes. This is the common, happy path, so should be worth
implementing.
parent 534b0be4
...@@ -465,7 +465,7 @@ group :ed25519 do ...@@ -465,7 +465,7 @@ group :ed25519 do
end end
# Gitaly GRPC protocol definitions # Gitaly GRPC protocol definitions
gem 'gitaly', '~> 13.7.0.pre.rc1' gem 'gitaly', '~> 13.8.0.pre.rc2'
gem 'grpc', '~> 1.30.2' gem 'grpc', '~> 1.30.2'
......
...@@ -417,7 +417,7 @@ GEM ...@@ -417,7 +417,7 @@ GEM
rails (>= 3.2.0) rails (>= 3.2.0)
git (1.7.0) git (1.7.0)
rchardet (~> 1.8) rchardet (~> 1.8)
gitaly (13.7.0.pre.rc1) gitaly (13.8.0.pre.rc2)
grpc (~> 1.0) grpc (~> 1.0)
github-markup (1.7.0) github-markup (1.7.0)
gitlab-chronic (0.10.5) gitlab-chronic (0.10.5)
...@@ -1358,7 +1358,7 @@ DEPENDENCIES ...@@ -1358,7 +1358,7 @@ DEPENDENCIES
gettext (~> 3.3) gettext (~> 3.3)
gettext_i18n_rails (~> 1.8.0) gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.3) gettext_i18n_rails_js (~> 1.3)
gitaly (~> 13.7.0.pre.rc1) gitaly (~> 13.8.0.pre.rc2)
github-markup (~> 1.7.0) github-markup (~> 1.7.0)
gitlab-chronic (~> 0.10.5) gitlab-chronic (~> 0.10.5)
gitlab-experiment (~> 0.4.5) gitlab-experiment (~> 0.4.5)
......
...@@ -297,7 +297,7 @@ module EE ...@@ -297,7 +297,7 @@ module EE
mirror? && !empty_repo? mirror? && !empty_repo?
end end
def fetch_mirror(forced: false) def fetch_mirror(forced: false, check_tags_changed: false)
return unless mirror? return unless mirror?
# Only send the password if it's needed # Only send the password if it's needed
...@@ -308,7 +308,7 @@ module EE ...@@ -308,7 +308,7 @@ module EE
username_only_import_url username_only_import_url
end end
repository.fetch_upstream(url, forced: forced) repository.fetch_upstream(url, forced: forced, check_tags_changed: check_tags_changed)
end end
def can_override_approvers? def can_override_approvers?
......
...@@ -33,9 +33,10 @@ module EE ...@@ -33,9 +33,10 @@ module EE
expire_content_cache expire_content_cache
end end
def fetch_upstream(url, forced: false) def fetch_upstream(url, forced: false, check_tags_changed: false)
add_remote(MIRROR_REMOTE, url) add_remote(MIRROR_REMOTE, url)
fetch_remote(MIRROR_REMOTE, ssh_auth: project&.import_data, forced: forced)
fetch_remote(MIRROR_REMOTE, ssh_auth: project&.import_data, forced: forced, check_tags_changed: check_tags_changed)
end end
def upstream_branches def upstream_branches
......
...@@ -33,7 +33,7 @@ module Projects ...@@ -33,7 +33,7 @@ module Projects
checksum_before = project.repository.checksum checksum_before = project.repository.checksum
update_tags do update_tags do
project.fetch_mirror(forced: true) project.fetch_mirror(forced: true, check_tags_changed: Feature.enabled?(:fetch_mirror_check_tags_changed, project))
end end
update_branches update_branches
...@@ -99,7 +99,7 @@ module Projects ...@@ -99,7 +99,7 @@ module Projects
old_tags = repository_tags_with_target.each_with_object({}) { |tag, tags| tags[tag.name] = tag } old_tags = repository_tags_with_target.each_with_object({}) { |tag, tags| tags[tag.name] = tag }
fetch_result = yield fetch_result = yield
return fetch_result unless fetch_result return fetch_result unless fetch_result&.tags_changed
repository.expire_tags_cache repository.expire_tags_cache
......
---
name: fetch_mirror_check_tags_changed
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/49629
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/292602
milestone: '13.8'
type: development
group: group::source code
default_enabled: false
...@@ -896,7 +896,7 @@ RSpec.describe Project do ...@@ -896,7 +896,7 @@ RSpec.describe Project do
let(:project) { build(:project, :mirror, import_url: import_url, import_data_attributes: { auth_method: auth_method } ) } let(:project) { build(:project, :mirror, import_url: import_url, import_data_attributes: { auth_method: auth_method } ) }
specify do specify do
expect(project.repository).to receive(:fetch_upstream).with(expected, forced: false) expect(project.repository).to receive(:fetch_upstream).with(expected, forced: false, check_tags_changed: false)
project.fetch_mirror project.fetch_mirror
end end
......
...@@ -100,14 +100,24 @@ RSpec.describe Projects::UpdateMirrorService do ...@@ -100,14 +100,24 @@ RSpec.describe Projects::UpdateMirrorService do
end end
context "updating tags" do context "updating tags" do
it "creates new tags" do it "creates new tags, expiring cache if there are tag changes" do
stub_fetch_mirror(project) stub_fetch_mirror(project)
expect(project.repository).to receive(:expire_tags_cache).and_call_original
service.execute service.execute
expect(project.repository.tag_names).to include('new-tag') expect(project.repository.tag_names).to include('new-tag')
end end
it 'does not expire cache if there are no tag changes' do
stub_fetch_mirror(project, tags_changed: false)
expect(project.repository).not_to receive(:expire_tags_cache)
service.execute
end
it "only invokes Git::TagPushService for tags pointing to commits" do it "only invokes Git::TagPushService for tags pointing to commits" do
stub_fetch_mirror(project) stub_fetch_mirror(project)
...@@ -429,11 +439,11 @@ RSpec.describe Projects::UpdateMirrorService do ...@@ -429,11 +439,11 @@ RSpec.describe Projects::UpdateMirrorService do
end end
end end
def stub_fetch_mirror(project, repository: project.repository) def stub_fetch_mirror(project, repository: project.repository, tags_changed: true )
allow(project).to receive(:fetch_mirror) { fetch_mirror(repository) } allow(project).to receive(:fetch_mirror) { fetch_mirror(repository, tags_changed: tags_changed) }
end end
def fetch_mirror(repository) def fetch_mirror(repository, tags_changed: true)
rugged = rugged_repo(repository) rugged = rugged_repo(repository)
masterrev = repository.find_branch("master").dereferenced_target.id masterrev = repository.find_branch("master").dereferenced_target.id
...@@ -457,6 +467,8 @@ RSpec.describe Projects::UpdateMirrorService do ...@@ -457,6 +467,8 @@ RSpec.describe Projects::UpdateMirrorService do
# New tag that point to a blob # New tag that point to a blob
rugged.references.create('refs/tags/new-tag-on-blob', 'c74175afd117781cbc983664339a0f599b5bb34e') rugged.references.create('refs/tags/new-tag-on-blob', 'c74175afd117781cbc983664339a0f599b5bb34e')
Gitaly::FetchRemoteResponse.new(tags_changed: tags_changed)
end end
def modify_tag(repository, tag_name) def modify_tag(repository, tag_name)
......
...@@ -801,7 +801,8 @@ module Gitlab ...@@ -801,7 +801,8 @@ module Gitlab
# forced - should we use --force flag? # forced - should we use --force flag?
# no_tags - should we use --no-tags flag? # no_tags - should we use --no-tags flag?
# prune - should we use --prune flag? # prune - should we use --prune flag?
def fetch_remote(remote, ssh_auth: nil, forced: false, no_tags: false, prune: true) # check_tags_changed - should we ask gitaly to calculate whether any tags changed?
def fetch_remote(remote, ssh_auth: nil, forced: false, no_tags: false, prune: true, check_tags_changed: false)
wrapped_gitaly_errors do wrapped_gitaly_errors do
gitaly_repository_client.fetch_remote( gitaly_repository_client.fetch_remote(
remote, remote,
...@@ -809,6 +810,7 @@ module Gitlab ...@@ -809,6 +810,7 @@ module Gitlab
forced: forced, forced: forced,
no_tags: no_tags, no_tags: no_tags,
prune: prune, prune: prune,
check_tags_changed: check_tags_changed,
timeout: GITLAB_PROJECTS_TIMEOUT timeout: GITLAB_PROJECTS_TIMEOUT
) )
end end
......
...@@ -70,10 +70,11 @@ module Gitlab ...@@ -70,10 +70,11 @@ module Gitlab
end.join end.join
end end
def fetch_remote(remote, ssh_auth:, forced:, no_tags:, timeout:, prune: true) def fetch_remote(remote, ssh_auth:, forced:, no_tags:, timeout:, prune: true, check_tags_changed: false)
request = Gitaly::FetchRemoteRequest.new( request = Gitaly::FetchRemoteRequest.new(
repository: @gitaly_repo, remote: remote, force: forced, repository: @gitaly_repo, remote: remote, force: forced,
no_tags: no_tags, timeout: timeout, no_prune: !prune no_tags: no_tags, timeout: timeout, no_prune: !prune,
check_tags_changed: check_tags_changed
) )
if ssh_auth&.ssh_mirror_url? if ssh_auth&.ssh_mirror_url?
......
...@@ -520,12 +520,13 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do ...@@ -520,12 +520,13 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do
forced: true, forced: true,
no_tags: true, no_tags: true,
timeout: described_class::GITLAB_PROJECTS_TIMEOUT, timeout: described_class::GITLAB_PROJECTS_TIMEOUT,
prune: false prune: false,
check_tags_changed: false
} }
expect(repository.gitaly_repository_client).to receive(:fetch_remote).with('remote-name', expected_opts) expect(repository.gitaly_repository_client).to receive(:fetch_remote).with('remote-name', expected_opts)
repository.fetch_remote('remote-name', ssh_auth: ssh_auth, forced: true, no_tags: true, prune: false) repository.fetch_remote('remote-name', ssh_auth: ssh_auth, forced: true, no_tags: true, prune: false, check_tags_changed: false)
end end
it_behaves_like 'wrapping gRPC errors', Gitlab::GitalyClient::RepositoryService, :fetch_remote do it_behaves_like 'wrapping gRPC errors', Gitlab::GitalyClient::RepositoryService, :fetch_remote do
......
...@@ -131,7 +131,8 @@ RSpec.describe Gitlab::GitalyClient::RepositoryService do ...@@ -131,7 +131,8 @@ RSpec.describe Gitlab::GitalyClient::RepositoryService do
known_hosts: '', known_hosts: '',
force: false, force: false,
no_tags: false, no_tags: false,
no_prune: false no_prune: false,
check_tags_changed: false
) )
expect_any_instance_of(Gitaly::RepositoryService::Stub) expect_any_instance_of(Gitaly::RepositoryService::Stub)
...@@ -139,7 +140,7 @@ RSpec.describe Gitlab::GitalyClient::RepositoryService do ...@@ -139,7 +140,7 @@ RSpec.describe Gitlab::GitalyClient::RepositoryService do
.with(expected_request, kind_of(Hash)) .with(expected_request, kind_of(Hash))
.and_return(double(value: true)) .and_return(double(value: true))
client.fetch_remote(remote, ssh_auth: nil, forced: false, no_tags: false, timeout: 1) client.fetch_remote(remote, ssh_auth: nil, forced: false, no_tags: false, timeout: 1, check_tags_changed: false)
end end
context 'SSH auth' do context 'SSH auth' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment