Commit 1ef39a79 authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch '5447-geo-use-git-clone-for-first-sync-instead-of-git-fetch' into 'master'

Geo: Use `git clone` for first sync instead of `git fetch`

See merge request gitlab-org/gitlab!77143
parents ebc6ad98 81d2c45c
......@@ -481,7 +481,7 @@ gem 'ssh_data', '~> 1.2'
gem 'spamcheck', '~> 0.1.0'
# Gitaly GRPC protocol definitions
gem 'gitaly', '~> 14.9.0.pre.rc4'
gem 'gitaly', '~> 14.9.0-rc5'
# KAS GRPC protocol definitions
gem 'kas-grpc', '~> 0.0.2'
......
......@@ -455,7 +455,7 @@ GEM
rails (>= 3.2.0)
git (1.7.0)
rchardet (~> 1.8)
gitaly (14.9.0.pre.rc4)
gitaly (14.9.0.pre.rc5)
grpc (~> 1.0)
github-markup (1.7.0)
gitlab (4.16.1)
......@@ -534,7 +534,7 @@ GEM
signet (~> 0.12)
google-cloud-env (1.5.0)
faraday (>= 0.17.3, < 2.0)
google-protobuf (3.19.1)
google-protobuf (3.19.4)
googleapis-common-protos-types (1.3.0)
google-protobuf (~> 3.14)
googleauth (0.14.0)
......@@ -1492,7 +1492,7 @@ DEPENDENCIES
gettext (~> 3.3)
gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.3)
gitaly (~> 14.9.0.pre.rc4)
gitaly (~> 14.9.0.pre.rc5)
github-markup (~> 1.7.0)
gitlab-chronic (~> 0.10.5)
gitlab-dangerfiles (~> 3.0)
......
......@@ -941,6 +941,10 @@ class Repository
end
end
def clone_as_mirror(url, http_authorization_header: "")
import_repository(url, http_authorization_header: http_authorization_header, mirror: true)
end
def fetch_as_mirror(url, forced: false, refmap: :all_refs, prune: true, http_authorization_header: "")
fetch_remote(url, refmap: refmap, forced: forced, prune: prune, http_authorization_header: http_authorization_header)
end
......
---
name: geo_use_clone_on_first_sync
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/77143
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/357462
milestone: '14.10'
type: development
group: group::geo
default_enabled: false
......@@ -78,14 +78,20 @@ module Geo
redownload_repository
@new_repository = true
elsif repository.exists?
fetch_geo_mirror(repository)
fetch_geo_mirror
else
ensure_repository
# Because we ensure a repository exists by this point, we need to
# mark it as new, even if fetching the mirror fails, we should run
# housekeeping to enable object deduplication to run
@new_repository = true
fetch_geo_mirror(repository)
if Feature.enabled?('geo_use_clone_on_first_sync', default_enabled: :yaml)
clone_geo_mirror
@new_repository = true
else
ensure_repository
# Because we ensure a repository exists by this point, we need to
# mark it as new, even if fetching the mirror fails, we should run
# housekeeping to enable object deduplication to run
@new_repository = true
fetch_geo_mirror
end
end
update_root_ref
......@@ -102,9 +108,13 @@ module Geo
log_info("Attempting to fetch repository via git")
# `git fetch` needs an empty bare repository to fetch into
temp_repo.create_repository
fetch_geo_mirror(temp_repo)
if Feature.enabled?('geo_use_clone_on_first_sync', default_enabled: :yaml)
clone_geo_mirror(target_repository: temp_repo)
temp_repo.create_repository unless temp_repo.exists?
else
temp_repo.create_repository
fetch_geo_mirror(target_repository: temp_repo)
end
set_temp_repository_as_main
ensure
......@@ -115,9 +125,19 @@ module Geo
::Gitlab::Geo.current_node
end
def fetch_geo_mirror(repository)
# Updates an existing repository using JWT authentication mechanism
#
# @param [Repository] target_repository specify a different temporary repository (default: current repository)
def fetch_geo_mirror(target_repository: repository)
# Fetch the repository, using a JWT header for authentication
repository.fetch_as_mirror(replicator.remote_url, forced: true, http_authorization_header: replicator.jwt_authentication_header)
target_repository.fetch_as_mirror(replicator.remote_url, forced: true, http_authorization_header: replicator.jwt_authentication_header)
end
# Clone a Geo repository using JWT authentication mechanism
#
# @param [Repository] target_repository specify a different temporary repository (default: current repository)
def clone_geo_mirror(target_repository: repository)
target_repository.clone_as_mirror(replicator.remote_url, http_authorization_header: replicator.jwt_authentication_header)
end
# Use snapshotting for redownloads *only* when enabled.
......
......@@ -41,6 +41,10 @@ module Geo
LEASE_TIMEOUT
end
def repository
raise NotImplementedError, 'Define a reference to the repository'
end
private
def fetch_repository
......@@ -57,14 +61,21 @@ module Geo
redownload_repository
@new_repository = true
elsif repository.exists?
fetch_geo_mirror(repository)
fetch_geo_mirror
else
ensure_repository
# Because we ensure a repository exists by this point, we need to
# mark it as new, even if fetching the mirror fails, we should run
# housekeeping to enable object deduplication to run
@new_repository = true
fetch_geo_mirror(repository)
if Feature.enabled?('geo_use_clone_on_first_sync', default_enabled: :yaml)
clone_geo_mirror
@new_repository = true
else
ensure_repository
# Because we ensure a repository exists by this point, we need to
# mark it as new, even if fetching the mirror fails, we should run
# housekeeping to enable object deduplication to run
@new_repository = true
fetch_geo_mirror
end
end
update_root_ref
......@@ -85,9 +96,13 @@ module Geo
log_info("Attempting to fetch repository via git")
# `git fetch` needs an empty bare repository to fetch into
temp_repo.create_repository
fetch_geo_mirror(temp_repo)
if Feature.enabled?('geo_use_clone_on_first_sync', default_enabled: :yaml)
clone_geo_mirror(target_repository: temp_repo)
else
# `git fetch` needs an empty bare repository to fetch into
temp_repo.create_repository
fetch_geo_mirror(target_repository: temp_repo)
end
set_temp_repository_as_main
ensure
......@@ -98,9 +113,22 @@ module Geo
::Gitlab::Geo.current_node
end
def fetch_geo_mirror(repository)
# Updates an existing repository using JWT authentication mechanism
#
# @param [Repository] target_repository specify a different temporary repository (default: current repository)
def fetch_geo_mirror(target_repository: repository)
# Fetch the repository, using a JWT header for authentication
repository.fetch_as_mirror(remote_url, forced: true, http_authorization_header: jwt_authentication_header)
target_repository.fetch_as_mirror(remote_url,
forced: true,
http_authorization_header: jwt_authentication_header)
end
# Clone a Geo repository using JWT authentication mechanism
#
# @param [Repository] target_repository specify a different temporary repository (default: current repository)
def clone_geo_mirror(target_repository: repository)
target_repository.clone_as_mirror(remote_url,
http_authorization_header: jwt_authentication_header)
end
# Build a JWT header for authentication
......
......@@ -3,7 +3,16 @@
require 'spec_helper'
RSpec.describe Geo::RepositoryBaseSyncService do
include ::EE::GeoHelpers
let(:project) { build('project') }
let(:repository) { project.repository }
let_it_be(:geo_primary) { create(:geo_node, :primary) }
let_it_be(:geo_secondary) { create(:geo_node, :secondary) }
before do
stub_current_geo_node(geo_secondary)
end
subject { described_class.new(project) }
......@@ -15,4 +24,38 @@ RSpec.describe Geo::RepositoryBaseSyncService do
expect(subject.lease_key).to eq('geo_sync_service:wiki:999')
end
end
describe '#lease_timeout' do
it 'returns a lease timeout value' do
expect(subject.lease_timeout). to eq(8.hours)
end
end
describe '#repository' do
it 'raises a NotImplementedError' do
expect { subject.repository }.to raise_error(NotImplementedError)
end
end
context 'with a repository defined' do
before do
allow(subject).to receive(:repository) { repository }
end
describe '#fetch_geo_mirror' do
it 'delegates to repository#fetch_as_mirror' do
expect(repository).to receive(:fetch_as_mirror)
subject.send(:fetch_geo_mirror)
end
end
describe '#clone_geo_mirror' do
it 'delegates to repository#clone_as_mirror' do
expect(repository).to receive(:clone_as_mirror)
subject.send(:clone_geo_mirror)
end
end
end
end
......@@ -62,9 +62,14 @@ RSpec.shared_examples 'geo base sync fetch' do
describe '#fetch_repository' do
let(:fetch_repository) { subject.send(:fetch_repository) }
let(:temp_repo) { subject.send(:temp_repo) }
before do
allow(subject).to receive(:fetch_geo_mirror).and_return(true)
allow(subject).to receive(:clone_geo_mirror).and_return(true)
allow(subject).to receive(:clone_geo_mirror).with(target_repository: temp_repo) do
temp_repo.create_repository
end
allow(repository).to receive(:update_root_ref)
end
......@@ -74,25 +79,27 @@ RSpec.shared_examples 'geo base sync fetch' do
fetch_repository
end
it 'fetches repository from geo node' do
is_expected.to receive(:fetch_geo_mirror).with(subject.send(:repository))
fetch_repository
end
it 'syncs the HEAD ref' do
expect(repository).to receive(:update_root_ref)
fetch_repository
end
context 'repository does not exist' do
before do
allow_any_instance_of(Repository).to receive(:exists?) { false }
context 'with existing repository' do
it 'fetches repository from geo node' do
subject.send(:ensure_repository)
is_expected.to receive(:fetch_geo_mirror)
fetch_repository
end
end
context 'with a never synced repository' do
it 'clones repository from geo node' do
allow(repository).to receive(:exists?) { false }
it 'ensures repository is created' do
is_expected.to receive(:ensure_repository)
is_expected.to receive(:clone_geo_mirror)
fetch_repository
end
......@@ -109,9 +116,11 @@ RSpec.shared_examples 'sync retries use the snapshot RPC' do
end
it 'does not attempt to snapshot for initial sync' do
allow(repository).to receive(:exists?) { false }
expect(repository).not_to receive_create_from_snapshot
expect(temp_repo).not_to receive_create_from_snapshot
expect(subject).to receive(:fetch_geo_mirror).with(repository)
expect(subject).to receive(:clone_geo_mirror)
subject.execute
end
......@@ -121,7 +130,7 @@ RSpec.shared_examples 'sync retries use the snapshot RPC' do
expect(repository).not_to receive_create_from_snapshot
expect(temp_repo).not_to receive_create_from_snapshot
expect(subject).to receive(:fetch_geo_mirror).with(repository)
expect(subject).to receive(:fetch_geo_mirror)
subject.execute
end
......@@ -132,16 +141,17 @@ RSpec.shared_examples 'sync retries use the snapshot RPC' do
it 'attempts to snapshot' do
expect(repository).not_to receive_create_from_snapshot
expect(temp_repo).to receive_create_from_snapshot
expect(subject).not_to receive(:fetch_geo_mirror).with(temp_repo)
expect(subject).not_to receive(:fetch_geo_mirror)
expect(subject).not_to receive(:clone_geo_mirror)
expect(subject).to receive(:set_temp_repository_as_main)
subject.execute
end
it 'attempts to fetch if snapshotting raises an exception' do
it 'attempts to clone if snapshotting raises an exception' do
expect(repository).not_to receive_create_from_snapshot
expect(temp_repo).to receive_create_from_snapshot.and_raise(ArgumentError)
expect(subject).to receive(:fetch_geo_mirror).with(temp_repo)
expect(subject).to receive(:clone_geo_mirror)
subject.execute
end
......
......@@ -841,11 +841,11 @@ module Gitlab
end
end
def import_repository(url)
def import_repository(url, http_authorization_header: '', mirror: false)
raise ArgumentError, "don't use disk paths with import_repository: #{url.inspect}" if url.start_with?('.', '/')
wrapped_gitaly_errors do
gitaly_repository_client.import_repository(url)
gitaly_repository_client.import_repository(url, http_authorization_header: http_authorization_header, mirror: mirror)
end
end
......
......@@ -145,10 +145,12 @@ module Gitlab
)
end
def import_repository(source)
def import_repository(source, http_authorization_header: '', mirror: false)
request = Gitaly::CreateRepositoryFromURLRequest.new(
repository: @gitaly_repo,
url: source
url: source,
http_authorization_header: http_authorization_header,
mirror: mirror
)
GitalyClient.call(
......
......@@ -2448,7 +2448,7 @@ RSpec.describe Gitlab::Git::Repository, :seed_helper do
it 'delegates to Gitaly' do
expect_next_instance_of(Gitlab::GitalyClient::RepositoryService) do |svc|
expect(svc).to receive(:import_repository).with(url).and_return(nil)
expect(svc).to receive(:import_repository).with(url, http_authorization_header: '', mirror: false).and_return(nil)
end
repository.import_repository(url)
......
......@@ -22,7 +22,7 @@ RSpec.describe ProjectImportState, type: :model do
before do
allow_any_instance_of(Gitlab::GitalyClient::RepositoryService).to receive(:import_repository)
.with(project.import_url).and_return(true)
.with(project.import_url, http_authorization_header: '', mirror: false).and_return(true)
# Works around https://github.com/rspec/rspec-mocks/issues/910
allow(Project).to receive(:find).with(project.id).and_return(project)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment