Move checksum calculation to Gitlab::Git::Repository

parent d248b7a0
...@@ -436,7 +436,7 @@ group :ed25519 do ...@@ -436,7 +436,7 @@ group :ed25519 do
end end
# Gitaly GRPC client # Gitaly GRPC client
gem 'gitaly-proto', '~> 0.91.0', require: 'gitaly' gem 'gitaly-proto', '~> 0.94.0', require: 'gitaly'
gem 'grpc', '~> 1.10.0' gem 'grpc', '~> 1.10.0'
# Locked until https://github.com/google/protobuf/issues/4210 is closed # Locked until https://github.com/google/protobuf/issues/4210 is closed
......
...@@ -314,7 +314,7 @@ GEM ...@@ -314,7 +314,7 @@ GEM
po_to_json (>= 1.0.0) po_to_json (>= 1.0.0)
rails (>= 3.2.0) rails (>= 3.2.0)
gherkin-ruby (0.3.2) gherkin-ruby (0.3.2)
gitaly-proto (0.91.0) gitaly-proto (0.94.0)
google-protobuf (~> 3.1) google-protobuf (~> 3.1)
grpc (~> 1.0) grpc (~> 1.0)
github-linguist (5.3.3) github-linguist (5.3.3)
...@@ -1095,7 +1095,7 @@ DEPENDENCIES ...@@ -1095,7 +1095,7 @@ DEPENDENCIES
gettext (~> 3.2.2) gettext (~> 3.2.2)
gettext_i18n_rails (~> 1.8.0) gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.3) gettext_i18n_rails_js (~> 1.3)
gitaly-proto (~> 0.91.0) gitaly-proto (~> 0.94.0)
github-linguist (~> 5.3.3) github-linguist (~> 5.3.3)
gitlab-flowdock-git-hook (~> 1.0.1) gitlab-flowdock-git-hook (~> 1.0.1)
gitlab-license (~> 1.0) gitlab-license (~> 1.0)
......
...@@ -6,6 +6,10 @@ module EE ...@@ -6,6 +6,10 @@ module EE
module Repository module Repository
extend ActiveSupport::Concern extend ActiveSupport::Concern
included do
delegate :checksum, to: :raw_repository
end
# Transiently sets a configuration variable # Transiently sets a configuration variable
def with_config(values = {}) def with_config(values = {})
values.each { |k, v| rugged.config[k] = v } values.each { |k, v| rugged.config[k] = v }
......
...@@ -2,6 +2,8 @@ module Geo ...@@ -2,6 +2,8 @@ module Geo
class RepositoryUpdatedService class RepositoryUpdatedService
include ::Gitlab::Geo::ProjectLogHelpers include ::Gitlab::Geo::ProjectLogHelpers
RepositoryUpdateError = Class.new(StandardError)
def initialize(project, params = {}) def initialize(project, params = {})
@project = project @project = project
@params = params @params = params
...@@ -37,7 +39,7 @@ module Geo ...@@ -37,7 +39,7 @@ module Geo
repository_state.update!("#{repository_checksum_column}" => nil, "#{repository_failure_column}" => nil) repository_state.update!("#{repository_checksum_column}" => nil, "#{repository_failure_column}" => nil)
rescue => e rescue => e
log_error('Cannot reset repository checksum', e) log_error('Cannot reset repository checksum', e)
raise Gitlab::Git::Checksum::Failure, "Cannot reset repository checksum: #{e}" raise RepositoryUpdateError, "Cannot reset repository checksum: #{e}"
end end
def repository_checksum_column def repository_checksum_column
......
...@@ -40,14 +40,14 @@ module Geo ...@@ -40,14 +40,14 @@ module Geo
end end
def verify_checksum def verify_checksum
checksum = calculate_checksum(project.repository_storage, repository_path) checksum = project.repository.checksum
if mismatch?(checksum) if mismatch?(checksum)
update_registry!(failure: "#{type.to_s.capitalize} checksum mismatch: #{repository_path}") update_registry!(failure: "#{type.to_s.capitalize} checksum mismatch: #{repository_path}")
else else
update_registry!(checksum: checksum) update_registry!(checksum: checksum)
end end
rescue ::Gitlab::Git::Repository::NoRepository, ::Gitlab::Git::Checksum::Failure, Timeout::Error => e rescue ::Gitlab::Git::Repository::NoRepository, ::Gitlab::Git::Repository::ChecksumError, Timeout::Error => e
update_registry!(failure: "Error verifying #{type.to_s.capitalize} checksum: #{repository_path}", exception: e) update_registry!(failure: "Error verifying #{type.to_s.capitalize} checksum: #{repository_path}", exception: e)
end end
...@@ -55,10 +55,6 @@ module Geo ...@@ -55,10 +55,6 @@ module Geo
primary_checksum != checksum primary_checksum != checksum
end end
def calculate_checksum(storage, relative_path)
Gitlab::Git::Checksum.new(storage, relative_path).calculate
end
def update_registry!(checksum: nil, failure: nil, exception: nil, details: {}) def update_registry!(checksum: nil, failure: nil, exception: nil, details: {})
attrs = { attrs = {
"#{type}_verification_checksum_sha" => checksum, "#{type}_verification_checksum_sha" => checksum,
......
...@@ -26,16 +26,15 @@ module Geo ...@@ -26,16 +26,15 @@ module Geo
private private
def calculate_repository_checksum def calculate_repository_checksum
calculate_checksum(:repository, project.disk_path) calculate_checksum(:repository, project.repository)
end end
def calculate_wiki_checksum def calculate_wiki_checksum
calculate_checksum(:wiki, project.wiki.disk_path) calculate_checksum(:wiki, project.wiki.repository)
end end
def calculate_checksum(type, repository_relative_path) def calculate_checksum(type, repository)
checksum = Gitlab::Git::Checksum.new(project.repository_storage, repository_relative_path) update_repository_state!(type, checksum: repository.checksum)
update_repository_state!(type, checksum: checksum.calculate)
rescue => e rescue => e
log_error('Error calculating the repository checksum', e, type: type) log_error('Error calculating the repository checksum', e, type: type)
update_repository_state!(type, failure: e.message) update_repository_state!(type, failure: e.message)
......
...@@ -52,12 +52,12 @@ describe Geo::RepositoryUpdatedService do ...@@ -52,12 +52,12 @@ describe Geo::RepositoryUpdatedService do
expect { described_class.new(create(:project)) }.not_to raise_error expect { described_class.new(create(:project)) }.not_to raise_error
end end
it 'raises a Gitlab::Git::Checksum error when an error occurs' do it 'raises a Geo::RepositoryUpdatedService::RepositoryUpdateError when an error occurs' do
allow(subject.repository_state).to receive(:update!) allow(subject.repository_state).to receive(:update!)
.with("#{method_prefix}_verification_checksum" => nil, "last_#{method_prefix}_verification_failure" => nil) .with("#{method_prefix}_verification_checksum" => nil, "last_#{method_prefix}_verification_failure" => nil)
.and_raise(ActiveRecord::RecordInvalid.new(repository_state)) .and_raise(ActiveRecord::RecordInvalid.new(repository_state))
expect { subject.execute }.to raise_error Gitlab::Git::Checksum::Failure, /Cannot reset repository checksum/ expect { subject.execute }.to raise_error Geo::RepositoryUpdatedService::RepositoryUpdateError, /Cannot reset repository checksum/
end end
end end
end end
......
...@@ -10,16 +10,12 @@ describe Geo::RepositoryVerifySecondaryService, :geo do ...@@ -10,16 +10,12 @@ describe Geo::RepositoryVerifySecondaryService, :geo do
end end
shared_examples 'verify checksums for repositories/wikis' do |type| shared_examples 'verify checksums for repositories/wikis' do |type|
let(:checksum) { instance_double('Gitlab::Git::Checksum') }
let(:storage) { project.repository_storage }
let(:relative_path) { service.send(:repository_path) }
subject(:service) { described_class.new(registry, type) } subject(:service) { described_class.new(registry, type) }
it 'does not calculate the checksum when not running on a secondary' do it 'does not calculate the checksum when not running on a secondary' do
allow(Gitlab::Geo).to receive(:secondary?) { false } allow(Gitlab::Geo).to receive(:secondary?) { false }
expect(Gitlab::Git::Checksum).not_to receive(:new).with(storage, relative_path) expect(registry.project.repository).not_to receive(:checksum)
service.execute service.execute
end end
...@@ -27,7 +23,7 @@ describe Geo::RepositoryVerifySecondaryService, :geo do ...@@ -27,7 +23,7 @@ describe Geo::RepositoryVerifySecondaryService, :geo do
it 'does not verify the checksum if resync is needed' do it 'does not verify the checksum if resync is needed' do
registry.assign_attributes("resync_#{type}" => true) registry.assign_attributes("resync_#{type}" => true)
expect(Gitlab::Git::Checksum).not_to receive(:new).with(storage, relative_path) expect(registry.project.repository).not_to receive(:checksum)
service.execute service.execute
end end
...@@ -35,7 +31,7 @@ describe Geo::RepositoryVerifySecondaryService, :geo do ...@@ -35,7 +31,7 @@ describe Geo::RepositoryVerifySecondaryService, :geo do
it 'does not verify the checksum if primary was never verified' do it 'does not verify the checksum if primary was never verified' do
repository_state.assign_attributes("#{type}_verification_checksum" => nil) repository_state.assign_attributes("#{type}_verification_checksum" => nil)
expect(Gitlab::Git::Checksum).not_to receive(:new).with(storage, relative_path) expect(registry.project.repository).not_to receive(:checksum)
service.execute service.execute
end end
...@@ -44,22 +40,20 @@ describe Geo::RepositoryVerifySecondaryService, :geo do ...@@ -44,22 +40,20 @@ describe Geo::RepositoryVerifySecondaryService, :geo do
repository_state.assign_attributes("#{type}_verification_checksum" => 'my_checksum') repository_state.assign_attributes("#{type}_verification_checksum" => 'my_checksum')
registry.assign_attributes("#{type}_verification_checksum_sha" => 'my_checksum') registry.assign_attributes("#{type}_verification_checksum_sha" => 'my_checksum')
expect(Gitlab::Git::Checksum).not_to receive(:new).with(storage, relative_path) expect(registry.project.repository).not_to receive(:checksum)
service.execute service.execute
end end
it 'sets checksum when the checksum matches' do it 'sets checksum when the checksum matches' do
expect(Gitlab::Git::Checksum).to receive(:new).with(storage, relative_path) { checksum } expect(registry.project.repository).to receive(:checksum).and_return('my_checksum')
expect(checksum).to receive(:calculate).and_return('my_checksum')
expect { service.execute }.to change(registry, "#{type}_verification_checksum_sha") expect { service.execute }.to change(registry, "#{type}_verification_checksum_sha")
.from(nil).to('my_checksum') .from(nil).to('my_checksum')
end end
it 'keeps track of failure when the checksum mismatch' do it 'keeps track of failure when the checksum mismatch' do
expect(Gitlab::Git::Checksum).to receive(:new).with(storage, relative_path) { checksum } expect(registry.project.repository).to receive(:checksum).and_return('other_checksum')
expect(checksum).to receive(:calculate).and_return('other_checksum')
expect { service.execute }.to change(registry, "last_#{type}_verification_failure") expect { service.execute }.to change(registry, "last_#{type}_verification_failure")
.from(nil).to(/#{Regexp.quote(type.to_s.capitalize)} checksum mismatch/) .from(nil).to(/#{Regexp.quote(type.to_s.capitalize)} checksum mismatch/)
......
...@@ -20,7 +20,7 @@ describe Geo::RepositoryVerification::Primary::SingleWorker, :postgresql, :clean ...@@ -20,7 +20,7 @@ describe Geo::RepositoryVerification::Primary::SingleWorker, :postgresql, :clean
it 'does not calculate the checksum when not running on a primary' do it 'does not calculate the checksum when not running on a primary' do
allow(Gitlab::Geo).to receive(:primary?) { false } allow(Gitlab::Geo).to receive(:primary?) { false }
expect_any_instance_of(Gitlab::Git::Checksum).not_to receive(:calculate) expect(project_without_repositories.repository).not_to receive(:checksum)
subject.perform(project_without_repositories.id) subject.perform(project_without_repositories.id)
end end
...@@ -28,7 +28,7 @@ describe Geo::RepositoryVerification::Primary::SingleWorker, :postgresql, :clean ...@@ -28,7 +28,7 @@ describe Geo::RepositoryVerification::Primary::SingleWorker, :postgresql, :clean
it 'does not calculate the checksum when project is pending deletion' do it 'does not calculate the checksum when project is pending deletion' do
project_with_repositories.update!(pending_delete: true) project_with_repositories.update!(pending_delete: true)
expect_any_instance_of(Gitlab::Git::Checksum).not_to receive(:calculate) expect(project_with_repositories.repository).not_to receive(:checksum)
subject.perform(project_with_repositories.id) subject.perform(project_with_repositories.id)
end end
...@@ -126,9 +126,9 @@ describe Geo::RepositoryVerification::Primary::SingleWorker, :postgresql, :clean ...@@ -126,9 +126,9 @@ describe Geo::RepositoryVerification::Primary::SingleWorker, :postgresql, :clean
expect(project_without_repositories.repository_state).to have_attributes( expect(project_without_repositories.repository_state).to have_attributes(
repository_verification_checksum: nil, repository_verification_checksum: nil,
last_repository_verification_failure: /No repository for such path/, last_repository_verification_failure: /not a git repository/,
wiki_verification_checksum: nil, wiki_verification_checksum: nil,
last_wiki_verification_failure: /No repository for such path/ last_wiki_verification_failure: /not a git repository/
) )
end end
end end
......
module Gitlab
  module Git
    # Calculates a checksum over the heads and tags of a single repository.
    #
    # The value is obtained either from Gitaly or by shelling out to
    # `git show-ref`, depending on what the `calculate_checksum` migration
    # block yields.
    class Checksum
      include Gitlab::Git::Popen

      # Checksum reported when the repository has no refs to hash.
      EMPTY_REPOSITORY_CHECKSUM = '0000000000000000000000000000000000000000'.freeze

      # Raised when `git show-ref` fails and the failure cannot be explained
      # by the repository simply being empty.
      Failure = Class.new(StandardError)

      attr_reader :path, :relative_path, :storage, :storage_path, :gl_repository

      # storage       - name of the repository storage, used to look up its
      #                 legacy disk path in the GitLab configuration
      # relative_path - repository path inside the storage, without the
      #                 `.git` suffix (it is appended here)
      # gl_repository - identifier forwarded to Gitlab::Git::Repository
      def initialize(storage, relative_path, gl_repository)
        @storage = storage
        @gl_repository = gl_repository
        @storage_path = Gitlab.config.repositories.storages[storage].legacy_disk_path
        @relative_path = "#{relative_path}.git"
        @path = File.join(@storage_path, @relative_path)
      end

      # Returns the checksum as a String.
      #
      # Raises Gitlab::Git::Repository::NoRepository when the repository does
      # not exist, and Failure when the shell-out fallback cannot produce a
      # checksum.
      def calculate
        failure!(Gitlab::Git::Repository::NoRepository, 'No repository for such path') unless raw_repository.exists?

        raw_repository.gitaly_migrate(:calculate_checksum) do |gitaly_enabled|
          next calculate_checksum_by_shelling_out unless gitaly_enabled

          raw_repository.gitaly_repository_client.calculate_checksum
        end
      end

      private

      # XORs the SHA1 of every `git show-ref --heads --tags` output line and
      # renders the result in hexadecimal.
      def calculate_checksum_by_shelling_out
        output, status = run_git(%W(--git-dir=#{path} show-ref --heads --tags))

        if status&.zero?
          return output
            .split("\n")
            .map { |ref| Digest::SHA1.hexdigest(ref).hex }
            .inject(:^)
            .to_s(16)
        end

        # Empty repositories return with a non-zero status and an empty output.
        return EMPTY_REPOSITORY_CHECKSUM if output&.empty?

        failure!(Gitlab::Git::Checksum::Failure, output)
      end

      # Logs the problem to the git logger, then raises +klass+ with a
      # message that includes the repository path.
      def failure!(klass, message)
        Gitlab::GitLogger.error("'git show-ref --heads --tags' in #{path}: #{message}")

        raise klass, "Could not calculate the checksum for #{path}: #{message}"
      end

      # Memoized circuit breaker for this storage; wraps the git invocation.
      def circuit_breaker
        @circuit_breaker ||= Gitlab::Git::Storage::CircuitBreaker.for_storage(storage)
      end

      # Memoized raw repository built from the storage, the `.git`-suffixed
      # relative path and the gl_repository identifier.
      def raw_repository
        @raw_repository ||= Gitlab::Git::Repository.new(storage, relative_path, gl_repository)
      end

      # Runs the configured git binary with +args+ inside the repository
      # path, guarded by the storage circuit breaker.
      def run_git(args)
        circuit_breaker.perform { popen([Gitlab.config.git.bin_path, *args], path) }
      end
    end
  end
end
...@@ -23,6 +23,7 @@ module Gitlab ...@@ -23,6 +23,7 @@ module Gitlab
SQUASH_WORKTREE_PREFIX = 'squash'.freeze SQUASH_WORKTREE_PREFIX = 'squash'.freeze
GITALY_INTERNAL_URL = 'ssh://gitaly/internal.git'.freeze GITALY_INTERNAL_URL = 'ssh://gitaly/internal.git'.freeze
GITLAB_PROJECTS_TIMEOUT = Gitlab.config.gitlab_shell.git_timeout GITLAB_PROJECTS_TIMEOUT = Gitlab.config.gitlab_shell.git_timeout
EMPTY_REPOSITORY_CHECKSUM = '0000000000000000000000000000000000000000'.freeze
NoRepository = Class.new(StandardError) NoRepository = Class.new(StandardError)
InvalidBlobName = Class.new(StandardError) InvalidBlobName = Class.new(StandardError)
...@@ -31,6 +32,7 @@ module Gitlab ...@@ -31,6 +32,7 @@ module Gitlab
DeleteBranchError = Class.new(StandardError) DeleteBranchError = Class.new(StandardError)
CreateTreeError = Class.new(StandardError) CreateTreeError = Class.new(StandardError)
TagExistsError = Class.new(StandardError) TagExistsError = Class.new(StandardError)
ChecksumError = Class.new(StandardError)
class << self class << self
# Unlike `new`, `create` takes the repository path # Unlike `new`, `create` takes the repository path
...@@ -1470,6 +1472,16 @@ module Gitlab ...@@ -1470,6 +1472,16 @@ module Gitlab
run_git!(['rev-list', '--max-count=1', oldrev, "^#{newrev}"]) run_git!(['rev-list', '--max-count=1', oldrev, "^#{newrev}"])
end end
# Returns the checksum of this repository.
#
# Delegates to the Gitaly repository client when the `calculate_checksum`
# migration block yields a truthy flag; otherwise falls back to
# calculate_checksum_by_shelling_out.
def checksum
  gitaly_migrate(:calculate_checksum) do |gitaly_enabled|
    next gitaly_repository_client.calculate_checksum if gitaly_enabled

    calculate_checksum_by_shelling_out
  end
end
private private
def local_write_ref(ref_path, ref, old_ref: nil, shell: true) def local_write_ref(ref_path, ref, old_ref: nil, shell: true)
...@@ -2423,6 +2435,34 @@ module Gitlab ...@@ -2423,6 +2435,34 @@ module Gitlab
def sha_from_ref(ref) def sha_from_ref(ref)
rev_parse_target(ref).oid rev_parse_target(ref).oid
end end
# Computes the repository checksum locally from `git show-ref --heads --tags`.
#
# Every output line is hashed with SHA1; the digests are XOR-ed together and
# the result is returned as a hexadecimal String.
#
# Raises NoRepository when the repository does not exist, and ChecksumError
# (carrying the git output) when git fails for a reason other than the
# repository being empty.
def calculate_checksum_by_shelling_out
  raise NoRepository unless exists?

  output, status = run_git(%W(--git-dir=#{path} show-ref --heads --tags))

  unless status&.zero?
    # Empty repositories return with a non-zero status and an empty output.
    return EMPTY_REPOSITORY_CHECKSUM if output&.empty?

    raise ChecksumError, output
  end

  output
    .split("\n")
    .map { |ref| Digest::SHA1.hexdigest(ref).hex }
    .inject(:^)
    .to_s(16)
end
end end
end end
end end
require 'spec_helper'
# Exercises Gitlab::Git::Checksum#calculate against the seed repositories,
# once with the default Gitaly setup and once with Gitaly disabled.
describe Gitlab::Git::Checksum, seed_helper: true do
  let(:storage) { 'default' }
  let(:gl_repository) { 'project-123' }

  shared_examples 'calculating checksum' do
    it 'raises Gitlab::Git::Repository::NoRepository when there is no repo' do
      checksum = described_class.new(storage, 'nonexistent-repo', gl_repository)

      expect { checksum.calculate }.to raise_error Gitlab::Git::Repository::NoRepository
    end

    it 'pretends that checksum is 000000... when the repo is empty' do
      # Recreate a bare repository from scratch so it is guaranteed to have no refs.
      FileUtils.rm_rf(File.join(SEED_STORAGE_PATH, 'empty-repo.git'))

      system(git_env, *%W(#{Gitlab.config.git.bin_path} init --bare empty-repo.git),
             chdir: SEED_STORAGE_PATH,
             out: '/dev/null',
             err: '/dev/null')

      checksum = described_class.new(storage, 'empty-repo', gl_repository)

      expect(checksum.calculate).to eq '0000000000000000000000000000000000000000'
    end

    it 'calculates the checksum when there is a repo' do
      checksum = described_class.new(storage, 'gitlab-git-test', gl_repository)

      expect(checksum.calculate).to eq '54f21be4c32c02f6788d72207fa03ad3bce725e4'
    end
  end

  context 'when calculate_checksum Gitaly feature is enabled' do
    it_behaves_like 'calculating checksum'
  end

  context 'when calculate_checksum Gitaly feature is disabled', :disable_gitaly do
    it_behaves_like 'calculating checksum'

    # The description names the error class the example actually asserts.
    it "raises a Gitlab::Git::Checksum::Failure error if the `popen` call to git returns a non-zero exit code" do
      checksum = described_class.new(storage, 'gitlab-git-test', gl_repository)

      # Simulate a failed git invocation: some output, no exit status.
      allow(checksum).to receive(:popen).and_return(['output', nil])

      expect { checksum.calculate }.to raise_error Gitlab::Git::Checksum::Failure
    end
  end
end
...@@ -2178,6 +2178,55 @@ describe Gitlab::Git::Repository, seed_helper: true do ...@@ -2178,6 +2178,55 @@ describe Gitlab::Git::Repository, seed_helper: true do
end end
end end
# Exercises Gitlab::Git::Repository#checksum, once with the default Gitaly
# setup and once with Gitaly disabled.
describe '#checksum' do
  shared_examples 'calculating checksum' do
    it 'calculates the checksum for non-empty repo' do
      expect(repository.checksum).to eq '54f21be4c32c02f6788d72207fa03ad3bce725e4'
    end

    it 'returns 0000000000000000000000000000000000000000 for an empty repo' do
      # Recreate a bare repository from scratch so it is guaranteed to have no refs.
      FileUtils.rm_rf(File.join(storage_path, 'empty-repo.git'))

      system(git_env, *%W(#{Gitlab.config.git.bin_path} init --bare empty-repo.git),
             chdir: storage_path,
             out: '/dev/null',
             err: '/dev/null')

      empty_repo = described_class.new('default', 'empty-repo.git', '')

      expect(empty_repo.checksum).to eq '0000000000000000000000000000000000000000'
    end

    it 'raises a no repository exception when there is no repo' do
      broken_repo = described_class.new('default', 'a/path.git', '')

      expect { broken_repo.checksum }.to raise_error(Gitlab::Git::Repository::NoRepository)
    end
  end

  context 'when calculate_checksum Gitaly feature is enabled' do
    it_behaves_like 'calculating checksum'
  end

  context 'when calculate_checksum Gitaly feature is disabled', :disable_gitaly do
    it_behaves_like 'calculating checksum'

    describe 'when storage is broken', :broken_storage do
      it 'raises a storage exception when storage is not available' do
        broken_repo = described_class.new('broken', 'a/path.git', '')

        # NOTE(review): this exercises #rugged rather than #checksum, although it
        # lives under the '#checksum' group - confirm that this is intended.
        expect { broken_repo.rugged }.to raise_error(Gitlab::Git::Storage::Inaccessible)
      end
    end

    # The description names the error class the example actually asserts.
    it "raises a Gitlab::Git::Repository::ChecksumError error if the `popen` call to git returns a non-zero exit code" do
      # Simulate a failed git invocation: some output, no exit status.
      allow(repository).to receive(:popen).and_return(['output', nil])

      expect { repository.checksum }.to raise_error Gitlab::Git::Repository::ChecksumError
    end
  end
end
context 'gitlab_projects commands' do context 'gitlab_projects commands' do
let(:gitlab_projects) { repository.gitlab_projects } let(:gitlab_projects) { repository.gitlab_projects }
let(:timeout) { Gitlab.config.gitlab_shell.git_timeout } let(:timeout) { Gitlab.config.gitlab_shell.git_timeout }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment