Commit 61b0577d authored by Stan Hu's avatar Stan Hu

Merge branch 'mk/rake-task-verify-remote-files' into 'master'

Add support for verifying remote uploads, artifacts, and LFS objects in check rake tasks

Closes #47139

See merge request gitlab-org/gitlab-ce!19501
parents cc1b0354 2f40fb45
---
title: Add support for verifying remote uploads, artifacts, and LFS objects in check rake tasks
merge_request: 19501
author:
type: added
......@@ -78,9 +78,10 @@ Example output:
## Uploaded Files Integrity
Various types of file can be uploaded to a GitLab installation by users.
Checksums are generated and stored in the database upon upload, and integrity
checks using those checksums can be run. These checks also detect missing files.
Various types of files can be uploaded to a GitLab installation by users.
These integrity checks can detect missing files. Additionally, for locally
stored files, checksums are generated and stored in the database upon upload,
and these checks will verify them against current files.
Currently, integrity checks are supported for the following types of file:
......
......@@ -7,13 +7,15 @@ module Gitlab
@batch_size = batch_size
@start = start
@finish = finish
fix_google_api_logger
end
# Yields a Range of IDs and a Hash of failed verifications (object => error)
def run_batches(&blk)
relation.in_batches(of: batch_size, start: start, finish: finish) do |relation| # rubocop: disable Cop/InBatches
range = relation.first.id..relation.last.id
failures = run_batch(relation)
all_relation.in_batches(of: batch_size, start: start, finish: finish) do |batch| # rubocop: disable Cop/InBatches
range = batch.first.id..batch.last.id
failures = run_batch_for(batch)
yield(range, failures)
end
......@@ -29,24 +31,56 @@ module Gitlab
private
def run_batch(relation)
relation.map { |upload| verify(upload) }.compact.to_h
def run_batch_for(batch)
batch.map { |upload| verify(upload) }.compact.to_h
end
def verify(object)
local?(object) ? verify_local(object) : verify_remote(object)
rescue => err
failure(object, err.inspect)
end
def verify_local(object)
expected = expected_checksum(object)
actual = actual_checksum(object)
raise 'Checksum missing' unless expected.present?
raise 'Checksum mismatch' unless expected == actual
return failure(object, 'Checksum missing') unless expected.present?
return failure(object, 'Checksum mismatch') unless expected == actual
success
end
# We don't calculate checksum for remote objects, so just check existence
def verify_remote(object)
return failure(object, 'Remote object does not exist') unless remote_object_exists?(object)
success
end
def success
nil
rescue => err
[object, err]
end
def failure(object, message)
[object, message]
end
# It's already set to Logger::INFO, but acts as if it is set to
# Logger::DEBUG, and this fixes it...
def fix_google_api_logger
if Object.const_defined?('Google::Apis')
Google::Apis.logger.level = Logger::INFO
end
end
# This should return an ActiveRecord::Relation suitable for calling #in_batches on
def relation
def all_relation
raise NotImplementedError.new
end
# Should return true if the object is stored locally
def local?(_object)
raise NotImplementedError.new
end
......@@ -59,6 +93,11 @@ module Gitlab
def actual_checksum(_object)
raise NotImplementedError.new
end
# Be sure to perform a hard check of the remote object (don't just check DB value)
def remote_object_exists?(object)
raise NotImplementedError.new
end
end
end
end
......@@ -11,10 +11,14 @@ module Gitlab
private
def relation
def all_relation
::Ci::JobArtifact.all
end
def local?(artifact)
artifact.local_store?
end
def expected_checksum(artifact)
artifact.file_sha256
end
......@@ -22,6 +26,10 @@ module Gitlab
def actual_checksum(artifact)
Digest::SHA256.file(artifact.file.path).hexdigest
end
def remote_object_exists?(artifact)
artifact.file.file.exists?
end
end
end
end
......@@ -11,8 +11,12 @@ module Gitlab
private
def relation
LfsObject.with_files_stored_locally
def all_relation
LfsObject.all
end
def local?(lfs_object)
lfs_object.local_store?
end
def expected_checksum(lfs_object)
......@@ -22,6 +26,10 @@ module Gitlab
def actual_checksum(lfs_object)
LfsObject.calculate_oid(lfs_object.file.path)
end
def remote_object_exists?(lfs_object)
lfs_object.file.file.exists?
end
end
end
end
......@@ -45,7 +45,7 @@ module Gitlab
return unless verbose?
failures.each do |object, error|
say " - #{verifier.describe(object)}: #{error.inspect}".color(:red)
say " - #{verifier.describe(object)}: #{error}".color(:red)
end
end
end
......
......@@ -11,8 +11,12 @@ module Gitlab
private
def relation
Upload.with_files_stored_locally
def all_relation
Upload.all
end
def local?(upload)
upload.local?
end
def expected_checksum(upload)
......@@ -22,6 +26,10 @@ module Gitlab
def actual_checksum(upload)
Upload.hexdigest(upload.absolute_path)
end
def remote_object_exists?(upload)
upload.build_uploader.file.exists?
end
end
end
end
......@@ -21,15 +21,38 @@ describe Gitlab::Verify::JobArtifacts do
FileUtils.rm_f(artifact.file.path)
expect(failures.keys).to contain_exactly(artifact)
expect(failure).to be_a(Errno::ENOENT)
expect(failure.to_s).to include(artifact.file.path)
expect(failure).to include('No such file or directory')
expect(failure).to include(artifact.file.path)
end
it 'fails artifacts with a mismatched checksum' do
File.truncate(artifact.file.path, 0)
expect(failures.keys).to contain_exactly(artifact)
expect(failure.to_s).to include('Checksum mismatch')
expect(failure).to include('Checksum mismatch')
end
context 'with remote files' do
let(:file) { double(:file) }
before do
stub_artifacts_object_storage
artifact.update!(file_store: ObjectStorage::Store::REMOTE)
expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file)
end
it 'passes artifacts in object storage that exist' do
expect(file).to receive(:exists?).and_return(true)
expect(failures).to eq({})
end
it 'fails artifacts in object storage that do not exist' do
expect(file).to receive(:exists?).and_return(false)
expect(failures.keys).to contain_exactly(artifact)
expect(failure).to include('Remote object does not exist')
end
end
end
end
......@@ -21,30 +21,37 @@ describe Gitlab::Verify::LfsObjects do
FileUtils.rm_f(lfs_object.file.path)
expect(failures.keys).to contain_exactly(lfs_object)
expect(failure).to be_a(Errno::ENOENT)
expect(failure.to_s).to include(lfs_object.file.path)
expect(failure).to include('No such file or directory')
expect(failure).to include(lfs_object.file.path)
end
it 'fails LFS objects with a mismatched oid' do
File.truncate(lfs_object.file.path, 0)
expect(failures.keys).to contain_exactly(lfs_object)
expect(failure.to_s).to include('Checksum mismatch')
expect(failure).to include('Checksum mismatch')
end
context 'with remote files' do
let(:file) { double(:file) }
before do
stub_lfs_object_storage
lfs_object.update!(file_store: ObjectStorage::Store::REMOTE)
expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file)
end
it 'skips LFS objects in object storage' do
local_failure = create(:lfs_object)
create(:lfs_object, :object_storage)
it 'passes LFS objects in object storage that exist' do
expect(file).to receive(:exists?).and_return(true)
expect(failures).to eq({})
end
failures = {}
described_class.new(batch_size: 10).run_batches { |_, failed| failures.merge!(failed) }
it 'fails LFS objects in object storage that do not exist' do
expect(file).to receive(:exists?).and_return(false)
expect(failures.keys).to contain_exactly(local_failure)
expect(failures.keys).to contain_exactly(lfs_object)
expect(failure).to include('Remote object does not exist')
end
end
end
......
......@@ -23,37 +23,44 @@ describe Gitlab::Verify::Uploads do
FileUtils.rm_f(upload.absolute_path)
expect(failures.keys).to contain_exactly(upload)
expect(failure).to be_a(Errno::ENOENT)
expect(failure.to_s).to include(upload.absolute_path)
expect(failure).to include('No such file or directory')
expect(failure).to include(upload.absolute_path)
end
it 'fails uploads with a mismatched checksum' do
upload.update!(checksum: 'something incorrect')
expect(failures.keys).to contain_exactly(upload)
expect(failure.to_s).to include('Checksum mismatch')
expect(failure).to include('Checksum mismatch')
end
it 'fails uploads with a missing precalculated checksum' do
upload.update!(checksum: '')
expect(failures.keys).to contain_exactly(upload)
expect(failure.to_s).to include('Checksum missing')
expect(failure).to include('Checksum missing')
end
context 'with remote files' do
let(:file) { double(:file) }
before do
stub_uploads_object_storage(AvatarUploader)
upload.update!(store: ObjectStorage::Store::REMOTE)
expect(CarrierWave::Storage::Fog::File).to receive(:new).and_return(file)
end
it 'skips uploads in object storage' do
local_failure = create(:upload)
create(:upload, :object_storage)
it 'passes uploads in object storage that exist' do
expect(file).to receive(:exists?).and_return(true)
expect(failures).to eq({})
end
failures = {}
described_class.new(batch_size: 10).run_batches { |_, failed| failures.merge!(failed) }
it 'fails uploads in object storage that do not exist' do
expect(file).to receive(:exists?).and_return(false)
expect(failures.keys).to contain_exactly(local_failure)
expect(failures.keys).to contain_exactly(upload)
expect(failure).to include('Remote object does not exist')
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment