Commit 4c9df826 authored by Heinrich Lee Yu's avatar Heinrich Lee Yu

Merge branch 'mk/add-repo-checksum-task' into 'master'

Add rake gitlab:git:checksum_projects

See merge request gitlab-org/gitlab!49965
parents 0859b783 4f3e6340
---
title: Add rake gitlab:git:checksum_projects
merge_request: 49965
author:
type: added
...@@ -51,6 +51,62 @@ sudo gitlab-rake gitlab:git:fsck ...@@ -51,6 +51,62 @@ sudo gitlab-rake gitlab:git:fsck
sudo -u git -H bundle exec rake gitlab:git:fsck RAILS_ENV=production sudo -u git -H bundle exec rake gitlab:git:fsck RAILS_ENV=production
``` ```
## Checksum of repository refs
One Git repository can be compared to another by checksumming all refs of each
repository. If both repositories have the same refs, and if both repositories
pass an integrity check, then we can be confident that both repositories are the
same.
For example, this can be used to compare a backup of a repository against the
source repository.
### Check all GitLab repositories
This task loops through all repositories on the GitLab server and outputs
checksums in the format `<PROJECT ID>,<CHECKSUM>`.
- If a repository doesn't exist, the project ID is output with a blank checksum.
- If a repository exists but is empty, the output checksum is `0000000000000000000000000000000000000000`.
- Project IDs that don't match an existing project are skipped and produce no output line.
**Omnibus Installation**
```shell
sudo gitlab-rake gitlab:git:checksum_projects
```
**Source Installation**
```shell
sudo -u git -H bundle exec rake gitlab:git:checksum_projects RAILS_ENV=production
```
For example, suppose that:
- The project with ID 2 doesn't exist, so it is skipped.
- The project with ID 4 doesn't have a repository, so its checksum is blank.
- The project with ID 5 has an empty repository, so its checksum is `0000000000000000000000000000000000000000`.
The output would then look something like:
```plaintext
1,cfa3f06ba235c13df0bb28e079bcea62c5848af2
3,3f3fb58a8106230e3a6c6b48adc2712fb3b6ef87
4,
5,0000000000000000000000000000000000000000
6,6c6b48adc2712fb3b6ef87cfa3f06ba235c13df0
```
### Check specific GitLab repositories
Optionally, specific project IDs can be checksummed by setting an environment
variable `CHECKSUM_PROJECT_IDS` with a list of comma-separated integers, for example:
```shell
CHECKSUM_PROJECT_IDS="1,3" sudo gitlab-rake gitlab:git:checksum_projects
```
## Uploaded files integrity
Various types of files can be uploaded to a GitLab installation by users.
......
...@@ -21,5 +21,39 @@ namespace :gitlab do ...@@ -21,5 +21,39 @@ namespace :gitlab do
failures.each { |f| puts "- #{f}" } failures.each { |f| puts "- #{f}" }
end end
end end
# Prints one `<PROJECT ID>,<CHECKSUM>` line to stdout for each project.
#
# Checksumming every project:
#
#   $ bin/rake gitlab:git:checksum_projects
#   1,cfa3f06ba235c13df0bb28e079bcea62c5848af2
#   2,
#   3,3f3fb58a8106230e3a6c6b48adc2712fb3b6ef87
#   4,0000000000000000000000000000000000000000
#
# Checksumming only the projects named in CHECKSUM_PROJECT_IDS
# (a comma-separated list of project IDs):
#
#   $ CHECKSUM_PROJECT_IDS="1,3" bin/rake gitlab:git:checksum_projects
#   1,cfa3f06ba235c13df0bb28e079bcea62c5848af2
#   3,3f3fb58a8106230e3a6c6b48adc2712fb3b6ef87
#
# Output rules:
#
# - A project without a repository is listed with a blank checksum
# - A project whose repository is empty is listed with the checksum
#   `0000000000000000000000000000000000000000`
# - Requested IDs that match no project are skipped
# - If an error is raised while handling a project, `Ignored error: <message>`
#   (squished and truncated to 255 characters) is printed in place of the checksum
desc 'GitLab | Git | Generate checksum of project repository refs'
task checksum_projects: :environment do
  requested_ids = ENV['CHECKSUM_PROJECT_IDS']&.split(',')
  projects = requested_ids.present? ? Project.where(id: requested_ids) : Project

  projects.find_each(batch_size: 100) do |project|
    begin
      # `checksum` stays nil (printed as blank) when there is no repository.
      checksum = project.repository.checksum if project.repo_exists?
    rescue => e
      checksum = "Ignored error: #{e.message}".squish.truncate(255)
    ensure
      # A line is emitted for every project, whichever path was taken above.
      puts "#{project.id},#{checksum}"
    end
  end
end
end end
end end
...@@ -19,4 +19,38 @@ RSpec.describe 'gitlab:git rake tasks' do ...@@ -19,4 +19,38 @@ RSpec.describe 'gitlab:git rake tasks' do
expect { run_rake_task('gitlab:git:fsck') }.to output(/Performed integrity check for/).to_stdout expect { run_rake_task('gitlab:git:fsck') }.to output(/Performed integrity check for/).to_stdout
end end
end end
describe 'checksum_projects' do
  # Invokes the task under test; the `output` matcher captures what it prints.
  def run_checksum_task
    run_rake_task('gitlab:git:checksum_projects')
  end

  it 'outputs the checksum for a repo' do
    checksum_line = /#{project.id},#{project.repository.checksum}/

    expect { run_checksum_task }.to output(checksum_line).to_stdout
  end

  it 'outputs blank checksum for no repo' do
    project_without_repo = create(:project)
    # `$` pins the match to a line that ends right after the comma.
    blank_line = /#{project_without_repo.id},$/

    expect { run_checksum_task }.to output(blank_line).to_stdout
  end

  it 'outputs zeroes for empty repo' do
    project_with_empty_repo = create(:project, :empty_repo)
    zero_line = /#{project_with_empty_repo.id},0000000000000000000000000000000000000000/

    expect { run_checksum_task }.to output(zero_line).to_stdout
  end

  it 'outputs errors' do
    # Make every Project loaded by the task raise when asked about its repository.
    allow_next_found_instance_of(Project) do |found_project|
      allow(found_project).to receive(:repo_exists?).and_raise('foo')
    end

    error_line = /#{project.id},Ignored error: foo/

    expect { run_checksum_task }.to output(error_line).to_stdout
  end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment