Commit 21fd3c4d authored by Douwe Maan's avatar Douwe Maan

Merge branch 'tc-repo-verify-mails-ee' into 'master'

Repository verification on Geo secondary

Closes #5564 and #4746

See merge request gitlab-org/gitlab-ee!5550
parents 88e5f4e9 a224f6f4
......@@ -12,7 +12,7 @@
Enable Repository Checks
.help-block
GitLab will periodically run
%a{ href: 'https://www.kernel.org/pub/software/scm/git/docs/git-fsck.html', target: 'blank' } 'git fsck'
%a{ href: 'https://git-scm.com/docs/git-fsck', target: 'blank' } 'git fsck'
in all project and wiki repositories to look for silent disk corruption issues.
.form-group
.col-sm-offset-2.col-sm-10
......
......@@ -3,6 +3,12 @@ class AdminEmailWorker
include CronjobQueue
def perform
send_repository_check_mail if Gitlab::CurrentSettings.repository_checks_enabled
end
private
def send_repository_check_mail
repository_check_failed_count = Project.where(last_repository_check_failed: true).count
return if repository_check_failed_count.zero?
......
module RepositoryCheck
class BatchWorker
prepend ::EE::RepositoryCheck::BatchWorker
include ApplicationWorker
include CronjobQueue
RUN_TIME = 3600
BATCH_SIZE = 10_000
def perform
return unless Gitlab::CurrentSettings.repository_checks_enabled
start = Time.now
# This loop will break after a little more than one hour ('a little
......@@ -15,7 +20,6 @@ module RepositoryCheck
# check, only one (or two) will be checked at a time.
project_ids.each do |project_id|
break if Time.now - start >= RUN_TIME
break unless current_settings.repository_checks_enabled
next unless try_obtain_lease(project_id)
......@@ -31,12 +35,20 @@ module RepositoryCheck
# getting ID's from Postgres is not terribly slow, and because no user
# has to sit and wait for this query to finish.
def project_ids
limit = 10_000
never_checked_projects = Project.where('last_repository_check_at IS NULL AND created_at < ?', 24.hours.ago)
.limit(limit).pluck(:id)
old_check_projects = Project.where('last_repository_check_at < ?', 1.month.ago)
.reorder('last_repository_check_at ASC').limit(limit).pluck(:id)
never_checked_projects + old_check_projects
never_checked_project_ids(BATCH_SIZE) + old_checked_project_ids(BATCH_SIZE)
end
# Returns at most +batch_size+ ids of projects that have never had a
# repository check recorded and that were created more than 24 hours ago.
def never_checked_project_ids(batch_size)
  unchecked = Project.where(last_repository_check_at: nil)
  settled = unchecked.where('created_at < ?', 24.hours.ago)

  settled.limit(batch_size).pluck(:id)
end
# Returns at most +batch_size+ ids of projects whose last recorded
# repository check is more than one month old, least recently checked first.
def old_checked_project_ids(batch_size)
  previously_checked = Project.where.not(last_repository_check_at: nil)
  stale = previously_checked.where('last_repository_check_at < ?', 1.month.ago)

  stale.reorder(last_repository_check_at: :asc)
       .limit(batch_size)
       .pluck(:id)
end
def try_obtain_lease(id)
......@@ -47,16 +59,5 @@ module RepositoryCheck
timeout: 24.hours
).try_obtain
end
# Looks the application settings up afresh on every call.
#
# The result is intentionally not memoized: with cached settings, an admin
# disabling this feature would not stop an active RepositoryCheckWorker,
# which could keep going for up to 1 hour afterwards.
def current_settings
  return Gitlab::CurrentSettings.fake_application_settings if Rails.env.test?

  ApplicationSetting.current
end
end
end
......@@ -3,29 +3,38 @@ module RepositoryCheck
include ApplicationWorker
include RepositoryCheckQueue
prepend ::EE::RepositoryCheck::SingleRepositoryWorker
# Loads the project with the given id, evaluates the health of its
# repositories and hands the outcome to update_repository_check_status.
def perform(project_id)
  project = Project.find(project_id)

  update_repository_check_status(project, project_healthy?(project))
end
private
def update_repository_check_status(project, healthy)
project.update_columns(
last_repository_check_failed: !check(project),
last_repository_check_failed: !healthy,
last_repository_check_at: Time.now
)
end
private
# A project counts as healthy only when both its main repository and its
# wiki repository pass their checks. The wiki is not looked at once the
# main repository has already failed.
def project_healthy?(project)
  main_result = repo_healthy?(project)
  return main_result unless main_result

  wiki_repo_healthy?(project)
end
def check(project)
if has_pushes?(project) && !git_fsck(project.repository)
false
elsif project.wiki_enabled?
# Historically some projects never had their wiki repos initialized;
# this happens on project creation now. Let's initialize an empty repo
# if it is not already there.
project.create_wiki
def repo_healthy?(project)
return true unless has_changes?(project)
git_fsck(project.wiki.repository)
else
true
end
git_fsck(project.repository)
end
# Runs git_fsck on the wiki repository when has_wiki_changes? says there is
# something to check; otherwise the wiki is reported as healthy.
def wiki_repo_healthy?(project)
  if has_wiki_changes?(project)
    git_fsck(project.wiki.repository)
  else
    true
  end
end
def git_fsck(repository)
......@@ -39,8 +48,19 @@ module RepositoryCheck
false
end
def has_pushes?(project)
def has_changes?(project)
Project.with_push.exists?(project.id)
end
# Decides whether the wiki repository should be checked. Returns false when
# the wiki is disabled or when create_wiki does not succeed; otherwise
# defers to has_changes? for the project.
def has_wiki_changes?(project)
  # Historically some projects never had their wiki repos initialized;
  # this happens on project creation now. Let's initialize an empty repo
  # if it is not already there (create_wiki is only reached for projects
  # with the wiki enabled).
  if project.wiki_enabled? && project.create_wiki
    has_changes?(project)
  else
    false
  end
end
end
end
......@@ -13,12 +13,12 @@ checks failed you can see their output on the admin log page under
## Periodic checks
When enabled, GitLab periodically runs a repository check on all project
repositories and wiki repositories in order to detect data corruption problems.
When enabled, GitLab periodically runs a repository check on all project
repositories and wiki repositories in order to detect data corruption.
A project will be checked no more than once per month. If any projects
fail their repository checks all GitLab administrators will receive an email
notification of the situation. This notification is sent out once a week on
Sunday, by default.
notification of the situation. By default, this notification is sent out once a
week, at midnight at the start of Sunday.
## Disabling periodic checks
......@@ -28,16 +28,18 @@ panel.
## What to do if a check failed
If the repository check fails for some repository you should look up the error
in repocheck.log (in the admin panel or on disk; see
`/var/log/gitlab/gitlab-rails` for Omnibus installations or
`/home/git/gitlab/log` for installations from source). Once you have
resolved the issue use the admin panel to trigger a new repository check on
the project. This will clear the 'check failed' state.
in `repocheck.log`:
- in the [admin panel](logs.md#repocheck.log)
- or on disk, see:
- `/var/log/gitlab/gitlab-rails` for Omnibus installations
- `/home/git/gitlab/log` for installations from source
If for some reason the periodic repository check caused a lot of false
alarms you can choose to clear ALL repository check states from the
'Settings' page of the admin panel.
alarms you can choose to clear *all* repository check states by
clicking "Clear all repository checks" on the **Settings** page of the
admin panel (`/admin/application_settings`).
---
[ce-3232]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/3232 "Auto git fsck"
[git-fsck]: https://www.kernel.org/pub/software/scm/git/docs/git-fsck.html "git fsck documentation"
[git-fsck]: https://git-scm.com/docs/git-fsck "git fsck documentation"
module EE
  module RepositoryCheck
    # Geo-specific overrides for the batch worker's project id queries.
    #
    # On a Geo secondary the ids are read from Geo::ProjectRegistry instead
    # of from Project; everywhere else the original queries run via `super`.
    module BatchWorker
      extend ActiveSupport::Concern
      extend ::Gitlab::Utils::Override

      private

      # Ids of projects whose registry has no repository check recorded and
      # whose repository and wiki were both last synced more than 24 hours
      # ago, capped at +batch_size+.
      #
      # NOTE(review): `synced_repos` / `synced_wikis` are scopes defined
      # outside this file; presumably they restrict to fully synced entries.
      override :never_checked_project_ids
      def never_checked_project_ids(batch_size)
        return super unless ::Gitlab::Geo.secondary?

        # Fully qualified so the constant cannot be resolved relative to the
        # enclosing EE namespace.
        ::Geo::ProjectRegistry.synced_repos.synced_wikis
          .where(last_repository_check_at: nil)
          .where('last_repository_synced_at < ?', 24.hours.ago)
          .where('last_wiki_synced_at < ?', 24.hours.ago)
          .limit(batch_size).pluck(:project_id)
      end

      # Ids of projects whose registry records a repository check older than
      # one month, least recently checked first, capped at +batch_size+.
      override :old_checked_project_ids
      def old_checked_project_ids(batch_size)
        return super unless ::Gitlab::Geo.secondary?

        ::Geo::ProjectRegistry.synced_repos.synced_wikis
          .where('last_repository_check_at < ?', 1.month.ago)
          .reorder(last_repository_check_at: :asc)
          .limit(batch_size).pluck(:project_id)
      end
    end
  end
end
module EE
  module RepositoryCheck
    # Geo-specific override deciding where a repository check result is
    # stored.
    module SingleRepositoryWorker
      extend ActiveSupport::Concern
      extend ::Gitlab::Utils::Override

      private

      # On a Geo secondary the outcome is written to the project's
      # Geo::ProjectRegistry record (built first when none exists yet);
      # otherwise the original implementation handles it.
      override :update_repository_check_status
      def update_repository_check_status(project, healthy)
        if ::Gitlab::Geo.secondary?
          registry = ::Geo::ProjectRegistry.find_or_initialize_by(project: project)
          registry.last_repository_check_failed = !healthy
          registry.last_repository_check_at = Time.zone.now
          registry.save!
        else
          super
        end
      end
    end
  end
end
---
title: Run repository verification on Geo secondary
merge_request: 5550
author:
type: added
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
# Adds the two columns that record repository check results to the
# +project_registry+ table.
class AddRepositoryCheckToGeoProjectRegistry < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# NOTE(review): presumably the GitLab migration convention flag stating
# that this migration can run without downtime.
DOWNTIME = false
# Adds a boolean column for the outcome of the last check and a timestamp
# column for when it ran. Neither column is given a default or a NOT NULL
# constraint here.
def change
add_column :project_registry, :last_repository_check_failed, :boolean
add_column :project_registry, :last_repository_check_at, :datetime_with_timezone
end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180419192603) do
ActiveRecord::Schema.define(version: 20180427114641) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -74,6 +74,8 @@ ActiveRecord::Schema.define(version: 20180419192603) do
t.binary "wiki_verification_checksum_sha"
t.boolean "repository_checksum_mismatch", default: false, null: false
t.boolean "wiki_checksum_mismatch", default: false, null: false
t.boolean "last_repository_check_failed"
t.datetime "last_repository_check_at"
end
add_index "project_registry", ["last_repository_successful_sync_at"], name: "index_project_registry_on_last_repository_successful_sync_at", using: :btree
......
require 'spec_helper'
# Verifies which database the batch worker reads project ids from, depending
# on whether the current Geo node is a primary or a secondary.
describe EE::RepositoryCheck::BatchWorker do
  include ::EE::GeoHelpers

  subject(:worker) { RepositoryCheck::BatchWorker.new }

  context 'Geo primary' do
    set(:primary) { create(:geo_node, :primary) }

    before do
      stub_current_geo_node(primary)
    end

    it 'loads project ids from main database' do
      projects = create_list(:project, 3, created_at: 1.week.ago)

      # The never-checked query has no ORDER BY, so only the set of ids is
      # asserted, not their order.
      expect(worker.perform).to match_array(projects.map(&:id))
    end
  end

  context 'Geo secondary' do
    set(:secondary) { create(:geo_node) }

    before do
      stub_current_geo_node(secondary)
    end

    it 'loads project ids from tracking database' do
      project_registries = create_list(:geo_project_registry, 3, :synced)

      # Unordered query: compare as a set.
      expect(worker.perform).to match_array(project_registries.map(&:project_id))
    end

    it 'loads project ids that were checked more than a month ago from tracking database' do
      project_registries = create_list(:geo_project_registry, 3, :synced,
                                       last_repository_check_failed: false,
                                       last_repository_check_at: 42.days.ago)

      # All three registries share the same last_repository_check_at, so the
      # ORDER BY on that column leaves their relative order undefined.
      expect(worker.perform).to match_array(project_registries.map(&:project_id))
    end
  end
end
require 'spec_helper'
# Verifies where a single-repository check result is stored, depending on
# whether the current Geo node is a primary or a secondary.
describe EE::RepositoryCheck::SingleRepositoryWorker do
  include ::EE::GeoHelpers

  set(:project) { create(:project) }

  subject(:worker) { RepositoryCheck::SingleRepositoryWorker.new }

  context 'Geo primary' do
    set(:primary) { create(:geo_node, :primary) }

    before { stub_current_geo_node(primary) }

    it 'saves results to main database' do
      expect { worker.perform(project.id) }
        .to change { project.reload.last_repository_check_at }

      expect(project.last_repository_check_failed).to be_falsy
    end
  end

  context 'Geo secondary' do
    set(:project_registry) { create(:geo_project_registry, project: project) }
    set(:secondary) { create(:geo_node) }

    before { stub_current_geo_node(secondary) }

    it 'saves results to Geo registry' do
      expect { worker.perform(project.id) }
        .to change { project_registry.reload.last_repository_check_at }

      expect(project_registry.last_repository_check_failed).to be_falsy
    end

    it 'creates Geo registry when not yet exists' do
      project_registry.destroy!

      worker.perform(project.id)

      # find_by! raises when the worker did not create a registry.
      created_registry = Geo::ProjectRegistry.find_by!(project: project.id)
      expect(created_registry.last_repository_check_failed).to be_falsy
    end
  end
end
require 'spec_helper'
# Verifies that AdminEmailWorker only sends the repository check mail when
# repository checks are enabled and at least one project has a failed check.
describe AdminEmailWorker do
  subject(:worker) { described_class.new }

  # `perform` is called on a worker instance, hence the `#` prefix.
  describe '#perform' do
    it 'does not attempt to send repository check mail when they are disabled' do
      stub_application_setting(repository_checks_enabled: false)

      expect(worker).not_to receive(:send_repository_check_mail)

      worker.perform
    end

    context 'repository_checks enabled' do
      before do
        stub_application_setting(repository_checks_enabled: true)
      end

      it 'checks if repository check mail should be sent' do
        expect(worker).to receive(:send_repository_check_mail)

        worker.perform
      end

      it 'does not send mail when there are no failed repos' do
        expect(RepositoryCheckMailer).not_to receive(:notify)

        worker.perform
      end

      it 'sends mail when there is a failed repo' do
        create(:project, last_repository_check_failed: true, last_repository_check_at: Date.yesterday)

        expect(RepositoryCheckMailer).to receive(:notify).and_return(spy)

        worker.perform
      end
    end
  end
end
......@@ -31,8 +31,8 @@ describe RepositoryCheck::BatchWorker do
it 'does nothing when repository checks are disabled' do
create(:project, created_at: 1.week.ago)
current_settings = double('settings', repository_checks_enabled: false)
expect(subject).to receive(:current_settings) { current_settings }
stub_application_setting(repository_checks_enabled: false)
expect(subject.perform).to eq(nil)
end
......
......@@ -2,44 +2,60 @@ require 'spec_helper'
require 'fileutils'
describe RepositoryCheck::SingleRepositoryWorker do
subject { described_class.new }
subject(:worker) { described_class.new }
it 'passes when the project has no push events' do
project = create(:project_empty_repo, :wiki_disabled)
it 'skips when the project has no push events' do
project = create(:project, :repository, :wiki_disabled)
project.events.destroy_all
break_repo(project)
break_project(project)
subject.perform(project.id)
expect(worker).not_to receive(:git_fsck)
worker.perform(project.id)
expect(project.reload.last_repository_check_failed).to eq(false)
end
it 'fails when the project has push events and a broken repository' do
project = create(:project_empty_repo)
project = create(:project, :repository)
create_push_event(project)
break_repo(project)
break_project(project)
subject.perform(project.id)
worker.perform(project.id)
expect(project.reload.last_repository_check_failed).to eq(true)
end
it 'succeeds when the project repo is valid' do
project = create(:project, :repository, :wiki_disabled)
create_push_event(project)
expect(worker).to receive(:git_fsck).and_call_original
expect do
worker.perform(project.id)
end.to change { project.reload.last_repository_check_at }
expect(project.reload.last_repository_check_failed).to eq(false)
end
it 'fails if the wiki repository is broken' do
project = create(:project_empty_repo, :wiki_enabled)
project = create(:project, :repository, :wiki_enabled)
project.create_wiki
create_push_event(project)
# Test sanity: everything should be fine before the wiki repo is broken
subject.perform(project.id)
worker.perform(project.id)
expect(project.reload.last_repository_check_failed).to eq(false)
break_wiki(project)
subject.perform(project.id)
worker.perform(project.id)
expect(project.reload.last_repository_check_failed).to eq(true)
end
it 'skips wikis when disabled' do
project = create(:project_empty_repo, :wiki_disabled)
project = create(:project, :wiki_disabled)
# Make sure the test would fail if the wiki repo was checked
break_wiki(project)
......@@ -49,8 +65,8 @@ describe RepositoryCheck::SingleRepositoryWorker do
end
it 'creates missing wikis' do
project = create(:project_empty_repo, :wiki_enabled)
FileUtils.rm_rf(wiki_path(project))
project = create(:project, :wiki_enabled)
Gitlab::Shell.new.rm_directory(project.repository_storage, project.wiki.path)
subject.perform(project.id)
......@@ -58,34 +74,39 @@ describe RepositoryCheck::SingleRepositoryWorker do
end
it 'does not create a wiki if the main repo does not exist at all' do
project = create(:project_empty_repo)
create_push_event(project)
FileUtils.rm_rf(project.repository.path_to_repo)
FileUtils.rm_rf(wiki_path(project))
project = create(:project, :repository)
Gitlab::Shell.new.rm_directory(project.repository_storage, project.path)
Gitlab::Shell.new.rm_directory(project.repository_storage, project.wiki.path)
subject.perform(project.id)
expect(File.exist?(wiki_path(project))).to eq(false)
expect(Gitlab::Shell.new.exists?(project.repository_storage, project.wiki.path)).to eq(false)
end
def break_wiki(project)
objects_dir = wiki_path(project) + '/objects'
# Records a push event on the project, authored by a newly created user.
def create_push_event(project)
project.events.create(action: Event::PUSHED, author_id: create(:user).id)
end
# Replace the /objects directory with a file so that the repo is
# invalid, _and_ 'git init' cannot fix it.
FileUtils.rm_rf(objects_dir)
FileUtils.touch(objects_dir) if File.directory?(wiki_path(project))
# Corrupts the project's wiki repository by passing its on-disk path
# (see wiki_path) to break_repo.
def break_wiki(project)
break_repo(wiki_path(project))
end
# Returns the path_to_repo of the project's wiki repository.
def wiki_path(project)
project.wiki.repository.path_to_repo
end
def create_push_event(project)
project.events.create(action: Event::PUSHED, author_id: create(:user).id)
# Corrupts the project's main repository by passing its path_to_repo to
# break_repo.
def break_project(project)
break_repo(project.repository.path_to_repo)
end
def break_repo(project)
FileUtils.rm_rf(File.join(project.repository.path_to_repo, 'objects'))
# Makes the repository at +repo+ invalid in a way 'git init' cannot fix:
# blob ffffffffffffffffffffffffffffffffffffffff is created, or replaced if
# it already exists, as an empty file.
def break_repo(repo)
  bogus_blob = File.join(repo, 'objects', 'ff', 'ffffffffffffffffffffffffffffffffffffff')

  FileUtils.mkdir_p(File.dirname(bogus_blob))
  # Remove before touching so that an existing blob is replaced by an empty
  # file rather than merely having its timestamps updated.
  FileUtils.rm_f(bogus_blob)
  FileUtils.touch(bogus_blob)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment