Commit 95a867e0 authored by Robert Speicher's avatar Robert Speicher

Merge branch 'auto-fsck' into 'master'

Auto git fsck

Closes https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/3232

See merge request !3232
parents 58ca8490 cce21e74
......@@ -34,6 +34,7 @@ v 8.7.0 (unreleased)
- Fix a bug whith trailing slash in bamboo_url
- Add links to CI setup documentation from project settings and builds pages
- Handle nil descriptions in Slack issue messages (Stan Hu)
- Add automated repository integrity checks
- API: Expose open_issues_count, closed_issues_count, open_merge_requests_count for labels (Robert Schilling)
- API: Ability to star and unstar a project (Robert Schilling)
- Add default scope to projects to exclude projects pending deletion
......
......@@ -19,6 +19,15 @@ class Admin::ApplicationSettingsController < Admin::ApplicationController
redirect_to admin_runners_path
end
def clear_repository_check_states
RepositoryCheck::ClearWorker.perform_async
redirect_to(
admin_application_settings_path,
notice: 'Started asynchronous removal of all repository check states.'
)
end
private
def set_application_setting
......@@ -82,6 +91,7 @@ class Admin::ApplicationSettingsController < Admin::ApplicationController
:akismet_enabled,
:akismet_api_key,
:email_author_in_body,
:repository_checks_enabled,
restricted_visibility_levels: [],
import_sources: []
)
......
class Admin::ProjectsController < Admin::ApplicationController
before_action :project, only: [:show, :transfer]
before_action :project, only: [:show, :transfer, :repository_check]
before_action :group, only: [:show, :transfer]
def index
......@@ -8,6 +8,7 @@ class Admin::ProjectsController < Admin::ApplicationController
@projects = @projects.where("projects.visibility_level IN (?)", params[:visibility_levels]) if params[:visibility_levels].present?
@projects = @projects.with_push if params[:with_push].present?
@projects = @projects.abandoned if params[:abandoned].present?
@projects = @projects.where(last_repository_check_failed: true) if params[:last_repository_check_failed].present?
@projects = @projects.non_archived unless params[:with_archived].present?
@projects = @projects.search(params[:name]) if params[:name].present?
@projects = @projects.sort(@sort = params[:sort])
......@@ -30,6 +31,15 @@ class Admin::ProjectsController < Admin::ApplicationController
redirect_to admin_namespace_project_path(@project.namespace, @project)
end
def repository_check
RepositoryCheck::SingleRepositoryWorker.perform_async(@project.id)
redirect_to(
admin_namespace_project_path(@project.namespace, @project),
notice: 'Repository check was triggered.'
)
end
protected
def project
......
class RepositoryCheckMailer < BaseMailer
def notify(failed_count)
if failed_count == 1
@message = "One project failed its last repository check"
else
@message = "#{failed_count} projects failed their last repository check"
end
mail(
to: User.admins.pluck(:email),
subject: @message
)
end
end
......@@ -153,7 +153,8 @@ class ApplicationSetting < ActiveRecord::Base
require_two_factor_authentication: false,
two_factor_grace_period: 48,
recaptcha_enabled: false,
akismet_enabled: false
akismet_enabled: false,
repository_checks_enabled: true,
)
end
......
......@@ -271,5 +271,24 @@
.col-sm-10
= f.text_field :sentry_dsn, class: 'form-control'
%fieldset
%legend Repository Checks
.form-group
.col-sm-offset-2.col-sm-10
.checkbox
= f.label :repository_checks_enabled do
= f.check_box :repository_checks_enabled
Enable Repository Checks
.help-block
GitLab will periodically run
%a{ href: 'https://www.kernel.org/pub/software/scm/git/docs/git-fsck.html', target: 'blank' } 'git fsck'
in all project and wiki repositories to look for silent disk corruption issues.
.form-group
.col-sm-offset-2.col-sm-10
= link_to 'Clear all repository checks', clear_repository_check_states_admin_application_settings_path, data: { confirm: 'This will clear repository check states for ALL projects in the database. This cannot be undone. Are you sure?' }, method: :put, class: "btn btn-sm btn-remove"
.help-block
If you got a lot of false alarms from repository checks you can choose to clear all repository check information from the database.
.form-actions
= f.submit 'Save', class: 'btn btn-save'
- page_title "Logs"
- loggers = [Gitlab::GitLogger, Gitlab::AppLogger,
Gitlab::ProductionLogger, Gitlab::SidekiqLogger]
Gitlab::ProductionLogger, Gitlab::SidekiqLogger,
Gitlab::RepositoryCheckLogger]
%ul.nav-links.log-tabs
- loggers.each do |klass|
%li{ class: (klass == Gitlab::GitLogger ? 'active' : '') }
......
......@@ -3,7 +3,7 @@
.row.prepend-top-default
%aside.col-md-3
.admin-filter
.panel.admin-filter
= form_tag admin_namespaces_projects_path, method: :get, class: '' do
.form-group
= label_tag :name, 'Name:'
......@@ -38,7 +38,13 @@
%span.descr
= visibility_level_icon(level)
= label
%hr
%fieldset
%strong Problems
.checkbox
= label_tag :last_repository_check_failed do
= check_box_tag :last_repository_check_failed, 1, params[:last_repository_check_failed]
%span Last repository check failed
= hidden_field_tag :sort, params[:sort]
= button_tag "Search", class: "btn submit btn-primary"
= link_to "Reset", admin_namespaces_projects_path, class: "btn btn-cancel"
......
......@@ -5,6 +5,16 @@
%i.fa.fa-pencil-square-o
Edit
%hr
- if @project.last_repository_check_failed?
.row
.col-md-12
.panel
.panel-heading.alert.alert-danger
Last repository check
= "(#{time_ago_in_words(@project.last_repository_check_at)} ago)"
failed. See
= link_to 'repocheck.log', admin_logs_path
for error messages.
.row
.col-md-6
.panel.panel-default
......@@ -95,6 +105,32 @@
.col-sm-offset-2.col-sm-10
= f.submit 'Transfer', class: 'btn btn-primary'
.panel.panel-default.repository-check
.panel-heading
Repository check
.panel-body
= form_for @project, url: repository_check_admin_namespace_project_path(@project.namespace, @project), method: :post do |f|
.form-group
- if @project.last_repository_check_at.nil?
This repository has never been checked.
- else
This repository was last checked
= @project.last_repository_check_at.to_s(:medium) + '.'
The check
- if @project.last_repository_check_failed?
= succeed '.' do
%strong.cred failed
See
= link_to 'repocheck.log', admin_logs_path
for error messages.
- else
passed.
= link_to icon('question-circle'), help_page_path('administration', 'repository_checks')
.form-group
= f.submit 'Trigger repository check', class: 'btn btn-primary'
.col-md-6
- if @group
.panel.panel-default
......
%p
#{@message}.
%p
= link_to "See the affected projects in the GitLab admin panel", admin_namespaces_projects_url(last_repository_check_failed: 1)
#{@message}.
\
View details: #{admin_namespaces_projects_url(last_repository_check_failed: 1)}
class AdminEmailWorker
include Sidekiq::Worker
sidekiq_options retry: false # this job auto-repeats via sidekiq-cron
def perform
repository_check_failed_count = Project.where(last_repository_check_failed: true).count
return if repository_check_failed_count.zero?
RepositoryCheckMailer.notify(repository_check_failed_count).deliver_now
end
end
module RepositoryCheck
class BatchWorker
include Sidekiq::Worker
RUN_TIME = 3600
sidekiq_options retry: false
def perform
start = Time.now
# This loop will break after a little more than one hour ('a little
# more' because `git fsck` may take a few minutes), or if it runs out of
# projects to check. By default sidekiq-cron will start a new
# RepositoryCheckWorker each hour so that as long as there are repositories to
# check, only one (or two) will be checked at a time.
project_ids.each do |project_id|
break if Time.now - start >= RUN_TIME
break unless current_settings.repository_checks_enabled
next unless try_obtain_lease(project_id)
SingleRepositoryWorker.new.perform(project_id)
end
end
private
# Project.find_each does not support WHERE clauses and
# Project.find_in_batches does not support ordering. So we just build an
# array of ID's. This is OK because we do it only once an hour, because
# getting ID's from Postgres is not terribly slow, and because no user
# has to sit and wait for this query to finish.
def project_ids
limit = 10_000
never_checked_projects = Project.where('last_repository_check_at IS NULL').limit(limit).
pluck(:id)
old_check_projects = Project.where('last_repository_check_at < ?', 1.month.ago).
reorder('last_repository_check_at ASC').limit(limit).pluck(:id)
never_checked_projects + old_check_projects
end
def try_obtain_lease(id)
# Use a 24-hour timeout because on servers/projects where 'git fsck' is
# super slow we definitely do not want to run it twice in parallel.
Gitlab::ExclusiveLease.new(
"project_repository_check:#{id}",
timeout: 24.hours
).try_obtain
end
def current_settings
# No caching of the settings! If we cache them and an admin disables
# this feature, an active RepositoryCheckWorker would keep going for up
# to 1 hour after the feature was disabled.
if Rails.env.test?
Gitlab::CurrentSettings.fake_application_settings
else
ApplicationSetting.current
end
end
end
end
module RepositoryCheck
class ClearWorker
include Sidekiq::Worker
sidekiq_options retry: false
def perform
# Do small batched updates because these updates will be slow and locking
Project.select(:id).find_in_batches(batch_size: 100) do |batch|
Project.where(id: batch.map(&:id)).update_all(
last_repository_check_failed: nil,
last_repository_check_at: nil,
)
end
end
end
end
module RepositoryCheck
class SingleRepositoryWorker
include Sidekiq::Worker
sidekiq_options retry: false
def perform(project_id)
project = Project.find(project_id)
project.update_columns(
last_repository_check_failed: !check(project),
last_repository_check_at: Time.now,
)
end
private
def check(project)
# Use 'map do', not 'all? do', to prevent short-circuiting
[project.repository, project.wiki.repository].map do |repository|
git_fsck(repository.path_to_repo)
end.all?
end
def git_fsck(path)
cmd = %W(nice git --git-dir=#{path} fsck)
output, status = Gitlab::Popen.popen(cmd)
if status.zero?
true
else
Gitlab::RepositoryCheckLogger.error("command failed: #{cmd.join(' ')}\n#{output}")
false
end
end
end
end
......@@ -164,6 +164,13 @@ production: &base
# Flag stuck CI builds as failed
stuck_ci_builds_worker:
cron: "0 0 * * *"
# Periodically run 'git fsck' on all repositories. If started more than
# once per hour you will have concurrent 'git fsck' jobs.
repository_check_worker:
cron: "20 * * * *"
# Send admin emails once a day
admin_email_worker:
cron: "0 0 * * *"
# Remove outdated repository archives
repository_archive_cache_worker:
......
......@@ -241,11 +241,16 @@ Settings['cron_jobs'] ||= Settingslogic.new({})
Settings.cron_jobs['stuck_ci_builds_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['stuck_ci_builds_worker']['cron'] ||= '0 0 * * *'
Settings.cron_jobs['stuck_ci_builds_worker']['job_class'] = 'StuckCiBuildsWorker'
Settings.cron_jobs['repository_check_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['repository_check_worker']['cron'] ||= '20 * * * *'
Settings.cron_jobs['repository_check_worker']['job_class'] = 'RepositoryCheck::BatchWorker'
Settings.cron_jobs['admin_email_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['admin_email_worker']['cron'] ||= '0 0 * * *'
Settings.cron_jobs['admin_email_worker']['job_class'] = 'AdminEmailWorker'
Settings.cron_jobs['repository_archive_cache_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['repository_archive_cache_worker']['cron'] ||= '0 * * * *'
Settings.cron_jobs['repository_archive_cache_worker']['job_class'] = 'RepositoryArchiveCacheWorker'
#
# GitLab Shell
#
......
......@@ -264,6 +264,7 @@ Rails.application.routes.draw do
member do
put :transfer
post :repository_check
end
resources :runner_projects
......@@ -281,6 +282,7 @@ Rails.application.routes.draw do
resource :application_settings, only: [:show, :update] do
resources :services
put :reset_runners_token
put :clear_repository_check_states
end
resources :labels
......
class ProjectAddRepositoryCheck < ActiveRecord::Migration
def change
add_column :projects, :last_repository_check_failed, :boolean
add_index :projects, :last_repository_check_failed
add_column :projects, :last_repository_check_at, :datetime
end
end
class AddRepositoryChecksEnabledSetting < ActiveRecord::Migration
def change
add_column :application_settings, :repository_checks_enabled, :boolean, default: true
end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20160331223143) do
ActiveRecord::Schema.define(version: 20160412140240) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -77,6 +77,7 @@ ActiveRecord::Schema.define(version: 20160331223143) do
t.string "akismet_api_key"
t.boolean "email_author_in_body", default: false
t.integer "default_group_visibility"
t.boolean "repository_checks_enabled", default: true
end
create_table "audit_events", force: :cascade do |t|
......@@ -743,6 +744,8 @@ ActiveRecord::Schema.define(version: 20160331223143) do
t.boolean "public_builds", default: true, null: false
t.string "main_language"
t.integer "pushes_since_gc", default: 0
t.boolean "last_repository_check_failed"
t.datetime "last_repository_check_at"
end
add_index "projects", ["builds_enabled", "shared_runners_enabled"], name: "index_projects_on_builds_enabled_and_shared_runners_enabled", using: :btree
......@@ -752,6 +755,7 @@ ActiveRecord::Schema.define(version: 20160331223143) do
add_index "projects", ["creator_id"], name: "index_projects_on_creator_id", using: :btree
add_index "projects", ["description"], name: "index_projects_on_description_trigram", using: :gin, opclasses: {"description"=>"gin_trgm_ops"}
add_index "projects", ["last_activity_at"], name: "index_projects_on_last_activity_at", using: :btree
add_index "projects", ["last_repository_check_failed"], name: "index_projects_on_last_repository_check_failed", using: :btree
add_index "projects", ["name"], name: "index_projects_on_name_trigram", using: :gin, opclasses: {"name"=>"gin_trgm_ops"}
add_index "projects", ["namespace_id"], name: "index_projects_on_namespace_id", using: :btree
add_index "projects", ["path"], name: "index_projects_on_path", using: :btree
......
......@@ -31,6 +31,7 @@
- [Environment Variables](administration/environment_variables.md) to configure GitLab.
- [Operations](operations/README.md) Keeping GitLab up and running
- [Raketasks](raketasks/README.md) Backups, maintenance, automatic webhook setup and the importing of projects.
- [Repository checks](administration/repository_checks.md) Periodic Git repository checks
- [Security](security/README.md) Learn what you can do to further secure your GitLab instance.
- [System hooks](system_hooks/system_hooks.md) Notifications when users, projects and keys are changed.
- [Update](update/README.md) Update guides to upgrade your installation.
......
# Repository checks
>**Note:**
This feature was [introduced][ce-3232] in GitLab 8.7.
Git has a built-in mechanism, [git fsck][git-fsck], to verify the
integrity of all data commited to a repository. GitLab administrators
can trigger such a check for a project via the project page under the
admin panel. The checks run asynchronously so it may take a few minutes
before the check result is visible on the project admin page. If the
checks failed you can see their output on the admin log page under
'repocheck.log'.
## Periodic checks
GitLab periodically runs a repository check on all project repositories and
wiki repositories in order to detect data corruption problems. A
project will be checked no more than once per week. If any projects
fail their repository checks all GitLab administrators will receive an email
notification of the situation. This notification is sent out no more
than once a day.
## Disabling periodic checks
You can disable the periodic checks on the 'Settings' page of the admin
panel.
## What to do if a check failed
If the repository check fails for some repository you should look up the error
in repocheck.log (in the admin panel or on disk; see
`/var/log/gitlab/gitlab-rails` for Omnibus installations or
`/home/git/gitlab/log` for installations from source). Once you have
resolved the issue use the admin panel to trigger a new repository check on
the project. This will clear the 'check failed' state.
If for some reason the periodic repository check caused a lot of false
alarms you can choose to clear ALL repository check states from the
'Settings' page of the admin panel.
---
[ce-3232]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/3232 "Auto git fsck"
[git-fsck]: https://www.kernel.org/pub/software/scm/git/docs/git-fsck.html "git fsck documentation"
\ No newline at end of file
......@@ -34,7 +34,8 @@ module Gitlab
max_artifacts_size: Settings.artifacts['max_size'],
require_two_factor_authentication: false,
two_factor_grace_period: 48,
akismet_enabled: false
akismet_enabled: false,
repository_checks_enabled: true,
)
end
......
module Gitlab
class RepositoryCheckLogger < Gitlab::Logger
def self.file_name_noext
'repocheck'
end
end
end
require 'rails_helper'
feature 'Admin uses repository checks', feature: true do
before { login_as :admin }
scenario 'to trigger a single check' do
project = create(:empty_project)
visit_admin_project_page(project)
page.within('.repository-check') do
click_button 'Trigger repository check'
end
expect(page).to have_content('Repository check was triggered')
end
scenario 'to see a single failed repository check' do
project = create(:empty_project)
project.update_columns(
last_repository_check_failed: true,
last_repository_check_at: Time.now,
)
visit_admin_project_page(project)
page.within('.alert') do
expect(page.text).to match(/Last repository check \(.* ago\) failed/)
end
end
scenario 'to clear all repository checks', js: true do
visit admin_application_settings_path
expect(RepositoryCheck::ClearWorker).to receive(:perform_async)
click_link 'Clear all repository checks'
expect(page).to have_content('Started asynchronous removal of all repository check states.')
end
def visit_admin_project_page(project)
visit admin_namespace_project_path(project.namespace, project)
end
end
require 'rails_helper'
describe RepositoryCheckMailer do
include EmailSpec::Matchers
describe '.notify' do
it 'emails all admins' do
admins = create_list(:admin, 3)
mail = described_class.notify(1)
expect(mail).to deliver_to admins.map(&:email)
end
it 'mentions the number of failed checks' do
mail = described_class.notify(3)
expect(mail).to have_subject '3 projects failed their last repository check'
end
end
end
require 'spec_helper'
describe RepositoryCheck::BatchWorker do
subject { described_class.new }
it 'prefers projects that have never been checked' do
projects = create_list(:project, 3)
projects[0].update_column(:last_repository_check_at, 4.months.ago)
projects[2].update_column(:last_repository_check_at, 3.months.ago)
expect(subject.perform).to eq(projects.values_at(1, 0, 2).map(&:id))
end
it 'sorts projects by last_repository_check_at' do
projects = create_list(:project, 3)
projects[0].update_column(:last_repository_check_at, 2.months.ago)
projects[1].update_column(:last_repository_check_at, 4.months.ago)
projects[2].update_column(:last_repository_check_at, 3.months.ago)
expect(subject.perform).to eq(projects.values_at(1, 2, 0).map(&:id))
end
it 'excludes projects that were checked recently' do
projects = create_list(:project, 3)
projects[0].update_column(:last_repository_check_at, 2.days.ago)
projects[1].update_column(:last_repository_check_at, 2.months.ago)
projects[2].update_column(:last_repository_check_at, 3.days.ago)
expect(subject.perform).to eq([projects[1].id])
end
it 'does nothing when repository checks are disabled' do
create(:empty_project)
current_settings = double('settings', repository_checks_enabled: false)
expect(subject).to receive(:current_settings) { current_settings }
expect(subject.perform).to eq(nil)
end
end
require 'spec_helper'
describe RepositoryCheck::ClearWorker do
it 'clears repository check columns' do
project = create(:empty_project)
project.update_columns(
last_repository_check_failed: true,
last_repository_check_at: Time.now,
)
described_class.new.perform
project.reload
expect(project.last_repository_check_failed).to be_nil
expect(project.last_repository_check_at).to be_nil
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment