Commit 424cbf46 authored by Dmitriy Zaporozhets's avatar Dmitriy Zaporozhets

Merge branch 'repository-archive-worker' into 'master'

Archive repositories in background worker.

Depends on https://gitlab.com/gitlab-org/gitlab_git/merge_requests/17 being merged, a new `gitlab_git` being released and this MR's `Gemfile.lock` being updated..

See private issue https://dev.gitlab.org/gitlab/gitlabhq/issues/2173.

To do after this is merged: Update https://gitlab.com/gitlab-org/omnibus-gitlab/blob/master/files/gitlab-cookbooks/gitlab/templates/default/sv-sidekiq-run.erb in omnibus.

See merge request !436
parents f8f97503 85b29f99
...@@ -54,6 +54,7 @@ v 7.10.0 (unreleased) ...@@ -54,6 +54,7 @@ v 7.10.0 (unreleased)
- Don't leak private group existence by redirecting from namespace controller to group controller. - Don't leak private group existence by redirecting from namespace controller to group controller.
- Ability to skip some items from backup (database, respositories or uploads) - Ability to skip some items from backup (database, respositories or uploads)
- Fix "Hello @username." references not working by no longer allowing usernames to end in period. - Fix "Hello @username." references not working by no longer allowing usernames to end in period.
- Archive repositories in background worker.
v 7.9.2 v 7.9.2
......
...@@ -122,6 +122,7 @@ gem 'slim' ...@@ -122,6 +122,7 @@ gem 'slim'
gem 'sinatra', require: nil gem 'sinatra', require: nil
gem 'sidekiq', '~> 3.3' gem 'sidekiq', '~> 3.3'
gem 'sidetiq', '0.6.3' gem 'sidetiq', '0.6.3'
gem 'sidekiq-limit_fetch'
# HTTP requests # HTTP requests
gem "httparty" gem "httparty"
......
...@@ -547,6 +547,8 @@ GEM ...@@ -547,6 +547,8 @@ GEM
json json
redis (>= 3.0.6) redis (>= 3.0.6)
redis-namespace (>= 1.3.1) redis-namespace (>= 1.3.1)
sidekiq-limit_fetch (2.4.1)
sidekiq (>= 2.6.5, < 4.0)
sidetiq (0.6.3) sidetiq (0.6.3)
celluloid (>= 0.14.1) celluloid (>= 0.14.1)
ice_cube (= 0.11.1) ice_cube (= 0.11.1)
...@@ -769,6 +771,7 @@ DEPENDENCIES ...@@ -769,6 +771,7 @@ DEPENDENCIES
settingslogic settingslogic
shoulda-matchers (~> 2.7.0) shoulda-matchers (~> 2.7.0)
sidekiq (~> 3.3) sidekiq (~> 3.3)
sidekiq-limit_fetch
sidetiq (= 0.6.3) sidetiq (= 0.6.3)
simplecov simplecov
sinatra sinatra
......
web: bundle exec unicorn_rails -p ${PORT:="3000"} -E ${RAILS_ENV:="development"} -c ${UNICORN_CONFIG:="config/unicorn.rb"} web: bundle exec unicorn_rails -p ${PORT:="3000"} -E ${RAILS_ENV:="development"} -c ${UNICORN_CONFIG:="config/unicorn.rb"}
worker: bundle exec sidekiq -q post_receive -q mailer -q system_hook -q project_web_hook -q gitlab_shell -q common -q default worker: bundle exec sidekiq -q post_receive -q mailer -q archive_repo -q system_hook -q project_web_hook -q gitlab_shell -q common -q default
...@@ -11,18 +11,18 @@ class Projects::RepositoriesController < Projects::ApplicationController ...@@ -11,18 +11,18 @@ class Projects::RepositoriesController < Projects::ApplicationController
end end
def archive def archive
unless can?(current_user, :download_code, @project) begin
render_404 and return file_path = ArchiveRepositoryService.new(@project, params[:ref], params[:format]).execute
rescue
return head :not_found
end end
file_path = ArchiveRepositoryService.new.execute(@project, params[:ref], params[:format])
if file_path if file_path
# Send file to user # Send file to user
response.headers["Content-Length"] = File.open(file_path).size.to_s response.headers["Content-Length"] = File.open(file_path).size.to_s
send_file file_path send_file file_path
else else
render_404 redirect_to request.fullpath
end end
end end
end end
...@@ -267,6 +267,9 @@ class Repository ...@@ -267,6 +267,9 @@ class Repository
# Remove archives older than 2 hours # Remove archives older than 2 hours
def clean_old_archives def clean_old_archives
repository_downloads_path = Gitlab.config.gitlab.repository_downloads_path repository_downloads_path = Gitlab.config.gitlab.repository_downloads_path
return unless File.directory?(repository_downloads_path)
Gitlab::Popen.popen(%W(find #{repository_downloads_path} -not -path #{repository_downloads_path} -mmin +120 -delete)) Gitlab::Popen.popen(%W(find #{repository_downloads_path} -not -path #{repository_downloads_path} -mmin +120 -delete))
end end
......
class ArchiveRepositoryService class ArchiveRepositoryService
def execute(project, ref, format) attr_reader :project, :ref, :format
storage_path = Gitlab.config.gitlab.repository_downloads_path
unless File.directory?(storage_path) def initialize(project, ref, format)
FileUtils.mkdir_p(storage_path) format ||= 'tar.gz'
@project, @ref, @format = project, ref, format.downcase
end end
format ||= 'tar.gz' def execute(options = {})
repository = project.repository project.repository.clean_old_archives
repository.clean_old_archives
repository.archive_repo(ref, storage_path, format.downcase) raise "No archive file path" unless file_path
return file_path if archived?
unless archiving?
RepositoryArchiveWorker.perform_async(project.id, ref, format)
end
archived = wait_until_archived(options[:timeout] || 5.0)
file_path if archived
end
private
def storage_path
Gitlab.config.gitlab.repository_downloads_path
end
def file_path
@file_path ||= project.repository.archive_file_path(ref, storage_path, format)
end
def pid_file_path
@pid_file_path ||= project.repository.archive_pid_file_path(ref, storage_path, format)
end
def archived?
File.exist?(file_path)
end
def archiving?
File.exist?(pid_file_path)
end
def wait_until_archived(timeout = 5.0)
return archived? if timeout == 0.0
t1 = Time.now
begin
sleep 0.1
success = archived?
t2 = Time.now
end until success || t2 - t1 >= timeout
success
end end
end end
class RepositoryArchiveWorker
include Sidekiq::Worker
sidekiq_options queue: :archive_repo
attr_accessor :project, :ref, :format
def perform(project_id, ref, format)
@project = Project.find(project_id)
@ref, @format = ref, format.downcase
repository = project.repository
repository.clean_old_archives
return unless file_path
return if archived? || archiving?
repository.archive_repo(ref, storage_path, format)
end
private
def storage_path
Gitlab.config.gitlab.repository_downloads_path
end
def file_path
@file_path ||= project.repository.archive_file_path(ref, storage_path, format)
end
def pid_file_path
@pid_file_path ||= project.repository.archive_pid_file_path(ref, storage_path, format)
end
def archived?
File.exist?(file_path)
end
def archiving?
File.exist?(pid_file_path)
end
end
...@@ -37,7 +37,7 @@ start_no_deamonize() ...@@ -37,7 +37,7 @@ start_no_deamonize()
start_sidekiq() start_sidekiq()
{ {
bundle exec sidekiq -q post_receive -q mailer -q system_hook -q project_web_hook -q gitlab_shell -q common -q default -e $RAILS_ENV -P $sidekiq_pidfile $@ >> $sidekiq_logfile 2>&1 bundle exec sidekiq -q post_receive -q mailer -q archive_repo -q system_hook -q project_web_hook -q gitlab_shell -q common -q default -e $RAILS_ENV -P $sidekiq_pidfile $@ >> $sidekiq_logfile 2>&1
} }
load_ok() load_ok()
......
...@@ -25,3 +25,5 @@ Sidekiq.configure_client do |config| ...@@ -25,3 +25,5 @@ Sidekiq.configure_client do |config|
namespace: 'resque:gitlab' namespace: 'resque:gitlab'
} }
end end
Sidekiq::Queue["archive_repo"].limit = 2
...@@ -133,10 +133,11 @@ module API ...@@ -133,10 +133,11 @@ module API
authorize! :download_code, user_project authorize! :download_code, user_project
begin begin
file_path = ArchiveRepositoryService.new.execute( file_path = ArchiveRepositoryService.new(
user_project, user_project,
params[:sha], params[:sha],
params[:format]) params[:format]
).execute
rescue rescue
not_found!('File') not_found!('File')
end end
...@@ -149,7 +150,7 @@ module API ...@@ -149,7 +150,7 @@ module API
env['api.format'] = :binary env['api.format'] = :binary
present data present data
else else
not_found!('File') redirect request.fullpath
end end
end end
......
require "spec_helper"
describe Projects::RepositoriesController do
let(:project) { create(:project) }
let(:user) { create(:user) }
describe "GET archive" do
before do
sign_in(user)
project.team << [user, :developer]
allow(ArchiveRepositoryService).to receive(:new).and_return(service)
end
let(:service) { ArchiveRepositoryService.new(project, "master", "zip") }
it "executes ArchiveRepositoryService" do
expect(ArchiveRepositoryService).to receive(:new).with(project, "master", "zip")
expect(service).to receive(:execute)
get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip"
end
context "when the service raises an error" do
before do
allow(service).to receive(:execute).and_raise("Archive failed")
end
it "renders Not Found" do
get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip"
expect(response.status).to eq(404)
end
end
context "when the service doesn't return a path" do
before do
allow(service).to receive(:execute).and_return(nil)
end
it "reloads the page" do
get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip"
expect(response).to redirect_to(archive_namespace_project_repository_path(project.namespace, project, ref: "master", format: "zip"))
end
end
context "when the service returns a path" do
let(:path) { Rails.root.join("spec/fixtures/dk.png").to_s }
before do
allow(service).to receive(:execute).and_return(path)
end
it "sends the file" do
get :archive, namespace_id: project.namespace.path, project_id: project.path, ref: "master", format: "zip"
expect(response.body).to eq(File.binread(path))
end
end
end
end
require 'spec_helper'
describe ArchiveRepositoryService do
let(:project) { create(:project) }
subject { ArchiveRepositoryService.new(project, "master", "zip") }
describe "#execute" do
it "cleans old archives" do
expect(project.repository).to receive(:clean_old_archives)
subject.execute(timeout: 0.0)
end
context "when the repository doesn't have an archive file path" do
before do
allow(project.repository).to receive(:archive_file_path).and_return(nil)
end
it "raises an error" do
expect {
subject.execute(timeout: 0.0)
}.to raise_error
end
end
context "when the repository has an archive file path" do
let(:file_path) { "/archive.zip" }
let(:pid_file_path) { "/archive.zip.pid" }
before do
allow(project.repository).to receive(:archive_file_path).and_return(file_path)
allow(project.repository).to receive(:archive_pid_file_path).and_return(pid_file_path)
end
context "when the archive file already exists" do
before do
allow(File).to receive(:exist?).with(file_path).and_return(true)
end
it "returns the file path" do
expect(subject.execute(timeout: 0.0)).to eq(file_path)
end
end
context "when the archive file doesn't exist yet" do
before do
allow(File).to receive(:exist?).with(file_path).and_return(false)
allow(File).to receive(:exist?).with(pid_file_path).and_return(true)
end
context "when the archive pid file doesn't exist yet" do
before do
allow(File).to receive(:exist?).with(pid_file_path).and_return(false)
end
it "queues the RepositoryArchiveWorker" do
expect(RepositoryArchiveWorker).to receive(:perform_async)
subject.execute(timeout: 0.0)
end
end
context "when the archive pid file already exists" do
it "doesn't queue the RepositoryArchiveWorker" do
expect(RepositoryArchiveWorker).not_to receive(:perform_async)
subject.execute(timeout: 0.0)
end
end
context "when the archive file exists after a little while" do
before do
Thread.new do
sleep 0.1
allow(File).to receive(:exist?).with(file_path).and_return(true)
end
end
it "returns the file path" do
expect(subject.execute(timeout: 0.2)).to eq(file_path)
end
end
context "when the archive file doesn't exist after the timeout" do
it "returns nil" do
expect(subject.execute(timeout: 0.0)).to eq(nil)
end
end
end
end
end
end
require 'spec_helper'
describe RepositoryArchiveWorker do
let(:project) { create(:project) }
subject { RepositoryArchiveWorker.new }
before do
allow(Project).to receive(:find).and_return(project)
end
describe "#perform" do
it "cleans old archives" do
expect(project.repository).to receive(:clean_old_archives)
subject.perform(project.id, "master", "zip")
end
context "when the repository doesn't have an archive file path" do
before do
allow(project.repository).to receive(:archive_file_path).and_return(nil)
end
it "doesn't archive the repo" do
expect(project.repository).not_to receive(:archive_repo)
subject.perform(project.id, "master", "zip")
end
end
context "when the repository has an archive file path" do
let(:file_path) { "/archive.zip" }
let(:pid_file_path) { "/archive.zip.pid" }
before do
allow(project.repository).to receive(:archive_file_path).and_return(file_path)
allow(project.repository).to receive(:archive_pid_file_path).and_return(pid_file_path)
end
context "when the archive file already exists" do
before do
allow(File).to receive(:exist?).with(file_path).and_return(true)
end
it "doesn't archive the repo" do
expect(project.repository).not_to receive(:archive_repo)
subject.perform(project.id, "master", "zip")
end
end
context "when the archive file doesn't exist yet" do
before do
allow(File).to receive(:exist?).with(file_path).and_return(false)
allow(File).to receive(:exist?).with(pid_file_path).and_return(true)
end
context "when the archive pid file doesn't exist yet" do
before do
allow(File).to receive(:exist?).with(pid_file_path).and_return(false)
end
it "archives the repo" do
expect(project.repository).to receive(:archive_repo)
subject.perform(project.id, "master", "zip")
end
end
context "when the archive pid file already exists" do
it "doesn't archive the repo" do
expect(project.repository).not_to receive(:archive_repo)
subject.perform(project.id, "master", "zip")
end
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment