Commit 69fbeb10 authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch '35789-hashed-storage-attachments-migration-object-storage' into 'master'

Hashed Storage attachments migration: exclude files in object storage as they are all hashed already

See merge request gitlab-org/gitlab!20338
parents 405046cb 98d1d503
...@@ -374,9 +374,17 @@ class Project < ApplicationRecord ...@@ -374,9 +374,17 @@ class Project < ApplicationRecord
scope :pending_delete, -> { where(pending_delete: true) } scope :pending_delete, -> { where(pending_delete: true) }
scope :without_deleted, -> { where(pending_delete: false) } scope :without_deleted, -> { where(pending_delete: false) }
scope :with_storage_feature, ->(feature) { where('storage_version >= :version', version: HASHED_STORAGE_FEATURES[feature]) } scope :with_storage_feature, ->(feature) do
scope :without_storage_feature, ->(feature) { where('storage_version < :version OR storage_version IS NULL', version: HASHED_STORAGE_FEATURES[feature]) } where(arel_table[:storage_version].gteq(HASHED_STORAGE_FEATURES[feature]))
scope :with_unmigrated_storage, -> { where('storage_version < :version OR storage_version IS NULL', version: LATEST_STORAGE_VERSION) } end
scope :without_storage_feature, ->(feature) do
where(arel_table[:storage_version].lt(HASHED_STORAGE_FEATURES[feature])
.or(arel_table[:storage_version].eq(nil)))
end
scope :with_unmigrated_storage, -> do
where(arel_table[:storage_version].lt(LATEST_STORAGE_VERSION)
.or(arel_table[:storage_version].eq(nil)))
end
# last_activity_at is throttled every minute, but last_repository_updated_at is updated with every push # last_activity_at is throttled every minute, but last_repository_updated_at is updated with every push
scope :sorted_by_activity, -> { reorder(Arel.sql("GREATEST(COALESCE(last_activity_at, '1970-01-01'), COALESCE(last_repository_updated_at, '1970-01-01')) DESC")) } scope :sorted_by_activity, -> { reorder(Arel.sql("GREATEST(COALESCE(last_activity_at, '1970-01-01'), COALESCE(last_repository_updated_at, '1970-01-01')) DESC")) }
......
...@@ -23,6 +23,21 @@ class Upload < ApplicationRecord ...@@ -23,6 +23,21 @@ class Upload < ApplicationRecord
after_destroy :delete_file!, if: -> { uploader_class <= FileUploader } after_destroy :delete_file!, if: -> { uploader_class <= FileUploader }
class << self class << self
def inner_join_local_uploads_projects
upload_table = Upload.arel_table
project_table = Project.arel_table
join_statement = upload_table.project(upload_table[Arel.star])
.join(project_table)
.on(
upload_table[:model_type].eq('Project')
.and(upload_table[:model_id].eq(project_table[:id]))
.and(upload_table[:store].eq(ObjectStorage::Store::LOCAL))
)
joins(join_statement.join_sources)
end
## ##
# FastDestroyAll concerns # FastDestroyAll concerns
def begin_fast_destroy def begin_fast_destroy
......
...@@ -14,7 +14,7 @@ module HashedStorage ...@@ -14,7 +14,7 @@ module HashedStorage
try_obtain_lease do try_obtain_lease do
project = Project.without_deleted.find_by(id: project_id) project = Project.without_deleted.find_by(id: project_id)
break unless project break unless project && project.storage_upgradable?
old_disk_path ||= Storage::LegacyProject.new(project).disk_path old_disk_path ||= Storage::LegacyProject.new(project).disk_path
......
---
title: 'Hashed Storage attachments migration: exclude files in object storage as they
are all hashed already'
merge_request: 20338
author:
type: changed
...@@ -47,23 +47,13 @@ module Gitlab ...@@ -47,23 +47,13 @@ module Gitlab
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def self.legacy_attachments_relation def self.legacy_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version < :version OR projects.storage_version IS NULL', version: Project::HASHED_STORAGE_FEATURES[:attachments]) Upload.inner_join_local_uploads_projects.merge(Project.without_storage_feature(:attachments))
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
end end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def self.hashed_attachments_relation def self.hashed_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version >= :version', version: Project::HASHED_STORAGE_FEATURES[:attachments]) Upload.inner_join_local_uploads_projects.merge(Project.with_storage_feature(:attachments))
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
end end
# rubocop: enable CodeReuse/ActiveRecord
def self.relation_summary(relation_name, relation) def self.relation_summary(relation_name, relation)
relation_count = relation.count relation_count = relation.count
......
...@@ -5,13 +5,13 @@ require 'spec_helper' ...@@ -5,13 +5,13 @@ require 'spec_helper'
describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state do describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state do
include ExclusiveLeaseHelpers include ExclusiveLeaseHelpers
describe '#perform' do
let(:project) { create(:project, :empty_repo, :legacy_storage) }
let(:lease_key) { "project_migrate_hashed_storage_worker:#{project.id}" }
let(:lease_timeout) { described_class::LEASE_TIMEOUT }
let(:migration_service) { ::Projects::HashedStorage::MigrationService } let(:migration_service) { ::Projects::HashedStorage::MigrationService }
let(:lease_timeout) { described_class::LEASE_TIMEOUT }
describe '#perform' do
it 'skips when project no longer exists' do it 'skips when project no longer exists' do
stub_exclusive_lease(lease_key(-1), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new) expect(migration_service).not_to receive(:new)
subject.perform(-1) subject.perform(-1)
...@@ -19,14 +19,41 @@ describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state d ...@@ -19,14 +19,41 @@ describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state d
it 'skips when project is pending delete' do it 'skips when project is pending delete' do
pending_delete_project = create(:project, :empty_repo, pending_delete: true) pending_delete_project = create(:project, :empty_repo, pending_delete: true)
stub_exclusive_lease(lease_key(pending_delete_project.id), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new) expect(migration_service).not_to receive(:new)
subject.perform(pending_delete_project.id) subject.perform(pending_delete_project.id)
end end
it 'delegates migration to service class when we have exclusive lease' do it 'skips when project is already migrated' do
stub_exclusive_lease(lease_key, 'uuid', timeout: lease_timeout) migrated_project = create(:project, :empty_repo)
stub_exclusive_lease(lease_key(migrated_project.id), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new)
subject.perform(migrated_project.id)
end
context 'with exclusive lease available' do
it 'delegates migration to service class' do
project = create(:project, :empty_repo, :legacy_storage)
stub_exclusive_lease(lease_key(project.id), 'uuid', timeout: lease_timeout)
service_spy = spy
allow(migration_service)
.to receive(:new).with(project, project.full_path, logger: subject.logger)
.and_return(service_spy)
subject.perform(project.id)
expect(service_spy).to have_received(:execute)
end
it 'delegates migration to service class with correct path in a partially migrated project' do
project = create(:project, :empty_repo, storage_version: 1)
stub_exclusive_lease(lease_key(project.id), 'uuid', timeout: lease_timeout)
service_spy = spy service_spy = spy
...@@ -38,13 +65,21 @@ describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state d ...@@ -38,13 +65,21 @@ describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state d
expect(service_spy).to have_received(:execute) expect(service_spy).to have_received(:execute)
end end
end
context 'with exclusive lease taken' do
it 'skips when it cant acquire the exclusive lease' do it 'skips when it cant acquire the exclusive lease' do
stub_exclusive_lease_taken(lease_key, timeout: lease_timeout) project = create(:project, :empty_repo, :legacy_storage)
stub_exclusive_lease_taken(lease_key(project.id), timeout: lease_timeout)
expect(migration_service).not_to receive(:new) expect(migration_service).not_to receive(:new)
subject.perform(project.id) subject.perform(project.id)
end end
end end
end
def lease_key(key)
"project_migrate_hashed_storage_worker:#{key}"
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment