Commit 98d1d503 authored by Gabriel Mazetto's avatar Gabriel Mazetto Committed by Mayra Cabrera

Filter legacy attachments correctly

An attachment from an unmigrated project will only be using the legacy
storage when stored locally. Any file on object storage already use
hashed storage format.
parent 405046cb
......@@ -374,9 +374,17 @@ class Project < ApplicationRecord
scope :pending_delete, -> { where(pending_delete: true) }
scope :without_deleted, -> { where(pending_delete: false) }
scope :with_storage_feature, ->(feature) { where('storage_version >= :version', version: HASHED_STORAGE_FEATURES[feature]) }
scope :without_storage_feature, ->(feature) { where('storage_version < :version OR storage_version IS NULL', version: HASHED_STORAGE_FEATURES[feature]) }
scope :with_unmigrated_storage, -> { where('storage_version < :version OR storage_version IS NULL', version: LATEST_STORAGE_VERSION) }
scope :with_storage_feature, ->(feature) do
where(arel_table[:storage_version].gteq(HASHED_STORAGE_FEATURES[feature]))
end
scope :without_storage_feature, ->(feature) do
where(arel_table[:storage_version].lt(HASHED_STORAGE_FEATURES[feature])
.or(arel_table[:storage_version].eq(nil)))
end
scope :with_unmigrated_storage, -> do
where(arel_table[:storage_version].lt(LATEST_STORAGE_VERSION)
.or(arel_table[:storage_version].eq(nil)))
end
# last_activity_at is throttled every minute, but last_repository_updated_at is updated with every push
scope :sorted_by_activity, -> { reorder(Arel.sql("GREATEST(COALESCE(last_activity_at, '1970-01-01'), COALESCE(last_repository_updated_at, '1970-01-01')) DESC")) }
......
......@@ -23,6 +23,21 @@ class Upload < ApplicationRecord
after_destroy :delete_file!, if: -> { uploader_class <= FileUploader }
class << self
def inner_join_local_uploads_projects
upload_table = Upload.arel_table
project_table = Project.arel_table
join_statement = upload_table.project(upload_table[Arel.star])
.join(project_table)
.on(
upload_table[:model_type].eq('Project')
.and(upload_table[:model_id].eq(project_table[:id]))
.and(upload_table[:store].eq(ObjectStorage::Store::LOCAL))
)
joins(join_statement.join_sources)
end
##
# FastDestroyAll concerns
def begin_fast_destroy
......
......@@ -14,7 +14,7 @@ module HashedStorage
try_obtain_lease do
project = Project.without_deleted.find_by(id: project_id)
break unless project
break unless project && project.storage_upgradable?
old_disk_path ||= Storage::LegacyProject.new(project).disk_path
......
---
title: 'Hashed Storage attachments migration: exclude files in object storage as they
are all hashed already'
merge_request: 20338
author:
type: changed
......@@ -47,23 +47,13 @@ module Gitlab
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def self.legacy_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version < :version OR projects.storage_version IS NULL', version: Project::HASHED_STORAGE_FEATURES[:attachments])
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
Upload.inner_join_local_uploads_projects.merge(Project.without_storage_feature(:attachments))
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def self.hashed_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version >= :version', version: Project::HASHED_STORAGE_FEATURES[:attachments])
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
Upload.inner_join_local_uploads_projects.merge(Project.with_storage_feature(:attachments))
end
# rubocop: enable CodeReuse/ActiveRecord
def self.relation_summary(relation_name, relation)
relation_count = relation.count
......
......@@ -5,13 +5,13 @@ require 'spec_helper'
describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state do
include ExclusiveLeaseHelpers
describe '#perform' do
let(:project) { create(:project, :empty_repo, :legacy_storage) }
let(:lease_key) { "project_migrate_hashed_storage_worker:#{project.id}" }
let(:lease_timeout) { described_class::LEASE_TIMEOUT }
let(:migration_service) { ::Projects::HashedStorage::MigrationService }
let(:migration_service) { ::Projects::HashedStorage::MigrationService }
let(:lease_timeout) { described_class::LEASE_TIMEOUT }
describe '#perform' do
it 'skips when project no longer exists' do
stub_exclusive_lease(lease_key(-1), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new)
subject.perform(-1)
......@@ -19,32 +19,67 @@ describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state d
it 'skips when project is pending delete' do
pending_delete_project = create(:project, :empty_repo, pending_delete: true)
stub_exclusive_lease(lease_key(pending_delete_project.id), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new)
subject.perform(pending_delete_project.id)
end
it 'delegates migration to service class when we have exclusive lease' do
stub_exclusive_lease(lease_key, 'uuid', timeout: lease_timeout)
it 'skips when project is already migrated' do
migrated_project = create(:project, :empty_repo)
stub_exclusive_lease(lease_key(migrated_project.id), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new)
subject.perform(migrated_project.id)
end
context 'with exclusive lease available' do
it 'delegates migration to service class' do
project = create(:project, :empty_repo, :legacy_storage)
stub_exclusive_lease(lease_key(project.id), 'uuid', timeout: lease_timeout)
service_spy = spy
service_spy = spy
allow(migration_service)
.to receive(:new).with(project, project.full_path, logger: subject.logger)
.and_return(service_spy)
allow(migration_service)
.to receive(:new).with(project, project.full_path, logger: subject.logger)
.and_return(service_spy)
subject.perform(project.id)
subject.perform(project.id)
expect(service_spy).to have_received(:execute)
expect(service_spy).to have_received(:execute)
end
it 'delegates migration to service class with correct path in a partially migrated project' do
project = create(:project, :empty_repo, storage_version: 1)
stub_exclusive_lease(lease_key(project.id), 'uuid', timeout: lease_timeout)
service_spy = spy
allow(migration_service)
.to receive(:new).with(project, project.full_path, logger: subject.logger)
.and_return(service_spy)
subject.perform(project.id)
expect(service_spy).to have_received(:execute)
end
end
it 'skips when it cant acquire the exclusive lease' do
stub_exclusive_lease_taken(lease_key, timeout: lease_timeout)
context 'with exclusive lease taken' do
it 'skips when it cant acquire the exclusive lease' do
project = create(:project, :empty_repo, :legacy_storage)
stub_exclusive_lease_taken(lease_key(project.id), timeout: lease_timeout)
expect(migration_service).not_to receive(:new)
expect(migration_service).not_to receive(:new)
subject.perform(project.id)
subject.perform(project.id)
end
end
end
def lease_key(key)
"project_migrate_hashed_storage_worker:#{key}"
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment