Commit 98d1d503 authored by Gabriel Mazetto's avatar Gabriel Mazetto Committed by Mayra Cabrera

Filter legacy attachments correctly

An attachment from an unmigrated project will only be using the legacy
storage when stored locally. Any file on object storage already use
hashed storage format.
parent 405046cb
...@@ -374,9 +374,17 @@ class Project < ApplicationRecord ...@@ -374,9 +374,17 @@ class Project < ApplicationRecord
scope :pending_delete, -> { where(pending_delete: true) } scope :pending_delete, -> { where(pending_delete: true) }
scope :without_deleted, -> { where(pending_delete: false) } scope :without_deleted, -> { where(pending_delete: false) }
scope :with_storage_feature, ->(feature) { where('storage_version >= :version', version: HASHED_STORAGE_FEATURES[feature]) } scope :with_storage_feature, ->(feature) do
scope :without_storage_feature, ->(feature) { where('storage_version < :version OR storage_version IS NULL', version: HASHED_STORAGE_FEATURES[feature]) } where(arel_table[:storage_version].gteq(HASHED_STORAGE_FEATURES[feature]))
scope :with_unmigrated_storage, -> { where('storage_version < :version OR storage_version IS NULL', version: LATEST_STORAGE_VERSION) } end
scope :without_storage_feature, ->(feature) do
where(arel_table[:storage_version].lt(HASHED_STORAGE_FEATURES[feature])
.or(arel_table[:storage_version].eq(nil)))
end
scope :with_unmigrated_storage, -> do
where(arel_table[:storage_version].lt(LATEST_STORAGE_VERSION)
.or(arel_table[:storage_version].eq(nil)))
end
# last_activity_at is throttled every minute, but last_repository_updated_at is updated with every push # last_activity_at is throttled every minute, but last_repository_updated_at is updated with every push
scope :sorted_by_activity, -> { reorder(Arel.sql("GREATEST(COALESCE(last_activity_at, '1970-01-01'), COALESCE(last_repository_updated_at, '1970-01-01')) DESC")) } scope :sorted_by_activity, -> { reorder(Arel.sql("GREATEST(COALESCE(last_activity_at, '1970-01-01'), COALESCE(last_repository_updated_at, '1970-01-01')) DESC")) }
......
...@@ -23,6 +23,21 @@ class Upload < ApplicationRecord ...@@ -23,6 +23,21 @@ class Upload < ApplicationRecord
after_destroy :delete_file!, if: -> { uploader_class <= FileUploader } after_destroy :delete_file!, if: -> { uploader_class <= FileUploader }
class << self class << self
def inner_join_local_uploads_projects
upload_table = Upload.arel_table
project_table = Project.arel_table
join_statement = upload_table.project(upload_table[Arel.star])
.join(project_table)
.on(
upload_table[:model_type].eq('Project')
.and(upload_table[:model_id].eq(project_table[:id]))
.and(upload_table[:store].eq(ObjectStorage::Store::LOCAL))
)
joins(join_statement.join_sources)
end
## ##
# FastDestroyAll concerns # FastDestroyAll concerns
def begin_fast_destroy def begin_fast_destroy
......
...@@ -14,7 +14,7 @@ module HashedStorage ...@@ -14,7 +14,7 @@ module HashedStorage
try_obtain_lease do try_obtain_lease do
project = Project.without_deleted.find_by(id: project_id) project = Project.without_deleted.find_by(id: project_id)
break unless project break unless project && project.storage_upgradable?
old_disk_path ||= Storage::LegacyProject.new(project).disk_path old_disk_path ||= Storage::LegacyProject.new(project).disk_path
......
---
title: 'Hashed Storage attachments migration: exclude files in object storage as they
are all hashed already'
merge_request: 20338
author:
type: changed
...@@ -47,23 +47,13 @@ module Gitlab ...@@ -47,23 +47,13 @@ module Gitlab
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def self.legacy_attachments_relation def self.legacy_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version < :version OR projects.storage_version IS NULL', version: Project::HASHED_STORAGE_FEATURES[:attachments]) Upload.inner_join_local_uploads_projects.merge(Project.without_storage_feature(:attachments))
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
end end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def self.hashed_attachments_relation def self.hashed_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version >= :version', version: Project::HASHED_STORAGE_FEATURES[:attachments]) Upload.inner_join_local_uploads_projects.merge(Project.with_storage_feature(:attachments))
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
end end
# rubocop: enable CodeReuse/ActiveRecord
def self.relation_summary(relation_name, relation) def self.relation_summary(relation_name, relation)
relation_count = relation.count relation_count = relation.count
......
...@@ -5,13 +5,13 @@ require 'spec_helper' ...@@ -5,13 +5,13 @@ require 'spec_helper'
describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state do describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state do
include ExclusiveLeaseHelpers include ExclusiveLeaseHelpers
describe '#perform' do let(:migration_service) { ::Projects::HashedStorage::MigrationService }
let(:project) { create(:project, :empty_repo, :legacy_storage) } let(:lease_timeout) { described_class::LEASE_TIMEOUT }
let(:lease_key) { "project_migrate_hashed_storage_worker:#{project.id}" }
let(:lease_timeout) { described_class::LEASE_TIMEOUT }
let(:migration_service) { ::Projects::HashedStorage::MigrationService }
describe '#perform' do
it 'skips when project no longer exists' do it 'skips when project no longer exists' do
stub_exclusive_lease(lease_key(-1), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new) expect(migration_service).not_to receive(:new)
subject.perform(-1) subject.perform(-1)
...@@ -19,32 +19,67 @@ describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state d ...@@ -19,32 +19,67 @@ describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state d
it 'skips when project is pending delete' do it 'skips when project is pending delete' do
pending_delete_project = create(:project, :empty_repo, pending_delete: true) pending_delete_project = create(:project, :empty_repo, pending_delete: true)
stub_exclusive_lease(lease_key(pending_delete_project.id), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new) expect(migration_service).not_to receive(:new)
subject.perform(pending_delete_project.id) subject.perform(pending_delete_project.id)
end end
it 'delegates migration to service class when we have exclusive lease' do it 'skips when project is already migrated' do
stub_exclusive_lease(lease_key, 'uuid', timeout: lease_timeout) migrated_project = create(:project, :empty_repo)
stub_exclusive_lease(lease_key(migrated_project.id), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new)
subject.perform(migrated_project.id)
end
context 'with exclusive lease available' do
it 'delegates migration to service class' do
project = create(:project, :empty_repo, :legacy_storage)
stub_exclusive_lease(lease_key(project.id), 'uuid', timeout: lease_timeout)
service_spy = spy service_spy = spy
allow(migration_service) allow(migration_service)
.to receive(:new).with(project, project.full_path, logger: subject.logger) .to receive(:new).with(project, project.full_path, logger: subject.logger)
.and_return(service_spy) .and_return(service_spy)
subject.perform(project.id) subject.perform(project.id)
expect(service_spy).to have_received(:execute) expect(service_spy).to have_received(:execute)
end
it 'delegates migration to service class with correct path in a partially migrated project' do
project = create(:project, :empty_repo, storage_version: 1)
stub_exclusive_lease(lease_key(project.id), 'uuid', timeout: lease_timeout)
service_spy = spy
allow(migration_service)
.to receive(:new).with(project, project.full_path, logger: subject.logger)
.and_return(service_spy)
subject.perform(project.id)
expect(service_spy).to have_received(:execute)
end
end end
it 'skips when it cant acquire the exclusive lease' do context 'with exclusive lease taken' do
stub_exclusive_lease_taken(lease_key, timeout: lease_timeout) it 'skips when it cant acquire the exclusive lease' do
project = create(:project, :empty_repo, :legacy_storage)
stub_exclusive_lease_taken(lease_key(project.id), timeout: lease_timeout)
expect(migration_service).not_to receive(:new) expect(migration_service).not_to receive(:new)
subject.perform(project.id) subject.perform(project.id)
end
end end
end end
def lease_key(key)
"project_migrate_hashed_storage_worker:#{key}"
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment