Commit 9a2cb68d authored by Gabriel Mazetto's avatar Gabriel Mazetto

Improve storage migration rake task

parent 7ac92899
......@@ -277,8 +277,9 @@ class Project < ActiveRecord::Base
scope :pending_delete, -> { where(pending_delete: true) }
scope :without_deleted, -> { where(pending_delete: false) }
scope :with_hashed_storage, -> { where('storage_version >= 1') }
scope :with_legacy_storage, -> { where(storage_version: [nil, 0]) }
scope :with_storage_feature, ->(feature) { where('storage_version >= :version', version: HASHED_STORAGE_FEATURES[feature]) }
scope :without_storage_feature, ->(feature) { where('storage_version < :version OR storage_version IS NULL', version: HASHED_STORAGE_FEATURES[feature]) }
scope :with_unmigrated_storage, -> { where('storage_version < :version OR storage_version IS NULL', version: LATEST_STORAGE_VERSION) }
scope :sorted_by_activity, -> { reorder(last_activity_at: :desc) }
scope :sorted_by_stars, -> { reorder('projects.star_count DESC') }
......
......@@ -2,10 +2,10 @@ namespace :gitlab do
namespace :storage do
desc 'GitLab | Storage | Migrate existing projects to Hashed Storage'
task migrate_to_hashed: :environment do
legacy_projects_count = Project.with_legacy_storage.count
legacy_projects_count = Project.with_unmigrated_storage.count
if legacy_projects_count == 0
puts 'There are no projects using legacy storage. Nothing to do!'
puts 'There are no projects requiring storage migration. Nothing to do!'
next
end
......@@ -23,22 +23,42 @@ namespace :gitlab do
desc 'Gitlab | Storage | Summary of existing projects using Legacy Storage'
task legacy_projects: :environment do
projects_summary(Project.with_legacy_storage)
relation_summary('projects', Project.without_storage_feature(:repository))
end
desc 'Gitlab | Storage | List existing projects using Legacy Storage'
task list_legacy_projects: :environment do
projects_list(Project.with_legacy_storage)
projects_list('projects using Legacy Storage', Project.without_storage_feature(:repository))
end
desc 'Gitlab | Storage | Summary of existing projects using Hashed Storage'
task hashed_projects: :environment do
projects_summary(Project.with_hashed_storage)
relation_summary('projects using Hashed Storage', Project.with_storage_feature(:repository))
end
desc 'Gitlab | Storage | List existing projects using Hashed Storage'
task list_hashed_projects: :environment do
projects_list(Project.with_hashed_storage)
projects_list('projects using Hashed Storage', Project.with_storage_feature(:repository))
end
desc 'Gitlab | Storage | Summary of project attachments using Legacy Storage'
task legacy_attachments: :environment do
relation_summary('attachments using Legacy Storage', legacy_attachments_relation)
end
desc 'Gitlab | Storage | List existing project attachments using Legacy Storage'
task list_legacy_attachments: :environment do
attachments_list('attachments using Legacy Storage', legacy_attachments_relation)
end
desc 'Gitlab | Storage | Summary of project attachments using Hashed Storage'
task hashed_attachments: :environment do
relation_summary('attachments using Hashed Storage', hashed_attachments_relation)
end
desc 'Gitlab | Storage | List existing project attachments using Hashed Storage'
task list_hashed_attachments: :environment do
attachments_list('attachments using Hashed Storage', hashed_attachments_relation)
end
def batch_size
......@@ -46,29 +66,43 @@ namespace :gitlab do
end
def project_id_batches(&block)
Project.with_legacy_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
Project.with_unmigrated_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
ids = relation.pluck(:id)
yield ids.min, ids.max
end
end
def projects_summary(relation)
projects_count = relation.count
puts "* Found #{projects_count} projects".color(:green)
def legacy_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version < :version OR projects.storage_version IS NULL', version: Project::HASHED_STORAGE_FEATURES[:attachments])
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
end
def hashed_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version >= :version', version: Project::HASHED_STORAGE_FEATURES[:attachments])
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
end
def relation_summary(relation_name, relation)
relation_count = relation.count
puts "* Found #{relation_count} #{relation_name}".color(:green)
projects_count
relation_count
end
def projects_list(relation)
projects_count = projects_summary(relation)
def projects_list(relation_name, relation)
relation_count = relation_summary(relation_name, relation)
projects = relation.with_route
limit = ENV.fetch('LIMIT', 500).to_i
return unless projects_count > 0
return unless relation_count > 0
puts " ! Displaying first #{limit} projects..." if projects_count > limit
puts " ! Displaying first #{limit} #{relation_name}..." if relation_count > limit
counter = 0
projects.find_in_batches(batch_size: batch_size) do |batch|
......@@ -81,5 +115,26 @@ namespace :gitlab do
end
end
end
def attachments_list(relation_name, relation)
relation_count = relation_summary(relation_name, relation)
limit = ENV.fetch('LIMIT', 500).to_i
return unless relation_count > 0
puts " ! Displaying first #{limit} #{relation_name}..." if relation_count > limit
counter = 0
relation.find_in_batches(batch_size: batch_size) do |batch|
batch.each do |upload|
counter += 1
puts " - #{upload.path} (id: #{upload.id})".color(:red)
return if counter >= limit # rubocop:disable Lint/NonLocalExitFromIterator
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment