Commit 74b3870a authored by Michael Kozono's avatar Michael Kozono

Address Rubocop offenses

parent dd4b35f8
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
class PopulateUntrackedUploads
class UntrackedFile < ActiveRecord::Base
# This class processes a batch of rows in `untracked_files_for_uploads` by
# adding each file to the `uploads` table if it does not exist.
class PopulateUntrackedUploads # rubocop:disable Metrics/ClassLength
# This class is responsible for producing the attributes necessary to
# track an uploaded file in the `uploads` table.
class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength
self.table_name = 'untracked_files_for_uploads'
# Ends with /:random_hex/:filename
FILE_UPLOADER_PATH_PATTERN = %r{/\h+/[^/]+\z}
FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN = %r{\A(.+)#{FILE_UPLOADER_PATH_PATTERN}}
FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z}
FULL_PATH_CAPTURE = %r{\A(.+)#{FILE_UPLOADER_PATH}}
# These regex patterns are tested against a relative path, relative to
# the upload directory.
......@@ -44,7 +50,7 @@ module Gitlab
model_type: 'Project'
},
{
pattern: FILE_UPLOADER_PATH_PATTERN,
pattern: FILE_UPLOADER_PATH,
uploader: 'FileUploader',
model_type: 'Project'
}
......@@ -63,9 +69,10 @@ module Gitlab
def upload_path
# UntrackedFile#path is absolute, but Upload#path depends on uploader
@upload_path ||= if uploader == 'FileUploader'
@upload_path ||=
if uploader == 'FileUploader'
# Path relative to project directory in uploads
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH_PATTERN)
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH)
matchd[0].sub(%r{\A/}, '') # remove leading slash
else
path
......@@ -83,7 +90,8 @@ module Gitlab
def model_id
return @model_id if defined?(@model_id)
matchd = path_relative_to_upload_dir.match(matching_pattern_map[:pattern])
pattern = matching_pattern_map[:pattern]
matchd = path_relative_to_upload_dir.match(pattern)
# If something is captured (matchd[1] is not nil), it is a model_id
# Only the FileUploader pattern will not match an ID
......@@ -105,14 +113,20 @@ module Gitlab
path_relative_to_upload_dir.match(path_pattern_map[:pattern])
end
raise "Unknown upload path pattern \"#{path}\"" unless @matching_pattern_map
unless @matching_pattern_map
raise "Unknown upload path pattern \"#{path}\""
end
@matching_pattern_map
end
def file_uploader_model_id
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN)
raise "Could not capture project full_path from a FileUploader path: \"#{path_relative_to_upload_dir}\"" unless matchd
matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE)
not_found_msg = <<~MSG
Could not capture project full_path from a FileUploader path:
"#{path_relative_to_upload_dir}"
MSG
raise not_found_msg unless matchd
full_path = matchd[1]
project = Project.find_by_full_path(full_path)
......@@ -123,7 +137,8 @@ module Gitlab
# Not including a leading slash
def path_relative_to_upload_dir
base = %r{\A#{Regexp.escape(Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR)}/}
upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength
base = %r{\A#{Regexp.escape(upload_dir)}/}
@path_relative_to_upload_dir ||= path.sub(base, '')
end
......@@ -132,6 +147,7 @@ module Gitlab
end
end
# This class is used to query the `uploads` table.
class Upload < ActiveRecord::Base
self.table_name = 'uploads'
end
......@@ -192,8 +208,10 @@ module Gitlab
end
ids.each do |model_type, model_ids|
found_ids = Object.const_get(model_type).where(id: model_ids.uniq).pluck(:id)
ids[model_type] = ids[model_type] - found_ids # replace with deleted ids
model_class = Object.const_get(model_type)
found_ids = model_class.where(id: model_ids.uniq).pluck(:id)
deleted_ids = ids[model_type] - found_ids
ids[model_type] = deleted_ids
end
ids
......@@ -204,11 +222,15 @@ module Gitlab
file.to_h.merge(created_at: 'NOW()')
end
Gitlab::Database.bulk_insert('uploads', rows, disable_quote: :created_at)
Gitlab::Database.bulk_insert('uploads',
rows,
disable_quote: :created_at)
end
def drop_temp_table_if_finished
UntrackedFile.connection.drop_table(:untracked_files_for_uploads) if UntrackedFile.all.empty?
if UntrackedFile.all.empty?
UntrackedFile.connection.drop_table(:untracked_files_for_uploads)
end
end
end
end
......
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
class PrepareUntrackedUploads
# This class finds all non-hashed uploaded file paths and saves them to a
# `untracked_files_for_uploads` table.
class PrepareUntrackedUploads # rubocop:disable Metrics/ClassLength
# For bulk_queue_background_migration_jobs_by_range
include Database::MigrationHelpers
FILE_PATH_BATCH_SIZE = 500
FIND_BATCH_SIZE = 500
RELATIVE_UPLOAD_DIR = "uploads".freeze
ABSOLUTE_UPLOAD_DIR = "#{CarrierWave.root}/#{RELATIVE_UPLOAD_DIR}".freeze
FOLLOW_UP_MIGRATION = 'PopulateUntrackedUploads'.freeze
......@@ -12,6 +16,8 @@ module Gitlab
EXCLUDED_HASHED_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/@hashed/*".freeze
EXCLUDED_TMP_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/tmp/*".freeze
# This class is used to iterate over batches of
# `untracked_files_for_uploads` rows.
class UntrackedFile < ActiveRecord::Base
include EachBatch
......@@ -39,8 +45,9 @@ module Gitlab
private
def ensure_temporary_tracking_table_exists
unless UntrackedFile.connection.table_exists?(:untracked_files_for_uploads)
UntrackedFile.connection.create_table :untracked_files_for_uploads do |t|
table_name = :untracked_files_for_uploads
unless UntrackedFile.connection.table_exists?(table_name)
UntrackedFile.connection.create_table table_name do |t|
t.string :path, limit: 600, null: false
t.index :path, unique: true
end
......@@ -54,7 +61,7 @@ module Gitlab
def store_untracked_file_paths
return unless Dir.exist?(ABSOLUTE_UPLOAD_DIR)
each_file_batch(ABSOLUTE_UPLOAD_DIR, FILE_PATH_BATCH_SIZE) do |file_paths|
each_file_batch(ABSOLUTE_UPLOAD_DIR, FIND_BATCH_SIZE) do |file_paths|
insert_file_paths(file_paths)
end
end
......@@ -85,12 +92,17 @@ module Gitlab
end
def build_find_command(search_dir)
cmd = %W[find #{search_dir} -type f ! ( -path #{EXCLUDED_HASHED_UPLOADS_PATH} -prune ) ! ( -path #{EXCLUDED_TMP_UPLOADS_PATH} -prune ) -print0]
cmd = %W[find #{search_dir}
-type f
! ( -path #{EXCLUDED_HASHED_UPLOADS_PATH} -prune )
! ( -path #{EXCLUDED_TMP_UPLOADS_PATH} -prune )
-print0]
ionice = which_ionice
cmd = %W[#{ionice} -c Idle] + cmd if ionice
Rails.logger.info "PrepareUntrackedUploads find command: \"#{cmd.join(' ')}\""
log_msg = "PrepareUntrackedUploads find command: \"#{cmd.join(' ')}\""
Rails.logger.info log_msg
cmd
end
......@@ -98,25 +110,32 @@ module Gitlab
def which_ionice
Gitlab::Utils.which('ionice')
rescue StandardError
# In this case, returning false is relatively safe, even though it isn't very nice
# In this case, returning false is relatively safe,
# even though it isn't very nice
false
end
def insert_file_paths(file_paths)
sql = if postgresql_pre_9_5?
sql = insert_sql(file_paths)
ActiveRecord::Base.connection.execute(sql)
end
def insert_sql(file_paths)
if postgresql_pre_9_5?
"INSERT INTO #{table_columns_and_values_for_insert(file_paths)};"
elsif postgresql?
"INSERT INTO #{table_columns_and_values_for_insert(file_paths)} ON CONFLICT DO NOTHING;"
"INSERT INTO #{table_columns_and_values_for_insert(file_paths)}"\
" ON CONFLICT DO NOTHING;"
else # MySQL
"INSERT IGNORE INTO #{table_columns_and_values_for_insert(file_paths)};"
"INSERT IGNORE INTO"\
" #{table_columns_and_values_for_insert(file_paths)};"
end
ActiveRecord::Base.connection.execute(sql)
end
def table_columns_and_values_for_insert(file_paths)
values = file_paths.map do |file_path|
ActiveRecord::Base.send(:sanitize_sql_array, ['(?)', file_path]) # rubocop:disable GitlabSecurity/PublicSend
ActiveRecord::Base.send(:sanitize_sql_array, ['(?)', file_path]) # rubocop:disable GitlabSecurity/PublicSend, Metrics/LineLength
end.join(', ')
"#{UntrackedFile.table_name} (path) VALUES #{values}"
......@@ -131,11 +150,13 @@ module Gitlab
end
def postgresql_pre_9_5?
@postgresql_pre_9_5 ||= postgresql? && Gitlab::Database.version.to_f < 9.5
@postgresql_pre_9_5 ||= postgresql? &&
Gitlab::Database.version.to_f < 9.5
end
def schedule_populate_untracked_uploads_jobs
bulk_queue_background_migration_jobs_by_range(UntrackedFile, FOLLOW_UP_MIGRATION)
bulk_queue_background_migration_jobs_by_range(
UntrackedFile, FOLLOW_UP_MIGRATION)
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment