Commit 74b3870a authored by Michael Kozono

Address Rubocop offenses

parent dd4b35f8
# frozen_string_literal: true
module Gitlab module Gitlab
module BackgroundMigration module BackgroundMigration
class PopulateUntrackedUploads # This class processes a batch of rows in `untracked_files_for_uploads` by
class UntrackedFile < ActiveRecord::Base # adding each file to the `uploads` table if it does not exist.
class PopulateUntrackedUploads # rubocop:disable Metrics/ClassLength
# This class is responsible for producing the attributes necessary to
# track an uploaded file in the `uploads` table.
class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength
self.table_name = 'untracked_files_for_uploads' self.table_name = 'untracked_files_for_uploads'
# Ends with /:random_hex/:filename # Ends with /:random_hex/:filename
FILE_UPLOADER_PATH_PATTERN = %r{/\h+/[^/]+\z} FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z}
FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN = %r{\A(.+)#{FILE_UPLOADER_PATH_PATTERN}} FULL_PATH_CAPTURE = %r{\A(.+)#{FILE_UPLOADER_PATH}}
# These regex patterns are tested against a relative path, relative to # These regex patterns are tested against a relative path, relative to
# the upload directory. # the upload directory.
...@@ -44,7 +50,7 @@ module Gitlab ...@@ -44,7 +50,7 @@ module Gitlab
model_type: 'Project' model_type: 'Project'
}, },
{ {
pattern: FILE_UPLOADER_PATH_PATTERN, pattern: FILE_UPLOADER_PATH,
uploader: 'FileUploader', uploader: 'FileUploader',
model_type: 'Project' model_type: 'Project'
} }
...@@ -63,13 +69,14 @@ module Gitlab ...@@ -63,13 +69,14 @@ module Gitlab
# Returns the path value to record for this file, memoized in @upload_path.
#
# UntrackedFile#path is absolute, but Upload#path depends on the uploader:
# for 'FileUploader' it is the part of the path matched by
# FILE_UPLOADER_PATH with the leading slash removed; for every other
# uploader it is `path` unchanged.
def upload_path
  return @upload_path if @upload_path
  return @upload_path = path unless uploader == 'FileUploader'

  # Path relative to the project directory in uploads
  project_relative = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH)[0]
  @upload_path = project_relative.sub(%r{\A/}, '') # drop the leading slash
end
def uploader def uploader
...@@ -83,7 +90,8 @@ module Gitlab ...@@ -83,7 +90,8 @@ module Gitlab
def model_id def model_id
return @model_id if defined?(@model_id) return @model_id if defined?(@model_id)
matchd = path_relative_to_upload_dir.match(matching_pattern_map[:pattern]) pattern = matching_pattern_map[:pattern]
matchd = path_relative_to_upload_dir.match(pattern)
# If something is captured (matchd[1] is not nil), it is a model_id # If something is captured (matchd[1] is not nil), it is a model_id
# Only the FileUploader pattern will not match an ID # Only the FileUploader pattern will not match an ID
...@@ -105,14 +113,20 @@ module Gitlab ...@@ -105,14 +113,20 @@ module Gitlab
path_relative_to_upload_dir.match(path_pattern_map[:pattern]) path_relative_to_upload_dir.match(path_pattern_map[:pattern])
end end
raise "Unknown upload path pattern \"#{path}\"" unless @matching_pattern_map unless @matching_pattern_map
raise "Unknown upload path pattern \"#{path}\""
end
@matching_pattern_map @matching_pattern_map
end end
def file_uploader_model_id def file_uploader_model_id
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN) matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE)
raise "Could not capture project full_path from a FileUploader path: \"#{path_relative_to_upload_dir}\"" unless matchd not_found_msg = <<~MSG
Could not capture project full_path from a FileUploader path:
"#{path_relative_to_upload_dir}"
MSG
raise not_found_msg unless matchd
full_path = matchd[1] full_path = matchd[1]
project = Project.find_by_full_path(full_path) project = Project.find_by_full_path(full_path)
...@@ -123,7 +137,8 @@ module Gitlab ...@@ -123,7 +137,8 @@ module Gitlab
# Returns `path` with the leading relative upload directory and the slash
# that follows it removed, so the result does not include a leading slash.
# The result is memoized in @path_relative_to_upload_dir.
def path_relative_to_upload_dir
  # Return the cached value first so the prefix regex is only built once.
  return @path_relative_to_upload_dir if @path_relative_to_upload_dir

  upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength
  base = %r{\A#{Regexp.escape(upload_dir)}/}
  @path_relative_to_upload_dir = path.sub(base, '')
end
...@@ -132,6 +147,7 @@ module Gitlab ...@@ -132,6 +147,7 @@ module Gitlab
end end
end end
# Minimal ActiveRecord model bound to the `uploads` table; it exists so the
# migration can query that table.
class Upload < ActiveRecord::Base
  self.table_name = 'uploads'
end
...@@ -192,8 +208,10 @@ module Gitlab ...@@ -192,8 +208,10 @@ module Gitlab
end end
ids.each do |model_type, model_ids| ids.each do |model_type, model_ids|
found_ids = Object.const_get(model_type).where(id: model_ids.uniq).pluck(:id) model_class = Object.const_get(model_type)
ids[model_type] = ids[model_type] - found_ids # replace with deleted ids found_ids = model_class.where(id: model_ids.uniq).pluck(:id)
deleted_ids = ids[model_type] - found_ids
ids[model_type] = deleted_ids
end end
ids ids
...@@ -204,11 +222,15 @@ module Gitlab ...@@ -204,11 +222,15 @@ module Gitlab
file.to_h.merge(created_at: 'NOW()') file.to_h.merge(created_at: 'NOW()')
end end
Gitlab::Database.bulk_insert('uploads', rows, disable_quote: :created_at) Gitlab::Database.bulk_insert('uploads',
rows,
disable_quote: :created_at)
end end
# Drops the `untracked_files_for_uploads` table once UntrackedFile has no
# rows left; does nothing while rows remain.
def drop_temp_table_if_finished
  return unless UntrackedFile.all.empty?

  UntrackedFile.connection.drop_table(:untracked_files_for_uploads)
end
end end
end end
......
# frozen_string_literal: true
module Gitlab module Gitlab
module BackgroundMigration module BackgroundMigration
class PrepareUntrackedUploads # This class finds all non-hashed uploaded file paths and saves them to a
# `untracked_files_for_uploads` table.
class PrepareUntrackedUploads # rubocop:disable Metrics/ClassLength
# For bulk_queue_background_migration_jobs_by_range # For bulk_queue_background_migration_jobs_by_range
include Database::MigrationHelpers include Database::MigrationHelpers
FILE_PATH_BATCH_SIZE = 500 FIND_BATCH_SIZE = 500
RELATIVE_UPLOAD_DIR = "uploads".freeze RELATIVE_UPLOAD_DIR = "uploads".freeze
ABSOLUTE_UPLOAD_DIR = "#{CarrierWave.root}/#{RELATIVE_UPLOAD_DIR}".freeze ABSOLUTE_UPLOAD_DIR = "#{CarrierWave.root}/#{RELATIVE_UPLOAD_DIR}".freeze
FOLLOW_UP_MIGRATION = 'PopulateUntrackedUploads'.freeze FOLLOW_UP_MIGRATION = 'PopulateUntrackedUploads'.freeze
...@@ -12,6 +16,8 @@ module Gitlab ...@@ -12,6 +16,8 @@ module Gitlab
EXCLUDED_HASHED_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/@hashed/*".freeze EXCLUDED_HASHED_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/@hashed/*".freeze
EXCLUDED_TMP_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/tmp/*".freeze EXCLUDED_TMP_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/tmp/*".freeze
# This class is used to iterate over batches of
# `untracked_files_for_uploads` rows.
class UntrackedFile < ActiveRecord::Base class UntrackedFile < ActiveRecord::Base
include EachBatch include EachBatch
...@@ -39,8 +45,9 @@ module Gitlab ...@@ -39,8 +45,9 @@ module Gitlab
private private
def ensure_temporary_tracking_table_exists def ensure_temporary_tracking_table_exists
unless UntrackedFile.connection.table_exists?(:untracked_files_for_uploads) table_name = :untracked_files_for_uploads
UntrackedFile.connection.create_table :untracked_files_for_uploads do |t| unless UntrackedFile.connection.table_exists?(table_name)
UntrackedFile.connection.create_table table_name do |t|
t.string :path, limit: 600, null: false t.string :path, limit: 600, null: false
t.index :path, unique: true t.index :path, unique: true
end end
...@@ -54,7 +61,7 @@ module Gitlab ...@@ -54,7 +61,7 @@ module Gitlab
# Walks ABSOLUTE_UPLOAD_DIR in batches of FIND_BATCH_SIZE paths and hands
# each batch to insert_file_paths. Does nothing when the directory does
# not exist.
def store_untracked_file_paths
  return unless Dir.exist?(ABSOLUTE_UPLOAD_DIR)

  each_file_batch(ABSOLUTE_UPLOAD_DIR, FIND_BATCH_SIZE) { |batch| insert_file_paths(batch) }
end
...@@ -85,12 +92,17 @@ module Gitlab ...@@ -85,12 +92,17 @@ module Gitlab
end end
# Builds the argument array for the `find` invocation that lists files
# under search_dir.
#
# search_dir - directory passed to `find` as its starting point.
#
# The command carries a `! ( -path ... -prune )` clause for
# EXCLUDED_HASHED_UPLOADS_PATH and for EXCLUDED_TMP_UPLOADS_PATH, and ends
# with -print0. When which_ionice returns a truthy value, the command is
# prefixed with "<ionice> -c Idle". The final command is written to the
# Rails log at info level.
#
# Returns the command as an Array of Strings.
def build_find_command(search_dir)
  excluded_paths = [EXCLUDED_HASHED_UPLOADS_PATH, EXCLUDED_TMP_UPLOADS_PATH]
  exclusions = excluded_paths.flat_map do |excluded|
    %W[! ( -path #{excluded} -prune )]
  end

  find_cmd = %W[find #{search_dir} -type f] + exclusions + %w[-print0]

  ionice = which_ionice
  find_cmd = %W[#{ionice} -c Idle] + find_cmd if ionice

  Rails.logger.info %(PrepareUntrackedUploads find command: "#{find_cmd.join(' ')}")

  find_cmd
end
...@@ -98,25 +110,32 @@ module Gitlab ...@@ -98,25 +110,32 @@ module Gitlab
# Looks up `ionice` through Gitlab::Utils.which.
#
# Returns whatever Gitlab::Utils.which returns, or false when the lookup
# raises a StandardError. Falling back to false is relatively safe here,
# even though it isn't very nice.
def which_ionice
  Gitlab::Utils.which('ionice')
rescue
  false
end
# Inserts one batch of file paths with a single statement built by
# insert_sql and executed on ActiveRecord::Base.connection.
#
# file_paths - collection of file paths to insert.
#
# Returns the result of `connection.execute`, or nil for an empty batch.
def insert_file_paths(file_paths)
  # An empty batch would render "... VALUES ;", which is not valid SQL.
  return if file_paths.empty?

  ActiveRecord::Base.connection.execute(insert_sql(file_paths))
end
# Builds the INSERT statement for a batch of file paths.
#
# file_paths - collection of file paths, forwarded to
#              table_columns_and_values_for_insert.
#
# Three variants are produced:
# * postgresql_pre_9_5? is true: plain INSERT
# * postgresql? is true:         INSERT ... ON CONFLICT DO NOTHING
# * otherwise (MySQL):           INSERT IGNORE
#
# Returns the SQL String.
def insert_sql(file_paths)
  target = table_columns_and_values_for_insert(file_paths)

  return "INSERT INTO #{target};" if postgresql_pre_9_5?
  return "INSERT INTO #{target} ON CONFLICT DO NOTHING;" if postgresql?

  "INSERT IGNORE INTO #{target};" # MySQL
end
def table_columns_and_values_for_insert(file_paths) def table_columns_and_values_for_insert(file_paths)
values = file_paths.map do |file_path| values = file_paths.map do |file_path|
ActiveRecord::Base.send(:sanitize_sql_array, ['(?)', file_path]) # rubocop:disable GitlabSecurity/PublicSend ActiveRecord::Base.send(:sanitize_sql_array, ['(?)', file_path]) # rubocop:disable GitlabSecurity/PublicSend, Metrics/LineLength
end.join(', ') end.join(', ')
"#{UntrackedFile.table_name} (path) VALUES #{values}" "#{UntrackedFile.table_name} (path) VALUES #{values}"
...@@ -131,11 +150,13 @@ module Gitlab ...@@ -131,11 +150,13 @@ module Gitlab
end end
# Reports whether the database is PostgreSQL with a version below 9.5.
#
# The answer is memoized with `defined?` rather than `||=` so that a false
# result is cached too instead of being recomputed on every call.
#
# Returns true or false.
def postgresql_pre_9_5?
  return @postgresql_pre_9_5 if defined?(@postgresql_pre_9_5)

  @postgresql_pre_9_5 = postgresql? && Gitlab::Database.version.to_f < 9.5
end
# Queues the follow-up migration named by FOLLOW_UP_MIGRATION for the
# UntrackedFile model by delegating to
# bulk_queue_background_migration_jobs_by_range.
def schedule_populate_untracked_uploads_jobs
  model_class = UntrackedFile
  migration_name = FOLLOW_UP_MIGRATION

  bulk_queue_background_migration_jobs_by_range(model_class, migration_name)
end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment