Commit 6d09e946 authored by James Lopez

import_url migration performance improvements

 Nullify empty import_url values up front, so that the number of projects with a non-NULL import_url drops to 1/5 of what it was.

 Also, batches are now processed in blocks of 1000, with each batch cleaned up in its own thread - this part is a bit experimental.
parent 86d83a3a
...@@ -11,7 +11,7 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration ...@@ -11,7 +11,7 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration
attr_reader :results, :query attr_reader :results, :query
def initialize(batch_size: 100, query:) def initialize(batch_size: 1000, query:)
@offset = 0 @offset = 0
@batch_size = batch_size @batch_size = batch_size
@query = query @query = query
...@@ -58,22 +58,40 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration ...@@ -58,22 +58,40 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration
return return
end end
say('Nullifying empty import URLs')
nullify_empty_urls
say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.') say('Cleaning up invalid import URLs... This may take a few minutes if we have a large number of imported projects.')
invalid_import_url_project_ids.each { |project_id| cleanup_import_url(project_id) } process_invalid_import_urls
end end
def invalid_import_url_project_ids def process_invalid_import_urls
ids = [] @threads = []
batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL") batches = SqlBatches.new(query: "SELECT id, import_url FROM projects WHERE import_url IS NOT NULL")
while batches.next? while batches.next?
project_ids = []
batches.results.each do |result| batches.results.each do |result|
ids << result['id'] unless valid_url?(result['import_url']) project_ids << result['id'] unless valid_url?(result['import_url'])
end
process_batch(project_ids)
end end
@threads.each(&:join)
end end
ids def process_batch(project_ids)
@threads << Thread.new do
begin
project_ids.each { |project_id| cleanup_import_url(project_id) }
ensure
ActiveRecord::Base.connection.close
end
end
end end
def valid_url?(url) def valid_url?(url)
...@@ -83,4 +101,8 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration ...@@ -83,4 +101,8 @@ class FixNoValidatableImportUrl < ActiveRecord::Migration
def cleanup_import_url(project_id) def cleanup_import_url(project_id)
execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}") execute("UPDATE projects SET import_url = NULL WHERE id = #{project_id}")
end end
# Sets import_url to NULL for every project whose import_url is the empty
# string, using a single UPDATE statement.
def nullify_empty_urls
execute("UPDATE projects SET import_url = NULL WHERE import_url = ''")
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment