Commit c133f1a7 authored by Douwe Maan

Merge branch 'mk-fix-no-untracked-upload-files-error' into 'master'

Resolve "PrepareUntrackedUploads PostgreSQL syntax error"

Closes #42881

See merge request gitlab-org/gitlab-ce!17019
parents 41285af4 4e6a8eaa
---
title: Resolve PrepareUntrackedUploads PostgreSQL syntax error
merge_request: 17019
author:
type: fixed
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.

# Cleans up or follows up on the temporary `untracked_files_for_uploads`
# table, if that table is present when this migration runs:
#
# * no rows   -> the table is dropped;
# * some rows -> PopulateUntrackedUploads background migration jobs are
#                scheduled over the table's rows.
#
# NOTE(review): the table is presumably left behind by the
# PrepareUntrackedUploads background migration -- confirm against that class.
class SchedulePopulateUntrackedUploadsIfNeeded < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  FOLLOW_UP_MIGRATION = 'PopulateUntrackedUploads'.freeze

  # Migration-local model mapped onto the temporary table, so this migration
  # does not depend on any application model.
  class UntrackedFile < ActiveRecord::Base
    include EachBatch

    self.table_name = 'untracked_files_for_uploads'
  end

  # Does nothing when the temporary table is absent.
  def up
    return unless table_exists?(:untracked_files_for_uploads)

    process_or_remove_table
  end

  def down
    # nothing
  end

  private

  # Schedules the follow-up jobs when the table holds at least one row,
  # otherwise drops the table.
  def process_or_remove_table
    # `exists?` only has to find a single row, so the check stays cheap no
    # matter how many paths were collected; the previous `all.empty?` form can
    # issue a full COUNT on older Rails versions.
    if UntrackedFile.exists?
      schedule_populate_untracked_uploads_jobs
    else
      drop_temp_table
    end
  end

  def drop_temp_table
    drop_table(:untracked_files_for_uploads, if_exists: true)
  end

  # Queues FOLLOW_UP_MIGRATION background jobs for the rows of UntrackedFile.
  def schedule_populate_untracked_uploads_jobs
    say "Scheduling #{FOLLOW_UP_MIGRATION} background migration jobs since there are rows in untracked_files_for_uploads."

    bulk_queue_background_migration_jobs_by_range(
      UntrackedFile, FOLLOW_UP_MIGRATION)
  end
end
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180206200543) do ActiveRecord::Schema.define(version: 20180208183958) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
......
...@@ -43,8 +43,12 @@ module Gitlab ...@@ -43,8 +43,12 @@ module Gitlab
store_untracked_file_paths store_untracked_file_paths
if UntrackedFile.all.empty?
drop_temp_table
else
schedule_populate_untracked_uploads_jobs schedule_populate_untracked_uploads_jobs
end end
end
private private
...@@ -92,7 +96,7 @@ module Gitlab ...@@ -92,7 +96,7 @@ module Gitlab
end end
end end
yield(paths) yield(paths) if paths.any?
end end
def build_find_command(search_dir) def build_find_command(search_dir)
...@@ -165,6 +169,11 @@ module Gitlab ...@@ -165,6 +169,11 @@ module Gitlab
bulk_queue_background_migration_jobs_by_range( bulk_queue_background_migration_jobs_by_range(
UntrackedFile, FOLLOW_UP_MIGRATION) UntrackedFile, FOLLOW_UP_MIGRATION)
end end
def drop_temp_table
UntrackedFile.connection.drop_table(:untracked_files_for_uploads,
if_exists: true)
end
end end
end end
end end
...@@ -8,8 +8,6 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do ...@@ -8,8 +8,6 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
before do before do
DatabaseCleaner.clean DatabaseCleaner.clean
drop_temp_table_if_exists
end end
after do after do
...@@ -23,31 +21,21 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do ...@@ -23,31 +21,21 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
end end
end end
# E.g. The installation is in use at the time of migration, and someone has shared_examples 'prepares the untracked_files_for_uploads table' do
# just uploaded a file context 'when files were uploaded before and after hashed storage was enabled' do
shared_examples 'does not add files in /uploads/tmp' do let!(:appearance) { create_or_update_appearance(logo: uploaded_file, header_logo: uploaded_file) }
let(:tmp_file) { Rails.root.join(described_class::ABSOLUTE_UPLOAD_DIR, 'tmp', 'some_file.jpg') } let!(:user) { create(:user, :with_avatar) }
let!(:project1) { create(:project, :with_avatar, :legacy_storage) }
let(:project2) { create(:project) } # instantiate after enabling hashed_storage
before do before do
FileUtils.mkdir(File.dirname(tmp_file)) # Markdown upload before enabling hashed_storage
FileUtils.touch(tmp_file) UploadService.new(project1, uploaded_file, FileUploader).execute
end
after do
FileUtils.rm(tmp_file)
end
it 'does not add files from /uploads/tmp' do
described_class.new.perform
expect(untracked_files_for_uploads.count).to eq(5) stub_application_setting(hashed_storage_enabled: true)
end
end
it 'ensures the untracked_files_for_uploads table exists' do # Markdown upload after enabling hashed_storage
expect do UploadService.new(project2, uploaded_file, FileUploader).execute
described_class.new.perform
end.to change { ActiveRecord::Base.connection.table_exists?(:untracked_files_for_uploads) }.from(false).to(true)
end end
it 'has a path field long enough for really long paths' do it 'has a path field long enough for really long paths' do
...@@ -65,31 +53,6 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do ...@@ -65,31 +53,6 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
expect(record.reload.path.size).to eq(519) expect(record.reload.path.size).to eq(519)
end end
context "test bulk insert with ON CONFLICT DO NOTHING or IGNORE" do
around do |example|
# If this is CI, we use Postgres 9.2 so this whole context should be
# skipped since we're unable to use ON CONFLICT DO NOTHING or IGNORE.
if described_class.new.send(:can_bulk_insert_and_ignore_duplicates?)
example.run
end
end
context 'when files were uploaded before and after hashed storage was enabled' do
let!(:appearance) { create_or_update_appearance(logo: uploaded_file, header_logo: uploaded_file) }
let!(:user) { create(:user, :with_avatar) }
let!(:project1) { create(:project, :with_avatar, :legacy_storage) }
let(:project2) { create(:project) } # instantiate after enabling hashed_storage
before do
# Markdown upload before enabling hashed_storage
UploadService.new(project1, uploaded_file, FileUploader).execute
stub_application_setting(hashed_storage_enabled: true)
# Markdown upload after enabling hashed_storage
UploadService.new(project2, uploaded_file, FileUploader).execute
end
it 'adds unhashed files to the untracked_files_for_uploads table' do it 'adds unhashed files to the untracked_files_for_uploads table' do
described_class.new.perform described_class.new.perform
...@@ -130,91 +93,66 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do ...@@ -130,91 +93,66 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
end end
end end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
context 'when there are files in /uploads/tmp' do context 'when there are files in /uploads/tmp' do
it_behaves_like 'does not add files in /uploads/tmp' let(:tmp_file) { Rails.root.join(described_class::ABSOLUTE_UPLOAD_DIR, 'tmp', 'some_file.jpg') }
end
end
end
context 'test bulk insert without ON CONFLICT DO NOTHING or IGNORE' do
before do before do
# If this is CI, we use Postgres 9.2 so this stub has no effect. FileUtils.mkdir(File.dirname(tmp_file))
# FileUtils.touch(tmp_file)
# If this is being run on Postgres 9.5+ or MySQL, then this stub allows us
# to test the bulk insert functionality without ON CONFLICT DO NOTHING or
# IGNORE.
allow_any_instance_of(described_class).to receive(:postgresql_pre_9_5?).and_return(true)
end end
context 'when files were uploaded before and after hashed storage was enabled' do after do
let!(:appearance) { create_or_update_appearance(logo: uploaded_file, header_logo: uploaded_file) } FileUtils.rm(tmp_file)
let!(:user) { create(:user, :with_avatar) }
let!(:project1) { create(:project, :with_avatar, :legacy_storage) }
let(:project2) { create(:project) } # instantiate after enabling hashed_storage
before do
# Markdown upload before enabling hashed_storage
UploadService.new(project1, uploaded_file, FileUploader).execute
stub_application_setting(hashed_storage_enabled: true)
# Markdown upload after enabling hashed_storage
UploadService.new(project2, uploaded_file, FileUploader).execute
end end
it 'adds unhashed files to the untracked_files_for_uploads table' do it 'does not add files from /uploads/tmp' do
described_class.new.perform described_class.new.perform
expect(untracked_files_for_uploads.count).to eq(5) expect(untracked_files_for_uploads.count).to eq(5)
end end
it 'adds files with paths relative to CarrierWave.root' do
described_class.new.perform
untracked_files_for_uploads.all.each do |file|
expect(file.path.start_with?('uploads/')).to be_truthy
end
end end
it 'does not add hashed files to the untracked_files_for_uploads table' do context 'when the last batch size exactly matches the max batch size' do
described_class.new.perform it 'does not raise error' do
stub_const("#{described_class}::FIND_BATCH_SIZE", 5)
hashed_file_path = project2.uploads.where(uploader: 'FileUploader').first.path expect do
expect(untracked_files_for_uploads.where("path like '%#{hashed_file_path}%'").exists?).to be_falsey
end
it 'correctly schedules the follow-up background migration jobs' do
described_class.new.perform described_class.new.perform
end.not_to raise_error
expect(described_class::FOLLOW_UP_MIGRATION).to be_scheduled_migration(1, 5) expect(untracked_files_for_uploads.count).to eq(5)
expect(BackgroundMigrationWorker.jobs.size).to eq(1)
end end
# E.g. from a previous failed run of this background migration
context 'when there is existing data in untracked_files_for_uploads' do
before do
described_class.new.perform
end end
it 'does not error or produce duplicates of existing data' do
expect do
described_class.new.perform
end.not_to change { untracked_files_for_uploads.count }.from(5)
end end
end end
context 'when there are files in /uploads/tmp' do # If running on Postgres 9.2 (like on CI), this whole context is skipped
it_behaves_like 'does not add files in /uploads/tmp' # since we're unable to use ON CONFLICT DO NOTHING or IGNORE.
context "test bulk insert with ON CONFLICT DO NOTHING or IGNORE", if: described_class.new.send(:can_bulk_insert_and_ignore_duplicates?) do
it_behaves_like 'prepares the untracked_files_for_uploads table'
end end
# If running on Postgres 9.2 (like on CI), the stubbed method has no effect.
#
# If running on Postgres 9.5+ or MySQL, then this context effectively tests
# the bulk insert functionality without ON CONFLICT DO NOTHING or IGNORE.
context 'test bulk insert without ON CONFLICT DO NOTHING or IGNORE' do
before do
allow_any_instance_of(described_class).to receive(:postgresql_pre_9_5?).and_return(true)
end end
it_behaves_like 'prepares the untracked_files_for_uploads table'
end end
# Very new or lightly-used installations that are running this migration # Very new or lightly-used installations that are running this migration
# may not have an upload directory because they have no uploads. # may not have an upload directory because they have no uploads.
context 'when no files were ever uploaded' do context 'when no files were ever uploaded' do
it 'does not add to the untracked_files_for_uploads table (and does not raise error)' do it 'deletes the `untracked_files_for_uploads` table (and does not raise error)' do
described_class.new.perform described_class.new.perform
expect(untracked_files_for_uploads.count).to eq(0) expect(untracked_files_for_uploads.connection.table_exists?(:untracked_files_for_uploads)).to be_falsey
end end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment