Commit c133f1a7 authored by Douwe Maan's avatar Douwe Maan

Merge branch 'mk-fix-no-untracked-upload-files-error' into 'master'

Resolve "PrepareUntrackedUploads PostgreSQL syntax error"

Closes #42881

See merge request gitlab-org/gitlab-ce!17019
parents 41285af4 4e6a8eaa
---
title: Resolve PrepareUntrackedUploads PostgreSQL syntax error
merge_request: 17019
author:
type: fixed
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class SchedulePopulateUntrackedUploadsIfNeeded < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
FOLLOW_UP_MIGRATION = 'PopulateUntrackedUploads'.freeze
class UntrackedFile < ActiveRecord::Base
include EachBatch
self.table_name = 'untracked_files_for_uploads'
end
def up
if table_exists?(:untracked_files_for_uploads)
process_or_remove_table
end
end
def down
# nothing
end
private
def process_or_remove_table
if UntrackedFile.all.empty?
drop_temp_table
else
schedule_populate_untracked_uploads_jobs
end
end
def drop_temp_table
drop_table(:untracked_files_for_uploads, if_exists: true)
end
def schedule_populate_untracked_uploads_jobs
say "Scheduling #{FOLLOW_UP_MIGRATION} background migration jobs since there are rows in untracked_files_for_uploads."
bulk_queue_background_migration_jobs_by_range(
UntrackedFile, FOLLOW_UP_MIGRATION)
end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180206200543) do
ActiveRecord::Schema.define(version: 20180208183958) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......
......@@ -43,8 +43,12 @@ module Gitlab
store_untracked_file_paths
if UntrackedFile.all.empty?
drop_temp_table
else
schedule_populate_untracked_uploads_jobs
end
end
private
......@@ -92,7 +96,7 @@ module Gitlab
end
end
yield(paths)
yield(paths) if paths.any?
end
def build_find_command(search_dir)
......@@ -165,6 +169,11 @@ module Gitlab
bulk_queue_background_migration_jobs_by_range(
UntrackedFile, FOLLOW_UP_MIGRATION)
end
def drop_temp_table
UntrackedFile.connection.drop_table(:untracked_files_for_uploads,
if_exists: true)
end
end
end
end
......@@ -8,8 +8,6 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
before do
DatabaseCleaner.clean
drop_temp_table_if_exists
end
after do
......@@ -23,31 +21,21 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
end
end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
shared_examples 'does not add files in /uploads/tmp' do
let(:tmp_file) { Rails.root.join(described_class::ABSOLUTE_UPLOAD_DIR, 'tmp', 'some_file.jpg') }
shared_examples 'prepares the untracked_files_for_uploads table' do
context 'when files were uploaded before and after hashed storage was enabled' do
let!(:appearance) { create_or_update_appearance(logo: uploaded_file, header_logo: uploaded_file) }
let!(:user) { create(:user, :with_avatar) }
let!(:project1) { create(:project, :with_avatar, :legacy_storage) }
let(:project2) { create(:project) } # instantiate after enabling hashed_storage
before do
FileUtils.mkdir(File.dirname(tmp_file))
FileUtils.touch(tmp_file)
end
after do
FileUtils.rm(tmp_file)
end
it 'does not add files from /uploads/tmp' do
described_class.new.perform
# Markdown upload before enabling hashed_storage
UploadService.new(project1, uploaded_file, FileUploader).execute
expect(untracked_files_for_uploads.count).to eq(5)
end
end
stub_application_setting(hashed_storage_enabled: true)
it 'ensures the untracked_files_for_uploads table exists' do
expect do
described_class.new.perform
end.to change { ActiveRecord::Base.connection.table_exists?(:untracked_files_for_uploads) }.from(false).to(true)
# Markdown upload after enabling hashed_storage
UploadService.new(project2, uploaded_file, FileUploader).execute
end
it 'has a path field long enough for really long paths' do
......@@ -65,31 +53,6 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
expect(record.reload.path.size).to eq(519)
end
context "test bulk insert with ON CONFLICT DO NOTHING or IGNORE" do
around do |example|
# If this is CI, we use Postgres 9.2 so this whole context should be
# skipped since we're unable to use ON CONFLICT DO NOTHING or IGNORE.
if described_class.new.send(:can_bulk_insert_and_ignore_duplicates?)
example.run
end
end
context 'when files were uploaded before and after hashed storage was enabled' do
let!(:appearance) { create_or_update_appearance(logo: uploaded_file, header_logo: uploaded_file) }
let!(:user) { create(:user, :with_avatar) }
let!(:project1) { create(:project, :with_avatar, :legacy_storage) }
let(:project2) { create(:project) } # instantiate after enabling hashed_storage
before do
# Markdown upload before enabling hashed_storage
UploadService.new(project1, uploaded_file, FileUploader).execute
stub_application_setting(hashed_storage_enabled: true)
# Markdown upload after enabling hashed_storage
UploadService.new(project2, uploaded_file, FileUploader).execute
end
it 'adds unhashed files to the untracked_files_for_uploads table' do
described_class.new.perform
......@@ -130,91 +93,66 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
end
end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
context 'when there are files in /uploads/tmp' do
it_behaves_like 'does not add files in /uploads/tmp'
end
end
end
let(:tmp_file) { Rails.root.join(described_class::ABSOLUTE_UPLOAD_DIR, 'tmp', 'some_file.jpg') }
context 'test bulk insert without ON CONFLICT DO NOTHING or IGNORE' do
before do
# If this is CI, we use Postgres 9.2 so this stub has no effect.
#
# If this is being run on Postgres 9.5+ or MySQL, then this stub allows us
# to test the bulk insert functionality without ON CONFLICT DO NOTHING or
# IGNORE.
allow_any_instance_of(described_class).to receive(:postgresql_pre_9_5?).and_return(true)
FileUtils.mkdir(File.dirname(tmp_file))
FileUtils.touch(tmp_file)
end
context 'when files were uploaded before and after hashed storage was enabled' do
let!(:appearance) { create_or_update_appearance(logo: uploaded_file, header_logo: uploaded_file) }
let!(:user) { create(:user, :with_avatar) }
let!(:project1) { create(:project, :with_avatar, :legacy_storage) }
let(:project2) { create(:project) } # instantiate after enabling hashed_storage
before do
# Markdown upload before enabling hashed_storage
UploadService.new(project1, uploaded_file, FileUploader).execute
stub_application_setting(hashed_storage_enabled: true)
# Markdown upload after enabling hashed_storage
UploadService.new(project2, uploaded_file, FileUploader).execute
after do
FileUtils.rm(tmp_file)
end
it 'adds unhashed files to the untracked_files_for_uploads table' do
it 'does not add files from /uploads/tmp' do
described_class.new.perform
expect(untracked_files_for_uploads.count).to eq(5)
end
it 'adds files with paths relative to CarrierWave.root' do
described_class.new.perform
untracked_files_for_uploads.all.each do |file|
expect(file.path.start_with?('uploads/')).to be_truthy
end
end
it 'does not add hashed files to the untracked_files_for_uploads table' do
described_class.new.perform
context 'when the last batch size exactly matches the max batch size' do
it 'does not raise error' do
stub_const("#{described_class}::FIND_BATCH_SIZE", 5)
hashed_file_path = project2.uploads.where(uploader: 'FileUploader').first.path
expect(untracked_files_for_uploads.where("path like '%#{hashed_file_path}%'").exists?).to be_falsey
end
it 'correctly schedules the follow-up background migration jobs' do
expect do
described_class.new.perform
end.not_to raise_error
expect(described_class::FOLLOW_UP_MIGRATION).to be_scheduled_migration(1, 5)
expect(BackgroundMigrationWorker.jobs.size).to eq(1)
expect(untracked_files_for_uploads.count).to eq(5)
end
# E.g. from a previous failed run of this background migration
context 'when there is existing data in untracked_files_for_uploads' do
before do
described_class.new.perform
end
it 'does not error or produce duplicates of existing data' do
expect do
described_class.new.perform
end.not_to change { untracked_files_for_uploads.count }.from(5)
end
end
context 'when there are files in /uploads/tmp' do
it_behaves_like 'does not add files in /uploads/tmp'
# If running on Postgres 9.2 (like on CI), this whole context is skipped
# since we're unable to use ON CONFLICT DO NOTHING or IGNORE.
context "test bulk insert with ON CONFLICT DO NOTHING or IGNORE", if: described_class.new.send(:can_bulk_insert_and_ignore_duplicates?) do
it_behaves_like 'prepares the untracked_files_for_uploads table'
end
# If running on Postgres 9.2 (like on CI), the stubbed method has no effect.
#
# If running on Postgres 9.5+ or MySQL, then this context effectively tests
# the bulk insert functionality without ON CONFLICT DO NOTHING or IGNORE.
context 'test bulk insert without ON CONFLICT DO NOTHING or IGNORE' do
before do
allow_any_instance_of(described_class).to receive(:postgresql_pre_9_5?).and_return(true)
end
it_behaves_like 'prepares the untracked_files_for_uploads table'
end
# Very new or lightly-used installations that are running this migration
# may not have an upload directory because they have no uploads.
context 'when no files were ever uploaded' do
it 'does not add to the untracked_files_for_uploads table (and does not raise error)' do
it 'deletes the `untracked_files_for_uploads` table (and does not raise error)' do
described_class.new.perform
expect(untracked_files_for_uploads.count).to eq(0)
expect(untracked_files_for_uploads.connection.table_exists?(:untracked_files_for_uploads)).to be_falsey
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment