Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Léo-Paul Géneau
gitlab-ce
Commits
edb5cac4
Commit
edb5cac4
authored
Nov 20, 2017
by
Michael Kozono
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Use bulk inserts
parent
17ce21d7
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
184 additions
and
71 deletions
+184
-71
lib/gitlab/background_migration/prepare_untracked_uploads.rb
lib/gitlab/background_migration/prepare_untracked_uploads.rb
+35
-20
spec/lib/gitlab/background_migration/prepare_untracked_uploads_spec.rb
...ab/background_migration/prepare_untracked_uploads_spec.rb
+145
-47
spec/migrations/track_untracked_uploads_spec.rb
spec/migrations/track_untracked_uploads_spec.rb
+4
-4
No files found.
lib/gitlab/background_migration/prepare_untracked_uploads.rb
View file @
edb5cac4
...
@@ -20,7 +20,19 @@ module Gitlab
...
@@ -20,7 +20,19 @@ module Gitlab
def
perform
def
perform
ensure_temporary_tracking_table_exists
ensure_temporary_tracking_table_exists
# Since Postgres < 9.5 does not have ON CONFLICT DO NOTHING, and since
# doing inserts-if-not-exists without ON CONFLICT DO NOTHING would be
# slow, start with an empty table for Postgres < 9.5.
# That way we can do bulk inserts at ~30x the speed of individual
# inserts (~20 minutes worth of inserts at GitLab.com scale instead of
# ~10 hours).
# In all other cases, installations will get both bulk inserts and the
# ability for these jobs to retry without having to clear and reinsert.
clear_untracked_file_paths
unless
can_bulk_insert_and_ignore_duplicates?
store_untracked_file_paths
store_untracked_file_paths
schedule_populate_untracked_uploads_jobs
schedule_populate_untracked_uploads_jobs
end
end
...
@@ -44,6 +56,10 @@ module Gitlab
...
@@ -44,6 +56,10 @@ module Gitlab
end
end
end
end
def
clear_untracked_file_paths
UntrackedFile
.
delete_all
end
def
store_untracked_file_paths
def
store_untracked_file_paths
return
unless
Dir
.
exist?
(
ABSOLUTE_UPLOAD_DIR
)
return
unless
Dir
.
exist?
(
ABSOLUTE_UPLOAD_DIR
)
...
@@ -96,36 +112,35 @@ module Gitlab
...
@@ -96,36 +112,35 @@ module Gitlab
end
end
def
insert_file_paths
(
file_paths
)
def
insert_file_paths
(
file_paths
)
ActiveRecord
::
Base
.
transaction
do
sql
=
if
postgresql_pre_9_5?
file_paths
.
each
do
|
file_path
|
"INSERT INTO
#{
table_columns_and_values_for_insert
(
file_paths
)
}
;"
insert_file_path
(
file_path
)
elsif
postgresql?
end
"INSERT INTO
#{
table_columns_and_values_for_insert
(
file_paths
)
}
ON CONFLICT DO NOTHING;"
end
else
# MySQL
end
"INSERT IGNORE INTO
#{
table_columns_and_values_for_insert
(
file_paths
)
}
;"
end
def
insert_file_path
(
file_path
)
ActiveRecord
::
Base
.
connection
.
execute
(
sql
)
if
postgresql_pre_9_5?
end
# No easy way to do ON CONFLICT DO NOTHING before Postgres 9.5 so just use Rails
return
UntrackedFile
.
where
(
path:
file_path
).
first_or_create
end
table_columns_and_values
=
'untracked_files_for_uploads (path, created_at, updated_at) VALUES (?, ?, ?)'
def
table_columns_and_values_for_insert
(
file_paths
)
timestamp
=
Time
.
now
.
utc
.
iso8601
sql
=
if
postgresql?
values
=
file_paths
.
map
do
|
file_path
|
"INSERT INTO
#{
table_columns_and_values
}
ON CONFLICT DO NOTHING;"
ActiveRecord
::
Base
.
send
(
:sanitize_sql_array
,
[
'(?, ?, ?)'
,
file_path
,
timestamp
,
timestamp
])
# rubocop:disable GitlabSecurity/PublicSend
else
end
.
join
(
', '
)
"INSERT IGNORE INTO
#{
table_columns_and_values
}
;"
end
timestamp
=
Time
.
now
.
utc
.
iso8601
"
#{
UntrackedFile
.
table_name
}
(path, created_at, updated_at) VALUES
#{
values
}
"
sql
=
ActiveRecord
::
Base
.
send
(
:sanitize_sql_array
,
[
sql
,
file_path
,
timestamp
,
timestamp
])
# rubocop:disable GitlabSecurity/PublicSend
ActiveRecord
::
Base
.
connection
.
execute
(
sql
)
end
end
def
postgresql?
def
postgresql?
@postgresql
||=
Gitlab
::
Database
.
postgresql?
@postgresql
||=
Gitlab
::
Database
.
postgresql?
end
end
def
can_bulk_insert_and_ignore_duplicates?
!
postgresql_pre_9_5?
end
def
postgresql_pre_9_5?
def
postgresql_pre_9_5?
@postgresql_pre_9_5
||=
postgresql?
&&
@postgresql_pre_9_5
||=
postgresql?
&&
ActiveRecord
::
Base
.
connection
.
select_value
(
'SHOW server_version_num'
).
to_i
<
90500
ActiveRecord
::
Base
.
connection
.
select_value
(
'SHOW server_version_num'
).
to_i
<
90500
...
...
spec/lib/gitlab/background_migration/prepare_untracked_uploads_spec.rb
View file @
edb5cac4
...
@@ -53,80 +53,178 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :migration, :side
...
@@ -53,80 +53,178 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :migration, :side
expect
(
record
.
reload
.
path
.
size
).
to
eq
(
519
)
expect
(
record
.
reload
.
path
.
size
).
to
eq
(
519
)
end
end
context
'when files were uploaded before and after hashed storage was enabled'
do
context
"test bulk insert with ON CONFLICT DO NOTHING or IGNORE"
do
let!
(
:appearance
)
{
create
(
:appearance
,
logo:
uploaded_file
,
header_logo:
uploaded_file
)
}
around
do
|
example
|
let!
(
:user
)
{
create
(
:user
,
:with_avatar
)
}
# If this is CI, we use Postgres 9.2 so this whole context should be
let!
(
:project1
)
{
create
(
:project
,
:with_avatar
)
}
# skipped since we're unable to use ON CONFLICT DO NOTHING or IGNORE.
let
(
:project2
)
{
create
(
:project
)
}
# instantiate after enabling hashed_storage
if
described_class
.
new
.
send
(
:can_bulk_insert_and_ignore_duplicates?
)
example
.
run
end
end
before
do
context
'when files were uploaded before and after hashed storage was enabled'
do
# Markdown upload before enabling hashed_storage
let!
(
:appearance
)
{
create
(
:appearance
,
logo:
uploaded_file
,
header_logo:
uploaded_file
)
}
UploadService
.
new
(
project1
,
uploaded_file
,
FileUploader
).
execute
let!
(
:user
)
{
create
(
:user
,
:with_avatar
)
}
let!
(
:project1
)
{
create
(
:project
,
:with_avatar
)
}
let
(
:project2
)
{
create
(
:project
)
}
# instantiate after enabling hashed_storage
stub_application_setting
(
hashed_storage_enabled:
true
)
before
do
# Markdown upload before enabling hashed_storage
UploadService
.
new
(
project1
,
uploaded_file
,
FileUploader
).
execute
# Markdown upload after enabling hashed_storage
stub_application_setting
(
hashed_storage_enabled:
true
)
UploadService
.
new
(
project2
,
uploaded_file
,
FileUploader
).
execute
end
it
'adds unhashed files to the untracked_files_for_uploads table'
do
# Markdown upload after enabling hashed_storage
described_class
.
new
.
perform
UploadService
.
new
(
project2
,
uploaded_file
,
FileUploader
).
execute
end
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
it
'adds unhashed files to the untracked_files_for_uploads table'
do
end
described_class
.
new
.
perform
it
'adds files with paths relative to CarrierWave.root'
do
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
described_class
.
new
.
perform
untracked_files_for_uploads
.
all
.
each
do
|
file
|
expect
(
file
.
path
.
start_with?
(
'uploads/'
)).
to
be_truthy
end
end
end
it
'does not add hashed files to the untracked_files_for_uploads table
'
do
it
'adds files with paths relative to CarrierWave.root
'
do
described_class
.
new
.
perform
described_class
.
new
.
perform
untracked_files_for_uploads
.
all
.
each
do
|
file
|
hashed_file_path
=
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
first
.
path
expect
(
file
.
path
.
start_with?
(
'uploads/'
)).
to
be_truthy
expect
(
untracked_files_for_uploads
.
where
(
"path like '%
#{
hashed_file_path
}
%'"
).
exists?
).
to
be_falsey
end
end
end
it
'correctly schedules the follow-up background migration jobs
'
do
it
'does not add hashed files to the untracked_files_for_uploads table
'
do
described_class
.
new
.
perform
described_class
.
new
.
perform
expect
(
described_class
::
FOLLOW_UP_MIGRATION
).
to
be_scheduled_migration
(
1
,
5
)
hashed_file_path
=
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
first
.
path
expect
(
BackgroundMigrationWorker
.
jobs
.
size
).
to
eq
(
1
)
expect
(
untracked_files_for_uploads
.
where
(
"path like '%
#{
hashed_file_path
}
%'"
).
exists?
).
to
be_falsey
end
end
# E.g. from a previous failed run of this background migration
it
'correctly schedules the follow-up background migration jobs'
do
context
'when there is existing data in untracked_files_for_uploads'
do
before
do
described_class
.
new
.
perform
described_class
.
new
.
perform
expect
(
described_class
::
FOLLOW_UP_MIGRATION
).
to
be_scheduled_migration
(
1
,
5
)
expect
(
BackgroundMigrationWorker
.
jobs
.
size
).
to
eq
(
1
)
end
end
it
'does not error or produce duplicates of existing data'
do
# E.g. from a previous failed run of this background migration
expect
do
context
'when there is existing data in untracked_files_for_uploads'
do
before
do
described_class
.
new
.
perform
described_class
.
new
.
perform
end
.
not_to
change
{
untracked_files_for_uploads
.
count
}.
from
(
5
)
end
it
'does not error or produce duplicates of existing data'
do
expect
do
described_class
.
new
.
perform
end
.
not_to
change
{
untracked_files_for_uploads
.
count
}.
from
(
5
)
end
end
end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
context
'when there are files in /uploads/tmp'
do
let
(
:tmp_file
)
{
Rails
.
root
.
join
(
described_class
::
ABSOLUTE_UPLOAD_DIR
,
'tmp'
,
'some_file.jpg'
)
}
before
do
FileUtils
.
touch
(
tmp_file
)
end
after
do
FileUtils
.
rm
(
tmp_file
)
end
it
'does not add files from /uploads/tmp'
do
described_class
.
new
.
perform
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
end
end
end
end
context
'test bulk insert without ON CONFLICT DO NOTHING or IGNORE'
do
before
do
# If this is CI, we use Postgres 9.2 so this stub has no effect.
#
# If this is being run on Postgres 9.5+ or MySQL, then this stub allows us
# to test the bulk insert functionality without ON CONFLICT DO NOTHING or
# IGNORE.
allow_any_instance_of
(
described_class
).
to
receive
(
:postgresql_pre_9_5?
).
and_return
(
true
)
end
end
# E.g. The installation is in use at the time of migration, and someone has
context
'when files were uploaded before and after hashed storage was enabled'
do
# just uploaded a file
let!
(
:appearance
)
{
create
(
:appearance
,
logo:
uploaded_file
,
header_logo:
uploaded_file
)
}
context
'when there are files in /uploads/tmp'
do
let!
(
:user
)
{
create
(
:user
,
:with_avatar
)
}
let
(
:tmp_file
)
{
Rails
.
root
.
join
(
described_class
::
ABSOLUTE_UPLOAD_DIR
,
'tmp'
,
'some_file.jpg'
)
}
let!
(
:project1
)
{
create
(
:project
,
:with_avatar
)
}
let
(
:project2
)
{
create
(
:project
)
}
# instantiate after enabling hashed_storage
before
do
before
do
FileUtils
.
touch
(
tmp_file
)
# Markdown upload before enabling hashed_storage
end
UploadService
.
new
(
project1
,
uploaded_file
,
FileUploader
).
execute
after
do
stub_application_setting
(
hashed_storage_enabled:
true
)
FileUtils
.
rm
(
tmp_file
)
# Markdown upload after enabling hashed_storage
UploadService
.
new
(
project2
,
uploaded_file
,
FileUploader
).
execute
end
end
it
'
does not add files from /uploads/tmp
'
do
it
'
adds unhashed files to the untracked_files_for_uploads table
'
do
described_class
.
new
.
perform
described_class
.
new
.
perform
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
end
end
it
'adds files with paths relative to CarrierWave.root'
do
described_class
.
new
.
perform
untracked_files_for_uploads
.
all
.
each
do
|
file
|
expect
(
file
.
path
.
start_with?
(
'uploads/'
)).
to
be_truthy
end
end
it
'does not add hashed files to the untracked_files_for_uploads table'
do
described_class
.
new
.
perform
hashed_file_path
=
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
first
.
path
expect
(
untracked_files_for_uploads
.
where
(
"path like '%
#{
hashed_file_path
}
%'"
).
exists?
).
to
be_falsey
end
it
'correctly schedules the follow-up background migration jobs'
do
described_class
.
new
.
perform
expect
(
described_class
::
FOLLOW_UP_MIGRATION
).
to
be_scheduled_migration
(
1
,
5
)
expect
(
BackgroundMigrationWorker
.
jobs
.
size
).
to
eq
(
1
)
end
# E.g. from a previous failed run of this background migration
context
'when there is existing data in untracked_files_for_uploads'
do
before
do
described_class
.
new
.
perform
end
it
'does not error or produce duplicates of existing data'
do
expect
do
described_class
.
new
.
perform
end
.
not_to
change
{
untracked_files_for_uploads
.
count
}.
from
(
5
)
end
end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
context
'when there are files in /uploads/tmp'
do
let
(
:tmp_file
)
{
Rails
.
root
.
join
(
described_class
::
ABSOLUTE_UPLOAD_DIR
,
'tmp'
,
'some_file.jpg'
)
}
before
do
FileUtils
.
touch
(
tmp_file
)
end
after
do
FileUtils
.
rm
(
tmp_file
)
end
it
'does not add files from /uploads/tmp'
do
described_class
.
new
.
perform
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
end
end
end
end
end
end
...
...
spec/migrations/track_untracked_uploads_spec.rb
View file @
edb5cac4
...
@@ -62,8 +62,8 @@ describe TrackUntrackedUploads, :migration, :sidekiq do
...
@@ -62,8 +62,8 @@ describe TrackUntrackedUploads, :migration, :sidekiq do
expect
(
appearance
.
reload
.
uploads
.
where
(
"path like '%/header_logo/%'"
).
first
.
attributes
).
to
include
(
@appearance_header_logo_attributes
)
expect
(
appearance
.
reload
.
uploads
.
where
(
"path like '%/header_logo/%'"
).
first
.
attributes
).
to
include
(
@appearance_header_logo_attributes
)
expect
(
user2
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@user2_avatar_attributes
)
expect
(
user2
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@user2_avatar_attributes
)
expect
(
project2
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@project2_avatar_attributes
)
expect
(
project2
.
reload
.
uploads
.
where
(
uploader:
'AvatarUploader'
).
first
.
attributes
).
to
include
(
@project2_avatar_attributes
)
expect
(
project2
.
uploads
.
la
st
.
attributes
).
to
include
(
@project2_markdown_attributes
)
expect
(
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
fir
st
.
attributes
).
to
include
(
@project2_markdown_attributes
)
end
end
it
'ignores already-tracked uploads'
do
it
'ignores already-tracked uploads'
do
...
@@ -71,8 +71,8 @@ describe TrackUntrackedUploads, :migration, :sidekiq do
...
@@ -71,8 +71,8 @@ describe TrackUntrackedUploads, :migration, :sidekiq do
expect
(
appearance
.
reload
.
uploads
.
where
(
"path like '%/logo/%'"
).
first
.
attributes
).
to
include
(
@appearance_logo_attributes
)
expect
(
appearance
.
reload
.
uploads
.
where
(
"path like '%/logo/%'"
).
first
.
attributes
).
to
include
(
@appearance_logo_attributes
)
expect
(
user1
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@user1_avatar_attributes
)
expect
(
user1
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@user1_avatar_attributes
)
expect
(
project1
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@project1_avatar_attributes
)
expect
(
project1
.
reload
.
uploads
.
where
(
uploader:
'AvatarUploader'
).
first
.
attributes
).
to
include
(
@project1_avatar_attributes
)
expect
(
project1
.
uploads
.
la
st
.
attributes
).
to
include
(
@project1_markdown_attributes
)
expect
(
project1
.
uploads
.
where
(
uploader:
'FileUploader'
).
fir
st
.
attributes
).
to
include
(
@project1_markdown_attributes
)
end
end
it
'the temporary table untracked_files_for_uploads no longer exists'
do
it
'the temporary table untracked_files_for_uploads no longer exists'
do
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment