Commit 1c481b7a authored by Toon Claes's avatar Toon Claes

Enhance performance of counting local Uploads

Add an index to the `store` column on `uploads`. This makes counting
local uploads faster.

Also, there is no longer need to check for objects with `store = NULL`.
See https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/18557

---

### Query plans

Query:

```sql
SELECT COUNT(*)
FROM "uploads"
WHERE ("uploads"."store" = ? OR "uploads"."store" IS NULL)
```

#### Without index

```
gitlabhq_production=# EXPLAIN ANALYZE SELECT uploads.* FROM uploads WHERE (uploads.store = 1 OR uploads.store IS NULL);
                                                  QUERY PLAN
---------------------------------------------------------------------------------------------------------------
 Seq Scan on uploads  (cost=0.00..601729.54 rows=578 width=272) (actual time=6.170..2308.256 rows=545 loops=1)
   Filter: ((store = 1) OR (store IS NULL))
   Rows Removed by Filter: 4411957
 Planning time: 38.652 ms
 Execution time: 2308.454 ms
(5 rows)
```

#### Add index

```
gitlabhq_production=# create index uploads_tmp1 on uploads (store);
CREATE INDEX
```

#### With index

```
gitlabhq_production=# EXPLAIN ANALYZE SELECT uploads.* FROM uploads WHERE (uploads.store = 1 OR uploads.store IS NULL);
                                                          QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on uploads  (cost=11.46..1238.88 rows=574 width=272) (actual time=0.155..0.577 rows=545 loops=1)
   Recheck Cond: ((store = 1) OR (store IS NULL))
   Heap Blocks: exact=217
   ->  BitmapOr  (cost=11.46..11.46 rows=574 width=0) (actual time=0.116..0.116 rows=0 loops=1)
         ->  Bitmap Index Scan on uploads_tmp1  (cost=0.00..8.74 rows=574 width=0) (actual time=0.095..0.095 rows=545 loops=1)
               Index Cond: (store = 1)
         ->  Bitmap Index Scan on uploads_tmp1  (cost=0.00..2.44 rows=1 width=0) (actual time=0.020..0.020 rows=0 loops=1)
               Index Cond: (store IS NULL)
 Planning time: 0.274 ms
 Execution time: 0.637 ms
(10 rows)
```

Closes https://gitlab.com/gitlab-org/gitlab-ee/issues/6070
parent b1fae097
...@@ -11,7 +11,7 @@ class Upload < ActiveRecord::Base ...@@ -11,7 +11,7 @@ class Upload < ActiveRecord::Base
validates :model, presence: true validates :model, presence: true
validates :uploader, presence: true validates :uploader, presence: true
scope :with_files_stored_locally, -> { where(store: [nil, ObjectStorage::Store::LOCAL]) } scope :with_files_stored_locally, -> { where(store: ObjectStorage::Store::LOCAL) }
before_save :calculate_checksum!, if: :foreground_checksummable? before_save :calculate_checksum!, if: :foreground_checksummable?
after_commit :schedule_checksum, if: :checksummable? after_commit :schedule_checksum, if: :checksummable?
...@@ -57,8 +57,6 @@ class Upload < ActiveRecord::Base ...@@ -57,8 +57,6 @@ class Upload < ActiveRecord::Base
end end
def local? def local?
return true if store.nil?
store == ObjectStorage::Store::LOCAL store == ObjectStorage::Store::LOCAL
end end
......
---
title: Enhance performance of counting local Uploads
merge_request: 22522
author:
type: performance
# frozen_string_literal: true
class AddIndexToUploadsStore < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_concurrent_index :uploads, :store
end
def down
remove_concurrent_index :uploads, :store
end
end
...@@ -2155,6 +2155,7 @@ ActiveRecord::Schema.define(version: 20181107054254) do ...@@ -2155,6 +2155,7 @@ ActiveRecord::Schema.define(version: 20181107054254) do
add_index "uploads", ["checksum"], name: "index_uploads_on_checksum", using: :btree add_index "uploads", ["checksum"], name: "index_uploads_on_checksum", using: :btree
add_index "uploads", ["model_id", "model_type"], name: "index_uploads_on_model_id_and_model_type", using: :btree add_index "uploads", ["model_id", "model_type"], name: "index_uploads_on_model_id_and_model_type", using: :btree
add_index "uploads", ["store"], name: "index_uploads_on_store", using: :btree
add_index "uploads", ["uploader", "path"], name: "index_uploads_on_uploader_and_path", using: :btree add_index "uploads", ["uploader", "path"], name: "index_uploads_on_uploader_and_path", using: :btree
create_table "user_agent_details", force: :cascade do |t| create_table "user_agent_details", force: :cascade do |t|
......
...@@ -21,7 +21,8 @@ describe Upload do ...@@ -21,7 +21,8 @@ describe Upload do
path: __FILE__, path: __FILE__,
size: described_class::CHECKSUM_THRESHOLD + 1.kilobyte, size: described_class::CHECKSUM_THRESHOLD + 1.kilobyte,
model: build_stubbed(:user), model: build_stubbed(:user),
uploader: double('ExampleUploader') uploader: double('ExampleUploader'),
store: ObjectStorage::Store::LOCAL
) )
expect(UploadChecksumWorker) expect(UploadChecksumWorker)
...@@ -35,7 +36,8 @@ describe Upload do ...@@ -35,7 +36,8 @@ describe Upload do
path: __FILE__, path: __FILE__,
size: described_class::CHECKSUM_THRESHOLD, size: described_class::CHECKSUM_THRESHOLD,
model: build_stubbed(:user), model: build_stubbed(:user),
uploader: double('ExampleUploader') uploader: double('ExampleUploader'),
store: ObjectStorage::Store::LOCAL
) )
expect { upload.save } expect { upload.save }
...@@ -60,7 +62,7 @@ describe Upload do ...@@ -60,7 +62,7 @@ describe Upload do
describe '#absolute_path' do describe '#absolute_path' do
it 'returns the path directly when already absolute' do it 'returns the path directly when already absolute' do
path = '/path/to/namespace/project/secret/file.jpg' path = '/path/to/namespace/project/secret/file.jpg'
upload = described_class.new(path: path) upload = described_class.new(path: path, store: ObjectStorage::Store::LOCAL)
expect(upload).not_to receive(:uploader_class) expect(upload).not_to receive(:uploader_class)
...@@ -69,7 +71,7 @@ describe Upload do ...@@ -69,7 +71,7 @@ describe Upload do
it "delegates to the uploader's absolute_path method" do it "delegates to the uploader's absolute_path method" do
uploader = spy('FakeUploader') uploader = spy('FakeUploader')
upload = described_class.new(path: 'secret/file.jpg') upload = described_class.new(path: 'secret/file.jpg', store: ObjectStorage::Store::LOCAL)
expect(upload).to receive(:uploader_class).and_return(uploader) expect(upload).to receive(:uploader_class).and_return(uploader)
upload.absolute_path upload.absolute_path
...@@ -81,7 +83,8 @@ describe Upload do ...@@ -81,7 +83,8 @@ describe Upload do
describe '#calculate_checksum!' do describe '#calculate_checksum!' do
let(:upload) do let(:upload) do
described_class.new(path: __FILE__, described_class.new(path: __FILE__,
size: described_class::CHECKSUM_THRESHOLD - 1.megabyte) size: described_class::CHECKSUM_THRESHOLD - 1.megabyte,
store: ObjectStorage::Store::LOCAL)
end end
it 'sets `checksum` to SHA256 sum of the file' do it 'sets `checksum` to SHA256 sum of the file' do
...@@ -104,13 +107,13 @@ describe Upload do ...@@ -104,13 +107,13 @@ describe Upload do
describe '#exist?' do describe '#exist?' do
it 'returns true when the file exists' do it 'returns true when the file exists' do
upload = described_class.new(path: __FILE__) upload = described_class.new(path: __FILE__, store: ObjectStorage::Store::LOCAL)
expect(upload).to exist expect(upload).to exist
end end
it 'returns false when the file does not exist' do it 'returns false when the file does not exist' do
upload = described_class.new(path: "#{__FILE__}-nope") upload = described_class.new(path: "#{__FILE__}-nope", store: ObjectStorage::Store::LOCAL)
expect(upload).not_to exist expect(upload).not_to exist
end end
......
...@@ -38,14 +38,6 @@ describe 'gitlab:uploads:migrate rake tasks' do ...@@ -38,14 +38,6 @@ describe 'gitlab:uploads:migrate rake tasks' do
let!(:projects) { create_list(:project, 10, :with_avatar) } let!(:projects) { create_list(:project, 10, :with_avatar) }
it_behaves_like 'enqueue jobs in batch', batch: 4 it_behaves_like 'enqueue jobs in batch', batch: 4
context 'Upload has store = nil' do
before do
Upload.where(model: projects).update_all(store: nil)
end
it_behaves_like 'enqueue jobs in batch', batch: 4
end
end end
context "for Group" do context "for Group" do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment