Commit 7ce78e6c authored by David Fernandez's avatar David Fernandez

Bulk insert package dependencies with upsert

Handle the case where bulk insert encounters a conflict and no ids are
returned.
parent 19f5184e
...@@ -10,7 +10,7 @@ class Packages::Dependency < ApplicationRecord ...@@ -10,7 +10,7 @@ class Packages::Dependency < ApplicationRecord
MAX_STRING_LENGTH = 255.freeze MAX_STRING_LENGTH = 255.freeze
MAX_CHUNKED_QUERIES_COUNT = 10.freeze MAX_CHUNKED_QUERIES_COUNT = 10.freeze
def self.for_package_names_and_version_patterns(names_and_version_patterns = {}, chunk_size = 50, max_rows_limit = 200) def self.ids_for_package_names_and_version_patterns(names_and_version_patterns = {}, chunk_size = 50, max_rows_limit = 200)
names_and_version_patterns.reject! { |key, value| key.size > MAX_STRING_LENGTH || value.size > MAX_STRING_LENGTH } names_and_version_patterns.reject! { |key, value| key.size > MAX_STRING_LENGTH || value.size > MAX_STRING_LENGTH }
raise ArgumentError, 'Too many names_and_version_patterns' if names_and_version_patterns.size > MAX_CHUNKED_QUERIES_COUNT * chunk_size raise ArgumentError, 'Too many names_and_version_patterns' if names_and_version_patterns.size > MAX_CHUNKED_QUERIES_COUNT * chunk_size
...@@ -26,9 +26,15 @@ class Packages::Dependency < ApplicationRecord ...@@ -26,9 +26,15 @@ class Packages::Dependency < ApplicationRecord
raise ArgumentError, 'Too many Dependencies selected' if matched_ids.size > max_rows_limit raise ArgumentError, 'Too many Dependencies selected' if matched_ids.size > max_rows_limit
end end
return none if matched_ids.empty? matched_ids
end
def self.for_package_names_and_version_patterns(names_and_version_patterns = {}, chunk_size = 50, max_rows_limit = 200)
ids = ids_for_package_names_and_version_patterns(names_and_version_patterns, chunk_size, max_rows_limit)
return none if ids.empty?
where(id: matched_ids) id_in(ids)
end end
def self.pluck_ids_and_names def self.pluck_ids_and_names
......
...@@ -19,12 +19,12 @@ module Packages ...@@ -19,12 +19,12 @@ module Packages
def create_dependency(type) def create_dependency(type)
return unless dependencies.key?(type) return unless dependencies.key?(type)
names_and_version_patterns = dependencies[type].to_a names_and_version_patterns = dependencies[type]
existing_ids, existing_names = find_existing_ids_and_names(names_and_version_patterns) existing_ids, existing_names = find_existing_ids_and_names(names_and_version_patterns)
dependencies_to_insert = names_and_version_patterns dependencies_to_insert = names_and_version_patterns
if existing_names.any? if existing_names.any?
dependencies_to_insert = names_and_version_patterns.reject { |e| e.first.in?(existing_names) } dependencies_to_insert = names_and_version_patterns.reject { |k, _| k.in?(existing_names) }
end end
ActiveRecord::Base.transaction do ActiveRecord::Base.transaction do
...@@ -51,7 +51,16 @@ module Packages ...@@ -51,7 +51,16 @@ module Packages
} }
end end
database.bulk_insert(Packages::Dependency.table_name, rows, return_ids: true) ids = database.bulk_insert(Packages::Dependency.table_name, rows, return_ids: true, on_conflict: :do_nothing)
return ids if ids.size == names_and_version_patterns.size
Packages::Dependency.uncached do
# The bulk_insert statement above do not dirty the query cache. To make
# sure that the results are fresh from the database and not from a stalled
# and potentially wrong cache, this query has to be done with the query
# chache disabled.
Packages::Dependency.ids_for_package_names_and_version_patterns(names_and_version_patterns)
end
end end
def bulk_insert_package_dependency_links(type, dependency_ids) def bulk_insert_package_dependency_links(type, dependency_ids)
......
...@@ -14,16 +14,17 @@ RSpec.describe Packages::Dependency, type: :model do ...@@ -14,16 +14,17 @@ RSpec.describe Packages::Dependency, type: :model do
it { is_expected.to validate_uniqueness_of(:name).scoped_to(:version_pattern) } it { is_expected.to validate_uniqueness_of(:name).scoped_to(:version_pattern) }
end end
describe '.for_package_names_and_version_patterns' do describe '.ids_for_package_names_and_version_patterns' do
let_it_be(:package_dependency1) { create(:packages_dependency, name: 'foo', version_pattern: '~1.0.0') } let_it_be(:package_dependency1) { create(:packages_dependency, name: 'foo', version_pattern: '~1.0.0') }
let_it_be(:package_dependency2) { create(:packages_dependency, name: 'bar', version_pattern: '~2.5.0') } let_it_be(:package_dependency2) { create(:packages_dependency, name: 'bar', version_pattern: '~2.5.0') }
let_it_be(:expected_ids) { [package_dependency1.id, package_dependency2.id] }
let(:names_and_version_patterns) { build_names_and_version_patterns(package_dependency1, package_dependency2) } let(:names_and_version_patterns) { build_names_and_version_patterns(package_dependency1, package_dependency2) }
let(:chunk_size) { 50 } let(:chunk_size) { 50 }
let(:rows_limit) { 50 } let(:rows_limit) { 50 }
subject { Packages::Dependency.for_package_names_and_version_patterns(names_and_version_patterns, chunk_size, rows_limit) } subject { Packages::Dependency.ids_for_package_names_and_version_patterns(names_and_version_patterns, chunk_size, rows_limit) }
it { is_expected.to match_array([package_dependency1, package_dependency2]) } it { is_expected.to match_array(expected_ids) }
context 'with unknown names' do context 'with unknown names' do
let(:names_and_version_patterns) { { unknown: '~1.0.0' } } let(:names_and_version_patterns) { { unknown: '~1.0.0' } }
...@@ -41,14 +42,14 @@ RSpec.describe Packages::Dependency, type: :model do ...@@ -41,14 +42,14 @@ RSpec.describe Packages::Dependency, type: :model do
let_it_be(:big_name) { 'a' * (Packages::Dependency::MAX_STRING_LENGTH + 1) } let_it_be(:big_name) { 'a' * (Packages::Dependency::MAX_STRING_LENGTH + 1) }
let(:names_and_version_patterns) { build_names_and_version_patterns(package_dependency1, package_dependency2).merge(big_name => '~1.0.0') } let(:names_and_version_patterns) { build_names_and_version_patterns(package_dependency1, package_dependency2).merge(big_name => '~1.0.0') }
it { is_expected.to match_array([package_dependency1, package_dependency2]) } it { is_expected.to match_array(expected_ids) }
end end
context 'with a version pattern bigger than column size' do context 'with a version pattern bigger than column size' do
let_it_be(:big_version_pattern) { 'a' * (Packages::Dependency::MAX_STRING_LENGTH + 1) } let_it_be(:big_version_pattern) { 'a' * (Packages::Dependency::MAX_STRING_LENGTH + 1) }
let(:names_and_version_patterns) { build_names_and_version_patterns(package_dependency1, package_dependency2).merge('test' => big_version_pattern) } let(:names_and_version_patterns) { build_names_and_version_patterns(package_dependency1, package_dependency2).merge('test' => big_version_pattern) }
it { is_expected.to match_array([package_dependency1, package_dependency2]) } it { is_expected.to match_array(expected_ids) }
end end
context 'with too big parameter' do context 'with too big parameter' do
...@@ -64,12 +65,13 @@ RSpec.describe Packages::Dependency, type: :model do ...@@ -64,12 +65,13 @@ RSpec.describe Packages::Dependency, type: :model do
let_it_be(:package_dependency5) { create(:packages_dependency, name: 'foo5', version_pattern: '~1.5.5') } let_it_be(:package_dependency5) { create(:packages_dependency, name: 'foo5', version_pattern: '~1.5.5') }
let_it_be(:package_dependency6) { create(:packages_dependency, name: 'foo6', version_pattern: '~1.5.6') } let_it_be(:package_dependency6) { create(:packages_dependency, name: 'foo6', version_pattern: '~1.5.6') }
let_it_be(:package_dependency7) { create(:packages_dependency, name: 'foo7', version_pattern: '~1.5.7') } let_it_be(:package_dependency7) { create(:packages_dependency, name: 'foo7', version_pattern: '~1.5.7') }
let(:expected_ids) { [package_dependency1.id, package_dependency2.id, package_dependency3.id, package_dependency4.id, package_dependency5.id, package_dependency6.id, package_dependency7.id] }
let(:names_and_version_patterns) { build_names_and_version_patterns(package_dependency1, package_dependency2, package_dependency3, package_dependency4, package_dependency5, package_dependency6, package_dependency7) } let(:names_and_version_patterns) { build_names_and_version_patterns(package_dependency1, package_dependency2, package_dependency3, package_dependency4, package_dependency5, package_dependency6, package_dependency7) }
context 'above the chunk size' do context 'above the chunk size' do
let(:chunk_size) { 2 } let(:chunk_size) { 2 }
it { is_expected.to match_array([package_dependency1, package_dependency2, package_dependency3, package_dependency4, package_dependency5, package_dependency6, package_dependency7]) } it { is_expected.to match_array(expected_ids) }
end end
context 'selecting too many rows' do context 'selecting too many rows' do
...@@ -78,11 +80,34 @@ RSpec.describe Packages::Dependency, type: :model do ...@@ -78,11 +80,34 @@ RSpec.describe Packages::Dependency, type: :model do
it { expect { subject }.to raise_error(ArgumentError, 'Too many Dependencies selected') } it { expect { subject }.to raise_error(ArgumentError, 'Too many Dependencies selected') }
end end
end end
end
def build_names_and_version_patterns(*package_dependencies) describe '.for_package_names_and_version_patterns' do
result = Hash.new { |h, dependency| h[dependency.name] = dependency.version_pattern } let_it_be(:package_dependency1) { create(:packages_dependency, name: 'foo', version_pattern: '~1.0.0') }
package_dependencies.each { |dependency| result[dependency] } let_it_be(:package_dependency2) { create(:packages_dependency, name: 'bar', version_pattern: '~2.5.0') }
result let_it_be(:expected_array) { [package_dependency1, package_dependency2] }
let(:names_and_version_patterns) { build_names_and_version_patterns(package_dependency1, package_dependency2) }
subject { Packages::Dependency.for_package_names_and_version_patterns(names_and_version_patterns) }
it { is_expected.to match_array(expected_array) }
context 'with unknown names' do
let(:names_and_version_patterns) { { unknown: '~1.0.0' } }
it { is_expected.to be_empty }
end
context 'with unknown version patterns' do
let(:names_and_version_patterns) { { 'foo' => '~1.0.0beta' } }
it { is_expected.to be_empty }
end end
end end
def build_names_and_version_patterns(*package_dependencies)
result = Hash.new { |h, dependency| h[dependency.name] = dependency.version_pattern }
package_dependencies.each { |dependency| result[dependency] }
result
end
end end
...@@ -24,6 +24,11 @@ describe Packages::CreateDependencyService do ...@@ -24,6 +24,11 @@ describe Packages::CreateDependencyService do
subject { described_class.new(package, dependencies).execute } subject { described_class.new(package, dependencies).execute }
it 'creates dependencies and links' do it 'creates dependencies and links' do
expect(Packages::Dependency)
.to receive(:ids_for_package_names_and_version_patterns)
.once
.and_call_original
expect { subject } expect { subject }
.to change { Packages::Dependency.count }.by(1) .to change { Packages::Dependency.count }.by(1)
.and change { Packages::DependencyLink.count }.by(1) .and change { Packages::DependencyLink.count }.by(1)
...@@ -35,6 +40,11 @@ describe Packages::CreateDependencyService do ...@@ -35,6 +40,11 @@ describe Packages::CreateDependencyService do
let(:json_file) { 'npm/payload_with_duplicated_packages.json' } let(:json_file) { 'npm/payload_with_duplicated_packages.json' }
it 'creates dependencies and links' do it 'creates dependencies and links' do
expect(Packages::Dependency)
.to receive(:ids_for_package_names_and_version_patterns)
.exactly(5).times
.and_call_original
expect { subject } expect { subject }
.to change { Packages::Dependency.count }.by(4) .to change { Packages::Dependency.count }.by(4)
.and change { Packages::DependencyLink.count }.by(7) .and change { Packages::DependencyLink.count }.by(7)
...@@ -43,6 +53,29 @@ describe Packages::CreateDependencyService do ...@@ -43,6 +53,29 @@ describe Packages::CreateDependencyService do
end end
end end
context 'with dependencies bulk insert conflicts' do
let_it_be(:rows) { [{ name: 'express', version_pattern: '^4.16.4' }] }
it 'creates dependences and links' do
original_bulk_insert = ::Gitlab::Database.method(:bulk_insert)
expect(::Gitlab::Database)
.to receive(:bulk_insert) do |table, rows, return_ids: false, disable_quote: [], on_conflict: nil|
call_count = table == Packages::Dependency.table_name ? 2 : 1
call_count.times { original_bulk_insert.call(table, rows, return_ids: return_ids, disable_quote: disable_quote, on_conflict: on_conflict) }
end.twice
expect(Packages::Dependency)
.to receive(:ids_for_package_names_and_version_patterns)
.twice
.and_call_original
expect { subject }
.to change { Packages::Dependency.count }.by(1)
.and change { Packages::DependencyLink.count }.by(1)
expect(dependency_names).to match_array(%w(express))
expect(dependency_link_types).to match_array(%w(dependencies))
end
end
context 'with existing dependencies' do context 'with existing dependencies' do
let(:other_package) { create(:npm_package) } let(:other_package) { create(:npm_package) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment