Commit 46d6c0b0 authored by Alexandru Croitor's avatar Alexandru Croitor

Fix fixture max inserts

Projects now have a project_namespace_id column, so in order
to mass generate projects we also need to mass generate the
corresponding project namespaces. Because we generate a massive
number of projects and project namespaces, together with their
routes and feature settings, the transaction becomes too big,
which leads to timeouts or to the transaction taking too long
to commit. Splitting the generation of the corresponding
associations (routes, features, etc.) into separate files
keeps the transactions smaller.
parent 6fb14df7
......@@ -4,8 +4,8 @@ class Gitlab::Seeder::Users
include ActionView::Helpers::NumberHelper
RANDOM_USERS_COUNT = 20
MASS_NAMESPACES_COUNT = 100
MASS_USERS_COUNT = ENV['CI'] ? 10 : 1_000_000
attr_reader :opts
def initialize(opts = {})
......@@ -15,6 +15,7 @@ class Gitlab::Seeder::Users
# Entry point of the user seeder. Runs the mass-user, mass-namespace and
# random-user steps, in that order, inside a Sidekiq::Testing.inline! block.
def seed!
Sidekiq::Testing.inline! do
create_mass_users!
create_mass_namespaces!
create_random_users!
end
end
......@@ -26,20 +27,22 @@ class Gitlab::Seeder::Users
Gitlab::Seeder.with_mass_insert(MASS_USERS_COUNT, User) do
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO users (username, name, email, confirmed_at, projects_limit, encrypted_password)
INSERT INTO users (username, name, email, state, confirmed_at, projects_limit, encrypted_password)
SELECT
'#{Gitlab::Seeder::MASS_INSERT_USER_START}' || seq,
'Seed user ' || seq,
'seed_user' || seq || '@example.com',
'active',
to_timestamp(seq),
#{MASS_USERS_COUNT},
'#{encrypted_password}'
FROM generate_series(1, #{MASS_USERS_COUNT}) AS seq
ON CONFLICT DO NOTHING;
SQL
end
relation = User.where(admin: false)
Gitlab::Seeder.with_mass_insert(relation.count, Namespace) do
Gitlab::Seeder.with_mass_insert(relation.count, 'user namespaces') do
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO namespaces (name, path, owner_id, type)
SELECT
......@@ -48,6 +51,16 @@ class Gitlab::Seeder::Users
id,
'User'
FROM users WHERE NOT admin
ON CONFLICT DO NOTHING;
SQL
end
Gitlab::Seeder.with_mass_insert(relation.count, "User namespaces routes") do
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO routes (namespace_id, source_id, source_type, path, name)
SELECT id as namespace_id, id as source_id, 'Namespace', path, name
FROM namespaces WHERE type IS NULL OR type = 'User'
ON CONFLICT DO NOTHING;
SQL
end
......@@ -74,6 +87,97 @@ class Gitlab::Seeder::Users
end
end
# Mass-inserts a group hierarchy with raw SQL and backfills the rows that
# depend on it.
#
# Steps, all executed inside a single Gitlab::Seeder.with_mass_insert block:
#   1. insert MASS_NAMESPACES_COUNT root groups (level 0);
#   2. for levels 1..9, insert two child groups under every group of the
#      previous level;
#   3. insert a route for every 'Group' namespace reachable from a root
#      group, using the '/'-joined chain of paths as both path and name;
#   4. fill traversal_ids (chain of ids from the root) for the namespaces
#      whose path starts with MASS_INSERT_PREFIX;
#   5. insert a namespace_settings row for every namespace.
#
# Every INSERT uses ON CONFLICT DO NOTHING.
def create_mass_namespaces!
  Gitlab::Seeder.with_mass_insert(MASS_NAMESPACES_COUNT, "root namespaces and subgroups 9 levels deep") do
    ActiveRecord::Base.connection.execute <<~SQL
      INSERT INTO namespaces (name, path, type)
      SELECT
      'mass insert group level 0 - ' || seq,
      '#{Gitlab::Seeder::MASS_INSERT_GROUP_START}_0_' || seq,
      'Group'
      FROM generate_series(1, #{MASS_NAMESPACES_COUNT}) AS seq
      ON CONFLICT DO NOTHING;
    SQL

    (1..9).each do |idx|
      # Only groups of the previous level receive children, so the reported
      # number is based on those parents (same path filter as the INSERT
      # below) rather than on every mass-inserted group created so far.
      parent_path_pattern = "#{Gitlab::Seeder::MASS_INSERT_GROUP_START}_#{idx - 1}_%"
      count = Namespace.where(type: 'Group').where("path LIKE ?", parent_path_pattern).count * 2

      Gitlab::Seeder.log_message("Creating subgroups at level #{idx}: #{count}")

      ActiveRecord::Base.connection.execute <<~SQL
        INSERT INTO namespaces (name, path, type, parent_id)
        SELECT
        'mass insert group level #{idx} - ' || seq,
        '#{Gitlab::Seeder::MASS_INSERT_GROUP_START}_#{idx}_' || seq,
        'Group',
        namespaces.id
        FROM namespaces
        CROSS JOIN generate_series(1, 2) AS seq
        WHERE namespaces.type='Group' AND namespaces.path like '#{Gitlab::Seeder::MASS_INSERT_GROUP_START}_#{idx-1}_%'
        ON CONFLICT DO NOTHING;
      SQL
    end

    Gitlab::Seeder.log_message("creating routes.")

    # The recursive CTE walks the hierarchy from the root groups downwards,
    # accumulating the full path of each group on the way.
    ActiveRecord::Base.connection.execute <<~SQL
      WITH RECURSIVE cte(source_id, namespace_id, parent_id, path, height) AS (
      (
      SELECT ARRAY[batch.id], batch.id, batch.parent_id, batch.path, 1
      FROM
      "namespaces" as batch
      WHERE
      "batch"."type" = 'Group' AND "batch"."parent_id" is null
      )
      UNION
      (
      SELECT array_append(cte.source_id, n.id), n.id, n.parent_id, cte.path || '/' || n.path, cte.height+1
      FROM
      "namespaces" as n,
      "cte"
      WHERE
      "n"."type" = 'Group'
      AND "n"."parent_id" = "cte"."namespace_id"
      )
      )
      INSERT INTO routes (namespace_id, source_id, source_type, path, name)
      SELECT cte.namespace_id as namespace_id, cte.namespace_id as source_id, 'Namespace', cte.path, cte.path FROM cte
      ON CONFLICT DO NOTHING;
    SQL

    Gitlab::Seeder.log_message("filling traversal ids.")

    # Same walk as above, but only the accumulated id chain is kept and
    # written to traversal_ids.
    ActiveRecord::Base.connection.execute <<~SQL
      WITH RECURSIVE cte(source_id, namespace_id, parent_id) AS (
      (
      SELECT ARRAY[batch.id], batch.id, batch.parent_id
      FROM
      "namespaces" as batch
      WHERE
      "batch"."type" = 'Group' AND "batch"."parent_id" is null
      )
      UNION
      (
      SELECT array_append(cte.source_id, n.id), n.id, n.parent_id
      FROM
      "namespaces" as n,
      "cte"
      WHERE
      "n"."type" = 'Group'
      AND "n"."parent_id" = "cte"."namespace_id"
      )
      )
      UPDATE namespaces
      SET traversal_ids = computed.source_id FROM (SELECT namespace_id, source_id FROM cte) AS computed
      where computed.namespace_id = namespaces.id AND namespaces.path LIKE '#{Gitlab::Seeder::MASS_INSERT_PREFIX}%'
    SQL

    Gitlab::Seeder.log_message("creating namespace settings.")

    ActiveRecord::Base.connection.execute <<~SQL
      INSERT INTO namespace_settings(namespace_id, created_at, updated_at)
      SELECT id, now(), now() FROM namespaces
      ON CONFLICT DO NOTHING;
    SQL
  end
end
# Returns a 16-character hexadecimal password. The value is generated on
# first use and memoized, so every later call returns the same string.
def random_password
  @random_password ||= SecureRandom.hex[0, 16]
end
......
......@@ -53,14 +53,56 @@ class Gitlab::Seeder::Projects
public: 1 # 1m projects = 5m total
}
BATCH_SIZE = 100_000
# Entry point of the project seeder. Runs the project creation steps listed
# below, in order, inside a Sidekiq::Testing.inline! block.
def seed!
Sidekiq::Testing.inline! do
create_real_projects!
create_large_projects!
create_mass_projects!
end
end
# Builds the SQL that inserts the 'Project' namespaces for one batch of
# parent namespaces.
#
# For every mass-inserted namespace of the given type whose id lies in the
# range, one row is generated per entry of the per-user visibility list.
#
# @param type [String] value matched against namespaces.type of the parents
# @param range [#first, #last] lowest and highest parent namespace id
# @return [String] a single INSERT ... SELECT statement
def self.insert_project_namespaces_sql(type:, range:)
  visibility_names = Gitlab::Seeder::Projects.visibility_per_user
  visibility_levels = Gitlab::Seeder::Projects.visibility_level_per_user
  projects_per_parent = Gitlab::Seeder::Projects.projects_per_user_count
  lowest_id = range.first
  highest_id = range.last

  <<~SQL
    INSERT INTO namespaces (name, path, parent_id, owner_id, type, visibility_level, created_at, updated_at)
    SELECT
    'Seed project ' || seq || ' ' || ('{#{visibility_names}}'::text[])[seq] AS project_name,
    '#{Gitlab::Seeder::MASS_INSERT_PROJECT_START}' || ('{#{visibility_names}}'::text[])[seq] || '_' || seq AS namespace_path,
    n.id AS parent_id,
    n.owner_id AS owner_id,
    'Project' AS type,
    ('{#{visibility_levels}}'::int[])[seq] AS visibility_level,
    NOW() AS created_at,
    NOW() AS updated_at
    FROM namespaces n
    CROSS JOIN generate_series(1, #{projects_per_parent}) AS seq
    WHERE type='#{type}' AND path LIKE '#{Gitlab::Seeder::MASS_INSERT_PREFIX}%'
    AND n.id BETWEEN #{lowest_id} AND #{highest_id}
    ON CONFLICT DO NOTHING;
  SQL
end
# Builds the SQL that inserts one project per 'Project' namespace whose
# parent is a mass-inserted namespace of the given type with an id inside
# the range. Name, path, owner and visibility are copied from the project
# namespace row.
#
# @param type [String] value matched against namespaces.type of the parents
# @param range [#first, #last] lowest and highest parent namespace id
# @return [String] a single INSERT ... SELECT statement
def self.insert_projects_sql(type:, range:)
  path_prefix = Gitlab::Seeder::MASS_INSERT_PREFIX
  lowest_id = range.first
  highest_id = range.last

  <<~SQL
    INSERT INTO projects (name, path, creator_id, namespace_id, project_namespace_id, visibility_level, created_at, updated_at)
    SELECT
    n.name AS project_name,
    n.path AS project_path,
    n.owner_id AS creator_id,
    n.parent_id AS namespace_id,
    n.id AS project_namespace_id,
    n.visibility_level AS visibility_level,
    NOW() AS created_at,
    NOW() AS updated_at
    FROM namespaces n
    WHERE type = 'Project' AND n.parent_id IN (
    SELECT id FROM namespaces n1 WHERE type='#{type}'
    AND path LIKE '#{path_prefix}%' AND n1.id BETWEEN #{lowest_id} AND #{highest_id}
    )
    ON CONFLICT DO NOTHING;
  SQL
end
private
def create_real_projects!
......@@ -156,55 +198,26 @@ class Gitlab::Seeder::Projects
end
end
def create_mass_projects!
projects_per_user_count = MASS_PROJECTS_COUNT_PER_USER.values.sum
visibility_per_user = ['private'] * MASS_PROJECTS_COUNT_PER_USER.fetch(:private) +
['internal'] * MASS_PROJECTS_COUNT_PER_USER.fetch(:internal) +
['public'] * MASS_PROJECTS_COUNT_PER_USER.fetch(:public)
visibility_level_per_user = visibility_per_user.map { |visibility| Gitlab::VisibilityLevel.level_value(visibility) }
visibility_per_user = visibility_per_user.join(',')
visibility_level_per_user = visibility_level_per_user.join(',')
Gitlab::Seeder.with_mass_insert(User.count * projects_per_user_count, "Projects and relations") do
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO projects (name, path, creator_id, namespace_id, visibility_level, created_at, updated_at)
SELECT
'Seed project ' || seq || ' ' || ('{#{visibility_per_user}}'::text[])[seq] AS project_name,
'#{Gitlab::Seeder::MASS_INSERT_PROJECT_START}' || ('{#{visibility_per_user}}'::text[])[seq] || '_' || seq AS project_path,
u.id AS user_id,
n.id AS namespace_id,
('{#{visibility_level_per_user}}'::int[])[seq] AS visibility_level,
NOW() AS created_at,
NOW() AS updated_at
FROM users u
CROSS JOIN generate_series(1, #{projects_per_user_count}) AS seq
JOIN namespaces n ON n.owner_id=u.id
SQL
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO project_features (project_id, merge_requests_access_level, issues_access_level, wiki_access_level,
pages_access_level)
SELECT
id,
#{ProjectFeature::ENABLED} AS merge_requests_access_level,
#{ProjectFeature::ENABLED} AS issues_access_level,
#{ProjectFeature::ENABLED} AS wiki_access_level,
#{ProjectFeature::ENABLED} AS pages_access_level
FROM projects ON CONFLICT (project_id) DO NOTHING;
SQL
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO routes (source_id, source_type, name, path)
SELECT
p.id,
'Project',
u.name || ' / ' || p.name,
u.username || '/' || p.path
FROM projects p JOIN users u ON u.id=p.creator_id
ON CONFLICT (source_type, source_id) DO NOTHING;
SQL
end
# Total number of projects generated per parent namespace: the sum of the
# per-visibility counts in MASS_PROJECTS_COUNT_PER_USER.
def self.projects_per_user_count
  MASS_PROJECTS_COUNT_PER_USER.each_value.sum
end
# Visibility names repeated as many times as MASS_PROJECTS_COUNT_PER_USER
# asks for, ordered private, internal, public. Raises KeyError when one of
# the three keys is missing from the constant.
def self.visibility_per_user_array
  %w[private internal public].flat_map do |visibility|
    [visibility] * MASS_PROJECTS_COUNT_PER_USER.fetch(visibility.to_sym)
  end
end
# Translates every entry of visibility_per_user_array into the value
# returned by Gitlab::VisibilityLevel.level_value, keeping the order.
def self.visibility_level_per_user_map
  visibility_per_user_array.map(&Gitlab::VisibilityLevel.method(:level_value))
end
# Comma-separated form of visibility_per_user_array.
def self.visibility_per_user
  names = visibility_per_user_array
  names.join(',')
end
# Comma-separated form of visibility_level_per_user_map.
def self.visibility_level_per_user
  levels = visibility_level_per_user_map
  levels.join(',')
end
end
......
# frozen_string_literal: true
# Creates the mass-generated projects (and their project namespaces) that
# belong to mass-inserted user namespaces.
class Gitlab::Seeder::UserProjects
  def seed!
    create_user_projects!
  end

  private

  # Walks the mass-inserted 'User' namespaces in batches and, per batch, runs
  # the project-namespace INSERT followed by the project INSERT for the
  # batch's id range.
  def create_user_projects!
    projects_per_namespace = Gitlab::Seeder::Projects.projects_per_user_count
    user_namespaces = Namespace.where("path LIKE ?", "#{Gitlab::Seeder::MASS_INSERT_PREFIX}%").where(type: 'User')
    expected_total = user_namespaces.count * projects_per_namespace

    Gitlab::Seeder.with_mass_insert(expected_total, "User projects and corresponding project namespaces") do
      user_namespaces.each_batch(of: Gitlab::Seeder::Projects::BATCH_SIZE) do |batch, index|
        # First row of the aggregate query: [lowest id, highest id] of the batch.
        id_bounds = batch.pluck(Arel.sql('MIN(id)'), Arel.sql('MAX(id)')).first
        progress = index * batch.size * projects_per_namespace

        Gitlab::Seeder.log_message("Creating project namespaces: #{progress}.")
        namespaces_sql = Gitlab::Seeder::Projects.insert_project_namespaces_sql(type: 'User', range: id_bounds)
        ActiveRecord::Base.connection.execute(namespaces_sql)

        Gitlab::Seeder.log_message("Creating projects: #{progress}.")
        projects_sql = Gitlab::Seeder::Projects.insert_projects_sql(type: 'User', range: id_bounds)
        ActiveRecord::Base.connection.execute(projects_sql)
      end
    end
  end
end
# Run the user-project seeder inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet { Gitlab::Seeder::UserProjects.new.seed! }
# frozen_string_literal: true
# Creates the mass-generated projects (and their project namespaces) that
# belong to mass-inserted groups.
class Gitlab::Seeder::GroupProjects
  def seed!
    create_projects!
  end

  private

  # Walks the mass-inserted 'Group' namespaces in batches and, per batch,
  # runs the project-namespace INSERT followed by the project INSERT for the
  # batch's id range.
  def create_projects!
    projects_per_group = Gitlab::Seeder::Projects.projects_per_user_count
    groups = Namespace.where("path LIKE ?", "#{Gitlab::Seeder::MASS_INSERT_PREFIX}%").where(type: 'Group')
    expected_total = groups.count * projects_per_group

    Gitlab::Seeder.with_mass_insert(expected_total, "Projects and corresponding project namespaces") do
      groups.each_batch(of: Gitlab::Seeder::Projects::BATCH_SIZE) do |batch, index|
        # First row of the aggregate query: [lowest id, highest id] of the batch.
        id_bounds = batch.pluck(Arel.sql('MIN(id)'), Arel.sql('MAX(id)')).first
        progress = index * batch.size * projects_per_group

        Gitlab::Seeder.log_message("Creating projects namespaces: #{progress}.")
        namespaces_sql = Gitlab::Seeder::Projects.insert_project_namespaces_sql(type: 'Group', range: id_bounds)
        ActiveRecord::Base.connection.execute(namespaces_sql)

        Gitlab::Seeder.log_message("Creating projects: #{progress}.")
        projects_sql = Gitlab::Seeder::Projects.insert_projects_sql(type: 'Group', range: id_bounds)
        ActiveRecord::Base.connection.execute(projects_sql)
      end
    end
  end
end
# Run the group-project seeder inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet { Gitlab::Seeder::GroupProjects.new.seed! }
# frozen_string_literal: true
# Inserts a project_features row for every project, one batch of project ids
# at a time.
class Gitlab::Seeder::ProjectFeatures
  include ActionView::Helpers::NumberHelper

  BATCH_SIZE = 100_000

  def seed!
    create_project_features!
  end

  # Iterates over all projects in batches of BATCH_SIZE and, for each batch's
  # id range, inserts feature rows with the four listed access levels set to
  # ProjectFeature::ENABLED. Existing rows are left alone (ON CONFLICT DO NOTHING).
  def create_project_features!
    Gitlab::Seeder.with_mass_insert(Project.count, "Project features") do
      Project.each_batch(of: BATCH_SIZE) do |batch, index|
        id_bounds = batch.pluck(Arel.sql('MIN(id)'), Arel.sql('MAX(id)')).first
        enabled = ProjectFeature::ENABLED

        Gitlab::Seeder.log_message("Creating project features: #{index * BATCH_SIZE}.")

        ActiveRecord::Base.connection.execute <<~SQL
          INSERT INTO project_features (project_id, merge_requests_access_level, issues_access_level, wiki_access_level, pages_access_level)
          SELECT
          id,
          #{enabled} AS merge_requests_access_level,
          #{enabled} AS issues_access_level,
          #{enabled} AS wiki_access_level,
          #{enabled} AS pages_access_level
          FROM projects
          WHERE projects.id BETWEEN #{id_bounds.first} AND #{id_bounds.last}
          ON CONFLICT DO NOTHING;
        SQL
      end
    end
  end
end
# Run the project-features seeder inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet { Gitlab::Seeder::ProjectFeatures.new.seed! }
# frozen_string_literal: true
# Inserts a route for every project, one batch of project ids at a time.
class Gitlab::Seeder::ProjectRoutes
  include ActionView::Helpers::NumberHelper

  BATCH_SIZE = 100_000

  def seed!
    create_project_routes!
  end

  # Iterates over all projects in batches of half BATCH_SIZE. For each
  # batch's id range a route is inserted per project, built by appending the
  # project's name and path to the 'Namespace' route of its parent namespace.
  # Existing rows are left alone (ON CONFLICT DO NOTHING).
  def create_project_routes!
    per_batch = BATCH_SIZE / 2

    Gitlab::Seeder.with_mass_insert(Project.count, "Project routes") do
      Project.each_batch(of: per_batch) do |batch, index|
        id_bounds = batch.pluck(Arel.sql('MIN(id)'), Arel.sql('MAX(id)')).first

        Gitlab::Seeder.log_message("Creating project routes: #{index * BATCH_SIZE / 2}.")

        ActiveRecord::Base.connection.execute <<~SQL
          INSERT INTO routes (namespace_id, source_id, source_type, name, path)
          SELECT
          p.project_namespace_id as namespace_id,
          p.id as source_id,
          'Project',
          routes.name || ' / ' || p.name,
          routes.path || '/' || p.path
          FROM projects p
          INNER JOIN routes ON routes.source_id = p.namespace_id and source_type = 'Namespace'
          WHERE p.id BETWEEN #{id_bounds.first} AND #{id_bounds.last}
          ON CONFLICT DO NOTHING;
        SQL
      end
    end
  end
end
# Run the project-routes seeder inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet { Gitlab::Seeder::ProjectRoutes.new.seed! }
......@@ -37,13 +37,15 @@ class Gitlab::Seeder::ProjectLabels
end
Gitlab::Seeder.quiet do
puts "\nGenerating group labels"
Group.all.find_each do |group|
Gitlab::Seeder::GroupLabels.new(group).seed!
label_per_group = 10
puts "\nGenerating group labels: #{Group.not_mass_generated.count * label_per_group}"
Group.not_mass_generated.find_each do |group|
Gitlab::Seeder::GroupLabels.new(group, label_per_group: label_per_group).seed!
end
puts "\nGenerating project labels"
label_per_project = 5
puts "\nGenerating project labels: #{Project.not_mass_generated.count * label_per_project}"
Project.not_mass_generated.find_each do |project|
Gitlab::Seeder::ProjectLabels.new(project).seed!
Gitlab::Seeder::ProjectLabels.new(project, label_per_project: label_per_project).seed!
end
end
......@@ -2,7 +2,7 @@ require './spec/support/sidekiq_middleware'
Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do
Group.all.each do |group|
Group.not_mass_generated.each do |group|
User.not_mass_generated.sample(4).each do |user|
if group.add_user(user, Gitlab::Access.values.sample).persisted?
print '.'
......
......@@ -41,7 +41,7 @@ end
Gitlab::Seeder.quiet do
puts "\nGenerating group crm organizations and contacts"
Group.where('parent_id IS NULL').first(10).each do |group|
Group.not_mass_generated.where('parent_id IS NULL').first(10).each do |group|
Gitlab::Seeder::Crm.new(group).seed!
end
end
# frozen_string_literal: true
Gitlab::Seeder.quiet do
Group.all.each do |group|
Group.not_mass_generated.each do |group|
5.times do
epic_params = {
title: FFaker::Lorem.sentence(6),
......
# frozen_string_literal: true
Gitlab::Seeder.quiet do
groups = Group.take(5)
groups = Group.not_mass_generated.take(5)
next if groups.empty?
......
......@@ -9,7 +9,7 @@ module Iterations
end
Gitlab::Seeder.quiet do
Group.all.each do |group|
Group.not_mass_generated.each do |group|
cadences = []
1000.times do
random_number = rand(5)
......
......@@ -4,12 +4,24 @@ module Gitlab
class Seeder
extend ActionView::Helpers::NumberHelper
MASS_INSERT_PROJECT_START = 'mass_insert_project_'
MASS_INSERT_USER_START = 'mass_insert_user_'
MASS_INSERT_PREFIX = 'mass_insert'
MASS_INSERT_PROJECT_START = "#{MASS_INSERT_PREFIX}_project_"
MASS_INSERT_GROUP_START = "#{MASS_INSERT_PREFIX}_group_"
MASS_INSERT_USER_START = "#{MASS_INSERT_PREFIX}_user_"
REPORTED_USER_START = 'reported_user_'
ESTIMATED_INSERT_PER_MINUTE = 2_000_000
ESTIMATED_INSERT_PER_MINUTE = 250_000
MASS_INSERT_ENV = 'MASS_INSERT'
# Seed-only concern that adds a scope for skipping mass-generated groups.
module NamespaceSeed
  extend ActiveSupport::Concern

  included do
    # Excludes rows whose path starts with MASS_INSERT_GROUP_START.
    scope :not_mass_generated, -> { where.not("path LIKE '#{MASS_INSERT_GROUP_START}%'") }
  end
end
module ProjectSeed
extend ActiveSupport::Concern
......@@ -30,6 +42,10 @@ module Gitlab
end
end
# Prints the message to standard output, prefixed with the current time and
# a colon.
def self.log_message(message)
  timestamp = Time.current
  puts "#{timestamp}: #{message}"
end
def self.with_mass_insert(size, model)
humanized_model_name = model.is_a?(String) ? model : model.model_name.human.pluralize(size)
......@@ -63,6 +79,7 @@ module Gitlab
def self.quiet
# Additional seed logic for models.
Namespace.include(NamespaceSeed)
Project.include(ProjectSeed)
User.include(UserSeed)
......
......@@ -10,7 +10,12 @@ namespace :dev do
Gitlab::Database::EachDatabase.each_database_connection do |connection|
# Make sure DB statistics are up to date.
# gitlab:setup task can insert quite a bit of data, especially with MASS_INSERT=1
# so ANALYZE can take more than default 15s statement timeout. This being a dev task,
# we disable the statement timeout for ANALYZE to run and enable it back afterwards.
connection.execute('SET statement_timeout TO 0')
connection.execute('ANALYZE')
connection.execute('RESET statement_timeout')
end
Rake::Task["gitlab:shell:setup"].invoke
......
......@@ -3,6 +3,24 @@
require 'spec_helper'
RSpec.describe Gitlab::Seeder do
describe Namespace do
  subject { described_class }

  # The scope only exists once Gitlab::Seeder.quiet has included the seed
  # concern, so it is checked both before and inside the quiet block.
  it 'has not_mass_generated scope' do
    expect { described_class.not_mass_generated }.to raise_error(NoMethodError)

    Gitlab::Seeder.quiet do
      expect { described_class.not_mass_generated }.not_to raise_error
    end
  end

  it 'includes NamespaceSeed module' do
    Gitlab::Seeder.quiet do
      expect(described_class).to include_module(Gitlab::Seeder::NamespaceSeed)
    end
  end
end
describe '.quiet' do
let(:database_base_models) do
{
......@@ -50,4 +68,13 @@ RSpec.describe Gitlab::Seeder do
notification_service.new_note(note)
end
end
describe '.log_message' do
  it 'prepends timestamp to the logged message' do
    freeze_time do
      message = "some message."

      # Compare against the exact line. Interpolating the timestamp and the
      # message into a Regexp left their characters unescaped, so "." matched
      # any character and a "+HHMM" zone offset would be read as a quantifier.
      expected_output = "#{Time.current}: #{message}\n"

      expect { described_class.log_message(message) }.to output(expected_output).to_stdout
    end
  end
end
end
......@@ -17,7 +17,9 @@ RSpec.describe 'dev rake tasks' do
it 'sets up the development environment', :aggregate_failures do
expect(Rake::Task['gitlab:setup']).to receive(:invoke)
expect(connections).to all(receive(:execute).with('SET statement_timeout TO 0'))
expect(connections).to all(receive(:execute).with('ANALYZE'))
expect(connections).to all(receive(:execute).with('RESET statement_timeout'))
expect(Rake::Task['gitlab:shell:setup']).to receive(:invoke)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment