Commit 7ae15546 authored by Jan Provaznik

Merge branch '28149-improve-seed' into 'master'

Seed dev database with massive amount of Users, Projects and its relations

Closes #17211

See merge request gitlab-org/gitlab!16700
parents 4f438c5a e3dc3bfc
# frozen_string_literal: true
# Seeds development users in two passes: a bulk SQL insert of synthetic
# accounts (and a namespace row per non-admin user), followed by a small
# batch of FFaker-generated accounts created through the model.
class Gitlab::Seeder::Users
  include ActionView::Helpers::NumberHelper

  # Number of FFaker-generated users created through User.create!.
  RANDOM_USERS_COUNT = 20
  # Number of rows produced by the bulk insert; kept tiny when ENV['CI'] is set.
  MASS_USERS_COUNT = ENV['CI'] ? 10 : 1_000_000
  # Username prefix shared by every bulk-inserted user.
  MASS_INSERT_USERNAME_START = 'mass_insert_user_'

  attr_reader :opts

  # @param opts [Hash] stored and exposed via #opts; not read elsewhere in this class
  def initialize(opts = {})
    @opts = opts
  end

  # Runs both seeding passes inside Sidekiq::Testing.inline!.
  def seed!
    Sidekiq::Testing.inline! do
      create_mass_users!
      create_random_users!
    end
  end

  private

  # Bulk-inserts the synthetic users, then a namespace row for each non-admin user.
  def create_mass_users!
    insert_mass_users!
    insert_mass_namespaces!
  end

  # Inserts MASS_USERS_COUNT users with a single INSERT ... SELECT over
  # generate_series. Every row shares one pre-computed password digest.
  def insert_mass_users!
    password_digest = Devise::Encryptor.digest(User, '12345678')

    # NOTE(review): projects_limit is filled with MASS_USERS_COUNT - confirm
    # that a limit equal to the user count is what is intended here.
    Gitlab::Seeder.with_mass_insert(MASS_USERS_COUNT, User) do
      ActiveRecord::Base.connection.execute <<~SQL
        INSERT INTO users (username, name, email, confirmed_at, projects_limit, encrypted_password)
        SELECT
        '#{MASS_INSERT_USERNAME_START}' || seq,
        'Seed user ' || seq,
        'seed_user' || seq || '@example.com',
        to_timestamp(seq),
        #{MASS_USERS_COUNT},
        '#{password_digest}'
        FROM generate_series(1, #{MASS_USERS_COUNT}) AS seq
      SQL
    end
  end

  # Inserts one namespace row per non-admin user, reusing the username as
  # both the namespace name and path. The count is only used for reporting.
  def insert_mass_namespaces!
    non_admin_total = User.where(admin: false).count

    Gitlab::Seeder.with_mass_insert(non_admin_total, Namespace) do
      ActiveRecord::Base.connection.execute <<~SQL
        INSERT INTO namespaces (name, path, owner_id)
        SELECT
        username,
        username,
        id
        FROM users WHERE NOT admin
      SQL
    end
  end

  # Creates RANDOM_USERS_COUNT users with FFaker data, printing '.' for each
  # success and 'F' for each record that fails validation.
  def create_random_users!
    RANDOM_USERS_COUNT.times do
      marker =
        begin
          User.create!(random_user_attributes)
          '.'
        rescue ActiveRecord::RecordInvalid
          'F'
        end

      print marker
    end
  end

  # Attribute hash for one randomly generated, already-confirmed user.
  def random_user_attributes
    {
      username: FFaker::Internet.user_name,
      name: FFaker::Name.name,
      email: FFaker::Internet.email,
      confirmed_at: DateTime.now,
      password: '12345678'
    }
  end
end
# Run the users seeder inside Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet { Gitlab::Seeder::Users.new.seed! }
This diff is collapsed.
......@@ -43,7 +43,7 @@ Gitlab::Seeder.quiet do
end
puts "\nGenerating project labels"
Project.all.find_each do |project|
Project.not_mass_generated.find_each do |project|
Gitlab::Seeder::ProjectLabels.new(project).seed!
end
end
require './spec/support/sidekiq'
Gitlab::Seeder.quiet do
  # Creates one confirmed user from the given identity attributes, printing
  # '.' on success and 'F' when the record fails validation.
  create_user = lambda do |identity|
    begin
      User.create!(identity.merge(confirmed_at: DateTime.now, password: '12345678'))
      print '.'
    rescue ActiveRecord::RecordInvalid
      print 'F'
    end
  end

  # Twenty users with FFaker-generated identities.
  20.times do
    create_user.call(
      username: FFaker::Internet.user_name,
      name: FFaker::Name.name,
      email: FFaker::Internet.email
    )
  end

  # Five users with predictable names: user0 .. user4.
  5.times do |index|
    create_user.call(
      username: "user#{index}",
      name: "User #{index}",
      email: "user#{index}@example.com"
    )
  end
end
......@@ -3,7 +3,7 @@ require './spec/support/sidekiq'
Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do
Group.all.each do |group|
User.all.sample(4).each do |user|
User.not_mass_generated.sample(4).each do |user|
if group.add_user(user, Gitlab::Access.values.sample).persisted?
print '.'
else
......@@ -12,8 +12,8 @@ Sidekiq::Testing.inline! do
end
end
Project.all.each do |project|
User.all.sample(4).each do |user|
Project.not_mass_generated.each do |project|
User.not_mass_generated.sample(4).each do |user|
if project.add_role(user, Gitlab::Access.sym_options.keys.sample)
print '.'
else
......
require './spec/support/sidekiq'
Gitlab::Seeder.quiet do
Project.all.each do |project|
Project.not_mass_generated.each do |project|
5.times do |i|
milestone_params = {
title: "v#{i}.0",
......
......@@ -4,7 +4,13 @@ Gitlab::Seeder.quiet do
# Limit the number of merge requests per project to avoid long seeds
MAX_NUM_MERGE_REQUESTS = 10
Project.non_archived.with_merge_requests_enabled.reject(&:empty_repo?).each do |project|
projects = Project
.non_archived
.with_merge_requests_enabled
.not_mass_generated
.reject(&:empty_repo?)
projects.each do |project|
branches = project.repository.branch_names.sample(MAX_NUM_MERGE_REQUESTS * 2)
branches.each do |branch_name|
......
......@@ -9,7 +9,7 @@ Sidekiq::Testing.disable! do
# that it falls under `Sidekiq::Testing.disable!`.
Key.skip_callback(:commit, :after, :add_to_shell)
User.first(10).each do |user|
User.not_mass_generated.first(10).each do |user|
key = "ssh-rsa AAAAB3NzaC1yc2EAAAABJQAAAIEAiPWx6WM4lhHNedGfBpPJNPpZ7yKu+dnn1SJejgt#{user.id + 100}6k6YjzGGphH2TUxwKzxcKDKKezwkpfnxPkSMkuEspGRt/aZZ9wa++Oi7Qkr8prgHc4soW6NUlfDzpvZK2H5E7eQaSeP3SAwGmQKUFHCddNaP0L+hM7zhFNzjFvpaMgJw0="
key = user.keys.create(
......
......@@ -25,7 +25,7 @@ end
eos
50.times do |i|
user = User.all.sample
user = User.not_mass_generated.sample
PersonalSnippet.seed(:id, [{
id: i,
......
......@@ -214,7 +214,7 @@ class Gitlab::Seeder::Pipelines
end
Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project|
Project.not_mass_generated.sample(5).each do |project|
project_builds = Gitlab::Seeder::Pipelines.new(project)
project_builds.seed!
end
......
......@@ -3,7 +3,7 @@ require './spec/support/sidekiq'
Gitlab::Seeder.quiet do
admin_user = User.find(1)
Project.all.each do |project|
Project.not_mass_generated.each do |project|
params = {
name: 'master'
}
......
......@@ -217,7 +217,7 @@ Gitlab::Seeder.quiet do
flag = 'SEED_CYCLE_ANALYTICS'
if ENV[flag]
Project.find_each do |project|
Project.not_mass_generated.find_each do |project|
# This seed naively assumes that every project has a repository, and every
# repository has a `master` branch, which may be the case for a pristine
# GDK seed, but is almost never true for a GDK that's actually had
......
......@@ -67,7 +67,7 @@ class Gitlab::Seeder::Environments
end
Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project|
Project.not_mass_generated.sample(5).each do |project|
project_environments = Gitlab::Seeder::Environments.new(project)
project_environments.seed!
end
......
......@@ -22,7 +22,7 @@ module Db
end
def self.random_user
User.find(User.pluck(:id).sample)
User.find(User.not_mass_generated.pluck(:id).sample)
end
end
end
......
......@@ -2,8 +2,8 @@ require './spec/support/sidekiq'
Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do
User.all.sample(10).each do |user|
source_project = Project.public_only.sample
User.not_mass_generated.sample(10).each do |user|
source_project = Project.not_mass_generated.public_only.sample
##
# 03_project.rb might not have created a public project because
......
......@@ -12,6 +12,14 @@ The `setup` task is an alias for `gitlab:setup`.
This task calls `db:reset` to create the database, and calls `db:seed_fu` to seed the database.
Note: `db:setup` calls `db:seed` but this does nothing.
### Env variables
**MASS_INSERT**: Create millions of users (2m), projects (5m) and their
relations. It's highly recommended to run the seed with it to catch slow queries
while developing. Expect the process to take up to 20 extra minutes.
**LARGE_PROJECTS**: Create large projects (through import) from a predefined set of URLs.
### Seeding issues for all or a given project
You can seed issues for all or a given project with the `gitlab:seed:issues`
......
......@@ -88,7 +88,7 @@ Gitlab::Seeder.quiet do
seeder = Gitlab::Seeder::Burndown.new(project)
seeder.seed!
else
Project.all.each do |project|
Project.not_mass_generated.each do |project|
seeder = Gitlab::Seeder::Burndown.new(project)
seeder.seed!
end
......
......@@ -128,7 +128,7 @@ class Gitlab::Seeder::Vulnerabilities
end
Gitlab::Seeder.quiet do
Project.joins(:ci_pipelines).distinct.all.sample(5).each do |project|
Project.joins(:ci_pipelines).not_mass_generated.distinct.all.sample(5).each do |project|
seeder = Gitlab::Seeder::Vulnerabilities.new(project)
seeder.seed!
end
......
# frozen_string_literal: true
# EE fixture
Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project|
Project.not_mass_generated.sample(5).each do |project|
project.ci_pipelines.all.sample(2).each do |pipeline|
next if pipeline.source_pipeline
......
......@@ -32,7 +32,7 @@ class Gitlab::Seeder::Packages
end
Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project|
Project.not_mass_generated.sample(5).each do |project|
Gitlab::Seeder::Packages.new(project.owner, project).seed
end
end
......@@ -14,7 +14,71 @@ end
module Gitlab
class Seeder
extend ActionView::Helpers::NumberHelper
ESTIMATED_INSERT_PER_MINUTE = 2_000_000
MASS_INSERT_ENV = 'MASS_INSERT'
# Seed-only extension for Project; Gitlab::Seeder.quiet includes it into the model.
module ProjectSeed
  extend ActiveSupport::Concern

  included do
    # Excludes projects whose path starts with the mass-insert name prefix.
    scope :not_mass_generated, -> {
      where.not("path LIKE '#{Gitlab::Seeder::Projects::MASS_INSERT_NAME_START}%'")
    }
  end
end
# Seed-only extension for User; Gitlab::Seeder.quiet includes it into the model.
module UserSeed
  extend ActiveSupport::Concern

  included do
    # Excludes users whose username starts with the mass-insert username prefix.
    scope :not_mass_generated, -> {
      where.not("username LIKE '#{Gitlab::Seeder::Users::MASS_INSERT_USERNAME_START}%'")
    }
  end
end
# Wraps a bulk-insert block with progress output.
#
# The block only runs when ENV[MASS_INSERT_ENV] or ENV['CI'] is set;
# otherwise a hint is printed and the method returns without yielding.
#
# @param size [Integer] number of rows the block is expected to create
#   (used for messages and the time estimate only)
# @param model [String, Class] a ready-made label, or a model whose
#   humanized, pluralized name is used as the label
# @yield performs the actual insertion
def self.with_mass_insert(size, model)
  humanized_model_name =
    if model.is_a?(String)
      model
    else
      model.model_name.human.pluralize(size)
    end

  mass_insert_enabled = ENV[MASS_INSERT_ENV] || ENV['CI']

  unless mass_insert_enabled
    puts "\nSkipping mass insertion for #{humanized_model_name}."
    puts "Consider running the seed with #{MASS_INSERT_ENV}=1"
    return
  end

  delimited_size = number_with_delimiter(size)
  time_estimate = estimated_time_message(size)

  puts "\nCreating #{delimited_size} #{humanized_model_name}."
  puts time_estimate

  yield

  puts "\n#{number_with_delimiter(size)} #{humanized_model_name} created!"
end
# Builds the human-readable time estimate shown before a mass insert.
#
# The estimate is size / ESTIMATED_INSERT_PER_MINUTE, rounded to the
# nearest whole minute.
#
# @param size [Integer] number of rows about to be inserted
# @return [String] the estimate message
def self.estimated_time_message(size)
  minutes = (size.to_f / ESTIMATED_INSERT_PER_MINUTE).round
  return "Rough estimated time: less than a minute ⏰" if minutes.zero?

  "Rough estimated time: #{minutes} #{'minute'.pluralize(minutes)} ⏰"
end
def self.quiet
# Disable database insertion logs so speed isn't limited by ability to print to console
old_logger = ActiveRecord::Base.logger
ActiveRecord::Base.logger = nil
# Additional seed logic for models.
Project.include(ProjectSeed)
User.include(UserSeed)
mute_notifications
mute_mailer
......@@ -23,6 +87,7 @@ module Gitlab
yield
SeedFu.quiet = false
ActiveRecord::Base.logger = old_logger
puts "\nOK".color(:green)
end
......
......@@ -5,6 +5,10 @@ namespace :dev do
task setup: :environment do
ENV['force'] = 'yes'
Rake::Task["gitlab:setup"].invoke
# Make sure DB statistics are up to date.
ActiveRecord::Base.connection.execute('ANALYZE')
Rake::Task["gitlab:shell:setup"].invoke
end
......
......@@ -22,7 +22,7 @@ namespace :gitlab do
[project]
else
Project.find_each
Project.not_mass_generated.find_each
end
projects.each do |project|
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment