Commit e3dc3bfc authored by Oswaldo Ferreira's avatar Oswaldo Ferreira

Seed data for users, projects and their relations

It handles the very basics of batch-inserting
records using the seed database, and also shows
time estimates.

It leverages generate_series() (from postgres) and
Gitlab::Database.bulk_insert methods for
inserting a large number of users, projects, routes
and namespaces.
parent 9da64b22
...@@ -336,7 +336,6 @@ group :development do ...@@ -336,7 +336,6 @@ group :development do
gem 'letter_opener_web', '~> 1.3.4' gem 'letter_opener_web', '~> 1.3.4'
gem 'rblineprof', '~> 0.3.6', platform: :mri, require: false gem 'rblineprof', '~> 0.3.6', platform: :mri, require: false
gem 'active_record-pg_generate_series', '~> 0.1.2'
# Better errors handler # Better errors handler
gem 'better_errors', '~> 2.5.0' gem 'better_errors', '~> 2.5.0'
......
...@@ -29,8 +29,6 @@ GEM ...@@ -29,8 +29,6 @@ GEM
erubi (~> 1.4) erubi (~> 1.4)
rails-dom-testing (~> 2.0) rails-dom-testing (~> 2.0)
rails-html-sanitizer (~> 1.0, >= 1.0.3) rails-html-sanitizer (~> 1.0, >= 1.0.3)
active_record-pg_generate_series (0.1.3)
activerecord
activejob (5.2.3) activejob (5.2.3)
activesupport (= 5.2.3) activesupport (= 5.2.3)
globalid (>= 0.3.6) globalid (>= 0.3.6)
...@@ -1090,7 +1088,6 @@ DEPENDENCIES ...@@ -1090,7 +1088,6 @@ DEPENDENCIES
RedCloth (~> 4.3.2) RedCloth (~> 4.3.2)
ace-rails-ap (~> 4.1.0) ace-rails-ap (~> 4.1.0)
acme-client (~> 2.0.2) acme-client (~> 2.0.2)
active_record-pg_generate_series (~> 0.1.2)
activerecord-explain-analyze (~> 0.1) activerecord-explain-analyze (~> 0.1)
acts-as-taggable-on (~> 6.0) acts-as-taggable-on (~> 6.0)
addressable (~> 2.5.2) addressable (~> 2.5.2)
......
# frozen_string_literal: true
class Gitlab::Seeder::Users class Gitlab::Seeder::Users
include ActionView::Helpers::NumberHelper include ActionView::Helpers::NumberHelper
RANDOM_USERS_COUNT = 20 RANDOM_USERS_COUNT = 20
MASS_USERS_COUNT = 1_500_000 MASS_USERS_COUNT = ENV['CI'] ? 10 : 1_000_000
MASS_INSERT_USERNAME_START = 'mass_insert_user_'
attr_reader :opts attr_reader :opts
...@@ -12,13 +15,43 @@ class Gitlab::Seeder::Users ...@@ -12,13 +15,43 @@ class Gitlab::Seeder::Users
def seed! def seed!
Sidekiq::Testing.inline! do Sidekiq::Testing.inline! do
create_random_users!
create_mass_users! create_mass_users!
create_random_users!
end end
end end
private private
# Bulk-inserts MASS_USERS_COUNT users with raw SQL, then inserts one
# namespace row per non-admin user.
#
# Both inserts are wrapped in Gitlab::Seeder.with_mass_insert. Every
# generated user shares one pre-computed password digest, and the
# `projects_limit` column is filled with MASS_USERS_COUNT.
def create_mass_users!
  password_digest = Devise::Encryptor.digest(User, '12345678')

  # Usernames, names and emails are derived from the generate_series counter.
  users_sql = <<~SQL
    INSERT INTO users (username, name, email, confirmed_at, projects_limit, encrypted_password)
    SELECT
    '#{MASS_INSERT_USERNAME_START}' || seq,
    'Seed user ' || seq,
    'seed_user' || seq || '@example.com',
    to_timestamp(seq),
    #{MASS_USERS_COUNT},
    '#{password_digest}'
    FROM generate_series(1, #{MASS_USERS_COUNT}) AS seq
  SQL

  Gitlab::Seeder.with_mass_insert(MASS_USERS_COUNT, User) do
    ActiveRecord::Base.connection.execute(users_sql)
  end

  # The namespace name and path both reuse the username.
  namespaces_sql = <<~SQL
    INSERT INTO namespaces (name, path, owner_id)
    SELECT
    username,
    username,
    id
    FROM users WHERE NOT admin
  SQL

  # Counted after the users insert so the reported size matches the rows
  # the namespace statement selects.
  non_admin_count = User.where(admin: false).count

  Gitlab::Seeder.with_mass_insert(non_admin_count, Namespace) do
    ActiveRecord::Base.connection.execute(namespaces_sql)
  end
end
def create_random_users! def create_random_users!
RANDOM_USERS_COUNT.times do |i| RANDOM_USERS_COUNT.times do |i|
begin begin
...@@ -36,26 +69,6 @@ class Gitlab::Seeder::Users ...@@ -36,26 +69,6 @@ class Gitlab::Seeder::Users
end end
end end
end end
# Bulk-creates MASS_USERS_COUNT users through `insert_using_generate_series`.
#
# Username, name, email and confirmation date are derived from the series
# counter `seq`; every user shares one pre-computed password digest.
# ActiveRecord logging is switched off for the duration of the insert and is
# always restored afterwards, even when the insert raises.
def create_mass_users!
  # Disable database insertion logs so speed isn't limited by ability to print to console
  old_logger = ActiveRecord::Base.logger
  ActiveRecord::Base.logger = nil

  encrypted_password = Devise::Encryptor.digest(User, '12345678')

  User.insert_using_generate_series(1, MASS_USERS_COUNT, debug: true) do |sql|
    sql.username = raw("'user' || seq")
    sql.name = raw("'User ' || seq")
    sql.email = raw("'user' || seq || '@example.com'")
    sql.confirmed_at = raw("('1388530801'::timestamp + seq)::date") # 2014-01-01
    sql.encrypted_password = encrypted_password
  end

  puts "\n#{number_with_delimiter(MASS_USERS_COUNT)} users created!"
ensure
  # Reset logging; in `ensure` so a failed insert cannot leave the logger
  # disabled for the rest of the process.
  ActiveRecord::Base.logger = old_logger
end
end end
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
......
This diff is collapsed.
...@@ -43,7 +43,7 @@ Gitlab::Seeder.quiet do ...@@ -43,7 +43,7 @@ Gitlab::Seeder.quiet do
end end
puts "\nGenerating project labels" puts "\nGenerating project labels"
Project.all.find_each do |project| Project.not_mass_generated.find_each do |project|
Gitlab::Seeder::ProjectLabels.new(project).seed! Gitlab::Seeder::ProjectLabels.new(project).seed!
end end
end end
require './spec/support/sidekiq'
# Seeds development projects in two steps: a configurable number of real
# repositories imported from PROJECT_URLS, followed by bulk-inserted project
# rows for each visibility level listed in MASS_PROJECTS_COUNT.
class Gitlab::Seeder::Projects
  include ActionView::Helpers::NumberHelper

  # Repositories imported by `create_real_projects!`, in order.
  PROJECT_URLS = [
    'https://gitlab.com/gitlab-org/gitlab-test.git',
    'https://gitlab.com/gitlab-org/gitlab-ce.git',
    'https://gitlab.com/gitlab-org/gitlab-ci.git',
    'https://gitlab.com/gitlab-org/gitlab-shell.git',
    'https://github.com/documentcloud/underscore.git',
    'https://github.com/twitter/flight.git',
    'https://github.com/twitter/typeahead.js.git',
    'https://github.com/h5bp/html5-boilerplate.git',
    'https://github.com/google/material-design-lite.git',
    'https://github.com/jlevy/the-art-of-command-line.git',
    'https://github.com/FreeCodeCamp/freecodecamp.git',
    'https://github.com/google/deepdream.git',
    'https://github.com/jtleek/datasharing.git',
    'https://github.com/WebAssembly/design.git',
    'https://github.com/airbnb/javascript.git',
    'https://github.com/tessalt/echo-chamber-js.git',
    'https://github.com/atom/atom.git',
    'https://github.com/mattermost/platform.git',
    'https://github.com/purifycss/purifycss.git',
    'https://github.com/facebook/nuclide.git',
    'https://github.com/wbkd/awesome-d3.git',
    'https://github.com/kilimchoi/engineering-blogs.git',
    'https://github.com/gilbarbara/logos.git',
    'https://github.com/gaearon/redux.git',
    'https://github.com/awslabs/s2n.git',
    'https://github.com/arkency/reactjs_koans.git',
    'https://github.com/twbs/bootstrap.git',
    'https://github.com/chjj/ttystudio.git',
    'https://github.com/DrBoolean/mostly-adequate-guide.git',
    'https://github.com/octocat/Spoon-Knife.git',
    'https://github.com/opencontainers/runc.git',
    'https://github.com/googlesamples/android-topeka.git'
  ].freeze

  # Number of bulk-inserted projects per visibility level.
  MASS_PROJECTS_COUNT = {
    private: 2_000_000,
    internal: 30_000,
    public: 265_000
  }.freeze

  attr_reader :opts

  # opts - Hash of options; `:count` is the number of PROJECT_URLS to import.
  def initialize(opts = {})
    @opts = opts
  end

  # Runs both seeding steps with Sidekiq jobs executed inline.
  def seed!
    Sidekiq::Testing.inline! do
      create_real_projects!(opts[:count])
      create_mass_projects!
    end
  end

  private

  # Imports the first `count` repositories of PROJECT_URLS, creating the
  # owning group (named after the URL's second-to-last path segment) when it
  # does not exist yet. Prints '.' per valid project and 'F' plus the
  # validation errors otherwise.
  def create_real_projects!(count)
    owner = User.first

    PROJECT_URLS.first(count).each do |url|
      group_path, project_path = url.split('/')[-2..-1]

      group = Group.find_by(path: group_path)

      unless group
        group = Group.new(
          name: group_path.titleize,
          path: group_path
        )
        group.description = FFaker::Lorem.sentence
        group.save

        group.add_owner(owner)
      end

      # Strip only the trailing ".git" so a ".git" elsewhere in the name survives.
      project_name = project_path.sub(/\.git\z/, '')

      params = {
        import_url: url,
        namespace_id: group.id,
        name: project_name.titleize,
        description: FFaker::Lorem.sentence,
        visibility_level: Gitlab::VisibilityLevel.values.sample
      }

      project = ::Projects::CreateService.new(owner, params).execute

      # Seed-Fu runs this entire fixture in a transaction, so the `after_commit`
      # hook won't run until after the fixture is loaded. That is too late
      # since the Sidekiq::Testing block has already exited. Force clearing
      # the `after_commit` queue to ensure the job is run now.
      project.send(:_run_after_commit_queue)

      if project.valid? && project.valid_repo?
        print '.'
      else
        puts project.errors.full_messages
        print 'F'
      end
    end
  end

  # Bulk-inserts projects for every visibility level in MASS_PROJECTS_COUNT.
  def create_mass_projects!
    # Disable database insertion logs so speed isn't limited by ability to print to console
    old_logger = ActiveRecord::Base.logger
    ActiveRecord::Base.logger = nil

    MASS_PROJECTS_COUNT.each_key do |visibility|
      create_mass_projects_by_visibility!(visibility)
    end
  ensure
    # Reset logging, even when one of the inserts raised.
    ActiveRecord::Base.logger = old_logger
  end

  # Inserts MASS_PROJECTS_COUNT[visibility] project rows named
  # "seed_<visibility>_project_<seq>". All rows of one call share the same
  # namespace and creator.
  def create_mass_projects_by_visibility!(visibility)
    users = User.limit(100)
    groups = Group.limit(100)

    # `users + groups` is a plain Array, so the argument-less `take` used
    # previously raised ArgumentError; pick one candidate with `sample`.
    namespace = (users + groups).sample

    # NOTE(review): `owner_id` on a Group and `namespace_id` on a User are kept
    # from the original code; confirm both attributes exist on those models.
    creator_id = namespace.is_a?(Group) ? namespace.owner_id : users.take.id
    namespace_id = namespace.is_a?(Group) ? namespace.id : namespace.namespace_id

    Project.insert_using_generate_series(1, MASS_PROJECTS_COUNT[visibility], debug: true) do |sql|
      project_name = raw("'seed_#{visibility}_project_' || seq")

      sql.name = project_name
      sql.path = project_name
      sql.creator_id = creator_id
      sql.namespace_id = namespace_id
      sql.visibility_level = Gitlab::VisibilityLevel.level_value(visibility.to_s)
    end

    puts "#{number_with_delimiter(MASS_PROJECTS_COUNT[visibility])} projects created!"
  end
end
# Entry point of the fixture: seed projects quietly, importing as many real
# repositories as the SIZE environment variable requests (8 when unset/blank).
Gitlab::Seeder.quiet do
  requested_size = ENV['SIZE']
  count = requested_size.present? ? requested_size.to_i : 8

  Gitlab::Seeder::Projects.new(count: count).seed!
end
...@@ -3,7 +3,7 @@ require './spec/support/sidekiq' ...@@ -3,7 +3,7 @@ require './spec/support/sidekiq'
Sidekiq::Testing.inline! do Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Group.all.each do |group| Group.all.each do |group|
User.all.sample(4).each do |user| User.not_mass_generated.sample(4).each do |user|
if group.add_user(user, Gitlab::Access.values.sample).persisted? if group.add_user(user, Gitlab::Access.values.sample).persisted?
print '.' print '.'
else else
...@@ -12,8 +12,8 @@ Sidekiq::Testing.inline! do ...@@ -12,8 +12,8 @@ Sidekiq::Testing.inline! do
end end
end end
Project.all.each do |project| Project.not_mass_generated.each do |project|
User.all.sample(4).each do |user| User.not_mass_generated.sample(4).each do |user|
if project.add_role(user, Gitlab::Access.sym_options.keys.sample) if project.add_role(user, Gitlab::Access.sym_options.keys.sample)
print '.' print '.'
else else
......
require './spec/support/sidekiq' require './spec/support/sidekiq'
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.all.each do |project| Project.not_mass_generated.each do |project|
5.times do |i| 5.times do |i|
milestone_params = { milestone_params = {
title: "v#{i}.0", title: "v#{i}.0",
......
...@@ -4,7 +4,13 @@ Gitlab::Seeder.quiet do ...@@ -4,7 +4,13 @@ Gitlab::Seeder.quiet do
# Limit the number of merge requests per project to avoid long seeds # Limit the number of merge requests per project to avoid long seeds
MAX_NUM_MERGE_REQUESTS = 10 MAX_NUM_MERGE_REQUESTS = 10
Project.non_archived.with_merge_requests_enabled.reject(&:empty_repo?).each do |project| projects = Project
.non_archived
.with_merge_requests_enabled
.not_mass_generated
.reject(&:empty_repo?)
projects.each do |project|
branches = project.repository.branch_names.sample(MAX_NUM_MERGE_REQUESTS * 2) branches = project.repository.branch_names.sample(MAX_NUM_MERGE_REQUESTS * 2)
branches.each do |branch_name| branches.each do |branch_name|
......
...@@ -9,7 +9,7 @@ Sidekiq::Testing.disable! do ...@@ -9,7 +9,7 @@ Sidekiq::Testing.disable! do
# that it falls under `Sidekiq::Testing.disable!`. # that it falls under `Sidekiq::Testing.disable!`.
Key.skip_callback(:commit, :after, :add_to_shell) Key.skip_callback(:commit, :after, :add_to_shell)
User.first(10).each do |user| User.not_mass_generated.first(10).each do |user|
key = "ssh-rsa AAAAB3NzaC1yc2EAAAABJQAAAIEAiPWx6WM4lhHNedGfBpPJNPpZ7yKu+dnn1SJejgt#{user.id + 100}6k6YjzGGphH2TUxwKzxcKDKKezwkpfnxPkSMkuEspGRt/aZZ9wa++Oi7Qkr8prgHc4soW6NUlfDzpvZK2H5E7eQaSeP3SAwGmQKUFHCddNaP0L+hM7zhFNzjFvpaMgJw0=" key = "ssh-rsa AAAAB3NzaC1yc2EAAAABJQAAAIEAiPWx6WM4lhHNedGfBpPJNPpZ7yKu+dnn1SJejgt#{user.id + 100}6k6YjzGGphH2TUxwKzxcKDKKezwkpfnxPkSMkuEspGRt/aZZ9wa++Oi7Qkr8prgHc4soW6NUlfDzpvZK2H5E7eQaSeP3SAwGmQKUFHCddNaP0L+hM7zhFNzjFvpaMgJw0="
key = user.keys.create( key = user.keys.create(
......
...@@ -25,7 +25,7 @@ end ...@@ -25,7 +25,7 @@ end
eos eos
50.times do |i| 50.times do |i|
user = User.all.sample user = User.not_mass_generated.sample
PersonalSnippet.seed(:id, [{ PersonalSnippet.seed(:id, [{
id: i, id: i,
......
...@@ -214,7 +214,7 @@ class Gitlab::Seeder::Pipelines ...@@ -214,7 +214,7 @@ class Gitlab::Seeder::Pipelines
end end
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project| Project.not_mass_generated.sample(5).each do |project|
project_builds = Gitlab::Seeder::Pipelines.new(project) project_builds = Gitlab::Seeder::Pipelines.new(project)
project_builds.seed! project_builds.seed!
end end
......
...@@ -3,7 +3,7 @@ require './spec/support/sidekiq' ...@@ -3,7 +3,7 @@ require './spec/support/sidekiq'
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
admin_user = User.find(1) admin_user = User.find(1)
Project.all.each do |project| Project.not_mass_generated.each do |project|
params = { params = {
name: 'master' name: 'master'
} }
......
...@@ -217,7 +217,7 @@ Gitlab::Seeder.quiet do ...@@ -217,7 +217,7 @@ Gitlab::Seeder.quiet do
flag = 'SEED_CYCLE_ANALYTICS' flag = 'SEED_CYCLE_ANALYTICS'
if ENV[flag] if ENV[flag]
Project.find_each do |project| Project.not_mass_generated.find_each do |project|
# This seed naively assumes that every project has a repository, and every # This seed naively assumes that every project has a repository, and every
# repository has a `master` branch, which may be the case for a pristine # repository has a `master` branch, which may be the case for a pristine
# GDK seed, but is almost never true for a GDK that's actually had # GDK seed, but is almost never true for a GDK that's actually had
......
...@@ -67,7 +67,7 @@ class Gitlab::Seeder::Environments ...@@ -67,7 +67,7 @@ class Gitlab::Seeder::Environments
end end
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project| Project.not_mass_generated.sample(5).each do |project|
project_environments = Gitlab::Seeder::Environments.new(project) project_environments = Gitlab::Seeder::Environments.new(project)
project_environments.seed! project_environments.seed!
end end
......
...@@ -22,7 +22,7 @@ module Db ...@@ -22,7 +22,7 @@ module Db
end end
def self.random_user def self.random_user
User.find(User.pluck(:id).sample) User.find(User.not_mass_generated.pluck(:id).sample)
end end
end end
end end
......
...@@ -2,8 +2,8 @@ require './spec/support/sidekiq' ...@@ -2,8 +2,8 @@ require './spec/support/sidekiq'
Sidekiq::Testing.inline! do Sidekiq::Testing.inline! do
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
User.all.sample(10).each do |user| User.not_mass_generated.sample(10).each do |user|
source_project = Project.public_only.sample source_project = Project.not_mass_generated.public_only.sample
## ##
# 03_project.rb might not have created a public project because # 03_project.rb might not have created a public project because
......
...@@ -12,6 +12,14 @@ The `setup` task is an alias for `gitlab:setup`. ...@@ -12,6 +12,14 @@ The `setup` task is an alias for `gitlab:setup`.
This task calls `db:reset` to create the database, and calls `db:seed_fu` to seed the database. This task calls `db:reset` to create the database, and calls `db:seed_fu` to seed the database.
Note: `db:setup` calls `db:seed` but this does nothing. Note: `db:setup` calls `db:seed` but this does nothing.
### Env variables
**MASS_INSERT**: Create millions of users (2m), projects (5m) and their
relations. It's highly recommended to run the seed with this variable set to catch slow queries
while developing. Expect the process to take up to 20 extra minutes.
**LARGE_PROJECTS**: Create large projects (through import) from a predefined set of urls.
### Seeding issues for all or a given project ### Seeding issues for all or a given project
You can seed issues for all or a given project with the `gitlab:seed:issues` You can seed issues for all or a given project with the `gitlab:seed:issues`
......
...@@ -88,7 +88,7 @@ Gitlab::Seeder.quiet do ...@@ -88,7 +88,7 @@ Gitlab::Seeder.quiet do
seeder = Gitlab::Seeder::Burndown.new(project) seeder = Gitlab::Seeder::Burndown.new(project)
seeder.seed! seeder.seed!
else else
Project.all.each do |project| Project.not_mass_generated.each do |project|
seeder = Gitlab::Seeder::Burndown.new(project) seeder = Gitlab::Seeder::Burndown.new(project)
seeder.seed! seeder.seed!
end end
......
...@@ -128,7 +128,7 @@ class Gitlab::Seeder::Vulnerabilities ...@@ -128,7 +128,7 @@ class Gitlab::Seeder::Vulnerabilities
end end
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.joins(:ci_pipelines).distinct.all.sample(5).each do |project| Project.joins(:ci_pipelines).not_mass_generated.distinct.all.sample(5).each do |project|
seeder = Gitlab::Seeder::Vulnerabilities.new(project) seeder = Gitlab::Seeder::Vulnerabilities.new(project)
seeder.seed! seeder.seed!
end end
......
# frozen_string_literal: true # frozen_string_literal: true
# EE fixture # EE fixture
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project| Project.not_mass_generated.sample(5).each do |project|
project.ci_pipelines.all.sample(2).each do |pipeline| project.ci_pipelines.all.sample(2).each do |pipeline|
next if pipeline.source_pipeline next if pipeline.source_pipeline
......
...@@ -32,7 +32,7 @@ class Gitlab::Seeder::Packages ...@@ -32,7 +32,7 @@ class Gitlab::Seeder::Packages
end end
Gitlab::Seeder.quiet do Gitlab::Seeder.quiet do
Project.all.sample(5).each do |project| Project.not_mass_generated.sample(5).each do |project|
Gitlab::Seeder::Packages.new(project.owner, project).seed Gitlab::Seeder::Packages.new(project.owner, project).seed
end end
end end
...@@ -14,7 +14,71 @@ end ...@@ -14,7 +14,71 @@ end
module Gitlab module Gitlab
class Seeder class Seeder
extend ActionView::Helpers::NumberHelper
ESTIMATED_INSERT_PER_MINUTE = 2_000_000
MASS_INSERT_ENV = 'MASS_INSERT'
# Seeding-only additions for Project.
module ProjectSeed
  extend ActiveSupport::Concern

  included do
    # Projects whose path does not start with the mass-insert prefix.
    # The pattern is passed as a bound value rather than interpolated into
    # the SQL fragment, so it is quoted by the database adapter.
    scope :not_mass_generated, -> do
      where.not('path LIKE ?', "#{Gitlab::Seeder::Projects::MASS_INSERT_NAME_START}%")
    end
  end
end
# Seeding-only additions for User.
module UserSeed
  extend ActiveSupport::Concern

  included do
    # Users whose username does not start with the mass-insert prefix.
    # The pattern is passed as a bound value rather than interpolated into
    # the SQL fragment, so it is quoted by the database adapter.
    scope :not_mass_generated, -> do
      where.not('username LIKE ?', "#{Gitlab::Seeder::Users::MASS_INSERT_USERNAME_START}%")
    end
  end
end
# Runs the given block as a mass insertion, printing progress around it.
#
# size  - number of records the block is expected to insert.
# model - either a String label or a model class whose humanized,
#         pluralized name is used in the messages.
#
# The block is skipped (with a hint) unless the MASS_INSERT_ENV or CI
# environment variable is set.
def self.with_mass_insert(size, model)
  label = model.is_a?(String) ? model : model.model_name.human.pluralize(size)

  unless ENV[MASS_INSERT_ENV] || ENV['CI']
    puts "\nSkipping mass insertion for #{label}."
    puts "Consider running the seed with #{MASS_INSERT_ENV}=1"
    return
  end

  puts "\nCreating #{number_with_delimiter(size)} #{label}."
  puts estimated_time_message(size)

  yield

  puts "\n#{number_with_delimiter(size)} #{label} created!"
end
# Builds the "Rough estimated time" message for inserting `size` records,
# based on ESTIMATED_INSERT_PER_MINUTE and rounded to whole minutes.
def self.estimated_time_message(size)
  minutes = (size.to_f / ESTIMATED_INSERT_PER_MINUTE).round

  return "Rough estimated time: less than a minute ⏰" if minutes.zero?

  "Rough estimated time: #{minutes} #{'minute'.pluralize(minutes)} ⏰"
end
def self.quiet def self.quiet
# Disable database insertion logs so speed isn't limited by ability to print to console
old_logger = ActiveRecord::Base.logger
ActiveRecord::Base.logger = nil
# Additional seed logic for models.
Project.include(ProjectSeed)
User.include(UserSeed)
mute_notifications mute_notifications
mute_mailer mute_mailer
...@@ -23,6 +87,7 @@ module Gitlab ...@@ -23,6 +87,7 @@ module Gitlab
yield yield
SeedFu.quiet = false SeedFu.quiet = false
ActiveRecord::Base.logger = old_logger
puts "\nOK".color(:green) puts "\nOK".color(:green)
end end
......
...@@ -5,6 +5,10 @@ namespace :dev do ...@@ -5,6 +5,10 @@ namespace :dev do
task setup: :environment do task setup: :environment do
ENV['force'] = 'yes' ENV['force'] = 'yes'
Rake::Task["gitlab:setup"].invoke Rake::Task["gitlab:setup"].invoke
# Make sure DB statistics are up to date.
ActiveRecord::Base.connection.execute('ANALYZE')
Rake::Task["gitlab:shell:setup"].invoke Rake::Task["gitlab:shell:setup"].invoke
end end
......
...@@ -22,7 +22,7 @@ namespace :gitlab do ...@@ -22,7 +22,7 @@ namespace :gitlab do
[project] [project]
else else
Project.find_each Project.not_mass_generated.find_each
end end
projects.each do |project| projects.each do |project|
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment