Commit 9da64b22, authored by Rémy Coutable and committed by Oswaldo Ferreira

Prepare projects mass insert

Setup initial work for massive DB insert
for projects.

Add active_record-pg_generate_series gem.
Signed-off-by: Rémy Coutable <remy@rymai.me>
parent 911756ca
......@@ -336,6 +336,7 @@ group :development do
gem 'letter_opener_web', '~> 1.3.4'
gem 'rblineprof', '~> 0.3.6', platform: :mri, require: false
gem 'active_record-pg_generate_series', '~> 0.1.2'
# Better errors handler
gem 'better_errors', '~> 2.5.0'
......
......@@ -29,6 +29,8 @@ GEM
erubi (~> 1.4)
rails-dom-testing (~> 2.0)
rails-html-sanitizer (~> 1.0, >= 1.0.3)
active_record-pg_generate_series (0.1.3)
activerecord
activejob (5.2.3)
activesupport (= 5.2.3)
globalid (>= 0.3.6)
......@@ -1088,6 +1090,7 @@ DEPENDENCIES
RedCloth (~> 4.3.2)
ace-rails-ap (~> 4.1.0)
acme-client (~> 2.0.2)
active_record-pg_generate_series (~> 0.1.2)
activerecord-explain-analyze (~> 0.1)
acts-as-taggable-on (~> 6.0)
addressable (~> 2.5.2)
......
# Seeds development users in two phases: a small batch of users with
# FFaker-generated identities created through the User model, then a bulk
# insert driven by `insert_using_generate_series`.
class Gitlab::Seeder::Users
  include ActionView::Helpers::NumberHelper

  RANDOM_USERS_COUNT = 20
  MASS_USERS_COUNT = 1_500_000

  attr_reader :opts

  # @param opts [Hash] seeding options; stored and exposed via #opts, but not
  #   read anywhere inside this class.
  def initialize(opts = {})
    @opts = opts
  end

  # Runs both seeding phases inside a `Sidekiq::Testing.inline!` block.
  def seed!
    Sidekiq::Testing.inline! do
      create_random_users!
      create_mass_users!
    end
  end

  private

  # Creates RANDOM_USERS_COUNT users with FFaker-generated identities.
  # Prints '.' for each user saved and 'F' for each one rejected by validation;
  # a validation failure does not abort the loop.
  def create_random_users!
    RANDOM_USERS_COUNT.times do
      begin
        User.create!(
          username: FFaker::Internet.user_name,
          name: FFaker::Name.name,
          email: FFaker::Internet.email,
          confirmed_at: DateTime.now,
          password: '12345678'
        )

        print '.'
      rescue ActiveRecord::RecordInvalid
        print 'F'
      end
    end
  end

  # Bulk-inserts MASS_USERS_COUNT users named after the series value `seq`.
  # All of them share one pre-computed password digest.
  def create_mass_users!
    # Disable database insertion logs so speed isn't limited by ability to print to console
    old_logger = ActiveRecord::Base.logger
    ActiveRecord::Base.logger = nil

    begin
      # Digest the shared password once instead of once per row.
      encrypted_password = Devise::Encryptor.digest(User, '12345678')

      User.insert_using_generate_series(1, MASS_USERS_COUNT, debug: true) do |sql|
        sql.username = raw("'user' || seq")
        sql.name = raw("'User ' || seq")
        sql.email = raw("'user' || seq || '@example.com'")
        # NOTE(review): confirm PostgreSQL accepts this cast and the
        # `timestamp + seq` addition; the expression is kept as authored.
        sql.confirmed_at = raw("('1388530801'::timestamp + seq)::date") # 2014-01-01
        sql.encrypted_password = encrypted_password
      end

      puts "\n#{number_with_delimiter(MASS_USERS_COUNT)} users created!"
    ensure
      # Reset logging even when the insert raises, so a failed seed does not
      # leave ActiveRecord logging switched off for the rest of the process.
      ActiveRecord::Base.logger = old_logger
    end
  end
end
# Build the users seeder with default options and run it inside
# Gitlab::Seeder.quiet.
Gitlab::Seeder.quiet { Gitlab::Seeder::Users.new.seed! }
require './spec/support/sidekiq'
# Seeds development projects in two phases: a configurable number of real
# projects imported from public Git URLs, then bulk-inserted placeholder
# projects for each visibility level.
class Gitlab::Seeder::Projects
  include ActionView::Helpers::NumberHelper

  # Repositories imported as real projects, in seeding order.
  PROJECT_URLS = [
    'https://gitlab.com/gitlab-org/gitlab-test.git',
    'https://gitlab.com/gitlab-org/gitlab-ce.git',
    'https://gitlab.com/gitlab-org/gitlab-ci.git',
    'https://gitlab.com/gitlab-org/gitlab-shell.git',
    'https://github.com/documentcloud/underscore.git',
    'https://github.com/twitter/flight.git',
    'https://github.com/twitter/typeahead.js.git',
    'https://github.com/h5bp/html5-boilerplate.git',
    'https://github.com/google/material-design-lite.git',
    'https://github.com/jlevy/the-art-of-command-line.git',
    'https://github.com/FreeCodeCamp/freecodecamp.git',
    'https://github.com/google/deepdream.git',
    'https://github.com/jtleek/datasharing.git',
    'https://github.com/WebAssembly/design.git',
    'https://github.com/airbnb/javascript.git',
    'https://github.com/tessalt/echo-chamber-js.git',
    'https://github.com/atom/atom.git',
    'https://github.com/mattermost/platform.git',
    'https://github.com/purifycss/purifycss.git',
    'https://github.com/facebook/nuclide.git',
    'https://github.com/wbkd/awesome-d3.git',
    'https://github.com/kilimchoi/engineering-blogs.git',
    'https://github.com/gilbarbara/logos.git',
    'https://github.com/gaearon/redux.git',
    'https://github.com/awslabs/s2n.git',
    'https://github.com/arkency/reactjs_koans.git',
    'https://github.com/twbs/bootstrap.git',
    'https://github.com/chjj/ttystudio.git',
    'https://github.com/DrBoolean/mostly-adequate-guide.git',
    'https://github.com/octocat/Spoon-Knife.git',
    'https://github.com/opencontainers/runc.git',
    'https://github.com/googlesamples/android-topeka.git'
  ].freeze

  # Number of rows to bulk-insert for each visibility level.
  MASS_PROJECTS_COUNT = {
    private: 2_000_000,
    internal: 30_000,
    public: 265_000
  }.freeze

  attr_reader :opts

  # @param opts [Hash] seeding options. `:count` limits how many entries of
  #   PROJECT_URLS are imported; when it is absent every URL is imported.
  def initialize(opts = {})
    @opts = opts
  end

  # Runs both seeding phases inside a `Sidekiq::Testing.inline!` block.
  def seed!
    Sidekiq::Testing.inline! do
      create_real_projects!(opts[:count])
      create_mass_projects!
    end
  end

  private

  # Imports the first `count` repositories of PROJECT_URLS, creating the group
  # named after the URL's second-to-last path segment when it does not exist.
  # Prints '.' for each valid project with a valid repository, otherwise the
  # validation messages followed by 'F'.
  #
  # @param count [Integer, nil] number of URLs to import; nil imports all of
  #   them (`Array#first(nil)` would raise a TypeError).
  def create_real_projects!(count)
    urls = count ? PROJECT_URLS.first(count) : PROJECT_URLS
    owner = User.first

    urls.each do |url|
      group_path, project_path = url.split('/')[-2..-1]

      group = Group.find_by(path: group_path)

      unless group
        group = Group.new(
          name: group_path.titleize,
          path: group_path
        )
        group.description = FFaker::Lorem.sentence
        group.save

        group.add_owner(owner)
      end

      # Strip only the trailing ".git" rather than every occurrence of it.
      project_path = project_path.chomp('.git')

      params = {
        import_url: url,
        namespace_id: group.id,
        name: project_path.titleize,
        description: FFaker::Lorem.sentence,
        visibility_level: Gitlab::VisibilityLevel.values.sample
      }

      project = ::Projects::CreateService.new(owner, params).execute

      # Seed-Fu runs this entire fixture in a transaction, so the `after_commit`
      # hook won't run until after the fixture is loaded. That is too late
      # since the Sidekiq::Testing block has already exited. Force clearing
      # the `after_commit` queue to ensure the job is run now.
      project.send(:_run_after_commit_queue)

      if project.valid? && project.valid_repo?
        print '.'
      else
        puts project.errors.full_messages
        print 'F'
      end
    end
  end

  # Bulk-inserts placeholder projects for every visibility level with
  # ActiveRecord logging switched off for the duration.
  def create_mass_projects!
    # Disable database insertion logs so speed isn't limited by ability to print to console
    old_logger = ActiveRecord::Base.logger
    ActiveRecord::Base.logger = nil

    begin
      create_mass_projects_by_visibility!(:private)
      create_mass_projects_by_visibility!(:internal)
      create_mass_projects_by_visibility!(:public)
    ensure
      # Reset logging even when an insert raises, so a failed seed does not
      # leave ActiveRecord logging switched off for the rest of the process.
      ActiveRecord::Base.logger = old_logger
    end
  end

  # Bulk-inserts MASS_PROJECTS_COUNT[visibility] projects into one namespace.
  #
  # @param visibility [Symbol] one of the keys of MASS_PROJECTS_COUNT.
  def create_mass_projects_by_visibility!(visibility)
    users = User.limit(100)
    groups = Group.limit(100)

    # `users + groups` is a plain Array, so `first` is used here: `Array#take`
    # needs an explicit count and raises ArgumentError without one.
    namespace = (users + groups).first

    unless namespace
      puts "No user or group available, skipping #{visibility} projects"
      return
    end

    if namespace.is_a?(Group)
      creator_id = namespace.owner_id
      namespace_id = namespace.id
    else
      creator_id = users.take.id
      namespace_id = namespace.namespace_id
    end

    projects_count = MASS_PROJECTS_COUNT[visibility]
    visibility_level = Gitlab::VisibilityLevel.level_value(visibility.to_s)

    # Only local variables are referenced inside the block, apart from the
    # `raw` helper and the yielded `sql` object.
    Project.insert_using_generate_series(1, projects_count, debug: true) do |sql|
      project_name = raw("'seed_#{visibility}_project_' || seq")

      sql.name = project_name
      sql.path = project_name
      sql.creator_id = creator_id
      sql.namespace_id = namespace_id
      sql.visibility_level = visibility_level
    end

    puts "#{number_with_delimiter(projects_count)} projects created!"
  end
end
# Run the projects seeder inside Gitlab::Seeder.quiet. The number of real
# projects comes from the SIZE environment variable and falls back to 8 when
# SIZE is not present.
Gitlab::Seeder.quiet do
  requested_size = ENV['SIZE']
  count = 8
  count = requested_size.to_i if requested_size.present?

  Gitlab::Seeder::Projects.new(count: count).seed!
end
require './spec/support/sidekiq'
Gitlab::Seeder.quiet do
  # Creates one confirmed user with the shared seed password and reports the
  # outcome on stdout: '.' when saved, 'F' when validation rejects the record.
  seed_user = lambda do |username, name, email|
    begin
      User.create!(
        username: username,
        name: name,
        email: email,
        confirmed_at: DateTime.now,
        password: '12345678'
      )

      print '.'
    rescue ActiveRecord::RecordInvalid
      print 'F'
    end
  end

  # Twenty users with FFaker-generated identities.
  20.times do
    seed_user.call(
      FFaker::Internet.user_name,
      FFaker::Name.name,
      FFaker::Internet.email
    )
  end

  # Five users with predictable identities: user0 through user4.
  (0...5).each do |index|
    seed_user.call(
      "user#{index}",
      "User #{index}",
      "user#{index}@example.com"
    )
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment