Commit cf3c238e authored by Adam Hegyi's avatar Adam Hegyi

Migrate users.bio to user_details.bio

This change migrates users.bio to user_details.bio.
parent d92892fd
...@@ -217,6 +217,7 @@ class User < ApplicationRecord ...@@ -217,6 +217,7 @@ class User < ApplicationRecord
before_save :check_for_verified_email, if: ->(user) { user.email_changed? && !user.new_record? } before_save :check_for_verified_email, if: ->(user) { user.email_changed? && !user.new_record? }
before_validation :ensure_namespace_correct before_validation :ensure_namespace_correct
before_save :ensure_namespace_correct # in case validation is skipped before_save :ensure_namespace_correct # in case validation is skipped
before_save :ensure_bio_is_assigned_to_user_details, if: :bio_changed?
after_validation :set_username_errors after_validation :set_username_errors
after_update :username_changed_hook, if: :saved_change_to_username? after_update :username_changed_hook, if: :saved_change_to_username?
after_destroy :post_destroy_hook after_destroy :post_destroy_hook
...@@ -1262,6 +1263,13 @@ class User < ApplicationRecord ...@@ -1262,6 +1263,13 @@ class User < ApplicationRecord
end end
end end
# Temporary, will be removed when bio is fully migrated
def ensure_bio_is_assigned_to_user_details
return if Feature.disabled?(:migrate_bio_to_user_details, default_enabled: true)
user_detail.bio = bio.to_s[0...255] # bio can be NULL in users, but cannot be NULL in user_details
end
def set_username_errors def set_username_errors
namespace_path_errors = self.errors.delete(:"namespace.path") namespace_path_errors = self.errors.delete(:"namespace.path")
self.errors[:username].concat(namespace_path_errors) if namespace_path_errors self.errors[:username].concat(namespace_path_errors) if namespace_path_errors
......
---
title: Add user_details.bio column and migrate data from users.bio
merge_request: 27773
author:
type: changed
# frozen_string_literal: true
class AddBioToUserDetails < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_column_with_default(:user_details, :bio, :string, default: '', allow_null: false, limit: 255, update_column_in_batches_args: { batch_column_name: :user_id })
end
def down
remove_column(:user_details, :bio)
end
end
# frozen_string_literal: true
class AddTempIndexOnUsersBio < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INDEX_NAME = 'tmp_idx_on_user_id_where_bio_is_filled'
disable_ddl_transaction!
def up
add_concurrent_index :users, :id, where: "(COALESCE(bio, '') IS DISTINCT FROM '')", name: INDEX_NAME
end
def down
remove_concurrent_index_by_name :users, INDEX_NAME
end
end
# frozen_string_literal: true
class TriggerBackgroundMigrationForUsersBio < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INTERVAL = 2.minutes.to_i
BATCH_SIZE = 500
MIGRATION = 'MigrateUsersBioToUserDetails'
disable_ddl_transaction!
class User < ActiveRecord::Base
self.table_name = 'users'
include ::EachBatch
end
def up
relation = User.where("(COALESCE(bio, '') IS DISTINCT FROM '')")
queue_background_migration_jobs_by_range_at_intervals(relation,
MIGRATION,
INTERVAL,
batch_size: BATCH_SIZE)
end
def down
# no-op
end
end
...@@ -6152,7 +6152,8 @@ ALTER SEQUENCE public.user_custom_attributes_id_seq OWNED BY public.user_custom_ ...@@ -6152,7 +6152,8 @@ ALTER SEQUENCE public.user_custom_attributes_id_seq OWNED BY public.user_custom_
CREATE TABLE public.user_details ( CREATE TABLE public.user_details (
user_id bigint NOT NULL, user_id bigint NOT NULL,
job_title character varying(200) DEFAULT ''::character varying NOT NULL job_title character varying(200) DEFAULT ''::character varying NOT NULL,
bio character varying(255) DEFAULT ''::character varying NOT NULL
); );
CREATE SEQUENCE public.user_details_user_id_seq CREATE SEQUENCE public.user_details_user_id_seq
...@@ -10243,6 +10244,8 @@ CREATE UNIQUE INDEX term_agreements_unique_index ON public.term_agreements USING ...@@ -10243,6 +10244,8 @@ CREATE UNIQUE INDEX term_agreements_unique_index ON public.term_agreements USING
CREATE INDEX tmp_build_stage_position_index ON public.ci_builds USING btree (stage_id, stage_idx) WHERE (stage_idx IS NOT NULL); CREATE INDEX tmp_build_stage_position_index ON public.ci_builds USING btree (stage_id, stage_idx) WHERE (stage_idx IS NOT NULL);
CREATE INDEX tmp_idx_on_user_id_where_bio_is_filled ON public.users USING btree (id) WHERE ((COALESCE(bio, ''::character varying))::text IS DISTINCT FROM ''::text);
CREATE INDEX undefined_vulnerabilities ON public.vulnerability_occurrences USING btree (id) WHERE (severity = 0); CREATE INDEX undefined_vulnerabilities ON public.vulnerability_occurrences USING btree (id) WHERE (severity = 0);
CREATE INDEX undefined_vulnerability ON public.vulnerabilities USING btree (id) WHERE (severity = 0); CREATE INDEX undefined_vulnerability ON public.vulnerabilities USING btree (id) WHERE (severity = 0);
...@@ -12797,7 +12800,10 @@ COPY "schema_migrations" (version) FROM STDIN; ...@@ -12797,7 +12800,10 @@ COPY "schema_migrations" (version) FROM STDIN;
20200319203901 20200319203901
20200320112455 20200320112455
20200320123839 20200320123839
20200323071918
20200323074147
20200323075043 20200323075043
20200323080714
20200323122201 20200323122201
20200323134519 20200323134519
20200324115359 20200324115359
......
# frozen_string_literal: true
# rubocop:disable Style/Documentation
module Gitlab
module BackgroundMigration
class MigrateUsersBioToUserDetails
class User < ActiveRecord::Base
self.table_name = 'users'
end
class UserDetails < ActiveRecord::Base
self.table_name = 'user_details'
end
def perform(start_id, stop_id)
return if Feature.disabled?(:migrate_bio_to_user_details, default_enabled: true)
relation = User
.select("id AS user_id", "substring(COALESCE(bio, '') from 1 for 255) AS bio")
.where("(COALESCE(bio, '') IS DISTINCT FROM '')")
.where(id: (start_id..stop_id))
ActiveRecord::Base.connection.execute <<-EOF.strip_heredoc
INSERT INTO user_details
(user_id, bio)
#{relation.to_sql}
ON CONFLICT (user_id)
DO UPDATE SET
"bio" = EXCLUDED."bio";
EOF
end
end
end
end
...@@ -375,8 +375,11 @@ module Gitlab ...@@ -375,8 +375,11 @@ module Gitlab
# less "complex" without introducing extra methods (which actually will # less "complex" without introducing extra methods (which actually will
# make things _more_ complex). # make things _more_ complex).
# #
# `batch_column_name` option is for tables without primary key, in this
# case an other unique integer column can be used. Example: :user_id
#
# rubocop: disable Metrics/AbcSize # rubocop: disable Metrics/AbcSize
def update_column_in_batches(table, column, value, batch_size: nil) def update_column_in_batches(table, column, value, batch_size: nil, batch_column_name: :id)
if transaction_open? if transaction_open?
raise 'update_column_in_batches can not be run inside a transaction, ' \ raise 'update_column_in_batches can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \ 'you can disable transactions by calling disable_ddl_transaction! ' \
...@@ -403,14 +406,14 @@ module Gitlab ...@@ -403,14 +406,14 @@ module Gitlab
batch_size = max_size if batch_size > max_size batch_size = max_size if batch_size > max_size
end end
start_arel = table.project(table[:id]).order(table[:id].asc).take(1) start_arel = table.project(table[batch_column_name]).order(table[batch_column_name].asc).take(1)
start_arel = yield table, start_arel if block_given? start_arel = yield table, start_arel if block_given?
start_id = exec_query(start_arel.to_sql).to_a.first['id'].to_i start_id = exec_query(start_arel.to_sql).to_a.first[batch_column_name.to_s].to_i
loop do loop do
stop_arel = table.project(table[:id]) stop_arel = table.project(table[batch_column_name])
.where(table[:id].gteq(start_id)) .where(table[batch_column_name].gteq(start_id))
.order(table[:id].asc) .order(table[batch_column_name].asc)
.take(1) .take(1)
.skip(batch_size) .skip(batch_size)
...@@ -420,12 +423,12 @@ module Gitlab ...@@ -420,12 +423,12 @@ module Gitlab
update_arel = Arel::UpdateManager.new update_arel = Arel::UpdateManager.new
.table(table) .table(table)
.set([[table[column], value]]) .set([[table[column], value]])
.where(table[:id].gteq(start_id)) .where(table[batch_column_name].gteq(start_id))
if stop_row if stop_row
stop_id = stop_row['id'].to_i stop_id = stop_row[batch_column_name.to_s].to_i
start_id = stop_id start_id = stop_id
update_arel = update_arel.where(table[:id].lt(stop_id)) update_arel = update_arel.where(table[batch_column_name].lt(stop_id))
end end
update_arel = yield table, update_arel if block_given? update_arel = yield table, update_arel if block_given?
...@@ -461,7 +464,7 @@ module Gitlab ...@@ -461,7 +464,7 @@ module Gitlab
# #
# This method can also take a block which is passed directly to the # This method can also take a block which is passed directly to the
# `update_column_in_batches` method. # `update_column_in_batches` method.
def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, &block) def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, update_column_in_batches_args: {}, &block)
if transaction_open? if transaction_open?
raise 'add_column_with_default can not be run inside a transaction, ' \ raise 'add_column_with_default can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \ 'you can disable transactions by calling disable_ddl_transaction! ' \
...@@ -483,7 +486,12 @@ module Gitlab ...@@ -483,7 +486,12 @@ module Gitlab
begin begin
default_after_type_cast = connection.type_cast(default, column_for(table, column)) default_after_type_cast = connection.type_cast(default, column_for(table, column))
update_column_in_batches(table, column, default_after_type_cast, &block)
if update_column_in_batches_args.any?
update_column_in_batches(table, column, default_after_type_cast, **update_column_in_batches_args, &block)
else
update_column_in_batches(table, column, default_after_type_cast, &block)
end
change_column_null(table, column, false) unless allow_null change_column_null(table, column, false) unless allow_null
# We want to rescue _all_ exceptions here, even those that don't inherit # We want to rescue _all_ exceptions here, even those that don't inherit
......
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::BackgroundMigration::MigrateUsersBioToUserDetails, :migration, schema: 20200323074147 do
let(:users) { table(:users) }
let(:user_details) do
klass = table(:user_details)
klass.primary_key = :user_id
klass
end
let!(:user_needs_migration) { users.create(name: 'user1', email: 'test1@test.com', projects_limit: 1, bio: 'bio') }
let!(:user_needs_no_migration) { users.create(name: 'user2', email: 'test2@test.com', projects_limit: 1) }
let!(:user_also_needs_no_migration) { users.create(name: 'user3', email: 'test3@test.com', projects_limit: 1, bio: '') }
let!(:user_with_long_bio) { users.create(name: 'user4', email: 'test4@test.com', projects_limit: 1, bio: 'a' * 256) } # 255 is the max
let!(:user_already_has_details) { users.create(name: 'user5', email: 'test5@test.com', projects_limit: 1, bio: 'my bio') }
let!(:existing_user_details) { user_details.find_or_create_by(user_id: user_already_has_details.id).update(bio: 'my bio') }
# unlikely scenario since we have triggers
let!(:user_has_different_details) { users.create(name: 'user6', email: 'test6@test.com', projects_limit: 1, bio: 'different') }
let!(:different_existing_user_details) { user_details.find_or_create_by(user_id: user_has_different_details.id).update(bio: 'bio') }
let(:user_ids) do
[
user_needs_migration,
user_needs_no_migration,
user_also_needs_no_migration,
user_with_long_bio,
user_already_has_details,
user_has_different_details
].map(&:id)
end
subject { described_class.new.perform(user_ids.min, user_ids.max) }
it 'migrates all relevant records' do
subject
all_user_details = user_details.all
expect(all_user_details.size).to eq(4)
end
it 'migrates `bio`' do
subject
user_detail = user_details.find_by!(user_id: user_needs_migration.id)
expect(user_detail.bio).to eq('bio')
end
it 'migrates long `bio`' do
subject
user_detail = user_details.find_by!(user_id: user_with_long_bio.id)
expect(user_detail.bio).to eq('a' * 255)
end
it 'does not change existing user detail' do
expect { subject }.not_to change { user_details.find_by!(user_id: user_already_has_details.id).attributes }
end
it 'changes existing user detail when the columns are different' do
expect { subject }.to change { user_details.find_by!(user_id: user_has_different_details.id).bio }.from('bio').to('different')
end
it 'does not migrate record' do
subject
user_detail = user_details.find_by(user_id: user_needs_no_migration.id)
expect(user_detail).to be_nil
end
it 'does not migrate empty bio' do
subject
user_detail = user_details.find_by(user_id: user_also_needs_no_migration.id)
expect(user_detail).to be_nil
end
context 'when `migrate_bio_to_user_details` feature flag is off' do
before do
stub_feature_flags(migrate_bio_to_user_details: false)
end
it 'does nothing' do
already_existing_user_details = user_details.where(user_id: [
user_has_different_details.id,
user_already_has_details.id
])
subject
expect(user_details.all).to match_array(already_existing_user_details)
end
end
end
...@@ -657,6 +657,30 @@ describe Gitlab::Database::MigrationHelpers do ...@@ -657,6 +657,30 @@ describe Gitlab::Database::MigrationHelpers do
end end
end end
context 'when `update_column_in_batches_args` is given' do
let(:column) { UserDetail.columns.find { |c| c.name == "user_id" } }
it 'uses `user_id` for `update_column_in_batches`' do
allow(model).to receive(:transaction_open?).and_return(false)
allow(model).to receive(:transaction).and_yield
allow(model).to receive(:column_for).with(:user_details, :foo).and_return(column)
allow(model).to receive(:update_column_in_batches).with(:user_details, :foo, 10, batch_column_name: :user_id)
allow(model).to receive(:change_column_null).with(:user_details, :foo, false)
allow(model).to receive(:change_column_default).with(:user_details, :foo, 10)
expect(model).to receive(:add_column)
.with(:user_details, :foo, :integer, default: nil)
model.add_column_with_default(
:user_details,
:foo,
:integer,
default: 10,
update_column_in_batches_args: { batch_column_name: :user_id }
)
end
end
context 'when a column limit is set' do context 'when a column limit is set' do
it 'adds the column with a limit' do it 'adds the column with a limit' do
allow(model).to receive(:transaction_open?).and_return(false) allow(model).to receive(:transaction_open?).and_return(false)
......
...@@ -55,6 +55,67 @@ describe User, :do_not_mock_admin_mode do ...@@ -55,6 +55,67 @@ describe User, :do_not_mock_admin_mode do
it { is_expected.to have_many(:custom_attributes).class_name('UserCustomAttribute') } it { is_expected.to have_many(:custom_attributes).class_name('UserCustomAttribute') }
it { is_expected.to have_many(:releases).dependent(:nullify) } it { is_expected.to have_many(:releases).dependent(:nullify) }
describe "#bio" do
it 'syncs bio with `user_details.bio` on create' do
user = create(:user, bio: 'my bio')
expect(user.bio).to eq(user.user_detail.bio)
end
context 'when `migrate_bio_to_user_details` feature flag is off' do
before do
stub_feature_flags(migrate_bio_to_user_details: false)
end
it 'does not sync bio with `user_details.bio`' do
user = create(:user, bio: 'my bio')
expect(user.bio).to eq('my bio')
expect(user.user_detail.bio).to eq('')
end
end
it 'syncs bio with `user_details.bio` on update' do
user = create(:user)
user.update!(bio: 'my bio')
expect(user.bio).to eq(user.user_detail.bio)
end
context 'when `user_details` association already exists' do
let(:user) { create(:user) }
before do
create(:user_detail, user: user)
end
it 'syncs bio with `user_details.bio`' do
user.update!(bio: 'my bio')
expect(user.bio).to eq(user.user_detail.bio)
end
it 'falls back to "" when nil is given' do
user.update!(bio: nil)
expect(user.bio).to eq(nil)
expect(user.user_detail.bio).to eq('')
end
# very unlikely scenario
it 'truncates long bio when syncing to user_details' do
invalid_bio = 'a' * 256
truncated_bio = 'a' * 255
user.bio = invalid_bio
user.save(validate: false)
expect(user.user_detail.bio).to eq(truncated_bio)
end
end
end
describe "#abuse_report" do describe "#abuse_report" do
let(:current_user) { create(:user) } let(:current_user) { create(:user) }
let(:other_user) { create(:user) } let(:other_user) { create(:user) }
......
...@@ -739,6 +739,17 @@ describe API::Users, :do_not_mock_admin_mode do ...@@ -739,6 +739,17 @@ describe API::Users, :do_not_mock_admin_mode do
expect(user.reload.bio).to eq('new test bio') expect(user.reload.bio).to eq('new test bio')
end end
it "updates user with empty bio" do
user.bio = 'previous bio'
user.save!
put api("/users/#{user.id}", admin), params: { bio: '' }
expect(response).to have_gitlab_http_status(:ok)
expect(json_response['bio']).to eq('')
expect(user.reload.bio).to eq('')
end
it "updates user with new password and forces reset on next login" do it "updates user with new password and forces reset on next login" do
put api("/users/#{user.id}", admin), params: { password: '12345678' } put api("/users/#{user.id}", admin), params: { password: '12345678' }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment