Commit 5fcd9986 authored by Yorick Peterse's avatar Yorick Peterse

Refactor getting user groups/projects/contributions

This new setup no longer loads any IDs into memory using "pluck",
instead using SQL UNIONs to merge the various datasets together. This
results in greatly improved query performance as well as a reduction of
memory usage.

The old setup was in particular problematic when requesting the
authorized projects _including_ public/internal projects as this would
result in roughly 65000 project IDs being loaded into memory. These IDs
would in turn be passed to other queries.
parent bfd9855a
...@@ -389,21 +389,40 @@ class User < ActiveRecord::Base ...@@ -389,21 +389,40 @@ class User < ActiveRecord::Base
end end
end end
# Groups user has access to # Returns the groups a user has access to, optionally including any public
def authorized_groups # groups.
@authorized_groups ||= #
begin # public_internal - When set to "true" all public groups and groups of public
union = Gitlab::SQL::Union. # projects are also included.
new([groups.select(:id), authorized_projects.select(:namespace_id)]) #
# Returns an ActiveRecord::Relation
def authorized_groups(public_internal = false)
union = Gitlab::SQL::Union.
new([groups.select(:id), authorized_projects(public_internal).
select(:namespace_id)])
Group.where("namespaces.id IN (#{union.to_sql})") sql = "namespaces.id IN (#{union.to_sql})"
end
if public_internal
sql << ' OR public IS TRUE'
end
Group.where(sql)
end end
# Projects user has access to # Returns the groups a user is authorized to access.
def authorized_projects #
@authorized_projects ||= # public_internal - When set to "true" all public/internal projects will also
Project.where("projects.id IN (#{projects_union.to_sql})") # be included.
def authorized_projects(public_internal = false)
base = "projects.id IN (#{projects_union.to_sql})"
if public_internal
Project.where("#{base} OR projects.visibility_level IN (?)",
Project.public_and_internal_levels)
else
Project.where(base)
end
end end
def owned_projects def owned_projects
...@@ -726,12 +745,25 @@ class User < ActiveRecord::Base ...@@ -726,12 +745,25 @@ class User < ActiveRecord::Base
Doorkeeper::AccessToken.where(resource_owner_id: self.id, revoked_at: nil) Doorkeeper::AccessToken.where(resource_owner_id: self.id, revoked_at: nil)
end end
def contributed_projects_ids # Returns the projects a user contributed to in the last year.
Event.contributions.where(author_id: self). #
# This method relies on a subquery as this performs significantly better
# compared to a JOIN when coupled with, for example,
# `Project.visible_to_user`. That is, consider the following code:
#
# some_user.contributed_projects.visible_to_user(other_user)
#
# If this method were to use a JOIN the resulting query would take roughly 200
# ms on a database with a similar size to gitlab.com's database. On the other
# hand, using a subquery means we can get the exact same data in about 40 ms.
def contributed_projects
events = Event.select(:project_id).
contributions.where(author_id: self).
where("created_at > ?", Time.now - 1.year). where("created_at > ?", Time.now - 1.year).
reorder(project_id: :desc).
uniq. uniq.
pluck(:project_id) reorder(nil)
Project.where(id: events)
end end
def restricted_signup_domains def restricted_signup_domains
......
...@@ -686,7 +686,7 @@ describe User do ...@@ -686,7 +686,7 @@ describe User do
end end
end end
describe "#contributed_projects_ids" do describe "#contributed_projects" do
subject { create(:user) } subject { create(:user) }
let!(:project1) { create(:project) } let!(:project1) { create(:project) }
let!(:project2) { create(:project, forked_from_project: project3) } let!(:project2) { create(:project, forked_from_project: project3) }
...@@ -701,15 +701,15 @@ describe User do ...@@ -701,15 +701,15 @@ describe User do
end end
it "includes IDs for projects the user has pushed to" do it "includes IDs for projects the user has pushed to" do
expect(subject.contributed_projects_ids).to include(project1.id) expect(subject.contributed_projects).to include(project1)
end end
it "includes IDs for projects the user has had merge requests merged into" do it "includes IDs for projects the user has had merge requests merged into" do
expect(subject.contributed_projects_ids).to include(project3.id) expect(subject.contributed_projects).to include(project3)
end end
it "doesn't include IDs for unrelated projects" do it "doesn't include IDs for unrelated projects" do
expect(subject.contributed_projects_ids).not_to include(project2.id) expect(subject.contributed_projects).not_to include(project2)
end end
end end
...@@ -758,4 +758,48 @@ describe User do ...@@ -758,4 +758,48 @@ describe User do
expect(subject.recent_push).to eq(nil) expect(subject.recent_push).to eq(nil)
end end
end end
describe '#authorized_groups' do
let!(:user) { create(:user) }
let!(:private_group) { create(:group) }
let!(:public_group) { create(:group, public: true) }
before do
private_group.add_user(user, Gitlab::Access::MASTER)
end
describe 'excluding public groups' do
subject { user.authorized_groups }
it { is_expected.to eq([private_group]) }
end
describe 'including public groups' do
subject { user.authorized_groups(true) }
it { is_expected.to eq([public_group, private_group]) }
end
end
describe '#authorized_projects' do
let!(:user) { create(:user) }
let!(:private_project) { create(:project, :private) }
let!(:public_project) { create(:project, :public) }
before do
private_project.team << [user, Gitlab::Access::MASTER]
end
describe 'excluding public projects' do
subject { user.authorized_projects }
it { is_expected.to eq([private_project]) }
end
describe 'including public projects' do
subject { user.authorized_projects(true) }
it { is_expected.to eq([public_project, private_project]) }
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment