Commit 81be7217 authored by Nick Thomas's avatar Nick Thomas

Never fetch more than 101 commits when processing a git push

This change limits the number of commits we'll pull from Gitaly when
processing hooks for branch/tag push/delete actions to at-most 101.

This limit already exists when creating the initial default branch, but
when creating a new non-default branch, or pushing to an existing one,
we would read all the commits in the branch from Gitaly before throwing
all but the most recent 100 away and processing those for the hooks.

No behaviour changes are implied by this change, we're just skipping
unnecessary work by taking advantage of the new `limit` option to the
CommitsBetween RPC.

Since we never want the full list of `commits`, I change the interface
declared in `Git::BaseHooksService` to make it clearer as well.

Changelog: performance
parent c64ee58c
...@@ -161,8 +161,8 @@ class Repository ...@@ -161,8 +161,8 @@ class Repository
CommitCollection.new(container, commits, ref) CommitCollection.new(container, commits, ref)
end end
def commits_between(from, to) def commits_between(from, to, limit: nil)
commits = Gitlab::Git::Commit.between(raw_repository, from, to) commits = Gitlab::Git::Commit.between(raw_repository, from, to, limit: limit)
commits = Commit.decorate(commits, container) if commits.present? commits = Commit.decorate(commits, container) if commits.present?
commits commits
end end
......
...@@ -25,17 +25,13 @@ module Git ...@@ -25,17 +25,13 @@ module Git
raise NotImplementedError, "Please implement #{self.class}##{__method__}" raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end end
# The changeset, ordered with the newest commit last # This should return PROCESS_COMMIT_LIMIT commits, ordered with newest last
def commits
raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end
def limited_commits def limited_commits
@limited_commits ||= commits.last(PROCESS_COMMIT_LIMIT) raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end end
def commits_count def commits_count
commits.count raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end end
def event_message def event_message
......
...@@ -18,20 +18,25 @@ module Git ...@@ -18,20 +18,25 @@ module Git
:push_hooks :push_hooks
end end
def commits def limited_commits
strong_memoize(:commits) do strong_memoize(:limited_commits) { threshold_commits.last(PROCESS_COMMIT_LIMIT) }
end
# Taking limit+1 commits allows us to detect when the limit is in effect
def threshold_commits
strong_memoize(:threshold_commits) do
if creating_default_branch? if creating_default_branch?
# The most recent PROCESS_COMMIT_LIMIT commits in the default branch. # The most recent PROCESS_COMMIT_LIMIT commits in the default branch.
# They are returned newest-to-oldest, but we need to present them oldest-to-newest # They are returned newest-to-oldest, but we need to present them oldest-to-newest
project.repository.commits(newrev, limit: PROCESS_COMMIT_LIMIT).reverse project.repository.commits(newrev, limit: PROCESS_COMMIT_LIMIT + 1).reverse!
elsif creating_branch? elsif creating_branch?
# Use the pushed commits that aren't reachable by the default branch # Use the pushed commits that aren't reachable by the default branch
# as a heuristic. This may include more commits than are actually # as a heuristic. This may include more commits than are actually
# pushed, but that shouldn't matter because we check for existing # pushed, but that shouldn't matter because we check for existing
# cross-references later. # cross-references later.
project.repository.commits_between(project.default_branch, newrev) project.repository.commits_between(project.default_branch, newrev, limit: PROCESS_COMMIT_LIMIT + 1)
elsif updating_branch? elsif updating_branch?
project.repository.commits_between(oldrev, newrev) project.repository.commits_between(oldrev, newrev, limit: PROCESS_COMMIT_LIMIT + 1)
else # removing branch else # removing branch
[] []
end end
...@@ -39,9 +44,21 @@ module Git ...@@ -39,9 +44,21 @@ module Git
end end
def commits_count def commits_count
return count_commits_in_branch if creating_default_branch? strong_memoize(:commits_count) do
next threshold_commits.count if
strong_memoized?(:threshold_commits) &&
threshold_commits.count <= PROCESS_COMMIT_LIMIT
super if creating_default_branch?
project.repository.commit_count_for_ref(ref)
elsif creating_branch?
project.repository.count_commits_between(project.default_branch, newrev)
elsif updating_branch?
project.repository.count_commits_between(oldrev, newrev)
else # removing branch
0
end
end
end end
override :invalidated_file_types override :invalidated_file_types
...@@ -179,12 +196,6 @@ module Git ...@@ -179,12 +196,6 @@ module Git
creating_branch? && default_branch? creating_branch? && default_branch?
end end
def count_commits_in_branch
strong_memoize(:count_commits_in_branch) do
project.repository.commit_count_for_ref(ref)
end
end
def default_branch? def default_branch?
strong_memoize(:default_branch) do strong_memoize(:default_branch) do
[nil, branch_name].include?(project.default_branch) [nil, branch_name].include?(project.default_branch)
......
...@@ -8,10 +8,14 @@ module Git ...@@ -8,10 +8,14 @@ module Git
:tag_push_hooks :tag_push_hooks
end end
def commits def limited_commits
[tag_commit].compact [tag_commit].compact
end end
def commits_count
limited_commits.count
end
def event_message def event_message
tag&.message tag&.message
end end
......
...@@ -108,17 +108,26 @@ module Gitlab ...@@ -108,17 +108,26 @@ module Gitlab
# See also #repository.commits_between # See also #repository.commits_between
# #
# Ex. # Ex.
# Commit.between(repo, '29eda46b', 'master') # Commit.between(repo, '29eda46b', 'master') # all commits, ordered oldest to newest
# Commit.between(repo, '29eda46b', 'master', limit: 100) # 100 newest commits, ordered oldest to newest
# #
def between(repo, base, head) def between(repo, base, head, limit: nil)
# In either of these cases, we are guaranteed to return no commits, so # In either of these cases, we are guaranteed to return no commits, so
# shortcut the RPC call # shortcut the RPC call
return [] if Gitlab::Git.blank_ref?(base) || Gitlab::Git.blank_ref?(head) return [] if Gitlab::Git.blank_ref?(base) || Gitlab::Git.blank_ref?(head)
wrapped_gitaly_errors do wrapped_gitaly_errors do
revisions = [head, "^#{base}"] # base..head revisions = [head, "^#{base}"] # base..head
client = repo.gitaly_commit_client
repo.gitaly_commit_client.list_commits(revisions, reverse: true)
# We must return the commits in chronological order but using both
# limit and reverse in the Gitaly RPC would return the oldest N,
# rather than newest N, commits, so reorder in Ruby with limit
if limit
client.list_commits(revisions, pagination_params: { limit: limit }).reverse!
else
client.list_commits(revisions, reverse: true)
end
end end
end end
......
...@@ -255,11 +255,12 @@ module Gitlab ...@@ -255,11 +255,12 @@ module Gitlab
consume_commits_response(response) consume_commits_response(response)
end end
def list_commits(revisions, reverse: false) def list_commits(revisions, reverse: false, pagination_params: nil)
request = Gitaly::ListCommitsRequest.new( request = Gitaly::ListCommitsRequest.new(
repository: @gitaly_repo, repository: @gitaly_repo,
revisions: Array.wrap(revisions), revisions: Array.wrap(revisions),
reverse: reverse reverse: reverse,
pagination_params: pagination_params
) )
response = GitalyClient.call(@repository.storage, :commit_service, :list_commits, request, timeout: GitalyClient.medium_timeout) response = GitalyClient.call(@repository.storage, :commit_service, :list_commits, request, timeout: GitalyClient.medium_timeout)
......
...@@ -364,12 +364,40 @@ RSpec.describe Gitlab::Git::Commit, :seed_helper do ...@@ -364,12 +364,40 @@ RSpec.describe Gitlab::Git::Commit, :seed_helper do
end end
describe '.between' do describe '.between' do
subject do let(:limit) { nil }
commits = described_class.between(repository, SeedRepo::Commit::PARENT_ID, SeedRepo::Commit::ID) let(:commit_ids) { commits.map(&:id) }
commits.map { |c| c.id }
subject(:commits) { described_class.between(repository, from, to, limit: limit) }
context 'requesting a single commit' do
let(:from) { SeedRepo::Commit::PARENT_ID }
let(:to) { SeedRepo::Commit::ID }
it { expect(commit_ids).to contain_exactly(to) }
end end
it { is_expected.to contain_exactly(SeedRepo::Commit::ID) } context 'requesting a commit range' do
let(:from) { 'v1.0.0' }
let(:to) { 'v1.2.0' }
let(:commits_in_range) do
%w[
570e7b2abdd848b95f2f578043fc23bd6f6fd24d
5937ac0a7beb003549fc5fd26fc247adbce4a52e
eb49186cfa5c4338011f5f590fac11bd66c5c631
]
end
context 'no limit' do
it { expect(commit_ids).to eq(commits_in_range) }
end
context 'limited' do
let(:limit) { 2 }
it { expect(commit_ids).to eq(commits_in_range.last(2)) }
end
end
end end
describe '.shas_with_signatures' do describe '.shas_with_signatures' do
......
...@@ -311,6 +311,10 @@ RSpec.describe Gitlab::GitalyClient::CommitService do ...@@ -311,6 +311,10 @@ RSpec.describe Gitlab::GitalyClient::CommitService do
end end
describe '#list_commits' do describe '#list_commits' do
let(:revisions) { 'master' }
let(:reverse) { false }
let(:pagination_params) { nil }
shared_examples 'a ListCommits request' do shared_examples 'a ListCommits request' do
before do before do
::Gitlab::GitalyClient.clear_stubs! ::Gitlab::GitalyClient.clear_stubs!
...@@ -318,26 +322,35 @@ RSpec.describe Gitlab::GitalyClient::CommitService do ...@@ -318,26 +322,35 @@ RSpec.describe Gitlab::GitalyClient::CommitService do
it 'sends a list_commits message' do it 'sends a list_commits message' do
expect_next_instance_of(Gitaly::CommitService::Stub) do |service| expect_next_instance_of(Gitaly::CommitService::Stub) do |service|
expect(service) expected_request = gitaly_request_with_params(
.to receive(:list_commits) Array.wrap(revisions),
.with(gitaly_request_with_params(expected_params), kind_of(Hash)) reverse: reverse,
.and_return([]) pagination_params: pagination_params
)
expect(service).to receive(:list_commits).with(expected_request, kind_of(Hash)).and_return([])
end end
client.list_commits(revisions) client.list_commits(revisions, reverse: reverse, pagination_params: pagination_params)
end end
end end
context 'with a single revision' do it_behaves_like 'a ListCommits request'
let(:revisions) { 'master' }
let(:expected_params) { %w[master] } context 'with multiple revisions' do
let(:revisions) { %w[master --not --all] }
it_behaves_like 'a ListCommits request'
end
context 'with reverse: true' do
let(:reverse) { true }
it_behaves_like 'a ListCommits request' it_behaves_like 'a ListCommits request'
end end
context 'with multiple revisions' do context 'with pagination params' do
let(:revisions) { %w[master --not --all] } let(:pagination_params) { { limit: 1, page_token: 'foo' } }
let(:expected_params) { %w[master --not --all] }
it_behaves_like 'a ListCommits request' it_behaves_like 'a ListCommits request'
end end
......
...@@ -470,6 +470,29 @@ RSpec.describe Repository do ...@@ -470,6 +470,29 @@ RSpec.describe Repository do
end end
end end
describe '#commits_between' do
let(:commit) { project.commit }
it 'delegates to Gitlab::Git::Commit#between, returning decorated commits' do
expect(Gitlab::Git::Commit)
.to receive(:between)
.with(repository.raw_repository, commit.parent_id, commit.id, limit: 5)
.and_call_original
result = repository.commits_between(commit.parent_id, commit.id, limit: 5)
expect(result).to contain_exactly(instance_of(Commit), instance_of(Commit))
end
it 'defaults to no limit' do
expect(Gitlab::Git::Commit)
.to receive(:between)
.with(repository.raw_repository, commit.parent_id, commit.id, limit: nil)
repository.commits_between(commit.parent_id, commit.id)
end
end
describe '#find_commits_by_message' do describe '#find_commits_by_message' do
it 'returns commits with messages containing a given string' do it 'returns commits with messages containing a given string' do
commit_ids = repository.find_commits_by_message('submodule').map(&:id) commit_ids = repository.find_commits_by_message('submodule').map(&:id)
......
...@@ -19,9 +19,13 @@ RSpec.describe Git::BaseHooksService do ...@@ -19,9 +19,13 @@ RSpec.describe Git::BaseHooksService do
:push_hooks :push_hooks
end end
def commits def limited_commits
[] []
end end
def commits_count
0
end
end end
end end
......
...@@ -362,6 +362,9 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do ...@@ -362,6 +362,9 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
end end
end end
let(:commits_count) { service.send(:commits_count) }
let(:threshold_limit) { described_class::PROCESS_COMMIT_LIMIT + 1 }
let(:oldrev) { project.commit(commit_ids.first).parent_id } let(:oldrev) { project.commit(commit_ids.first).parent_id }
let(:newrev) { commit_ids.last } let(:newrev) { commit_ids.last }
...@@ -373,17 +376,31 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do ...@@ -373,17 +376,31 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:oldrev) { Gitlab::Git::BLANK_SHA } let(:oldrev) { Gitlab::Git::BLANK_SHA }
it 'processes a limited number of commit messages' do it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits)
.with(newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute service.execute
expect(commits_count).to eq(project.repository.commit_count_for_ref(newrev))
end end
end end
context 'updating the default branch' do context 'updating the default branch' do
it 'processes a limited number of commit messages' do it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits_between)
.with(oldrev, newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute service.execute
expect(commits_count).to eq(project.repository.count_commits_between(oldrev, newrev))
end end
end end
...@@ -391,9 +408,13 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do ...@@ -391,9 +408,13 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:newrev) { Gitlab::Git::BLANK_SHA } let(:newrev) { Gitlab::Git::BLANK_SHA }
it 'does not process commit messages' do it 'does not process commit messages' do
expect(project.repository).not_to receive(:commits)
expect(project.repository).not_to receive(:commits_between)
expect(ProcessCommitWorker).not_to receive(:perform_async) expect(ProcessCommitWorker).not_to receive(:perform_async)
service.execute service.execute
expect(commits_count).to eq(0)
end end
end end
...@@ -402,9 +423,16 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do ...@@ -402,9 +423,16 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:oldrev) { Gitlab::Git::BLANK_SHA } let(:oldrev) { Gitlab::Git::BLANK_SHA }
it 'processes a limited number of commit messages' do it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits_between)
.with(project.default_branch, newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute service.execute
expect(commits_count).to eq(project.repository.count_commits_between(project.default_branch, branch))
end end
end end
...@@ -412,9 +440,15 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do ...@@ -412,9 +440,15 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:branch) { 'fix' } let(:branch) { 'fix' }
it 'processes a limited number of commit messages' do it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits_between)
.with(oldrev, newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute service.execute
expect(commits_count).to eq(project.repository.count_commits_between(oldrev, newrev))
end end
end end
...@@ -423,9 +457,13 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do ...@@ -423,9 +457,13 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:newrev) { Gitlab::Git::BLANK_SHA } let(:newrev) { Gitlab::Git::BLANK_SHA }
it 'does not process commit messages' do it 'does not process commit messages' do
expect(project.repository).not_to receive(:commits)
expect(project.repository).not_to receive(:commits_between)
expect(ProcessCommitWorker).not_to receive(:perform_async) expect(ProcessCommitWorker).not_to receive(:perform_async)
service.execute service.execute
expect(commits_count).to eq(0)
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment