Commit 81be7217 authored by Nick Thomas's avatar Nick Thomas

Never fetch more than 101 commits when processing a git push

This change limits the number of commits we'll pull from Gitaly when
processing hooks for branch/tag push/delete actions to at-most 101.

This limit already exists when creating the initial default branch, but
when creating a new non-default branch, or pushing to an existing one,
we would read all the commits in the branch from Gitaly before throwing
all but the most recent 100 away and processing those for the hooks.

No behaviour changes are implied by this change, we're just skipping
unnecessary work by taking advantage of the new `limit` option to the
CommitsBetween RPC.

Since we never want the full list of `commits`, I change the interface
declared in `Git::BaseHooksService` to make it clearer as well.

Changelog: performance
parent c64ee58c
......@@ -161,8 +161,8 @@ class Repository
CommitCollection.new(container, commits, ref)
end
def commits_between(from, to)
commits = Gitlab::Git::Commit.between(raw_repository, from, to)
def commits_between(from, to, limit: nil)
commits = Gitlab::Git::Commit.between(raw_repository, from, to, limit: limit)
commits = Commit.decorate(commits, container) if commits.present?
commits
end
......
......@@ -25,17 +25,13 @@ module Git
raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end
# The changeset, ordered with the newest commit last
def commits
raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end
# This should return PROCESS_COMMIT_LIMIT commits, ordered with newest last
def limited_commits
@limited_commits ||= commits.last(PROCESS_COMMIT_LIMIT)
raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end
def commits_count
commits.count
raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end
def event_message
......
......@@ -18,20 +18,25 @@ module Git
:push_hooks
end
def commits
strong_memoize(:commits) do
def limited_commits
strong_memoize(:limited_commits) { threshold_commits.last(PROCESS_COMMIT_LIMIT) }
end
# Taking limit+1 commits allows us to detect when the limit is in effect
def threshold_commits
strong_memoize(:threshold_commits) do
if creating_default_branch?
# The most recent PROCESS_COMMIT_LIMIT commits in the default branch.
# They are returned newest-to-oldest, but we need to present them oldest-to-newest
project.repository.commits(newrev, limit: PROCESS_COMMIT_LIMIT).reverse
project.repository.commits(newrev, limit: PROCESS_COMMIT_LIMIT + 1).reverse!
elsif creating_branch?
# Use the pushed commits that aren't reachable by the default branch
# as a heuristic. This may include more commits than are actually
# pushed, but that shouldn't matter because we check for existing
# cross-references later.
project.repository.commits_between(project.default_branch, newrev)
project.repository.commits_between(project.default_branch, newrev, limit: PROCESS_COMMIT_LIMIT + 1)
elsif updating_branch?
project.repository.commits_between(oldrev, newrev)
project.repository.commits_between(oldrev, newrev, limit: PROCESS_COMMIT_LIMIT + 1)
else # removing branch
[]
end
......@@ -39,9 +44,21 @@ module Git
end
def commits_count
return count_commits_in_branch if creating_default_branch?
strong_memoize(:commits_count) do
next threshold_commits.count if
strong_memoized?(:threshold_commits) &&
threshold_commits.count <= PROCESS_COMMIT_LIMIT
super
if creating_default_branch?
project.repository.commit_count_for_ref(ref)
elsif creating_branch?
project.repository.count_commits_between(project.default_branch, newrev)
elsif updating_branch?
project.repository.count_commits_between(oldrev, newrev)
else # removing branch
0
end
end
end
override :invalidated_file_types
......@@ -179,12 +196,6 @@ module Git
creating_branch? && default_branch?
end
def count_commits_in_branch
strong_memoize(:count_commits_in_branch) do
project.repository.commit_count_for_ref(ref)
end
end
def default_branch?
strong_memoize(:default_branch) do
[nil, branch_name].include?(project.default_branch)
......
......@@ -8,10 +8,14 @@ module Git
:tag_push_hooks
end
def commits
def limited_commits
[tag_commit].compact
end
def commits_count
limited_commits.count
end
def event_message
tag&.message
end
......
......@@ -108,17 +108,26 @@ module Gitlab
# See also #repository.commits_between
#
# Ex.
# Commit.between(repo, '29eda46b', 'master')
# Commit.between(repo, '29eda46b', 'master') # all commits, ordered oldest to newest
# Commit.between(repo, '29eda46b', 'master', limit: 100) # 100 newest commits, ordered oldest to newest
#
def between(repo, base, head)
def between(repo, base, head, limit: nil)
# In either of these cases, we are guaranteed to return no commits, so
# shortcut the RPC call
return [] if Gitlab::Git.blank_ref?(base) || Gitlab::Git.blank_ref?(head)
wrapped_gitaly_errors do
revisions = [head, "^#{base}"] # base..head
repo.gitaly_commit_client.list_commits(revisions, reverse: true)
client = repo.gitaly_commit_client
# We must return the commits in chronological order but using both
# limit and reverse in the Gitaly RPC would return the oldest N,
# rather than newest N, commits, so reorder in Ruby with limit
if limit
client.list_commits(revisions, pagination_params: { limit: limit }).reverse!
else
client.list_commits(revisions, reverse: true)
end
end
end
......
......@@ -255,11 +255,12 @@ module Gitlab
consume_commits_response(response)
end
def list_commits(revisions, reverse: false)
def list_commits(revisions, reverse: false, pagination_params: nil)
request = Gitaly::ListCommitsRequest.new(
repository: @gitaly_repo,
revisions: Array.wrap(revisions),
reverse: reverse
reverse: reverse,
pagination_params: pagination_params
)
response = GitalyClient.call(@repository.storage, :commit_service, :list_commits, request, timeout: GitalyClient.medium_timeout)
......
......@@ -364,12 +364,40 @@ RSpec.describe Gitlab::Git::Commit, :seed_helper do
end
describe '.between' do
subject do
commits = described_class.between(repository, SeedRepo::Commit::PARENT_ID, SeedRepo::Commit::ID)
commits.map { |c| c.id }
let(:limit) { nil }
let(:commit_ids) { commits.map(&:id) }
subject(:commits) { described_class.between(repository, from, to, limit: limit) }
context 'requesting a single commit' do
let(:from) { SeedRepo::Commit::PARENT_ID }
let(:to) { SeedRepo::Commit::ID }
it { expect(commit_ids).to contain_exactly(to) }
end
it { is_expected.to contain_exactly(SeedRepo::Commit::ID) }
context 'requesting a commit range' do
let(:from) { 'v1.0.0' }
let(:to) { 'v1.2.0' }
let(:commits_in_range) do
%w[
570e7b2abdd848b95f2f578043fc23bd6f6fd24d
5937ac0a7beb003549fc5fd26fc247adbce4a52e
eb49186cfa5c4338011f5f590fac11bd66c5c631
]
end
context 'no limit' do
it { expect(commit_ids).to eq(commits_in_range) }
end
context 'limited' do
let(:limit) { 2 }
it { expect(commit_ids).to eq(commits_in_range.last(2)) }
end
end
end
describe '.shas_with_signatures' do
......
......@@ -311,6 +311,10 @@ RSpec.describe Gitlab::GitalyClient::CommitService do
end
describe '#list_commits' do
let(:revisions) { 'master' }
let(:reverse) { false }
let(:pagination_params) { nil }
shared_examples 'a ListCommits request' do
before do
::Gitlab::GitalyClient.clear_stubs!
......@@ -318,26 +322,35 @@ RSpec.describe Gitlab::GitalyClient::CommitService do
it 'sends a list_commits message' do
expect_next_instance_of(Gitaly::CommitService::Stub) do |service|
expect(service)
.to receive(:list_commits)
.with(gitaly_request_with_params(expected_params), kind_of(Hash))
.and_return([])
expected_request = gitaly_request_with_params(
Array.wrap(revisions),
reverse: reverse,
pagination_params: pagination_params
)
expect(service).to receive(:list_commits).with(expected_request, kind_of(Hash)).and_return([])
end
client.list_commits(revisions)
client.list_commits(revisions, reverse: reverse, pagination_params: pagination_params)
end
end
context 'with a single revision' do
let(:revisions) { 'master' }
let(:expected_params) { %w[master] }
it_behaves_like 'a ListCommits request'
context 'with multiple revisions' do
let(:revisions) { %w[master --not --all] }
it_behaves_like 'a ListCommits request'
end
context 'with reverse: true' do
let(:reverse) { true }
it_behaves_like 'a ListCommits request'
end
context 'with multiple revisions' do
let(:revisions) { %w[master --not --all] }
let(:expected_params) { %w[master --not --all] }
context 'with pagination params' do
let(:pagination_params) { { limit: 1, page_token: 'foo' } }
it_behaves_like 'a ListCommits request'
end
......
......@@ -470,6 +470,29 @@ RSpec.describe Repository do
end
end
describe '#commits_between' do
let(:commit) { project.commit }
it 'delegates to Gitlab::Git::Commit#between, returning decorated commits' do
expect(Gitlab::Git::Commit)
.to receive(:between)
.with(repository.raw_repository, commit.parent_id, commit.id, limit: 5)
.and_call_original
result = repository.commits_between(commit.parent_id, commit.id, limit: 5)
expect(result).to contain_exactly(instance_of(Commit), instance_of(Commit))
end
it 'defaults to no limit' do
expect(Gitlab::Git::Commit)
.to receive(:between)
.with(repository.raw_repository, commit.parent_id, commit.id, limit: nil)
repository.commits_between(commit.parent_id, commit.id)
end
end
describe '#find_commits_by_message' do
it 'returns commits with messages containing a given string' do
commit_ids = repository.find_commits_by_message('submodule').map(&:id)
......
......@@ -19,9 +19,13 @@ RSpec.describe Git::BaseHooksService do
:push_hooks
end
def commits
def limited_commits
[]
end
def commits_count
0
end
end
end
......
......@@ -362,6 +362,9 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
end
end
let(:commits_count) { service.send(:commits_count) }
let(:threshold_limit) { described_class::PROCESS_COMMIT_LIMIT + 1 }
let(:oldrev) { project.commit(commit_ids.first).parent_id }
let(:newrev) { commit_ids.last }
......@@ -373,17 +376,31 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:oldrev) { Gitlab::Git::BLANK_SHA }
it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits)
.with(newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
expect(commits_count).to eq(project.repository.commit_count_for_ref(newrev))
end
end
context 'updating the default branch' do
it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits_between)
.with(oldrev, newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
expect(commits_count).to eq(project.repository.count_commits_between(oldrev, newrev))
end
end
......@@ -391,9 +408,13 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:newrev) { Gitlab::Git::BLANK_SHA }
it 'does not process commit messages' do
expect(project.repository).not_to receive(:commits)
expect(project.repository).not_to receive(:commits_between)
expect(ProcessCommitWorker).not_to receive(:perform_async)
service.execute
expect(commits_count).to eq(0)
end
end
......@@ -402,9 +423,16 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:oldrev) { Gitlab::Git::BLANK_SHA }
it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits_between)
.with(project.default_branch, newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
expect(commits_count).to eq(project.repository.count_commits_between(project.default_branch, branch))
end
end
......@@ -412,9 +440,15 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:branch) { 'fix' }
it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits_between)
.with(oldrev, newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
expect(commits_count).to eq(project.repository.count_commits_between(oldrev, newrev))
end
end
......@@ -423,9 +457,13 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:newrev) { Gitlab::Git::BLANK_SHA }
it 'does not process commit messages' do
expect(project.repository).not_to receive(:commits)
expect(project.repository).not_to receive(:commits_between)
expect(ProcessCommitWorker).not_to receive(:perform_async)
service.execute
expect(commits_count).to eq(0)
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment