Commit ff78882f authored by Yorick Peterse's avatar Yorick Peterse

Add finder for getting commits with a trailer set

This class fetches all commits in a range in batches, selecting only
commits with a certain trailer set. This class will be used to fetch the
commits that should be used to generate a changelog.

This builds on the changes introduced in
https://gitlab.com/gitlab-org/gitaly/-/merge_requests/2842 and
https://gitlab.com/gitlab-org/gitaly/-/merge_requests/2999. To take
advantage of these changes, we also bump the Gitaly Gem version and add
fallback code for getting trailers using Rugged; as Rugged is still in
use. In addition, we change the merge_request_diff_commits table and the
MergeRequestDiffCommit model to support storing the trailers as JSON.

See https://gitlab.com/gitlab-com/gl-infra/delivery/-/issues/1365 for
more information.
parent ce7382e7
...@@ -462,7 +462,7 @@ group :ed25519 do ...@@ -462,7 +462,7 @@ group :ed25519 do
end end
# Gitaly GRPC protocol definitions # Gitaly GRPC protocol definitions
gem 'gitaly', '~> 13.8.0.pre.rc2' gem 'gitaly', '~> 13.8.0.pre.rc3'
gem 'grpc', '~> 1.30.2' gem 'grpc', '~> 1.30.2'
......
...@@ -417,7 +417,7 @@ GEM ...@@ -417,7 +417,7 @@ GEM
rails (>= 3.2.0) rails (>= 3.2.0)
git (1.7.0) git (1.7.0)
rchardet (~> 1.8) rchardet (~> 1.8)
gitaly (13.8.0.pre.rc2) gitaly (13.8.0.pre.rc3)
grpc (~> 1.0) grpc (~> 1.0)
github-markup (1.7.0) github-markup (1.7.0)
gitlab-chronic (0.10.5) gitlab-chronic (0.10.5)
...@@ -1357,7 +1357,7 @@ DEPENDENCIES ...@@ -1357,7 +1357,7 @@ DEPENDENCIES
gettext (~> 3.3) gettext (~> 3.3)
gettext_i18n_rails (~> 1.8.0) gettext_i18n_rails (~> 1.8.0)
gettext_i18n_rails_js (~> 1.3) gettext_i18n_rails_js (~> 1.3)
gitaly (~> 13.8.0.pre.rc2) gitaly (~> 13.8.0.pre.rc3)
github-markup (~> 1.7.0) github-markup (~> 1.7.0)
gitlab-chronic (~> 0.10.5) gitlab-chronic (~> 0.10.5)
gitlab-experiment (~> 0.4.5) gitlab-experiment (~> 0.4.5)
......
# frozen_string_literal: true
module Repositories
# Finder for obtaining commits between two refs, with a Git trailer set.
class CommitsWithTrailerFinder
# The maximum number of commits to retrieve per page.
#
# This value is arbitrarily chosen. Lowering it means more Gitaly calls, but
# less data being loaded into memory at once. Increasing it has the opposite
# effect.
#
# This amount is based around the number of commits that usually go in a
# GitLab release. Some examples for GitLab's own releases:
#
# * 13.6.0: 4636 commits
# * 13.5.0: 5912 commits
# * 13.4.0: 5541 commits
#
# Using this limit should result in most (very large) projects only needing
# 5-10 Gitaly calls, while keeping memory usage at a reasonable amount.
COMMITS_PER_PAGE = 1024
# The `project` argument specifies the project for which to obtain the
# commits.
#
# The `from` and `to` arguments specify the range of commits to include. The
# commit specified in `from` won't be included itself. The commit specified
# in `to` _is_ included.
#
# The `per_page` argument specifies how many commits are retrieved in a single
# Gitaly API call.
def initialize(project:, from:, to:, per_page: COMMITS_PER_PAGE)
@project = project
@from = from
@to = to
@per_page = per_page
end
# Fetches all commits that have the given trailer set.
#
# The commits are yielded to the supplied block in batches. This allows
# other code to process these commits in batches too, instead of first
# having to load all commits into memory.
#
# Example:
#
# CommitsWithTrailerFinder.new(...).each_page('Signed-off-by') do |commits|
# commits.each do |commit|
# ...
# end
# end
def each_page(trailer)
return to_enum(__method__, trailer) unless block_given?
offset = 0
response = fetch_commits
while response.any?
commits = []
response.each do |commit|
commits.push(commit) if commit.trailers.key?(trailer)
end
yield commits
offset += response.length
response = fetch_commits(offset)
end
end
private
def fetch_commits(offset = 0)
range = "#{@from}..#{@to}"
@project
.repository
.commits(range, limit: @per_page, offset: offset, trailers: true)
end
end
end
...@@ -12,6 +12,9 @@ class MergeRequestContextCommit < ApplicationRecord ...@@ -12,6 +12,9 @@ class MergeRequestContextCommit < ApplicationRecord
validates :sha, presence: true validates :sha, presence: true
validates :sha, uniqueness: { message: 'has already been added' } validates :sha, uniqueness: { message: 'has already been added' }
serialize :trailers, Serializers::JSON # rubocop:disable Cop/ActiveRecordSerialize
validates :trailers, json_schema: { filename: 'git_trailers' }
# Sort by committed date in descending order to ensure latest commits comes on the top # Sort by committed date in descending order to ensure latest commits comes on the top
scope :order_by_committed_date_desc, -> { order('committed_date DESC') } scope :order_by_committed_date_desc, -> { order('committed_date DESC') }
......
...@@ -10,6 +10,9 @@ class MergeRequestDiffCommit < ApplicationRecord ...@@ -10,6 +10,9 @@ class MergeRequestDiffCommit < ApplicationRecord
sha_attribute :sha sha_attribute :sha
alias_attribute :id, :sha alias_attribute :id, :sha
serialize :trailers, Serializers::JSON # rubocop:disable Cop/ActiveRecordSerialize
validates :trailers, json_schema: { filename: 'git_trailers' }
# Deprecated; use `bulk_insert!` from `BulkInsertSafe` mixin instead. # Deprecated; use `bulk_insert!` from `BulkInsertSafe` mixin instead.
# cf. https://gitlab.com/gitlab-org/gitlab/issues/207989 for progress # cf. https://gitlab.com/gitlab-org/gitlab/issues/207989 for progress
def self.create_bulk(merge_request_diff_id, commits) def self.create_bulk(merge_request_diff_id, commits)
...@@ -23,7 +26,8 @@ class MergeRequestDiffCommit < ApplicationRecord ...@@ -23,7 +26,8 @@ class MergeRequestDiffCommit < ApplicationRecord
relative_order: index, relative_order: index,
sha: Gitlab::Database::ShaAttribute.serialize(sha), # rubocop:disable Cop/ActiveRecordSerialize sha: Gitlab::Database::ShaAttribute.serialize(sha), # rubocop:disable Cop/ActiveRecordSerialize
authored_date: Gitlab::Database.sanitize_timestamp(commit_hash[:authored_date]), authored_date: Gitlab::Database.sanitize_timestamp(commit_hash[:authored_date]),
committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date]) committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date]),
trailers: commit_hash.fetch(:trailers, {}).to_json
) )
end end
......
...@@ -151,7 +151,8 @@ class Repository ...@@ -151,7 +151,8 @@ class Repository
all: !!opts[:all], all: !!opts[:all],
first_parent: !!opts[:first_parent], first_parent: !!opts[:first_parent],
order: opts[:order], order: opts[:order],
literal_pathspec: opts.fetch(:literal_pathspec, true) literal_pathspec: opts.fetch(:literal_pathspec, true),
trailers: opts[:trailers]
} }
commits = Gitlab::Git::Commit.where(options) commits = Gitlab::Git::Commit.where(options)
......
...@@ -66,7 +66,8 @@ module MergeRequests ...@@ -66,7 +66,8 @@ module MergeRequests
relative_order: index, relative_order: index,
sha: sha, sha: sha,
authored_date: Gitlab::Database.sanitize_timestamp(commit_hash[:authored_date]), authored_date: Gitlab::Database.sanitize_timestamp(commit_hash[:authored_date]),
committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date]) committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date]),
trailers: commit_hash.fetch(:trailers, {}).to_json
) )
end end
end end
......
{
"description": "Git trailer key/value pairs",
"type": "object",
"patternProperties": {
".*": {
"type": "string"
}
}
}
---
title: Add finder for getting commits with a trailer set
merge_request: 49243
author:
type: added
# frozen_string_literal: true
# See https://docs.gitlab.com/ee/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class AddMergeRequestDiffCommitTrailers < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def up
with_lock_retries do
add_column :merge_request_diff_commits, :trailers, :jsonb, default: {}, null: false
end
end
def down
with_lock_retries do
remove_column :merge_request_diff_commits, :trailers
end
end
end
# frozen_string_literal: true
# See https://docs.gitlab.com/ee/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class AddMergeRequestContextCommitTrailers < ActiveRecord::Migration[6.0]
DOWNTIME = false
def change
add_column :merge_request_context_commits, :trailers, :jsonb, default: {}, null: false
end
end
4aeff45663a9f5a41a8dd92298afb4b0b57aa6f190f4648455df2fa1e39e174f
\ No newline at end of file
3e867ceefcab4f043b89d3c04e6e0a1182423039e1a9245e611128efe77c0e88
\ No newline at end of file
...@@ -13863,7 +13863,8 @@ CREATE TABLE merge_request_context_commits ( ...@@ -13863,7 +13863,8 @@ CREATE TABLE merge_request_context_commits (
committer_name text, committer_name text,
committer_email text, committer_email text,
message text, message text,
merge_request_id bigint merge_request_id bigint,
trailers jsonb DEFAULT '{}'::jsonb NOT NULL
); );
CREATE SEQUENCE merge_request_context_commits_id_seq CREATE SEQUENCE merge_request_context_commits_id_seq
...@@ -13885,7 +13886,8 @@ CREATE TABLE merge_request_diff_commits ( ...@@ -13885,7 +13886,8 @@ CREATE TABLE merge_request_diff_commits (
author_email text, author_email text,
committer_name text, committer_name text,
committer_email text, committer_email text,
message text message text,
trailers jsonb DEFAULT '{}'::jsonb NOT NULL
); );
CREATE TABLE merge_request_diff_details ( CREATE TABLE merge_request_diff_details (
......
...@@ -16,7 +16,7 @@ module Gitlab ...@@ -16,7 +16,7 @@ module Gitlab
SERIALIZE_KEYS = [ SERIALIZE_KEYS = [
:id, :message, :parent_ids, :id, :message, :parent_ids,
:authored_date, :author_name, :author_email, :authored_date, :author_name, :author_email,
:committed_date, :committer_name, :committer_email :committed_date, :committer_name, :committer_email, :trailers
].freeze ].freeze
attr_accessor(*SERIALIZE_KEYS) attr_accessor(*SERIALIZE_KEYS)
...@@ -389,6 +389,7 @@ module Gitlab ...@@ -389,6 +389,7 @@ module Gitlab
@committer_name = commit.committer.name.dup @committer_name = commit.committer.name.dup
@committer_email = commit.committer.email.dup @committer_email = commit.committer.email.dup
@parent_ids = Array(commit.parent_ids) @parent_ids = Array(commit.parent_ids)
@trailers = Hash[commit.trailers.map { |t| [t.key, t.value] }]
end end
# Gitaly provides a UNIX timestamp in author.date.seconds, and a timezone # Gitaly provides a UNIX timestamp in author.date.seconds, and a timezone
......
...@@ -103,6 +103,7 @@ module Gitlab ...@@ -103,6 +103,7 @@ module Gitlab
@committer_name = committer[:name] @committer_name = committer[:name]
@committer_email = committer[:email] @committer_email = committer[:email]
@parent_ids = commit.parents.map(&:oid) @parent_ids = commit.parents.map(&:oid)
@trailers = Hash[commit.trailers]
end end
end end
end end
......
...@@ -335,7 +335,8 @@ module Gitlab ...@@ -335,7 +335,8 @@ module Gitlab
all: !!options[:all], all: !!options[:all],
first_parent: !!options[:first_parent], first_parent: !!options[:first_parent],
global_options: parse_global_options!(options), global_options: parse_global_options!(options),
disable_walk: true # This option is deprecated. The 'walk' implementation is being removed. disable_walk: true, # This option is deprecated. The 'walk' implementation is being removed.
trailers: options[:trailers]
) )
request.after = GitalyClient.timestamp(options[:after]) if options[:after] request.after = GitalyClient.timestamp(options[:after]) if options[:after]
request.before = GitalyClient.timestamp(options[:before]) if options[:before] request.before = GitalyClient.timestamp(options[:before]) if options[:before]
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Repositories::CommitsWithTrailerFinder do
let(:project) { create(:project, :repository) }
describe '#each_page' do
it 'only yields commits with the given trailer' do
finder = described_class.new(
project: project,
from: '570e7b2abdd848b95f2f578043fc23bd6f6fd24d',
to: 'c7fbe50c7c7419d9701eebe64b1fdacc3df5b9dd'
)
commits = finder.each_page('Signed-off-by').to_a.flatten
expect(commits.length).to eq(1)
expect(commits.first.id).to eq('5937ac0a7beb003549fc5fd26fc247adbce4a52e')
expect(commits.first.trailers).to eq(
'Signed-off-by' => 'Dmitriy Zaporozhets <dmitriy.zaporozhets@gmail.com>'
)
end
it 'supports paginating of commits' do
finder = described_class.new(
project: project,
from: 'c1acaa58bbcbc3eafe538cb8274ba387047b69f8',
to: '5937ac0a7beb003549fc5fd26fc247adbce4a52e',
per_page: 1
)
commits = finder.each_page('Signed-off-by')
expect(commits.count).to eq(4)
end
end
end
...@@ -720,7 +720,8 @@ RSpec.describe Gitlab::Git::Commit, :seed_helper do ...@@ -720,7 +720,8 @@ RSpec.describe Gitlab::Git::Commit, :seed_helper do
committer_name: "Dmitriy Zaporozhets", committer_name: "Dmitriy Zaporozhets",
id: SeedRepo::Commit::ID, id: SeedRepo::Commit::ID,
message: "tree css fixes", message: "tree css fixes",
parent_ids: ["874797c3a73b60d2187ed6e2fcabd289ff75171e"] parent_ids: ["874797c3a73b60d2187ed6e2fcabd289ff75171e"],
trailers: {}
} }
end end
end end
...@@ -231,6 +231,7 @@ MergeRequestDiffCommit: ...@@ -231,6 +231,7 @@ MergeRequestDiffCommit:
- committer_name - committer_name
- committer_email - committer_email
- message - message
- trailers
MergeRequestDiffFile: MergeRequestDiffFile:
- merge_request_diff_id - merge_request_diff_id
- relative_order - relative_order
...@@ -255,6 +256,7 @@ MergeRequestContextCommit: ...@@ -255,6 +256,7 @@ MergeRequestContextCommit:
- committer_email - committer_email
- message - message
- merge_request_id - merge_request_id
- trailers
MergeRequestContextCommitDiffFile: MergeRequestContextCommitDiffFile:
- sha - sha
- relative_order - relative_order
......
...@@ -48,7 +48,8 @@ RSpec.describe MergeRequestDiffCommit do ...@@ -48,7 +48,8 @@ RSpec.describe MergeRequestDiffCommit do
"committer_email": "dmitriy.zaporozhets@gmail.com", "committer_email": "dmitriy.zaporozhets@gmail.com",
"merge_request_diff_id": merge_request_diff_id, "merge_request_diff_id": merge_request_diff_id,
"relative_order": 0, "relative_order": 0,
"sha": Gitlab::Database::ShaAttribute.serialize("5937ac0a7beb003549fc5fd26fc247adbce4a52e") "sha": Gitlab::Database::ShaAttribute.serialize("5937ac0a7beb003549fc5fd26fc247adbce4a52e"),
"trailers": {}.to_json
}, },
{ {
"message": "Change some files\n\nSigned-off-by: Dmitriy Zaporozhets \u003cdmitriy.zaporozhets@gmail.com\u003e\n", "message": "Change some files\n\nSigned-off-by: Dmitriy Zaporozhets \u003cdmitriy.zaporozhets@gmail.com\u003e\n",
...@@ -60,7 +61,8 @@ RSpec.describe MergeRequestDiffCommit do ...@@ -60,7 +61,8 @@ RSpec.describe MergeRequestDiffCommit do
"committer_email": "dmitriy.zaporozhets@gmail.com", "committer_email": "dmitriy.zaporozhets@gmail.com",
"merge_request_diff_id": merge_request_diff_id, "merge_request_diff_id": merge_request_diff_id,
"relative_order": 1, "relative_order": 1,
"sha": Gitlab::Database::ShaAttribute.serialize("570e7b2abdd848b95f2f578043fc23bd6f6fd24d") "sha": Gitlab::Database::ShaAttribute.serialize("570e7b2abdd848b95f2f578043fc23bd6f6fd24d"),
"trailers": {}.to_json
} }
] ]
end end
...@@ -92,7 +94,8 @@ RSpec.describe MergeRequestDiffCommit do ...@@ -92,7 +94,8 @@ RSpec.describe MergeRequestDiffCommit do
"committer_email": "alejorro70@gmail.com", "committer_email": "alejorro70@gmail.com",
"merge_request_diff_id": merge_request_diff_id, "merge_request_diff_id": merge_request_diff_id,
"relative_order": 0, "relative_order": 0,
"sha": Gitlab::Database::ShaAttribute.serialize("ba3343bc4fa403a8dfbfcab7fc1a8c29ee34bd69") "sha": Gitlab::Database::ShaAttribute.serialize("ba3343bc4fa403a8dfbfcab7fc1a8c29ee34bd69"),
"trailers": {}.to_json
}] }]
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment