Commit 1eddbfca authored by Rubén Dávila's avatar Rubén Dávila

Fix timeout when MaxFileSize push rule was active on large repo

Instead of inspecting each commit delta, we rely on the usage of `git diff`
and `git cat-file` commands.
parent 326e39bb
...@@ -36,7 +36,6 @@ class PushRule < ActiveRecord::Base ...@@ -36,7 +36,6 @@ class PushRule < ActiveRecord::Base
commit_committer_check || commit_committer_check ||
member_check || member_check ||
file_name_regex.present? || file_name_regex.present? ||
max_file_size > 0 ||
prevent_secrets prevent_secrets
end end
......
---
title: Large pushes were failing when max file size push rule was active
merge_request: 4989
author:
type: fixed
...@@ -20,6 +20,7 @@ module EE ...@@ -20,6 +20,7 @@ module EE
super(skip_commits_check: true) super(skip_commits_check: true)
push_rule_check push_rule_check
file_size_check
# Check of commits should happen as the last step # Check of commits should happen as the last step
# given they're expensive in terms of performance # given they're expensive in terms of performance
commits_check commits_check
...@@ -29,6 +30,30 @@ module EE ...@@ -29,6 +30,30 @@ module EE
private private
def file_size_check
return if push_rule.nil? || push_rule.max_file_size.zero?
max_file_size = push_rule.max_file_size
large_file = changes.find do |c|
size_in_mb = ::Gitlab::Utils.bytes_to_megabytes(c.blob_size)
c.operation != :deleted && size_in_mb > max_file_size
end
if large_file
raise ::Gitlab::GitAccess::UnauthorizedError, %Q{File "#{large_file.new_path}" is larger than the allowed size of #{max_file_size} MB}
end
end
def changes
strong_memoize(:changes) do
return [] unless newrev
project.repository.raw_changes_between(oldrev, newrev)
end
end
def push_rule def push_rule
project.push_rule project.push_rule
end end
...@@ -152,9 +177,7 @@ module EE ...@@ -152,9 +177,7 @@ module EE
def push_rule_checks_commit? def push_rule_checks_commit?
return false unless push_rule return false unless push_rule
push_rule.max_file_size > 0 || push_rule.file_name_regex.present? || push_rule.prevent_secrets
push_rule.file_name_regex.present? ||
push_rule.prevent_secrets
end end
override :validations_for_commit override :validations_for_commit
...@@ -168,11 +191,7 @@ module EE ...@@ -168,11 +191,7 @@ module EE
def push_rule_commit_validations(commit) def push_rule_commit_validations(commit)
return [] unless push_rule return [] unless push_rule
[file_name_validation].tap do |validations| [file_name_validation]
if push_rule.max_file_size > 0
validations << file_size_validation(commit, push_rule.max_file_size)
end
end
end end
def validate_path_locks? def validate_path_locks?
...@@ -208,17 +227,6 @@ module EE ...@@ -208,17 +227,6 @@ module EE
end end
end end
end end
def file_size_validation(commit, max_file_size)
lambda do |diff|
return if diff.deleted_file
blob = project.repository.blob_at(commit.id, diff.new_path)
if blob && blob.size && blob.size > max_file_size.megabytes
return "File #{diff.new_path.inspect} is larger than the allowed size of #{max_file_size} MB"
end
end
end
end end
end end
end end
......
...@@ -250,7 +250,7 @@ describe Gitlab::Checks::ChangeAccess do ...@@ -250,7 +250,7 @@ describe Gitlab::Checks::ChangeAccess do
let(:push_rule) { create(:push_rule, max_file_size: 1) } let(:push_rule) { create(:push_rule, max_file_size: 1) }
before do before do
allow_any_instance_of(Blob).to receive(:size).and_return(2.megabytes) allow_any_instance_of(::Gitlab::Git::RawDiffChange).to receive(:blob_size).and_return(2.megabytes)
end end
it_behaves_like 'check ignored when push rule unlicensed' it_behaves_like 'check ignored when push rule unlicensed'
......
require 'spec_helper' require 'spec_helper'
describe PushRule do describe PushRule do
using RSpec::Parameterized::TableSyntax
let(:global_push_rule) { create(:push_rule_sample) } let(:global_push_rule) { create(:push_rule_sample) }
let(:push_rule) { create(:push_rule) } let(:push_rule) { create(:push_rule) }
let(:user) { create(:user) } let(:user) { create(:user) }
...@@ -83,32 +85,32 @@ describe PushRule do ...@@ -83,32 +85,32 @@ describe PushRule do
describe '#commit_validation?' do describe '#commit_validation?' do
let(:settings_with_global_default) { %i(reject_unsigned_commits) } let(:settings_with_global_default) { %i(reject_unsigned_commits) }
settings = { where(:setting, :value, :result) do
commit_message_regex: 'regex', :commit_message_regex | 'regex' | true
branch_name_regex: 'regex', :branch_name_regex | 'regex' | true
author_email_regex: 'regex', :author_email_regex | 'regex' | true
file_name_regex: 'regex', :file_name_regex | 'regex' | true
reject_unsigned_commits: true, :reject_unsigned_commits | true | true
commit_committer_check: true, :commit_committer_check | true | true
member_check: true, :member_check | true | true
prevent_secrets: true, :prevent_secrets | true | true
max_file_size: 1 :max_file_size | 1 | false
} end
settings.each do |setting, value| with_them do
context "when #{setting} is enabled at global level" do context "when rule is enabled at global level" do
before do before do
global_push_rule.update_column(setting, value) global_push_rule.update_column(setting, value)
end end
it "returns true at project level" do it "returns the default value at project level" do
rule = project.push_rule rule = project.push_rule
if settings_with_global_default.include?(setting) if settings_with_global_default.include?(setting)
rule.update_column(setting, nil) rule.update_column(setting, nil)
end end
expect(rule.commit_validation?).to eq(true) expect(rule.commit_validation?).to eq(result)
end end
end end
end end
......
module Gitlab module Gitlab
module Git module Git
# The ID of empty tree.
# See http://stackoverflow.com/a/40884093/1856239 and
# https://github.com/git/git/blob/3ad8b5bf26362ac67c9020bf8c30eee54a84f56d/cache.h#L1011-L1012
EMPTY_TREE_ID = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'.freeze
BLANK_SHA = ('0' * 40).freeze BLANK_SHA = ('0' * 40).freeze
TAG_REF_PREFIX = "refs/tags/".freeze TAG_REF_PREFIX = "refs/tags/".freeze
BRANCH_REF_PREFIX = "refs/heads/".freeze BRANCH_REF_PREFIX = "refs/heads/".freeze
......
module Gitlab
module Git
# This class behaves like a struct with fields :blob_id, :blob_size, :operation, :old_path, :new_path
# All those fields are (binary) strings or integers
class RawDiffChange
attr_reader :blob_id, :blob_size, :old_path, :new_path, :operation
def initialize(raw_change)
parse(raw_change)
end
private
# Input data has the following format:
#
# When a file has been modified:
# 7e3e39ebb9b2bf433b4ad17313770fbe4051649c 669 M\tfiles/ruby/popen.rb
#
# When a file has been renamed:
# 85bc2f9753afd5f4fc5d7c75f74f8d526f26b4f3 107 R060\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee
def parse(raw_change)
@blob_id, @blob_size, @raw_operation, raw_paths = raw_change.split(' ', 4)
@operation = extract_operation
@old_path, @new_path = extract_paths(raw_paths)
end
def extract_paths(file_path)
case operation
when :renamed
file_path.split(/\t/)
when :deleted
[file_path, nil]
when :added
[nil, file_path]
else
[file_path, file_path]
end
end
def extract_operation
case @raw_operation&.first(1)
when 'A'
:added
when 'C'
:copied
when 'D'
:deleted
when 'M'
:modified
when 'R'
:renamed
when 'T'
:type_changed
else
:unknown
end
end
end
end
end
...@@ -559,6 +559,24 @@ module Gitlab ...@@ -559,6 +559,24 @@ module Gitlab
count_commits(from: from, to: to, **options) count_commits(from: from, to: to, **options)
end end
# old_rev and new_rev are commit ID's
# the result of this method is an array of Gitlab::Git::RawDiffChange
def raw_changes_between(old_rev, new_rev)
result = []
circuit_breaker.perform do
Open3.pipeline_r(git_diff_cmd(old_rev, new_rev), format_git_cat_file_script, git_cat_file_cmd) do |last_stdout, wait_threads|
last_stdout.each_line { |line| result << ::Gitlab::Git::RawDiffChange.new(line.chomp!) }
if wait_threads.any? { |waiter| !waiter.value&.success? }
raise ::Gitlab::Git::Repository::GitError, "Unabled to obtain changes between #{old_rev} and #{new_rev}"
end
end
end
result
end
# Returns the SHA of the most recent common ancestor of +from+ and +to+ # Returns the SHA of the most recent common ancestor of +from+ and +to+
def merge_base(from, to) def merge_base(from, to)
gitaly_migrate(:merge_base) do |is_enabled| gitaly_migrate(:merge_base) do |is_enabled|
...@@ -2482,6 +2500,35 @@ module Gitlab ...@@ -2482,6 +2500,35 @@ module Gitlab
result.to_s(16) result.to_s(16)
end end
def build_git_cmd(*args)
object_directories = alternate_object_directories.join(File::PATH_SEPARATOR)
env = { 'PWD' => self.path }
env['GIT_ALTERNATE_OBJECT_DIRECTORIES'] = object_directories if object_directories.present?
[
env,
::Gitlab.config.git.bin_path,
*args,
{ chdir: self.path }
]
end
def git_diff_cmd(old_rev, new_rev)
old_rev = old_rev == ::Gitlab::Git::BLANK_SHA ? ::Gitlab::Git::EMPTY_TREE_ID : old_rev
build_git_cmd('diff', old_rev, new_rev, '--raw')
end
def git_cat_file_cmd
format = '%(objectname) %(objectsize) %(rest)'
build_git_cmd('cat-file', "--batch-check=#{format}")
end
def format_git_cat_file_script
File.expand_path('../support/format-git-cat-file-input', __FILE__)
end
end end
end end
end end
#!/usr/bin/env ruby
# This script formats the output of the `git diff <old_rev> <new_rev> --raw`
# command so it can be processed by `git cat-file`
# We need to convert this:
# ":100644 100644 5f53439... 85bc2f9... R060\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee"
# To:
# "85bc2f9 R\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee"
ARGF.each do |line|
_, _, old_blob_id, new_blob_id, rest = line.split(/\s/, 5)
old_blob_id.gsub!(/[^\h]/, '')
new_blob_id.gsub!(/[^\h]/, '')
# We can't pass '0000000...' to `git cat-file` given it will not return info about the deleted file
blob_id = new_blob_id =~ /\A0+\z/ ? old_blob_id : new_blob_id
$stdout.puts "#{blob_id} #{rest}"
end
...@@ -3,10 +3,6 @@ module Gitlab ...@@ -3,10 +3,6 @@ module Gitlab
class CommitService class CommitService
include Gitlab::EncodingHelper include Gitlab::EncodingHelper
# The ID of empty tree.
# See http://stackoverflow.com/a/40884093/1856239 and https://github.com/git/git/blob/3ad8b5bf26362ac67c9020bf8c30eee54a84f56d/cache.h#L1011-L1012
EMPTY_TREE_ID = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'.freeze
def initialize(repository) def initialize(repository)
@gitaly_repo = repository.gitaly_repository @gitaly_repo = repository.gitaly_repository
@repository = repository @repository = repository
...@@ -37,7 +33,7 @@ module Gitlab ...@@ -37,7 +33,7 @@ module Gitlab
def diff(from, to, options = {}) def diff(from, to, options = {})
from_id = case from from_id = case from
when NilClass when NilClass
EMPTY_TREE_ID Gitlab::Git::EMPTY_TREE_ID
else else
if from.respond_to?(:oid) if from.respond_to?(:oid)
# This is meant to match a Rugged::Commit. This should be impossible in # This is meant to match a Rugged::Commit. This should be impossible in
...@@ -50,7 +46,7 @@ module Gitlab ...@@ -50,7 +46,7 @@ module Gitlab
to_id = case to to_id = case to
when NilClass when NilClass
EMPTY_TREE_ID Gitlab::Git::EMPTY_TREE_ID
else else
if to.respond_to?(:oid) if to.respond_to?(:oid)
# This is meant to match a Rugged::Commit. This should be impossible in # This is meant to match a Rugged::Commit. This should be impossible in
...@@ -352,7 +348,7 @@ module Gitlab ...@@ -352,7 +348,7 @@ module Gitlab
end end
def diff_from_parent_request_params(commit, options = {}) def diff_from_parent_request_params(commit, options = {})
parent_id = commit.parent_ids.first || EMPTY_TREE_ID parent_id = commit.parent_ids.first || Gitlab::Git::EMPTY_TREE_ID
diff_between_commits_request_params(parent_id, commit.id, options) diff_between_commits_request_params(parent_id, commit.id, options)
end end
......
...@@ -80,6 +80,10 @@ module Gitlab ...@@ -80,6 +80,10 @@ module Gitlab
size size
end end
def bytes_to_megabytes(bytes)
bytes.to_f / Numeric::MEGABYTE
end
# Accepts either an Array or a String and returns an array # Accepts either an Array or a String and returns an array
def ensure_array_from_string(string_or_array) def ensure_array_from_string(string_or_array)
return string_or_array if string_or_array.is_a?(Array) return string_or_array if string_or_array.is_a?(Array)
......
require 'spec_helper'
describe Gitlab::Git::RawDiffChange do
let(:raw_change) { }
let(:change) { described_class.new(raw_change) }
context 'bad input' do
let(:raw_change) { 'foo' }
it 'does not set most of the attrs' do
expect(change.blob_id).to eq('foo')
expect(change.operation).to eq(:unknown)
expect(change.old_path).to be_blank
expect(change.new_path).to be_blank
expect(change.blob_size).to be_blank
end
end
context 'adding a file' do
let(:raw_change) { '93e123ac8a3e6a0b600953d7598af629dec7b735 59 A bar/branch-test.txt' }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:added)
expect(change.old_path).to be_blank
expect(change.new_path).to eq('bar/branch-test.txt')
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
context 'renaming a file' do
let(:raw_change) { "85bc2f9753afd5f4fc5d7c75f74f8d526f26b4f3 107 R060\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee" }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:renamed)
expect(change.old_path).to eq('files/js/commit.js.coffee')
expect(change.new_path).to eq('files/js/commit.coffee')
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
context 'modifying a file' do
let(:raw_change) { 'c60514b6d3d6bf4bec1030f70026e34dfbd69ad5 824 M README.md' }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:modified)
expect(change.old_path).to eq('README.md')
expect(change.new_path).to eq('README.md')
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
context 'deleting a file' do
let(:raw_change) { '60d7a906c2fd9e4509aeb1187b98d0ea7ce827c9 15364 D files/.DS_Store' }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:deleted)
expect(change.old_path).to eq('files/.DS_Store')
expect(change.new_path).to be_nil
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
end
...@@ -1043,6 +1043,44 @@ describe Gitlab::Git::Repository, seed_helper: true do ...@@ -1043,6 +1043,44 @@ describe Gitlab::Git::Repository, seed_helper: true do
it { is_expected.to eq(17) } it { is_expected.to eq(17) }
end end
describe '#raw_changes_between' do
let(:old_rev) { }
let(:new_rev) { }
let(:changes) { repository.raw_changes_between(old_rev, new_rev) }
context 'initial commit' do
let(:old_rev) { Gitlab::Git::BLANK_SHA }
let(:new_rev) { '1a0b36b3cdad1d2ee32457c102a8c0b7056fa863' }
it 'returns the changes' do
expect(changes).to be_present
expect(changes.size).to eq(3)
end
end
context 'with an invalid rev' do
let(:old_rev) { 'foo' }
let(:new_rev) { 'bar' }
it 'returns an error' do
expect { changes }.to raise_error(Gitlab::Git::Repository::GitError)
end
end
context 'with valid revs' do
let(:old_rev) { 'fa1b1e6c004a68b7d8763b86455da9e6b23e36d6' }
let(:new_rev) { '4b4918a572fa86f9771e5ba40fbd48e1eb03e2c6' }
it 'returns the changes' do
expect(changes.size).to eq(9)
expect(changes.first.operation).to eq(:modified)
expect(changes.first.new_path).to eq('.gitmodules')
expect(changes.last.operation).to eq(:added)
expect(changes.last.new_path).to eq('files/lfs/picture-invalid.png')
end
end
end
describe '#merge_base' do describe '#merge_base' do
shared_examples '#merge_base' do shared_examples '#merge_base' do
where(:from, :to, :result) do where(:from, :to, :result) do
......
...@@ -1084,11 +1084,7 @@ describe Gitlab::GitAccess do ...@@ -1084,11 +1084,7 @@ describe Gitlab::GitAccess do
describe "max file size check" do describe "max file size check" do
let(:start_sha) { 'cfe32cf61b73a0d5e9f13e774abde7ff789b1660' } let(:start_sha) { 'cfe32cf61b73a0d5e9f13e774abde7ff789b1660' }
let(:end_sha) { '913c66a37b4a45b9769037c55c2d238bd0942d2e' } let(:end_sha) { 'c84ff944ff4529a70788a5e9003c2b7feae29047' }
before do
allow_any_instance_of(Gitlab::Git::Blob).to receive(:size).and_return(1.5.megabytes.to_i)
end
it "returns false when size is too large" do it "returns false when size is too large" do
project.create_push_rule(max_file_size: 1) project.create_push_rule(max_file_size: 1)
......
...@@ -33,7 +33,7 @@ describe Gitlab::GitalyClient::CommitService do ...@@ -33,7 +33,7 @@ describe Gitlab::GitalyClient::CommitService do
initial_commit = project.commit('1a0b36b3cdad1d2ee32457c102a8c0b7056fa863').raw initial_commit = project.commit('1a0b36b3cdad1d2ee32457c102a8c0b7056fa863').raw
request = Gitaly::CommitDiffRequest.new( request = Gitaly::CommitDiffRequest.new(
repository: repository_message, repository: repository_message,
left_commit_id: '4b825dc642cb6eb9a060e54bf8d69288fbee4904', left_commit_id: Gitlab::Git::EMPTY_TREE_ID,
right_commit_id: initial_commit.id, right_commit_id: initial_commit.id,
collapse_diffs: true, collapse_diffs: true,
enforce_limits: true, enforce_limits: true,
...@@ -77,7 +77,7 @@ describe Gitlab::GitalyClient::CommitService do ...@@ -77,7 +77,7 @@ describe Gitlab::GitalyClient::CommitService do
initial_commit = project.commit('1a0b36b3cdad1d2ee32457c102a8c0b7056fa863') initial_commit = project.commit('1a0b36b3cdad1d2ee32457c102a8c0b7056fa863')
request = Gitaly::CommitDeltaRequest.new( request = Gitaly::CommitDeltaRequest.new(
repository: repository_message, repository: repository_message,
left_commit_id: '4b825dc642cb6eb9a060e54bf8d69288fbee4904', left_commit_id: Gitlab::Git::EMPTY_TREE_ID,
right_commit_id: initial_commit.id right_commit_id: initial_commit.id
) )
...@@ -90,7 +90,7 @@ describe Gitlab::GitalyClient::CommitService do ...@@ -90,7 +90,7 @@ describe Gitlab::GitalyClient::CommitService do
describe '#between' do describe '#between' do
let(:from) { 'master' } let(:from) { 'master' }
let(:to) { '4b825dc642cb6eb9a060e54bf8d69288fbee4904' } let(:to) { Gitlab::Git::EMPTY_TREE_ID }
it 'sends an RPC request' do it 'sends an RPC request' do
request = Gitaly::CommitsBetweenRequest.new( request = Gitaly::CommitsBetweenRequest.new(
...@@ -155,7 +155,7 @@ describe Gitlab::GitalyClient::CommitService do ...@@ -155,7 +155,7 @@ describe Gitlab::GitalyClient::CommitService do
end end
describe '#find_commit' do describe '#find_commit' do
let(:revision) { '4b825dc642cb6eb9a060e54bf8d69288fbee4904' } let(:revision) { Gitlab::Git::EMPTY_TREE_ID }
it 'sends an RPC request' do it 'sends an RPC request' do
request = Gitaly::FindCommitRequest.new( request = Gitaly::FindCommitRequest.new(
repository: repository_message, revision: revision repository: repository_message, revision: revision
......
require 'spec_helper' require 'spec_helper'
describe Gitlab::Utils do describe Gitlab::Utils do
delegate :to_boolean, :boolean_to_yes_no, :slugify, :random_string, :which, :ensure_array_from_string, to: :described_class delegate :to_boolean, :boolean_to_yes_no, :slugify, :random_string, :which, :ensure_array_from_string, :bytes_to_megabytes, to: :described_class
describe '.slugify' do describe '.slugify' do
{ {
...@@ -97,4 +97,12 @@ describe Gitlab::Utils do ...@@ -97,4 +97,12 @@ describe Gitlab::Utils do
expect(ensure_array_from_string(str)).to eq(%w[seven eight 9 10]) expect(ensure_array_from_string(str)).to eq(%w[seven eight 9 10])
end end
end end
describe '.bytes_to_megabytes' do
it 'converts bytes to megabytes' do
bytes = 1.megabyte
expect(bytes_to_megabytes(bytes)).to eq(1)
end
end
end end
require 'zlib' require 'zlib'
class BareRepoOperations class BareRepoOperations
# The ID of empty tree.
# See http://stackoverflow.com/a/40884093/1856239 and https://github.com/git/git/blob/3ad8b5bf26362ac67c9020bf8c30eee54a84f56d/cache.h#L1011-L1012
EMPTY_TREE_ID = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'.freeze
include Gitlab::Popen include Gitlab::Popen
def initialize(path_to_repo) def initialize(path_to_repo)
@path_to_repo = path_to_repo @path_to_repo = path_to_repo
end end
def commit_tree(tree_id, msg, parent: EMPTY_TREE_ID) def commit_tree(tree_id, msg, parent: Gitlab::Git::EMPTY_TREE_ID)
commit_tree_args = ['commit-tree', tree_id, '-m', msg] commit_tree_args = ['commit-tree', tree_id, '-m', msg]
commit_tree_args += ['-p', parent] unless parent == EMPTY_TREE_ID commit_tree_args += ['-p', parent] unless parent == Gitlab::Git::EMPTY_TREE_ID
commit_id = execute(commit_tree_args) commit_id = execute(commit_tree_args)
commit_id[0] commit_id[0]
...@@ -21,7 +17,7 @@ class BareRepoOperations ...@@ -21,7 +17,7 @@ class BareRepoOperations
# Based on https://stackoverflow.com/a/25556917/1856239 # Based on https://stackoverflow.com/a/25556917/1856239
def commit_file(file, dst_path, branch = 'master') def commit_file(file, dst_path, branch = 'master')
head_id = execute(['show', '--format=format:%H', '--no-patch', branch], allow_failure: true)[0] || EMPTY_TREE_ID head_id = execute(['show', '--format=format:%H', '--no-patch', branch], allow_failure: true)[0] || Gitlab::Git::EMPTY_TREE_ID
execute(['read-tree', '--empty']) execute(['read-tree', '--empty'])
execute(['read-tree', head_id]) execute(['read-tree', head_id])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment