Commit 1eddbfca authored by Rubén Dávila's avatar Rubén Dávila

Fix timeout when MaxFileSize push rule was active on large repo

Instead of inspecting each commit delta, we rely on the usage of `git diff`
and `git cat-file` commands.
parent 326e39bb
......@@ -36,7 +36,6 @@ class PushRule < ActiveRecord::Base
commit_committer_check ||
member_check ||
file_name_regex.present? ||
max_file_size > 0 ||
prevent_secrets
end
......
---
title: Large pushes were failing when max file size push rule was active
merge_request: 4989
author:
type: fixed
......@@ -20,6 +20,7 @@ module EE
super(skip_commits_check: true)
push_rule_check
file_size_check
# Check of commits should happen as the last step
# given they're expensive in terms of performance
commits_check
......@@ -29,6 +30,30 @@ module EE
private
def file_size_check
return if push_rule.nil? || push_rule.max_file_size.zero?
max_file_size = push_rule.max_file_size
large_file = changes.find do |c|
size_in_mb = ::Gitlab::Utils.bytes_to_megabytes(c.blob_size)
c.operation != :deleted && size_in_mb > max_file_size
end
if large_file
raise ::Gitlab::GitAccess::UnauthorizedError, %Q{File "#{large_file.new_path}" is larger than the allowed size of #{max_file_size} MB}
end
end
def changes
strong_memoize(:changes) do
return [] unless newrev
project.repository.raw_changes_between(oldrev, newrev)
end
end
def push_rule
project.push_rule
end
......@@ -152,9 +177,7 @@ module EE
def push_rule_checks_commit?
return false unless push_rule
push_rule.max_file_size > 0 ||
push_rule.file_name_regex.present? ||
push_rule.prevent_secrets
push_rule.file_name_regex.present? || push_rule.prevent_secrets
end
override :validations_for_commit
......@@ -168,11 +191,7 @@ module EE
def push_rule_commit_validations(commit)
return [] unless push_rule
[file_name_validation].tap do |validations|
if push_rule.max_file_size > 0
validations << file_size_validation(commit, push_rule.max_file_size)
end
end
[file_name_validation]
end
def validate_path_locks?
......@@ -208,17 +227,6 @@ module EE
end
end
end
def file_size_validation(commit, max_file_size)
lambda do |diff|
return if diff.deleted_file
blob = project.repository.blob_at(commit.id, diff.new_path)
if blob && blob.size && blob.size > max_file_size.megabytes
return "File #{diff.new_path.inspect} is larger than the allowed size of #{max_file_size} MB"
end
end
end
end
end
end
......
......@@ -250,7 +250,7 @@ describe Gitlab::Checks::ChangeAccess do
let(:push_rule) { create(:push_rule, max_file_size: 1) }
before do
allow_any_instance_of(Blob).to receive(:size).and_return(2.megabytes)
allow_any_instance_of(::Gitlab::Git::RawDiffChange).to receive(:blob_size).and_return(2.megabytes)
end
it_behaves_like 'check ignored when push rule unlicensed'
......
require 'spec_helper'
describe PushRule do
using RSpec::Parameterized::TableSyntax
let(:global_push_rule) { create(:push_rule_sample) }
let(:push_rule) { create(:push_rule) }
let(:user) { create(:user) }
......@@ -83,32 +85,32 @@ describe PushRule do
describe '#commit_validation?' do
let(:settings_with_global_default) { %i(reject_unsigned_commits) }
settings = {
commit_message_regex: 'regex',
branch_name_regex: 'regex',
author_email_regex: 'regex',
file_name_regex: 'regex',
reject_unsigned_commits: true,
commit_committer_check: true,
member_check: true,
prevent_secrets: true,
max_file_size: 1
}
settings.each do |setting, value|
context "when #{setting} is enabled at global level" do
where(:setting, :value, :result) do
:commit_message_regex | 'regex' | true
:branch_name_regex | 'regex' | true
:author_email_regex | 'regex' | true
:file_name_regex | 'regex' | true
:reject_unsigned_commits | true | true
:commit_committer_check | true | true
:member_check | true | true
:prevent_secrets | true | true
:max_file_size | 1 | false
end
with_them do
context "when rule is enabled at global level" do
before do
global_push_rule.update_column(setting, value)
end
it "returns true at project level" do
it "returns the default value at project level" do
rule = project.push_rule
if settings_with_global_default.include?(setting)
rule.update_column(setting, nil)
end
expect(rule.commit_validation?).to eq(true)
expect(rule.commit_validation?).to eq(result)
end
end
end
......
module Gitlab
module Git
# The ID of empty tree.
# See http://stackoverflow.com/a/40884093/1856239 and
# https://github.com/git/git/blob/3ad8b5bf26362ac67c9020bf8c30eee54a84f56d/cache.h#L1011-L1012
EMPTY_TREE_ID = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'.freeze
BLANK_SHA = ('0' * 40).freeze
TAG_REF_PREFIX = "refs/tags/".freeze
BRANCH_REF_PREFIX = "refs/heads/".freeze
......
module Gitlab
module Git
# This class behaves like a struct with fields :blob_id, :blob_size, :operation, :old_path, :new_path
# All those fields are (binary) strings or integers
class RawDiffChange
attr_reader :blob_id, :blob_size, :old_path, :new_path, :operation
def initialize(raw_change)
parse(raw_change)
end
private
# Input data has the following format:
#
# When a file has been modified:
# 7e3e39ebb9b2bf433b4ad17313770fbe4051649c 669 M\tfiles/ruby/popen.rb
#
# When a file has been renamed:
# 85bc2f9753afd5f4fc5d7c75f74f8d526f26b4f3 107 R060\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee
def parse(raw_change)
@blob_id, @blob_size, @raw_operation, raw_paths = raw_change.split(' ', 4)
@operation = extract_operation
@old_path, @new_path = extract_paths(raw_paths)
end
def extract_paths(file_path)
case operation
when :renamed
file_path.split(/\t/)
when :deleted
[file_path, nil]
when :added
[nil, file_path]
else
[file_path, file_path]
end
end
def extract_operation
case @raw_operation&.first(1)
when 'A'
:added
when 'C'
:copied
when 'D'
:deleted
when 'M'
:modified
when 'R'
:renamed
when 'T'
:type_changed
else
:unknown
end
end
end
end
end
......@@ -559,6 +559,24 @@ module Gitlab
count_commits(from: from, to: to, **options)
end
# old_rev and new_rev are commit ID's
# the result of this method is an array of Gitlab::Git::RawDiffChange
def raw_changes_between(old_rev, new_rev)
result = []
circuit_breaker.perform do
Open3.pipeline_r(git_diff_cmd(old_rev, new_rev), format_git_cat_file_script, git_cat_file_cmd) do |last_stdout, wait_threads|
last_stdout.each_line { |line| result << ::Gitlab::Git::RawDiffChange.new(line.chomp!) }
if wait_threads.any? { |waiter| !waiter.value&.success? }
raise ::Gitlab::Git::Repository::GitError, "Unabled to obtain changes between #{old_rev} and #{new_rev}"
end
end
end
result
end
# Returns the SHA of the most recent common ancestor of +from+ and +to+
def merge_base(from, to)
gitaly_migrate(:merge_base) do |is_enabled|
......@@ -2482,6 +2500,35 @@ module Gitlab
result.to_s(16)
end
def build_git_cmd(*args)
object_directories = alternate_object_directories.join(File::PATH_SEPARATOR)
env = { 'PWD' => self.path }
env['GIT_ALTERNATE_OBJECT_DIRECTORIES'] = object_directories if object_directories.present?
[
env,
::Gitlab.config.git.bin_path,
*args,
{ chdir: self.path }
]
end
def git_diff_cmd(old_rev, new_rev)
old_rev = old_rev == ::Gitlab::Git::BLANK_SHA ? ::Gitlab::Git::EMPTY_TREE_ID : old_rev
build_git_cmd('diff', old_rev, new_rev, '--raw')
end
def git_cat_file_cmd
format = '%(objectname) %(objectsize) %(rest)'
build_git_cmd('cat-file', "--batch-check=#{format}")
end
def format_git_cat_file_script
File.expand_path('../support/format-git-cat-file-input', __FILE__)
end
end
end
end
#!/usr/bin/env ruby
# This script formats the output of the `git diff <old_rev> <new_rev> --raw`
# command so it can be processed by `git cat-file`
# We need to convert this:
# ":100644 100644 5f53439... 85bc2f9... R060\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee"
# To:
# "85bc2f9 R\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee"
ARGF.each do |line|
_, _, old_blob_id, new_blob_id, rest = line.split(/\s/, 5)
old_blob_id.gsub!(/[^\h]/, '')
new_blob_id.gsub!(/[^\h]/, '')
# We can't pass '0000000...' to `git cat-file` given it will not return info about the deleted file
blob_id = new_blob_id =~ /\A0+\z/ ? old_blob_id : new_blob_id
$stdout.puts "#{blob_id} #{rest}"
end
......@@ -3,10 +3,6 @@ module Gitlab
class CommitService
include Gitlab::EncodingHelper
# The ID of empty tree.
# See http://stackoverflow.com/a/40884093/1856239 and https://github.com/git/git/blob/3ad8b5bf26362ac67c9020bf8c30eee54a84f56d/cache.h#L1011-L1012
EMPTY_TREE_ID = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'.freeze
def initialize(repository)
@gitaly_repo = repository.gitaly_repository
@repository = repository
......@@ -37,7 +33,7 @@ module Gitlab
def diff(from, to, options = {})
from_id = case from
when NilClass
EMPTY_TREE_ID
Gitlab::Git::EMPTY_TREE_ID
else
if from.respond_to?(:oid)
# This is meant to match a Rugged::Commit. This should be impossible in
......@@ -50,7 +46,7 @@ module Gitlab
to_id = case to
when NilClass
EMPTY_TREE_ID
Gitlab::Git::EMPTY_TREE_ID
else
if to.respond_to?(:oid)
# This is meant to match a Rugged::Commit. This should be impossible in
......@@ -352,7 +348,7 @@ module Gitlab
end
def diff_from_parent_request_params(commit, options = {})
parent_id = commit.parent_ids.first || EMPTY_TREE_ID
parent_id = commit.parent_ids.first || Gitlab::Git::EMPTY_TREE_ID
diff_between_commits_request_params(parent_id, commit.id, options)
end
......
......@@ -80,6 +80,10 @@ module Gitlab
size
end
def bytes_to_megabytes(bytes)
bytes.to_f / Numeric::MEGABYTE
end
# Accepts either an Array or a String and returns an array
def ensure_array_from_string(string_or_array)
return string_or_array if string_or_array.is_a?(Array)
......
require 'spec_helper'
describe Gitlab::Git::RawDiffChange do
let(:raw_change) { }
let(:change) { described_class.new(raw_change) }
context 'bad input' do
let(:raw_change) { 'foo' }
it 'does not set most of the attrs' do
expect(change.blob_id).to eq('foo')
expect(change.operation).to eq(:unknown)
expect(change.old_path).to be_blank
expect(change.new_path).to be_blank
expect(change.blob_size).to be_blank
end
end
context 'adding a file' do
let(:raw_change) { '93e123ac8a3e6a0b600953d7598af629dec7b735 59 A bar/branch-test.txt' }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:added)
expect(change.old_path).to be_blank
expect(change.new_path).to eq('bar/branch-test.txt')
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
context 'renaming a file' do
let(:raw_change) { "85bc2f9753afd5f4fc5d7c75f74f8d526f26b4f3 107 R060\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee" }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:renamed)
expect(change.old_path).to eq('files/js/commit.js.coffee')
expect(change.new_path).to eq('files/js/commit.coffee')
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
context 'modifying a file' do
let(:raw_change) { 'c60514b6d3d6bf4bec1030f70026e34dfbd69ad5 824 M README.md' }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:modified)
expect(change.old_path).to eq('README.md')
expect(change.new_path).to eq('README.md')
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
context 'deleting a file' do
let(:raw_change) { '60d7a906c2fd9e4509aeb1187b98d0ea7ce827c9 15364 D files/.DS_Store' }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:deleted)
expect(change.old_path).to eq('files/.DS_Store')
expect(change.new_path).to be_nil
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
end
......@@ -1043,6 +1043,44 @@ describe Gitlab::Git::Repository, seed_helper: true do
it { is_expected.to eq(17) }
end
describe '#raw_changes_between' do
let(:old_rev) { }
let(:new_rev) { }
let(:changes) { repository.raw_changes_between(old_rev, new_rev) }
context 'initial commit' do
let(:old_rev) { Gitlab::Git::BLANK_SHA }
let(:new_rev) { '1a0b36b3cdad1d2ee32457c102a8c0b7056fa863' }
it 'returns the changes' do
expect(changes).to be_present
expect(changes.size).to eq(3)
end
end
context 'with an invalid rev' do
let(:old_rev) { 'foo' }
let(:new_rev) { 'bar' }
it 'returns an error' do
expect { changes }.to raise_error(Gitlab::Git::Repository::GitError)
end
end
context 'with valid revs' do
let(:old_rev) { 'fa1b1e6c004a68b7d8763b86455da9e6b23e36d6' }
let(:new_rev) { '4b4918a572fa86f9771e5ba40fbd48e1eb03e2c6' }
it 'returns the changes' do
expect(changes.size).to eq(9)
expect(changes.first.operation).to eq(:modified)
expect(changes.first.new_path).to eq('.gitmodules')
expect(changes.last.operation).to eq(:added)
expect(changes.last.new_path).to eq('files/lfs/picture-invalid.png')
end
end
end
describe '#merge_base' do
shared_examples '#merge_base' do
where(:from, :to, :result) do
......
......@@ -1084,11 +1084,7 @@ describe Gitlab::GitAccess do
describe "max file size check" do
let(:start_sha) { 'cfe32cf61b73a0d5e9f13e774abde7ff789b1660' }
let(:end_sha) { '913c66a37b4a45b9769037c55c2d238bd0942d2e' }
before do
allow_any_instance_of(Gitlab::Git::Blob).to receive(:size).and_return(1.5.megabytes.to_i)
end
let(:end_sha) { 'c84ff944ff4529a70788a5e9003c2b7feae29047' }
it "returns false when size is too large" do
project.create_push_rule(max_file_size: 1)
......
......@@ -33,7 +33,7 @@ describe Gitlab::GitalyClient::CommitService do
initial_commit = project.commit('1a0b36b3cdad1d2ee32457c102a8c0b7056fa863').raw
request = Gitaly::CommitDiffRequest.new(
repository: repository_message,
left_commit_id: '4b825dc642cb6eb9a060e54bf8d69288fbee4904',
left_commit_id: Gitlab::Git::EMPTY_TREE_ID,
right_commit_id: initial_commit.id,
collapse_diffs: true,
enforce_limits: true,
......@@ -77,7 +77,7 @@ describe Gitlab::GitalyClient::CommitService do
initial_commit = project.commit('1a0b36b3cdad1d2ee32457c102a8c0b7056fa863')
request = Gitaly::CommitDeltaRequest.new(
repository: repository_message,
left_commit_id: '4b825dc642cb6eb9a060e54bf8d69288fbee4904',
left_commit_id: Gitlab::Git::EMPTY_TREE_ID,
right_commit_id: initial_commit.id
)
......@@ -90,7 +90,7 @@ describe Gitlab::GitalyClient::CommitService do
describe '#between' do
let(:from) { 'master' }
let(:to) { '4b825dc642cb6eb9a060e54bf8d69288fbee4904' }
let(:to) { Gitlab::Git::EMPTY_TREE_ID }
it 'sends an RPC request' do
request = Gitaly::CommitsBetweenRequest.new(
......@@ -155,7 +155,7 @@ describe Gitlab::GitalyClient::CommitService do
end
describe '#find_commit' do
let(:revision) { '4b825dc642cb6eb9a060e54bf8d69288fbee4904' }
let(:revision) { Gitlab::Git::EMPTY_TREE_ID }
it 'sends an RPC request' do
request = Gitaly::FindCommitRequest.new(
repository: repository_message, revision: revision
......
require 'spec_helper'
describe Gitlab::Utils do
delegate :to_boolean, :boolean_to_yes_no, :slugify, :random_string, :which, :ensure_array_from_string, to: :described_class
delegate :to_boolean, :boolean_to_yes_no, :slugify, :random_string, :which, :ensure_array_from_string, :bytes_to_megabytes, to: :described_class
describe '.slugify' do
{
......@@ -97,4 +97,12 @@ describe Gitlab::Utils do
expect(ensure_array_from_string(str)).to eq(%w[seven eight 9 10])
end
end
describe '.bytes_to_megabytes' do
it 'converts bytes to megabytes' do
bytes = 1.megabyte
expect(bytes_to_megabytes(bytes)).to eq(1)
end
end
end
require 'zlib'
class BareRepoOperations
# The ID of empty tree.
# See http://stackoverflow.com/a/40884093/1856239 and https://github.com/git/git/blob/3ad8b5bf26362ac67c9020bf8c30eee54a84f56d/cache.h#L1011-L1012
EMPTY_TREE_ID = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'.freeze
include Gitlab::Popen
def initialize(path_to_repo)
@path_to_repo = path_to_repo
end
def commit_tree(tree_id, msg, parent: EMPTY_TREE_ID)
def commit_tree(tree_id, msg, parent: Gitlab::Git::EMPTY_TREE_ID)
commit_tree_args = ['commit-tree', tree_id, '-m', msg]
commit_tree_args += ['-p', parent] unless parent == EMPTY_TREE_ID
commit_tree_args += ['-p', parent] unless parent == Gitlab::Git::EMPTY_TREE_ID
commit_id = execute(commit_tree_args)
commit_id[0]
......@@ -21,7 +17,7 @@ class BareRepoOperations
# Based on https://stackoverflow.com/a/25556917/1856239
def commit_file(file, dst_path, branch = 'master')
head_id = execute(['show', '--format=format:%H', '--no-patch', branch], allow_failure: true)[0] || EMPTY_TREE_ID
head_id = execute(['show', '--format=format:%H', '--no-patch', branch], allow_failure: true)[0] || Gitlab::Git::EMPTY_TREE_ID
execute(['read-tree', '--empty'])
execute(['read-tree', head_id])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment