Commit 8b597734 authored by Douwe Maan's avatar Douwe Maan

Merge branch...

Merge branch 'rd-3429-enabling-maximum-file-size-limit-in-repository-causes-pushes-to-fail' into 'master'

Enabling maximum file size limit in repository causes pushes to fail

Closes #3429

See merge request gitlab-org/gitlab-ee!4989
parents b782bf8b 1eddbfca
...@@ -36,7 +36,6 @@ class PushRule < ActiveRecord::Base ...@@ -36,7 +36,6 @@ class PushRule < ActiveRecord::Base
commit_committer_check || commit_committer_check ||
member_check || member_check ||
file_name_regex.present? || file_name_regex.present? ||
max_file_size > 0 ||
prevent_secrets prevent_secrets
end end
......
---
title: Large pushes were failing when max file size push rule was active
merge_request: 4989
author:
type: fixed
...@@ -20,6 +20,7 @@ module EE ...@@ -20,6 +20,7 @@ module EE
super(skip_commits_check: true) super(skip_commits_check: true)
push_rule_check push_rule_check
file_size_check
# Check of commits should happen as the last step # Check of commits should happen as the last step
# given they're expensive in terms of performance # given they're expensive in terms of performance
commits_check commits_check
...@@ -29,6 +30,30 @@ module EE ...@@ -29,6 +30,30 @@ module EE
private private
def file_size_check
return if push_rule.nil? || push_rule.max_file_size.zero?
max_file_size = push_rule.max_file_size
large_file = changes.find do |c|
size_in_mb = ::Gitlab::Utils.bytes_to_megabytes(c.blob_size)
c.operation != :deleted && size_in_mb > max_file_size
end
if large_file
raise ::Gitlab::GitAccess::UnauthorizedError, %Q{File "#{large_file.new_path}" is larger than the allowed size of #{max_file_size} MB}
end
end
def changes
strong_memoize(:changes) do
return [] unless newrev
project.repository.raw_changes_between(oldrev, newrev)
end
end
def push_rule def push_rule
project.push_rule project.push_rule
end end
...@@ -152,9 +177,7 @@ module EE ...@@ -152,9 +177,7 @@ module EE
def push_rule_checks_commit? def push_rule_checks_commit?
return false unless push_rule return false unless push_rule
push_rule.max_file_size > 0 || push_rule.file_name_regex.present? || push_rule.prevent_secrets
push_rule.file_name_regex.present? ||
push_rule.prevent_secrets
end end
override :validations_for_commit override :validations_for_commit
...@@ -168,11 +191,7 @@ module EE ...@@ -168,11 +191,7 @@ module EE
def push_rule_commit_validations(commit) def push_rule_commit_validations(commit)
return [] unless push_rule return [] unless push_rule
[file_name_validation].tap do |validations| [file_name_validation]
if push_rule.max_file_size > 0
validations << file_size_validation(commit, push_rule.max_file_size)
end
end
end end
def validate_path_locks? def validate_path_locks?
...@@ -208,17 +227,6 @@ module EE ...@@ -208,17 +227,6 @@ module EE
end end
end end
end end
def file_size_validation(commit, max_file_size)
lambda do |diff|
return if diff.deleted_file
blob = project.repository.blob_at(commit.id, diff.new_path)
if blob && blob.size && blob.size > max_file_size.megabytes
return "File #{diff.new_path.inspect} is larger than the allowed size of #{max_file_size} MB"
end
end
end
end end
end end
end end
......
...@@ -250,7 +250,7 @@ describe Gitlab::Checks::ChangeAccess do ...@@ -250,7 +250,7 @@ describe Gitlab::Checks::ChangeAccess do
let(:push_rule) { create(:push_rule, max_file_size: 1) } let(:push_rule) { create(:push_rule, max_file_size: 1) }
before do before do
allow_any_instance_of(Blob).to receive(:size).and_return(2.megabytes) allow_any_instance_of(::Gitlab::Git::RawDiffChange).to receive(:blob_size).and_return(2.megabytes)
end end
it_behaves_like 'check ignored when push rule unlicensed' it_behaves_like 'check ignored when push rule unlicensed'
......
require 'spec_helper' require 'spec_helper'
describe PushRule do describe PushRule do
using RSpec::Parameterized::TableSyntax
let(:global_push_rule) { create(:push_rule_sample) } let(:global_push_rule) { create(:push_rule_sample) }
let(:push_rule) { create(:push_rule) } let(:push_rule) { create(:push_rule) }
let(:user) { create(:user) } let(:user) { create(:user) }
...@@ -83,32 +85,32 @@ describe PushRule do ...@@ -83,32 +85,32 @@ describe PushRule do
describe '#commit_validation?' do describe '#commit_validation?' do
let(:settings_with_global_default) { %i(reject_unsigned_commits) } let(:settings_with_global_default) { %i(reject_unsigned_commits) }
settings = { where(:setting, :value, :result) do
commit_message_regex: 'regex', :commit_message_regex | 'regex' | true
branch_name_regex: 'regex', :branch_name_regex | 'regex' | true
author_email_regex: 'regex', :author_email_regex | 'regex' | true
file_name_regex: 'regex', :file_name_regex | 'regex' | true
reject_unsigned_commits: true, :reject_unsigned_commits | true | true
commit_committer_check: true, :commit_committer_check | true | true
member_check: true, :member_check | true | true
prevent_secrets: true, :prevent_secrets | true | true
max_file_size: 1 :max_file_size | 1 | false
} end
settings.each do |setting, value| with_them do
context "when #{setting} is enabled at global level" do context "when rule is enabled at global level" do
before do before do
global_push_rule.update_column(setting, value) global_push_rule.update_column(setting, value)
end end
it "returns true at project level" do it "returns the default value at project level" do
rule = project.push_rule rule = project.push_rule
if settings_with_global_default.include?(setting) if settings_with_global_default.include?(setting)
rule.update_column(setting, nil) rule.update_column(setting, nil)
end end
expect(rule.commit_validation?).to eq(true) expect(rule.commit_validation?).to eq(result)
end end
end end
end end
......
module Gitlab module Gitlab
module Git module Git
# The ID of empty tree.
# See http://stackoverflow.com/a/40884093/1856239 and
# https://github.com/git/git/blob/3ad8b5bf26362ac67c9020bf8c30eee54a84f56d/cache.h#L1011-L1012
EMPTY_TREE_ID = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'.freeze
BLANK_SHA = ('0' * 40).freeze BLANK_SHA = ('0' * 40).freeze
TAG_REF_PREFIX = "refs/tags/".freeze TAG_REF_PREFIX = "refs/tags/".freeze
BRANCH_REF_PREFIX = "refs/heads/".freeze BRANCH_REF_PREFIX = "refs/heads/".freeze
......
module Gitlab
module Git
# This class behaves like a struct with fields :blob_id, :blob_size, :operation, :old_path, :new_path
# All those fields are (binary) strings or integers
class RawDiffChange
attr_reader :blob_id, :blob_size, :old_path, :new_path, :operation
def initialize(raw_change)
parse(raw_change)
end
private
# Input data has the following format:
#
# When a file has been modified:
# 7e3e39ebb9b2bf433b4ad17313770fbe4051649c 669 M\tfiles/ruby/popen.rb
#
# When a file has been renamed:
# 85bc2f9753afd5f4fc5d7c75f74f8d526f26b4f3 107 R060\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee
def parse(raw_change)
@blob_id, @blob_size, @raw_operation, raw_paths = raw_change.split(' ', 4)
@operation = extract_operation
@old_path, @new_path = extract_paths(raw_paths)
end
def extract_paths(file_path)
case operation
when :renamed
file_path.split(/\t/)
when :deleted
[file_path, nil]
when :added
[nil, file_path]
else
[file_path, file_path]
end
end
def extract_operation
case @raw_operation&.first(1)
when 'A'
:added
when 'C'
:copied
when 'D'
:deleted
when 'M'
:modified
when 'R'
:renamed
when 'T'
:type_changed
else
:unknown
end
end
end
end
end
...@@ -558,6 +558,24 @@ module Gitlab ...@@ -558,6 +558,24 @@ module Gitlab
count_commits(from: from, to: to, **options) count_commits(from: from, to: to, **options)
end end
# old_rev and new_rev are commit ID's
# the result of this method is an array of Gitlab::Git::RawDiffChange
def raw_changes_between(old_rev, new_rev)
result = []
circuit_breaker.perform do
Open3.pipeline_r(git_diff_cmd(old_rev, new_rev), format_git_cat_file_script, git_cat_file_cmd) do |last_stdout, wait_threads|
last_stdout.each_line { |line| result << ::Gitlab::Git::RawDiffChange.new(line.chomp!) }
if wait_threads.any? { |waiter| !waiter.value&.success? }
raise ::Gitlab::Git::Repository::GitError, "Unabled to obtain changes between #{old_rev} and #{new_rev}"
end
end
end
result
end
# Returns the SHA of the most recent common ancestor of +from+ and +to+ # Returns the SHA of the most recent common ancestor of +from+ and +to+
def merge_base(from, to) def merge_base(from, to)
gitaly_migrate(:merge_base) do |is_enabled| gitaly_migrate(:merge_base) do |is_enabled|
...@@ -2505,6 +2523,35 @@ module Gitlab ...@@ -2505,6 +2523,35 @@ module Gitlab
result.to_s(16) result.to_s(16)
end end
def build_git_cmd(*args)
object_directories = alternate_object_directories.join(File::PATH_SEPARATOR)
env = { 'PWD' => self.path }
env['GIT_ALTERNATE_OBJECT_DIRECTORIES'] = object_directories if object_directories.present?
[
env,
::Gitlab.config.git.bin_path,
*args,
{ chdir: self.path }
]
end
def git_diff_cmd(old_rev, new_rev)
old_rev = old_rev == ::Gitlab::Git::BLANK_SHA ? ::Gitlab::Git::EMPTY_TREE_ID : old_rev
build_git_cmd('diff', old_rev, new_rev, '--raw')
end
def git_cat_file_cmd
format = '%(objectname) %(objectsize) %(rest)'
build_git_cmd('cat-file', "--batch-check=#{format}")
end
def format_git_cat_file_script
File.expand_path('../support/format-git-cat-file-input', __FILE__)
end
end end
end end
end end
#!/usr/bin/env ruby
# This script formats the output of the `git diff <old_rev> <new_rev> --raw`
# command so it can be processed by `git cat-file`
# We need to convert this:
# ":100644 100644 5f53439... 85bc2f9... R060\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee"
# To:
# "85bc2f9 R\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee"
ARGF.each do |line|
_, _, old_blob_id, new_blob_id, rest = line.split(/\s/, 5)
old_blob_id.gsub!(/[^\h]/, '')
new_blob_id.gsub!(/[^\h]/, '')
# We can't pass '0000000...' to `git cat-file` given it will not return info about the deleted file
blob_id = new_blob_id =~ /\A0+\z/ ? old_blob_id : new_blob_id
$stdout.puts "#{blob_id} #{rest}"
end
...@@ -3,10 +3,6 @@ module Gitlab ...@@ -3,10 +3,6 @@ module Gitlab
class CommitService class CommitService
include Gitlab::EncodingHelper include Gitlab::EncodingHelper
# The ID of empty tree.
# See http://stackoverflow.com/a/40884093/1856239 and https://github.com/git/git/blob/3ad8b5bf26362ac67c9020bf8c30eee54a84f56d/cache.h#L1011-L1012
EMPTY_TREE_ID = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'.freeze
def initialize(repository) def initialize(repository)
@gitaly_repo = repository.gitaly_repository @gitaly_repo = repository.gitaly_repository
@repository = repository @repository = repository
...@@ -37,7 +33,7 @@ module Gitlab ...@@ -37,7 +33,7 @@ module Gitlab
def diff(from, to, options = {}) def diff(from, to, options = {})
from_id = case from from_id = case from
when NilClass when NilClass
EMPTY_TREE_ID Gitlab::Git::EMPTY_TREE_ID
else else
if from.respond_to?(:oid) if from.respond_to?(:oid)
# This is meant to match a Rugged::Commit. This should be impossible in # This is meant to match a Rugged::Commit. This should be impossible in
...@@ -50,7 +46,7 @@ module Gitlab ...@@ -50,7 +46,7 @@ module Gitlab
to_id = case to to_id = case to
when NilClass when NilClass
EMPTY_TREE_ID Gitlab::Git::EMPTY_TREE_ID
else else
if to.respond_to?(:oid) if to.respond_to?(:oid)
# This is meant to match a Rugged::Commit. This should be impossible in # This is meant to match a Rugged::Commit. This should be impossible in
...@@ -352,7 +348,7 @@ module Gitlab ...@@ -352,7 +348,7 @@ module Gitlab
end end
def diff_from_parent_request_params(commit, options = {}) def diff_from_parent_request_params(commit, options = {})
parent_id = commit.parent_ids.first || EMPTY_TREE_ID parent_id = commit.parent_ids.first || Gitlab::Git::EMPTY_TREE_ID
diff_between_commits_request_params(parent_id, commit.id, options) diff_between_commits_request_params(parent_id, commit.id, options)
end end
......
...@@ -80,6 +80,10 @@ module Gitlab ...@@ -80,6 +80,10 @@ module Gitlab
size size
end end
def bytes_to_megabytes(bytes)
bytes.to_f / Numeric::MEGABYTE
end
# Accepts either an Array or a String and returns an array # Accepts either an Array or a String and returns an array
def ensure_array_from_string(string_or_array) def ensure_array_from_string(string_or_array)
return string_or_array if string_or_array.is_a?(Array) return string_or_array if string_or_array.is_a?(Array)
......
require 'spec_helper'
describe Gitlab::Git::RawDiffChange do
let(:raw_change) { }
let(:change) { described_class.new(raw_change) }
context 'bad input' do
let(:raw_change) { 'foo' }
it 'does not set most of the attrs' do
expect(change.blob_id).to eq('foo')
expect(change.operation).to eq(:unknown)
expect(change.old_path).to be_blank
expect(change.new_path).to be_blank
expect(change.blob_size).to be_blank
end
end
context 'adding a file' do
let(:raw_change) { '93e123ac8a3e6a0b600953d7598af629dec7b735 59 A bar/branch-test.txt' }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:added)
expect(change.old_path).to be_blank
expect(change.new_path).to eq('bar/branch-test.txt')
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
context 'renaming a file' do
let(:raw_change) { "85bc2f9753afd5f4fc5d7c75f74f8d526f26b4f3 107 R060\tfiles/js/commit.js.coffee\tfiles/js/commit.coffee" }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:renamed)
expect(change.old_path).to eq('files/js/commit.js.coffee')
expect(change.new_path).to eq('files/js/commit.coffee')
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
context 'modifying a file' do
let(:raw_change) { 'c60514b6d3d6bf4bec1030f70026e34dfbd69ad5 824 M README.md' }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:modified)
expect(change.old_path).to eq('README.md')
expect(change.new_path).to eq('README.md')
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
context 'deleting a file' do
let(:raw_change) { '60d7a906c2fd9e4509aeb1187b98d0ea7ce827c9 15364 D files/.DS_Store' }
it 'initialize the proper attrs' do
expect(change.operation).to eq(:deleted)
expect(change.old_path).to eq('files/.DS_Store')
expect(change.new_path).to be_nil
expect(change.blob_id).to be_present
expect(change.blob_size).to be_present
end
end
end
...@@ -1043,6 +1043,44 @@ describe Gitlab::Git::Repository, seed_helper: true do ...@@ -1043,6 +1043,44 @@ describe Gitlab::Git::Repository, seed_helper: true do
it { is_expected.to eq(17) } it { is_expected.to eq(17) }
end end
describe '#raw_changes_between' do
let(:old_rev) { }
let(:new_rev) { }
let(:changes) { repository.raw_changes_between(old_rev, new_rev) }
context 'initial commit' do
let(:old_rev) { Gitlab::Git::BLANK_SHA }
let(:new_rev) { '1a0b36b3cdad1d2ee32457c102a8c0b7056fa863' }
it 'returns the changes' do
expect(changes).to be_present
expect(changes.size).to eq(3)
end
end
context 'with an invalid rev' do
let(:old_rev) { 'foo' }
let(:new_rev) { 'bar' }
it 'returns an error' do
expect { changes }.to raise_error(Gitlab::Git::Repository::GitError)
end
end
context 'with valid revs' do
let(:old_rev) { 'fa1b1e6c004a68b7d8763b86455da9e6b23e36d6' }
let(:new_rev) { '4b4918a572fa86f9771e5ba40fbd48e1eb03e2c6' }
it 'returns the changes' do
expect(changes.size).to eq(9)
expect(changes.first.operation).to eq(:modified)
expect(changes.first.new_path).to eq('.gitmodules')
expect(changes.last.operation).to eq(:added)
expect(changes.last.new_path).to eq('files/lfs/picture-invalid.png')
end
end
end
describe '#merge_base' do describe '#merge_base' do
shared_examples '#merge_base' do shared_examples '#merge_base' do
where(:from, :to, :result) do where(:from, :to, :result) do
......
...@@ -1146,11 +1146,7 @@ describe Gitlab::GitAccess do ...@@ -1146,11 +1146,7 @@ describe Gitlab::GitAccess do
describe "max file size check" do describe "max file size check" do
let(:start_sha) { 'cfe32cf61b73a0d5e9f13e774abde7ff789b1660' } let(:start_sha) { 'cfe32cf61b73a0d5e9f13e774abde7ff789b1660' }
let(:end_sha) { '913c66a37b4a45b9769037c55c2d238bd0942d2e' } let(:end_sha) { 'c84ff944ff4529a70788a5e9003c2b7feae29047' }
before do
allow_any_instance_of(Gitlab::Git::Blob).to receive(:size).and_return(1.5.megabytes.to_i)
end
it "returns false when size is too large" do it "returns false when size is too large" do
project.create_push_rule(max_file_size: 1) project.create_push_rule(max_file_size: 1)
......
...@@ -33,7 +33,7 @@ describe Gitlab::GitalyClient::CommitService do ...@@ -33,7 +33,7 @@ describe Gitlab::GitalyClient::CommitService do
initial_commit = project.commit('1a0b36b3cdad1d2ee32457c102a8c0b7056fa863').raw initial_commit = project.commit('1a0b36b3cdad1d2ee32457c102a8c0b7056fa863').raw
request = Gitaly::CommitDiffRequest.new( request = Gitaly::CommitDiffRequest.new(
repository: repository_message, repository: repository_message,
left_commit_id: '4b825dc642cb6eb9a060e54bf8d69288fbee4904', left_commit_id: Gitlab::Git::EMPTY_TREE_ID,
right_commit_id: initial_commit.id, right_commit_id: initial_commit.id,
collapse_diffs: true, collapse_diffs: true,
enforce_limits: true, enforce_limits: true,
...@@ -77,7 +77,7 @@ describe Gitlab::GitalyClient::CommitService do ...@@ -77,7 +77,7 @@ describe Gitlab::GitalyClient::CommitService do
initial_commit = project.commit('1a0b36b3cdad1d2ee32457c102a8c0b7056fa863') initial_commit = project.commit('1a0b36b3cdad1d2ee32457c102a8c0b7056fa863')
request = Gitaly::CommitDeltaRequest.new( request = Gitaly::CommitDeltaRequest.new(
repository: repository_message, repository: repository_message,
left_commit_id: '4b825dc642cb6eb9a060e54bf8d69288fbee4904', left_commit_id: Gitlab::Git::EMPTY_TREE_ID,
right_commit_id: initial_commit.id right_commit_id: initial_commit.id
) )
...@@ -90,7 +90,7 @@ describe Gitlab::GitalyClient::CommitService do ...@@ -90,7 +90,7 @@ describe Gitlab::GitalyClient::CommitService do
describe '#between' do describe '#between' do
let(:from) { 'master' } let(:from) { 'master' }
let(:to) { '4b825dc642cb6eb9a060e54bf8d69288fbee4904' } let(:to) { Gitlab::Git::EMPTY_TREE_ID }
it 'sends an RPC request' do it 'sends an RPC request' do
request = Gitaly::CommitsBetweenRequest.new( request = Gitaly::CommitsBetweenRequest.new(
...@@ -155,7 +155,7 @@ describe Gitlab::GitalyClient::CommitService do ...@@ -155,7 +155,7 @@ describe Gitlab::GitalyClient::CommitService do
end end
describe '#find_commit' do describe '#find_commit' do
let(:revision) { '4b825dc642cb6eb9a060e54bf8d69288fbee4904' } let(:revision) { Gitlab::Git::EMPTY_TREE_ID }
it 'sends an RPC request' do it 'sends an RPC request' do
request = Gitaly::FindCommitRequest.new( request = Gitaly::FindCommitRequest.new(
repository: repository_message, revision: revision repository: repository_message, revision: revision
......
require 'spec_helper' require 'spec_helper'
describe Gitlab::Utils do describe Gitlab::Utils do
delegate :to_boolean, :boolean_to_yes_no, :slugify, :random_string, :which, :ensure_array_from_string, to: :described_class delegate :to_boolean, :boolean_to_yes_no, :slugify, :random_string, :which, :ensure_array_from_string, :bytes_to_megabytes, to: :described_class
describe '.slugify' do describe '.slugify' do
{ {
...@@ -97,4 +97,12 @@ describe Gitlab::Utils do ...@@ -97,4 +97,12 @@ describe Gitlab::Utils do
expect(ensure_array_from_string(str)).to eq(%w[seven eight 9 10]) expect(ensure_array_from_string(str)).to eq(%w[seven eight 9 10])
end end
end end
describe '.bytes_to_megabytes' do
it 'converts bytes to megabytes' do
bytes = 1.megabyte
expect(bytes_to_megabytes(bytes)).to eq(1)
end
end
end end
require 'zlib' require 'zlib'
class BareRepoOperations class BareRepoOperations
# The ID of empty tree.
# See http://stackoverflow.com/a/40884093/1856239 and https://github.com/git/git/blob/3ad8b5bf26362ac67c9020bf8c30eee54a84f56d/cache.h#L1011-L1012
EMPTY_TREE_ID = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'.freeze
include Gitlab::Popen include Gitlab::Popen
def initialize(path_to_repo) def initialize(path_to_repo)
@path_to_repo = path_to_repo @path_to_repo = path_to_repo
end end
def commit_tree(tree_id, msg, parent: EMPTY_TREE_ID) def commit_tree(tree_id, msg, parent: Gitlab::Git::EMPTY_TREE_ID)
commit_tree_args = ['commit-tree', tree_id, '-m', msg] commit_tree_args = ['commit-tree', tree_id, '-m', msg]
commit_tree_args += ['-p', parent] unless parent == EMPTY_TREE_ID commit_tree_args += ['-p', parent] unless parent == Gitlab::Git::EMPTY_TREE_ID
commit_id = execute(commit_tree_args) commit_id = execute(commit_tree_args)
commit_id[0] commit_id[0]
...@@ -21,7 +17,7 @@ class BareRepoOperations ...@@ -21,7 +17,7 @@ class BareRepoOperations
# Based on https://stackoverflow.com/a/25556917/1856239 # Based on https://stackoverflow.com/a/25556917/1856239
def commit_file(file, dst_path, branch = 'master') def commit_file(file, dst_path, branch = 'master')
head_id = execute(['show', '--format=format:%H', '--no-patch', branch], allow_failure: true)[0] || EMPTY_TREE_ID head_id = execute(['show', '--format=format:%H', '--no-patch', branch], allow_failure: true)[0] || Gitlab::Git::EMPTY_TREE_ID
execute(['read-tree', '--empty']) execute(['read-tree', '--empty'])
execute(['read-tree', head_id]) execute(['read-tree', head_id])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment