Commit b61b3bfe authored by Matthias Käppler's avatar Matthias Käppler

Merge branch 'immediately-unlink-large-temporary-files' into 'master'

Immediately unlink potentially-large temporary files

See merge request gitlab-org/gitlab!57239
parents fb551ba2 704d44d5
......@@ -29,6 +29,7 @@ module Gitlab
require_dependency Rails.root.join('lib/gitlab/middleware/same_site_cookies')
require_dependency Rails.root.join('lib/gitlab/middleware/handle_ip_spoof_attack_error')
require_dependency Rails.root.join('lib/gitlab/middleware/handle_malformed_strings')
require_dependency Rails.root.join('lib/gitlab/middleware/rack_multipart_tempfile_factory')
require_dependency Rails.root.join('lib/gitlab/runtime')
# Settings in config/environments/* take precedence over those specified here.
......@@ -271,6 +272,8 @@ module Gitlab
config.middleware.insert_after ActionDispatch::ActionableExceptions, ::Gitlab::Middleware::HandleMalformedStrings
config.middleware.insert_after Rack::Sendfile, ::Gitlab::Middleware::RackMultipartTempfileFactory
# Allow access to GitLab API from other domains
config.middleware.insert_before Warden::Manager, Rack::Cors do
headers_to_expose = %w[Link X-Total X-Total-Pages X-Per-Page X-Page X-Next-Page X-Prev-Page X-Gitlab-Blob-Id X-Gitlab-Commit-Id X-Gitlab-Content-Sha256 X-Gitlab-Encoding X-Gitlab-File-Name X-Gitlab-File-Path X-Gitlab-Last-Commit-Id X-Gitlab-Ref X-Gitlab-Size]
......
# frozen_string_literal: true
if Gitlab::Runtime.puma?
raise "Remove this monkey patch: #{__FILE__}" unless Puma::Const::VERSION == '5.1.1'
if ENV['GITLAB_TEMPFILE_IMMEDIATE_UNLINK'] == '1'
# This is copied from https://github.com/puma/puma/blob/v5.1.1/lib/puma/client.rb,
# with two additions: both times we create a temporary file, we immediately
# call `#unlink`. This means that if the process gets terminated without being
# able to clean up itself, the temporary file will not linger on the file
# system. We will try to get this patch accepted upstream if it works for us
# (we just need to check if the temporary file responds to `#unlink` as that
# won't work on Windows, for instance).
module Puma
class Client
private
def setup_body
@body_read_start = Process.clock_gettime(Process::CLOCK_MONOTONIC, :millisecond)
if @env[HTTP_EXPECT] == CONTINUE
# TODO allow a hook here to check the headers before
# going forward
@io << HTTP_11_100
@io.flush
end
@read_header = false
body = @parser.body
te = @env[TRANSFER_ENCODING2]
if te
if te.include?(",")
te.split(",").each do |part|
if CHUNKED.casecmp(part.strip) == 0 # rubocop:disable Metrics/BlockNesting
return setup_chunked_body(body)
end
end
elsif CHUNKED.casecmp(te) == 0
return setup_chunked_body(body)
end
end
@chunked_body = false
cl = @env[CONTENT_LENGTH]
unless cl
@buffer = body.empty? ? nil : body
@body = EmptyBody
set_ready
return true
end
remain = cl.to_i - body.bytesize
if remain <= 0
@body = StringIO.new(body)
@buffer = nil
set_ready
return true
end
if remain > MAX_BODY
@body = Tempfile.new(Const::PUMA_TMP_BASE)
@body.binmode
@body.unlink # This is the changed part
@tempfile = @body
else
# The body[0,0] trick is to get an empty string in the same
# encoding as body.
@body = StringIO.new body[0,0] # rubocop:disable Layout/SpaceAfterComma
end
@body.write body
@body_remain = remain
return false # rubocop:disable Style/RedundantReturn
end
def setup_chunked_body(body)
@chunked_body = true
@partial_part_left = 0
@prev_chunk = ""
@body = Tempfile.new(Const::PUMA_TMP_BASE)
@body.binmode
@body.unlink # This is the changed part
@tempfile = @body
@chunked_content_length = 0
if decode_chunk(body)
@env[CONTENT_LENGTH] = @chunked_content_length
return true # rubocop:disable Style/RedundantReturn
end
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Middleware
class RackMultipartTempfileFactory
# Immediately unlink the created temporary file so we don't have to rely
# on Rack::TempfileReaper catching this after the fact.
FACTORY = lambda do |filename, content_type|
Rack::Multipart::Parser::TEMPFILE_FACTORY.call(filename, content_type).tap(&:unlink)
end
def initialize(app)
@app = app
end
def call(env)
if ENV['GITLAB_TEMPFILE_IMMEDIATE_UNLINK'] == '1'
env[Rack::RACK_MULTIPART_TEMPFILE_FACTORY] = FACTORY
end
@app.call(env)
end
end
end
end
# frozen_string_literal: true
require 'fast_spec_helper'
require 'rack'
RSpec.describe Gitlab::Middleware::RackMultipartTempfileFactory do
let(:app) do
lambda do |env|
params = Rack::Request.new(env).params
if params['file']
[200, { 'Content-Type' => params['file'][:type] }, [params['file'][:tempfile].read]]
else
[204, {}, []]
end
end
end
let(:file_contents) { '/9j/4AAQSkZJRgABAQAAAQABAAD//gA+Q1JFQVRPUjogZ2QtanBlZyB2MS4wICh1c2luZyBJSkcg' }
let(:multipart_fixture) do
boundary = 'AaB03x'
data = <<~DATA
--#{boundary}\r
Content-Disposition: form-data; name="file"; filename="dj.jpg"\r
Content-Type: image/jpeg\r
Content-Transfer-Encoding: base64\r
\r
#{file_contents}\r
--#{boundary}--\r
DATA
{
'CONTENT_TYPE' => "multipart/form-data; boundary=#{boundary}",
'CONTENT_LENGTH' => data.bytesize.to_s,
input: StringIO.new(data)
}
end
subject { described_class.new(app) }
context 'for a multipart request' do
let(:env) { Rack::MockRequest.env_for('/', multipart_fixture) }
context 'when the environment variable is enabled' do
before do
stub_env('GITLAB_TEMPFILE_IMMEDIATE_UNLINK', '1')
end
it 'immediately unlinks the temporary file' do
tempfile = Tempfile.new('foo')
expect(tempfile.path).not_to be(nil)
expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile)
expect(tempfile).to receive(:unlink).and_call_original
subject.call(env)
expect(tempfile.path).to be(nil)
end
it 'processes the request as normal' do
expect(subject.call(env)).to eq([200, { 'Content-Type' => 'image/jpeg' }, [file_contents]])
end
end
context 'when the environment variable is disabled' do
it 'does not immediately unlink the temporary file' do
tempfile = Tempfile.new('foo')
expect(tempfile.path).not_to be(nil)
expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile)
expect(tempfile).not_to receive(:unlink).and_call_original
subject.call(env)
expect(tempfile.path).not_to be(nil)
end
it 'processes the request as normal' do
expect(subject.call(env)).to eq([200, { 'Content-Type' => 'image/jpeg' }, [file_contents]])
end
end
end
context 'for a regular request' do
let(:env) { Rack::MockRequest.env_for('/', params: { 'foo' => 'bar' }) }
it 'does nothing' do
expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).not_to receive(:call)
expect(subject.call(env)).to eq([204, {}, []])
end
end
end
......@@ -1584,7 +1584,6 @@ RSpec.describe API::Projects do
expect(json_response['alt']).to eq("dk")
expect(json_response['url']).to start_with("/uploads/")
expect(json_response['url']).to end_with("/dk.png")
expect(json_response['full_path']).to start_with("/#{project.namespace.path}/#{project.path}/uploads")
end
......@@ -1596,6 +1595,24 @@ RSpec.describe API::Projects do
post api("/projects/#{project.id}/uploads", user), params: { file: file }
end
it "does not leave the temporary file in place after uploading, even when the tempfile reaper does not run" do
stub_env('GITLAB_TEMPFILE_IMMEDIATE_UNLINK', '1')
tempfile = Tempfile.new('foo')
path = tempfile.path
allow_any_instance_of(Rack::TempfileReaper).to receive(:call) do |instance, env|
instance.instance_variable_get(:@app).call(env)
end
expect(path).not_to be(nil)
expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile)
post api("/projects/#{project.id}/uploads", user), params: { file: fixture_file_upload("spec/fixtures/dk.png", "image/png") }
expect(tempfile.path).to be(nil)
expect(File.exist?(path)).to be(false)
end
shared_examples 'capped upload attachments' do
it "limits the upload to 1 GB" do
expect_next_instance_of(UploadService) do |instance|
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment