Commit 9da1771d authored by Sean McGivern's avatar Sean McGivern Committed by Matthias Käppler

Immediately unlink Rack::Multipart temporary files

Rack writes files from multipart/form-data requests to disk in a
temporary file. Rack includes a middleware to clean these up -
Rack::TempfileReaper - but that won't withstand a process being sent
SIGKILL. To handle that case, we can immediately unlink the created
temporary file, which means it will be removed once we're done with it
or the current process goes away.

For development mode and test mode, we have to ensure that this new
middleware is before Gitlab::Middleware::Static, otherwise we might not
get the chance to set our own middleware.

With direct upload configured, GitLab mostly doesn't accept
multipart/form-data requests in a way where they reach Rack directly -
they typically go via Workhorse which accelerates them - but there are
cases where it can happen, and direct upload is still only an option.

To test this manually, we can set `$GITLAB_API_TOKEN_LOCAL` to a
personal access token for the API in the local environment,
`$PATH_TO_FILE` to be a path to a (preferably large) file to be
uploaded, and break the actual saving of uploads (in the default case
with GDK, stop Minio):

    curl -H "Private-Token: $GITLAB_API_TOKEN_LOCAL" \
      -F "file=@$PATH_TO_FILE" \
      http://localhost:3000/api/v4/projects/1/uploads

Once the upload is finished and the request fails, we'll see the file we
uploaded in `$TMPDIR`:

    $ ls -l $TMPDIR/RackMultipart* | awk '{ print $5, $8 }'
    952107008 17:40

With this change, that won't happen: we'll see the file created and
immediately unlinked, so no matter what happens, it won't stick around
on disk. (This specific test case is handled by Rack::TempfileReaper in
later versions of Rack, but it still depends on manual cleanup.)
parent d2be5844
......@@ -29,6 +29,7 @@ module Gitlab
require_dependency Rails.root.join('lib/gitlab/middleware/same_site_cookies')
require_dependency Rails.root.join('lib/gitlab/middleware/handle_ip_spoof_attack_error')
require_dependency Rails.root.join('lib/gitlab/middleware/handle_malformed_strings')
require_dependency Rails.root.join('lib/gitlab/middleware/rack_multipart_tempfile_factory')
require_dependency Rails.root.join('lib/gitlab/runtime')
# Settings in config/environments/* take precedence over those specified here.
......@@ -271,6 +272,8 @@ module Gitlab
config.middleware.insert_after ActionDispatch::ActionableExceptions, ::Gitlab::Middleware::HandleMalformedStrings
config.middleware.insert_after Rack::Sendfile, ::Gitlab::Middleware::RackMultipartTempfileFactory
# Allow access to GitLab API from other domains
config.middleware.insert_before Warden::Manager, Rack::Cors do
headers_to_expose = %w[Link X-Total X-Total-Pages X-Per-Page X-Page X-Next-Page X-Prev-Page X-Gitlab-Blob-Id X-Gitlab-Commit-Id X-Gitlab-Content-Sha256 X-Gitlab-Encoding X-Gitlab-File-Name X-Gitlab-File-Path X-Gitlab-Last-Commit-Id X-Gitlab-Ref X-Gitlab-Size]
......
# frozen_string_literal: true
module Gitlab
module Middleware
class RackMultipartTempfileFactory
# Immediately unlink the created temporary file so we don't have to rely
# on Rack::TempfileReaper catching this after the fact.
FACTORY = lambda do |filename, content_type|
Rack::Multipart::Parser::TEMPFILE_FACTORY.call(filename, content_type).tap(&:unlink)
end
def initialize(app)
@app = app
end
def call(env)
if ENV['GITLAB_TEMPFILE_IMMEDIATE_UNLINK'] == '1'
env[Rack::RACK_MULTIPART_TEMPFILE_FACTORY] = FACTORY
end
@app.call(env)
end
end
end
end
# frozen_string_literal: true
require 'fast_spec_helper'
require 'rack'
RSpec.describe Gitlab::Middleware::RackMultipartTempfileFactory do
let(:app) do
lambda do |env|
params = Rack::Request.new(env).params
if params['file']
[200, { 'Content-Type' => params['file'][:type] }, [params['file'][:tempfile].read]]
else
[204, {}, []]
end
end
end
let(:file_contents) { '/9j/4AAQSkZJRgABAQAAAQABAAD//gA+Q1JFQVRPUjogZ2QtanBlZyB2MS4wICh1c2luZyBJSkcg' }
let(:multipart_fixture) do
boundary = 'AaB03x'
data = <<~DATA
--#{boundary}\r
Content-Disposition: form-data; name="file"; filename="dj.jpg"\r
Content-Type: image/jpeg\r
Content-Transfer-Encoding: base64\r
\r
#{file_contents}\r
--#{boundary}--\r
DATA
{
'CONTENT_TYPE' => "multipart/form-data; boundary=#{boundary}",
'CONTENT_LENGTH' => data.bytesize.to_s,
input: StringIO.new(data)
}
end
subject { described_class.new(app) }
context 'for a multipart request' do
let(:env) { Rack::MockRequest.env_for('/', multipart_fixture) }
context 'when the environment variable is enabled' do
before do
stub_env('GITLAB_TEMPFILE_IMMEDIATE_UNLINK', '1')
end
it 'immediately unlinks the temporary file' do
tempfile = Tempfile.new('foo')
expect(tempfile.path).not_to be(nil)
expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile)
expect(tempfile).to receive(:unlink).and_call_original
subject.call(env)
expect(tempfile.path).to be(nil)
end
it 'processes the request as normal' do
expect(subject.call(env)).to eq([200, { 'Content-Type' => 'image/jpeg' }, [file_contents]])
end
end
context 'when the environment variable is disabled' do
it 'does not immediately unlink the temporary file' do
tempfile = Tempfile.new('foo')
expect(tempfile.path).not_to be(nil)
expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile)
expect(tempfile).not_to receive(:unlink).and_call_original
subject.call(env)
expect(tempfile.path).not_to be(nil)
end
it 'processes the request as normal' do
expect(subject.call(env)).to eq([200, { 'Content-Type' => 'image/jpeg' }, [file_contents]])
end
end
end
context 'for a regular request' do
let(:env) { Rack::MockRequest.env_for('/', params: { 'foo' => 'bar' }) }
it 'does nothing' do
expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).not_to receive(:call)
expect(subject.call(env)).to eq([204, {}, []])
end
end
end
......@@ -1584,7 +1584,6 @@ RSpec.describe API::Projects do
expect(json_response['alt']).to eq("dk")
expect(json_response['url']).to start_with("/uploads/")
expect(json_response['url']).to end_with("/dk.png")
expect(json_response['full_path']).to start_with("/#{project.namespace.path}/#{project.path}/uploads")
end
......@@ -1596,6 +1595,24 @@ RSpec.describe API::Projects do
post api("/projects/#{project.id}/uploads", user), params: { file: file }
end
it "does not leave the temporary file in place after uploading, even when the tempfile reaper does not run" do
stub_env('GITLAB_TEMPFILE_IMMEDIATE_UNLINK', '1')
tempfile = Tempfile.new('foo')
path = tempfile.path
allow_any_instance_of(Rack::TempfileReaper).to receive(:call) do |instance, env|
instance.instance_variable_get(:@app).call(env)
end
expect(path).not_to be(nil)
expect(Rack::Multipart::Parser::TEMPFILE_FACTORY).to receive(:call).and_return(tempfile)
post api("/projects/#{project.id}/uploads", user), params: { file: fixture_file_upload("spec/fixtures/dk.png", "image/png") }
expect(tempfile.path).to be(nil)
expect(File.exist?(path)).to be(false)
end
shared_examples 'capped upload attachments' do
it "limits the upload to 1 GB" do
expect_next_instance_of(UploadService) do |instance|
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment