Commit cb3e3835 authored by Yorick Peterse's avatar Yorick Peterse

Merge branch 'extract-pages-with-rubyzip' into 'master'

Extract pages with rubyzip

See merge request gitlab/gitlabhq!2758
parents 300d1028 1a8100cf
...@@ -57,6 +57,7 @@ gem 'u2f', '~> 0.2.1' ...@@ -57,6 +57,7 @@ gem 'u2f', '~> 0.2.1'
# GitLab Pages # GitLab Pages
gem 'validates_hostname', '~> 1.0.6' gem 'validates_hostname', '~> 1.0.6'
gem 'rubyzip', '~> 1.2.2', require: false
# Browser detection # Browser detection
gem 'browser', '~> 2.5' gem 'browser', '~> 2.5'
......
...@@ -1137,6 +1137,7 @@ DEPENDENCIES ...@@ -1137,6 +1137,7 @@ DEPENDENCIES
ruby-prof (~> 0.17.0) ruby-prof (~> 0.17.0)
ruby-progressbar ruby-progressbar
ruby_parser (~> 3.8) ruby_parser (~> 3.8)
rubyzip (~> 1.2.2)
rugged (~> 0.27) rugged (~> 0.27)
sanitize (~> 4.6) sanitize (~> 4.6)
sass (~> 3.5) sass (~> 3.5)
......
...@@ -7,7 +7,11 @@ module Projects ...@@ -7,7 +7,11 @@ module Projects
BLOCK_SIZE = 32.kilobytes BLOCK_SIZE = 32.kilobytes
MAX_SIZE = 1.terabyte MAX_SIZE = 1.terabyte
SITE_PATH = 'public/'.freeze PUBLIC_DIR = 'public'.freeze
# this has to be invalid group name,
# as it shares the namespace with groups
TMP_EXTRACT_PATH = '@pages.tmp'.freeze
attr_reader :build attr_reader :build
...@@ -27,12 +31,11 @@ module Projects ...@@ -27,12 +31,11 @@ module Projects
raise InvalidStateError, 'pages are outdated' unless latest? raise InvalidStateError, 'pages are outdated' unless latest?
# Create temporary directory in which we will extract the artifacts # Create temporary directory in which we will extract the artifacts
FileUtils.mkdir_p(tmp_path) make_secure_tmp_dir(tmp_path) do |archive_path|
Dir.mktmpdir(nil, tmp_path) do |archive_path|
extract_archive!(archive_path) extract_archive!(archive_path)
# Check if we did extract public directory # Check if we did extract public directory
archive_public_path = File.join(archive_path, 'public') archive_public_path = File.join(archive_path, PUBLIC_DIR)
raise InvalidStateError, 'pages miss the public folder' unless Dir.exist?(archive_public_path) raise InvalidStateError, 'pages miss the public folder' unless Dir.exist?(archive_public_path)
raise InvalidStateError, 'pages are outdated' unless latest? raise InvalidStateError, 'pages are outdated' unless latest?
...@@ -85,22 +88,18 @@ module Projects ...@@ -85,22 +88,18 @@ module Projects
raise InvalidStateError, 'missing artifacts metadata' unless build.artifacts_metadata? raise InvalidStateError, 'missing artifacts metadata' unless build.artifacts_metadata?
# Calculate page size after extract # Calculate page size after extract
public_entry = build.artifacts_metadata_entry(SITE_PATH, recursive: true) public_entry = build.artifacts_metadata_entry(PUBLIC_DIR + '/', recursive: true)
if public_entry.total_size > max_size if public_entry.total_size > max_size
raise InvalidStateError, "artifacts for pages are too large: #{public_entry.total_size}" raise InvalidStateError, "artifacts for pages are too large: #{public_entry.total_size}"
end end
# Requires UnZip at least 6.00 Info-ZIP.
# -qq be (very) quiet
# -n never overwrite existing files
# We add * to end of SITE_PATH, because we want to extract SITE_PATH and all subdirectories
site_path = File.join(SITE_PATH, '*')
build.artifacts_file.use_file do |artifacts_path| build.artifacts_file.use_file do |artifacts_path|
unless system(*%W(unzip -n #{artifacts_path} #{site_path} -d #{temp_path})) SafeZip::Extract.new(artifacts_path)
raise FailedToExtractError, 'pages failed to extract' .extract(directories: [PUBLIC_DIR], to: temp_path)
end
end end
rescue SafeZip::Extract::Error => e
raise FailedToExtractError, e.message
end end
def deploy_page!(archive_public_path) def deploy_page!(archive_public_path)
...@@ -139,7 +138,7 @@ module Projects ...@@ -139,7 +138,7 @@ module Projects
end end
def tmp_path def tmp_path
@tmp_path ||= File.join(::Settings.pages.path, 'tmp') @tmp_path ||= File.join(::Settings.pages.path, TMP_EXTRACT_PATH)
end end
def pages_path def pages_path
...@@ -147,11 +146,11 @@ module Projects ...@@ -147,11 +146,11 @@ module Projects
end end
def public_path def public_path
@public_path ||= File.join(pages_path, 'public') @public_path ||= File.join(pages_path, PUBLIC_DIR)
end end
def previous_public_path def previous_public_path
@previous_public_path ||= File.join(pages_path, "public.#{SecureRandom.hex}") @previous_public_path ||= File.join(pages_path, "#{PUBLIC_DIR}.#{SecureRandom.hex}")
end end
def ref def ref
...@@ -188,5 +187,15 @@ module Projects ...@@ -188,5 +187,15 @@ module Projects
def pages_deployments_failed_total_counter def pages_deployments_failed_total_counter
@pages_deployments_failed_total_counter ||= Gitlab::Metrics.counter(:pages_deployments_failed_total, "Counter of GitLab Pages deployments which failed") @pages_deployments_failed_total_counter ||= Gitlab::Metrics.counter(:pages_deployments_failed_total, "Counter of GitLab Pages deployments which failed")
end end
def make_secure_tmp_dir(tmp_path)
FileUtils.mkdir_p(tmp_path)
path = Dir.mktmpdir(nil, tmp_path)
begin
yield(path)
ensure
FileUtils.remove_entry_secure(path)
end
end
end end
end end
---
title: Extract GitLab Pages using RubyZip
merge_request:
author:
type: security
# frozen_string_literal: true
module SafeZip
class Entry
attr_reader :zip_archive, :zip_entry
attr_reader :path, :params
def initialize(zip_archive, zip_entry, params)
@zip_archive = zip_archive
@zip_entry = zip_entry
@params = params
@path = ::File.expand_path(zip_entry.name, params.extract_path)
end
def path_dir
::File.dirname(path)
end
def real_path_dir
::File.realpath(path_dir)
end
def exist?
::File.exist?(path)
end
def extract
# do not extract if file is not part of target directory
return false unless matching_target_directory
# do not overwrite existing file
raise SafeZip::Extract::AlreadyExistsError, "File already exists #{zip_entry.name}" if exist?
create_path_dir
if zip_entry.file?
extract_file
elsif zip_entry.directory?
extract_dir
elsif zip_entry.symlink?
extract_symlink
else
raise SafeZip::Extract::UnsupportedEntryError, "File #{zip_entry.name} cannot be extracted"
end
rescue SafeZip::Extract::Error
raise
rescue => e
raise SafeZip::Extract::ExtractError, e.message
end
private
def extract_file
zip_archive.extract(zip_entry, path)
end
def extract_dir
FileUtils.mkdir(path)
end
def extract_symlink
source_path = read_symlink
real_source_path = expand_symlink(source_path)
# ensure that source path of symlink is within target directories
unless real_source_path.start_with?(matching_target_directory)
raise SafeZip::Extract::PermissionDeniedError, "Symlink cannot be created targeting: #{source_path}"
end
::File.symlink(source_path, path)
end
def create_path_dir
# Create all directories, but ignore permissions
FileUtils.mkdir_p(path_dir)
# disallow to make path dirs to point to another directories
unless path_dir == real_path_dir
raise SafeZip::Extract::PermissionDeniedError, "Directory of #{zip_entry.name} points to another directory"
end
end
def matching_target_directory
params.matching_target_directory(path)
end
def read_symlink
zip_archive.read(zip_entry)
end
def expand_symlink(source_path)
::File.realpath(source_path, path_dir)
rescue
raise SafeZip::Extract::SymlinkSourceDoesNotExistError, "Symlink source #{source_path} does not exist"
end
end
end
# frozen_string_literal: true
module SafeZip
class Extract
Error = Class.new(StandardError)
PermissionDeniedError = Class.new(Error)
SymlinkSourceDoesNotExistError = Class.new(Error)
UnsupportedEntryError = Class.new(Error)
AlreadyExistsError = Class.new(Error)
NoMatchingError = Class.new(Error)
ExtractError = Class.new(Error)
attr_reader :archive_path
def initialize(archive_file)
@archive_path = archive_file
end
def extract(opts = {})
params = SafeZip::ExtractParams.new(**opts)
if Feature.enabled?(:safezip_use_rubyzip, default_enabled: true)
extract_with_ruby_zip(params)
else
legacy_unsafe_extract_with_system_zip(params)
end
end
private
def extract_with_ruby_zip(params)
Zip::File.open(archive_path) do |zip_archive|
# Extract all files in the following order:
# 1. Directories first,
# 2. Files next,
# 3. Symlinks last (or anything else)
extracted = extract_all_entries(zip_archive, params,
zip_archive.lazy.select(&:directory?))
extracted += extract_all_entries(zip_archive, params,
zip_archive.lazy.select(&:file?))
extracted += extract_all_entries(zip_archive, params,
zip_archive.lazy.reject(&:directory?).reject(&:file?))
raise NoMatchingError, 'No entries extracted' unless extracted > 0
end
end
def extract_all_entries(zip_archive, params, entries)
entries.count do |zip_entry|
SafeZip::Entry.new(zip_archive, zip_entry, params)
.extract
end
end
def legacy_unsafe_extract_with_system_zip(params)
# Requires UnZip at least 6.00 Info-ZIP.
# -n never overwrite existing files
args = %W(unzip -n -qq #{archive_path})
# We add * to end of directory, because we want to extract directory and all subdirectories
args += params.directories_wildcard
# Target directory where we extract
args += %W(-d #{params.extract_path})
unless system(*args)
raise Error, 'archive failed to extract'
end
end
end
end
# frozen_string_literal: true
module SafeZip
class ExtractParams
include Gitlab::Utils::StrongMemoize
attr_reader :directories, :extract_path
def initialize(directories:, to:)
@directories = directories
@extract_path = ::File.realpath(to)
end
def matching_target_directory(path)
target_directories.find do |directory|
path.start_with?(directory)
end
end
def target_directories
strong_memoize(:target_directories) do
directories.map do |directory|
::File.join(::File.expand_path(directory, extract_path), '')
end
end
end
def directories_wildcard
strong_memoize(:directories_wildcard) do
directories.map do |directory|
::File.join(directory, '*')
end
end
end
end
end
require 'spec_helper'
describe SafeZip::Entry do
let(:target_path) { Dir.mktmpdir('safe-zip') }
let(:directories) { %w(public folder/with/subfolder) }
let(:params) { SafeZip::ExtractParams.new(directories: directories, to: target_path) }
let(:entry) { described_class.new(zip_archive, zip_entry, params) }
let(:entry_name) { 'public/folder/index.html' }
let(:entry_path_dir) { File.join(target_path, File.dirname(entry_name)) }
let(:entry_path) { File.join(target_path, entry_name) }
let(:zip_archive) { double }
let(:zip_entry) do
double(
name: entry_name,
file?: false,
directory?: false,
symlink?: false)
end
after do
FileUtils.remove_entry_secure(target_path)
end
context '#path_dir' do
subject { entry.path_dir }
it { is_expected.to eq(target_path + '/public/folder') }
end
context '#exist?' do
subject { entry.exist? }
context 'when entry does not exist' do
it { is_expected.not_to be_truthy }
end
context 'when entry does exist' do
before do
create_entry
end
it { is_expected.to be_truthy }
end
end
describe '#extract' do
subject { entry.extract }
context 'when entry does not match the filtered directories' do
using RSpec::Parameterized::TableSyntax
where(:entry_name) do
[
'assets/folder/index.html',
'public/../folder/index.html',
'public/../../../../../index.html',
'../../../../../public/index.html',
'/etc/passwd'
]
end
with_them do
it 'does not extract file' do
is_expected.to be_falsey
end
end
end
context 'when entry does exist' do
before do
create_entry
end
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::AlreadyExistsError)
end
end
context 'when entry type is unknown' do
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::UnsupportedEntryError)
end
end
context 'when entry is valid' do
shared_examples 'secured symlinks' do
context 'when we try to extract entry into symlinked folder' do
before do
FileUtils.mkdir_p(File.join(target_path, "source"))
File.symlink("source", File.join(target_path, "public"))
end
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::PermissionDeniedError)
end
end
end
context 'and is file' do
before do
allow(zip_entry).to receive(:file?) { true }
end
it 'does extract file' do
expect(zip_archive).to receive(:extract)
.with(zip_entry, entry_path)
.and_return(true)
is_expected.to be_truthy
end
it_behaves_like 'secured symlinks'
end
context 'and is directory' do
let(:entry_name) { 'public/folder/assets' }
before do
allow(zip_entry).to receive(:directory?) { true }
end
it 'does create directory' do
is_expected.to be_truthy
expect(File.exist?(entry_path)).to eq(true)
end
it_behaves_like 'secured symlinks'
end
context 'and is symlink' do
let(:entry_name) { 'public/folder/assets' }
before do
allow(zip_entry).to receive(:symlink?) { true }
allow(zip_archive).to receive(:read).with(zip_entry) { entry_symlink }
end
shared_examples 'a valid symlink' do
it 'does create symlink' do
is_expected.to be_truthy
expect(File.exist?(entry_path)).to eq(true)
end
end
context 'when source is within target' do
let(:entry_symlink) { '../images' }
context 'but does not exist' do
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::SymlinkSourceDoesNotExistError)
end
end
context 'and does exist' do
before do
FileUtils.mkdir_p(File.join(target_path, 'public', 'images'))
end
it_behaves_like 'a valid symlink'
end
end
context 'when source points outside of target' do
let(:entry_symlink) { '../../images' }
before do
FileUtils.mkdir(File.join(target_path, 'images'))
end
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::PermissionDeniedError)
end
end
context 'when source points to /etc/passwd' do
let(:entry_symlink) { '/etc/passwd' }
it 'raises an exception' do
expect { subject }.to raise_error(SafeZip::Extract::PermissionDeniedError)
end
end
end
end
end
private
def create_entry
FileUtils.mkdir_p(entry_path_dir)
FileUtils.touch(entry_path)
end
end
require 'spec_helper'
describe SafeZip::ExtractParams do
let(:target_path) { Dir.mktmpdir("safe-zip") }
let(:params) { described_class.new(directories: directories, to: target_path) }
let(:directories) { %w(public folder/with/subfolder) }
after do
FileUtils.remove_entry_secure(target_path)
end
describe '#extract_path' do
subject { params.extract_path }
it { is_expected.to eq(target_path) }
end
describe '#matching_target_directory' do
using RSpec::Parameterized::TableSyntax
subject { params.matching_target_directory(target_path + path) }
where(:path, :result) do
'/public/index.html' | '/public/'
'/non/existing/path' | nil
'/public' | nil
'/folder/with/index.html' | nil
end
with_them do
it { is_expected.to eq(result ? target_path + result : nil) }
end
end
describe '#target_directories' do
subject { params.target_directories }
it 'starts with target_path' do
is_expected.to all(start_with(target_path + '/'))
end
it 'ends with / for all paths' do
is_expected.to all(end_with('/'))
end
end
describe '#directories_wildcard' do
subject { params.directories_wildcard }
it 'adds * for all paths' do
is_expected.to all(end_with('/*'))
end
end
end
require 'spec_helper'
describe SafeZip::Extract do
let(:target_path) { Dir.mktmpdir('safe-zip') }
let(:directories) { %w(public) }
let(:object) { described_class.new(archive) }
let(:archive) { Rails.root.join('spec', 'fixtures', 'safe_zip', archive_name) }
after do
FileUtils.remove_entry_secure(target_path)
end
context '#extract' do
subject { object.extract(directories: directories, to: target_path) }
shared_examples 'extracts archive' do |param|
before do
stub_feature_flags(safezip_use_rubyzip: param)
end
it 'does extract archive' do
subject
expect(File.exist?(File.join(target_path, 'public', 'index.html'))).to eq(true)
expect(File.exist?(File.join(target_path, 'source'))).to eq(false)
end
end
shared_examples 'fails to extract archive' do |param|
before do
stub_feature_flags(safezip_use_rubyzip: param)
end
it 'does not extract archive' do
expect { subject }.to raise_error(SafeZip::Extract::Error)
end
end
%w(valid-simple.zip valid-symlinks-first.zip valid-non-writeable.zip).each do |name|
context "when using #{name} archive" do
let(:archive_name) { name }
context 'for RubyZip' do
it_behaves_like 'extracts archive', true
end
context 'for UnZip' do
it_behaves_like 'extracts archive', false
end
end
end
%w(invalid-symlink-does-not-exist.zip invalid-symlinks-outside.zip).each do |name|
context "when using #{name} archive" do
let(:archive_name) { name }
context 'for RubyZip' do
it_behaves_like 'fails to extract archive', true
end
context 'for UnZip (UNSAFE)' do
it_behaves_like 'extracts archive', false
end
end
end
context 'when no matching directories are found' do
let(:archive_name) { 'valid-simple.zip' }
let(:directories) { %w(non/existing) }
context 'for RubyZip' do
it_behaves_like 'fails to extract archive', true
end
context 'for UnZip' do
it_behaves_like 'fails to extract archive', false
end
end
end
end
...@@ -5,24 +5,27 @@ describe Projects::UpdatePagesService do ...@@ -5,24 +5,27 @@ describe Projects::UpdatePagesService do
set(:pipeline) { create(:ci_pipeline, project: project, sha: project.commit('HEAD').sha) } set(:pipeline) { create(:ci_pipeline, project: project, sha: project.commit('HEAD').sha) }
set(:build) { create(:ci_build, pipeline: pipeline, ref: 'HEAD') } set(:build) { create(:ci_build, pipeline: pipeline, ref: 'HEAD') }
let(:invalid_file) { fixture_file_upload('spec/fixtures/dk.png') } let(:invalid_file) { fixture_file_upload('spec/fixtures/dk.png') }
let(:extension) { 'zip' }
let(:file) { fixture_file_upload("spec/fixtures/pages.#{extension}") } let(:file) { fixture_file_upload("spec/fixtures/pages.zip") }
let(:empty_file) { fixture_file_upload("spec/fixtures/pages_empty.#{extension}") } let(:empty_file) { fixture_file_upload("spec/fixtures/pages_empty.zip") }
let(:metadata) do let(:metadata_filename) { "spec/fixtures/pages.zip.meta" }
filename = "spec/fixtures/pages.#{extension}.meta" let(:metadata) { fixture_file_upload(metadata_filename) if File.exist?(metadata_filename) }
fixture_file_upload(filename) if File.exist?(filename)
end
subject { described_class.new(project, build) } subject { described_class.new(project, build) }
before do before do
stub_feature_flags(safezip_use_rubyzip: true)
project.remove_pages project.remove_pages
end end
context 'legacy artifacts' do context '::TMP_EXTRACT_PATH' do
let(:extension) { 'zip' } subject { described_class::TMP_EXTRACT_PATH }
it { is_expected.not_to match(Gitlab::PathRegex.namespace_format_regex) }
end
context 'legacy artifacts' do
before do before do
build.update(legacy_artifacts_file: file) build.update(legacy_artifacts_file: file)
build.update(legacy_artifacts_metadata: metadata) build.update(legacy_artifacts_metadata: metadata)
...@@ -132,6 +135,20 @@ describe Projects::UpdatePagesService do ...@@ -132,6 +135,20 @@ describe Projects::UpdatePagesService do
end end
end end
context 'when using pages with non-writeable public' do
let(:file) { fixture_file_upload("spec/fixtures/pages_non_writeable.zip") }
context 'when using RubyZip' do
before do
stub_feature_flags(safezip_use_rubyzip: true)
end
it 'succeeds to extract' do
expect(execute).to eq(:success)
end
end
end
context 'when timeout happens by DNS error' do context 'when timeout happens by DNS error' do
before do before do
allow_any_instance_of(described_class) allow_any_instance_of(described_class)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment