Commit e8f49b4b authored by Francisco Javier López's avatar Francisco Javier López Committed by Douwe Maan

Support LFS objects when creating a project by import

parent 0dd7563b
...@@ -956,6 +956,10 @@ class Repository ...@@ -956,6 +956,10 @@ class Repository
blob_data_at(sha, path) blob_data_at(sha, path)
end end
def lfsconfig_for(sha)
blob_data_at(sha, '.lfsconfig')
end
def fetch_ref(source_repository, source_ref:, target_ref:) def fetch_ref(source_repository, source_ref:, target_ref:)
raw_repository.fetch_ref(source_repository.raw_repository, source_ref: source_ref, target_ref: target_ref) raw_repository.fetch_ref(source_repository.raw_repository, source_ref: source_ref, target_ref: target_ref)
end end
......
...@@ -3,7 +3,7 @@ class BaseService ...@@ -3,7 +3,7 @@ class BaseService
attr_accessor :project, :current_user, :params attr_accessor :project, :current_user, :params
def initialize(project, user, params = {}) def initialize(project, user = nil, params = {})
@project, @current_user, @params = project, user, params.dup @project, @current_user, @params = project, user, params.dup
end end
......
...@@ -17,6 +17,8 @@ module Projects ...@@ -17,6 +17,8 @@ module Projects
def execute def execute
add_repository_to_project add_repository_to_project
download_lfs_objects
import_data import_data
success success
...@@ -37,7 +39,7 @@ module Projects ...@@ -37,7 +39,7 @@ module Projects
# We should skip the repository for a GitHub import or GitLab project import, # We should skip the repository for a GitHub import or GitLab project import,
# because these importers fetch the project repositories for us. # because these importers fetch the project repositories for us.
return if has_importer? && importer_class.try(:imports_repository?) return if importer_imports_repository?
if unknown_url? if unknown_url?
# In this case, we only want to import issues, not a repository. # In this case, we only want to import issues, not a repository.
...@@ -73,6 +75,27 @@ module Projects ...@@ -73,6 +75,27 @@ module Projects
end end
end end
def download_lfs_objects
# In this case, we only want to import issues
return if unknown_url?
# If it has its own repository importer, it has to implements its own lfs import download
return if importer_imports_repository?
return unless project.lfs_enabled?
oids_to_download = Projects::LfsPointers::LfsImportService.new(project).execute
download_service = Projects::LfsPointers::LfsDownloadService.new(project)
oids_to_download.each do |oid, link|
download_service.execute(oid, link)
end
rescue => e
# Right now, to avoid aborting the importing process, we silently fail
# if any exception raises.
Rails.logger.error("The Lfs import process failed. #{e.message}")
end
def import_data def import_data
return unless has_importer? return unless has_importer?
...@@ -98,5 +121,9 @@ module Projects ...@@ -98,5 +121,9 @@ module Projects
def unknown_url? def unknown_url?
project.import_url == Project::UNKNOWN_IMPORT_URL project.import_url == Project::UNKNOWN_IMPORT_URL
end end
def importer_imports_repository?
has_importer? && importer_class.try(:imports_repository?)
end
end end
end end
# This service lists the download link from a remote source based on the
# oids provided
module Projects
module LfsPointers
class LfsDownloadLinkListService < BaseService
DOWNLOAD_ACTION = 'download'.freeze
DownloadLinksError = Class.new(StandardError)
DownloadLinkNotFound = Class.new(StandardError)
attr_reader :remote_uri
def initialize(project, remote_uri: nil)
super(project)
@remote_uri = remote_uri
end
# This method accepts two parameters:
# - oids: hash of oids to query. The structure is { lfs_file_oid => lfs_file_size }
#
# Returns a hash with the structure { lfs_file_oids => download_link }
def execute(oids)
return {} unless project&.lfs_enabled? && remote_uri && oids.present?
get_download_links(oids)
end
private
def get_download_links(oids)
response = Gitlab::HTTP.post(remote_uri,
body: request_body(oids),
headers: headers)
raise DownloadLinksError, response.message unless response.success?
parse_response_links(response['objects'])
end
def parse_response_links(objects_response)
objects_response.each_with_object({}) do |entry, link_list|
begin
oid = entry['oid']
link = entry.dig('actions', DOWNLOAD_ACTION, 'href')
raise DownloadLinkNotFound unless link
link_list[oid] = add_credentials(link)
rescue DownloadLinkNotFound, URI::InvalidURIError
Rails.logger.error("Link for Lfs Object with oid #{oid} not found or invalid.")
end
end
end
def request_body(oids)
{
operation: DOWNLOAD_ACTION,
objects: oids.map { |oid, size| { oid: oid, size: size } }
}.to_json
end
def headers
{
'Accept' => LfsRequest::CONTENT_TYPE,
'Content-Type' => LfsRequest::CONTENT_TYPE
}.freeze
end
def add_credentials(link)
uri = URI.parse(link)
if should_add_credentials?(uri)
uri.user = remote_uri.user
uri.password = remote_uri.password
end
uri.to_s
end
# The download link can be a local url or an object storage url
# If the download link has the some host as the import url then
# we add the same credentials because we may need them
def should_add_credentials?(link_uri)
url_credentials? && link_uri.host == remote_uri.host
end
def url_credentials?
remote_uri.user.present? || remote_uri.password.present?
end
end
end
end
# This service downloads and links lfs objects from a remote URL
module Projects
module LfsPointers
class LfsDownloadService < BaseService
def execute(oid, url)
return unless project&.lfs_enabled? && oid.present? && url.present?
return if LfsObject.exists?(oid: oid)
sanitized_uri = Gitlab::UrlSanitizer.new(url)
with_tmp_file(oid) do |file|
size = download_and_save_file(file, sanitized_uri)
lfs_object = LfsObject.new(oid: oid, size: size, file: file)
project.all_lfs_objects << lfs_object
end
rescue StandardError => e
Rails.logger.error("LFS file with oid #{oid} could't be downloaded from #{sanitized_uri.sanitized_url}: #{e.message}")
end
private
def download_and_save_file(file, sanitized_uri)
IO.copy_stream(open(sanitized_uri.sanitized_url, headers(sanitized_uri)), file)
end
def headers(sanitized_uri)
{}.tap do |headers|
credentials = sanitized_uri.credentials
if credentials[:user].present? || credentials[:password].present?
# Using authentication headers in the request
headers[:http_basic_authentication] = [credentials[:user], credentials[:password]]
end
end
end
def with_tmp_file(oid)
create_tmp_storage_dir
File.open(File.join(tmp_storage_dir, oid), 'w') { |file| yield file }
end
def create_tmp_storage_dir
FileUtils.makedirs(tmp_storage_dir) unless Dir.exist?(tmp_storage_dir)
end
def tmp_storage_dir
@tmp_storage_dir ||= File.join(storage_dir, 'tmp', 'download')
end
def storage_dir
@storage_dir ||= Gitlab.config.lfs.storage_path
end
end
end
end
# This service manages the whole worflow of discovering the Lfs files in a
# repository, linking them to the project and downloading (and linking) the non
# existent ones.
module Projects
module LfsPointers
class LfsImportService < BaseService
include Gitlab::Utils::StrongMemoize
HEAD_REV = 'HEAD'.freeze
LFS_ENDPOINT_PATTERN = /^\t?url\s*=\s*(.+)$/.freeze
LFS_BATCH_API_ENDPOINT = '/info/lfs/objects/batch'.freeze
LfsImportError = Class.new(StandardError)
def execute
return {} unless project&.lfs_enabled?
if external_lfs_endpoint?
# If the endpoint host is different from the import_url it means
# that the repo is using a third party service for storing the LFS files.
# In this case, we have to disable lfs in the project
disable_lfs!
return {}
end
get_download_links
rescue LfsDownloadLinkListService::DownloadLinksError => e
raise LfsImportError, "The LFS objects download list couldn't be imported. Error: #{e.message}"
end
private
def external_lfs_endpoint?
lfsconfig_endpoint_uri && lfsconfig_endpoint_uri.host != import_uri.host
end
def disable_lfs!
project.update(lfs_enabled: false)
end
def get_download_links
existent_lfs = LfsListService.new(project).execute
linked_oids = LfsLinkService.new(project).execute(existent_lfs.keys)
# Retrieving those oids not linked and which we need to download
not_linked_lfs = existent_lfs.except(*linked_oids)
LfsDownloadLinkListService.new(project, remote_uri: current_endpoint_uri).execute(not_linked_lfs)
end
def lfsconfig_endpoint_uri
strong_memoize(:lfsconfig_endpoint_uri) do
# Retrieveing the blob data from the .lfsconfig file
data = project.repository.lfsconfig_for(HEAD_REV)
# Parsing the data to retrieve the url
parsed_data = data&.match(LFS_ENDPOINT_PATTERN)
if parsed_data
URI.parse(parsed_data[1]).tap do |endpoint|
endpoint.user ||= import_uri.user
endpoint.password ||= import_uri.password
end
end
end
rescue URI::InvalidURIError
raise LfsImportError, 'Invalid URL in .lfsconfig file'
end
def import_uri
@import_uri ||= URI.parse(project.import_url)
rescue URI::InvalidURIError
raise LfsImportError, 'Invalid project import URL'
end
def current_endpoint_uri
(lfsconfig_endpoint_uri || default_endpoint_uri)
end
# The import url must end with '.git' here we ensure it is
def default_endpoint_uri
@default_endpoint_uri ||= begin
import_uri.dup.tap do |uri|
path = uri.path.gsub(%r(/$), '')
path += '.git' unless path.ends_with?('.git')
uri.path = path + LFS_BATCH_API_ENDPOINT
end
end
end
end
end
end
# Given a list of oids, this services links the existent Lfs Objects to the project
module Projects
module LfsPointers
class LfsLinkService < BaseService
# Accept an array of oids to link
#
# Returns a hash with the same structure with oids linked
def execute(oids)
return {} unless project&.lfs_enabled?
# Search and link existing LFS Object
link_existing_lfs_objects(oids)
end
private
def link_existing_lfs_objects(oids)
existent_lfs_objects = LfsObject.where(oid: oids)
return [] unless existent_lfs_objects.any?
not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects)
project.all_lfs_objects << not_linked_lfs_objects
existent_lfs_objects.pluck(:oid)
end
end
end
end
# This service list all existent Lfs objects in a repository
module Projects
module LfsPointers
class LfsListService < BaseService
REV = 'HEAD'.freeze
# Retrieve all lfs blob pointers and returns a hash
# with the structure { lfs_file_oid => lfs_file_size }
def execute
return {} unless project&.lfs_enabled?
Gitlab::Git::LfsChanges.new(project.repository, REV)
.all_pointers
.map! { |blob| [blob.lfs_oid, blob.lfs_size] }
.to_h
end
end
end
end
...@@ -31,12 +31,14 @@ ...@@ -31,12 +31,14 @@
- github_importer:github_import_import_diff_note - github_importer:github_import_import_diff_note
- github_importer:github_import_import_issue - github_importer:github_import_import_issue
- github_importer:github_import_import_note - github_importer:github_import_import_note
- github_importer:github_import_import_lfs_object
- github_importer:github_import_import_pull_request - github_importer:github_import_import_pull_request
- github_importer:github_import_refresh_import_jid - github_importer:github_import_refresh_import_jid
- github_importer:github_import_stage_finish_import - github_importer:github_import_stage_finish_import
- github_importer:github_import_stage_import_base_data - github_importer:github_import_stage_import_base_data
- github_importer:github_import_stage_import_issues_and_diff_notes - github_importer:github_import_stage_import_issues_and_diff_notes
- github_importer:github_import_stage_import_notes - github_importer:github_import_stage_import_notes
- github_importer:github_import_stage_import_lfs_objects
- github_importer:github_import_stage_import_pull_requests - github_importer:github_import_stage_import_pull_requests
- github_importer:github_import_stage_import_repository - github_importer:github_import_stage_import_repository
......
...@@ -21,6 +21,7 @@ module Gitlab ...@@ -21,6 +21,7 @@ module Gitlab
STAGES = { STAGES = {
issues_and_diff_notes: Stage::ImportIssuesAndDiffNotesWorker, issues_and_diff_notes: Stage::ImportIssuesAndDiffNotesWorker,
notes: Stage::ImportNotesWorker, notes: Stage::ImportNotesWorker,
lfs_objects: Stage::ImportLfsObjectsWorker,
finish: Stage::FinishImportWorker finish: Stage::FinishImportWorker
}.freeze }.freeze
......
# frozen_string_literal: true
module Gitlab
module GithubImport
class ImportLfsObjectWorker
include ObjectImporter
def representation_class
Representation::LfsObject
end
def importer_class
Importer::LfsObjectImporter
end
def counter_name
:github_importer_imported_lfs_objects
end
def counter_description
'The number of imported GitHub Lfs Objects'
end
end
end
end
# frozen_string_literal: true
module Gitlab
module GithubImport
module Stage
class ImportLfsObjectsWorker
include ApplicationWorker
include GithubImport::Queue
include StageMethods
def perform(project_id)
return unless (project = find_project(project_id))
import(project)
end
# project - An instance of Project.
def import(project)
waiter = Importer::LfsObjectsImporter
.new(project, nil)
.execute
AdvanceStageWorker.perform_async(
project.id,
{ waiter.key => waiter.jobs_remaining },
:finish
)
end
end
end
end
end
...@@ -18,7 +18,7 @@ module Gitlab ...@@ -18,7 +18,7 @@ module Gitlab
AdvanceStageWorker.perform_async( AdvanceStageWorker.perform_async(
project.id, project.id,
{ waiter.key => waiter.jobs_remaining }, { waiter.key => waiter.jobs_remaining },
:finish :lfs_objects
) )
end end
end end
......
---
title: Added support for LFS Download in the importing process
merge_request: 18871
author:
type: fixed
...@@ -1543,7 +1543,7 @@ module Gitlab ...@@ -1543,7 +1543,7 @@ module Gitlab
end end
end end
def rev_list(including: [], excluding: [], objects: false, &block) def rev_list(including: [], excluding: [], options: [], objects: false, &block)
args = ['rev-list'] args = ['rev-list']
args.push(*rev_list_param(including)) args.push(*rev_list_param(including))
...@@ -1556,6 +1556,10 @@ module Gitlab ...@@ -1556,6 +1556,10 @@ module Gitlab
args.push('--objects') if objects args.push('--objects') if objects
if options.any?
args.push(*options)
end
run_git!(args, lazy_block: block) run_git!(args, lazy_block: block)
end end
......
...@@ -38,7 +38,10 @@ module Gitlab ...@@ -38,7 +38,10 @@ module Gitlab
end end
def all_objects(require_path: nil, &lazy_block) def all_objects(require_path: nil, &lazy_block)
get_objects(including: :all, require_path: require_path, &lazy_block) get_objects(including: :all,
options: ["--filter=blob:limit=#{Gitlab::Git::Blob::LFS_POINTER_MAX_SIZE}"],
require_path: require_path,
&lazy_block)
end end
# This methods returns an array of missed references # This methods returns an array of missed references
...@@ -54,8 +57,8 @@ module Gitlab ...@@ -54,8 +57,8 @@ module Gitlab
repository.rev_list(args).split("\n") repository.rev_list(args).split("\n")
end end
def get_objects(including: [], excluding: [], require_path: nil) def get_objects(including: [], excluding: [], options: [], require_path: nil)
opts = { including: including, excluding: excluding, objects: true } opts = { including: including, excluding: excluding, options: options, objects: true }
repository.rev_list(opts) do |lazy_output| repository.rev_list(opts) do |lazy_output|
objects = objects_from_output(lazy_output, require_path: require_path) objects = objects_from_output(lazy_output, require_path: require_path)
......
# frozen_string_literal: true
module Gitlab
module GithubImport
module Importer
class LfsObjectImporter
attr_reader :lfs_object, :project
# lfs_object - An instance of `Gitlab::GithubImport::Representation::LfsObject`.
# project - An instance of `Project`.
def initialize(lfs_object, project, _)
@lfs_object = lfs_object
@project = project
end
def execute
Projects::LfsPointers::LfsDownloadService
.new(project)
.execute(lfs_object.oid, lfs_object.download_link)
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module GithubImport
module Importer
class LfsObjectsImporter
include ParallelScheduling
def importer_class
LfsObjectImporter
end
def representation_class
Representation::LfsObject
end
def sidekiq_worker_class
ImportLfsObjectWorker
end
def collection_method
:lfs_objects
end
def each_object_to_import
lfs_objects = Projects::LfsPointers::LfsImportService.new(project).execute
lfs_objects.each do |object|
yield object
end
rescue StandardError => e
Rails.logger.error("The Lfs import process failed. #{e.message}")
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module GithubImport
module Representation
class LfsObject
include ToHash
include ExposeAttribute
attr_reader :attributes
expose_attribute :oid, :download_link
# Builds a lfs_object
def self.from_api_response(lfs_object)
new({ oid: lfs_object[0], download_link: lfs_object[1] })
end
# Builds a new lfs_object using a Hash that was built from a JSON payload.
def self.from_json_hash(raw_hash)
new(Representation.symbolize_hash(raw_hash))
end
# attributes - A Hash containing the raw lfs_object details. The keys of this
# Hash must be Symbols.
def initialize(attributes)
@attributes = attributes
end
end
end
end
end
...@@ -19,7 +19,8 @@ module Gitlab ...@@ -19,7 +19,8 @@ module Gitlab
Importer::PullRequestsImporter, Importer::PullRequestsImporter,
Importer::IssuesImporter, Importer::IssuesImporter,
Importer::DiffNotesImporter, Importer::DiffNotesImporter,
Importer::NotesImporter Importer::NotesImporter,
Importer::LfsObjectsImporter
].freeze ].freeze
# project - The project to import the data into. # project - The project to import the data into.
......
...@@ -88,7 +88,7 @@ describe Gitlab::Git::RevList do ...@@ -88,7 +88,7 @@ describe Gitlab::Git::RevList do
context '#all_objects' do context '#all_objects' do
it 'fetches list of all pushed objects using rev-list' do it 'fetches list of all pushed objects using rev-list' do
stub_popen_rev_list('--all', '--objects', output: "sha1\nsha2") stub_popen_rev_list('--all', '--objects', '--filter=blob:limit=200', output: "sha1\nsha2")
expect { |b| rev_list.all_objects(&b) }.to yield_with_args(%w[sha1 sha2]) expect { |b| rev_list.all_objects(&b) }.to yield_with_args(%w[sha1 sha2])
end end
......
require 'spec_helper'
describe Gitlab::GithubImport::Importer::LfsObjectImporter do
let(:project) { create(:project) }
let(:download_link) { "http://www.gitlab.com/lfs_objects/oid" }
let(:github_lfs_object) do
Gitlab::GithubImport::Representation::LfsObject.new(
oid: 'oid', download_link: download_link
)
end
let(:importer) { described_class.new(github_lfs_object, project, nil) }
describe '#execute' do
it 'calls the LfsDownloadService with the lfs object attributes' do
expect_any_instance_of(Projects::LfsPointers::LfsDownloadService)
.to receive(:execute).with('oid', download_link)
importer.execute
end
end
end
require 'spec_helper'
describe Gitlab::GithubImport::Importer::LfsObjectsImporter do
let(:project) { double(:project, id: 4, import_source: 'foo/bar') }
let(:client) { double(:client) }
let(:download_link) { "http://www.gitlab.com/lfs_objects/oid" }
let(:github_lfs_object) { ['oid', download_link] }
describe '#parallel?' do
it 'returns true when running in parallel mode' do
importer = described_class.new(project, client)
expect(importer).to be_parallel
end
it 'returns false when running in sequential mode' do
importer = described_class.new(project, client, parallel: false)
expect(importer).not_to be_parallel
end
end
describe '#execute' do
context 'when running in parallel mode' do
it 'imports lfs objects in parallel' do
importer = described_class.new(project, client)
expect(importer).to receive(:parallel_import)
importer.execute
end
end
context 'when running in sequential mode' do
it 'imports lfs objects in sequence' do
importer = described_class.new(project, client, parallel: false)
expect(importer).to receive(:sequential_import)
importer.execute
end
end
end
describe '#sequential_import' do
it 'imports each lfs object in sequence' do
importer = described_class.new(project, client, parallel: false)
lfs_object_importer = double(:lfs_object_importer)
allow(importer)
.to receive(:each_object_to_import)
.and_yield(['oid', download_link])
expect(Gitlab::GithubImport::Importer::LfsObjectImporter)
.to receive(:new)
.with(
an_instance_of(Gitlab::GithubImport::Representation::LfsObject),
project,
client
)
.and_return(lfs_object_importer)
expect(lfs_object_importer).to receive(:execute)
importer.sequential_import
end
end
describe '#parallel_import' do
it 'imports each lfs object in parallel' do
importer = described_class.new(project, client)
allow(importer)
.to receive(:each_object_to_import)
.and_yield(github_lfs_object)
expect(Gitlab::GithubImport::ImportLfsObjectWorker)
.to receive(:perform_async)
.with(project.id, an_instance_of(Hash), an_instance_of(String))
waiter = importer.parallel_import
expect(waiter).to be_an_instance_of(Gitlab::JobWaiter)
expect(waiter.jobs_remaining).to eq(1)
end
end
describe '#collection_options' do
it 'returns an empty Hash' do
importer = described_class.new(project, client)
expect(importer.collection_options).to eq({})
end
end
end
...@@ -14,7 +14,8 @@ describe Gitlab::GithubImport::Importer::RepositoryImporter do ...@@ -14,7 +14,8 @@ describe Gitlab::GithubImport::Importer::RepositoryImporter do
disk_path: 'foo', disk_path: 'foo',
repository: repository, repository: repository,
create_wiki: true, create_wiki: true,
import_state: import_state import_state: import_state,
lfs_enabled?: true
) )
end end
......
...@@ -91,4 +91,23 @@ describe Gitlab::ImportSources do ...@@ -91,4 +91,23 @@ describe Gitlab::ImportSources do
end end
end end
end end
describe 'imports_repository? checker' do
let(:allowed_importers) { %w[github gitlab_project] }
it 'fails if any importer other than the allowed ones implements this method' do
current_importers = described_class.values.select { |kind| described_class.importer(kind).try(:imports_repository?) }
not_allowed_importers = current_importers - allowed_importers
expect(not_allowed_importers).to be_empty, failure_message(not_allowed_importers)
end
def failure_message(importers_class_names)
<<-MSG
It looks like the #{importers_class_names.join(', ')} importers implements its own way to import the repository.
That means that the lfs object download must be handled for each of them. You can use 'LfsImportService' and
'LfsDownloadService' to implement it. After that, add the importer name to the list of allowed importers in this spec.
MSG
end
end
end end
...@@ -3,9 +3,17 @@ require 'spec_helper' ...@@ -3,9 +3,17 @@ require 'spec_helper'
describe Projects::ImportService do describe Projects::ImportService do
let!(:project) { create(:project) } let!(:project) { create(:project) }
let(:user) { project.creator } let(:user) { project.creator }
let(:import_url) { 'http://www.gitlab.com/demo/repo.git' }
let(:oid_download_links) { { 'oid1' => "#{import_url}/gitlab-lfs/objects/oid1", 'oid2' => "#{import_url}/gitlab-lfs/objects/oid2" } }
subject { described_class.new(project, user) } subject { described_class.new(project, user) }
before do
allow(project).to receive(:lfs_enabled?).and_return(true)
allow_any_instance_of(Projects::LfsPointers::LfsDownloadService).to receive(:execute)
allow_any_instance_of(Projects::LfsPointers::LfsImportService).to receive(:execute).and_return(oid_download_links)
end
describe '#async?' do describe '#async?' do
it 'returns true for an asynchronous importer' do it 'returns true for an asynchronous importer' do
importer_class = double(:importer, async?: true) importer_class = double(:importer, async?: true)
...@@ -63,6 +71,15 @@ describe Projects::ImportService do ...@@ -63,6 +71,15 @@ describe Projects::ImportService do
expect(result[:status]).to eq :error expect(result[:status]).to eq :error
expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.full_path} - The repository could not be created." expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.full_path} - The repository could not be created."
end end
context 'when repository creation succeeds' do
it 'does not download lfs files' do
expect_any_instance_of(Projects::LfsPointers::LfsImportService).not_to receive(:execute)
expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).not_to receive(:execute)
subject.execute
end
end
end end
context 'with known url' do context 'with known url' do
...@@ -91,6 +108,15 @@ describe Projects::ImportService do ...@@ -91,6 +108,15 @@ describe Projects::ImportService do
expect(result[:status]).to eq :error expect(result[:status]).to eq :error
end end
context 'when repository import scheduled' do
it 'does not download lfs objects' do
expect_any_instance_of(Projects::LfsPointers::LfsImportService).not_to receive(:execute)
expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).not_to receive(:execute)
subject.execute
end
end
end end
context 'with a non Github repository' do context 'with a non Github repository' do
...@@ -99,9 +125,10 @@ describe Projects::ImportService do ...@@ -99,9 +125,10 @@ describe Projects::ImportService do
project.import_type = 'bitbucket' project.import_type = 'bitbucket'
end end
it 'succeeds if repository import is successfully' do it 'succeeds if repository import is successfull' do
expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_return(true) expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_return(true)
expect_any_instance_of(Gitlab::BitbucketImport::Importer).to receive(:execute).and_return(true) expect_any_instance_of(Gitlab::BitbucketImport::Importer).to receive(:execute).and_return(true)
expect_any_instance_of(Projects::LfsPointers::LfsImportService).to receive(:execute).and_return({})
result = subject.execute result = subject.execute
...@@ -116,6 +143,29 @@ describe Projects::ImportService do ...@@ -116,6 +143,29 @@ describe Projects::ImportService do
expect(result[:status]).to eq :error expect(result[:status]).to eq :error
expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.full_path} - Failed to import the repository" expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.full_path} - Failed to import the repository"
end end
context 'when repository import scheduled' do
before do
allow_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_return(true)
allow(subject).to receive(:import_data)
end
it 'downloads lfs objects if lfs_enabled is enabled for project' do
allow(project).to receive(:lfs_enabled?).and_return(true)
expect_any_instance_of(Projects::LfsPointers::LfsImportService).to receive(:execute).and_return(oid_download_links)
expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).to receive(:execute).twice
subject.execute
end
it 'does not download lfs objects if lfs_enabled is not enabled for project' do
allow(project).to receive(:lfs_enabled?).and_return(false)
expect_any_instance_of(Projects::LfsPointers::LfsImportService).not_to receive(:execute)
expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).not_to receive(:execute)
subject.execute
end
end
end end
end end
...@@ -147,6 +197,26 @@ describe Projects::ImportService do ...@@ -147,6 +197,26 @@ describe Projects::ImportService do
expect(result[:status]).to eq :error expect(result[:status]).to eq :error
end end
context 'when importer' do
it 'has a custom repository importer it does not download lfs objects' do
allow(Gitlab::GithubImport::ParallelImporter).to receive(:imports_repository?).and_return(true)
expect_any_instance_of(Projects::LfsPointers::LfsImportService).not_to receive(:execute)
expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).not_to receive(:execute)
subject.execute
end
it 'does not have a custom repository importer downloads lfs objects' do
allow(Gitlab::GithubImport::ParallelImporter).to receive(:imports_repository?).and_return(false)
expect_any_instance_of(Projects::LfsPointers::LfsImportService).to receive(:execute).and_return(oid_download_links)
expect_any_instance_of(Projects::LfsPointers::LfsDownloadService).to receive(:execute)
subject.execute
end
end
end end
context 'with blocked import_URL' do context 'with blocked import_URL' do
......
require 'spec_helper'
describe Projects::LfsPointers::LfsDownloadLinkListService do
let(:import_url) { 'http://www.gitlab.com/demo/repo.git' }
let(:lfs_endpoint) { "#{import_url}/info/lfs/objects/batch" }
let!(:project) { create(:project, import_url: import_url) }
let(:new_oids) { { 'oid1' => 123, 'oid2' => 125 } }
let(:remote_uri) { URI.parse(lfs_endpoint) }
let(:objects_response) do
body = new_oids.map do |oid, size|
{
'oid' => oid,
'size' => size,
'actions' => {
'download' => { 'href' => "#{import_url}/gitlab-lfs/objects/#{oid}" }
}
}
end
Struct.new(:success?, :objects).new(true, body)
end
let(:invalid_object_response) do
[
'oid' => 'whatever',
'size' => 123
]
end
subject { described_class.new(project, remote_uri: remote_uri) }
before do
allow(project).to receive(:lfs_enabled?).and_return(true)
allow(Gitlab::HTTP).to receive(:post).and_return(objects_response)
end
describe '#execute' do
it 'retrieves each download link of every non existent lfs object' do
subject.execute(new_oids).each do |oid, link|
expect(link).to eq "#{import_url}/gitlab-lfs/objects/#{oid}"
end
end
context 'credentials' do
context 'when the download link and the lfs_endpoint have the same host' do
context 'when lfs_endpoint has credentials' do
let(:import_url) { 'http://user:password@www.gitlab.com/demo/repo.git' }
it 'adds credentials to the download_link' do
result = subject.execute(new_oids)
result.each do |oid, link|
expect(link.starts_with?('http://user:password@')).to be_truthy
end
end
end
context 'when lfs_endpoint does not have any credentials' do
it 'does not add any credentials' do
result = subject.execute(new_oids)
result.each do |oid, link|
expect(link.starts_with?('http://user:password@')).to be_falsey
end
end
end
end
context 'when the download link and the lfs_endpoint have different hosts' do
let(:import_url_with_credentials) { 'http://user:password@www.otherdomain.com/demo/repo.git' }
let(:lfs_endpoint) { "#{import_url_with_credentials}/info/lfs/objects/batch" }
it 'downloads without any credentials' do
result = subject.execute(new_oids)
result.each do |oid, link|
expect(link.starts_with?('http://user:password@')).to be_falsey
end
end
end
end
end
describe '#get_download_links' do
it 'raise errorif request fails' do
allow(Gitlab::HTTP).to receive(:post).and_return(Struct.new(:success?, :message).new(false, 'Failed request'))
expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError)
end
end
describe '#parse_response_links' do
it 'does not add oid entry if href not found' do
expect(Rails.logger).to receive(:error).with("Link for Lfs Object with oid whatever not found or invalid.")
result = subject.send(:parse_response_links, invalid_object_response)
expect(result).to be_empty
end
end
end
require 'spec_helper'
describe Projects::LfsPointers::LfsDownloadService do
let(:project) { create(:project) }
let(:oid) { '9e548e25631dd9ce6b43afd6359ab76da2819d6a5b474e66118c7819e1d8b3e8' }
let(:download_link) { "http://gitlab.com/#{oid}" }
let(:lfs_content) do
<<~HEREDOC
whatever
HEREDOC
end
subject { described_class.new(project) }
before do
allow(project).to receive(:lfs_enabled?).and_return(true)
WebMock.stub_request(:get, download_link).to_return(body: lfs_content)
end
describe '#execute' do
context 'when file download succeeds' do
it 'a new lfs object is created' do
expect { subject.execute(oid, download_link) }.to change { LfsObject.count }.from(0).to(1)
end
it 'has the same oid' do
subject.execute(oid, download_link)
expect(LfsObject.first.oid).to eq oid
end
it 'stores the content' do
subject.execute(oid, download_link)
expect(File.read(LfsObject.first.file.file.file)).to eq lfs_content
end
end
context 'when file download fails' do
it 'no lfs object is created' do
expect { subject.execute(oid, download_link) }.to change { LfsObject.count }
end
end
context 'when credentials present' do
let(:download_link_with_credentials) { "http://user:password@gitlab.com/#{oid}" }
before do
WebMock.stub_request(:get, download_link).with(headers: { 'Authorization' => 'Basic dXNlcjpwYXNzd29yZA==' }).to_return(body: lfs_content)
end
it 'the request adds authorization headers' do
subject.execute(oid, download_link_with_credentials)
end
end
context 'when an lfs object with the same oid already exists' do
before do
create(:lfs_object, oid: 'oid')
end
it 'does not download the file' do
expect(subject).not_to receive(:download_and_save_file)
subject.execute('oid', download_link)
end
end
end
end
require 'spec_helper'
describe Projects::LfsPointers::LfsImportService do
let(:import_url) { 'http://www.gitlab.com/demo/repo.git' }
let(:default_endpoint) { "#{import_url}/info/lfs/objects/batch"}
let(:group) { create(:group, lfs_enabled: true)}
let!(:project) { create(:project, namespace: group, import_url: import_url, lfs_enabled: true) }
let!(:lfs_objects_project) { create_list(:lfs_objects_project, 2, project: project) }
let!(:existing_lfs_objects) { LfsObject.pluck(:oid, :size).to_h }
let(:oids) { { 'oid1' => 123, 'oid2' => 125 } }
let(:oid_download_links) { { 'oid1' => "#{import_url}/gitlab-lfs/objects/oid1", 'oid2' => "#{import_url}/gitlab-lfs/objects/oid2" } }
let(:all_oids) { existing_lfs_objects.merge(oids) }
let(:remote_uri) { URI.parse(lfs_endpoint) }
subject { described_class.new(project) }
before do
allow(project.repository).to receive(:lfsconfig_for).and_return(nil)
allow(Gitlab.config.lfs).to receive(:enabled).and_return(true)
allow_any_instance_of(Projects::LfsPointers::LfsListService).to receive(:execute).and_return(all_oids)
end
describe '#execute' do
context 'when no lfs pointer is linked' do
before do
allow_any_instance_of(Projects::LfsPointers::LfsLinkService).to receive(:execute).and_return([])
allow_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).and_return(oid_download_links)
expect(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:new).with(project, remote_uri: URI.parse(default_endpoint)).and_call_original
end
it 'retrieves all lfs pointers in the project repository' do
expect_any_instance_of(Projects::LfsPointers::LfsListService).to receive(:execute)
subject.execute
end
it 'links existent lfs objects to the project' do
expect_any_instance_of(Projects::LfsPointers::LfsLinkService).to receive(:execute)
subject.execute
end
it 'retrieves the download links of non existent objects' do
expect_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).with(all_oids)
subject.execute
end
end
context 'when some lfs objects are linked' do
before do
allow_any_instance_of(Projects::LfsPointers::LfsLinkService).to receive(:execute).and_return(existing_lfs_objects.keys)
allow_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).and_return(oid_download_links)
end
it 'retrieves the download links of non existent objects' do
expect_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).with(oids)
subject.execute
end
end
context 'when all lfs objects are linked' do
before do
allow_any_instance_of(Projects::LfsPointers::LfsLinkService).to receive(:execute).and_return(all_oids.keys)
allow_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute)
end
it 'retrieves no download links' do
expect_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).with({}).and_call_original
expect(subject.execute).to be_empty
end
end
context 'when lfsconfig file exists' do
before do
allow(project.repository).to receive(:lfsconfig_for).and_return("[lfs]\n\turl = #{lfs_endpoint}\n")
end
context 'when url points to the same import url host' do
let(:lfs_endpoint) { "#{import_url}/different_endpoint" }
let(:service) { double }
before do
allow(service).to receive(:execute)
end
it 'downloads lfs object using the new endpoint' do
expect(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:new).with(project, remote_uri: remote_uri).and_return(service)
subject.execute
end
context 'when import url has credentials' do
let(:import_url) { 'http://user:password@www.gitlab.com/demo/repo.git'}
it 'adds the credentials to the new endpoint' do
expect(Projects::LfsPointers::LfsDownloadLinkListService)
.to receive(:new).with(project, remote_uri: URI.parse("http://user:password@www.gitlab.com/demo/repo.git/different_endpoint"))
.and_return(service)
subject.execute
end
context 'when url has its own credentials' do
let(:lfs_endpoint) { "http://user1:password1@www.gitlab.com/demo/repo.git/different_endpoint" }
it 'does not add the import url credentials' do
expect(Projects::LfsPointers::LfsDownloadLinkListService)
.to receive(:new).with(project, remote_uri: remote_uri)
.and_return(service)
subject.execute
end
end
end
end
context 'when url points to a third party service' do
let(:lfs_endpoint) { 'http://third_party_service.com/info/lfs/objects/' }
it 'disables lfs from the project' do
expect(project.lfs_enabled?).to be_truthy
subject.execute
expect(project.lfs_enabled?).to be_falsey
end
it 'does not download anything' do
expect_any_instance_of(Projects::LfsPointers::LfsListService).not_to receive(:execute)
subject.execute
end
end
end
end
describe '#default_endpoint_uri' do
let(:import_url) { 'http://www.gitlab.com/demo/repo' }
it 'adds suffix .git if the url does not have it' do
expect(subject.send(:default_endpoint_uri).path).to match(/repo.git/)
end
end
end
require 'spec_helper'
describe Projects::LfsPointers::LfsLinkService do
let!(:project) { create(:project, lfs_enabled: true) }
let!(:lfs_objects_project) { create_list(:lfs_objects_project, 2, project: project) }
let(:new_oids) { { 'oid1' => 123, 'oid2' => 125 } }
let(:all_oids) { LfsObject.pluck(:oid, :size).to_h.merge(new_oids) }
let(:new_lfs_object) { create(:lfs_object) }
let(:new_oid_list) { all_oids.merge(new_lfs_object.oid => new_lfs_object.size) }
subject { described_class.new(project) }
before do
allow(project).to receive(:lfs_enabled?).and_return(true)
end
describe '#execute' do
it 'links existing lfs objects to the project' do
expect(project.all_lfs_objects.count).to eq 2
linked = subject.execute(new_oid_list.keys)
expect(project.all_lfs_objects.count).to eq 3
expect(linked.size).to eq 3
end
it 'returns linked oids' do
linked = lfs_objects_project.map(&:lfs_object).map(&:oid) << new_lfs_object.oid
expect(subject.execute(new_oid_list.keys)).to eq linked
end
end
end
require 'spec_helper'
describe Gitlab::GithubImport::Stage::ImportLfsObjectsWorker do
let(:project) { create(:project) }
let(:worker) { described_class.new }
describe '#import' do
it 'imports all the lfs objects' do
importer = double(:importer)
waiter = Gitlab::JobWaiter.new(2, '123')
expect(Gitlab::GithubImport::Importer::LfsObjectsImporter)
.to receive(:new)
.with(project, nil)
.and_return(importer)
expect(importer)
.to receive(:execute)
.and_return(waiter)
expect(Gitlab::GithubImport::AdvanceStageWorker)
.to receive(:perform_async)
.with(project.id, { '123' => 2 }, :finish)
worker.import(project)
end
end
end
...@@ -21,7 +21,7 @@ describe Gitlab::GithubImport::Stage::ImportNotesWorker do ...@@ -21,7 +21,7 @@ describe Gitlab::GithubImport::Stage::ImportNotesWorker do
expect(Gitlab::GithubImport::AdvanceStageWorker) expect(Gitlab::GithubImport::AdvanceStageWorker)
.to receive(:perform_async) .to receive(:perform_async)
.with(project.id, { '123' => 2 }, :finish) .with(project.id, { '123' => 2 }, :lfs_objects)
worker.import(client, project) worker.import(client, project)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment