Commit 94b8336b authored by Yorick Peterse's avatar Yorick Peterse

Replace old GH importer with the parallel importer

parent d6ceb5e5
...@@ -87,6 +87,10 @@ module Projects ...@@ -87,6 +87,10 @@ module Projects
end end
end end
def importer_class
Gitlab::ImportSources.importer(project.import_type)
end
def has_importer? def has_importer?
Gitlab::ImportSources.importer_names.include?(project.import_type) Gitlab::ImportSources.importer_names.include?(project.import_type)
end end
...@@ -95,10 +99,6 @@ module Projects ...@@ -95,10 +99,6 @@ module Projects
importer_class.new(project) importer_class.new(project)
end end
def importer_class
Gitlab::ImportSources.importer(project.import_type)
end
def unknown_url? def unknown_url?
project.import_url == Project::UNKNOWN_IMPORT_URL project.import_url == Project::UNKNOWN_IMPORT_URL
end end
......
module Github
class Client
TIMEOUT = 60
DEFAULT_PER_PAGE = 100
attr_reader :connection, :rate_limit
def initialize(options)
@connection = Faraday.new(url: options.fetch(:url, root_endpoint)) do |faraday|
faraday.options.open_timeout = options.fetch(:timeout, TIMEOUT)
faraday.options.timeout = options.fetch(:timeout, TIMEOUT)
faraday.authorization 'token', options.fetch(:token)
faraday.adapter :net_http
faraday.ssl.verify = verify_ssl
end
@rate_limit = RateLimit.new(connection)
end
def get(url, query = {})
exceed, reset_in = rate_limit.get
sleep reset_in if exceed
Github::Response.new(connection.get(url, { per_page: DEFAULT_PER_PAGE }.merge(query)))
end
private
def root_endpoint
custom_endpoint || github_endpoint
end
def custom_endpoint
github_omniauth_provider.dig('args', 'client_options', 'site')
end
def verify_ssl
# If there is no config, we're connecting to github.com
# and we should verify ssl.
github_omniauth_provider.fetch('verify_ssl', true)
end
def github_endpoint
OmniAuth::Strategies::GitHub.default_options[:client_options][:site]
end
def github_omniauth_provider
@github_omniauth_provider ||=
Gitlab.config.omniauth.providers
.find { |provider| provider.name == 'github' }
.to_h
end
end
end
module Github
class Collection
attr_reader :options
def initialize(options)
@options = options
end
def fetch(url, query = {})
return [] if url.blank?
Enumerator.new do |yielder|
loop do
response = client.get(url, query)
response.body.each { |item| yielder << item }
raise StopIteration unless response.rels.key?(:next)
url = response.rels[:next]
end
end.lazy
end
private
def client
@client ||= Github::Client.new(options)
end
end
end
module Github
RepositoryFetchError = Class.new(StandardError)
end
This diff is collapsed.
module Github
class Import
class Issue < ::Issue
self.table_name = 'issues'
self.reset_callbacks :save
self.reset_callbacks :create
self.reset_callbacks :commit
self.reset_callbacks :update
self.reset_callbacks :validate
end
end
end
module Github
class Import
class LegacyDiffNote < ::LegacyDiffNote
self.table_name = 'notes'
self.store_full_sti_class = false
self.reset_callbacks :commit
self.reset_callbacks :update
self.reset_callbacks :validate
end
end
end
module Github
class Import
class MergeRequest < ::MergeRequest
self.table_name = 'merge_requests'
self.reset_callbacks :create
self.reset_callbacks :save
self.reset_callbacks :commit
self.reset_callbacks :update
self.reset_callbacks :validate
end
end
end
module Github
class Import
class Note < ::Note
self.table_name = 'notes'
self.store_full_sti_class = false
self.reset_callbacks :save
self.reset_callbacks :commit
self.reset_callbacks :update
self.reset_callbacks :validate
end
end
end
module Github
class RateLimit
SAFE_REMAINING_REQUESTS = 100
SAFE_RESET_TIME = 500
RATE_LIMIT_URL = '/rate_limit'.freeze
attr_reader :connection
def initialize(connection)
@connection = connection
end
def get
response = connection.get(RATE_LIMIT_URL)
# GitHub Rate Limit API returns 404 when the rate limit is disabled
return false unless response.status != 404
body = Oj.load(response.body, class_cache: false, mode: :compat)
remaining = body.dig('rate', 'remaining').to_i
reset_in = body.dig('rate', 'reset').to_i
exceed = remaining <= SAFE_REMAINING_REQUESTS
[exceed, reset_in]
end
end
end
module Github
class Repositories
attr_reader :options
def initialize(options)
@options = options
end
def fetch
Collection.new(options).fetch(repos_url)
end
private
def repos_url
'/user/repos'
end
end
end
module Github
module Representation
class Base
def initialize(raw, options = {})
@raw = raw
@options = options
end
def id
raw['id']
end
def url
raw['url']
end
def created_at
raw['created_at']
end
def updated_at
raw['updated_at']
end
private
attr_reader :raw, :options
end
end
end
module Github
module Representation
class Branch < Representation::Base
attr_reader :repository
def user
raw.dig('user', 'login') || 'unknown'
end
def repo?
raw['repo'].present?
end
def repo
return unless repo?
@repo ||= Github::Representation::Repo.new(raw['repo'])
end
def ref
raw['ref']
end
def sha
raw['sha']
end
def short_sha
Commit.truncate_sha(sha)
end
def valid?
sha.present? && ref.present?
end
def restore!(name)
repository.create_branch(name, sha)
rescue Gitlab::Git::Repository::InvalidRef => e
Rails.logger.error("#{self.class.name}: Could not restore branch #{name}: #{e}")
end
def remove!(name)
repository.delete_branch(name)
rescue Gitlab::Git::Repository::DeleteBranchError => e
Rails.logger.error("#{self.class.name}: Could not remove branch #{name}: #{e}")
end
private
def repository
@repository ||= options.fetch(:repository)
end
end
end
end
module Github
module Representation
class Comment < Representation::Base
def note
raw['body'] || ''
end
def author
@author ||= Github::Representation::User.new(raw['user'], options)
end
def commit_id
raw['commit_id']
end
def line_code
return unless on_diff?
parsed_lines = Gitlab::Diff::Parser.new.parse(diff_hunk.lines)
generate_line_code(parsed_lines.to_a.last)
end
private
def generate_line_code(line)
Gitlab::Git.diff_line_code(file_path, line.new_pos, line.old_pos)
end
def on_diff?
diff_hunk.present?
end
def diff_hunk
raw['diff_hunk']
end
def file_path
raw['path']
end
end
end
end
module Github
module Representation
class Issuable < Representation::Base
def iid
raw['number']
end
def title
raw['title']
end
def description
raw['body'] || ''
end
def milestone
return unless raw['milestone'].present?
@milestone ||= Github::Representation::Milestone.new(raw['milestone'])
end
def author
@author ||= Github::Representation::User.new(raw['user'], options)
end
def labels?
raw['labels'].any?
end
def labels
@labels ||= Array(raw['labels']).map do |label|
Github::Representation::Label.new(label, options)
end
end
end
end
end
module Github
module Representation
class Issue < Representation::Issuable
def state
raw['state'] == 'closed' ? 'closed' : 'opened'
end
def comments?
raw['comments'] > 0
end
def pull_request?
raw['pull_request'].present?
end
def assigned?
raw['assignees'].present?
end
def assignees
@assignees ||= Array(raw['assignees']).map do |user|
Github::Representation::User.new(user, options)
end
end
end
end
end
module Github
module Representation
class Label < Representation::Base
def color
"##{raw['color']}"
end
def title
raw['name']
end
end
end
end
module Github
module Representation
class Milestone < Representation::Base
def iid
raw['number']
end
def title
raw['title']
end
def description
raw['description']
end
def due_date
raw['due_on']
end
def state
raw['state'] == 'closed' ? 'closed' : 'active'
end
end
end
end
module Github
module Representation
class PullRequest < Representation::Issuable
delegate :sha, to: :source_branch, prefix: true
delegate :sha, to: :target_branch, prefix: true
def source_project
project
end
def source_branch_name
# Mimic the "user:branch" displayed in the MR widget,
# i.e. "Request to merge rymai:add-external-mounts into master"
cross_project? ? "#{source_branch.user}:#{source_branch.ref}" : source_branch.ref
end
def target_project
project
end
def target_branch_name
target_branch.ref
end
def state
return 'merged' if raw['state'] == 'closed' && raw['merged_at'].present?
return 'closed' if raw['state'] == 'closed'
'opened'
end
def opened?
state == 'opened'
end
def valid?
source_branch.valid? && target_branch.valid?
end
def assigned?
raw['assignee'].present?
end
def assignee
return unless assigned?
@assignee ||= Github::Representation::User.new(raw['assignee'], options)
end
private
def project
@project ||= options.fetch(:project)
end
def source_branch
@source_branch ||= Representation::Branch.new(raw['head'], repository: project.repository)
end
def target_branch
@target_branch ||= Representation::Branch.new(raw['base'], repository: project.repository)
end
def cross_project?
return true unless source_branch.repo?
source_branch.repo.id != target_branch.repo.id
end
end
end
end
module Github
module Representation
class Release < Representation::Base
def description
raw['body']
end
def tag
raw['tag_name']
end
def valid?
!raw['draft']
end
end
end
end
module Github
module Representation
class Repo < Representation::Base
end
end
end
module Github
module Representation
class User < Representation::Base
def email
return @email if defined?(@email)
@email = Github::User.new(username, options).get.fetch('email', nil)
end
def username
raw['login']
end
end
end
end
module Github
class Response
attr_reader :raw, :headers, :status
def initialize(response)
@raw = response
@headers = response.headers
@status = response.status
end
def body
Oj.load(raw.body, class_cache: false, mode: :compat)
end
def rels
links = headers['Link'].to_s.split(', ').map do |link|
href, name = link.match(/<(.*?)>; rel="(\w+)"/).captures
[name.to_sym, href]
end
Hash[*links.flatten]
end
end
end
module Github
class User
attr_reader :username, :options
def initialize(username, options)
@username = username
@options = options
end
def get
client.get(user_url).body
end
private
def client
@client ||= Github::Client.new(options)
end
def user_url
"/users/#{username}"
end
end
end
...@@ -8,7 +8,7 @@ module Gitlab ...@@ -8,7 +8,7 @@ module Gitlab
ImportSource = Struct.new(:name, :title, :importer) ImportSource = Struct.new(:name, :title, :importer)
ImportTable = [ ImportTable = [
ImportSource.new('github', 'GitHub', Github::Import), ImportSource.new('github', 'GitHub', Gitlab::GithubImport::ParallelImporter),
ImportSource.new('bitbucket', 'Bitbucket', Gitlab::BitbucketImport::Importer), ImportSource.new('bitbucket', 'Bitbucket', Gitlab::BitbucketImport::Importer),
ImportSource.new('gitlab', 'GitLab.com', Gitlab::GitlabImport::Importer), ImportSource.new('gitlab', 'GitLab.com', Gitlab::GitlabImport::Importer),
ImportSource.new('google_code', 'Google Code', Gitlab::GoogleCodeImport::Importer), ImportSource.new('google_code', 'Google Code', Gitlab::GoogleCodeImport::Importer),
......
...@@ -7,7 +7,7 @@ class GithubImport ...@@ -7,7 +7,7 @@ class GithubImport
end end
def initialize(token, gitlab_username, project_path, extras) def initialize(token, gitlab_username, project_path, extras)
@options = { token: token, verbose: true } @options = { token: token }
@project_path = project_path @project_path = project_path
@current_user = User.find_by_username(gitlab_username) @current_user = User.find_by_username(gitlab_username)
@github_repo = extras.empty? ? nil : extras.first @github_repo = extras.empty? ? nil : extras.first
...@@ -42,7 +42,9 @@ class GithubImport ...@@ -42,7 +42,9 @@ class GithubImport
import_success = false import_success = false
timings = Benchmark.measure do timings = Benchmark.measure do
import_success = Github::Import.new(@project, @options).execute import_success = Gitlab::GithubImport::SequentialImporter
.new(@project, token: @options[:token])
.execute
end end
if import_success if import_success
......
require 'spec_helper'
describe Github::Client do
let(:connection) { spy }
let(:rate_limit) { double(get: [false, 1]) }
let(:client) { described_class.new({}) }
let(:results) { double }
let(:response) { double }
before do
allow(Faraday).to receive(:new).and_return(connection)
allow(Github::RateLimit).to receive(:new).with(connection).and_return(rate_limit)
end
describe '#get' do
before do
allow(Github::Response).to receive(:new).with(results).and_return(response)
end
it 'uses a default per_page param' do
expect(connection).to receive(:get).with('/foo', per_page: 100).and_return(results)
expect(client.get('/foo')).to eq(response)
end
context 'with per_page given' do
it 'overwrites the default per_page' do
expect(connection).to receive(:get).with('/foo', per_page: 30).and_return(results)
expect(client.get('/foo', per_page: 30)).to eq(response)
end
end
end
end
require 'spec_helper'
describe Github::Import::LegacyDiffNote do
describe '#type' do
it 'returns the original note type' do
expect(described_class.new.type).to eq('LegacyDiffNote')
end
end
end
require 'spec_helper'
describe Github::Import::Note do
describe '#type' do
it 'returns the original note type' do
expect(described_class.new.type).to eq('Note')
end
end
end
...@@ -56,7 +56,7 @@ describe Gitlab::ImportSources do ...@@ -56,7 +56,7 @@ describe Gitlab::ImportSources do
describe '.importer' do describe '.importer' do
import_sources = { import_sources = {
'github' => Github::Import, 'github' => Gitlab::GithubImport::ParallelImporter,
'bitbucket' => Gitlab::BitbucketImport::Importer, 'bitbucket' => Gitlab::BitbucketImport::Importer,
'gitlab' => Gitlab::GitlabImport::Importer, 'gitlab' => Gitlab::GitlabImport::Importer,
'google_code' => Gitlab::GoogleCodeImport::Importer, 'google_code' => Gitlab::GoogleCodeImport::Importer,
......
...@@ -72,21 +72,24 @@ describe Projects::ImportService do ...@@ -72,21 +72,24 @@ describe Projects::ImportService do
end end
context 'with a Github repository' do context 'with a Github repository' do
it 'succeeds if repository import is successfully' do it 'succeeds if repository import was scheduled' do
expect_any_instance_of(Github::Import).to receive(:execute).and_return(true) expect_any_instance_of(Gitlab::GithubImport::ParallelImporter)
.to receive(:execute)
.and_return(true)
result = subject.execute result = subject.execute
expect(result[:status]).to eq :success expect(result[:status]).to eq :success
end end
it 'fails if repository import fails' do it 'fails if repository import was not scheduled' do
expect_any_instance_of(Repository).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) expect_any_instance_of(Gitlab::GithubImport::ParallelImporter)
.to receive(:execute)
.and_return(false)
result = subject.execute result = subject.execute
expect(result[:status]).to eq :error expect(result[:status]).to eq :error
expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - The remote data could not be imported."
end end
end end
...@@ -127,47 +130,22 @@ describe Projects::ImportService do ...@@ -127,47 +130,22 @@ describe Projects::ImportService do
end end
it 'succeeds if importer succeeds' do it 'succeeds if importer succeeds' do
allow_any_instance_of(Github::Import).to receive(:execute).and_return(true) allow_any_instance_of(Gitlab::GithubImport::ParallelImporter)
.to receive(:execute).and_return(true)
result = subject.execute result = subject.execute
expect(result[:status]).to eq :success expect(result[:status]).to eq :success
end end
it 'flushes various caches' do
allow_any_instance_of(Github::Import).to receive(:execute)
.and_return(true)
expect_any_instance_of(Repository).to receive(:expire_content_cache)
subject.execute
end
it 'fails if importer fails' do it 'fails if importer fails' do
allow_any_instance_of(Github::Import).to receive(:execute).and_return(false) allow_any_instance_of(Gitlab::GithubImport::ParallelImporter)
.to receive(:execute)
result = subject.execute .and_return(false)
expect(result[:status]).to eq :error
expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.full_path} - The remote data could not be imported."
end
it 'fails if importer raise an error' do
allow_any_instance_of(Github::Import).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API'))
result = subject.execute result = subject.execute
expect(result[:status]).to eq :error expect(result[:status]).to eq :error
expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.full_path} - Github: failed to connect API"
end
it 'expires content cache after error' do
allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false)
expect_any_instance_of(Repository).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new)
expect_any_instance_of(Repository).to receive(:expire_content_cache)
subject.execute
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment