Commit d255425b authored by Sean McGivern's avatar Sean McGivern

Merge branch 'fix/github-importer' into 'master'

Refactoring rake task to import GitHub repositories

See merge request !10695
parents f00bb1c2 d082b789
...@@ -17,6 +17,8 @@ gem 'pg', '~> 0.18.2', group: :postgres ...@@ -17,6 +17,8 @@ gem 'pg', '~> 0.18.2', group: :postgres
gem 'rugged', '~> 0.25.1.1' gem 'rugged', '~> 0.25.1.1'
gem 'faraday', '~> 0.11.0'
# Authentication libraries # Authentication libraries
gem 'devise', '~> 4.2' gem 'devise', '~> 4.2'
gem 'doorkeeper', '~> 4.2.0' gem 'doorkeeper', '~> 4.2.0'
...@@ -186,7 +188,7 @@ gem 'gemnasium-gitlab-service', '~> 0.2' ...@@ -186,7 +188,7 @@ gem 'gemnasium-gitlab-service', '~> 0.2'
gem 'slack-notifier', '~> 1.5.1' gem 'slack-notifier', '~> 1.5.1'
# Asana integration # Asana integration
gem 'asana', '~> 0.4.0' gem 'asana', '~> 0.6.0'
# FogBugz integration # FogBugz integration
gem 'ruby-fogbugz', '~> 0.2.1' gem 'ruby-fogbugz', '~> 0.2.1'
...@@ -345,7 +347,7 @@ gem 'html2text' ...@@ -345,7 +347,7 @@ gem 'html2text'
gem 'ruby-prof', '~> 0.16.2' gem 'ruby-prof', '~> 0.16.2'
# OAuth # OAuth
gem 'oauth2', '~> 1.2.0' gem 'oauth2', '~> 1.3.0'
# Soft deletion # Soft deletion
gem 'paranoia', '~> 2.2' gem 'paranoia', '~> 2.2'
......
...@@ -47,7 +47,7 @@ GEM ...@@ -47,7 +47,7 @@ GEM
akismet (2.0.0) akismet (2.0.0)
allocations (1.0.5) allocations (1.0.5)
arel (6.0.4) arel (6.0.4)
asana (0.4.0) asana (0.6.0)
faraday (~> 0.9) faraday (~> 0.9)
faraday_middleware (~> 0.9) faraday_middleware (~> 0.9)
faraday_middleware-multi_json (~> 0.0) faraday_middleware-multi_json (~> 0.0)
...@@ -193,10 +193,10 @@ GEM ...@@ -193,10 +193,10 @@ GEM
factory_girl_rails (4.7.0) factory_girl_rails (4.7.0)
factory_girl (~> 4.7.0) factory_girl (~> 4.7.0)
railties (>= 3.0.0) railties (>= 3.0.0)
faraday (0.9.2) faraday (0.11.0)
multipart-post (>= 1.2, < 3) multipart-post (>= 1.2, < 3)
faraday_middleware (0.10.0) faraday_middleware (0.11.0.1)
faraday (>= 0.7.4, < 0.10) faraday (>= 0.7.4, < 1.0)
faraday_middleware-multi_json (0.0.6) faraday_middleware-multi_json (0.0.6)
faraday_middleware faraday_middleware
multi_json multi_json
...@@ -454,15 +454,15 @@ GEM ...@@ -454,15 +454,15 @@ GEM
mini_portile2 (~> 2.1.0) mini_portile2 (~> 2.1.0)
numerizer (0.1.1) numerizer (0.1.1)
oauth (0.5.1) oauth (0.5.1)
oauth2 (1.2.0) oauth2 (1.3.1)
faraday (>= 0.8, < 0.10) faraday (>= 0.8, < 0.12)
jwt (~> 1.0) jwt (~> 1.0)
multi_json (~> 1.3) multi_json (~> 1.3)
multi_xml (~> 0.5) multi_xml (~> 0.5)
rack (>= 1.2, < 3) rack (>= 1.2, < 3)
octokit (4.6.2) octokit (4.6.2)
sawyer (~> 0.8.0, >= 0.5.3) sawyer (~> 0.8.0, >= 0.5.3)
oj (2.17.4) oj (2.17.5)
omniauth (1.4.2) omniauth (1.4.2)
hashie (>= 1.2, < 4) hashie (>= 1.2, < 4)
rack (>= 1.0, < 3) rack (>= 1.0, < 3)
...@@ -853,7 +853,7 @@ DEPENDENCIES ...@@ -853,7 +853,7 @@ DEPENDENCIES
after_commit_queue (~> 1.3.0) after_commit_queue (~> 1.3.0)
akismet (~> 2.0) akismet (~> 2.0)
allocations (~> 1.0) allocations (~> 1.0)
asana (~> 0.4.0) asana (~> 0.6.0)
asciidoctor (~> 1.5.2) asciidoctor (~> 1.5.2)
asciidoctor-plantuml (= 0.0.7) asciidoctor-plantuml (= 0.0.7)
attr_encrypted (~> 3.0.0) attr_encrypted (~> 3.0.0)
...@@ -891,6 +891,7 @@ DEPENDENCIES ...@@ -891,6 +891,7 @@ DEPENDENCIES
email_reply_trimmer (~> 0.1) email_reply_trimmer (~> 0.1)
email_spec (~> 1.6.0) email_spec (~> 1.6.0)
factory_girl_rails (~> 4.7.0) factory_girl_rails (~> 4.7.0)
faraday (~> 0.11.0)
ffaker (~> 2.4) ffaker (~> 2.4)
flay (~> 2.8.0) flay (~> 2.8.0)
fog-aws (~> 0.9) fog-aws (~> 0.9)
...@@ -943,7 +944,7 @@ DEPENDENCIES ...@@ -943,7 +944,7 @@ DEPENDENCIES
mysql2 (~> 0.3.16) mysql2 (~> 0.3.16)
net-ssh (~> 3.0.1) net-ssh (~> 3.0.1)
nokogiri (~> 1.6.7, >= 1.6.7.2) nokogiri (~> 1.6.7, >= 1.6.7.2)
oauth2 (~> 1.2.0) oauth2 (~> 1.3.0)
octokit (~> 4.6.2) octokit (~> 4.6.2)
oj (~> 2.17.4) oj (~> 2.17.4)
omniauth (~> 1.4.2) omniauth (~> 1.4.2)
......
module Github
class Client
attr_reader :connection, :rate_limit
def initialize(options)
@connection = Faraday.new(url: options.fetch(:url)) do |faraday|
faraday.options.open_timeout = options.fetch(:timeout, 60)
faraday.options.timeout = options.fetch(:timeout, 60)
faraday.authorization 'token', options.fetch(:token)
faraday.adapter :net_http
end
@rate_limit = RateLimit.new(connection)
end
def get(url, query = {})
exceed, reset_in = rate_limit.get
sleep reset_in if exceed
Github::Response.new(connection.get(url, query))
end
end
end
module Github
class Collection
attr_reader :options
def initialize(options)
@options = options
end
def fetch(url, query = {})
return [] if url.blank?
Enumerator.new do |yielder|
loop do
response = client.get(url, query)
response.body.each { |item| yielder << item }
raise StopIteration unless response.rels.key?(:next)
url = response.rels[:next]
end
end.lazy
end
private
def client
@client ||= Github::Client.new(options)
end
end
end
module Github
RepositoryFetchError = Class.new(StandardError)
end
This diff is collapsed.
module Github
class RateLimit
SAFE_REMAINING_REQUESTS = 100
SAFE_RESET_TIME = 500
RATE_LIMIT_URL = '/rate_limit'.freeze
attr_reader :connection
def initialize(connection)
@connection = connection
end
def get
response = connection.get(RATE_LIMIT_URL)
# GitHub Rate Limit API returns 404 when the rate limit is disabled
return false unless response.status != 404
body = Oj.load(response.body, class_cache: false, mode: :compat)
remaining = body.dig('rate', 'remaining').to_i
reset_in = body.dig('rate', 'reset').to_i
exceed = remaining <= SAFE_REMAINING_REQUESTS
[exceed, reset_in]
end
end
end
module Github
class Repositories
attr_reader :options
def initialize(options)
@options = options
end
def fetch
Collection.new(options).fetch(repos_url)
end
private
def repos_url
'/user/repos'
end
end
end
module Github
module Representation
class Base
def initialize(raw, options = {})
@raw = raw
@options = options
end
def id
raw['id']
end
def url
raw['url']
end
def created_at
raw['created_at']
end
def updated_at
raw['updated_at']
end
private
attr_reader :raw, :options
end
end
end
module Github
module Representation
class Branch < Representation::Base
attr_reader :repository
def user
raw.dig('user', 'login') || 'unknown'
end
def repo
return @repo if defined?(@repo)
@repo = Github::Representation::Repo.new(raw['repo']) if raw['repo'].present?
end
def ref
raw['ref']
end
def sha
raw['sha']
end
def short_sha
Commit.truncate_sha(sha)
end
def exists?
branch_exists? && commit_exists?
end
def valid?
sha.present? && ref.present?
end
private
def branch_exists?
repository.branch_exists?(ref)
end
def commit_exists?
repository.branch_names_contains(sha).include?(ref)
end
def repository
@repository ||= options.fetch(:repository)
end
end
end
end
module Github
module Representation
class Comment < Representation::Base
def note
raw['body'] || ''
end
def author
@author ||= Github::Representation::User.new(raw['user'], options)
end
def commit_id
raw['commit_id']
end
def line_code
return unless on_diff?
parsed_lines = Gitlab::Diff::Parser.new.parse(diff_hunk.lines)
generate_line_code(parsed_lines.to_a.last)
end
private
def generate_line_code(line)
Gitlab::Diff::LineCode.generate(file_path, line.new_pos, line.old_pos)
end
def on_diff?
diff_hunk.present?
end
def diff_hunk
raw['diff_hunk']
end
def file_path
raw['path']
end
end
end
end
module Github
module Representation
class Issuable < Representation::Base
def iid
raw['number']
end
def title
raw['title']
end
def description
raw['body'] || ''
end
def milestone
return unless raw['milestone'].present?
@milestone ||= Github::Representation::Milestone.new(raw['milestone'])
end
def author
@author ||= Github::Representation::User.new(raw['user'], options)
end
def assignee
return unless assigned?
@assignee ||= Github::Representation::User.new(raw['assignee'], options)
end
def assigned?
raw['assignee'].present?
end
end
end
end
module Github
module Representation
class Issue < Representation::Issuable
def labels
raw['labels']
end
def state
raw['state'] == 'closed' ? 'closed' : 'opened'
end
def has_comments?
raw['comments'] > 0
end
def has_labels?
labels.count > 0
end
def pull_request?
raw['pull_request'].present?
end
end
end
end
module Github
module Representation
class Label < Representation::Base
def color
"##{raw['color']}"
end
def title
raw['name']
end
end
end
end
module Github
module Representation
class Milestone < Representation::Base
def iid
raw['number']
end
def title
raw['title']
end
def description
raw['description']
end
def due_date
raw['due_on']
end
def state
raw['state'] == 'closed' ? 'closed' : 'active'
end
end
end
end
module Github
module Representation
class PullRequest < Representation::Issuable
attr_reader :project
delegate :user, :repo, :ref, :sha, to: :source_branch, prefix: true
delegate :user, :exists?, :repo, :ref, :sha, :short_sha, to: :target_branch, prefix: true
def source_project
project
end
def source_branch_exists?
!cross_project? && source_branch.exists?
end
def source_branch_name
@source_branch_name ||=
if cross_project? || !source_branch_exists?
source_branch_name_prefixed
else
source_branch_ref
end
end
def target_project
project
end
def target_branch_name
@target_branch_name ||= target_branch_exists? ? target_branch_ref : target_branch_name_prefixed
end
def state
return 'merged' if raw['state'] == 'closed' && raw['merged_at'].present?
return 'closed' if raw['state'] == 'closed'
'opened'
end
def opened?
state == 'opened'
end
def valid?
source_branch.valid? && target_branch.valid?
end
private
def project
@project ||= options.fetch(:project)
end
def source_branch
@source_branch ||= Representation::Branch.new(raw['head'], repository: project.repository)
end
def source_branch_name_prefixed
"gh-#{target_branch_short_sha}/#{iid}/#{source_branch_user}/#{source_branch_ref}"
end
def target_branch
@target_branch ||= Representation::Branch.new(raw['base'], repository: project.repository)
end
def target_branch_name_prefixed
"gl-#{target_branch_short_sha}/#{iid}/#{target_branch_user}/#{target_branch_ref}"
end
def cross_project?
return true if source_branch_repo.nil?
source_branch_repo.id != target_branch_repo.id
end
end
end
end
module Github
module Representation
class Release < Representation::Base
def description
raw['body']
end
def tag
raw['tag_name']
end
def valid?
!raw['draft']
end
end
end
end
module Github
module Representation
class Repo < Representation::Base
end
end
end
module Github
module Representation
class User < Representation::Base
def email
return @email if defined?(@email)
@email = Github::User.new(username, options).get.fetch('email', nil)
end
def username
raw['login']
end
end
end
end
module Github
class Response
attr_reader :raw, :headers, :status
def initialize(response)
@raw = response
@headers = response.headers
@status = response.status
end
def body
Oj.load(raw.body, class_cache: false, mode: :compat)
end
def rels
links = headers['Link'].to_s.split(', ').map do |link|
href, name = link.match(/<(.*?)>; rel="(\w+)"/).captures
[name.to_sym, href]
end
Hash[*links.flatten]
end
end
end
module Github
class User
attr_reader :username, :options
def initialize(username, options)
@username = username
@options = options
end
def get
client.get(user_url).body
end
private
def client
@client ||= Github::Client.new(options)
end
def user_url
"/users/#{username}"
end
end
end
require 'benchmark' require 'benchmark'
require 'rainbow/ext/string' require 'rainbow/ext/string'
require_relative '../gitlab/shell_adapter'
require_relative '../gitlab/github_import/importer'
class NewImporter < ::Gitlab::GithubImport::Importer
def execute
# Same as ::Gitlab::GithubImport::Importer#execute, but showing some progress.
puts 'Importing repository...'.color(:aqua)
import_repository unless project.repository_exists?
puts 'Importing labels...'.color(:aqua)
import_labels
puts 'Importing milestones...'.color(:aqua)
import_milestones
puts 'Importing pull requests...'.color(:aqua)
import_pull_requests
puts 'Importing issues...'.color(:aqua)
import_issues
puts 'Importing issue comments...'.color(:aqua)
import_comments(:issues)
puts 'Importing pull request comments...'.color(:aqua)
import_comments(:pull_requests)
puts 'Importing wiki...'.color(:aqua)
import_wiki
# Gitea doesn't have a Release API yet
# See https://github.com/go-gitea/gitea/issues/330
unless project.gitea_import?
import_releases
end
handle_errors
project.repository.after_import
project.import_finish
true
end
def import_repository
begin
raise 'Blocked import URL.' if Gitlab::UrlBlocker.blocked_url?(project.import_url)
project.create_repository
project.repository.add_remote(project.import_type, project.import_url)
project.repository.set_remote_as_mirror(project.import_type)
project.repository.fetch_remote(project.import_type, forced: true)
rescue => e
# Expire cache to prevent scenarios such as:
# 1. First import failed, but the repo was imported successfully, so +exists?+ returns true
# 2. Retried import, repo is broken or not imported but +exists?+ still returns true
project.repository.expire_content_cache if project.repository_exists?
raise "Error importing repository #{project.import_url} into #{project.path_with_namespace} - #{e.message}"
end
end
end
class GithubImport class GithubImport
def self.run!(*args) def self.run!(*args)
...@@ -69,14 +7,14 @@ class GithubImport ...@@ -69,14 +7,14 @@ class GithubImport
end end
def initialize(token, gitlab_username, project_path, extras) def initialize(token, gitlab_username, project_path, extras)
@token = token @options = { url: 'https://api.github.com', token: token, verbose: true }
@project_path = project_path @project_path = project_path
@current_user = User.find_by_username(gitlab_username) @current_user = User.find_by_username(gitlab_username)
@github_repo = extras.empty? ? nil : extras.first @github_repo = extras.empty? ? nil : extras.first
end end
def run! def run!
@repo = GithubRepos.new(@token, @current_user, @github_repo).choose_one! @repo = GithubRepos.new(@options, @current_user, @github_repo).choose_one!
raise 'No repo found!' unless @repo raise 'No repo found!' unless @repo
...@@ -90,25 +28,24 @@ class GithubImport ...@@ -90,25 +28,24 @@ class GithubImport
private private
def show_warning! def show_warning!
puts "This will import GH #{@repo.full_name.bright} into GL #{@project_path.bright} as #{@current_user.name}" puts "This will import GitHub #{@repo['full_name'].bright} into GitLab #{@project_path.bright} as #{@current_user.name}"
puts "Permission checks are ignored. Press any key to continue.".color(:red) puts "Permission checks are ignored. Press any key to continue.".color(:red)
STDIN.getch STDIN.getch
puts 'Starting the import...'.color(:green) puts 'Starting the import (this could take a while)'.color(:green)
end end
def import! def import!
import_url = @project.import_url.gsub(/\:\/\/(.*@)?/, "://#{@token}@")
@project.update(import_url: import_url)
@project.import_start @project.import_start
timings = Benchmark.measure do timings = Benchmark.measure do
NewImporter.new(@project).execute Github::Import.new(@project, @options).execute
end end
puts "Import finished. Timings: #{timings}".color(:green) puts "Import finished. Timings: #{timings}".color(:green)
@project.import_finish
end end
def new_project def new_project
...@@ -116,17 +53,17 @@ class GithubImport ...@@ -116,17 +53,17 @@ class GithubImport
namespace_path, _sep, name = @project_path.rpartition('/') namespace_path, _sep, name = @project_path.rpartition('/')
namespace = find_or_create_namespace(namespace_path) namespace = find_or_create_namespace(namespace_path)
Project.create!( Projects::CreateService.new(
import_url: "https://#{@token}@github.com/#{@repo.full_name}.git", @current_user,
name: name, name: name,
path: name, path: name,
description: @repo.description, description: @repo['description'],
namespace: namespace, namespace_id: namespace.id,
visibility_level: visibility_level, visibility_level: visibility_level,
import_type: 'github', import_type: 'github',
import_source: @repo.full_name, import_source: @repo['full_name'],
creator: @current_user skip_wiki: @repo['has_wiki']
) ).execute
end end
end end
...@@ -134,7 +71,6 @@ class GithubImport ...@@ -134,7 +71,6 @@ class GithubImport
return @current_user.namespace if names == @current_user.namespace_path return @current_user.namespace if names == @current_user.namespace_path
return @current_user.namespace unless @current_user.can_create_group? return @current_user.namespace unless @current_user.can_create_group?
names = params[:target_namespace].presence || names
full_path_namespace = Namespace.find_by_full_path(names) full_path_namespace = Namespace.find_by_full_path(names)
return full_path_namespace if full_path_namespace return full_path_namespace if full_path_namespace
...@@ -159,13 +95,13 @@ class GithubImport ...@@ -159,13 +95,13 @@ class GithubImport
end end
def visibility_level def visibility_level
@repo.private ? Gitlab::VisibilityLevel::PRIVATE : current_application_settings.default_project_visibility @repo['private'] ? Gitlab::VisibilityLevel::PRIVATE : current_application_settings.default_project_visibility
end end
end end
class GithubRepos class GithubRepos
def initialize(token, current_user, github_repo) def initialize(options, current_user, github_repo)
@token = token @options = options
@current_user = current_user @current_user = current_user
@github_repo = github_repo @github_repo = github_repo
end end
...@@ -174,17 +110,17 @@ class GithubRepos ...@@ -174,17 +110,17 @@ class GithubRepos
return found_github_repo if @github_repo return found_github_repo if @github_repo
repos.each do |repo| repos.each do |repo|
print "ID: #{repo[:id].to_s.bright} ".color(:green) print "ID: #{repo['id'].to_s.bright}".color(:green)
puts "- Name: #{repo[:full_name]}".color(:green) print "\tName: #{repo['full_name']}\n".color(:green)
end end
print 'ID? '.bright print 'ID? '.bright
repos.find { |repo| repo[:id] == repo_id } repos.find { |repo| repo['id'] == repo_id }
end end
def found_github_repo def found_github_repo
repos.find { |repo| repo[:full_name] == @github_repo } repos.find { |repo| repo['full_name'] == @github_repo }
end end
def repo_id def repo_id
...@@ -192,11 +128,7 @@ class GithubRepos ...@@ -192,11 +128,7 @@ class GithubRepos
end end
def repos def repos
@repos ||= client.repos Github::Repositories.new(@options).fetch
end
def client
@client ||= Gitlab::GithubImport::Client.new(@token, {})
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment