Commit d255425b authored by Sean McGivern

Merge branch 'fix/github-importer' into 'master'

Refactoring rake task to import GitHub repositories

See merge request !10695
parents f00bb1c2 d082b789
......@@ -17,6 +17,8 @@ gem 'pg', '~> 0.18.2', group: :postgres
gem 'rugged', '~> 0.25.1.1'
gem 'faraday', '~> 0.11.0'
# Authentication libraries
gem 'devise', '~> 4.2'
gem 'doorkeeper', '~> 4.2.0'
......@@ -186,7 +188,7 @@ gem 'gemnasium-gitlab-service', '~> 0.2'
gem 'slack-notifier', '~> 1.5.1'
# Asana integration
gem 'asana', '~> 0.4.0'
gem 'asana', '~> 0.6.0'
# FogBugz integration
gem 'ruby-fogbugz', '~> 0.2.1'
......@@ -345,7 +347,7 @@ gem 'html2text'
gem 'ruby-prof', '~> 0.16.2'
# OAuth
gem 'oauth2', '~> 1.2.0'
gem 'oauth2', '~> 1.3.0'
# Soft deletion
gem 'paranoia', '~> 2.2'
......
......@@ -47,7 +47,7 @@ GEM
akismet (2.0.0)
allocations (1.0.5)
arel (6.0.4)
asana (0.4.0)
asana (0.6.0)
faraday (~> 0.9)
faraday_middleware (~> 0.9)
faraday_middleware-multi_json (~> 0.0)
......@@ -193,10 +193,10 @@ GEM
factory_girl_rails (4.7.0)
factory_girl (~> 4.7.0)
railties (>= 3.0.0)
faraday (0.9.2)
faraday (0.11.0)
multipart-post (>= 1.2, < 3)
faraday_middleware (0.10.0)
faraday (>= 0.7.4, < 0.10)
faraday_middleware (0.11.0.1)
faraday (>= 0.7.4, < 1.0)
faraday_middleware-multi_json (0.0.6)
faraday_middleware
multi_json
......@@ -454,15 +454,15 @@ GEM
mini_portile2 (~> 2.1.0)
numerizer (0.1.1)
oauth (0.5.1)
oauth2 (1.2.0)
faraday (>= 0.8, < 0.10)
oauth2 (1.3.1)
faraday (>= 0.8, < 0.12)
jwt (~> 1.0)
multi_json (~> 1.3)
multi_xml (~> 0.5)
rack (>= 1.2, < 3)
octokit (4.6.2)
sawyer (~> 0.8.0, >= 0.5.3)
oj (2.17.4)
oj (2.17.5)
omniauth (1.4.2)
hashie (>= 1.2, < 4)
rack (>= 1.0, < 3)
......@@ -853,7 +853,7 @@ DEPENDENCIES
after_commit_queue (~> 1.3.0)
akismet (~> 2.0)
allocations (~> 1.0)
asana (~> 0.4.0)
asana (~> 0.6.0)
asciidoctor (~> 1.5.2)
asciidoctor-plantuml (= 0.0.7)
attr_encrypted (~> 3.0.0)
......@@ -891,6 +891,7 @@ DEPENDENCIES
email_reply_trimmer (~> 0.1)
email_spec (~> 1.6.0)
factory_girl_rails (~> 4.7.0)
faraday (~> 0.11.0)
ffaker (~> 2.4)
flay (~> 2.8.0)
fog-aws (~> 0.9)
......@@ -943,7 +944,7 @@ DEPENDENCIES
mysql2 (~> 0.3.16)
net-ssh (~> 3.0.1)
nokogiri (~> 1.6.7, >= 1.6.7.2)
oauth2 (~> 1.2.0)
oauth2 (~> 1.3.0)
octokit (~> 4.6.2)
oj (~> 2.17.4)
omniauth (~> 1.4.2)
......
module Github
  # HTTP client for the GitHub API, backed by a Faraday connection.
  class Client
    attr_reader :connection, :rate_limit

    # options - Hash of settings. :url and :token are required (KeyError when
    #           missing); :timeout defaults to 60 and is used for both the
    #           open timeout and the request timeout.
    def initialize(options)
      base_url = options.fetch(:url)
      timeout = options.fetch(:timeout, 60)
      token = options.fetch(:token)

      @connection = Faraday.new(url: base_url) do |conn|
        conn.options.open_timeout = timeout
        conn.options.timeout = timeout
        conn.authorization 'token', token
        conn.adapter :net_http
      end

      @rate_limit = RateLimit.new(connection)
    end

    # Performs a GET request and wraps the result in a Github::Response.
    #
    # The rate limit is consulted before every request; when it reports that
    # the limit is exceeded, the call sleeps for the value it returned.
    def get(url, query = {})
      exceeded, wait_time = rate_limit.get
      sleep(wait_time) if exceeded

      raw_response = connection.get(url, query)
      Github::Response.new(raw_response)
    end
  end
end
module Github
  # Lazily walks a paginated GitHub API collection.
  class Collection
    attr_reader :options

    # options - Hash passed unchanged to Github::Client.new.
    def initialize(options)
      @options = options
    end

    # Returns a lazy enumerator over every item of every page, starting at
    # `url` and following the `next` relation of each response.
    #
    # url   - String path or URL of the first page. A blank value yields [].
    # query - Hash of query parameters sent with each request.
    def fetch(url, query = {})
      return [] if url.blank?

      Enumerator.new do |yielder|
        # Keep the cursor local to this enumeration. Reassigning the captured
        # `url` argument would make a second pass over the same enumerator
        # start at the last page instead of the first.
        next_url = url

        loop do
          response = client.get(next_url, query)
          response.body.each { |item| yielder << item }

          rels = response.rels
          break unless rels.key?(:next)

          next_url = rels[:next]
        end
      end.lazy
    end

    private

    def client
      @client ||= Github::Client.new(options)
    end
  end
end
module Github
  # Error type for failures while fetching a repository.
  class RepositoryFetchError < StandardError
  end
end
This diff is collapsed.
module Github
  # Queries the GitHub rate limit endpoint through an existing connection.
  class RateLimit
    SAFE_REMAINING_REQUESTS = 100
    SAFE_RESET_TIME = 500
    RATE_LIMIT_URL = '/rate_limit'.freeze

    attr_reader :connection

    # connection - object responding to #get(url) and returning a response
    #              with #status and #body.
    def initialize(connection)
      @connection = connection
    end

    # Returns false when rate limiting is disabled; otherwise returns
    # [exceed, reset_in] where:
    #
    # exceed   - true when the remaining request count is at or below
    #            SAFE_REMAINING_REQUESTS.
    # reset_in - Integer number of seconds until the limit resets (never
    #            negative), so callers can pass it straight to `sleep`.
    def get
      response = connection.get(RATE_LIMIT_URL)

      # GitHub Rate Limit API returns 404 when the rate limit is disabled
      return false if response.status == 404

      body = Oj.load(response.body, class_cache: false, mode: :compat)
      remaining = body.dig('rate', 'remaining').to_i

      # `reset` is an absolute UTC epoch timestamp, not a duration. Convert it
      # to the number of seconds left, clamped at zero for a past timestamp.
      reset_at = body.dig('rate', 'reset').to_i
      reset_in = [reset_at - Time.now.to_i, 0].max

      exceed = remaining <= SAFE_REMAINING_REQUESTS

      [exceed, reset_in]
    end
  end
end
module Github
  # Lists the repositories available at the `/user/repos` endpoint.
  class Repositories
    attr_reader :options

    # options - Hash handed to Github::Collection.
    def initialize(options)
      @options = options
    end

    # Returns whatever Collection#fetch returns for the repositories URL.
    def fetch
      collection = Collection.new(options)
      collection.fetch(repos_url)
    end

    private

    def repos_url
      '/user/repos'
    end
  end
end
module Github
  module Representation
    # Base wrapper around a raw GitHub API payload (a Hash with String keys).
    class Base
      # raw     - the payload Hash returned by the API.
      # options - extra context made available to subclasses.
      def initialize(raw, options = {})
        @raw = raw
        @options = options
      end

      # Plain readers for payload fields shared by every representation:
      # #id, #url, #created_at and #updated_at each return the raw value
      # stored under the key of the same name.
      %w[id url created_at updated_at].each do |field|
        define_method(field) { raw[field] }
      end

      private

      attr_reader :raw, :options
    end
  end
end
module Github
  module Representation
    # One side (head or base) of a pull request payload.
    #
    # Requires a :repository entry in the options; it is used to check
    # whether the branch and its commit exist locally.
    class Branch < Representation::Base
      # Login of the branch owner, or 'unknown' when the payload has none.
      def user
        raw.dig('user', 'login') || 'unknown'
      end

      # Repo representation of the branch's repository, or nil when the
      # payload carries no repo. Memoized, including the nil result.
      def repo
        return @repo if defined?(@repo)

        @repo = raw['repo'].present? ? Github::Representation::Repo.new(raw['repo']) : nil
      end

      def ref
        raw['ref']
      end

      def sha
        raw['sha']
      end

      def short_sha
        Commit.truncate_sha(sha)
      end

      # True when the ref exists in the repository and contains the commit.
      def exists?
        branch_exists? && commit_exists?
      end

      # True when the payload carries both a sha and a ref.
      def valid?
        sha.present? && ref.present?
      end

      private

      def branch_exists?
        repository.branch_exists?(ref)
      end

      def commit_exists?
        repository.branch_names_contains(sha).include?(ref)
      end

      # Private on purpose. The former public `attr_reader :repository` was
      # overridden by this definition and only caused a redefinition warning.
      def repository
        @repository ||= options.fetch(:repository)
      end
    end
  end
end
module Github
  module Representation
    # A comment payload, optionally attached to a diff.
    class Comment < Representation::Base
      # Comment text; an empty string when the payload has no body.
      def note
        raw['body'] || ''
      end

      def author
        @author ||= Github::Representation::User.new(raw['user'], options)
      end

      def commit_id
        raw['commit_id']
      end

      # Line code generated from the last parsed line of the diff hunk, or
      # nil when the comment has no diff hunk.
      def line_code
        return unless on_diff?

        hunk_lines = Gitlab::Diff::Parser.new.parse(diff_hunk.lines)
        last_line = hunk_lines.to_a.last
        generate_line_code(last_line)
      end

      private

      def generate_line_code(line)
        Gitlab::Diff::LineCode.generate(file_path, line.new_pos, line.old_pos)
      end

      def on_diff?
        diff_hunk.present?
      end

      def diff_hunk
        raw['diff_hunk']
      end

      def file_path
        raw['path']
      end
    end
  end
end
module Github
  module Representation
    # Fields shared by the issue and pull request representations.
    class Issuable < Representation::Base
      def iid
        raw['number']
      end

      def title
        raw['title']
      end

      # Body text; an empty string when the payload has no body.
      def description
        raw['body'] || ''
      end

      # Milestone representation, or nil when the payload has no milestone.
      def milestone
        return if raw['milestone'].blank?

        @milestone ||= Github::Representation::Milestone.new(raw['milestone'])
      end

      def author
        @author ||= Github::Representation::User.new(raw['user'], options)
      end

      # User representation of the assignee, or nil when nobody is assigned.
      def assignee
        return unless assigned?

        @assignee ||= Github::Representation::User.new(raw['assignee'], options)
      end

      def assigned?
        raw['assignee'].present?
      end
    end
  end
end
module Github
  module Representation
    # An issue payload.
    class Issue < Representation::Issuable
      def labels
        raw['labels']
      end

      # 'closed' when the payload state is closed, 'opened' for anything else.
      def state
        if raw['state'] == 'closed'
          'closed'
        else
          'opened'
        end
      end

      def has_comments?
        raw['comments'].positive?
      end

      def has_labels?
        labels.count.positive?
      end

      # True when the payload carries a `pull_request` entry.
      def pull_request?
        raw['pull_request'].present?
      end
    end
  end
end
module Github
  module Representation
    # A label payload.
    class Label < Representation::Base
      # The payload's color value prefixed with '#'.
      def color
        hex = raw['color']
        "##{hex}"
      end

      def title
        raw['name']
      end
    end
  end
end
module Github
  module Representation
    # A milestone payload.
    class Milestone < Representation::Base
      def iid
        raw['number']
      end

      def title
        raw['title']
      end

      def description
        raw['description']
      end

      def due_date
        raw['due_on']
      end

      # 'closed' when the payload state is closed, 'active' for anything else.
      def state
        if raw['state'] == 'closed'
          'closed'
        else
          'active'
        end
      end
    end
  end
end
module Github
  module Representation
    # A pull request payload.
    #
    # Requires a :project entry in the options. The project is used as both
    # source and target project, and its repository backs the branch checks.
    class PullRequest < Representation::Issuable
      delegate :user, :repo, :ref, :sha, to: :source_branch, prefix: true
      delegate :user, :exists?, :repo, :ref, :sha, :short_sha, to: :target_branch, prefix: true

      def source_project
        project
      end

      # True only for a same-repository pull request whose head branch exists.
      def source_branch_exists?
        !cross_project? && source_branch.exists?
      end

      # The head ref when the branch is usable as-is, otherwise a generated
      # "gh-..." name.
      def source_branch_name
        @source_branch_name ||=
          if cross_project? || !source_branch_exists?
            source_branch_name_prefixed
          else
            source_branch_ref
          end
      end

      def target_project
        project
      end

      # The base ref when the branch exists, otherwise a generated "gl-..."
      # name.
      def target_branch_name
        @target_branch_name ||= target_branch_exists? ? target_branch_ref : target_branch_name_prefixed
      end

      # 'merged' for a closed payload with a merged_at value, 'closed' for any
      # other closed payload, 'opened' otherwise.
      def state
        return 'merged' if raw['state'] == 'closed' && raw['merged_at'].present?
        return 'closed' if raw['state'] == 'closed'

        'opened'
      end

      def opened?
        state == 'opened'
      end

      def valid?
        source_branch.valid? && target_branch.valid?
      end

      private

      # Private on purpose. The former public `attr_reader :project` was
      # overridden by this definition and only caused a redefinition warning.
      def project
        @project ||= options.fetch(:project)
      end

      def source_branch
        @source_branch ||= Representation::Branch.new(raw['head'], repository: project.repository)
      end

      def source_branch_name_prefixed
        "gh-#{target_branch_short_sha}/#{iid}/#{source_branch_user}/#{source_branch_ref}"
      end

      def target_branch
        @target_branch ||= Representation::Branch.new(raw['base'], repository: project.repository)
      end

      def target_branch_name_prefixed
        "gl-#{target_branch_short_sha}/#{iid}/#{target_branch_user}/#{target_branch_ref}"
      end

      # A pull request with no head repo is treated as cross-project.
      def cross_project?
        return true if source_branch_repo.nil?

        source_branch_repo.id != target_branch_repo.id
      end
    end
  end
end
module Github
  module Representation
    # A release payload.
    class Release < Representation::Base
      def description
        raw['body']
      end

      def tag
        raw['tag_name']
      end

      # A release is valid unless the payload marks it as a draft.
      def valid?
        draft = raw['draft']
        !draft
      end
    end
  end
end
module Github
module Representation
# A repository payload; adds nothing beyond what Representation::Base provides.
class Repo < Representation::Base
end
end
end
module Github
  module Representation
    # A user payload.
    class User < Representation::Base
      # Email taken from the user's profile, fetched through Github::User and
      # memoized (including a nil result).
      def email
        return @email if defined?(@email)

        profile = Github::User.new(username, options).get
        @email = profile.fetch('email', nil)
      end

      def username
        raw['login']
      end
    end
  end
end
module Github
  # Wraps a raw HTTP response from the GitHub API.
  class Response
    attr_reader :raw, :headers, :status

    # response - object responding to #headers, #status and #body.
    def initialize(response)
      @raw = response
      @headers = response.headers
      @status = response.status
    end

    # Parses the response body as JSON.
    def body
      Oj.load(raw.body, class_cache: false, mode: :compat)
    end

    # Returns a Hash mapping each relation in the `Link` header to its URL,
    # e.g. { next: 'https://...', last: 'https://...' }. A missing header
    # gives an empty Hash.
    #
    # The header is scanned rather than split on ", ", so a segment that does
    # not match is skipped instead of raising NoMethodError, and links
    # separated by a bare "," are still found.
    def rels
      pairs = headers['Link'].to_s.scan(/<([^>]*)>;\s*rel="(\w+)"/)

      pairs.each_with_object({}) do |(href, name), links|
        links[name.to_sym] = href
      end
    end
  end
end
module Github
  # Fetches a single user's profile from the `/users/:username` endpoint.
  class User
    attr_reader :username, :options

    # username - login interpolated into the request path.
    # options  - Hash handed to Github::Client.
    def initialize(username, options)
      @username = username
      @options = options
    end

    # Returns the body of the user endpoint's response.
    def get
      response = client.get(user_url)
      response.body
    end

    private

    def client
      @client ||= Github::Client.new(options)
    end

    def user_url
      "/users/#{username}"
    end
  end
end
require 'benchmark'
require 'rainbow/ext/string'
require_relative '../gitlab/shell_adapter'
require_relative '../gitlab/github_import/importer'
# Importer that runs the same steps as ::Gitlab::GithubImport::Importer while
# printing progress to standard output.
class NewImporter < ::Gitlab::GithubImport::Importer
  # Runs every import step in order and returns true.
  def execute
    # Same as ::Gitlab::GithubImport::Importer#execute, but showing some progress.
    puts 'Importing repository...'.color(:aqua)
    import_repository unless project.repository_exists?

    puts 'Importing labels...'.color(:aqua)
    import_labels

    puts 'Importing milestones...'.color(:aqua)
    import_milestones

    puts 'Importing pull requests...'.color(:aqua)
    import_pull_requests

    puts 'Importing issues...'.color(:aqua)
    import_issues

    puts 'Importing issue comments...'.color(:aqua)
    import_comments(:issues)

    puts 'Importing pull request comments...'.color(:aqua)
    import_comments(:pull_requests)

    puts 'Importing wiki...'.color(:aqua)
    import_wiki

    # Gitea doesn't have a Release API yet
    # See https://github.com/go-gitea/gitea/issues/330
    import_releases unless project.gitea_import?

    handle_errors

    project.repository.after_import
    project.import_finish

    true
  end

  # Creates the repository and fetches the remote as a mirror.
  #
  # Raises RuntimeError when the import URL is blocked or any step fails; the
  # message names the import URL and the target project path.
  def import_repository
    raise 'Blocked import URL.' if Gitlab::UrlBlocker.blocked_url?(project.import_url)

    remote_name = project.import_type

    project.create_repository
    project.repository.add_remote(remote_name, project.import_url)
    project.repository.set_remote_as_mirror(remote_name)
    project.repository.fetch_remote(remote_name, forced: true)
  rescue => e
    # Expire cache to prevent scenarios such as:
    # 1. First import failed, but the repo was imported successfully, so +exists?+ returns true
    # 2. Retried import, repo is broken or not imported but +exists?+ still returns true
    project.repository.expire_content_cache if project.repository_exists?

    raise "Error importing repository #{project.import_url} into #{project.path_with_namespace} - #{e.message}"
  end
end
class GithubImport
def self.run!(*args)
......@@ -69,14 +7,14 @@ class GithubImport
end
def initialize(token, gitlab_username, project_path, extras)
@token = token
@options = { url: 'https://api.github.com', token: token, verbose: true }
@project_path = project_path
@current_user = User.find_by_username(gitlab_username)
@github_repo = extras.empty? ? nil : extras.first
end
def run!
@repo = GithubRepos.new(@token, @current_user, @github_repo).choose_one!
@repo = GithubRepos.new(@options, @current_user, @github_repo).choose_one!
raise 'No repo found!' unless @repo
......@@ -90,25 +28,24 @@ class GithubImport
private
def show_warning!
puts "This will import GH #{@repo.full_name.bright} into GL #{@project_path.bright} as #{@current_user.name}"
puts "This will import GitHub #{@repo['full_name'].bright} into GitLab #{@project_path.bright} as #{@current_user.name}"
puts "Permission checks are ignored. Press any key to continue.".color(:red)
STDIN.getch
puts 'Starting the import...'.color(:green)
puts 'Starting the import (this could take a while)'.color(:green)
end
def import!
import_url = @project.import_url.gsub(/\:\/\/(.*@)?/, "://#{@token}@")
@project.update(import_url: import_url)
@project.import_start
timings = Benchmark.measure do
NewImporter.new(@project).execute
Github::Import.new(@project, @options).execute
end
puts "Import finished. Timings: #{timings}".color(:green)
@project.import_finish
end
def new_project
......@@ -116,17 +53,17 @@ class GithubImport
namespace_path, _sep, name = @project_path.rpartition('/')
namespace = find_or_create_namespace(namespace_path)
Project.create!(
import_url: "https://#{@token}@github.com/#{@repo.full_name}.git",
Projects::CreateService.new(
@current_user,
name: name,
path: name,
description: @repo.description,
namespace: namespace,
description: @repo['description'],
namespace_id: namespace.id,
visibility_level: visibility_level,
import_type: 'github',
import_source: @repo.full_name,
creator: @current_user
)
import_source: @repo['full_name'],
skip_wiki: @repo['has_wiki']
).execute
end
end
......@@ -134,7 +71,6 @@ class GithubImport
return @current_user.namespace if names == @current_user.namespace_path
return @current_user.namespace unless @current_user.can_create_group?
names = params[:target_namespace].presence || names
full_path_namespace = Namespace.find_by_full_path(names)
return full_path_namespace if full_path_namespace
......@@ -159,13 +95,13 @@ class GithubImport
end
def visibility_level
@repo.private ? Gitlab::VisibilityLevel::PRIVATE : current_application_settings.default_project_visibility
@repo['private'] ? Gitlab::VisibilityLevel::PRIVATE : current_application_settings.default_project_visibility
end
end
class GithubRepos
def initialize(token, current_user, github_repo)
@token = token
def initialize(options, current_user, github_repo)
@options = options
@current_user = current_user
@github_repo = github_repo
end
......@@ -174,17 +110,17 @@ class GithubRepos
return found_github_repo if @github_repo
repos.each do |repo|
print "ID: #{repo[:id].to_s.bright} ".color(:green)
puts "- Name: #{repo[:full_name]}".color(:green)
print "ID: #{repo['id'].to_s.bright}".color(:green)
print "\tName: #{repo['full_name']}\n".color(:green)
end
print 'ID? '.bright
repos.find { |repo| repo[:id] == repo_id }
repos.find { |repo| repo['id'] == repo_id }
end
def found_github_repo
repos.find { |repo| repo[:full_name] == @github_repo }
repos.find { |repo| repo['full_name'] == @github_repo }
end
def repo_id
......@@ -192,11 +128,7 @@ class GithubRepos
end
def repos
@repos ||= client.repos
end
def client
@client ||= Gitlab::GithubImport::Client.new(@token, {})
Github::Repositories.new(@options).fetch
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment