Commit 897696d4 authored by Sean McGivern's avatar Sean McGivern Committed by Rémy Coutable

Merge branch 'fix/make-github-import-retryable' into 'master'

Modify GitHub importer to be retryable

Closes #23533

See merge request !7003
Signed-off-by: default avatarRémy Coutable <remy@rymai.me>
parent 25d77f76
......@@ -29,7 +29,7 @@ module Projects
if unknown_url?
# In this case, we only want to import issues, not a repository.
create_repository
else
elsif !project.repository_exists?
import_repository
end
end
......
......@@ -10,7 +10,9 @@ module Gitlab
end
def create!
self.klass.create!(self.attributes)
project.public_send(project_association).find_or_create_by!(find_condition) do |record|
record.attributes = attributes
end
end
private
......
......@@ -110,7 +110,7 @@ module Gitlab
if block_given?
yield data
# api.last_response could change while we're yielding (e.g. fetching labels for each PR)
# so we cache our own last request
# so we cache our own last response
each_response_page(last_response, &block)
else
each_response_page(last_response) { |page| data.concat(page) }
......
......@@ -24,7 +24,8 @@ module Gitlab
import_milestones
import_issues
import_pull_requests
import_comments
import_comments(:issues)
import_comments(:pull_requests)
import_wiki
import_releases
handle_errors
......@@ -48,7 +49,7 @@ module Gitlab
end
def import_labels
client.labels(repo, per_page: 100) do |labels|
fetch_resources(:labels, repo, per_page: 100) do |labels|
labels.each do |raw|
begin
label = LabelFormatter.new(project, raw).create!
......@@ -61,7 +62,7 @@ module Gitlab
end
def import_milestones
client.milestones(repo, state: :all, per_page: 100) do |milestones|
fetch_resources(:milestones, repo, state: :all, per_page: 100) do |milestones|
milestones.each do |raw|
begin
MilestoneFormatter.new(project, raw).create!
......@@ -73,7 +74,7 @@ module Gitlab
end
def import_issues
client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
issues.each do |raw|
gh_issue = IssueFormatter.new(project, raw)
......@@ -90,7 +91,7 @@ module Gitlab
end
def import_pull_requests
client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
pull_requests.each do |raw|
pull_request = PullRequestFormatter.new(project, raw)
next unless pull_request.valid?
......@@ -148,12 +149,23 @@ module Gitlab
end
end
def import_comments
client.issues_comments(repo, per_page: 100) do |comments|
create_comments(comments)
end
def import_comments(issuable_type)
resource_type = "#{issuable_type}_comments".to_sym
# Two notes here:
# 1. We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note,
# compare it against every comment in the current imported page until we find match, and that's where start importing
# 2. GH returns comments for _both_ issues and PRs through issues_comments API, while pull_requests_comments returns
# only comments on diffs, so select last note not based on noteable_type but on line_code
line_code_is = issuable_type == :pull_requests ? 'NOT NULL' : 'NULL'
last_note = project.notes.where("line_code IS #{line_code_is}").last
fetch_resources(resource_type, repo, per_page: 100) do |comments|
if last_note
discard_inserted_comments(comments, last_note)
last_note = nil
end
client.pull_requests_comments(repo, per_page: 100) do |comments|
create_comments(comments)
end
end
......@@ -177,6 +189,24 @@ module Gitlab
end
end
def discard_inserted_comments(comments, last_note)
last_note_attrs = nil
cut_off_index = comments.find_index do |raw|
comment = CommentFormatter.new(project, raw)
comment_attrs = comment.attributes
last_note_attrs ||= last_note.slice(*comment_attrs.keys)
comment_attrs.with_indifferent_access == last_note_attrs
end
# No matching resource in the collection, which means we got halted right on the end of the last page, so all good
return unless cut_off_index
# Otherwise, remove the resources we've already inserted
comments.shift(cut_off_index + 1)
end
def import_wiki
unless project.wiki.repository_exists?
wiki = WikiFormatter.new(project)
......@@ -192,7 +222,7 @@ module Gitlab
end
def import_releases
client.releases(repo, per_page: 100) do |releases|
fetch_resources(:releases, repo, per_page: 100) do |releases|
releases.each do |raw|
begin
gh_release = ReleaseFormatter.new(project, raw)
......@@ -203,6 +233,47 @@ module Gitlab
end
end
end
def fetch_resources(resource_type, *opts)
return if imported?(resource_type)
opts.last.merge!(page: current_page(resource_type))
client.public_send(resource_type, *opts) do |resources|
yield resources
increment_page(resource_type)
end
imported!(resource_type)
end
def imported?(resource_type)
Rails.cache.read("#{cache_key_prefix}:#{resource_type}:imported")
end
def imported!(resource_type)
Rails.cache.write("#{cache_key_prefix}:#{resource_type}:imported", true, ex: 1.day)
end
def increment_page(resource_type)
key = "#{cache_key_prefix}:#{resource_type}:current-page"
# Rails.cache.increment calls INCRBY directly on the value stored under the key, which is
# a serialized ActiveSupport::Cache::Entry, so it will return an error by Redis, hence this ugly work-around
page = Rails.cache.read(key)
page += 1
Rails.cache.write(key, page)
page
end
def current_page(resource_type)
Rails.cache.fetch("#{cache_key_prefix}:#{resource_type}:current-page", ex: 1.day) { 1 }
end
def cache_key_prefix
@cache_key_prefix ||= "github-import:#{project.id}"
end
end
end
end
......@@ -20,8 +20,12 @@ module Gitlab
raw_data.comments > 0
end
def klass
Issue
def project_association
:issues
end
def find_condition
{ iid: number }
end
def number
......
......@@ -9,8 +9,8 @@ module Gitlab
}
end
def klass
Label
def project_association
:labels
end
def create!
......
......@@ -14,8 +14,12 @@ module Gitlab
}
end
def klass
Milestone
def project_association
:milestones
end
def find_condition
{ iid: raw_data.number }
end
private
......
......@@ -24,8 +24,12 @@ module Gitlab
}
end
def klass
MergeRequest
def project_association
:merge_requests
end
def find_condition
{ iid: number }
end
def number
......
......@@ -11,8 +11,12 @@ module Gitlab
}
end
def klass
Release
def project_association
:releases
end
def find_condition
{ tag: raw_data.tag_name }
end
def valid?
......
......@@ -2,6 +2,10 @@ require 'spec_helper'
describe Gitlab::GithubImport::Importer, lib: true do
describe '#execute' do
before do
allow(Rails).to receive(:cache).and_return(ActiveSupport::Cache::MemoryStore.new)
end
context 'when an error occurs' do
let(:project) { create(:project, import_url: 'https://github.com/octocat/Hello-World.git', wiki_access_level: ProjectFeature::DISABLED) }
let(:octocat) { double(id: 123456, login: 'octocat') }
......@@ -152,10 +156,9 @@ describe Gitlab::GithubImport::Importer, lib: true do
message: 'The remote data could not be fully imported.',
errors: [
{ type: :label, url: "https://api.github.com/repos/octocat/Hello-World/labels/bug", errors: "Validation failed: Title can't be blank, Title is invalid" },
{ type: :milestone, url: "https://api.github.com/repos/octocat/Hello-World/milestones/1", errors: "Validation failed: Title has already been taken" },
{ type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1348", errors: "Validation failed: Title can't be blank, Title is too short (minimum is 0 characters)" },
{ type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." },
{ type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Validation failed: Validate branches Cannot Create: This merge request already exists: [\"New feature\"]" },
{ type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." },
{ type: :wiki, errors: "Gitlab::Shell::Error" },
{ type: :release, url: 'https://api.github.com/repos/octocat/Hello-World/releases/2', errors: "Validation failed: Description can't be blank" }
]
......
......@@ -110,7 +110,7 @@ describe Projects::ImportService, services: true do
end
it 'expires existence cache after error' do
allow_any_instance_of(Project).to receive(:repository_exists?).and_return(true)
allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false, true)
expect_any_instance_of(Gitlab::Shell).to receive(:import_repository).with(project.repository_storage_path, project.path_with_namespace, project.import_url).and_raise(Gitlab::Shell::Error.new('Failed to import the repository'))
expect_any_instance_of(Repository).to receive(:expire_emptiness_caches).and_call_original
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment