Commit fd60ec69 authored by Ash McKenzie's avatar Ash McKenzie

Merge branch 'mk/test-package-file-create-event-handling' into 'master'

Geo: Test Blob Replicators more generically

Closes #118745

See merge request gitlab-org/gitlab!25951
parents c15031bb 7718b242
......@@ -142,3 +142,212 @@ ActiveRecord hooks:
The framework behind all this is located in
[`ee/lib/gitlab/geo/`](https://gitlab.com/gitlab-org/gitlab/-/tree/master/ee/lib/gitlab/geo).
## Existing Replicator Strategies
Before writing a new kind of Replicator Strategy, check below to see if your
resource can already be handled by one of the existing strategies. Consult with
the Geo team if you are unsure.
### Blob Replicator Strategy
Models that use
[CarrierWave's](https://github.com/carrierwaveuploader/carrierwave) `Uploader::Base`
can be easily supported by Geo with the `Geo::BlobReplicatorStrategy` module.
First, each file should have its own primary ID and model. Geo strongly
recommends treating *every single file* as a first-class citizen, because in
our experience this greatly simplifies tracking replication and verification
state.
For example, to add support for files referenced by a `Widget` model with a
`widgets` table, you would perform the following steps:
1. Add verification state fields to the `widgets` table so the Geo primary can
track verification state:
```ruby
# frozen_string_literal: true
class AddVerificationStateToWidgets < ActiveRecord::Migration[6.0]
DOWNTIME = false
def change
add_column :widgets, :verification_retry_at, :datetime_with_timezone
add_column :widgets, :last_verification_ran_at, :datetime_with_timezone
add_column :widgets, :verification_checksum, :string
add_column :widgets, :verification_failure, :string
add_column :widgets, :verification_retry_count, :integer
end
end
```
1. Add a partial index on `verification_failure` to ensure re-verification can
be performed efficiently:
```ruby
# frozen_string_literal: true
class AddVerificationFailureIndexToWidgets < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
add_concurrent_index :widgets, :verification_failure, where: "(verification_failure IS NOT NULL)", name: "widgets_verification_failure_partial"
end
def down
remove_concurrent_index :widgets, :verification_failure
end
end
```
1. Include `Gitlab::Geo::ReplicableModel` in the `Widget` class, and specify
the Replicator class `with_replicator Geo::WidgetReplicator`.
At this point the `Widget` class should look like this:
```ruby
# frozen_string_literal: true
class Widget < ApplicationRecord
include ::Gitlab::Geo::ReplicableModel
with_replicator Geo::WidgetReplicator
mount_uploader :file, WidgetUploader
...
end
```
1. Create `ee/app/replicators/geo/widget_replicator.rb`. Implement the
`#carrierwave_uploader` method which should return a `CarrierWave::Uploader`.
And implement the private `#model` method to return the `Widget` class.
```ruby
# frozen_string_literal: true
module Geo
class WidgetReplicator < Gitlab::Geo::Replicator
include ::Geo::BlobReplicatorStrategy
def carrierwave_uploader
model_record.file
end
private
def model
::Widget
end
end
end
```
1. Create `ee/spec/replicators/geo/widget_replicator_spec.rb` and perform
the setup necessary to define the `model_record` variable for the shared
examples.
```ruby
# frozen_string_literal: true
require 'spec_helper'
describe Geo::WidgetReplicator do
let(:model_record) { build(:widget) }
it_behaves_like 'a blob replicator'
end
```
1. Create the `widget_registry` table so Geo secondaries can track the sync and
verification state of each Widget's file:
```ruby
# frozen_string_literal: true
class CreateWidgetRegistry < ActiveRecord::Migration[5.2]
DOWNTIME = false
def change
create_table :widget_registry, id: :serial, force: :cascade do |t|
t.integer :widget_id, null: false
t.integer :state, default: 0, null: false
t.integer :retry_count, default: 0
t.string :last_sync_failure, limit: 255
t.datetime_with_timezone :retry_at
t.datetime_with_timezone :last_synced_at
t.datetime_with_timezone :created_at, null: false
t.index :widget_id, name: :index_widget_registry_on_repository_id, using: :btree
t.index :retry_at, name: :index_widget_registry_on_retry_at, using: :btree
t.index :state, name: :index_widget_registry_on_state, using: :btree
end
end
end
```
1. Create `ee/app/models/geo/widget_registry.rb`:
```ruby
# frozen_string_literal: true
class Geo::WidgetRegistry < Geo::BaseRegistry
include Geo::StateMachineRegistry
belongs_to :widget, class_name: 'Widget'
end
```
1. Create `ee/spec/factories/geo/widget_registry.rb`:
```ruby
# frozen_string_literal: true
FactoryBot.define do
factory :widget_registry, class: 'Geo::WidgetRegistry' do
widget
state { Geo::WidgetRegistry.state_value(:pending) }
trait :synced do
state { Geo::WidgetRegistry.state_value(:synced) }
last_synced_at { 5.days.ago }
end
trait :failed do
state { Geo::WidgetRegistry.state_value(:failed) }
last_synced_at { 1.day.ago }
retry_count { 2 }
last_sync_failure { 'Random error' }
end
trait :started do
state { Geo::WidgetRegistry.state_value(:started) }
last_synced_at { 1.day.ago }
retry_count { 0 }
end
end
end
```
1. Create `ee/spec/models/geo/widget_registry.rb`:
```ruby
# frozen_string_literal: true
require 'spec_helper'
describe Geo::WidgetRegistry, :geo, type: :model do
let_it_be(:registry) { create(:widget_registry) }
specify 'factory is valid' do
expect(registry).to be_valid
end
end
```
Widget files should now be replicated and verified by Geo!
......@@ -11,8 +11,7 @@ module Geo
class_methods do
end
# Called by Packages::PackageFile on create
def publish_created_event
def handle_after_create_commit
publish(:created, **created_params)
end
......
......@@ -12,7 +12,6 @@ class Geo::PackageFileRegistry < Geo::BaseRegistry
belongs_to :package_file, class_name: 'Packages::PackageFile'
scope :package_file_id_not_in, -> (ids) { where.not(package_file_id: ids) }
scope :never, -> { where(last_synced_at: nil) }
scope :failed, -> { with_state(:failed) }
scope :synced, -> { with_state(:synced) }
......
......@@ -35,7 +35,6 @@ class Packages::PackageFile < ApplicationRecord
with_replicator Geo::PackageFileReplicator
after_save :update_file_metadata, if: :saved_change_to_file?
after_create_commit -> { replicator.publish_created_event }
update_project_statistics project_statistics_name: :packages_size
......
......@@ -3,11 +3,14 @@
module Gitlab
module Geo
module ReplicableModel
def self.included(klass)
klass.extend(ClassMethods)
extend ActiveSupport::Concern
included do
# If this hook turns out not to apply to all Models, perhaps we should extract a `ReplicableBlobModel`
after_create_commit -> { replicator.handle_after_create_commit if replicator.respond_to?(:handle_after_create_commit) }
end
module ClassMethods
class_methods do
def with_replicator(klass)
raise ArgumentError, 'Must be a class inheriting from Gitlab::Geo::Replicator' unless klass < ::Gitlab::Geo::Replicator
......
......@@ -3,8 +3,6 @@
FactoryBot.define do
factory :package_file_registry, class: 'Geo::PackageFileRegistry' do
association :package_file, factory: [:package_file, :npm]
last_sync_failure { nil }
last_synced_at { nil }
state { Geo::PackageFileRegistry.state_value(:pending) }
trait :synced do
......
......@@ -38,6 +38,10 @@ describe Gitlab::Geo::Replicator do
context 'model DSL' do
class DummyModel
include ActiveModel::Model
def self.after_create_commit(*args)
end
include Gitlab::Geo::ReplicableModel
with_replicator DummyReplicator
......@@ -46,7 +50,7 @@ describe Gitlab::Geo::Replicator do
subject { DummyModel.new }
it 'adds replicator method to the model' do
expect(subject).respond_to? :replicator
expect(subject).to respond_to(:replicator)
end
it 'instantiates a replicator into the model' do
......
......@@ -3,35 +3,7 @@
require 'spec_helper'
describe Geo::PackageFileReplicator do
include EE::GeoHelpers
let(:model_record) { build(:package_file, :npm) }
let_it_be(:primary) { create(:geo_node, :primary) }
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:model_record) { create(:package_file, :npm) }
subject { described_class.new(model_record: model_record) }
before do
stub_current_geo_node(primary)
end
describe '#publish_created_event' do
it "creates a Geo::Event" do
expect do
subject.publish_created_event
end.to change { ::Geo::Event.count }.by(1)
expect(::Geo::Event.last.attributes).to include("replicable_name" => "package_file", "event_name" => "created", "payload" => { "model_record_id" => model_record.id })
end
end
describe '#consume_created_event' do
it 'invokes Geo::BlobDownloadService' do
service = double(:service)
expect(service).to receive(:execute)
expect(::Geo::BlobDownloadService).to receive(:new).with(replicator: subject).and_return(service)
subject.consume_created_event
end
end
it_behaves_like 'a blob replicator'
end
# frozen_string_literal: true
# Include these shared examples in specs of Replicators that include
# BlobReplicatorStrategy.
#
# A let variable called model_record should be defined in the spec. It should be
# a valid, unpersisted instance of the model class.
#
RSpec.shared_examples 'a blob replicator' do
include EE::GeoHelpers
let_it_be(:primary) { create(:geo_node, :primary) }
let_it_be(:secondary) { create(:geo_node) }
subject(:replicator) { model_record.replicator }
before do
stub_current_geo_node(primary)
end
describe '#handle_after_create_commit' do
it 'creates a Geo::Event' do
expect do
replicator.handle_after_create_commit
end.to change { ::Geo::Event.count }.by(1)
expect(::Geo::Event.last.attributes).to include(
"replicable_name" => replicator.replicable_name, "event_name" => "created", "payload" => { "model_record_id" => replicator.model_record.id })
end
end
describe '#consume_created_event' do
it 'invokes Geo::BlobDownloadService' do
service = double(:service)
expect(service).to receive(:execute)
expect(::Geo::BlobDownloadService).to receive(:new).with(replicator: replicator).and_return(service)
replicator.consume_created_event
end
end
describe '#carrierwave_uploader' do
it 'is implemented' do
expect do
replicator.carrierwave_uploader
end.not_to raise_error
end
end
describe '#model' do
let(:invoke_model) { replicator.send(:model) }
it 'is implemented' do
expect do
invoke_model
end.not_to raise_error
end
it 'is a Class' do
expect(invoke_model).to be_a(Class)
end
# For convenience (and reliability), instead of asking developers to include shared examples on each model spec as well
context 'replicable model' do
it 'defines #replicator' do
expect(model_record).to respond_to(:replicator)
end
it 'invokes replicator.handle_after_create_commit on create' do
expect(replicator).to receive(:handle_after_create_commit)
model_record.save!
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment