Commit 9ac714cb authored by Nick Thomas

Merge branch 'sh-save-node-status-database' into 'master'

Add API support and storage for GeoNode status in the database

Closes #3867 and #3740

See merge request gitlab-org/gitlab-ee!3230
parents 032a2e36 53a5489f
class Geo::FileRegistry < Geo::BaseRegistry class Geo::FileRegistry < Geo::BaseRegistry
scope :failed, -> { where(success: false) } scope :failed, -> { where(success: false) }
scope :synced, -> { where(success: true) } scope :synced, -> { where(success: true) }
scope :lfs_objects, -> { where(file_type: :lfs) }
scope :attachments, -> { where(file_type: Geo::FileService::DEFAULT_OBJECT_TYPES) }
end end
...@@ -6,6 +6,7 @@ class GeoNode < ActiveRecord::Base ...@@ -6,6 +6,7 @@ class GeoNode < ActiveRecord::Base
has_many :geo_node_namespace_links has_many :geo_node_namespace_links
has_many :namespaces, through: :geo_node_namespace_links has_many :namespaces, through: :geo_node_namespace_links
has_one :status, class_name: 'GeoNodeStatus'
default_values schema: lambda { Gitlab.config.gitlab.protocol }, default_values schema: lambda { Gitlab.config.gitlab.protocol },
host: lambda { Gitlab.config.gitlab.host }, host: lambda { Gitlab.config.gitlab.host },
...@@ -41,7 +42,9 @@ class GeoNode < ActiveRecord::Base ...@@ -41,7 +42,9 @@ class GeoNode < ActiveRecord::Base
encode: true encode: true
def current? def current?
Gitlab::Geo.current_node == self host == Gitlab.config.gitlab.host &&
port == Gitlab.config.gitlab.port &&
relative_url_root == Gitlab.config.gitlab.relative_url_root
end end
def secondary? def secondary?
...@@ -181,6 +184,43 @@ class GeoNode < ActiveRecord::Base ...@@ -181,6 +184,43 @@ class GeoNode < ActiveRecord::Base
end end
end end
# Counts the LFS object entries that Geo::FileRegistry records as synced.
#
# Returns nil when this node is not a secondary. When the node has
# restricted_project_ids, only registry rows whose file_id belongs to this
# node's lfs_objects are counted.
def lfs_objects_synced_count
  return unless secondary?

  synced = Geo::FileRegistry.lfs_objects.synced
  synced = synced.where(file_id: lfs_objects.pluck(:id)) if restricted_project_ids

  synced.count
end
# Counts the LFS object entries that Geo::FileRegistry records as failed.
#
# Returns nil when this node is not a secondary.
def lfs_objects_failed_count
  Geo::FileRegistry.lfs_objects.failed.count if secondary?
end
# Counts the synced attachment registry entries whose file_id still matches
# one of this node's uploads.
#
# Returns nil when this node is not a secondary. Both ID lists are loaded
# into memory and intersected in Ruby.
def attachments_synced_count
  return unless secondary?

  known_upload_ids = uploads.pluck(:id)
  synced_file_ids = Geo::FileRegistry.attachments.synced.pluck(:file_id)

  (synced_file_ids & known_upload_ids).length
end
# Counts the attachment entries that Geo::FileRegistry records as failed.
#
# Returns nil when this node is not a secondary.
def attachments_failed_count
  Geo::FileRegistry.attachments.failed.count if secondary?
end
# Returns the associated status when one is present; otherwise builds a new
# one through the has_one association (build_status).
def find_or_build_status
  existing = status
  return existing if existing

  build_status
end
private private
def geo_api_url(suffix) def geo_api_url(suffix)
......
class GeoNodeStatus class GeoNodeStatus < ActiveRecord::Base
include ActiveModel::Model belongs_to :geo_node
attr_accessor :id, :success # Whether we were successful in reaching this node
attr_writer :health attr_accessor :success
def health # Be sure to keep this consistent with Prometheus naming conventions
@health ||= HealthCheck::Utils.process_checks(['geo']) PROMETHEUS_METRICS = {
rescue NotImplementedError => e db_replication_lag_seconds: 'Database replication lag (seconds)',
@health = e.to_s repositories_count: 'Total number of repositories available on primary',
end repositories_synced_count: 'Number of repositories synced on secondary',
repositories_failed_count: 'Number of repositories failed to sync on secondary',
def healthy? lfs_objects_count: 'Total number of LFS objects available on primary',
health.blank? lfs_objects_synced_count: 'Number of LFS objects synced on secondary',
end lfs_objects_failed_count: 'Number of LFS objects failed to sync on secondary',
attachments_count: 'Total number of file attachments available on primary',
def db_replication_lag_seconds attachments_synced_count: 'Number of attachments synced on secondary',
return @db_replication_lag_seconds if defined?(@db_replication_lag_seconds) attachments_failed_count: 'Number of attachments failed to sync on secondary',
last_event_id: 'Database ID of the latest event log entry on the primary',
@db_replication_lag_seconds = Gitlab::Geo::HealthCheck.db_replication_lag_seconds if Gitlab::Geo.secondary? last_event_timestamp: 'Time of the latest event log entry on the primary',
end cursor_last_event_id: 'Last database ID of the event log processed by the secondary',
cursor_last_event_timestamp: 'Time of the event log processed by the secondary',
last_successful_status_check_timestamp: 'Time when Geo node status was updated internally',
status_message: 'Summary of health status'
}.freeze
def db_replication_lag_seconds=(value) def self.current_node_status
@db_replication_lag_seconds = value current_node = Gitlab::Geo.current_node
end
def last_event_id
@last_event_id ||= latest_event&.id
end
def last_event_id=(value)
@last_event_id = value
end
def last_event_timestamp return unless current_node
@last_event_timestamp ||= Geo::EventLog.latest_event&.created_at&.to_i
end
def last_event_timestamp=(value) status = current_node.find_or_build_status
@last_event_timestamp = value
end
def cursor_last_event_id # Since we're retrieving our own data, we mark this as a successful load
return @cursor_last_event_id if defined?(@cursor_last_event_id) status.success = true
status.load_data_from_current_node
@cursor_last_event_id = cursor_last_processed&.event_id if Gitlab::Geo.secondary? status.save if Gitlab::Geo.primary?
end
def cursor_last_event_id=(value) status
@cursor_last_event_id = value
end end
def cursor_last_event_timestamp def self.from_json(json_data)
event_id = cursor_last_event_id json_data.slice!(*allowed_params)
return unless event_id
@cursor_last_event_timestamp ||= Geo::EventLog.find_by(id: event_id)&.created_at&.to_i GeoNodeStatus.new(json_data)
end end
def cursor_last_event_timestamp=(value) def self.allowed_params
@cursor_last_event_timestamp = value excluded_params = %w(id last_successful_status_check_at created_at updated_at).freeze
end extra_params = %w(success health last_event_timestamp cursor_last_event_timestamp).freeze
self.column_names - excluded_params + extra_params
def repositories_count
@repositories_count ||= repositories.count
end end
def repositories_count=(value) def load_data_from_current_node
@repositories_count = value.to_i self.status_message =
begin
HealthCheck::Utils.process_checks(['geo'])
rescue NotImplementedError => e
e.to_s
end end
def repositories_synced_count latest_event = Geo::EventLog.latest_event
@repositories_synced_count ||= project_registries.synced.count self.last_event_id = latest_event&.id
end self.last_event_date = latest_event&.created_at
self.repositories_count = geo_node.projects.count
self.lfs_objects_count = geo_node.lfs_objects.count
self.attachments_count = geo_node.uploads.count
self.last_successful_status_check_at = Time.now
def repositories_synced_count=(value) if Gitlab::Geo.secondary?
@repositories_synced_count = value.to_i self.db_replication_lag_seconds = Gitlab::Geo::HealthCheck.db_replication_lag_seconds
self.cursor_last_event_id = Geo::EventLogState.last_processed&.event_id
self.cursor_last_event_date = Geo::EventLog.find_by(id: self.cursor_last_event_id)&.created_at
self.repositories_synced_count = geo_node.project_registries.synced.count
self.repositories_failed_count = geo_node.project_registries.failed.count
self.lfs_objects_synced_count = geo_node.lfs_objects_synced_count
self.lfs_objects_failed_count = geo_node.lfs_objects_failed_count
self.attachments_synced_count = geo_node.attachments_synced_count
self.attachments_failed_count = geo_node.attachments_failed_count
end end
def repositories_synced_in_percentage self
sync_percentage(repositories_count, repositories_synced_count)
end end
def repositories_failed_count alias_attribute :health, :status_message
@repositories_failed_count ||= project_registries.failed.count
end
def repositories_failed_count=(value) def healthy?
@repositories_failed_count = value.to_i status_message.blank? || status_message == 'Healthy'.freeze
end end
def lfs_objects_count def last_successful_status_check_timestamp
@lfs_objects_count ||= lfs_objects.count self.last_successful_status_check_at.to_i
end end
def lfs_objects_count=(value) def last_successful_status_check_timestamp=(value)
@lfs_objects_count = value.to_i self.last_successful_status_check_at = Time.at(value)
end end
def lfs_objects_synced_count def last_event_timestamp
@lfs_objects_synced_count ||= begin self.last_event_date.to_i
relation = Geo::FileRegistry.synced.where(file_type: :lfs)
if Gitlab::Geo.current_node.restricted_project_ids
relation = relation.where(file_id: lfs_objects.pluck(:id))
end end
relation.count def last_event_timestamp=(value)
end self.last_event_date = Time.at(value)
end end
def lfs_objects_synced_count=(value) def cursor_last_event_timestamp
@lfs_objects_synced_count = value.to_i self.cursor_last_event_date.to_i
end end
def lfs_objects_failed_count def cursor_last_event_timestamp=(value)
@lfs_objects_failed_count ||= Geo::FileRegistry.failed.where(file_type: :lfs).count self.cursor_last_event_date = Time.at(value)
end end
def lfs_objects_failed_count=(value) def repositories_synced_in_percentage
@lfs_objects_failed_count = value.to_i sync_percentage(repositories_count, repositories_synced_count)
end end
def lfs_objects_synced_in_percentage def lfs_objects_synced_in_percentage
sync_percentage(lfs_objects_count, lfs_objects_synced_count) sync_percentage(lfs_objects_count, lfs_objects_synced_count)
end end
def attachments_count
@attachments_count ||= attachments.count
end
def attachments_count=(value)
@attachments_count = value.to_i
end
def attachments_synced_count
@attachments_synced_count ||= begin
upload_ids = attachments.pluck(:id)
synced_ids = Geo::FileRegistry.synced.where(file_type: Geo::FileService::DEFAULT_OBJECT_TYPES).pluck(:file_id)
(synced_ids & upload_ids).length
end
end
def attachments_synced_count=(value)
@attachments_synced_count = value.to_i
end
def attachments_failed_count
@attachments_failed_count ||= Geo::FileRegistry.failed.where(file_type: Geo::FileService::DEFAULT_OBJECT_TYPES).count
end
def attachments_failed_count=(value)
@attachments_failed_count = value.to_i
end
def attachments_synced_in_percentage def attachments_synced_in_percentage
sync_percentage(attachments_count, attachments_synced_count) sync_percentage(attachments_count, attachments_synced_count)
end end
...@@ -170,28 +136,4 @@ class GeoNodeStatus ...@@ -170,28 +136,4 @@ class GeoNodeStatus
(synced.to_f / total.to_f) * 100.0 (synced.to_f / total.to_f) * 100.0
end end
def attachments
@attachments ||= Gitlab::Geo.current_node.uploads
end
def lfs_objects
@lfs_objects ||= Gitlab::Geo.current_node.lfs_objects
end
def project_registries
@project_registries ||= Gitlab::Geo.current_node.project_registries
end
def repositories
@repositories ||= Gitlab::Geo.current_node.projects
end
def latest_event
Geo::EventLog.latest_event
end
def cursor_last_processed
Geo::EventLogState.last_processed
end
end end
class GeoNodeStatusEntity < Grape::Entity class GeoNodeStatusEntity < Grape::Entity
include ActionView::Helpers::NumberHelper include ActionView::Helpers::NumberHelper
expose :id expose :geo_node_id
expose :healthy?, as: :healthy expose :healthy?, as: :healthy
expose :health do |node| expose :health do |node|
...@@ -35,4 +35,6 @@ class GeoNodeStatusEntity < Grape::Entity ...@@ -35,4 +35,6 @@ class GeoNodeStatusEntity < Grape::Entity
expose :last_event_timestamp expose :last_event_timestamp
expose :cursor_last_event_id expose :cursor_last_event_id
expose :cursor_last_event_timestamp expose :cursor_last_event_timestamp
expose :last_successful_status_check_timestamp
end end
...@@ -23,6 +23,8 @@ module Geo ...@@ -23,6 +23,8 @@ module Geo
end end
def fetch_geo_node_metrics(node) def fetch_geo_node_metrics(node)
return unless node.enabled?
status = node_status(node) status = node_status(node)
unless status.success unless status.success
...@@ -30,30 +32,29 @@ module Geo ...@@ -30,30 +32,29 @@ module Geo
return return
end end
NodeStatusService::STATUS_DATA.each do |key, docstring| update_db_metrics(node, status) if Gitlab::Geo.primary?
value = status[key] update_prometheus_metrics(node, status)
end
# Copies the fetched status onto the node's status record (existing or
# freshly built), skipping nil attributes and stamping
# last_successful_status_check_at with the current UTC time.
def update_db_metrics(node, status)
  record = node.find_or_build_status
  fetched = status.attributes.compact

  record.update_attributes(fetched.merge(last_successful_status_check_at: Time.now.utc))
end
def update_prometheus_metrics(node, status)
GeoNodeStatus::PROMETHEUS_METRICS.each do |column, docstring|
value = status[column]
next unless value.is_a?(Integer) next unless value.is_a?(Integer)
gauge = Gitlab::Metrics.gauge(gauge_metric_name(key), docstring, {}, :max) gauge = Gitlab::Metrics.gauge(gauge_metric_name(column), docstring, {}, :max)
gauge.set(metric_labels(node), value) gauge.set(metric_labels(node), value)
end end
set_last_updated_at(node)
end end
def node_status(node) def node_status(node)
NodeStatusService.new.call(node) NodeStatusFetchService.new.call(node)
end
def set_last_updated_at(node)
gauge = Gitlab::Metrics.gauge(
:geo_status_last_updated_timestamp,
'UNIX timestamp of last time Geo node status was updated internally',
{},
:max)
gauge.set(metric_labels(node), Time.now.to_i)
end end
def increment_failed_status_counter(node) def increment_failed_status_counter(node)
......
module Geo module Geo
class NodeStatusService class NodeStatusFetchService
include Gitlab::CurrentSettings include Gitlab::CurrentSettings
include HTTParty include HTTParty
STATUS_DATA = {
health: 'Summary of health status',
db_replication_lag_seconds: 'Database replication lag (seconds)',
repositories_count: 'Total number of repositories available on primary',
repositories_synced_count: 'Number of repositories synced on secondary',
repositories_failed_count: 'Number of repositories failed to sync on secondary',
lfs_objects_count: 'Total number of LFS objects available on primary',
lfs_objects_synced_count: 'Number of LFS objects synced on secondary',
lfs_objects_failed_count: 'Number of LFS objects failed to sync on secondary',
attachments_count: 'Total number of file attachments available on primary',
attachments_synced_count: 'Number of attachments synced on secondary',
attachments_failed_count: 'Number of attachments failed to sync on secondary',
last_event_id: 'Database ID of the latest event log entry on the primary',
last_event_timestamp: 'UNIX timestamp of the latest event log entry on the primary',
cursor_last_event_id: 'Last database ID of the event log processed by the secondary',
cursor_last_event_timestamp: 'Last UNIX timestamp of the event log processed by the secondary'
}.freeze
def call(geo_node) def call(geo_node)
data = { id: geo_node.id } return GeoNodeStatus.current_node_status if geo_node.current?
data = { success: false }
begin begin
response = self.class.get(geo_node.status_url, headers: headers, timeout: timeout) response = self.class.get(geo_node.status_url, headers: headers, timeout: timeout)
data[:success] = response.success? data[:success] = response.success?
if response.success? if response.success?
data.merge!(response.parsed_response.symbolize_keys.slice(*STATUS_DATA.keys)) data.merge!(response.parsed_response)
else else
message = "Could not connect to Geo node - HTTP Status Code: #{response.code} #{response.message}" message = "Could not connect to Geo node - HTTP Status Code: #{response.code} #{response.message}"
payload = response.parsed_response payload = response.parsed_response
...@@ -51,11 +35,7 @@ module Geo ...@@ -51,11 +35,7 @@ module Geo
data[:health] = e.message data[:health] = e.message
end end
GeoNodeStatus.new(data) GeoNodeStatus.from_json(data.as_json)
end
def status_keys
STATUS_DATA.stringify_keys.keys
end end
private private
......
...@@ -22,7 +22,7 @@ module Geo ...@@ -22,7 +22,7 @@ module Geo
end end
cursor_last_event_ids = Gitlab::Geo.secondary_nodes.map do |node| cursor_last_event_ids = Gitlab::Geo.secondary_nodes.map do |node|
Geo::NodeStatusService.new.call(node).cursor_last_event_id node.status&.cursor_last_event_id
end end
if cursor_last_event_ids.include?(nil) if cursor_last_event_ids.include?(nil)
......
# Creates the geo_node_statuses table, which stores one status row per Geo
# node (geo_node_id carries a unique index).
class CreateGeoNodeStatuses < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# NOTE(review): presumably the GitLab migration flag declaring that no
# downtime is required - confirm against the migration guidelines.
DOWNTIME = false
def change
create_table :geo_node_statuses do |t|
# Owning node: required, unique, and removed together with its geo_node
# (on_delete: :cascade).
t.references :geo_node, null: false, index: { unique: true }, foreign_key: { on_delete: :cascade }
# Nullable integer metrics.
t.integer :db_replication_lag_seconds
t.integer :repositories_count
t.integer :repositories_synced_count
t.integer :repositories_failed_count
t.integer :lfs_objects_count
t.integer :lfs_objects_synced_count
t.integer :lfs_objects_failed_count
t.integer :attachments_count
t.integer :attachments_synced_count
t.integer :attachments_failed_count
# Event log positions, each paired with a timezone-aware timestamp column.
t.integer :last_event_id
t.datetime_with_timezone :last_event_date
t.integer :cursor_last_event_id
t.datetime_with_timezone :cursor_last_event_date
# Record timestamps are mandatory; the status check timestamp is nullable.
t.datetime_with_timezone :created_at, null: false
t.datetime_with_timezone :updated_at, null: false
t.datetime_with_timezone :last_successful_status_check_at
# Free-form status text.
t.string :status_message
end
end
end
...@@ -802,6 +802,30 @@ ActiveRecord::Schema.define(version: 20171107090120) do ...@@ -802,6 +802,30 @@ ActiveRecord::Schema.define(version: 20171107090120) do
add_index "geo_node_namespace_links", ["geo_node_id", "namespace_id"], name: "index_geo_node_namespace_links_on_geo_node_id_and_namespace_id", unique: true, using: :btree add_index "geo_node_namespace_links", ["geo_node_id", "namespace_id"], name: "index_geo_node_namespace_links_on_geo_node_id_and_namespace_id", unique: true, using: :btree
add_index "geo_node_namespace_links", ["geo_node_id"], name: "index_geo_node_namespace_links_on_geo_node_id", using: :btree add_index "geo_node_namespace_links", ["geo_node_id"], name: "index_geo_node_namespace_links_on_geo_node_id", using: :btree
create_table "geo_node_statuses", force: :cascade do |t|
t.integer "geo_node_id", null: false
t.integer "db_replication_lag_seconds"
t.integer "repositories_count"
t.integer "repositories_synced_count"
t.integer "repositories_failed_count"
t.integer "lfs_objects_count"
t.integer "lfs_objects_synced_count"
t.integer "lfs_objects_failed_count"
t.integer "attachments_count"
t.integer "attachments_synced_count"
t.integer "attachments_failed_count"
t.integer "last_event_id"
t.datetime_with_timezone "last_event_date"
t.integer "cursor_last_event_id"
t.datetime_with_timezone "cursor_last_event_date"
t.datetime_with_timezone "created_at", null: false
t.datetime_with_timezone "updated_at", null: false
t.datetime_with_timezone "last_successful_status_check_at"
t.string "status_message"
end
add_index "geo_node_statuses", ["geo_node_id"], name: "index_geo_node_statuses_on_geo_node_id", unique: true, using: :btree
create_table "geo_nodes", force: :cascade do |t| create_table "geo_nodes", force: :cascade do |t|
t.string "schema" t.string "schema"
t.string "host" t.string "host"
...@@ -2272,6 +2296,7 @@ ActiveRecord::Schema.define(version: 20171107090120) do ...@@ -2272,6 +2296,7 @@ ActiveRecord::Schema.define(version: 20171107090120) do
add_foreign_key "geo_event_log", "geo_repository_updated_events", column: "repository_updated_event_id", on_delete: :cascade add_foreign_key "geo_event_log", "geo_repository_updated_events", column: "repository_updated_event_id", on_delete: :cascade
add_foreign_key "geo_node_namespace_links", "geo_nodes", on_delete: :cascade add_foreign_key "geo_node_namespace_links", "geo_nodes", on_delete: :cascade
add_foreign_key "geo_node_namespace_links", "namespaces", on_delete: :cascade add_foreign_key "geo_node_namespace_links", "namespaces", on_delete: :cascade
add_foreign_key "geo_node_statuses", "geo_nodes", on_delete: :cascade
add_foreign_key "geo_repositories_changed_events", "geo_nodes", on_delete: :cascade add_foreign_key "geo_repositories_changed_events", "geo_nodes", on_delete: :cascade
add_foreign_key "geo_repository_created_events", "projects", on_delete: :cascade add_foreign_key "geo_repository_created_events", "projects", on_delete: :cascade
add_foreign_key "geo_repository_renamed_events", "projects", on_delete: :cascade add_foreign_key "geo_repository_renamed_events", "projects", on_delete: :cascade
......
...@@ -69,8 +69,8 @@ the `monitoring.sidekiq_exporter` configuration option in `gitlab.yml`. ...@@ -69,8 +69,8 @@ the `monitoring.sidekiq_exporter` configuration option in `gitlab.yml`.
|geo_last_event_timestamp | Gauge | 10.2 | UNIX timestamp of the latest event log entry on the primary | url |geo_last_event_timestamp | Gauge | 10.2 | UNIX timestamp of the latest event log entry on the primary | url
|geo_cursor_last_event_id | Gauge | 10.2 | Last database ID of the event log processed by the secondary | url |geo_cursor_last_event_id | Gauge | 10.2 | Last database ID of the event log processed by the secondary | url
|geo_cursor_last_event_timestamp | Gauge | 10.2 | Last UNIX timestamp of the event log processed by the secondary | url |geo_cursor_last_event_timestamp | Gauge | 10.2 | Last UNIX timestamp of the event log processed by the secondary | url
|geo_status_last_updated_timestamp | Gauge | 10.2 | Last timestamp when the status was successfully updated | url
|geo_status_failed_total | Counter | 10.2 | Number of times retrieving the status from the Geo Node failed | url |geo_status_failed_total | Counter | 10.2 | Number of times retrieving the status from the Geo Node failed | url
|geo_last_successful_status_check_timestamp | Gauge | 10.2 | Last timestamp when the status was successfully updated | url
## Metrics shared directory ## Metrics shared directory
......
...@@ -68,7 +68,7 @@ class Admin::GeoNodesController < Admin::ApplicationController ...@@ -68,7 +68,7 @@ class Admin::GeoNodesController < Admin::ApplicationController
end end
def status def status
status = Geo::NodeStatusService.new.call(@node) status = Geo::NodeStatusFetchService.new.call(@node)
respond_to do |format| respond_to do |format|
format.json do format.json do
......
...@@ -126,6 +126,7 @@ module API ...@@ -126,6 +126,7 @@ module API
mount ::API::Files mount ::API::Files
mount ::API::Groups mount ::API::Groups
mount ::API::Geo mount ::API::Geo
mount ::API::GeoNodes
mount ::API::Internal mount ::API::Internal
mount ::API::Issues mount ::API::Issues
mount ::API::IssueLinks mount ::API::IssueLinks
......
...@@ -1018,24 +1018,14 @@ module API ...@@ -1018,24 +1018,14 @@ module API
expose :active?, as: :active expose :active?, as: :active
end end
class GeoNodeStatus < Grape::Entity class GeoNode < Grape::Entity
expose :id expose :id
expose :db_replication_lag_seconds expose :url
expose :health expose :primary?, as: :primary
expose :healthy?, as: :healthy expose :enabled
expose :repositories_count expose :files_max_capacity
expose :repositories_synced_count expose :repos_max_capacity
expose :repositories_failed_count expose :clone_protocol
expose :lfs_objects_count
expose :lfs_objects_synced_count
expose :lfs_objects_failed_count
expose :attachments_count
expose :attachments_synced_count
expose :attachments_failed_count
expose :last_event_id
expose :last_event_timestamp
expose :cursor_last_event_id
expose :cursor_last_event_timestamp
end end
class PersonalAccessToken < Grape::Entity class PersonalAccessToken < Grape::Entity
......
...@@ -36,7 +36,8 @@ module API ...@@ -36,7 +36,8 @@ module API
authenticate_by_gitlab_geo_node_token! authenticate_by_gitlab_geo_node_token!
require_node_to_be_secondary! require_node_to_be_secondary!
present GeoNodeStatus.new(id: Gitlab::Geo.current_node.id), with: Entities::GeoNodeStatus status = ::GeoNodeStatus.current_node_status
present status, with: GeoNodeStatusEntity
end end
end end
......
module API
  # REST endpoints that expose Geo nodes and their status records.
  class GeoNodes < Grape::API
    include PaginationParams
    include APIGuard

    # authenticated_as_admin! runs before every route in this class.
    before { authenticated_as_admin! }

    resource :geo_nodes do
      # Get all Geo node information
      #
      # Example request:
      #   GET /geo_nodes
      desc 'Retrieves the available Geo nodes' do
        success Entities::GeoNode
      end
      get do
        nodes = GeoNode.all

        present paginate(nodes), with: Entities::GeoNode
      end

      # Get all Geo node statuses
      #
      # NOTE(review): declared ahead of the ':id' routes, presumably so that
      # 'status' is not captured as an :id - keep this ordering.
      #
      # Example request:
      #   GET /geo_nodes/status
      desc 'Get status for all Geo nodes' do
        success GeoNodeStatusEntity
      end
      get '/status' do
        status = GeoNodeStatus.all

        present paginate(status), with: GeoNodeStatusEntity
      end

      # Get information for a single Geo node
      #
      # Example request:
      #   GET /geo_nodes/:id
      desc 'Get a single GeoNode' do
        success Entities::GeoNode
      end
      params do
        requires :id, type: Integer, desc: 'The ID of the node'
      end
      get ':id' do
        node = GeoNode.find_by(id: params[:id])

        not_found!('GeoNode') unless node

        present node, with: Entities::GeoNode
      end

      # Get Geo metrics for a single node
      #
      # The current node's status comes from GeoNodeStatus.current_node_status;
      # any other node is served from its associated status record.
      #
      # Example request:
      #   GET /geo_nodes/:id/status
      desc 'Get metrics for a single Geo node' do
        success GeoNodeStatusEntity
      end
      params do
        requires :id, type: Integer, desc: 'The ID of the node'
      end
      get ':id/status' do
        # find_by returns nil for an unknown ID, so the guard below is
        # reachable and answers through not_found!, as GET /geo_nodes/:id does.
        geo_node = GeoNode.find_by(id: params[:id])

        not_found!('GeoNode') unless geo_node

        status =
          if geo_node.current?
            GeoNodeStatus.current_node_status
          else
            geo_node.status
          end

        not_found!('Status for Geo node not found') unless status

        present status, with: ::GeoNodeStatusEntity
      end
    end
  end
end
...@@ -279,11 +279,11 @@ describe Admin::GeoNodesController, :postgresql do ...@@ -279,11 +279,11 @@ describe Admin::GeoNodesController, :postgresql do
end end
context 'with add-on license' do context 'with add-on license' do
let(:geo_node_status) { build(:geo_node_status, :healthy) } let(:geo_node_status) { build(:geo_node_status, :healthy, geo_node: geo_node) }
before do before do
allow(Gitlab::Geo).to receive(:license_allows?).and_return(true) allow(Gitlab::Geo).to receive(:license_allows?).and_return(true)
allow_any_instance_of(Geo::NodeStatusService).to receive(:call).and_return(geo_node_status) allow_any_instance_of(Geo::NodeStatusFetchService).to receive(:call).and_return(geo_node_status)
end end
it 'returns the status' do it 'returns the status' do
......
...@@ -2,6 +2,7 @@ module EE ...@@ -2,6 +2,7 @@ module EE
module GeoHelpers module GeoHelpers
def stub_current_geo_node(node) def stub_current_geo_node(node)
allow(::Gitlab::Geo).to receive(:current_node).and_return(node) allow(::Gitlab::Geo).to receive(:current_node).and_return(node)
allow(node).to receive(:current?).and_return(true)
end end
end end
end end
FactoryGirl.define do FactoryGirl.define do
factory :geo_node_status do factory :geo_node_status do
skip_create
sequence(:id) sequence(:id)
geo_node
trait :healthy do trait :healthy do
health nil health nil
......
{
"type": "object",
"required" : [
"id",
"url",
"primary",
"enabled",
"files_max_capacity",
"repos_max_capacity",
"clone_protocol"
],
"properties" : {
"id": { "type": "integer" },
"url": { "type": ["string", "null"] },
"primary": { "type": "boolean" },
"enabled": { "type": "boolean" },
"files_max_capacity": { "type": "integer" },
"repos_max_capacity": { "type": "integer" },
"clone_protocol": { "type": ["string"] }
},
"additionalProperties": false
}
{ {
"type": "object", "type": "object",
"required" : [ "required" : [
"id", "geo_node_id",
"healthy", "healthy",
"health", "health",
"attachments_count", "attachments_count",
...@@ -20,9 +20,9 @@ ...@@ -20,9 +20,9 @@
"cursor_last_event_timestamp" "cursor_last_event_timestamp"
], ],
"properties" : { "properties" : {
"id": { "type": "integer" }, "geo_node_id": { "type": "integer" },
"healthy": { "type": "boolean" }, "healthy": { "type": "boolean" },
"health": { "type": "string" }, "health": { "type": ["string", "null"] },
"attachments_count": { "type": "integer" }, "attachments_count": { "type": "integer" },
"attachments_failed_count": { "type": "integer" }, "attachments_failed_count": { "type": "integer" },
"attachments_synced_count": { "type": "integer" }, "attachments_synced_count": { "type": "integer" },
...@@ -39,7 +39,8 @@ ...@@ -39,7 +39,8 @@
"last_event_id": { "type": ["integer", "null"] }, "last_event_id": { "type": ["integer", "null"] },
"last_event_timestamp": { "type": ["integer", "null"] }, "last_event_timestamp": { "type": ["integer", "null"] },
"cursor_last_event_id": { "type": ["integer", "null"] }, "cursor_last_event_id": { "type": ["integer", "null"] },
"cursor_last_event_timestamp": { "type": ["integer", "null"] } "cursor_last_event_timestamp": { "type": ["integer", "null"] },
"last_successful_status_check_timestamp": { "type": ["integer", "null"] }
}, },
"additionalProperties": false "additionalProperties": false
} }
{
"type": "array",
"items": { "$ref": "geo_node_status.json" }
}
{
"type": "array",
"items": { "$ref": "geo_node.json" }
}
...@@ -138,7 +138,7 @@ describe GeoNode, type: :model do ...@@ -138,7 +138,7 @@ describe GeoNode, type: :model do
end end
it 'returns false when node is not the current node' do it 'returns false when node is not the current node' do
stub_current_geo_node(double) subject.port = Gitlab.config.gitlab.port + 1
expect(subject.current?).to eq false expect(subject.current?).to eq false
end end
...@@ -238,6 +238,18 @@ describe GeoNode, type: :model do ...@@ -238,6 +238,18 @@ describe GeoNode, type: :model do
end end
end end
describe '#find_or_build_status' do
it 'returns a new status' do
status = new_node.find_or_build_status
expect(status).to be_a(GeoNodeStatus)
status.save
expect(new_node.find_or_build_status).to eq(status)
end
end
describe '#oauth_callback_url' do describe '#oauth_callback_url' do
let(:oauth_callback_url) { 'https://localhost:3000/gitlab/oauth/geo/callback' } let(:oauth_callback_url) { 'https://localhost:3000/gitlab/oauth/geo/callback' }
...@@ -332,6 +344,78 @@ describe GeoNode, type: :model do ...@@ -332,6 +344,78 @@ describe GeoNode, type: :model do
end end
end end
describe '#lfs_objects_synced_count' do
context 'primary node' do
subject { primary_node }
it 'returns nil' do
expect(subject.lfs_objects_synced_count).to be_nil
end
end
context 'secondary node' do
subject { node }
it 'returns a value' do
expect(subject.lfs_objects_synced_count).to eq(0)
end
end
end
describe '#lfs_objects_failed_count' do
context 'primary node' do
subject { primary_node }
it 'returns nil' do
expect(subject.lfs_objects_failed_count).to be_nil
end
end
context 'secondary node' do
subject { node }
it 'returns a value' do
expect(subject.lfs_objects_failed_count).to eq(0)
end
end
end
describe '#attachments_synced_count' do
context 'primary node' do
subject { primary_node }
it 'returns nil' do
expect(subject.attachments_synced_count).to be_nil
end
end
context 'secondary node' do
subject { node }
it 'returns a value' do
expect(subject.attachments_synced_count).to eq(0)
end
end
end
describe '#attachments_failed_count' do
context 'primary node' do
subject { primary_node }
it 'returns nil' do
expect(subject.attachments_failed_count).to be_nil
end
end
context 'secondary node' do
subject { node }
it 'returns a value' do
expect(subject.attachments_failed_count).to eq(0)
end
end
end
describe '#geo_node_key' do describe '#geo_node_key' do
context 'primary node' do context 'primary node' do
it 'cannot be set' do it 'cannot be set' do
......
require 'spec_helper' require 'spec_helper'
describe GeoNodeStatus do describe GeoNodeStatus, :geo do
set(:geo_node) { create(:geo_node, :primary) } include ::EE::GeoHelpers
set(:primary) { create(:geo_node, :primary) }
set(:secondary) { create(:geo_node) }
set(:group) { create(:group) } set(:group) { create(:group) }
set(:project_1) { create(:project, group: group) } set(:project_1) { create(:project, group: group) }
set(:project_2) { create(:project, group: group) } set(:project_2) { create(:project, group: group) }
set(:project_3) { create(:project) } set(:project_3) { create(:project) }
set(:project_4) { create(:project) } set(:project_4) { create(:project) }
subject { described_class.new } subject { described_class.current_node_status }
before do
stub_current_geo_node(secondary)
end
describe '#healthy?' do describe '#healthy?' do
context 'when health is blank' do context 'when health is blank' do
it 'returns true' do it 'returns true' do
subject.health = '' subject.status_message = ''
expect(subject.healthy?).to eq true expect(subject.healthy?).to be true
end end
end end
context 'when health is present' do context 'when health is present' do
it 'returns true' do
subject.status_message = 'Healthy'
expect(subject.healthy?).to be true
end
it 'returns false' do it 'returns false' do
subject.health = 'something went wrong' subject.status_message = 'something went wrong'
expect(subject.healthy?).to eq false expect(subject.healthy?).to be false
end end
end end
end end
describe '#health' do describe '#status_message' do
it 'delegates to the HealthCheck' do it 'delegates to the HealthCheck' do
subject.health = nil
expect(HealthCheck::Utils).to receive(:process_checks).with(['geo']).once expect(HealthCheck::Utils).to receive(:process_checks).with(['geo']).once
subject.health subject
end end
end end
...@@ -53,27 +65,29 @@ describe GeoNodeStatus do ...@@ -53,27 +65,29 @@ describe GeoNodeStatus do
it 'does not count synced files that were replaced' do it 'does not count synced files that were replaced' do
user = create(:user, avatar: fixture_file_upload(Rails.root + 'spec/fixtures/dk.png', 'image/png')) user = create(:user, avatar: fixture_file_upload(Rails.root + 'spec/fixtures/dk.png', 'image/png'))
subject = described_class.new
expect(subject.attachments_count).to eq(1) expect(subject.attachments_count).to eq(1)
expect(subject.attachments_synced_count).to eq(0) expect(subject.attachments_synced_count).to eq(0)
upload = Upload.find_by(model: user, uploader: 'AvatarUploader') upload = Upload.find_by(model: user, uploader: 'AvatarUploader')
create(:geo_file_registry, :avatar, file_id: upload.id) create(:geo_file_registry, :avatar, file_id: upload.id)
subject = described_class.new subject = described_class.current_node_status
expect(subject.attachments_count).to eq(1) expect(subject.attachments_count).to eq(1)
expect(subject.attachments_synced_count).to eq(1) expect(subject.attachments_synced_count).to eq(1)
user.update(avatar: fixture_file_upload(Rails.root + 'spec/fixtures/rails_sample.jpg', 'image/jpg')) user.update(avatar: fixture_file_upload(Rails.root + 'spec/fixtures/rails_sample.jpg', 'image/jpg'))
subject = described_class.new subject = described_class.current_node_status
expect(subject.attachments_count).to eq(1) expect(subject.attachments_count).to eq(1)
expect(subject.attachments_synced_count).to eq(0) expect(subject.attachments_synced_count).to eq(0)
upload = Upload.find_by(model: user, uploader: 'AvatarUploader') upload = Upload.find_by(model: user, uploader: 'AvatarUploader')
create(:geo_file_registry, :avatar, file_id: upload.id) create(:geo_file_registry, :avatar, file_id: upload.id)
subject = described_class.new subject = described_class.current_node_status
expect(subject.attachments_count).to eq(1) expect(subject.attachments_count).to eq(1)
expect(subject.attachments_synced_count).to eq(1) expect(subject.attachments_synced_count).to eq(1)
end end
...@@ -116,7 +130,7 @@ describe GeoNodeStatus do ...@@ -116,7 +130,7 @@ describe GeoNodeStatus do
end end
it 'returns the right percentage with group restrictions' do it 'returns the right percentage with group restrictions' do
geo_node.update_attribute(:namespaces, [group]) secondary.update_attribute(:namespaces, [group])
create(:geo_file_registry, :avatar, file_id: upload_1.id) create(:geo_file_registry, :avatar, file_id: upload_1.id)
create(:geo_file_registry, :avatar, file_id: upload_2.id) create(:geo_file_registry, :avatar, file_id: upload_2.id)
...@@ -133,6 +147,7 @@ describe GeoNodeStatus do ...@@ -133,6 +147,7 @@ describe GeoNodeStatus do
end end
it "doesn't attempt to set replication lag if primary" do it "doesn't attempt to set replication lag if primary" do
stub_current_geo_node(primary)
expect(Gitlab::Geo::HealthCheck).not_to receive(:db_replication_lag_seconds) expect(Gitlab::Geo::HealthCheck).not_to receive(:db_replication_lag_seconds)
expect(subject.db_replication_lag_seconds).to eq(nil) expect(subject.db_replication_lag_seconds).to eq(nil)
...@@ -168,14 +183,14 @@ describe GeoNodeStatus do ...@@ -168,14 +183,14 @@ describe GeoNodeStatus do
end end
it 'returns the right percentage with no group restrictions' do it 'returns the right percentage with no group restrictions' do
create(:geo_file_registry, :lfs, file_id: lfs_object_project.lfs_object_id) create(:geo_file_registry, :lfs, file_id: lfs_object_project.lfs_object_id, success: true)
expect(subject.lfs_objects_synced_in_percentage).to be_within(0.0001).of(25) expect(subject.lfs_objects_synced_in_percentage).to be_within(0.0001).of(25)
end end
it 'returns the right percentage with group restrictions' do it 'returns the right percentage with group restrictions' do
geo_node.update_attribute(:namespaces, [group]) secondary.update_attribute(:namespaces, [group])
create(:geo_file_registry, :lfs, file_id: lfs_object_project.lfs_object_id) create(:geo_file_registry, :lfs, file_id: lfs_object_project.lfs_object_id, success: true)
expect(subject.lfs_objects_synced_in_percentage).to be_within(0.0001).of(50) expect(subject.lfs_objects_synced_in_percentage).to be_within(0.0001).of(50)
end end
...@@ -192,7 +207,7 @@ describe GeoNodeStatus do ...@@ -192,7 +207,7 @@ describe GeoNodeStatus do
end end
it 'returns the right number of failed repos with group restrictions' do it 'returns the right number of failed repos with group restrictions' do
geo_node.update_attribute(:namespaces, [group]) secondary.update_attribute(:namespaces, [group])
expect(subject.repositories_failed_count).to eq(1) expect(subject.repositories_failed_count).to eq(1)
end end
...@@ -210,17 +225,17 @@ describe GeoNodeStatus do ...@@ -210,17 +225,17 @@ describe GeoNodeStatus do
end end
it 'returns the right percentage with group restrictions' do it 'returns the right percentage with group restrictions' do
geo_node.update_attribute(:namespaces, [group]) secondary.update_attribute(:namespaces, [group])
create(:geo_project_registry, :synced, project: project_1) create(:geo_project_registry, :synced, project: project_1)
expect(subject.repositories_synced_in_percentage).to be_within(0.0001).of(50) expect(subject.repositories_synced_in_percentage).to be_within(0.0001).of(50)
end end
end end
describe '#last_event_id and #last_event_timestamp' do describe '#last_event_id and #last_event_date' do
it 'returns nil when no events are available' do it 'returns nil when no events are available' do
expect(subject.last_event_id).to be_nil expect(subject.last_event_id).to be_nil
expect(subject.last_event_timestamp).to be_nil expect(subject.last_event_date).to be_nil
end end
it 'returns the latest event' do it 'returns the latest event' do
...@@ -228,14 +243,14 @@ describe GeoNodeStatus do ...@@ -228,14 +243,14 @@ describe GeoNodeStatus do
event = create(:geo_event_log, created_at: created_at) event = create(:geo_event_log, created_at: created_at)
expect(subject.last_event_id).to eq(event.id) expect(subject.last_event_id).to eq(event.id)
expect(subject.last_event_timestamp).to eq(created_at.to_i) expect(subject.last_event_date).to eq(created_at)
end end
end end
describe '#cursor_last_event_id and #cursor_last_event_timestamp' do describe '#cursor_last_event_id and #cursor_last_event_date' do
it 'returns nil when no events are available' do it 'returns nil when no events are available' do
expect(subject.cursor_last_event_id).to be_nil expect(subject.cursor_last_event_id).to be_nil
expect(subject.cursor_last_event_timestamp).to be_nil expect(subject.cursor_last_event_date).to be_nil
end end
it 'returns the latest event ID if secondary' do it 'returns the latest event ID if secondary' do
...@@ -246,9 +261,10 @@ describe GeoNodeStatus do ...@@ -246,9 +261,10 @@ describe GeoNodeStatus do
end end
it "doesn't attempt to retrieve cursor if primary" do it "doesn't attempt to retrieve cursor if primary" do
stub_current_geo_node(primary)
create(:geo_event_log_state) create(:geo_event_log_state)
expect(subject.cursor_last_event_timestamp).to eq(nil) expect(subject.cursor_last_event_date).to eq(nil)
expect(subject.cursor_last_event_id).to eq(nil) expect(subject.cursor_last_event_id).to eq(nil)
end end
end end
...@@ -264,40 +280,40 @@ describe GeoNodeStatus do ...@@ -264,40 +280,40 @@ describe GeoNodeStatus do
end end
end end
context 'when no values are available' do shared_examples 'timestamp parameters' do |timestamp_column, date_column|
it 'returns 0 for each attribute' do it 'returns the value it was assigned via UNIX timestamp' do
allow(Gitlab::Geo::HealthCheck).to receive(:db_replication_lag_seconds).and_return(nil) now = Time.now.beginning_of_day.utc
subject.attachments_count = nil subject.update_attribute(timestamp_column, now.to_i)
subject.attachments_synced_count = nil
subject.attachments_failed_count = nil expect(subject.public_send(date_column)).to eq(now)
subject.lfs_objects_count = nil expect(subject.public_send(timestamp_column)).to eq(now.to_i)
subject.lfs_objects_synced_count = nil end
subject.lfs_objects_failed_count = nil end
subject.repositories_count = nil
subject.repositories_synced_count = nil describe '#last_successful_status_check_timestamp' do
subject.repositories_failed_count = nil it_behaves_like 'timestamp parameters', :last_successful_status_check_timestamp, :last_successful_status_check_at
subject.last_event_id = nil end
subject.last_event_timestamp = nil
subject.cursor_last_event_id = nil describe '#last_event_timestamp' do
subject.cursor_last_event_timestamp = nil it_behaves_like 'timestamp parameters', :last_event_timestamp, :last_event_date
end
expect(subject.db_replication_lag_seconds).to be_nil
expect(subject.repositories_count).to be_zero describe '#cursor_last_event_timestamp' do
expect(subject.repositories_synced_count).to be_zero it_behaves_like 'timestamp parameters', :cursor_last_event_timestamp, :cursor_last_event_date
expect(subject.repositories_synced_in_percentage).to be_zero end
expect(subject.repositories_failed_count).to be_zero
expect(subject.lfs_objects_count).to be_zero describe '#from_json' do
expect(subject.lfs_objects_synced_count).to be_zero it 'returns a new GeoNodeStatus excluding parameters' do
expect(subject.lfs_objects_failed_count).to be_zero status = create(:geo_node_status)
expect(subject.lfs_objects_synced_in_percentage).to be_zero
expect(subject.attachments_count).to be_zero data = status.as_json
expect(subject.attachments_synced_count).to be_zero data[:id] = 10000
expect(subject.attachments_failed_count).to be_zero
expect(subject.attachments_synced_in_percentage).to be_zero result = GeoNodeStatus.from_json(data)
expect(subject.last_event_id).to be_nil
expect(subject.last_event_timestamp).to be_nil expect(result.id).to be_nil
expect(subject.cursor_last_event_id).to be_nil expect(result.attachments_count).to eq(status.attachments_count)
expect(subject.cursor_last_event_timestamp).to be_nil expect(result.cursor_last_event_date).to eq(status.cursor_last_event_date)
end end
end end
end end
require 'spec_helper'
# Request specs for the Geo nodes API. For every endpoint two things are
# checked: an admin gets a 200 response matching the endpoint's JSON schema,
# and a regular user is rejected with 403.
describe API::GeoNodes, :geo, api: true do
  include ApiHelpers
  include ::EE::GeoHelpers

  set(:primary) { create(:geo_node, :primary) }
  set(:secondary) { create(:geo_node) }
  set(:another_secondary) { create(:geo_node) }
  set(:secondary_status) { create(:geo_node_status, :healthy, geo_node_id: secondary.id) }
  set(:another_secondary_status) { create(:geo_node_status, :healthy, geo_node_id: another_secondary.id) }

  let(:admin) { create(:admin) }
  let(:user) { create(:user) }

  describe 'GET /geo_nodes' do
    it 'retrieves the Geo nodes if admin is logged in' do
      get api("/geo_nodes", admin)

      expect(response.status).to eq 200
      expect(response).to match_response_schema('geo_nodes')
    end

    it 'denies access if not admin' do
      get api('/geo_nodes', user)

      expect(response.status).to eq 403
    end
  end

  describe 'GET /geo_nodes/:id' do
    it 'retrieves the Geo nodes if admin is logged in' do
      get api("/geo_nodes/#{primary.id}", admin)

      expect(response.status).to eq 200
      expect(response).to match_response_schema('geo_node')
    end

    it 'denies access if not admin' do
      # Request the single-node endpoint itself; hitting the collection
      # endpoint here would leave this route's authorization untested.
      get api("/geo_nodes/#{primary.id}", user)

      expect(response.status).to eq 403
    end
  end

  describe 'GET /geo_nodes/status' do
    it 'retrieves the Geo nodes status if admin is logged in' do
      get api("/geo_nodes/status", admin)

      expect(response.status).to eq 200
      expect(response).to match_response_schema('geo_node_statuses')
    end

    it 'denies access if not admin' do
      # Exercise the status collection endpoint named by this describe block.
      get api('/geo_nodes/status', user)

      expect(response.status).to eq 403
    end
  end

  describe 'GET /geo_nodes/:id/status' do
    it 'retrieves the Geo nodes status if admin is logged in' do
      # The current node is the primary while the secondary's status is
      # requested, so the live status of the current node must not be computed.
      stub_current_geo_node(primary)
      expect(GeoNodeStatus).not_to receive(:current_node_status)

      get api("/geo_nodes/#{secondary.id}/status", admin)

      expect(response.status).to eq 200
      expect(response).to match_response_schema('geo_node_status')
    end

    it 'fetches the current node status' do
      # The requested node is the current node here, so the status is expected
      # to come from GeoNodeStatus.current_node_status.
      stub_current_geo_node(secondary)
      expect(GeoNode).to receive(:find).and_return(secondary)
      expect(GeoNodeStatus).to receive(:current_node_status).and_call_original

      get api("/geo_nodes/#{secondary.id}/status", admin)

      expect(response.status).to eq 200
      expect(response).to match_response_schema('geo_node_status')
    end

    it 'denies access if not admin' do
      # Exercise the per-node status endpoint named by this describe block.
      get api("/geo_nodes/#{secondary.id}/status", user)

      expect(response.status).to eq 403
    end
  end
end
...@@ -3,7 +3,7 @@ require 'spec_helper' ...@@ -3,7 +3,7 @@ require 'spec_helper'
describe GeoNodeStatusEntity, :postgresql do describe GeoNodeStatusEntity, :postgresql do
let(:geo_node_status) do let(:geo_node_status) do
GeoNodeStatus.new( GeoNodeStatus.new(
id: 1, geo_node_id: 1,
health: '', health: '',
attachments_count: 329, attachments_count: 329,
attachments_failed_count: 25, attachments_failed_count: 25,
...@@ -13,7 +13,8 @@ describe GeoNodeStatusEntity, :postgresql do ...@@ -13,7 +13,8 @@ describe GeoNodeStatusEntity, :postgresql do
lfs_objects_synced_count: 123, lfs_objects_synced_count: 123,
repositories_count: 10, repositories_count: 10,
repositories_synced_count: 5, repositories_synced_count: 5,
repositories_failed_count: 0 repositories_failed_count: 0,
last_successful_status_check_timestamp: Time.now.beginning_of_day
) )
end end
...@@ -27,7 +28,7 @@ describe GeoNodeStatusEntity, :postgresql do ...@@ -27,7 +28,7 @@ describe GeoNodeStatusEntity, :postgresql do
subject { entity.as_json } subject { entity.as_json }
it { is_expected.to have_key(:id) } it { is_expected.to have_key(:geo_node_id) }
it { is_expected.to have_key(:healthy) } it { is_expected.to have_key(:healthy) }
it { is_expected.to have_key(:health) } it { is_expected.to have_key(:health) }
it { is_expected.to have_key(:attachments_count) } it { is_expected.to have_key(:attachments_count) }
...@@ -42,6 +43,7 @@ describe GeoNodeStatusEntity, :postgresql do ...@@ -42,6 +43,7 @@ describe GeoNodeStatusEntity, :postgresql do
it { is_expected.to have_key(:repositories_failed_count) } it { is_expected.to have_key(:repositories_failed_count) }
it { is_expected.to have_key(:repositories_synced_count)} it { is_expected.to have_key(:repositories_synced_count)}
it { is_expected.to have_key(:repositories_synced_in_percentage) } it { is_expected.to have_key(:repositories_synced_in_percentage) }
it { is_expected.to have_key(:last_successful_status_check_timestamp) }
describe '#healthy' do describe '#healthy' do
context 'when node is healthy' do context 'when node is healthy' do
......
...@@ -5,16 +5,12 @@ describe Geo::MetricsUpdateService, :geo do ...@@ -5,16 +5,12 @@ describe Geo::MetricsUpdateService, :geo do
subject { described_class.new } subject { described_class.new }
let(:timestamp) { Time.now.to_i } let(:event_date) { Time.now.utc }
before do let(:data) do
allow(Gitlab::Metrics).to receive(:prometheus_metrics_enabled?).and_return(true) {
end success: true,
status_message: nil,
describe '#execute' do
before do
data = {
health: 'OK',
db_replication_lag_seconds: 0, db_replication_lag_seconds: 0,
repositories_count: 10, repositories_count: 10,
repositories_synced_count: 1, repositories_synced_count: 1,
...@@ -26,12 +22,20 @@ describe Geo::MetricsUpdateService, :geo do ...@@ -26,12 +22,20 @@ describe Geo::MetricsUpdateService, :geo do
attachments_synced_count: 30, attachments_synced_count: 30,
attachments_failed_count: 25, attachments_failed_count: 25,
last_event_id: 2, last_event_id: 2,
last_event_timestamp: timestamp, last_event_date: event_date,
cursor_last_event_id: 1, cursor_last_event_id: 1,
cursor_last_event_timestamp: timestamp cursor_last_event_date: event_date
} }
end
before do
allow(Gitlab::Metrics).to receive(:prometheus_metrics_enabled?).and_return(true)
end
describe '#execute' do
before do
request = double(success?: true, parsed_response: data.stringify_keys, code: 200) request = double(success?: true, parsed_response: data.stringify_keys, code: 200)
allow(Geo::NodeStatusService).to receive(:get).and_return(request) allow(Geo::NodeStatusFetchService).to receive(:get).and_return(request)
end end
context 'when node is the primary' do context 'when node is the primary' do
...@@ -51,6 +55,23 @@ describe Geo::MetricsUpdateService, :geo do ...@@ -51,6 +55,23 @@ describe Geo::MetricsUpdateService, :geo do
expect(Gitlab::Metrics.registry.get(:geo_repositories).get({ url: secondary.url })).to eq(10) expect(Gitlab::Metrics.registry.get(:geo_repositories).get({ url: secondary.url })).to eq(10)
expect(Gitlab::Metrics.registry.get(:geo_repositories).get({ url: secondary.url })).to eq(10) expect(Gitlab::Metrics.registry.get(:geo_repositories).get({ url: secondary.url })).to eq(10)
end end
it 'updates the GeoNodeStatus entry' do
expect { subject.execute }.to change { GeoNodeStatus.count }.by(2)
status = secondary.status.load_data_from_current_node
expect(status.geo_node_id).to eq(secondary.id)
expect(status.last_successful_status_check_at).not_to be_nil
end
it 'updates only the active node' do
secondary.update_attributes(enabled: false)
expect { subject.execute }.to change { GeoNodeStatus.count }.by(1)
expect(another_secondary.status).not_to be_nil
end
end end
context 'when node is a secondary' do context 'when node is a secondary' do
...@@ -60,6 +81,7 @@ describe Geo::MetricsUpdateService, :geo do ...@@ -60,6 +81,7 @@ describe Geo::MetricsUpdateService, :geo do
before do before do
stub_current_geo_node(secondary) stub_current_geo_node(secondary)
allow(subject).to receive(:node_status).and_return(GeoNodeStatus.new(data))
end end
it 'adds gauges for various metrics' do it 'adds gauges for various metrics' do
...@@ -76,10 +98,10 @@ describe Geo::MetricsUpdateService, :geo do ...@@ -76,10 +98,10 @@ describe Geo::MetricsUpdateService, :geo do
expect(metric_value(:geo_attachments_synced)).to eq(30) expect(metric_value(:geo_attachments_synced)).to eq(30)
expect(metric_value(:geo_attachments_failed)).to eq(25) expect(metric_value(:geo_attachments_failed)).to eq(25)
expect(metric_value(:geo_last_event_id)).to eq(2) expect(metric_value(:geo_last_event_id)).to eq(2)
expect(metric_value(:geo_last_event_timestamp)).to eq(timestamp.to_i) expect(metric_value(:geo_last_event_timestamp)).to eq(event_date.to_i)
expect(metric_value(:geo_cursor_last_event_id)).to eq(1) expect(metric_value(:geo_cursor_last_event_id)).to eq(1)
expect(metric_value(:geo_cursor_last_event_timestamp)).to eq(timestamp.to_i) expect(metric_value(:geo_cursor_last_event_timestamp)).to eq(event_date.to_i)
expect(metric_value(:geo_status_last_updated_timestamp)).to be_truthy expect(metric_value(:geo_last_successful_status_check_timestamp)).to be_truthy
end end
it 'increments a counter when metrics fail to retrieve' do it 'increments a counter when metrics fail to retrieve' do
...@@ -91,8 +113,12 @@ describe Geo::MetricsUpdateService, :geo do ...@@ -91,8 +113,12 @@ describe Geo::MetricsUpdateService, :geo do
expect { subject.execute }.to change { metric_value(:geo_status_failed_total) }.by(1) expect { subject.execute }.to change { metric_value(:geo_status_failed_total) }.by(1)
end end
it 'does not create GeoNodeStatus entries' do
expect { subject.execute }.to change { GeoNodeStatus.count }.by(0)
end
def metric_value(metric_name) def metric_value(metric_name)
Gitlab::Metrics.registry.get(metric_name).get({ url: secondary.url }) Gitlab::Metrics.registry.get(metric_name)&.get({ url: secondary.url })
end end
end end
end end
......
require 'spec_helper' require 'spec_helper'
describe Geo::NodeStatusService do describe Geo::NodeStatusFetchService, :geo do
include ::EE::GeoHelpers
set(:primary) { create(:geo_node, :primary) } set(:primary) { create(:geo_node, :primary) }
set(:secondary) { create(:geo_node) } set(:secondary) { create(:geo_node) }
subject { described_class.new } subject { described_class.new }
describe '#status_keys' do
it 'matches the serializer keys' do
exceptions = %w[
id
healthy
repositories_synced_in_percentage
lfs_objects_synced_in_percentage
attachments_synced_in_percentage
]
expected = GeoNodeStatusEntity
.new(GeoNodeStatus.new)
.as_json
.keys
.map(&:to_s) - exceptions
expect(subject.status_keys).to match_array(expected)
end
end
describe '#call' do describe '#call' do
it 'parses a 401 response' do it 'parses a 401 response' do
request = double(success?: false, request = double(success?: false,
...@@ -36,10 +18,40 @@ describe Geo::NodeStatusService do ...@@ -36,10 +18,40 @@ describe Geo::NodeStatusService do
status = subject.call(secondary) status = subject.call(secondary)
expect(status.health).to eq("Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\nTest") expect(status.status_message).to eq("Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\nTest")
end
it 'always reload GeoNodeStatus if current node' do
stub_current_geo_node(secondary)
expect(GeoNodeStatus).to receive(:current_node_status).and_call_original
status = subject.call(secondary)
expect(status).to be_a(GeoNodeStatus)
end
it 'ignores certain parameters' do
yesterday = Date.yesterday
request = double(success?: true,
code: 200,
message: 'Unauthorized',
parsed_response: {
'id' => 5000,
'last_successful_status_check_at' => yesterday,
'created_at' => yesterday,
'updated_at' => yesterday
})
allow(described_class).to receive(:get).and_return(request)
status = subject.call(secondary)
expect(status.id).not_to be(5000)
expect(status.last_successful_status_check_at).not_to be(yesterday)
expect(status.created_at).not_to be(yesterday)
expect(status.updated_at).not_to be(yesterday)
end end
it 'parses a 200 response' do it 'parses a 200 legacy response' do
data = { health: 'OK', data = { health: 'OK',
db_replication_lag_seconds: 0, db_replication_lag_seconds: 0,
repositories_count: 10, repositories_count: 10,
...@@ -73,7 +85,7 @@ describe Geo::NodeStatusService do ...@@ -73,7 +85,7 @@ describe Geo::NodeStatusService do
status = subject.call(secondary) status = subject.call(secondary)
expect(status.health).to eq("Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\n") expect(status.status_message).to eq("Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\n")
expect(status.success).to be false expect(status.success).to be false
end end
...@@ -83,7 +95,7 @@ describe Geo::NodeStatusService do ...@@ -83,7 +95,7 @@ describe Geo::NodeStatusService do
status = subject.call(secondary) status = subject.call(secondary)
expect(status.health).to eq(message) expect(status.status_message).to eq(message)
end end
it 'handles connection refused' do it 'handles connection refused' do
...@@ -91,7 +103,7 @@ describe Geo::NodeStatusService do ...@@ -91,7 +103,7 @@ describe Geo::NodeStatusService do
status = subject.call(secondary) status = subject.call(secondary)
expect(status.health).to eq('Connection refused - bad connection') expect(status.status_message).to eq('Connection refused - bad connection')
end end
it 'returns meaningful error message when primary uses incorrect db key' do it 'returns meaningful error message when primary uses incorrect db key' do
...@@ -99,7 +111,7 @@ describe Geo::NodeStatusService do ...@@ -99,7 +111,7 @@ describe Geo::NodeStatusService do
status = subject.call(secondary) status = subject.call(secondary)
expect(status.health).to eq('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.') expect(status.status_message).to eq('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.')
end end
it 'gracefully handles case when primary is deleted' do it 'gracefully handles case when primary is deleted' do
...@@ -107,7 +119,7 @@ describe Geo::NodeStatusService do ...@@ -107,7 +119,7 @@ describe Geo::NodeStatusService do
status = subject.call(secondary) status = subject.call(secondary)
expect(status.health).to eq('This GitLab instance does not appear to be configured properly as a Geo node. Make sure the URLs are using the correct fully-qualified domain names.') expect(status.status_message).to eq('This GitLab instance does not appear to be configured properly as a Geo node. Make sure the URLs are using the correct fully-qualified domain names.')
end end
end end
end end
...@@ -56,23 +56,23 @@ describe Geo::PruneEventLogWorker, :geo do ...@@ -56,23 +56,23 @@ describe Geo::PruneEventLogWorker, :geo do
let(:healthy_status) { build(:geo_node_status, :healthy) } let(:healthy_status) { build(:geo_node_status, :healthy) }
let(:unhealthy_status) { build(:geo_node_status, :unhealthy) } let(:unhealthy_status) { build(:geo_node_status, :unhealthy) }
let(:node_status_service) do
service = double
allow(Geo::NodeStatusService).to receive(:new).and_return(service)
service
end
it 'contacts all secondary nodes for their status' do it 'contacts all secondary nodes for their status' do
expect(node_status_service).to receive(:call).twice { healthy_status } events = create_list(:geo_event_log, 5)
create(:geo_node_status, :healthy, cursor_last_event_id: events.last.id, geo_node_id: secondary.id)
create(:geo_node_status, :healthy, cursor_last_event_id: events[3].id, geo_node_id: secondary2.id)
expect(worker).to receive(:log_info).with('Delete Geo Event Log entries up to id', anything) expect(worker).to receive(:log_info).with('Delete Geo Event Log entries up to id', anything)
worker.perform worker.perform
end end
it 'aborts when there are unhealthy nodes' do it 'aborts when there are unhealthy nodes' do
create_list(:geo_event_log, 2) events = create_list(:geo_event_log, 2)
create(:geo_node_status, :healthy, cursor_last_event_id: events.last.id, geo_node_id: secondary.id)
create(:geo_node_status, :unhealthy, geo_node_id: secondary2.id)
expect(node_status_service).to receive(:call).twice.and_return(healthy_status, unhealthy_status)
expect(worker).to receive(:log_info).with('Could not get status of all nodes, not deleting any entries from Geo Event Log', unhealthy_node_count: 1) expect(worker).to receive(:log_info).with('Could not get status of all nodes, not deleting any entries from Geo Event Log', unhealthy_node_count: 1)
expect { worker.perform }.not_to change { Geo::EventLog.count } expect { worker.perform }.not_to change { Geo::EventLog.count }
...@@ -81,11 +81,9 @@ describe Geo::PruneEventLogWorker, :geo do ...@@ -81,11 +81,9 @@ describe Geo::PruneEventLogWorker, :geo do
it 'takes the integer-minimum value of all cursor_last_event_ids' do it 'takes the integer-minimum value of all cursor_last_event_ids' do
events = create_list(:geo_event_log, 12) events = create_list(:geo_event_log, 12)
allow(node_status_service).to receive(:call).twice.and_return( create(:geo_node_status, :healthy, cursor_last_event_id: events[3].id, geo_node_id: secondary.id)
build(:geo_node_status, :healthy, cursor_last_event_id: events[3]), create(:geo_node_status, :healthy, cursor_last_event_id: events.last.id, geo_node_id: secondary2.id)
build(:geo_node_status, :healthy, cursor_last_event_id: events.last) expect(worker).to receive(:log_info).with('Delete Geo Event Log entries up to id', geo_event_log_id: events[3].id)
)
expect(worker).to receive(:log_info).with('Delete Geo Event Log entries up to id', geo_event_log_id: events[3])
expect { worker.perform }.to change { Geo::EventLog.count }.by(-3) expect { worker.perform }.to change { Geo::EventLog.count }.by(-3)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment