Commit 53a5489f authored by Stan Hu's avatar Stan Hu Committed by Nick Thomas

Add API support and storage for GeoNode status in the database

parent 032a2e36
class Geo::FileRegistry < Geo::BaseRegistry
scope :failed, -> { where(success: false) }
scope :synced, -> { where(success: true) }
scope :lfs_objects, -> { where(file_type: :lfs) }
scope :attachments, -> { where(file_type: Geo::FileService::DEFAULT_OBJECT_TYPES) }
end
......@@ -6,6 +6,7 @@ class GeoNode < ActiveRecord::Base
has_many :geo_node_namespace_links
has_many :namespaces, through: :geo_node_namespace_links
has_one :status, class_name: 'GeoNodeStatus'
default_values schema: lambda { Gitlab.config.gitlab.protocol },
host: lambda { Gitlab.config.gitlab.host },
......@@ -41,7 +42,9 @@ class GeoNode < ActiveRecord::Base
encode: true
def current?
Gitlab::Geo.current_node == self
host == Gitlab.config.gitlab.host &&
port == Gitlab.config.gitlab.port &&
relative_url_root == Gitlab.config.gitlab.relative_url_root
end
def secondary?
......@@ -181,6 +184,43 @@ class GeoNode < ActiveRecord::Base
end
end
def lfs_objects_synced_count
return unless secondary?
relation = Geo::FileRegistry.lfs_objects.synced
if restricted_project_ids
relation = relation.where(file_id: lfs_objects.pluck(:id))
end
relation.count
end
def lfs_objects_failed_count
return unless secondary?
Geo::FileRegistry.lfs_objects.failed.count
end
def attachments_synced_count
return unless secondary?
upload_ids = uploads.pluck(:id)
synced_ids = Geo::FileRegistry.attachments.synced.pluck(:file_id)
(synced_ids & upload_ids).length
end
def attachments_failed_count
return unless secondary?
Geo::FileRegistry.attachments.failed.count
end
def find_or_build_status
status || build_status
end
private
def geo_api_url(suffix)
......
class GeoNodeStatus
include ActiveModel::Model
class GeoNodeStatus < ActiveRecord::Base
belongs_to :geo_node
attr_accessor :id, :success
attr_writer :health
# Whether we were successful in reaching this node
attr_accessor :success
def health
@health ||= HealthCheck::Utils.process_checks(['geo'])
rescue NotImplementedError => e
@health = e.to_s
end
def healthy?
health.blank?
end
def db_replication_lag_seconds
return @db_replication_lag_seconds if defined?(@db_replication_lag_seconds)
@db_replication_lag_seconds = Gitlab::Geo::HealthCheck.db_replication_lag_seconds if Gitlab::Geo.secondary?
end
# Be sure to keep this consistent with Prometheus naming conventions
PROMETHEUS_METRICS = {
db_replication_lag_seconds: 'Database replication lag (seconds)',
repositories_count: 'Total number of repositories available on primary',
repositories_synced_count: 'Number of repositories synced on secondary',
repositories_failed_count: 'Number of repositories failed to sync on secondary',
lfs_objects_count: 'Total number of LFS objects available on primary',
lfs_objects_synced_count: 'Number of LFS objects synced on secondary',
lfs_objects_failed_count: 'Number of LFS objects failed to sync on secondary',
attachments_count: 'Total number of file attachments available on primary',
attachments_synced_count: 'Number of attachments synced on secondary',
attachments_failed_count: 'Number of attachments failed to sync on secondary',
last_event_id: 'Database ID of the latest event log entry on the primary',
last_event_timestamp: 'Time of the latest event log entry on the primary',
cursor_last_event_id: 'Last database ID of the event log processed by the secondary',
cursor_last_event_timestamp: 'Time of the event log processed by the secondary',
last_successful_status_check_timestamp: 'Time when Geo node status was updated internally',
status_message: 'Summary of health status'
}.freeze
def db_replication_lag_seconds=(value)
@db_replication_lag_seconds = value
end
def last_event_id
@last_event_id ||= latest_event&.id
end
def last_event_id=(value)
@last_event_id = value
end
def self.current_node_status
current_node = Gitlab::Geo.current_node
def last_event_timestamp
@last_event_timestamp ||= Geo::EventLog.latest_event&.created_at&.to_i
end
return unless current_node
def last_event_timestamp=(value)
@last_event_timestamp = value
end
status = current_node.find_or_build_status
def cursor_last_event_id
return @cursor_last_event_id if defined?(@cursor_last_event_id)
# Since we're retrieving our own data, we mark this as a successful load
status.success = true
status.load_data_from_current_node
@cursor_last_event_id = cursor_last_processed&.event_id if Gitlab::Geo.secondary?
end
status.save if Gitlab::Geo.primary?
def cursor_last_event_id=(value)
@cursor_last_event_id = value
status
end
def cursor_last_event_timestamp
event_id = cursor_last_event_id
return unless event_id
def self.from_json(json_data)
json_data.slice!(*allowed_params)
@cursor_last_event_timestamp ||= Geo::EventLog.find_by(id: event_id)&.created_at&.to_i
GeoNodeStatus.new(json_data)
end
def cursor_last_event_timestamp=(value)
@cursor_last_event_timestamp = value
end
def repositories_count
@repositories_count ||= repositories.count
def self.allowed_params
excluded_params = %w(id last_successful_status_check_at created_at updated_at).freeze
extra_params = %w(success health last_event_timestamp cursor_last_event_timestamp).freeze
self.column_names - excluded_params + extra_params
end
def repositories_count=(value)
@repositories_count = value.to_i
def load_data_from_current_node
self.status_message =
begin
HealthCheck::Utils.process_checks(['geo'])
rescue NotImplementedError => e
e.to_s
end
def repositories_synced_count
@repositories_synced_count ||= project_registries.synced.count
end
latest_event = Geo::EventLog.latest_event
self.last_event_id = latest_event&.id
self.last_event_date = latest_event&.created_at
self.repositories_count = geo_node.projects.count
self.lfs_objects_count = geo_node.lfs_objects.count
self.attachments_count = geo_node.uploads.count
self.last_successful_status_check_at = Time.now
def repositories_synced_count=(value)
@repositories_synced_count = value.to_i
if Gitlab::Geo.secondary?
self.db_replication_lag_seconds = Gitlab::Geo::HealthCheck.db_replication_lag_seconds
self.cursor_last_event_id = Geo::EventLogState.last_processed&.event_id
self.cursor_last_event_date = Geo::EventLog.find_by(id: self.cursor_last_event_id)&.created_at
self.repositories_synced_count = geo_node.project_registries.synced.count
self.repositories_failed_count = geo_node.project_registries.failed.count
self.lfs_objects_synced_count = geo_node.lfs_objects_synced_count
self.lfs_objects_failed_count = geo_node.lfs_objects_failed_count
self.attachments_synced_count = geo_node.attachments_synced_count
self.attachments_failed_count = geo_node.attachments_failed_count
end
def repositories_synced_in_percentage
sync_percentage(repositories_count, repositories_synced_count)
self
end
def repositories_failed_count
@repositories_failed_count ||= project_registries.failed.count
end
alias_attribute :health, :status_message
def repositories_failed_count=(value)
@repositories_failed_count = value.to_i
def healthy?
status_message.blank? || status_message == 'Healthy'.freeze
end
def lfs_objects_count
@lfs_objects_count ||= lfs_objects.count
def last_successful_status_check_timestamp
self.last_successful_status_check_at.to_i
end
def lfs_objects_count=(value)
@lfs_objects_count = value.to_i
def last_successful_status_check_timestamp=(value)
self.last_successful_status_check_at = Time.at(value)
end
def lfs_objects_synced_count
@lfs_objects_synced_count ||= begin
relation = Geo::FileRegistry.synced.where(file_type: :lfs)
if Gitlab::Geo.current_node.restricted_project_ids
relation = relation.where(file_id: lfs_objects.pluck(:id))
def last_event_timestamp
self.last_event_date.to_i
end
relation.count
end
def last_event_timestamp=(value)
self.last_event_date = Time.at(value)
end
def lfs_objects_synced_count=(value)
@lfs_objects_synced_count = value.to_i
def cursor_last_event_timestamp
self.cursor_last_event_date.to_i
end
def lfs_objects_failed_count
@lfs_objects_failed_count ||= Geo::FileRegistry.failed.where(file_type: :lfs).count
def cursor_last_event_timestamp=(value)
self.cursor_last_event_date = Time.at(value)
end
def lfs_objects_failed_count=(value)
@lfs_objects_failed_count = value.to_i
def repositories_synced_in_percentage
sync_percentage(repositories_count, repositories_synced_count)
end
def lfs_objects_synced_in_percentage
sync_percentage(lfs_objects_count, lfs_objects_synced_count)
end
def attachments_count
@attachments_count ||= attachments.count
end
def attachments_count=(value)
@attachments_count = value.to_i
end
def attachments_synced_count
@attachments_synced_count ||= begin
upload_ids = attachments.pluck(:id)
synced_ids = Geo::FileRegistry.synced.where(file_type: Geo::FileService::DEFAULT_OBJECT_TYPES).pluck(:file_id)
(synced_ids & upload_ids).length
end
end
def attachments_synced_count=(value)
@attachments_synced_count = value.to_i
end
def attachments_failed_count
@attachments_failed_count ||= Geo::FileRegistry.failed.where(file_type: Geo::FileService::DEFAULT_OBJECT_TYPES).count
end
def attachments_failed_count=(value)
@attachments_failed_count = value.to_i
end
def attachments_synced_in_percentage
sync_percentage(attachments_count, attachments_synced_count)
end
......@@ -170,28 +136,4 @@ class GeoNodeStatus
(synced.to_f / total.to_f) * 100.0
end
def attachments
@attachments ||= Gitlab::Geo.current_node.uploads
end
def lfs_objects
@lfs_objects ||= Gitlab::Geo.current_node.lfs_objects
end
def project_registries
@project_registries ||= Gitlab::Geo.current_node.project_registries
end
def repositories
@repositories ||= Gitlab::Geo.current_node.projects
end
def latest_event
Geo::EventLog.latest_event
end
def cursor_last_processed
Geo::EventLogState.last_processed
end
end
class GeoNodeStatusEntity < Grape::Entity
include ActionView::Helpers::NumberHelper
expose :id
expose :geo_node_id
expose :healthy?, as: :healthy
expose :health do |node|
......@@ -35,4 +35,6 @@ class GeoNodeStatusEntity < Grape::Entity
expose :last_event_timestamp
expose :cursor_last_event_id
expose :cursor_last_event_timestamp
expose :last_successful_status_check_timestamp
end
......@@ -23,6 +23,8 @@ module Geo
end
def fetch_geo_node_metrics(node)
return unless node.enabled?
status = node_status(node)
unless status.success
......@@ -30,30 +32,29 @@ module Geo
return
end
NodeStatusService::STATUS_DATA.each do |key, docstring|
value = status[key]
update_db_metrics(node, status) if Gitlab::Geo.primary?
update_prometheus_metrics(node, status)
end
def update_db_metrics(node, status)
db_status = node.find_or_build_status
db_status.update_attributes(status.attributes.compact.merge(last_successful_status_check_at: Time.now.utc))
end
def update_prometheus_metrics(node, status)
GeoNodeStatus::PROMETHEUS_METRICS.each do |column, docstring|
value = status[column]
next unless value.is_a?(Integer)
gauge = Gitlab::Metrics.gauge(gauge_metric_name(key), docstring, {}, :max)
gauge = Gitlab::Metrics.gauge(gauge_metric_name(column), docstring, {}, :max)
gauge.set(metric_labels(node), value)
end
set_last_updated_at(node)
end
def node_status(node)
NodeStatusService.new.call(node)
end
def set_last_updated_at(node)
gauge = Gitlab::Metrics.gauge(
:geo_status_last_updated_timestamp,
'UNIX timestamp of last time Geo node status was updated internally',
{},
:max)
gauge.set(metric_labels(node), Time.now.to_i)
NodeStatusFetchService.new.call(node)
end
def increment_failed_status_counter(node)
......
module Geo
class NodeStatusService
class NodeStatusFetchService
include Gitlab::CurrentSettings
include HTTParty
STATUS_DATA = {
health: 'Summary of health status',
db_replication_lag_seconds: 'Database replication lag (seconds)',
repositories_count: 'Total number of repositories available on primary',
repositories_synced_count: 'Number of repositories synced on secondary',
repositories_failed_count: 'Number of repositories failed to sync on secondary',
lfs_objects_count: 'Total number of LFS objects available on primary',
lfs_objects_synced_count: 'Number of LFS objects synced on secondary',
lfs_objects_failed_count: 'Number of LFS objects failed to sync on secondary',
attachments_count: 'Total number of file attachments available on primary',
attachments_synced_count: 'Number of attachments synced on secondary',
attachments_failed_count: 'Number of attachments failed to sync on secondary',
last_event_id: 'Database ID of the latest event log entry on the primary',
last_event_timestamp: 'UNIX timestamp of the latest event log entry on the primary',
cursor_last_event_id: 'Last database ID of the event log processed by the secondary',
cursor_last_event_timestamp: 'Last UNIX timestamp of the event log processed by the secondary'
}.freeze
def call(geo_node)
data = { id: geo_node.id }
return GeoNodeStatus.current_node_status if geo_node.current?
data = { success: false }
begin
response = self.class.get(geo_node.status_url, headers: headers, timeout: timeout)
data[:success] = response.success?
if response.success?
data.merge!(response.parsed_response.symbolize_keys.slice(*STATUS_DATA.keys))
data.merge!(response.parsed_response)
else
message = "Could not connect to Geo node - HTTP Status Code: #{response.code} #{response.message}"
payload = response.parsed_response
......@@ -51,11 +35,7 @@ module Geo
data[:health] = e.message
end
GeoNodeStatus.new(data)
end
def status_keys
STATUS_DATA.stringify_keys.keys
GeoNodeStatus.from_json(data.as_json)
end
private
......
......@@ -22,7 +22,7 @@ module Geo
end
cursor_last_event_ids = Gitlab::Geo.secondary_nodes.map do |node|
Geo::NodeStatusService.new.call(node).cursor_last_event_id
node.status&.cursor_last_event_id
end
if cursor_last_event_ids.include?(nil)
......
class CreateGeoNodeStatuses < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
create_table :geo_node_statuses do |t|
t.references :geo_node, null: false, index: { unique: true }, foreign_key: { on_delete: :cascade }
t.integer :db_replication_lag_seconds
t.integer :repositories_count
t.integer :repositories_synced_count
t.integer :repositories_failed_count
t.integer :lfs_objects_count
t.integer :lfs_objects_synced_count
t.integer :lfs_objects_failed_count
t.integer :attachments_count
t.integer :attachments_synced_count
t.integer :attachments_failed_count
t.integer :last_event_id
t.datetime_with_timezone :last_event_date
t.integer :cursor_last_event_id
t.datetime_with_timezone :cursor_last_event_date
t.datetime_with_timezone :created_at, null: false
t.datetime_with_timezone :updated_at, null: false
t.datetime_with_timezone :last_successful_status_check_at
t.string :status_message
end
end
end
......@@ -802,6 +802,30 @@ ActiveRecord::Schema.define(version: 20171107090120) do
add_index "geo_node_namespace_links", ["geo_node_id", "namespace_id"], name: "index_geo_node_namespace_links_on_geo_node_id_and_namespace_id", unique: true, using: :btree
add_index "geo_node_namespace_links", ["geo_node_id"], name: "index_geo_node_namespace_links_on_geo_node_id", using: :btree
create_table "geo_node_statuses", force: :cascade do |t|
t.integer "geo_node_id", null: false
t.integer "db_replication_lag_seconds"
t.integer "repositories_count"
t.integer "repositories_synced_count"
t.integer "repositories_failed_count"
t.integer "lfs_objects_count"
t.integer "lfs_objects_synced_count"
t.integer "lfs_objects_failed_count"
t.integer "attachments_count"
t.integer "attachments_synced_count"
t.integer "attachments_failed_count"
t.integer "last_event_id"
t.datetime_with_timezone "last_event_date"
t.integer "cursor_last_event_id"
t.datetime_with_timezone "cursor_last_event_date"
t.datetime_with_timezone "created_at", null: false
t.datetime_with_timezone "updated_at", null: false
t.datetime_with_timezone "last_successful_status_check_at"
t.string "status_message"
end
add_index "geo_node_statuses", ["geo_node_id"], name: "index_geo_node_statuses_on_geo_node_id", unique: true, using: :btree
create_table "geo_nodes", force: :cascade do |t|
t.string "schema"
t.string "host"
......@@ -2272,6 +2296,7 @@ ActiveRecord::Schema.define(version: 20171107090120) do
add_foreign_key "geo_event_log", "geo_repository_updated_events", column: "repository_updated_event_id", on_delete: :cascade
add_foreign_key "geo_node_namespace_links", "geo_nodes", on_delete: :cascade
add_foreign_key "geo_node_namespace_links", "namespaces", on_delete: :cascade
add_foreign_key "geo_node_statuses", "geo_nodes", on_delete: :cascade
add_foreign_key "geo_repositories_changed_events", "geo_nodes", on_delete: :cascade
add_foreign_key "geo_repository_created_events", "projects", on_delete: :cascade
add_foreign_key "geo_repository_renamed_events", "projects", on_delete: :cascade
......
......@@ -69,8 +69,8 @@ the `monitoring.sidekiq_exporter` configuration option in `gitlab.yml`.
|geo_last_event_timestamp | Gauge | 10.2 | UNIX timestamp of the latest event log entry on the primary | url
|geo_cursor_last_event_id | Gauge | 10.2 | Last database ID of the event log processed by the secondary | url
|geo_cursor_last_event_timestamp | Gauge | 10.2 | Last UNIX timestamp of the event log processed by the secondary | url
|geo_status_last_updated_timestamp | Gauge | 10.2 | Last timestamp when the status was successfully updated | url
|geo_status_failed_total | Counter | 10.2 | Number of times retrieving the status from the Geo Node failed | url
|geo_last_successful_status_check_timestamp | Gauge | Last timestamp when the status was successfully updated | url
## Metrics shared directory
......
......@@ -68,7 +68,7 @@ class Admin::GeoNodesController < Admin::ApplicationController
end
def status
status = Geo::NodeStatusService.new.call(@node)
status = Geo::NodeStatusFetchService.new.call(@node)
respond_to do |format|
format.json do
......
......@@ -126,6 +126,7 @@ module API
mount ::API::Files
mount ::API::Groups
mount ::API::Geo
mount ::API::GeoNodes
mount ::API::Internal
mount ::API::Issues
mount ::API::IssueLinks
......
......@@ -1018,24 +1018,14 @@ module API
expose :active?, as: :active
end
class GeoNodeStatus < Grape::Entity
class GeoNode < Grape::Entity
expose :id
expose :db_replication_lag_seconds
expose :health
expose :healthy?, as: :healthy
expose :repositories_count
expose :repositories_synced_count
expose :repositories_failed_count
expose :lfs_objects_count
expose :lfs_objects_synced_count
expose :lfs_objects_failed_count
expose :attachments_count
expose :attachments_synced_count
expose :attachments_failed_count
expose :last_event_id
expose :last_event_timestamp
expose :cursor_last_event_id
expose :cursor_last_event_timestamp
expose :url
expose :primary?, as: :primary
expose :enabled
expose :files_max_capacity
expose :repos_max_capacity
expose :clone_protocol
end
class PersonalAccessToken < Grape::Entity
......
......@@ -36,7 +36,8 @@ module API
authenticate_by_gitlab_geo_node_token!
require_node_to_be_secondary!
present GeoNodeStatus.new(id: Gitlab::Geo.current_node.id), with: Entities::GeoNodeStatus
status = ::GeoNodeStatus.current_node_status
present status, with: GeoNodeStatusEntity
end
end
......
module API
class GeoNodes < Grape::API
include PaginationParams
include APIGuard
before { authenticated_as_admin! }
resource :geo_nodes do
# Get all Geo node information
#
# Example request:
# GET /geo_nodes
desc 'Retrieves the available Geo nodes' do
success Entities::GeoNode
end
get do
nodes = GeoNode.all
present paginate(nodes), with: Entities::GeoNode
end
# Get all Geo node statuses
#
# Example request:
# GET /geo_nodes/status
desc 'Get status for all Geo nodes' do
success GeoNodeStatusEntity
end
get '/status' do
status = GeoNodeStatus.all
present paginate(status), with: GeoNodeStatusEntity
end
# Get all Geo node information
#
# Example request:
# GET /geo_nodes/:id
desc 'Get a single GeoNode' do
success Entities::GeoNode
end
params do
requires :id, type: Integer, desc: 'The ID of the node'
end
get ':id' do
node = GeoNode.find_by(id: params[:id])
not_found!('GeoNode') unless node
present node, with: Entities::GeoNode
end
# Get Geo metrics for a single node
#
# Example request:
# GET /geo_nodes/:id/status
desc 'Get metrics for a single Geo node' do
success Entities::GeoNode
end
params do
requires :id, type: Integer, desc: 'The ID of the node'
end
get ':id/status' do
geo_node = GeoNode.find(params[:id])
not_found('Geo node not found') unless geo_node
status =
if geo_node.current?
GeoNodeStatus.current_node_status
else
geo_node.status
end
not_found!('Status for Geo node not found') unless status
present status, with: ::GeoNodeStatusEntity
end
end
end
end
......@@ -279,11 +279,11 @@ describe Admin::GeoNodesController, :postgresql do
end
context 'with add-on license' do
let(:geo_node_status) { build(:geo_node_status, :healthy) }
let(:geo_node_status) { build(:geo_node_status, :healthy, geo_node: geo_node) }
before do
allow(Gitlab::Geo).to receive(:license_allows?).and_return(true)
allow_any_instance_of(Geo::NodeStatusService).to receive(:call).and_return(geo_node_status)
allow_any_instance_of(Geo::NodeStatusFetchService).to receive(:call).and_return(geo_node_status)
end
it 'returns the status' do
......
......@@ -2,6 +2,7 @@ module EE
module GeoHelpers
def stub_current_geo_node(node)
allow(::Gitlab::Geo).to receive(:current_node).and_return(node)
allow(node).to receive(:current?).and_return(true)
end
end
end
FactoryGirl.define do
factory :geo_node_status do
skip_create
sequence(:id)
geo_node
trait :healthy do
health nil
......
{
"type": "object",
"required" : [
"id",
"url",
"primary",
"enabled",
"files_max_capacity",
"repos_max_capacity",
"clone_protocol"
],
"properties" : {
"id": { "type": "integer" },
"url": { "type": ["string", "null"] },
"primary": { "type": "boolean" },
"enabled": { "type": "boolean" },
"files_max_capacity": { "type": "integer" },
"repos_max_capacity": { "type": "integer" },
"clone_protocol": { "type": ["string"] }
},
"additionalProperties": false
}
{
"type": "object",
"required" : [
"id",
"geo_node_id",
"healthy",
"health",
"attachments_count",
......@@ -20,9 +20,9 @@
"cursor_last_event_timestamp"
],
"properties" : {
"id": { "type": "integer" },
"geo_node_id": { "type": "integer" },
"healthy": { "type": "boolean" },
"health": { "type": "string" },
"health": { "type": ["string", "null"] },
"attachments_count": { "type": "integer" },
"attachments_failed_count": { "type": "integer" },
"attachments_synced_count": { "type": "integer" },
......@@ -39,7 +39,8 @@
"last_event_id": { "type": ["integer", "null"] },
"last_event_timestamp": { "type": ["integer", "null"] },
"cursor_last_event_id": { "type": ["integer", "null"] },
"cursor_last_event_timestamp": { "type": ["integer", "null"] }
"cursor_last_event_timestamp": { "type": ["integer", "null"] },
"last_successful_status_check_timestamp": { "type": ["integer", "null"] }
},
"additionalProperties": false
}
{
"type": "array",
"items": { "$ref": "geo_node_status.json" }
}
{
"type": "array",
"items": { "$ref": "geo_node.json" }
}
......@@ -138,7 +138,7 @@ describe GeoNode, type: :model do
end
it 'returns false when node is not the current node' do
stub_current_geo_node(double)
subject.port = Gitlab.config.gitlab.port + 1
expect(subject.current?).to eq false
end
......@@ -238,6 +238,18 @@ describe GeoNode, type: :model do
end
end
describe '#find_or_build_status' do
it 'returns a new status' do
status = new_node.find_or_build_status
expect(status).to be_a(GeoNodeStatus)
status.save
expect(new_node.find_or_build_status).to eq(status)
end
end
describe '#oauth_callback_url' do
let(:oauth_callback_url) { 'https://localhost:3000/gitlab/oauth/geo/callback' }
......@@ -332,6 +344,78 @@ describe GeoNode, type: :model do
end
end
describe '#lfs_objects_synced_count' do
context 'primary node' do
subject { primary_node }
it 'returns nil' do
expect(subject.lfs_objects_synced_count).to be_nil
end
end
context 'secondary node' do
subject { node }
it 'returns a value' do
expect(subject.lfs_objects_synced_count).to eq(0)
end
end
end
describe '#lfs_objects_failed_count' do
context 'primary node' do
subject { primary_node }
it 'returns nil' do
expect(subject.lfs_objects_failed_count).to be_nil
end
end
context 'secondary node' do
subject { node }
it 'returns a value' do
expect(subject.lfs_objects_failed_count).to eq(0)
end
end
end
describe '#attachments_synced_count' do
context 'primary node' do
subject { primary_node }
it 'returns nil' do
expect(subject.attachments_synced_count).to be_nil
end
end
context 'secondary node' do
subject { node }
it 'returns a value' do
expect(subject.attachments_synced_count).to eq(0)
end
end
end
describe '#attachments_failed_count' do
context 'primary node' do
subject { primary_node }
it 'returns nil' do
expect(subject.attachments_failed_count).to be_nil
end
end
context 'secondary node' do
subject { node }
it 'returns a value' do
expect(subject.attachments_failed_count).to eq(0)
end
end
end
describe '#geo_node_key' do
context 'primary node' do
it 'cannot be set' do
......
require 'spec_helper'
describe GeoNodeStatus do
set(:geo_node) { create(:geo_node, :primary) }
describe GeoNodeStatus, :geo do
include ::EE::GeoHelpers
set(:primary) { create(:geo_node, :primary) }
set(:secondary) { create(:geo_node) }
set(:group) { create(:group) }
set(:project_1) { create(:project, group: group) }
set(:project_2) { create(:project, group: group) }
set(:project_3) { create(:project) }
set(:project_4) { create(:project) }
subject { described_class.new }
subject { described_class.current_node_status }
before do
stub_current_geo_node(secondary)
end
describe '#healthy?' do
context 'when health is blank' do
it 'returns true' do
subject.health = ''
subject.status_message = ''
expect(subject.healthy?).to eq true
expect(subject.healthy?).to be true
end
end
context 'when health is present' do
it 'returns true' do
subject.status_message = 'Healthy'
expect(subject.healthy?).to be true
end
it 'returns false' do
subject.health = 'something went wrong'
subject.status_message = 'something went wrong'
expect(subject.healthy?).to eq false
expect(subject.healthy?).to be false
end
end
end
describe '#health' do
describe '#status_message' do
it 'delegates to the HealthCheck' do
subject.health = nil
expect(HealthCheck::Utils).to receive(:process_checks).with(['geo']).once
subject.health
subject
end
end
......@@ -53,27 +65,29 @@ describe GeoNodeStatus do
it 'does not count synced files that were replaced' do
user = create(:user, avatar: fixture_file_upload(Rails.root + 'spec/fixtures/dk.png', 'image/png'))
subject = described_class.new
expect(subject.attachments_count).to eq(1)
expect(subject.attachments_synced_count).to eq(0)
upload = Upload.find_by(model: user, uploader: 'AvatarUploader')
create(:geo_file_registry, :avatar, file_id: upload.id)
subject = described_class.new
subject = described_class.current_node_status
expect(subject.attachments_count).to eq(1)
expect(subject.attachments_synced_count).to eq(1)
user.update(avatar: fixture_file_upload(Rails.root + 'spec/fixtures/rails_sample.jpg', 'image/jpg'))
subject = described_class.new
subject = described_class.current_node_status
expect(subject.attachments_count).to eq(1)
expect(subject.attachments_synced_count).to eq(0)
upload = Upload.find_by(model: user, uploader: 'AvatarUploader')
create(:geo_file_registry, :avatar, file_id: upload.id)
subject = described_class.new
subject = described_class.current_node_status
expect(subject.attachments_count).to eq(1)
expect(subject.attachments_synced_count).to eq(1)
end
......@@ -116,7 +130,7 @@ describe GeoNodeStatus do
end
it 'returns the right percentage with group restrictions' do
geo_node.update_attribute(:namespaces, [group])
secondary.update_attribute(:namespaces, [group])
create(:geo_file_registry, :avatar, file_id: upload_1.id)
create(:geo_file_registry, :avatar, file_id: upload_2.id)
......@@ -133,6 +147,7 @@ describe GeoNodeStatus do
end
it "doesn't attempt to set replication lag if primary" do
stub_current_geo_node(primary)
expect(Gitlab::Geo::HealthCheck).not_to receive(:db_replication_lag_seconds)
expect(subject.db_replication_lag_seconds).to eq(nil)
......@@ -168,14 +183,14 @@ describe GeoNodeStatus do
end
it 'returns the right percentage with no group restrictions' do
create(:geo_file_registry, :lfs, file_id: lfs_object_project.lfs_object_id)
create(:geo_file_registry, :lfs, file_id: lfs_object_project.lfs_object_id, success: true)
expect(subject.lfs_objects_synced_in_percentage).to be_within(0.0001).of(25)
end
it 'returns the right percentage with group restrictions' do
geo_node.update_attribute(:namespaces, [group])
create(:geo_file_registry, :lfs, file_id: lfs_object_project.lfs_object_id)
secondary.update_attribute(:namespaces, [group])
create(:geo_file_registry, :lfs, file_id: lfs_object_project.lfs_object_id, success: true)
expect(subject.lfs_objects_synced_in_percentage).to be_within(0.0001).of(50)
end
......@@ -192,7 +207,7 @@ describe GeoNodeStatus do
end
it 'returns the right number of failed repos with group restrictions' do
geo_node.update_attribute(:namespaces, [group])
secondary.update_attribute(:namespaces, [group])
expect(subject.repositories_failed_count).to eq(1)
end
......@@ -210,17 +225,17 @@ describe GeoNodeStatus do
end
it 'returns the right percentage with group restrictions' do
geo_node.update_attribute(:namespaces, [group])
secondary.update_attribute(:namespaces, [group])
create(:geo_project_registry, :synced, project: project_1)
expect(subject.repositories_synced_in_percentage).to be_within(0.0001).of(50)
end
end
describe '#last_event_id and #last_event_timestamp' do
describe '#last_event_id and #last_event_date' do
it 'returns nil when no events are available' do
expect(subject.last_event_id).to be_nil
expect(subject.last_event_timestamp).to be_nil
expect(subject.last_event_date).to be_nil
end
it 'returns the latest event' do
......@@ -228,14 +243,14 @@ describe GeoNodeStatus do
event = create(:geo_event_log, created_at: created_at)
expect(subject.last_event_id).to eq(event.id)
expect(subject.last_event_timestamp).to eq(created_at.to_i)
expect(subject.last_event_date).to eq(created_at)
end
end
describe '#cursor_last_event_id and #cursor_last_event_timestamp' do
describe '#cursor_last_event_id and #cursor_last_event_date' do
it 'returns nil when no events are available' do
expect(subject.cursor_last_event_id).to be_nil
expect(subject.cursor_last_event_timestamp).to be_nil
expect(subject.cursor_last_event_date).to be_nil
end
it 'returns the latest event ID if secondary' do
......@@ -246,9 +261,10 @@ describe GeoNodeStatus do
end
it "doesn't attempt to retrieve cursor if primary" do
stub_current_geo_node(primary)
create(:geo_event_log_state)
expect(subject.cursor_last_event_timestamp).to eq(nil)
expect(subject.cursor_last_event_date).to eq(nil)
expect(subject.cursor_last_event_id).to eq(nil)
end
end
......@@ -264,40 +280,40 @@ describe GeoNodeStatus do
end
end
context 'when no values are available' do
it 'returns 0 for each attribute' do
allow(Gitlab::Geo::HealthCheck).to receive(:db_replication_lag_seconds).and_return(nil)
subject.attachments_count = nil
subject.attachments_synced_count = nil
subject.attachments_failed_count = nil
subject.lfs_objects_count = nil
subject.lfs_objects_synced_count = nil
subject.lfs_objects_failed_count = nil
subject.repositories_count = nil
subject.repositories_synced_count = nil
subject.repositories_failed_count = nil
subject.last_event_id = nil
subject.last_event_timestamp = nil
subject.cursor_last_event_id = nil
subject.cursor_last_event_timestamp = nil
expect(subject.db_replication_lag_seconds).to be_nil
expect(subject.repositories_count).to be_zero
expect(subject.repositories_synced_count).to be_zero
expect(subject.repositories_synced_in_percentage).to be_zero
expect(subject.repositories_failed_count).to be_zero
expect(subject.lfs_objects_count).to be_zero
expect(subject.lfs_objects_synced_count).to be_zero
expect(subject.lfs_objects_failed_count).to be_zero
expect(subject.lfs_objects_synced_in_percentage).to be_zero
expect(subject.attachments_count).to be_zero
expect(subject.attachments_synced_count).to be_zero
expect(subject.attachments_failed_count).to be_zero
expect(subject.attachments_synced_in_percentage).to be_zero
expect(subject.last_event_id).to be_nil
expect(subject.last_event_timestamp).to be_nil
expect(subject.cursor_last_event_id).to be_nil
expect(subject.cursor_last_event_timestamp).to be_nil
shared_examples 'timestamp parameters' do |timestamp_column, date_column|
it 'returns the value it was assigned via UNIX timestamp' do
now = Time.now.beginning_of_day.utc
subject.update_attribute(timestamp_column, now.to_i)
expect(subject.public_send(date_column)).to eq(now)
expect(subject.public_send(timestamp_column)).to eq(now.to_i)
end
end
describe '#last_successful_status_check_timestamp' do
it_behaves_like 'timestamp parameters', :last_successful_status_check_timestamp, :last_successful_status_check_at
end
describe '#last_event_timestamp' do
it_behaves_like 'timestamp parameters', :last_event_timestamp, :last_event_date
end
describe '#cursor_last_event_timestamp' do
it_behaves_like 'timestamp parameters', :cursor_last_event_timestamp, :cursor_last_event_date
end
describe '#from_json' do
it 'returns a new GeoNodeStatus excluding parameters' do
status = create(:geo_node_status)
data = status.as_json
data[:id] = 10000
result = GeoNodeStatus.from_json(data)
expect(result.id).to be_nil
expect(result.attachments_count).to eq(status.attachments_count)
expect(result.cursor_last_event_date).to eq(status.cursor_last_event_date)
end
end
end
require 'spec_helper'
describe API::GeoNodes, :geo, api: true do
include ApiHelpers
include ::EE::GeoHelpers
set(:primary) { create(:geo_node, :primary) }
set(:secondary) { create(:geo_node) }
set(:another_secondary) { create(:geo_node) }
set(:secondary_status) { create(:geo_node_status, :healthy, geo_node_id: secondary.id) }
set(:another_secondary_status) { create(:geo_node_status, :healthy, geo_node_id: another_secondary.id) }
let(:admin) { create(:admin) }
let(:user) { create(:user) }
describe 'GET /geo_nodes' do
it 'retrieves the Geo nodes if admin is logged in' do
get api("/geo_nodes", admin)
expect(response.status).to eq 200
expect(response).to match_response_schema('geo_nodes')
end
it 'denies access if not admin' do
get api('/geo_nodes', user)
expect(response.status).to eq 403
end
end
describe 'GET /geo_nodes/:id' do
it 'retrieves the Geo nodes if admin is logged in' do
get api("/geo_nodes/#{primary.id}", admin)
expect(response.status).to eq 200
expect(response).to match_response_schema('geo_node')
end
it 'denies access if not admin' do
get api('/geo_nodes', user)
expect(response.status).to eq 403
end
end
describe 'GET /geo_nodes/status' do
it 'retrieves the Geo nodes status if admin is logged in' do
get api("/geo_nodes/status", admin)
expect(response.status).to eq 200
expect(response).to match_response_schema('geo_node_statuses')
end
it 'denies access if not admin' do
get api('/geo_nodes', user)
expect(response.status).to eq 403
end
end
describe 'GET /geo_nodes/:id/status' do
it 'retrieves the Geo nodes status if admin is logged in' do
stub_current_geo_node(primary)
expect(GeoNodeStatus).not_to receive(:current_node_status)
get api("/geo_nodes/#{secondary.id}/status", admin)
expect(response.status).to eq 200
expect(response).to match_response_schema('geo_node_status')
end
it 'fetches the current node status' do
stub_current_geo_node(secondary)
expect(GeoNode).to receive(:find).and_return(secondary)
expect(GeoNodeStatus).to receive(:current_node_status).and_call_original
get api("/geo_nodes/#{secondary.id}/status", admin)
expect(response.status).to eq 200
expect(response).to match_response_schema('geo_node_status')
end
it 'denies access if not admin' do
get api('/geo_nodes', user)
expect(response.status).to eq 403
end
end
end
......@@ -3,7 +3,7 @@ require 'spec_helper'
describe GeoNodeStatusEntity, :postgresql do
let(:geo_node_status) do
GeoNodeStatus.new(
id: 1,
geo_node_id: 1,
health: '',
attachments_count: 329,
attachments_failed_count: 25,
......@@ -13,7 +13,8 @@ describe GeoNodeStatusEntity, :postgresql do
lfs_objects_synced_count: 123,
repositories_count: 10,
repositories_synced_count: 5,
repositories_failed_count: 0
repositories_failed_count: 0,
last_successful_status_check_timestamp: Time.now.beginning_of_day
)
end
......@@ -27,7 +28,7 @@ describe GeoNodeStatusEntity, :postgresql do
subject { entity.as_json }
it { is_expected.to have_key(:id) }
it { is_expected.to have_key(:geo_node_id) }
it { is_expected.to have_key(:healthy) }
it { is_expected.to have_key(:health) }
it { is_expected.to have_key(:attachments_count) }
......@@ -42,6 +43,7 @@ describe GeoNodeStatusEntity, :postgresql do
it { is_expected.to have_key(:repositories_failed_count) }
it { is_expected.to have_key(:repositories_synced_count)}
it { is_expected.to have_key(:repositories_synced_in_percentage) }
it { is_expected.to have_key(:last_successful_status_check_timestamp) }
describe '#healthy' do
context 'when node is healthy' do
......
......@@ -5,16 +5,12 @@ describe Geo::MetricsUpdateService, :geo do
subject { described_class.new }
let(:timestamp) { Time.now.to_i }
let(:event_date) { Time.now.utc }
before do
allow(Gitlab::Metrics).to receive(:prometheus_metrics_enabled?).and_return(true)
end
describe '#execute' do
before do
data = {
health: 'OK',
let(:data) do
{
success: true,
status_message: nil,
db_replication_lag_seconds: 0,
repositories_count: 10,
repositories_synced_count: 1,
......@@ -26,12 +22,20 @@ describe Geo::MetricsUpdateService, :geo do
attachments_synced_count: 30,
attachments_failed_count: 25,
last_event_id: 2,
last_event_timestamp: timestamp,
last_event_date: event_date,
cursor_last_event_id: 1,
cursor_last_event_timestamp: timestamp
cursor_last_event_date: event_date
}
end
before do
allow(Gitlab::Metrics).to receive(:prometheus_metrics_enabled?).and_return(true)
end
describe '#execute' do
before do
request = double(success?: true, parsed_response: data.stringify_keys, code: 200)
allow(Geo::NodeStatusService).to receive(:get).and_return(request)
allow(Geo::NodeStatusFetchService).to receive(:get).and_return(request)
end
context 'when node is the primary' do
......@@ -51,6 +55,23 @@ describe Geo::MetricsUpdateService, :geo do
expect(Gitlab::Metrics.registry.get(:geo_repositories).get({ url: secondary.url })).to eq(10)
expect(Gitlab::Metrics.registry.get(:geo_repositories).get({ url: secondary.url })).to eq(10)
end
it 'updates the GeoNodeStatus entry' do
expect { subject.execute }.to change { GeoNodeStatus.count }.by(2)
status = secondary.status.load_data_from_current_node
expect(status.geo_node_id).to eq(secondary.id)
expect(status.last_successful_status_check_at).not_to be_nil
end
it 'updates only the active node' do
secondary.update_attributes(enabled: false)
expect { subject.execute }.to change { GeoNodeStatus.count }.by(1)
expect(another_secondary.status).not_to be_nil
end
end
context 'when node is a secondary' do
......@@ -60,6 +81,7 @@ describe Geo::MetricsUpdateService, :geo do
before do
stub_current_geo_node(secondary)
allow(subject).to receive(:node_status).and_return(GeoNodeStatus.new(data))
end
it 'adds gauges for various metrics' do
......@@ -76,10 +98,10 @@ describe Geo::MetricsUpdateService, :geo do
expect(metric_value(:geo_attachments_synced)).to eq(30)
expect(metric_value(:geo_attachments_failed)).to eq(25)
expect(metric_value(:geo_last_event_id)).to eq(2)
expect(metric_value(:geo_last_event_timestamp)).to eq(timestamp.to_i)
expect(metric_value(:geo_last_event_timestamp)).to eq(event_date.to_i)
expect(metric_value(:geo_cursor_last_event_id)).to eq(1)
expect(metric_value(:geo_cursor_last_event_timestamp)).to eq(timestamp.to_i)
expect(metric_value(:geo_status_last_updated_timestamp)).to be_truthy
expect(metric_value(:geo_cursor_last_event_timestamp)).to eq(event_date.to_i)
expect(metric_value(:geo_last_successful_status_check_timestamp)).to be_truthy
end
it 'increments a counter when metrics fail to retrieve' do
......@@ -91,8 +113,12 @@ describe Geo::MetricsUpdateService, :geo do
expect { subject.execute }.to change { metric_value(:geo_status_failed_total) }.by(1)
end
it 'does not create GeoNodeStatus entries' do
expect { subject.execute }.to change { GeoNodeStatus.count }.by(0)
end
def metric_value(metric_name)
Gitlab::Metrics.registry.get(metric_name).get({ url: secondary.url })
Gitlab::Metrics.registry.get(metric_name)&.get({ url: secondary.url })
end
end
end
......
require 'spec_helper'
describe Geo::NodeStatusService do
describe Geo::NodeStatusFetchService, :geo do
include ::EE::GeoHelpers
set(:primary) { create(:geo_node, :primary) }
set(:secondary) { create(:geo_node) }
subject { described_class.new }
describe '#status_keys' do
it 'matches the serializer keys' do
exceptions = %w[
id
healthy
repositories_synced_in_percentage
lfs_objects_synced_in_percentage
attachments_synced_in_percentage
]
expected = GeoNodeStatusEntity
.new(GeoNodeStatus.new)
.as_json
.keys
.map(&:to_s) - exceptions
expect(subject.status_keys).to match_array(expected)
end
end
describe '#call' do
it 'parses a 401 response' do
request = double(success?: false,
......@@ -36,10 +18,40 @@ describe Geo::NodeStatusService do
status = subject.call(secondary)
expect(status.health).to eq("Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\nTest")
expect(status.status_message).to eq("Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\nTest")
end
it 'always reload GeoNodeStatus if current node' do
stub_current_geo_node(secondary)
expect(GeoNodeStatus).to receive(:current_node_status).and_call_original
status = subject.call(secondary)
expect(status).to be_a(GeoNodeStatus)
end
it 'ignores certain parameters' do
yesterday = Date.yesterday
request = double(success?: true,
code: 200,
message: 'Unauthorized',
parsed_response: {
'id' => 5000,
'last_successful_status_check_at' => yesterday,
'created_at' => yesterday,
'updated_at' => yesterday
})
allow(described_class).to receive(:get).and_return(request)
status = subject.call(secondary)
expect(status.id).not_to be(5000)
expect(status.last_successful_status_check_at).not_to be(yesterday)
expect(status.created_at).not_to be(yesterday)
expect(status.updated_at).not_to be(yesterday)
end
it 'parses a 200 response' do
it 'parses a 200 legacy response' do
data = { health: 'OK',
db_replication_lag_seconds: 0,
repositories_count: 10,
......@@ -73,7 +85,7 @@ describe Geo::NodeStatusService do
status = subject.call(secondary)
expect(status.health).to eq("Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\n")
expect(status.status_message).to eq("Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\n")
expect(status.success).to be false
end
......@@ -83,7 +95,7 @@ describe Geo::NodeStatusService do
status = subject.call(secondary)
expect(status.health).to eq(message)
expect(status.status_message).to eq(message)
end
it 'handles connection refused' do
......@@ -91,7 +103,7 @@ describe Geo::NodeStatusService do
status = subject.call(secondary)
expect(status.health).to eq('Connection refused - bad connection')
expect(status.status_message).to eq('Connection refused - bad connection')
end
it 'returns meaningful error message when primary uses incorrect db key' do
......@@ -99,7 +111,7 @@ describe Geo::NodeStatusService do
status = subject.call(secondary)
expect(status.health).to eq('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.')
expect(status.status_message).to eq('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.')
end
it 'gracefully handles case when primary is deleted' do
......@@ -107,7 +119,7 @@ describe Geo::NodeStatusService do
status = subject.call(secondary)
expect(status.health).to eq('This GitLab instance does not appear to be configured properly as a Geo node. Make sure the URLs are using the correct fully-qualified domain names.')
expect(status.status_message).to eq('This GitLab instance does not appear to be configured properly as a Geo node. Make sure the URLs are using the correct fully-qualified domain names.')
end
end
end
......@@ -56,23 +56,23 @@ describe Geo::PruneEventLogWorker, :geo do
let(:healthy_status) { build(:geo_node_status, :healthy) }
let(:unhealthy_status) { build(:geo_node_status, :unhealthy) }
let(:node_status_service) do
service = double
allow(Geo::NodeStatusService).to receive(:new).and_return(service)
service
end
it 'contacts all secondary nodes for their status' do
expect(node_status_service).to receive(:call).twice { healthy_status }
events = create_list(:geo_event_log, 5)
create(:geo_node_status, :healthy, cursor_last_event_id: events.last.id, geo_node_id: secondary.id)
create(:geo_node_status, :healthy, cursor_last_event_id: events[3].id, geo_node_id: secondary2.id)
expect(worker).to receive(:log_info).with('Delete Geo Event Log entries up to id', anything)
worker.perform
end
it 'aborts when there are unhealthy nodes' do
create_list(:geo_event_log, 2)
events = create_list(:geo_event_log, 2)
create(:geo_node_status, :healthy, cursor_last_event_id: events.last.id, geo_node_id: secondary.id)
create(:geo_node_status, :unhealthy, geo_node_id: secondary2.id)
expect(node_status_service).to receive(:call).twice.and_return(healthy_status, unhealthy_status)
expect(worker).to receive(:log_info).with('Could not get status of all nodes, not deleting any entries from Geo Event Log', unhealthy_node_count: 1)
expect { worker.perform }.not_to change { Geo::EventLog.count }
......@@ -81,11 +81,9 @@ describe Geo::PruneEventLogWorker, :geo do
it 'takes the integer-minimum value of all cursor_last_event_ids' do
events = create_list(:geo_event_log, 12)
allow(node_status_service).to receive(:call).twice.and_return(
build(:geo_node_status, :healthy, cursor_last_event_id: events[3]),
build(:geo_node_status, :healthy, cursor_last_event_id: events.last)
)
expect(worker).to receive(:log_info).with('Delete Geo Event Log entries up to id', geo_event_log_id: events[3])
create(:geo_node_status, :healthy, cursor_last_event_id: events[3].id, geo_node_id: secondary.id)
create(:geo_node_status, :healthy, cursor_last_event_id: events.last.id, geo_node_id: secondary2.id)
expect(worker).to receive(:log_info).with('Delete Geo Event Log entries up to id', geo_event_log_id: events[3].id)
expect { worker.perform }.to change { Geo::EventLog.count }.by(-3)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment