Commit 6eb87b64 authored by Dylan Griffith

Use Sampler instead of worker for monitoring Global Search queue

In https://gitlab.com/gitlab-org/gitlab/-/merge_requests/27384 we
introduced tracking of the size of the buffer queue used for
incremental Elasticsearch updates. This was implemented as a worker
that runs every minute, but that approach has some problems: the job
runs on a random Sidekiq node each time, so the data in Prometheus
appears as multiple unpredictable series and there is no easy way to
know which one is the most up-to-date metric. For this reason it makes
more sense to run the measurement predictably, and on all Sidekiq
nodes, so that even if one node is down we would still see the queue
size growing.
parent 9aa7b50e
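
For context on the change itself: a cron worker enqueues one job per tick and whichever Sidekiq node picks it up reports the metric, while a sampler is an interval loop that runs inside every process that starts it. A minimal sketch of that interval-loop pattern follows; the real Gitlab::Metrics::Samplers::BaseSampler is not part of this diff, so the class below is an illustrative assumption, not the actual implementation.

# Editorial sketch (assumption): the general shape of an interval-based sampler.
# Each process that calls #start reports its own measurements, which is why the
# commit message notes the metric now comes from every Sidekiq node.
class IntervalSamplerSketch
  def initialize(interval)
    @interval = interval # seconds between consecutive samples
  end

  def start
    Thread.new do
      loop do
        sample
        sleep(@interval)
      end
    end
  end

  def sample
    raise NotImplementedError, 'subclasses implement the actual measurement'
  end
end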
@@ -477,11 +477,6 @@ production: &base
     elastic_index_bulk_cron_worker:
       cron: "*/1 * * * *"
-
-    # Elasticsearch metrics
-    # NOTE: This will only take effect if Elasticsearch is enabled.
-    elastic_metrics_update_worker:
-      cron: "*/1 * * * *"
 
   registry:
     # enabled: true
     # host: registry.example.com
@@ -546,9 +546,6 @@ Gitlab.ee do
   Settings.cron_jobs['elastic_index_bulk_cron_worker'] ||= Settingslogic.new({})
   Settings.cron_jobs['elastic_index_bulk_cron_worker']['cron'] ||= '*/1 * * * *'
   Settings.cron_jobs['elastic_index_bulk_cron_worker']['job_class'] ||= 'ElasticIndexBulkCronWorker'
-  Settings.cron_jobs['elastic_metrics_update_worker'] ||= Settingslogic.new({})
-  Settings.cron_jobs['elastic_metrics_update_worker']['cron'] ||= '*/1 * * * *'
-  Settings.cron_jobs['elastic_metrics_update_worker']['job_class'] ||= 'ElasticMetricsUpdateWorker'
   Settings.cron_jobs['sync_seat_link_worker'] ||= Settingslogic.new({})
   Settings.cron_jobs['sync_seat_link_worker']['cron'] ||= "#{rand(60)} 0 * * *"
   Settings.cron_jobs['sync_seat_link_worker']['job_class'] = 'SyncSeatLinkWorker'
@@ -725,6 +722,7 @@ Settings.monitoring['ip_whitelist'] ||= ['127.0.0.1/8']
 Settings.monitoring['unicorn_sampler_interval'] ||= 10
 Settings.monitoring['puma_sampler_interval'] ||= 5
 Settings.monitoring['ruby_sampler_interval'] ||= 60
+Settings.monitoring['global_search_sampler_interval'] ||= 60
 Settings.monitoring['sidekiq_exporter'] ||= Settingslogic.new({})
 Settings.monitoring.sidekiq_exporter['enabled'] ||= false
 Settings.monitoring.sidekiq_exporter['address'] ||= 'localhost'
@@ -43,6 +43,10 @@ if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled?
     defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change
 
     Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start
+
+    if Gitlab.ee? && Gitlab::Runtime.sidekiq?
+      Gitlab::Metrics::Samplers::GlobalSearchSampler.instance(Settings.monitoring.global_search_sampler_interval).start
+    end
   rescue IOError => e
     Gitlab::ErrorTracking.track_exception(e)
     Gitlab::Metrics.error_detected!
@@ -31,13 +31,6 @@
   :resource_boundary: :unknown
   :weight: 1
   :idempotent: true
-- :name: cronjob:elastic_metrics_update
-  :feature_category: :global_search
-  :has_external_dependencies:
-  :urgency: :low
-  :resource_boundary: :unknown
-  :weight: 1
-  :idempotent: true
 - :name: cronjob:geo_container_repository_sync_dispatch
   :feature_category: :geo_replication
   :has_external_dependencies:
-# frozen_string_literal: true
-
-class ElasticMetricsUpdateWorker
-  include ApplicationWorker
-  include ExclusiveLeaseGuard
-  # rubocop:disable Scalability/CronWorkerContext
-  # This worker does not perform work scoped to a context
-  include CronjobQueue
-  # rubocop:enable Scalability/CronWorkerContext
-
-  feature_category :global_search
-  idempotent!
-
-  LEASE_TIMEOUT = 5.minutes
-
-  def perform
-    try_obtain_lease { Elastic::MetricsUpdateService.new.execute }
-  end
-
-  private
-
-  def lease_timeout
-    LEASE_TIMEOUT
-  end
-end
+# frozen_string_literal: true
+
+module Gitlab
+  module Metrics
+    module Samplers
+      class GlobalSearchSampler < BaseSampler
+        def sample
+          ::Elastic::MetricsUpdateService.new.execute
+        end
+      end
+    end
+  end
+end
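
A subclass like the one above only implements #sample; starting the loop and the interval handling come from BaseSampler, wired up in the Prometheus initializer hunk earlier. As a rough standalone illustration (the hard-coded 60 is an assumption; the real value comes from Settings.monitoring.global_search_sampler_interval):

# Illustrative use only, mirroring the initializer change above.
sampler = Gitlab::Metrics::Samplers::GlobalSearchSampler.instance(60)
sampler.start # invokes #sample, and so Elastic::MetricsUpdateService, every 60 seconds in this process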
@@ -2,18 +2,16 @@
 
 require 'spec_helper'
 
-describe ElasticMetricsUpdateWorker do
-  include ExclusiveLeaseHelpers
-
-  describe '.perform' do
-    it 'executes the service under an exclusive lease' do
-      expect_to_obtain_exclusive_lease('elastic_metrics_update_worker')
-
+describe Gitlab::Metrics::Samplers::GlobalSearchSampler do
+  subject { described_class.new(60.seconds) }
+
+  describe '#sample' do
+    it 'invokes the Elastic::MetricsUpdateService' do
       expect_next_instance_of(::Elastic::MetricsUpdateService) do |service|
         expect(service).to receive(:execute)
       end
 
-      described_class.new.perform
+      subject.sample
     end
   end
 end