Commit dc14c91d authored by Andrew Newdigate

Adds chaos endpoints to Sidekiq

This allows the chaos endpoints to be invoked in Sidekiq so that this
environment can be tested for resilience.
parent f97a73fa
# frozen_string_literal: true # frozen_string_literal: true
class ChaosController < ActionController::Base class ChaosController < ActionController::Base
before_action :validate_chaos_secret, unless: :development? before_action :validate_chaos_secret, unless: :development_or_test?
before_action :request_start_time
def leakmem def leakmem
retainer = [] do_chaos :leak_mem, Chaos::LeakMemWorker, memory_mb, duration_s
# Add `n` 1mb chunks of memory to the retainer array
memory_mb.times { retainer << "x" * 1.megabyte }
Kernel.sleep(duration_left)
render plain: "OK"
end end
def cpu_spin def cpu_spin
rand while Time.now < expected_end_time do_chaos :cpu_spin, Chaos::CpuSpinWorker, duration_s
render plain: "OK"
end end
def db_spin def db_spin
while Time.now < expected_end_time do_chaos :db_spin, Chaos::DbSpinWorker, duration_s, interval_s
ActiveRecord::Base.connection.execute("SELECT 1")
end_interval_time = Time.now + [duration_s, interval_s].min
rand while Time.now < end_interval_time
end
end end
def sleep def sleep
Kernel.sleep(duration_left) do_chaos :sleep, Chaos::SleepWorker, duration_s
render plain: "OK"
end end
def kill def kill
Process.kill("KILL", Process.pid) do_chaos :kill, Chaos::KillWorker
end end
private private
def request_start_time def do_chaos(method, worker, *args)
@start_time ||= Time.now if async
end worker.perform_async(*args)
else
def expected_end_time Gitlab::Chaos.public_send(method, *args) # rubocop: disable GitlabSecurity/PublicSend
request_start_time + duration_s end
end
def duration_left render plain: "OK"
# returns 0 if over time
[expected_end_time - Time.now, 0].max
end end
def validate_chaos_secret def validate_chaos_secret
...@@ -91,7 +72,12 @@ class ChaosController < ActionController::Base ...@@ -91,7 +72,12 @@ class ChaosController < ActionController::Base
memory_mb.to_i memory_mb.to_i
end end
def development? def async
Rails.env.development? async = params[:async] || false
Gitlab::Utils.to_boolean(async)
end
def development_or_test?
Rails.env.development? || Rails.env.test?
end end
end end
...@@ -3,6 +3,12 @@ ...@@ -3,6 +3,12 @@
- auto_merge:auto_merge_process - auto_merge:auto_merge_process
- chaos:chaos_cpu_spin
- chaos:chaos_db_spin
- chaos:chaos_kill
- chaos:chaos_leak_mem
- chaos:chaos_sleep
- cronjob:admin_email - cronjob:admin_email
- cronjob:expire_build_artifacts - cronjob:expire_build_artifacts
- cronjob:gitlab_usage_ping - cronjob:gitlab_usage_ping
......
# frozen_string_literal: true
module Chaos
  # Background worker for the CPU-spin chaos operation. It lives in the
  # queue namespace configured by ChaosQueue and delegates all of the work
  # to Gitlab::Chaos.cpu_spin.
  class CpuSpinWorker
    include ApplicationWorker
    include ChaosQueue

    # duration_s - passed through unchanged to Gitlab::Chaos.cpu_spin.
    def perform(duration_s)
      Gitlab::Chaos.cpu_spin(duration_s)
    end
  end
end
# frozen_string_literal: true
module Chaos
  # Background worker for the DB-spin chaos operation. It lives in the
  # queue namespace configured by ChaosQueue and delegates all of the work
  # to Gitlab::Chaos.db_spin.
  class DbSpinWorker
    include ApplicationWorker
    include ChaosQueue

    # duration_s - passed through unchanged to Gitlab::Chaos.db_spin.
    # interval_s - passed through unchanged to Gitlab::Chaos.db_spin.
    def perform(duration_s, interval_s)
      Gitlab::Chaos.db_spin(duration_s, interval_s)
    end
  end
end
# frozen_string_literal: true
module Chaos
  # Background worker for the kill chaos operation. It lives in the queue
  # namespace configured by ChaosQueue and delegates to Gitlab::Chaos.kill,
  # so the process that picks up this job is the one that gets killed.
  class KillWorker
    include ApplicationWorker
    include ChaosQueue

    # Takes no arguments; simply invokes Gitlab::Chaos.kill.
    def perform
      Gitlab::Chaos.kill
    end
  end
end
# frozen_string_literal: true
module Chaos
  # Background worker for the memory-leak chaos operation. It lives in the
  # queue namespace configured by ChaosQueue and delegates all of the work
  # to Gitlab::Chaos.leak_mem.
  class LeakMemWorker
    include ApplicationWorker
    include ChaosQueue

    # memory_mb  - passed through unchanged to Gitlab::Chaos.leak_mem.
    # duration_s - passed through unchanged to Gitlab::Chaos.leak_mem.
    def perform(memory_mb, duration_s)
      Gitlab::Chaos.leak_mem(memory_mb, duration_s)
    end
  end
end
# frozen_string_literal: true
module Chaos
  # Background worker for the sleep chaos operation. It lives in the queue
  # namespace configured by ChaosQueue and delegates all of the work to
  # Gitlab::Chaos.sleep.
  class SleepWorker
    include ApplicationWorker
    include ChaosQueue

    # duration_s - passed through unchanged to Gitlab::Chaos.sleep.
    def perform(duration_s)
      Gitlab::Chaos.sleep(duration_s)
    end
  end
end
# frozen_string_literal: true
#
# Concern shared by the chaos workers: any class that includes it has
# `queue_namespace :chaos` evaluated in its own context.
module ChaosQueue
  extend ActiveSupport::Concern

  included { queue_namespace :chaos }
end
---
title: Adds chaos endpoints to Sidekiq
merge_request: 30814
author:
type: other
...@@ -116,7 +116,7 @@ Rails.application.routes.draw do ...@@ -116,7 +116,7 @@ Rails.application.routes.draw do
end end
end end
if ENV['GITLAB_CHAOS_SECRET'] || Rails.env.development? if ENV['GITLAB_CHAOS_SECRET'] || Rails.env.development? || Rails.env.test?
resource :chaos, only: [] do resource :chaos, only: [] do
get :leakmem get :leakmem
get :cpu_spin get :cpu_spin
......
...@@ -95,6 +95,7 @@ ...@@ -95,6 +95,7 @@
- [update_project_statistics, 1] - [update_project_statistics, 1]
- [phabricator_import_import_tasks, 1] - [phabricator_import_import_tasks, 1]
- [update_namespace_statistics, 1] - [update_namespace_statistics, 1]
- [chaos, 2]
# EE-specific queues # EE-specific queues
- [ldap_group_sync, 2] - [ldap_group_sync, 2]
......
...@@ -36,6 +36,10 @@ Replace `secret` with your own secret token. ...@@ -36,6 +36,10 @@ Replace `secret` with your own secret token.
Once you have enabled the chaos endpoints and restarted the application, you can start testing using the endpoints. Once you have enabled the chaos endpoints and restarted the application, you can start testing using the endpoints.
By default, when invoking a chaos endpoint, the web worker process which receives the request will handle it. This means, for example, that if the Kill
operation is invoked, the Puma or Unicorn worker process handling the request will be killed. To test these operations in Sidekiq, the `async` parameter on
each endpoint can be set to `true`. This will run the chaos process in a Sidekiq worker.
## Memory leaks ## Memory leaks
To simulate a memory leak in your application, use the `/-/chaos/leakmem` endpoint. To simulate a memory leak in your application, use the `/-/chaos/leakmem` endpoint.
...@@ -47,12 +51,14 @@ The memory is not retained after the request finishes. Once the request has comp ...@@ -47,12 +51,14 @@ The memory is not retained after the request finishes. Once the request has comp
GET /-/chaos/leakmem GET /-/chaos/leakmem
GET /-/chaos/leakmem?memory_mb=1024 GET /-/chaos/leakmem?memory_mb=1024
GET /-/chaos/leakmem?memory_mb=1024&duration_s=50 GET /-/chaos/leakmem?memory_mb=1024&duration_s=50
GET /-/chaos/leakmem?memory_mb=1024&duration_s=50&async=true
``` ```
| Attribute | Type | Required | Description | | Attribute | Type | Required | Description |
| ------------ | ------- | -------- | ---------------------------------------------------------------------------------- | | ------------ | ------- | -------- | ------------------------------------------------------------------------------------ |
| `memory_mb` | integer | no | How much memory, in MB, should be leaked. Defaults to 100MB. | | `memory_mb` | integer | no | How much memory, in MB, should be leaked. Defaults to 100MB. |
| `duration_s` | integer | no | Minimum duration_s, in seconds, that the memory should be retained. Defaults to 30s. | | `duration_s` | integer | no | Minimum duration_s, in seconds, that the memory should be retained. Defaults to 30s. |
| `async` | boolean | no | Set to true to leak memory in a Sidekiq background worker process |
```bash ```bash
curl http://localhost:3000/-/chaos/leakmem?memory_mb=1024&duration_s=10 --header 'X-Chaos-Secret: secret' curl http://localhost:3000/-/chaos/leakmem?memory_mb=1024&duration_s=10 --header 'X-Chaos-Secret: secret'
...@@ -69,11 +75,13 @@ If you're using Unicorn, this is done by killing the worker process. ...@@ -69,11 +75,13 @@ If you're using Unicorn, this is done by killing the worker process.
``` ```
GET /-/chaos/cpu_spin GET /-/chaos/cpu_spin
GET /-/chaos/cpu_spin?duration_s=50 GET /-/chaos/cpu_spin?duration_s=50
GET /-/chaos/cpu_spin?duration_s=50&async=true
``` ```
| Attribute | Type | Required | Description | | Attribute | Type | Required | Description |
| ------------ | ------- | -------- | --------------------------------------------------------------------- | | ------------ | ------- | -------- | --------------------------------------------------------------------- |
| `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s | | `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s |
| `async` | boolean | no | Set to true to consume CPU in a Sidekiq background worker process |
```bash ```bash
curl http://localhost:3000/-/chaos/cpu_spin?duration_s=60 --header 'X-Chaos-Secret: secret' curl http://localhost:3000/-/chaos/cpu_spin?duration_s=60 --header 'X-Chaos-Secret: secret'
...@@ -91,12 +99,14 @@ If you're using Unicorn, this is done by killing the worker process. ...@@ -91,12 +99,14 @@ If you're using Unicorn, this is done by killing the worker process.
``` ```
GET /-/chaos/db_spin GET /-/chaos/db_spin
GET /-/chaos/db_spin?duration_s=50 GET /-/chaos/db_spin?duration_s=50
GET /-/chaos/db_spin?duration_s=50&async=true
``` ```
| Attribute | Type | Required | Description | | Attribute | Type | Required | Description |
| ------------ | ------- | -------- | --------------------------------------------------------------------- | | ------------ | ------- | -------- | --------------------------------------------------------------------------- |
| `interval_s` | float | no | Interval, in seconds, for every DB request. Defaults to 1s | | `interval_s` | float | no | Interval, in seconds, for every DB request. Defaults to 1s |
| `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s | | `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s |
| `async` | boolean | no | Set to true to perform the operation in a Sidekiq background worker process |
```bash ```bash
curl http://localhost:3000/-/chaos/db_spin?interval_s=1&duration_s=60 --header 'X-Chaos-Secret: secret' curl http://localhost:3000/-/chaos/db_spin?interval_s=1&duration_s=60 --header 'X-Chaos-Secret: secret'
...@@ -112,11 +122,13 @@ As with the CPU Spin endpoint, this may lead to your request timing out if durat ...@@ -112,11 +122,13 @@ As with the CPU Spin endpoint, this may lead to your request timing out if durat
``` ```
GET /-/chaos/sleep GET /-/chaos/sleep
GET /-/chaos/sleep?duration_s=50 GET /-/chaos/sleep?duration_s=50
GET /-/chaos/sleep?duration_s=50&async=true
``` ```
| Attribute | Type | Required | Description | | Attribute | Type | Required | Description |
| ------------ | ------- | -------- | ---------------------------------------------------------------------- | | ------------ | ------- | -------- | ---------------------------------------------------------------------- |
| `duration_s` | integer | no | Duration, in seconds, that the request will sleep for. Defaults to 30s | | `duration_s` | integer | no | Duration, in seconds, that the request will sleep for. Defaults to 30s |
| `async` | boolean | no | Set to true to sleep in a Sidekiq background worker process |
```bash ```bash
curl http://localhost:3000/-/chaos/sleep?duration_s=60 --header 'X-Chaos-Secret: secret' curl http://localhost:3000/-/chaos/sleep?duration_s=60 --header 'X-Chaos-Secret: secret'
...@@ -132,8 +144,13 @@ Since this endpoint uses the `KILL` signal, the worker is not given a chance to ...@@ -132,8 +144,13 @@ Since this endpoint uses the `KILL` signal, the worker is not given a chance to
``` ```
GET /-/chaos/kill GET /-/chaos/kill
GET /-/chaos/kill?async=true
``` ```
| Attribute | Type | Required | Description |
| ------------ | ------- | -------- | ---------------------------------------------------------------------- |
| `async` | boolean | no | Set to true to kill a Sidekiq background worker process |
```bash ```bash
curl http://localhost:3000/-/chaos/kill --header 'X-Chaos-Secret: secret' curl http://localhost:3000/-/chaos/kill --header 'X-Chaos-Secret: secret'
curl http://localhost:3000/-/chaos/kill?token=secret curl http://localhost:3000/-/chaos/kill?token=secret
......
# frozen_string_literal: true
module Gitlab
  # Chaos methods for GitLab.
  # See https://docs.gitlab.com/ee/development/chaos_endpoints.html for more details.
  #
  # Every method runs in, and blocks, the calling process until it is done.
  class Chaos
    # leak_mem retains roughly `memory_mb` megabytes of memory and then
    # sleeps for whatever remains of `duration_s`. The time spent allocating
    # counts towards the duration. The retaining array is local, so nothing
    # keeps the memory referenced once the method returns.
    #
    # memory_mb  - Integer number of 1 MB chunks to allocate.
    # duration_s - minimum number of seconds the memory is held.
    def self.leak_mem(memory_mb, duration_s)
      start_time = Time.now

      retainer = []
      # Add `n` 1mb chunks of memory to the retainer array
      memory_mb.times { retainer << "x" * 1.megabyte }

      # Never pass a negative value to sleep if allocation overran the duration.
      duration_left = [start_time + duration_s - Time.now, 0].max
      Kernel.sleep(duration_left)
    end

    # cpu_spin busy-loops on the calling thread for `duration_s` seconds.
    def self.cpu_spin(duration_s)
      expected_end_time = Time.now + duration_s

      rand while Time.now < expected_end_time
    end

    # db_spin issues a `SELECT 1` once every `interval_s` seconds, busy-looping
    # between queries, until `duration_s` seconds have elapsed.
    #
    # duration_s - total number of seconds to run for.
    # interval_s - number of seconds between consecutive queries.
    def self.db_spin(duration_s, interval_s)
      expected_end_time = Time.now + duration_s

      while Time.now < expected_end_time
        ActiveRecord::Base.connection.execute("SELECT 1")

        # Clamp the busy interval to the overall deadline so the final
        # interval cannot run past `duration_s`.
        end_interval_time = [Time.now + interval_s, expected_end_time].min
        rand while Time.now < end_interval_time
      end
    end

    # sleep will sleep for the specified duration
    def self.sleep(duration_s)
      Kernel.sleep(duration_s)
    end

    # Kill will send a SIGKILL signal to the current process
    def self.kill
      Process.kill("KILL", Process.pid)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Verifies that each chaos endpoint calls Gitlab::Chaos directly by default
# and enqueues the matching Chaos::*Worker when the `async` param is given.
describe ChaosController do
  describe '#leakmem' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:leak_mem).with(100, 30.seconds)

      get :leakmem

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:leak_mem).with(1, 2.seconds)

      get :leakmem, params: { memory_mb: 1, duration_s: 2 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::LeakMemWorker).to receive(:perform_async).with(100, 30.seconds)

      get :leakmem, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  describe '#cpu_spin' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:cpu_spin).with(30.seconds)

      get :cpu_spin

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:cpu_spin).with(3.seconds)

      get :cpu_spin, params: { duration_s: 3 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::CpuSpinWorker).to receive(:perform_async).with(30.seconds)

      get :cpu_spin, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  describe '#db_spin' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:db_spin).with(30.seconds, 1.second)

      get :db_spin

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:db_spin).with(4.seconds, 5.seconds)

      get :db_spin, params: { duration_s: 4, interval_s: 5 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::DbSpinWorker).to receive(:perform_async).with(30.seconds, 1.second)

      get :db_spin, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  describe '#sleep' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:sleep).with(30.seconds)

      get :sleep

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls synchronously with params' do
      expect(Gitlab::Chaos).to receive(:sleep).with(5.seconds)

      get :sleep, params: { duration_s: 5 }

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::SleepWorker).to receive(:perform_async).with(30.seconds)

      get :sleep, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end

  describe '#kill' do
    it 'calls synchronously' do
      expect(Gitlab::Chaos).to receive(:kill).with(no_args)

      get :kill

      expect(response).to have_gitlab_http_status(200)
    end

    it 'calls asynchronously' do
      expect(Chaos::KillWorker).to receive(:perform_async).with(no_args)

      get :kill, params: { async: 1 }

      expect(response).to have_gitlab_http_status(200)
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment