Commit 06132caf authored by GitLab Bot's avatar GitLab Bot

Automatic merge of gitlab-org/gitlab master

parents 7da88988 e13a197c
......@@ -36,7 +36,7 @@
}
.with-performance-bar .whats-new-drawer {
margin-top: calc(#{$performance-bar-height} + #{$header-height});
margin-top: $performance-bar-height + $header-height;
}
.with-system-header .whats-new-drawer {
......
......@@ -95,10 +95,6 @@ module ProjectFeaturesCompatibility
# attribute.
def container_registry_enabled=(value)
write_feature_attribute_boolean(:container_registry_access_level, value)
# TODO: Remove this when we remove the projects.container_registry_enabled
# column. https://gitlab.com/gitlab-org/gitlab/-/issues/335425
super
end
private
......
......@@ -43,6 +43,8 @@ class Project < ApplicationRecord
extend Gitlab::ConfigHelper
ignore_columns :container_registry_enabled, remove_after: '2021-09-22', remove_with: '14.4'
BoardLimitExceeded = Class.new(StandardError)
ignore_columns :mirror_last_update_at, :mirror_last_successful_update_at, remove_after: '2021-09-22', remove_with: '14.4'
......
.gl-alert.gl-alert-warning.js-recovery-settings-callout{ role: 'alert', data: { feature_id: "account_recovery_regular_check", dismiss_endpoint: user_callouts_path, defer_links: "true" } }
%button.js-close.gl-alert-dismiss.gl-cursor-pointer{ type: 'button', 'aria-label' => _('Dismiss') }
= sprite_icon('close', css_class: 'gl-icon')
= render 'shared/global_alert',
variant: :warning,
alert_class: 'js-recovery-settings-callout',
alert_data: { feature_id: 'account_recovery_regular_check', dismiss_endpoint: user_callouts_path, defer_links: 'true' } do
.gl-alert-body
- account_link_start = '<a class="deferred-link" href="%{url}">'.html_safe % { url: profile_account_path }
= _("Please ensure your account's %{account_link_start}recovery settings%{account_link_end} are up to date.").html_safe % { account_link_start: account_link_start, account_link_end: '</a>'.html_safe }
= s_('Profiles|We recommend you ensure two-factor authentication is enabled and the settings are up to date.')
= link_to _('Learn more.'), help_page_path('user/profile/account/two_factor_authentication'), target: '_blank', rel: 'noopener noreferrer'
.gl-alert-actions
= link_to profile_two_factor_auth_path, class: 'deferred-link btn gl-alert-action btn-confirm btn-md gl-button' do
= s_('Profiles|Manage two-factor authentication')
---
stage: Enablement
group: Database
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
comments: false
description: 'Learn how to scale the database through the use of best-in-class database scalability patterns'
---
# Database Scalability Patterns
- [Read-mostly](read_mostly.md)
- [Time-decay](time_decay.md)
---
stage: Enablement
group: Database
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
comments: false
description: 'Learn how to operate on read-mostly data at scale'
---
# Read-mostly data
[Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/326037) in GitLab 14.0.
This document describes the *read-mostly* pattern introduced in the
[Database Scalability Working Group](https://about.gitlab.com/company/team/structure/working-groups/database-scalability/#read-mostly-data).
We discuss the characteristics of *read-mostly* data and propose best practices for GitLab development
to consider in this context.
## Characteristics of read-mostly data
As the name suggests, *read-mostly* data is data that is read much more often than it is
updated. Writing this data through updates, inserts, or deletes is a very rare event compared to
reading this data.
In addition, *read-mostly* data in this context is typically a small dataset. We explicitly don't deal
with large datasets here, even though they often have a "write once, read often" characteristic, too.
### Example: license data
Let's introduce a canonical example: license data in GitLab. A GitLab instance may have a license
attached to use GitLab enterprise features. This license data is held instance-wide, that
is, typically only a few relevant records exist. This information is kept in the `licenses`
table, which is very small.
We consider this *read-mostly* data, because it matches the characteristics outlined above:
- **Rare writes:** license data very rarely sees any writes after the license has been inserted.
- **Frequent reads:** license data is read extremely often to check if enterprise features can be used.
- **Small size:** this dataset is very small. On GitLab.com we have 5 records at < 50 kB total relation size.
### Effects of *read-mostly* data at scale
Given this dataset is small and read very often, we can expect data to nearly always reside in
database caches and/or database disk caches. Thus, the concern with *read-mostly* data is typically
not around database I/O overhead, because we typically don't read data from disk anyway.
However, considering the high frequency reads, this has potential to incur overhead in terms of
database CPU load and database context switches. Additionally, those high frequency queries go
through the whole database stack. They also cause overhead on the database connection
multiplexing components and load balancers. Also, the application spends cycles preparing and
sending queries to retrieve the data, deserializing the results, and allocating new objects to
represent the information gathered - all at a high frequency.
In the example of license data above, the query to read license data was
[identified](https://gitlab.com/gitlab-org/gitlab/-/issues/292900) to stand out in terms of query
frequency. In fact, we were seeing around 6,000 queries per second (QPS) on the cluster during peak
times. With the cluster size at that time, we were seeing about 1,000 QPS on each replica, and fewer
than 400 QPS on the primary at peak times. The difference is explained by our
[database load balancing for scaling reads](https://gitlab.com/gitlab-org/gitlab/-/blob/master/ee/lib/gitlab/database/load_balancing.rb),
which favors replicas for pure read-only transactions.
![Licenses Calls](img/read_mostly_licenses_calls_v14_2.png)
The overall transaction throughput on the database primary at the time varied between 50,000 and
70,000 transactions per second (TPS). In comparison, this query frequency only takes a small
portion of the overall query frequency. However, we do expect this to still have considerable
overhead in terms of context switches. It is worth removing this overhead, if we can.
## How to recognize read-mostly data
It can be difficult to recognize *read-mostly* data, even though there are clear cases like in our
example.
One approach is to look at the [read/write ratio and statistics from, for example, the primary](https://bit.ly/3frdtyz). Here, we look at the top 20 tables by their read/write ratio over 60 minutes (taken at a peak traffic time):
```plaintext
bottomk(20,
avg by (relname, fqdn) (
(
rate(pg_stat_user_tables_seq_tup_read{env="gprd"}[1h])
+
rate(pg_stat_user_tables_idx_tup_fetch{env="gprd"}[1h])
) /
(
rate(pg_stat_user_tables_seq_tup_read{env="gprd"}[1h])
+ rate(pg_stat_user_tables_idx_tup_fetch{env="gprd"}[1h])
+ rate(pg_stat_user_tables_n_tup_ins{env="gprd"}[1h])
+ rate(pg_stat_user_tables_n_tup_upd{env="gprd"}[1h])
+ rate(pg_stat_user_tables_n_tup_del{env="gprd"}[1h])
)
) and on (fqdn) (pg_replication_is_replica == 0)
)
```
This yields a good impression of which tables are much more often read than written (on the database
primary):
![Read Write Ratio TOP20](img/read_mostly_readwriteratio_v14_2.png)
From here, we can [zoom](https://bit.ly/2VmloX1) in on, for example, `gitlab_subscriptions` and see that index reads peak at above 10k tuples per second overall (there are no sequential scans):
![Subscriptions: reads](img/read_mostly_subscriptions_reads_v14_2.png)
We very rarely write to the table:
![Subscriptions: writes](img/read_mostly_subscriptions_writes_v14_2.png)
Additionally, the table is only 400 MB in size - so this may be another candidate we may want to
consider in this pattern (see [#327483](https://gitlab.com/gitlab-org/gitlab/-/issues/327483)).
## Best practices for handling read-mostly data at scale
### Cache read-mostly data
To reduce the database overhead, we implement a cache for the data and thus significantly
reduce the query frequency on the database side. There are different scopes for caching available:
- `RequestStore`: per-request in-memory cache (based on the [`request_store` gem](https://github.com/steveklabnik/request_store))
- [`ProcessMemoryCache`](https://gitlab.com/gitlab-org/gitlab/blob/master/lib/gitlab/process_memory_cache.rb#L4): per-process in-memory cache (an `ActiveSupport::Cache::MemoryStore`)
- [`Gitlab::Redis::Cache`](https://gitlab.com/gitlab-org/gitlab/blob/master/lib/gitlab/redis/cache.rb) and `Rails.cache`: full-blown cache in Redis
Continuing the above example, we had a `RequestStore` in place to cache license information on a
per-request basis. However, that still leads to one query per request. When we started to cache license information
[using a process-wide in-memory cache](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/50318)
for 1 second, query frequency dramatically dropped:
![Licenses Calls - Fixed](img/read_mostly_licenses_fixed_v14_2.png)
The choice of caching here highly depends on the characteristics of the data in question. A very small
dataset like license data that is nearly never updated is a good candidate for in-memory caching.
A per-process cache is favorable here, because this unties the cache refresh rate from the incoming
request rate.
A caveat of caching in Redis is that our Redis setup currently does not use Redis secondaries, and we
rely on a single node for caching. That is, we need to strike a balance to avoid Redis falling over due to
increased pressure. In comparison, reading data from PostgreSQL replicas can be distributed across
several read-only replicas. Even though a query to the database might be more expensive, the
load is balanced across more nodes.
### Read read-mostly data from replica
With or without caching implemented, we also must make sure to read data from database replicas if
we can. This supports our efforts to scale reads across many database replicas and removes
unnecessary workload from the database primary.
GitLab [database load balancing for reads](https://gitlab.com/gitlab-org/gitlab/-/blob/master/ee/lib/gitlab/database/load_balancing.rb)
sticks to the primary after a first write or when opening an
explicit transaction. In the context of *read-mostly* data, we strive to read this data outside of a
transaction scope and before doing any writes. This is often possible given that this data is only
seldom updated (and thus we're often not concerned with reading slightly stale data, for example).
However, it can be non-obvious that a query cannot be sent to a replica because of a previous
write or transaction. Hence, when we encounter *read-mostly* data, it is a good practice to check the
wider context and make sure this data can be read from a replica.
- breadcrumb_title _("Scan Policies")
- breadcrumb_title _("Policies")
- disable_security_policy_project = !can_update_security_orchestration_policy_project?(project)
#js-security-policies-list{ data: { assigned_policy_project: assigned_policy_project(project).to_json,
......
......@@ -149,7 +149,7 @@ module EE
end
::Sidebars::MenuItem.new(
title: _('Scan Policies'),
title: _('Policies'),
link: project_security_policy_path(context.project),
active_routes: { controller: ['projects/security/policies'] },
item_id: :scan_policies
......
......@@ -5,25 +5,27 @@ require 'spec_helper'
RSpec.describe 'Account recovery regular check callout' do
context 'when signed in' do
let(:user) { create(:user, created_at: 4.months.ago ) }
let(:message) { "Please ensure your account's recovery settings are up to date." }
let(:message) { "We recommend you ensure two-factor authentication is enabled and the settings are up to date." }
let(:action_button) { 'Manage two-factor authentication' }
before do
allow(Gitlab).to receive(:com?) { true }
gitlab_sign_in(user)
sign_in(user)
end
it 'shows callout if not dismissed' do
visit root_dashboard_path
expect(page).to have_content(message)
expect(page).to have_link(action_button, href: profile_two_factor_auth_path)
end
it 'hides callout when user opens profile', :js do
it 'hides callout when user clicks action button', :js do
visit root_dashboard_path
expect(page).to have_content(message)
click_link 'recovery settings'
click_link action_button
wait_for_requests
expect(page).not_to have_content(message)
......
......@@ -193,7 +193,7 @@ RSpec.describe Sidebars::Projects::Menus::SecurityComplianceMenu do
end
end
describe 'Scan Policies' do
describe 'Policies' do
let(:item_id) { :scan_policies }
context 'when feature flag :security_orchestration_policies_configuration is enabled' do
......@@ -202,11 +202,11 @@ RSpec.describe Sidebars::Projects::Menus::SecurityComplianceMenu do
stub_licensed_features(security_orchestration_policies: true)
end
context 'when user can access scan policies' do
context 'when user can access policies tab' do
it { is_expected.not_to be_nil }
end
context 'when user cannot access scan policies' do
context 'when user cannot access policies tab' do
let(:user) { nil }
it { is_expected.to be_nil }
......
......@@ -203,8 +203,8 @@ RSpec.describe 'layouts/nav/sidebar/_project' do
expect(rendered).to have_link('Threat Monitoring', href: project_threat_monitoring_path(project))
end
it 'scan policies link is visible' do
expect(rendered).to have_link('Scan Policies', href: project_security_policy_path(project))
it 'policies link is visible' do
expect(rendered).to have_link('Policies', href: project_security_policy_path(project))
end
it 'security configuration link is visible' do
......
......@@ -39,7 +39,7 @@ module Gitlab
# Writes the given markdown cache columns to the record via update_columns.
#
# updates - Hash of column values, passed unchanged to update_columns.
#
# Returns early (without writing) when the record is not persisted, when
# Gitlab::Database is not in read-write mode, or when this object's
# cached_markdown_version is lower than the version already stored in the
# database.
def save_markdown(updates)
  return unless persisted? && Gitlab::Database.read_write?

  # Either version can be nil (the persisted column may never have been
  # written). Comparing raw values with `<` raises in that case, so both
  # sides are coerced with to_i first (nil.to_i == 0).
  return if cached_markdown_version.to_i < cached_markdown_version_in_database.to_i

  update_columns(updates)
end
......
......@@ -24720,9 +24720,6 @@ msgstr ""
msgid "Please enable and migrate to hashed storage to avoid security issues and ensure data integrity. %{migrate_link}"
msgstr ""
msgid "Please ensure your account's %{account_link_start}recovery settings%{account_link_end} are up to date."
msgstr ""
msgid "Please enter a non-negative number"
msgstr ""
......@@ -24870,6 +24867,9 @@ msgstr ""
msgid "Point to any links you like: documentation, built binaries, or other related materials. These can be internal or external links from your GitLab instance. Duplicate URLs are not allowed."
msgstr ""
msgid "Policies"
msgstr ""
msgid "Policy project doesn't exist"
msgstr ""
......@@ -25368,6 +25368,9 @@ msgstr ""
msgid "Profiles|Main settings"
msgstr ""
msgid "Profiles|Manage two-factor authentication"
msgstr ""
msgid "Profiles|No file chosen."
msgstr ""
......@@ -25473,6 +25476,9 @@ msgstr ""
msgid "Profiles|Using emojis in names seems fun, but please try to set a status message instead"
msgstr ""
msgid "Profiles|We recommend you ensure two-factor authentication is enabled and the settings are up to date."
msgstr ""
msgid "Profiles|What's your status?"
msgstr ""
......@@ -28817,9 +28823,6 @@ msgstr ""
msgid "Saving project."
msgstr ""
msgid "Scan Policies"
msgstr ""
msgid "Scanner"
msgstr ""
......
......@@ -228,4 +228,16 @@ RSpec.describe Gitlab::MarkdownCache::ActiveRecord::Extension do
thing.refresh_markdown_cache!
end
end
context 'when persisted cache is nil' do
  before do
    # Force the stored cache version to nil for this scenario.
    thing.update_column(:cached_markdown_version, nil)
  end

  # The description must match the expectation below: with a nil persisted
  # version the refreshed cache IS written through update_columns.
  it 'saves the generated HTML' do
    expect(thing).to receive(:update_columns)

    thing.refresh_markdown_cache!
  end
end
end
......@@ -2421,39 +2421,20 @@ RSpec.describe Project, factory_default: :keep do
let_it_be_with_reload(:project) { create(:project) }
it 'updates project_feature', :aggregate_failures do
# Simulate an existing project that has container_registry enabled
project.update_column(:container_registry_enabled, true)
project.project_feature.update_column(:container_registry_access_level, ProjectFeature::ENABLED)
project.update!(container_registry_enabled: false)
expect(project.read_attribute(:container_registry_enabled)).to eq(false)
expect(project.project_feature.container_registry_access_level).to eq(ProjectFeature::DISABLED)
project.update!(container_registry_enabled: true)
expect(project.read_attribute(:container_registry_enabled)).to eq(true)
expect(project.project_feature.container_registry_access_level).to eq(ProjectFeature::ENABLED)
end
it 'rollsback both projects and project_features row in case of error', :aggregate_failures do
project.update_column(:container_registry_enabled, true)
project.project_feature.update_column(:container_registry_access_level, ProjectFeature::ENABLED)
allow(project).to receive(:valid?).and_return(false)
expect { project.update!(container_registry_enabled: false) }.to raise_error(ActiveRecord::RecordInvalid)
expect(project.reload.read_attribute(:container_registry_enabled)).to eq(true)
expect(project.project_feature.reload.container_registry_access_level).to eq(ProjectFeature::ENABLED)
end
end
describe '#container_registry_enabled' do
let_it_be_with_reload(:project) { create(:project) }
it 'delegates to project_feature', :aggregate_failures do
project.update_column(:container_registry_enabled, true)
project.project_feature.update_column(:container_registry_access_level, ProjectFeature::DISABLED)
expect(project.container_registry_enabled).to eq(false)
......
......@@ -231,7 +231,6 @@ RSpec.describe API::Projects do
end
it 'includes correct value of container_registry_enabled', :aggregate_failures do
project.update_column(:container_registry_enabled, true)
project.project_feature.update!(container_registry_access_level: ProjectFeature::DISABLED)
get api('/projects', user)
......@@ -1113,6 +1112,16 @@ RSpec.describe API::Projects do
expect(Project.find_by(path: project[:path]).container_registry_access_level).to eq(ProjectFeature::ENABLED)
end
it 'assigns container_registry_enabled to project' do
  # Create a project through the API using the boolean container_registry_enabled param.
  project_params = attributes_for(:project, { container_registry_enabled: true })

  post api('/projects', user), params: project_params

  expect(response).to have_gitlab_http_status(:created)
  expect(json_response['container_registry_enabled']).to eq(true)

  # The boolean param is expected to end up as the ENABLED access level.
  created_project = Project.find_by(path: project_params[:path])
  expect(created_project.container_registry_access_level).to eq(ProjectFeature::ENABLED)
end
it 'creates a project using a template' do
expect { post api('/projects', user), params: { template_name: 'rails', name: 'rails-test' } }
.to change { Project.count }.by(1)
......@@ -1560,6 +1569,18 @@ RSpec.describe API::Projects do
expect(json_response['error']).to eq('name is missing')
end
it 'sets container_registry_enabled' do
  # Factory attributes plus the boolean flag under test.
  project_params = attributes_for(:project).merge(container_registry_enabled: true)

  post api("/projects/user/#{user.id}", admin), params: project_params

  expect(response).to have_gitlab_http_status(:created)
  expect(json_response['container_registry_enabled']).to eq(true)

  # The boolean param is expected to end up as the ENABLED access level.
  created_project = Project.find_by(path: project_params[:path])
  expect(created_project.container_registry_access_level).to eq(ProjectFeature::ENABLED)
end
it 'assigns attributes to project' do
project = attributes_for(:project, {
issues_enabled: false,
......@@ -3050,6 +3071,16 @@ RSpec.describe API::Projects do
expect(Project.find_by(path: project[:path]).container_registry_access_level).to eq(ProjectFeature::PRIVATE)
end
it 'sets container_registry_enabled' do
  # Start from a disabled registry so that enabling it is an observable change.
  project.project_feature.update!(container_registry_access_level: ProjectFeature::DISABLED)

  update_params = { container_registry_enabled: true }
  put(api("/projects/#{project.id}", user), params: update_params)

  expect(response).to have_gitlab_http_status(:ok)
  expect(json_response['container_registry_enabled']).to eq(true)
  expect(project.reload.container_registry_access_level).to eq(ProjectFeature::ENABLED)
end
it 'returns 400 when nothing sent' do
project_param = {}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment