Commit ff986c70 authored by Douwe Maan's avatar Douwe Maan

Merge branch '30672-versioned-markdown-cache' into 'master'

Start versioning cached markdown fields

Closes #30672

See merge request !10597
parents 37a91bf9 d2b883b7
......@@ -8,6 +8,14 @@
#
# Corresponding foo_html, bar_html and baz_html fields should exist.
module CacheMarkdownField
extend ActiveSupport::Concern
# Increment this number every time the renderer changes its output
CACHE_VERSION = 1
# Changes to these attributes cause the cache to be invalidated
INVALIDATED_BY = %w[author project].freeze
# Knows about the relationship between markdown and html field names, and
# stores the rendering contexts for the latter
class FieldData
......@@ -30,60 +38,71 @@ module CacheMarkdownField
end
end
# Dynamic registries don't really work in Rails as it's not guaranteed that
# every class will be loaded, so hardcode the list.
CACHING_CLASSES = %w[
AbuseReport
Appearance
ApplicationSetting
BroadcastMessage
Issue
Label
MergeRequest
Milestone
Namespace
Note
Project
Release
Snippet
].freeze
# Resolves every name listed in CACHING_CLASSES to its constant and returns
# the resulting array, in the same order as the list.
def self.caching_classes
  CACHING_CLASSES.map { |class_name| class_name.constantize }
end
# Default answer for whether rendering should skip the project check: no.
# The value is forwarded as the :skip_project_check option when the markdown
# fields are rendered.
# NOTE(review): presumably including classes override this - confirm.
def skip_project_check?
false
end
extend ActiveSupport::Concern
# Returns the default Banzai render context for the cached markdown field.
def banzai_render_context(field)
raise ArgumentError.new("Unknown field: #{field.inspect}") unless
cached_markdown_fields.markdown_fields.include?(field)
included do
cattr_reader :cached_markdown_fields do
FieldData.new
end
# Always include a project key, or Banzai complains
project = self.project if self.respond_to?(:project)
context = cached_markdown_fields[field].merge(project: project)
# Returns the default Banzai render context for the cached markdown field.
def banzai_render_context(field)
raise ArgumentError.new("Unknown field: #{field.inspect}") unless
cached_markdown_fields.markdown_fields.include?(field)
# Banzai is less strict about authors, so don't always have an author key
context[:author] = self.author if self.respond_to?(:author)
# Always include a project key, or Banzai complains
project = self.project if self.respond_to?(:project)
context = cached_markdown_fields[field].merge(project: project)
context
end
# Banzai is less strict about authors, so don't always have an author key
context[:author] = self.author if self.respond_to?(:author)
# Update every column in a row if any one is invalidated, as we only store
# one version per row
def refresh_markdown_cache!(do_update: false)
options = { skip_project_check: skip_project_check? }
context
end
updates = cached_markdown_fields.markdown_fields.map do |markdown_field|
[
cached_markdown_fields.html_field(markdown_field),
Banzai::Renderer.cacheless_render_field(self, markdown_field, options)
]
end.to_h
updates['cached_markdown_version'] = CacheMarkdownField::CACHE_VERSION
# Allow callers to look up the cache field name, rather than hardcoding it
def markdown_cache_field_for(field)
raise ArgumentError.new("Unknown field: #{field}") unless
cached_markdown_fields.markdown_fields.include?(field)
updates.each {|html_field, data| write_attribute(html_field, data) }
cached_markdown_fields.html_field(field)
update_columns(updates) if persisted? && do_update
end
# Tells whether the cached HTML for +markdown_field+ can be used as-is.
#
# The cache is current only when the stored cached_markdown_version equals
# CacheMarkdownField::CACHE_VERSION and either the HTML field itself has been
# changed, or the markdown and HTML fields agree on their changed state.
def cached_html_up_to_date?(markdown_field)
  html_field = cached_markdown_fields.html_field(markdown_field)

  markdown_dirty = attribute_changed?(markdown_field) || false
  html_dirty = attribute_changed?(html_field) || false

  version_current = CacheMarkdownField::CACHE_VERSION == cached_markdown_version
  fields_in_sync = html_dirty || markdown_dirty == html_dirty

  version_current && fields_in_sync
end
# True when at least one of the cached HTML fields reports itself as
# invalidated via attribute_invalidated?.
def invalidated_markdown_cache?
  html_fields = cached_markdown_fields.html_fields

  html_fields.any? do |html_field|
    attribute_invalidated?(html_field)
  end
end
# Dispatches to the "<attr>_invalidated?" predicate for the given attribute
# and returns its result.
def attribute_invalidated?(attr)
  predicate = "#{attr}_invalidated?"
  __send__(predicate)
end
# Returns the cached HTML stored for +markdown_field+.
#
# markdown_field - name of a field registered in cached_markdown_fields
#
# Raises ArgumentError when +markdown_field+ is not a registered markdown
# field. (The message previously interpolated an undefined local, +field+,
# so an unknown field surfaced as NameError instead.)
def cached_html_for(markdown_field)
  unless cached_markdown_fields.markdown_fields.include?(markdown_field)
    raise ArgumentError, "Unknown field: #{markdown_field}"
  end

  # The HTML lives in the matching html field; read it through its accessor.
  __send__(cached_markdown_fields.html_field(markdown_field))
end
included do
cattr_reader :cached_markdown_fields do
FieldData.new
end
# Always exclude _html fields from attributes (including serialization).
......@@ -92,12 +111,16 @@ module CacheMarkdownField
# Returns the attribute hash from attributes_before_markdown_cache with the
# cache bookkeeping removed: the 'cached_markdown_version' key and every
# cached HTML field are deleted before the hash is handed back.
def attributes
  attributes_before_markdown_cache.tap do |visible|
    visible.delete('cached_markdown_version')
    cached_markdown_fields.html_fields.each { |html_field| visible.delete(html_field) }
  end
end
before_save :refresh_markdown_cache!, if: :invalidated_markdown_cache?
end
class_methods do
......@@ -107,31 +130,18 @@ module CacheMarkdownField
# a corresponding _html field. Any custom rendering options may be provided
# as a context.
def cache_markdown_field(markdown_field, context = {})
raise "Add #{self} to CacheMarkdownField::CACHING_CLASSES" unless
CacheMarkdownField::CACHING_CLASSES.include?(self.to_s)
cached_markdown_fields[markdown_field] = context
html_field = cached_markdown_fields.html_field(markdown_field)
cache_method = "#{markdown_field}_cache_refresh".to_sym
invalidation_method = "#{html_field}_invalidated?".to_sym
define_method(cache_method) do
options = { skip_project_check: skip_project_check? }
html = Banzai::Renderer.cacheless_render_field(self, markdown_field, options)
__send__("#{html_field}=", html)
true
end
# The HTML becomes invalid if any dependent fields change. For now, assume
# author and project invalidate the cache in all circumstances.
define_method(invalidation_method) do
changed_fields = changed_attributes.keys
invalidations = changed_fields & [markdown_field.to_s, "author", "project"]
!invalidations.empty?
invalidations = changed_fields & [markdown_field.to_s, *INVALIDATED_BY]
!invalidations.empty? || !cached_html_up_to_date?(markdown_field)
end
before_save cache_method, if: invalidation_method
end
end
end
# Background worker that wipes the cached Markdown HTML columns in the
# database, processing rows in batches.
class ClearDatabaseCacheWorker
  include Sidekiq::Worker
  include DedicatedSidekiqQueue

  # Number of rows updated per batch.
  BATCH_SIZE = 1000

  # For every class returned by CacheMarkdownField.caching_classes, sets all
  # of its cached HTML fields to nil, BATCH_SIZE rows at a time. Returns nil.
  def perform
    CacheMarkdownField.caching_classes.each do |kls|
      fields = kls.cached_markdown_fields.html_fields
      clear_cache_fields = fields.map { |field| [field, nil] }.to_h

      Rails.logger.debug("Clearing Markdown cache for #{kls}: #{fields.inspect}")

      kls.unscoped.in_batches(of: BATCH_SIZE) do |relation|
        relation.update_all(clear_cache_fields)
      end
    end

    nil
  end
end
---
title: Replace rake cache:clear:db with an automatic mechanism
merge_request: 10597
author:
......@@ -34,7 +34,6 @@
- [repository_fork, 1]
- [repository_import, 1]
- [project_service, 1]
- [clear_database_cache, 1]
- [delete_user, 1]
- [delete_merged_branches, 1]
- [authorized_projects, 1]
......
# Adds an integer cached_markdown_version column to each table listed in
# #change.
class AddVersionFieldToMarkdownCache < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  def change
    tables = [
      :abuse_reports,
      :appearances,
      :application_settings,
      :broadcast_messages,
      :issues,
      :labels,
      :merge_requests,
      :milestones,
      :namespaces,
      :notes,
      :projects,
      :releases,
      :snippets
    ]

    tables.each do |table|
      add_column(table, :cached_markdown_version, :integer, limit: 4)
    end
  end
end
......@@ -24,6 +24,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.datetime "created_at"
t.datetime "updated_at"
t.text "message_html"
t.integer "cached_markdown_version"
end
create_table "appearances", force: :cascade do |t|
......@@ -34,6 +35,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.text "description_html"
t.integer "cached_markdown_version"
end
create_table "application_settings", force: :cascade do |t|
......@@ -116,6 +118,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.integer "unique_ips_limit_time_window"
t.boolean "unique_ips_limit_enabled", default: false, null: false
t.decimal "polling_interval_multiplier", default: 1.0, null: false
t.integer "cached_markdown_version"
t.boolean "usage_ping_enabled", default: true, null: false
t.string "uuid"
end
......@@ -161,6 +164,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.string "color"
t.string "font"
t.text "message_html"
t.integer "cached_markdown_version"
end
create_table "chat_names", force: :cascade do |t|
......@@ -479,6 +483,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.integer "time_estimate"
t.integer "relative_position"
t.datetime "closed_at"
t.integer "cached_markdown_version"
end
add_index "issues", ["assignee_id"], name: "index_issues_on_assignee_id", using: :btree
......@@ -543,6 +548,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.text "description_html"
t.string "type"
t.integer "group_id"
t.integer "cached_markdown_version"
end
add_index "labels", ["group_id", "project_id", "title"], name: "index_labels_on_group_id_and_project_id_and_title", unique: true, using: :btree
......@@ -663,6 +669,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.text "title_html"
t.text "description_html"
t.integer "time_estimate"
t.integer "cached_markdown_version"
end
add_index "merge_requests", ["assignee_id"], name: "index_merge_requests_on_assignee_id", using: :btree
......@@ -700,6 +707,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.text "title_html"
t.text "description_html"
t.date "start_date"
t.integer "cached_markdown_version"
end
add_index "milestones", ["description"], name: "index_milestones_on_description_trigram", using: :gin, opclasses: {"description"=>"gin_trgm_ops"}
......@@ -726,6 +734,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.integer "parent_id"
t.boolean "require_two_factor_authentication", default: false, null: false
t.integer "two_factor_grace_period", default: 48, null: false
t.integer "cached_markdown_version"
end
add_index "namespaces", ["created_at"], name: "index_namespaces_on_created_at", using: :btree
......@@ -760,6 +769,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.integer "resolved_by_id"
t.string "discussion_id"
t.text "note_html"
t.integer "cached_markdown_version"
end
add_index "notes", ["author_id"], name: "index_notes_on_author_id", using: :btree
......@@ -956,6 +966,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.integer "auto_cancel_pending_pipelines", default: 0, null: false
t.boolean "printing_merge_request_link_enabled", default: true, null: false
t.string "import_jid"
t.integer "cached_markdown_version"
end
add_index "projects", ["ci_id"], name: "index_projects_on_ci_id", using: :btree
......@@ -1028,6 +1039,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.datetime "created_at"
t.datetime "updated_at"
t.text "description_html"
t.integer "cached_markdown_version"
end
add_index "releases", ["project_id", "tag"], name: "index_releases_on_project_id_and_tag", using: :btree
......@@ -1099,6 +1111,7 @@ ActiveRecord::Schema.define(version: 20170419001229) do
t.integer "visibility_level", default: 0, null: false
t.text "title_html"
t.text "content_html"
t.integer "cached_markdown_version"
end
add_index "snippets", ["author_id"], name: "index_snippets_on_author_id", using: :btree
......
module Banzai
module Renderer
module_function
# Convert a Markdown String into an HTML-safe String of HTML
#
# Note that while the returned HTML will have been sanitized of dangerous
......@@ -16,7 +14,7 @@ module Banzai
# context - Hash of context options passed to our HTML Pipeline
#
# Returns an HTML-safe String
def render(text, context = {})
def self.render(text, context = {})
cache_key = context.delete(:cache_key)
cache_key = full_cache_key(cache_key, context[:pipeline])
......@@ -35,24 +33,16 @@ module Banzai
# of HTML. This method is analogous to calling render(object.field), but it
# can cache the rendered HTML in the object, rather than Redis.
#
# The context to use is learned from the passed-in object by calling
# #banzai_render_context(field), and cannot be changed. Use #render, passing
# it the field text, if a custom rendering is needed. The generated context
# is returned along with the HTML.
def render_field(object, field)
html_field = object.markdown_cache_field_for(field)
html = object.__send__(html_field)
return html if html.present?
html = cacheless_render_field(object, field)
update_object(object, html_field, html) unless object.new_record? || object.destroyed?
# The context to use is managed by the object and cannot be changed.
# Use #render, passing it the field text, if a custom rendering is needed.
def self.render_field(object, field)
object.refresh_markdown_cache!(do_update: update_object?(object)) unless object.cached_html_up_to_date?(field)
html
object.cached_html_for(field)
end
# Same as +render_field+, but without consulting or updating the cache field
def cacheless_render_field(object, field, options = {})
def self.cacheless_render_field(object, field, options = {})
text = object.__send__(field)
context = object.banzai_render_context(field).merge(options)
......@@ -82,7 +72,7 @@ module Banzai
# texts_and_contexts
# => [{ text: '### Hello',
# context: { cache_key: [note, :note] } }]
def cache_collection_render(texts_and_contexts)
def self.cache_collection_render(texts_and_contexts)
items_collection = texts_and_contexts.each_with_index do |item, index|
context = item[:context]
cache_key = full_cache_multi_key(context.delete(:cache_key), context[:pipeline])
......@@ -111,7 +101,7 @@ module Banzai
items_collection.map { |item| item[:rendered] }
end
def render_result(text, context = {})
def self.render_result(text, context = {})
text = Pipeline[:pre_process].to_html(text, context) if text
Pipeline[context[:pipeline]].call(text, context)
......@@ -130,7 +120,7 @@ module Banzai
# :user - User object
#
# Returns an HTML-safe String
def post_process(html, context)
def self.post_process(html, context)
context = Pipeline[context[:pipeline]].transform_context(context)
pipeline = Pipeline[:post_process]
......@@ -141,7 +131,7 @@ module Banzai
end.html_safe
end
def cacheless_render(text, context = {})
def self.cacheless_render(text, context = {})
Gitlab::Metrics.measure(:banzai_cacheless_render) do
result = render_result(text, context)
......@@ -154,7 +144,7 @@ module Banzai
end
end
def full_cache_key(cache_key, pipeline_name)
def self.full_cache_key(cache_key, pipeline_name)
return unless cache_key
["banzai", *cache_key, pipeline_name || :full]
end
......@@ -162,13 +152,14 @@ module Banzai
# To map Rails.cache.read_multi results we need to know the Rails.cache.expanded_key.
# Other option will be to generate stringified keys on our side and don't delegate to Rails.cache.expanded_key
# method.
def full_cache_multi_key(cache_key, pipeline_name)
def self.full_cache_multi_key(cache_key, pipeline_name)
return unless cache_key
Rails.cache.send(:expanded_key, full_cache_key(cache_key, pipeline_name))
end
def update_object(object, html_field, html)
object.update_column(html_field, html)
# GitLab EE needs to disable updates on GET requests in Geo
#
# Tells render_field whether a refreshed cache may be persisted; the result
# is passed as do_update: to object.refresh_markdown_cache!. Always true
# here, and the +object+ argument is not consulted.
def self.update_object?(object)
true
end
end
end
......@@ -138,6 +138,11 @@ module Gitlab
@series_prefix ||= Sidekiq.server? ? 'sidekiq_' : 'rails_'
end
# Allow access from other metrics related middlewares.
#
# Returns whatever Transaction.current returns.
def self.current_transaction
Transaction.current
end
# When enabled this should be set before being used as the usual pattern
# "@foo ||= bar" is _not_ thread-safe.
if enabled?
......@@ -149,10 +154,5 @@ module Gitlab
new(udp: { host: host, port: port })
end
end
# Allow access from other metrics related middlewares.
#
# Returns whatever Transaction.current returns.
def self.current_transaction
Transaction.current
end
end
end
......@@ -21,12 +21,7 @@ namespace :cache do
end
end
desc "GitLab | Clear database cache (in the background)"
task db: :environment do
ClearDatabaseCacheWorker.perform_async
end
task all: [:db, :redis]
task all: [:redis]
end
task clear: 'cache:clear:redis'
......
......@@ -4,13 +4,13 @@ describe Banzai::ObjectRenderer do
let(:project) { create(:empty_project) }
let(:user) { project.owner }
let(:renderer) { described_class.new(project, user, custom_value: 'value') }
let(:object) { Note.new(note: 'hello', note_html: '<p>hello</p>') }
let(:object) { Note.new(note: 'hello', note_html: '<p dir="auto">hello</p>', cached_markdown_version: CacheMarkdownField::CACHE_VERSION) }
describe '#render' do
it 'renders and redacts an Array of objects' do
renderer.render([object], :note)
expect(object.redacted_note_html).to eq '<p>hello</p>'
expect(object.redacted_note_html).to eq '<p dir="auto">hello</p>'
expect(object.user_visible_reference_count).to eq 0
end
......
require 'spec_helper'
describe Banzai::Renderer do
def expect_render(project = :project)
expected_context = { project: project }
expect(renderer).to receive(:cacheless_render) { :html }.with(:markdown, expected_context)
end
def expect_cache_update
expect(object).to receive(:update_column).with("field_html", :html)
end
def fake_object(*features)
markdown = :markdown if features.include?(:markdown)
html = :html if features.include?(:html)
object = double(
"object",
banzai_render_context: { project: :project },
field: markdown,
field_html: html
)
def fake_object(fresh:)
object = double('object')
allow(object).to receive(:markdown_cache_field_for).with(:field).and_return("field_html")
allow(object).to receive(:new_record?).and_return(features.include?(:new))
allow(object).to receive(:destroyed?).and_return(features.include?(:destroyed))
allow(object).to receive(:cached_html_up_to_date?).with(:field).and_return(fresh)
allow(object).to receive(:cached_html_for).with(:field).and_return('field_html')
object
end
describe "#render_field" do
describe '#render_field' do
let(:renderer) { Banzai::Renderer }
let(:subject) { renderer.render_field(object, :field) }
subject { renderer.render_field(object, :field) }
context "with an empty cache" do
let(:object) { fake_object(:markdown) }
it "caches and returns the result" do
expect_render
expect_cache_update
expect(subject).to eq(:html)
end
end
context 'with a stale cache' do
let(:object) { fake_object(fresh: false) }
context "with a filled cache" do
let(:object) { fake_object(:markdown, :html) }
it 'caches and returns the result' do
expect(object).to receive(:refresh_markdown_cache!).with(do_update: true)
it "uses the cache" do
expect_render.never
expect_cache_update.never
should eq(:html)
is_expected.to eq('field_html')
end
end
context "new object" do
let(:object) { fake_object(:new, :markdown) }
it "doesn't cache the result" do
expect_render
expect_cache_update.never
expect(subject).to eq(:html)
end
end
context 'with an up-to-date cache' do
let(:object) { fake_object(fresh: true) }
context "destroyed object" do
let(:object) { fake_object(:destroyed, :markdown) }
it 'uses the cache' do
expect(object).to receive(:refresh_markdown_cache!).never
it "doesn't cache the result" do
expect_render
expect_cache_update.never
expect(subject).to eq(:html)
is_expected.to eq('field_html')
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment