Commit c87734e1 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch '30550-cache-highlight-mr-diffs-file-by-file' into 'master'

Add redis caching of individual highlighted diffs

Closes #30550

See merge request gitlab-org/gitlab!19917
parents 2a94363a 51b8a760
# frozen_string_literal: true
#
module Gitlab
module Diff
class DeprecatedHighlightCache
delegate :diffable, to: :@diff_collection
delegate :diff_options, to: :@diff_collection
def initialize(diff_collection, backend: Rails.cache)
@backend = backend
@diff_collection = diff_collection
end
# - Reads from cache
# - Assigns DiffFile#highlighted_diff_lines for cached files
def decorate(diff_file)
if content = read_file(diff_file)
diff_file.highlighted_diff_lines = content.map do |line|
Gitlab::Diff::Line.init_from_hash(line)
end
end
end
# It populates a Hash in order to submit a single write to the memory
# cache. This avoids excessive IO generated by N+1's (1 writing for
# each highlighted line or file).
def write_if_empty
return if cached_content.present?
@diff_collection.diff_files.each do |diff_file|
next unless cacheable?(diff_file)
diff_file_id = diff_file.file_identifier
cached_content[diff_file_id] = diff_file.highlighted_diff_lines.map(&:to_hash)
end
cache.write(key, cached_content, expires_in: 1.week)
end
def clear
cache.delete(key)
end
def key
[diffable, 'highlighted-diff-files', Gitlab::Diff::Line::SERIALIZE_KEYS, diff_options]
end
private
def read_file(diff_file)
cached_content[diff_file.file_identifier]
end
def cache
@backend
end
def cached_content
@cached_content ||= cache.read(key) || {}
end
def cacheable?(diff_file)
diffable.present? && diff_file.text? && diff_file.diffable?
end
end
end
end
...@@ -37,7 +37,11 @@ module Gitlab ...@@ -37,7 +37,11 @@ module Gitlab
private private
def cache def cache
@cache ||= Gitlab::Diff::HighlightCache.new(self) @cache ||= if Feature.enabled?(:hset_redis_diff_caching, project)
Gitlab::Diff::HighlightCache.new(self)
else
Gitlab::Diff::DeprecatedHighlightCache.new(self)
end
end end
end end
end end
......
...@@ -3,16 +3,19 @@ ...@@ -3,16 +3,19 @@
module Gitlab module Gitlab
module Diff module Diff
class HighlightCache class HighlightCache
delegate :diffable, to: :@diff_collection EXPIRATION = 1.week
VERSION = 1
delegate :diffable, to: :@diff_collection
delegate :diff_options, to: :@diff_collection delegate :diff_options, to: :@diff_collection
def initialize(diff_collection, backend: Rails.cache) def initialize(diff_collection)
@backend = backend
@diff_collection = diff_collection @diff_collection = diff_collection
end end
# - Reads from cache # - Reads from cache
# - Assigns DiffFile#highlighted_diff_lines for cached files # - Assigns DiffFile#highlighted_diff_lines for cached files
#
def decorate(diff_file) def decorate(diff_file)
if content = read_file(diff_file) if content = read_file(diff_file)
diff_file.highlighted_diff_lines = content.map do |line| diff_file.highlighted_diff_lines = content.map do |line|
...@@ -21,43 +24,95 @@ module Gitlab ...@@ -21,43 +24,95 @@ module Gitlab
end end
end end
# It populates a Hash in order to submit a single write to the memory # For every file that isn't already contained in the redis hash, store the
# cache. This avoids excessive IO generated by N+1's (1 writing for # result of #highlighted_diff_lines, then submit the uncached content
# each highlighted line or file). # to #write_to_redis_hash to submit a single write. This avoids excessive
# IO generated by N+1's (1 writing for each highlighted line or file).
#
def write_if_empty def write_if_empty
return if cached_content.present? return if uncached_files.empty?
@diff_collection.diff_files.each do |diff_file| new_cache_content = {}
uncached_files.each do |diff_file|
next unless cacheable?(diff_file) next unless cacheable?(diff_file)
diff_file_id = diff_file.file_identifier new_cache_content[diff_file.file_path] = diff_file.highlighted_diff_lines.map(&:to_hash)
cached_content[diff_file_id] = diff_file.highlighted_diff_lines.map(&:to_hash)
end end
cache.write(key, cached_content, expires_in: 1.week) write_to_redis_hash(new_cache_content)
end end
def clear def clear
cache.delete(key) Gitlab::Redis::Cache.with do |redis|
redis.del(key)
end
end end
def key def key
[diffable, 'highlighted-diff-files', Gitlab::Diff::Line::SERIALIZE_KEYS, diff_options] @redis_key ||= ['highlighted-diff-files', diffable.cache_key, VERSION, diff_options].join(":")
end end
private private
def read_file(diff_file) def uncached_files
cached_content[diff_file.file_identifier] diff_files = @diff_collection.diff_files
diff_files.select { |file| read_cache[file.file_path].nil? }
end end
def cache # Given a hash of:
@backend # { "file/to/cache" =>
# [ { line_code: "a5cc2925ca8258af241be7e5b0381edf30266302_19_19",
# rich_text: " <span id=\"LC19\" class=\"line\" lang=\"plaintext\">config/initializers/secret_token.rb</span>\n",
# text: " config/initializers/secret_token.rb",
# type: nil,
# index: 3,
# old_pos: 19,
# new_pos: 19 }
# ] }
#
# ...it will write/update a Gitlab::Redis hash (HSET)
#
def write_to_redis_hash(hash)
Gitlab::Redis::Cache.with do |redis|
redis.pipelined do
hash.each do |diff_file_id, highlighted_diff_lines_hash|
redis.hset(key, diff_file_id, highlighted_diff_lines_hash.to_json)
end
# HSETs have to have their expiration date manually updated
#
redis.expire(key, EXPIRATION)
end
end
end
def file_paths
@file_paths ||= @diff_collection.diffs.collect(&:file_path)
end
def read_file(diff_file)
cached_content[diff_file.file_path]
end end
def cached_content def cached_content
@cached_content ||= cache.read(key) || {} @cached_content ||= read_cache
end
def read_cache
return {} unless file_paths.any?
results = []
Gitlab::Redis::Cache.with do |redis|
results = redis.hmget(key, file_paths)
end
results.map! do |result|
JSON.parse(result, symbolize_names: true) unless result.nil?
end
file_paths.zip(results).to_h
end end
def cacheable?(diff_file) def cacheable?(diff_file)
......
...@@ -3,6 +3,9 @@ ...@@ -3,6 +3,9 @@
module Gitlab module Gitlab
module Diff module Diff
class Line class Line
# When SERIALIZE_KEYS is updated, to reset the redis cache entries you'll
# need to bump the VERSION constant on Gitlab::Diff::HighlightCache
#
SERIALIZE_KEYS = %i(line_code rich_text text type index old_pos new_pos).freeze SERIALIZE_KEYS = %i(line_code rich_text text type index old_pos new_pos).freeze
attr_reader :line_code, :type, :old_pos, :new_pos attr_reader :line_code, :type, :old_pos, :new_pos
......
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::Diff::DeprecatedHighlightCache do
let(:merge_request) { create(:merge_request_with_diffs) }
subject(:cache) { described_class.new(merge_request.diffs, backend: backend) }
describe '#decorate' do
let(:backend) { double('backend').as_null_object }
# Manually creates a Diff::File object to avoid triggering the cache on
# the FileCollection::MergeRequestDiff
let(:diff_file) do
diffs = merge_request.diffs
raw_diff = diffs.diffable.raw_diffs(diffs.diff_options.merge(paths: ['CHANGELOG'])).first
Gitlab::Diff::File.new(raw_diff,
repository: diffs.project.repository,
diff_refs: diffs.diff_refs,
fallback_diff_refs: diffs.fallback_diff_refs)
end
it 'does not calculate highlighting when reading from cache' do
cache.write_if_empty
cache.decorate(diff_file)
expect_any_instance_of(Gitlab::Diff::Highlight).not_to receive(:highlight)
diff_file.highlighted_diff_lines
end
it 'assigns highlighted diff lines to the DiffFile' do
cache.write_if_empty
cache.decorate(diff_file)
expect(diff_file.highlighted_diff_lines.size).to be > 5
end
it 'submits a single reading from the cache' do
cache.decorate(diff_file)
cache.decorate(diff_file)
expect(backend).to have_received(:read).with(cache.key).once
end
end
describe '#write_if_empty' do
let(:backend) { double('backend', read: {}).as_null_object }
it 'submits a single writing to the cache' do
cache.write_if_empty
cache.write_if_empty
expect(backend).to have_received(:write).with(cache.key,
hash_including('CHANGELOG-false-false-false'),
expires_in: 1.week).once
end
end
describe '#clear' do
let(:backend) { double('backend').as_null_object }
it 'clears cache' do
cache.clear
expect(backend).to have_received(:delete).with(cache.key)
end
end
end
...@@ -29,13 +29,19 @@ describe Gitlab::Diff::FileCollection::MergeRequestDiff do ...@@ -29,13 +29,19 @@ describe Gitlab::Diff::FileCollection::MergeRequestDiff do
let(:diffable) { merge_request.merge_request_diff } let(:diffable) { merge_request.merge_request_diff }
end end
it 'uses a different cache key if diff line keys change' do context 'using Gitlab::Diff::DeprecatedHighlightCache' do
mr_diff = described_class.new(merge_request.merge_request_diff, diff_options: nil) before do
key = mr_diff.cache_key stub_feature_flags(hset_redis_diff_caching: false)
end
it 'uses a different cache key if diff line keys change' do
mr_diff = described_class.new(merge_request.merge_request_diff, diff_options: nil)
key = mr_diff.cache_key
stub_const('Gitlab::Diff::Line::SERIALIZE_KEYS', [:foo]) stub_const('Gitlab::Diff::Line::SERIALIZE_KEYS', [:foo])
expect(mr_diff.cache_key).not_to eq(key) expect(mr_diff.cache_key).not_to eq(key)
end
end end
it_behaves_like 'diff statistics' do it_behaves_like 'diff statistics' do
......
...@@ -2,14 +2,46 @@ ...@@ -2,14 +2,46 @@
require 'spec_helper' require 'spec_helper'
describe Gitlab::Diff::HighlightCache do describe Gitlab::Diff::HighlightCache, :clean_gitlab_redis_cache do
let(:merge_request) { create(:merge_request_with_diffs) } let(:merge_request) { create(:merge_request_with_diffs) }
let(:diff_hash) do
{ ".gitignore-false-false-false" =>
[{ line_code: nil, rich_text: nil, text: "@@ -17,3 +17,4 @@ rerun.txt", type: "match", index: 0, old_pos: 17, new_pos: 17 },
{ line_code: "a5cc2925ca8258af241be7e5b0381edf30266302_17_17",
rich_text: " <span id=\"LC17\" class=\"line\" lang=\"plaintext\">pickle-email-*.html</span>\n",
text: " pickle-email-*.html",
type: nil,
index: 1,
old_pos: 17,
new_pos: 17 },
{ line_code: "a5cc2925ca8258af241be7e5b0381edf30266302_18_18",
rich_text: " <span id=\"LC18\" class=\"line\" lang=\"plaintext\">.project</span>\n",
text: " .project",
type: nil,
index: 2,
old_pos: 18,
new_pos: 18 },
{ line_code: "a5cc2925ca8258af241be7e5b0381edf30266302_19_19",
rich_text: " <span id=\"LC19\" class=\"line\" lang=\"plaintext\">config/initializers/secret_token.rb</span>\n",
text: " config/initializers/secret_token.rb",
type: nil,
index: 3,
old_pos: 19,
new_pos: 19 },
{ line_code: "a5cc2925ca8258af241be7e5b0381edf30266302_20_20",
rich_text: "+<span id=\"LC20\" class=\"line\" lang=\"plaintext\">.DS_Store</span>",
text: "+.DS_Store",
type: "new",
index: 4,
old_pos: 20,
new_pos: 20 }] }
end
subject(:cache) { described_class.new(merge_request.diffs, backend: backend) } let(:cache_key) { cache.key }
describe '#decorate' do subject(:cache) { described_class.new(merge_request.diffs) }
let(:backend) { double('backend').as_null_object }
describe '#decorate' do
# Manually creates a Diff::File object to avoid triggering the cache on # Manually creates a Diff::File object to avoid triggering the cache on
# the FileCollection::MergeRequestDiff # the FileCollection::MergeRequestDiff
let(:diff_file) do let(:diff_file) do
...@@ -36,35 +68,43 @@ describe Gitlab::Diff::HighlightCache do ...@@ -36,35 +68,43 @@ describe Gitlab::Diff::HighlightCache do
expect(diff_file.highlighted_diff_lines.size).to be > 5 expect(diff_file.highlighted_diff_lines.size).to be > 5
end end
end
it 'submits a single reading from the cache' do describe '#write_if_empty' do
cache.decorate(diff_file) it 'filters the key/value list of entries to be caches for each invocation' do
cache.decorate(diff_file) expect(cache).to receive(:write_to_redis_hash)
.once.with(hash_including(".gitignore")).and_call_original
expect(cache).to receive(:write_to_redis_hash).once.with({}).and_call_original
expect(backend).to have_received(:read).with(cache.key).once 2.times { cache.write_if_empty }
end end
end
describe '#write_if_empty' do context 'different diff_collections for the same diffable' do
let(:backend) { double('backend', read: {}).as_null_object } before do
cache.write_if_empty
end
it 'submits a single writing to the cache' do it 'writes an uncached files in the collection to the same redis hash' do
cache.write_if_empty Gitlab::Redis::Cache.with { |r| r.hdel(cache_key, "files/whitespace") }
cache.write_if_empty
expect(backend).to have_received(:write).with(cache.key, expect { cache.write_if_empty }
hash_including('CHANGELOG-false-false-false'), .to change { Gitlab::Redis::Cache.with { |r| r.hgetall(cache_key) } }
expires_in: 1.week).once end
end end
end end
describe '#clear' do describe '#write_to_redis_hash' do
let(:backend) { double('backend').as_null_object } it 'creates or updates a Redis hash' do
expect { cache.send(:write_to_redis_hash, diff_hash) }
.to change { Gitlab::Redis::Cache.with { |r| r.hgetall(cache_key) } }
end
end
describe '#clear' do
it 'clears cache' do it 'clears cache' do
cache.clear expect_any_instance_of(Redis).to receive(:del).with(cache_key)
expect(backend).to have_received(:delete).with(cache.key) cache.clear
end end
end end
end end
...@@ -33,13 +33,34 @@ describe MergeRequests::ReloadDiffsService, :use_clean_rails_memory_store_cachin ...@@ -33,13 +33,34 @@ describe MergeRequests::ReloadDiffsService, :use_clean_rails_memory_store_cachin
end end
context 'cache clearing' do context 'cache clearing' do
it 'clears the cache for older diffs on the merge request' do context 'using Gitlab::Diff::DeprecatedHighlightCache' do
old_diff = merge_request.merge_request_diff before do
old_cache_key = old_diff.diffs_collection.cache_key stub_feature_flags(hset_redis_diff_caching: false)
end
expect(Rails.cache).to receive(:delete).with(old_cache_key).and_call_original it 'clears the cache for older diffs on the merge request' do
old_diff = merge_request.merge_request_diff
old_cache_key = old_diff.diffs_collection.cache_key
subject.execute expect(Rails.cache).to receive(:delete).with(old_cache_key).and_call_original
subject.execute
end
end
context 'using Gitlab::Diff::HighlightCache' do
before do
stub_feature_flags(hset_redis_diff_caching: true)
end
it 'clears the cache for older diffs on the merge request' do
old_diff = merge_request.merge_request_diff
old_cache_key = old_diff.diffs_collection.cache_key
expect_any_instance_of(Redis).to receive(:del).with(old_cache_key).and_call_original
subject.execute
end
end end
it 'avoids N+1 queries', :request_store do it 'avoids N+1 queries', :request_store do
......
...@@ -87,10 +87,28 @@ describe Notes::CreateService do ...@@ -87,10 +87,28 @@ describe Notes::CreateService do
.to receive(:unfolded_diff?) { true } .to receive(:unfolded_diff?) { true }
end end
it 'clears noteable diff cache when it was unfolded for the note position' do context 'using Gitlab::Diff::DeprecatedHighlightCache' do
expect_any_instance_of(Gitlab::Diff::HighlightCache).to receive(:clear) before do
stub_feature_flags(hset_redis_diff_caching: false)
end
it 'clears noteable diff cache when it was unfolded for the note position' do
expect_any_instance_of(Gitlab::Diff::DeprecatedHighlightCache).to receive(:clear)
described_class.new(project_with_repo, user, new_opts).execute
end
end
described_class.new(project_with_repo, user, new_opts).execute context 'using Gitlab::Diff::HighlightCache' do
before do
stub_feature_flags(hset_redis_diff_caching: true)
end
it 'clears noteable diff cache when it was unfolded for the note position' do
expect_any_instance_of(Gitlab::Diff::HighlightCache).to receive(:clear)
described_class.new(project_with_repo, user, new_opts).execute
end
end end
it 'does not clear cache when note is not the first of the discussion' do it 'does not clear cache when note is not the first of the discussion' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment