Commit d549c1e2 authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch '212661-compress-highlight-diffs-for-caching' into 'master'

Gzip diffs to improve storage usage

See merge request gitlab-org/gitlab!29616
parents 4f06814b 5b407194
...@@ -94,7 +94,11 @@ module Gitlab ...@@ -94,7 +94,11 @@ module Gitlab
Gitlab::Redis::Cache.with do |redis| Gitlab::Redis::Cache.with do |redis|
redis.pipelined do redis.pipelined do
hash.each do |diff_file_id, highlighted_diff_lines_hash| hash.each do |diff_file_id, highlighted_diff_lines_hash|
redis.hset(key, diff_file_id, highlighted_diff_lines_hash.to_json) redis.hset(
key,
diff_file_id,
compose_data(highlighted_diff_lines_hash.to_json)
)
end end
# HSETs have to have their expiration date manually updated # HSETs have to have their expiration date manually updated
...@@ -152,12 +156,45 @@ module Gitlab ...@@ -152,12 +156,45 @@ module Gitlab
end end
results.map! do |result| results.map! do |result|
JSON.parse(result, symbolize_names: true) unless result.nil? JSON.parse(extract_data(result), symbolize_names: true) unless result.nil?
end end
file_paths.zip(results).to_h file_paths.zip(results).to_h
end end
def compose_data(json_data)
if ::Feature.enabled?(:gzip_diff_cache, default_enabled: true)
# #compress returns ASCII-8BIT, so we need to force the encoding to
# UTF-8 before caching it in redis, else we risk encoding mismatch
# errors.
#
ActiveSupport::Gzip.compress(json_data).force_encoding("UTF-8")
else
json_data
end
rescue Zlib::GzipFile::Error
json_data
end
def extract_data(data)
# Since when we deploy this code, we'll be dealing with an already
# populated cache full of data that isn't gzipped, we want to also
# check to see if the data is gzipped before we attempt to #decompress
# it, thus we check the first 2 bytes for "\x1F\x8B" to confirm it is
# a gzipped string. While a non-gzipped string will raise a
# Zlib::GzipFile::Error, which we're rescuing, we don't want to count
# on rescue for control flow. This check can be removed in the release
# after this change is released.
#
if ::Feature.enabled?(:gzip_diff_cache, default_enabled: true) && data[0..1] == "\x1F\x8B"
ActiveSupport::Gzip.decompress(data)
else
data
end
rescue Zlib::GzipFile::Error
data
end
def cacheable?(diff_file) def cacheable?(diff_file)
diffable.present? && diff_file.text? && diff_file.diffable? diffable.present? && diff_file.text? && diff_file.diffable?
end end
......
...@@ -113,7 +113,7 @@ describe Gitlab::Diff::HighlightCache, :clean_gitlab_redis_cache do ...@@ -113,7 +113,7 @@ describe Gitlab::Diff::HighlightCache, :clean_gitlab_redis_cache do
allow(redis).to receive(:info).and_return({ "redis_version" => "3.0.0" }) allow(redis).to receive(:info).and_return({ "redis_version" => "3.0.0" })
expect(described_class.gitlab_redis_diff_caching_memory_usage_bytes) expect(described_class.gitlab_redis_diff_caching_memory_usage_bytes)
.not_to receive(:observe).and_call_original .not_to receive(:observe)
cache.send(:write_to_redis_hash, diff_hash) cache.send(:write_to_redis_hash, diff_hash)
end end
...@@ -163,6 +163,56 @@ describe Gitlab::Diff::HighlightCache, :clean_gitlab_redis_cache do ...@@ -163,6 +163,56 @@ describe Gitlab::Diff::HighlightCache, :clean_gitlab_redis_cache do
end end
end end
describe "GZip usage" do
let(:diff_file) do
diffs = merge_request.diffs
raw_diff = diffs.diffable.raw_diffs(diffs.diff_options.merge(paths: ['CHANGELOG'])).first
Gitlab::Diff::File.new(raw_diff,
repository: diffs.project.repository,
diff_refs: diffs.diff_refs,
fallback_diff_refs: diffs.fallback_diff_refs)
end
context "feature flag :gzip_diff_cache disabled" do
before do
stub_feature_flags(gzip_diff_cache: true)
end
it "uses ActiveSupport::Gzip when reading from the cache" do
expect(ActiveSupport::Gzip).to receive(:decompress).at_least(:once).and_call_original
cache.write_if_empty
cache.decorate(diff_file)
end
it "uses ActiveSupport::Gzip to compress data when writing to cache" do
expect(ActiveSupport::Gzip).to receive(:compress).and_call_original
cache.send(:write_to_redis_hash, diff_hash)
end
end
context "feature flag :gzip_diff_cache disabled" do
before do
stub_feature_flags(gzip_diff_cache: false)
end
it "doesn't use ActiveSupport::Gzip when reading from the cache" do
expect(ActiveSupport::Gzip).not_to receive(:decompress)
cache.write_if_empty
cache.decorate(diff_file)
end
it "doesn't use ActiveSupport::Gzip to compress data when writing to cache" do
expect(ActiveSupport::Gzip).not_to receive(:compress)
expect { cache.send(:write_to_redis_hash, diff_hash) }
.to change { Gitlab::Redis::Cache.with { |r| r.hgetall(cache_key) } }
end
end
end
describe 'metrics' do describe 'metrics' do
it 'defines :gitlab_redis_diff_caching_memory_usage_bytes histogram' do it 'defines :gitlab_redis_diff_caching_memory_usage_bytes histogram' do
expect(described_class).to respond_to(:gitlab_redis_diff_caching_memory_usage_bytes) expect(described_class).to respond_to(:gitlab_redis_diff_caching_memory_usage_bytes)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment