Commit cf057764 authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch '225600-add-gzip-compression-to-discussion-diffs' into 'master'

Apply GZip compression to discussion diffs

Closes #225600

See merge request gitlab-org/gitlab!40778
parents afba310e 767737df
---
title: Apply GZip compression to discussion diffs
merge_request: 40778
author:
type: performance
......@@ -3,6 +3,7 @@
module Gitlab
module Diff
class HighlightCache
include Gitlab::Utils::Gzip
include Gitlab::Utils::StrongMemoize
EXPIRATION = 1.week
......@@ -83,7 +84,7 @@ module Gitlab
redis.hset(
key,
diff_file_id,
compose_data(highlighted_diff_lines_hash.to_json)
gzip_compress(highlighted_diff_lines_hash.to_json)
)
end
......@@ -145,35 +146,12 @@ module Gitlab
end
results.map! do |result|
Gitlab::Json.parse(extract_data(result), symbolize_names: true) unless result.nil?
Gitlab::Json.parse(gzip_decompress(result), symbolize_names: true) unless result.nil?
end
file_paths.zip(results).to_h
end
def compose_data(json_data)
# #compress returns ASCII-8BIT, so we need to force the encoding to
# UTF-8 before caching it in redis, else we risk encoding mismatch
# errors.
#
ActiveSupport::Gzip.compress(json_data).force_encoding("UTF-8")
rescue Zlib::GzipFile::Error
json_data
end
def extract_data(data)
# Since we could be dealing with an already populated cache full of data
# that isn't gzipped, we want to also check to see if the data is
# gzipped before we attempt to #decompress it, thus we check the first
# 2 bytes for "\x1F\x8B" to confirm it is a gzipped string. While a
# non-gzipped string will raise a Zlib::GzipFile::Error, which we're
# rescuing, we don't want to count on rescue for control flow.
#
data[0..1] == "\x1F\x8B" ? ActiveSupport::Gzip.decompress(data) : data
rescue Zlib::GzipFile::Error
data
end
def cacheable?(diff_file)
diffable.present? && diff_file.text? && diff_file.diffable?
end
......
......@@ -3,6 +3,8 @@
module Gitlab
module DiscussionsDiff
class HighlightCache
extend Gitlab::Utils::Gzip
class << self
VERSION = 1
EXPIRATION = 1.week
......@@ -17,7 +19,7 @@ module Gitlab
mapping.each do |raw_key, value|
key = cache_key_for(raw_key)
multi.set(key, value.to_json, ex: EXPIRATION)
multi.set(key, gzip_compress(value.to_json), ex: EXPIRATION)
end
end
end
......@@ -44,7 +46,7 @@ module Gitlab
content.map! do |lines|
next unless lines
Gitlab::Json.parse(lines).map! do |line|
Gitlab::Json.parse(gzip_decompress(lines)).map! do |line|
Gitlab::Diff::Line.safe_init_from_hash(line)
end
end
......
# frozen_string_literal: true
module Gitlab
module Utils
module Gzip
def gzip_compress(data)
# .compress returns ASCII-8BIT, so we need to force the encoding to
# UTF-8 before caching it in redis, else we risk encoding mismatch
# errors.
#
ActiveSupport::Gzip.compress(data).force_encoding("UTF-8")
rescue Zlib::GzipFile::Error
data
end
def gzip_decompress(data)
# Since we could be dealing with an already populated cache full of data
# that isn't gzipped, we want to also check to see if the data is
# gzipped before we attempt to .decompress it, thus we check the first
# 2 bytes for "\x1F\x8B" to confirm it is a gzipped string. While a
# non-gzipped string will raise a Zlib::GzipFile::Error, which we're
# rescuing, we don't want to count on rescue for control flow.
#
data[0..1] == "\x1F\x8B" ? ActiveSupport::Gzip.decompress(data) : data
rescue Zlib::GzipFile::Error
data
end
end
end
end
......@@ -33,9 +33,9 @@ RSpec.describe Gitlab::DiscussionsDiff::HighlightCache, :clean_gitlab_redis_cach
mapping.each do |key, value|
full_key = described_class.cache_key_for(key)
found = Gitlab::Redis::Cache.with { |r| r.get(full_key) }
found_key = Gitlab::Redis::Cache.with { |r| r.get(full_key) }
expect(found).to eq(value.to_json)
expect(described_class.gzip_decompress(found_key)).to eq(value.to_json)
end
end
end
......
# frozen_string_literal: true
require 'fast_spec_helper'
RSpec.describe Gitlab::Utils::Gzip do
before do
example_class = Class.new do
include Gitlab::Utils::Gzip
def lorem_ipsum
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod "\
"tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim "\
"veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea "\
"commodo consequat. Duis aute irure dolor in reprehenderit in voluptate "\
"velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat "\
"cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id "\
"est laborum."
end
end
stub_const('ExampleClass', example_class)
end
subject { ExampleClass.new }
let(:sample_string) { subject.lorem_ipsum }
let(:compressed_string) { subject.gzip_compress(sample_string) }
describe "#gzip_compress" do
it "compresses data passed to it" do
expect(compressed_string.length).to be < sample_string.length
end
it "returns uncompressed data when encountering Zlib::GzipFile::Error" do
expect(ActiveSupport::Gzip).to receive(:compress).and_raise(Zlib::GzipFile::Error)
expect(compressed_string.length).to eq sample_string.length
end
end
describe "#gzip_decompress" do
let(:decompressed_string) { subject.gzip_decompress(compressed_string) }
it "decompresses encoded data" do
expect(decompressed_string).to eq sample_string
end
it "returns compressed data when encountering Zlib::GzipFile::Error" do
expect(ActiveSupport::Gzip).to receive(:decompress).and_raise(Zlib::GzipFile::Error)
expect(decompressed_string).not_to eq sample_string.length
end
it "returns unmodified data when it is determined to be uncompressed" do
expect(subject.gzip_decompress(sample_string)).to eq sample_string
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment