Commit e20c483a authored by Stan Hu's avatar Stan Hu

Roll back support for caching encoding detection

This feature can lead to correctness issues with binary detection (see
https://gitlab.com/gitlab-org/gitlab/-/issues/340013) since blob data is
not guaranteed to be loaded entirely when detection is run.

Benchmarks in https://gitlab.com/gitlab-org/gitlab/-/issues/34001 show
on staging that the cost of Redis lookups can be 80x slower than just
running Charlock Holmes of up to 8000 characters per file.

Relates to:

* https://gitlab.com/gitlab-org/gitlab/-/issues/340013
* https://gitlab.com/gitlab-org/gitlab/-/issues/328819

Changelog: changed
parent 535c8ba6
---
name: cached_encoding_detection
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/60128
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/328819
milestone: '13.12'
type: development
group: group::source code
default_enabled: false
...@@ -40,15 +40,7 @@ module Gitlab ...@@ -40,15 +40,7 @@ module Gitlab
def detect_encoding(data, limit: CharlockHolmes::EncodingDetector::DEFAULT_BINARY_SCAN_LEN, cache_key: nil) def detect_encoding(data, limit: CharlockHolmes::EncodingDetector::DEFAULT_BINARY_SCAN_LEN, cache_key: nil)
return if data.nil? return if data.nil?
if Feature.enabled?(:cached_encoding_detection, type: :development, default_enabled: :yaml) CharlockHolmes::EncodingDetector.new(limit).detect(data)
return CharlockHolmes::EncodingDetector.new(limit).detect(data) unless cache_key.present?
Rails.cache.fetch([:detect_binary, CharlockHolmes::VERSION, cache_key], expires_in: 1.week) do
CharlockHolmes::EncodingDetector.new(limit).detect(data)
end
else
CharlockHolmes::EncodingDetector.new(limit).detect(data)
end
end end
def detect_binary?(data, detect = nil) def detect_binary?(data, detect = nil)
......
...@@ -241,7 +241,7 @@ RSpec.describe Gitlab::EncodingHelper do ...@@ -241,7 +241,7 @@ RSpec.describe Gitlab::EncodingHelper do
let(:data) { binary_string } let(:data) { binary_string }
let(:kwargs) { {} } let(:kwargs) { {} }
shared_examples 'detects encoding' do context 'detects encoding' do
it { is_expected.to be_a(Hash) } it { is_expected.to be_a(Hash) }
it 'correctly detects the binary' do it 'correctly detects the binary' do
...@@ -264,33 +264,5 @@ RSpec.describe Gitlab::EncodingHelper do ...@@ -264,33 +264,5 @@ RSpec.describe Gitlab::EncodingHelper do
end end
end end
end end
context 'cached_encoding_detection is enabled' do
before do
stub_feature_flags(cached_encoding_detection: true)
end
it_behaves_like 'detects encoding'
context 'cache_key is provided' do
let(:kwargs) do
{ cache_key: %w(foo bar) }
end
it 'uses that cache_key to serve from the cache' do
expect(Rails.cache).to receive(:fetch).with([:detect_binary, CharlockHolmes::VERSION, %w(foo bar)], expires_in: 1.week).and_call_original
expect(subject[:type]).to eq(:binary)
end
end
end
context 'cached_encoding_detection is disabled' do
before do
stub_feature_flags(cached_encoding_detection: false)
end
it_behaves_like 'detects encoding'
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment