Commit e20c483a authored by Stan Hu's avatar Stan Hu

Roll back support for caching encoding detection

This feature can lead to correctness issues with binary detection (see
https://gitlab.com/gitlab-org/gitlab/-/issues/340013) since blob data is
not guaranteed to be loaded entirely when detection is run.

Benchmarks in https://gitlab.com/gitlab-org/gitlab/-/issues/34001 show
on staging that the cost of Redis lookups can be 80x slower than just
running Charlock Holmes of up to 8000 characters per file.

Relates to:

* https://gitlab.com/gitlab-org/gitlab/-/issues/340013
* https://gitlab.com/gitlab-org/gitlab/-/issues/328819

Changelog: changed
parent 535c8ba6
---
name: cached_encoding_detection
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/60128
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/328819
milestone: '13.12'
type: development
group: group::source code
default_enabled: false
......@@ -40,16 +40,8 @@ module Gitlab
def detect_encoding(data, limit: CharlockHolmes::EncodingDetector::DEFAULT_BINARY_SCAN_LEN, cache_key: nil)
return if data.nil?
if Feature.enabled?(:cached_encoding_detection, type: :development, default_enabled: :yaml)
return CharlockHolmes::EncodingDetector.new(limit).detect(data) unless cache_key.present?
Rails.cache.fetch([:detect_binary, CharlockHolmes::VERSION, cache_key], expires_in: 1.week) do
CharlockHolmes::EncodingDetector.new(limit).detect(data)
end
else
CharlockHolmes::EncodingDetector.new(limit).detect(data)
end
end
def detect_binary?(data, detect = nil)
detect ||= detect_encoding(data)
......
......@@ -241,7 +241,7 @@ RSpec.describe Gitlab::EncodingHelper do
let(:data) { binary_string }
let(:kwargs) { {} }
shared_examples 'detects encoding' do
context 'detects encoding' do
it { is_expected.to be_a(Hash) }
it 'correctly detects the binary' do
......@@ -264,33 +264,5 @@ RSpec.describe Gitlab::EncodingHelper do
end
end
end
context 'cached_encoding_detection is enabled' do
before do
stub_feature_flags(cached_encoding_detection: true)
end
it_behaves_like 'detects encoding'
context 'cache_key is provided' do
let(:kwargs) do
{ cache_key: %w(foo bar) }
end
it 'uses that cache_key to serve from the cache' do
expect(Rails.cache).to receive(:fetch).with([:detect_binary, CharlockHolmes::VERSION, %w(foo bar)], expires_in: 1.week).and_call_original
expect(subject[:type]).to eq(:binary)
end
end
end
context 'cached_encoding_detection is disabled' do
before do
stub_feature_flags(cached_encoding_detection: false)
end
it_behaves_like 'detects encoding'
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment