Commit b6f86010 authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch 'limit-grape-param-log-size' into 'master'

Limit size of params array in JSON logs to 10 KiB

See merge request gitlab-org/gitlab!25158
parents 67788cdf f2d677ac
---
title: Limit size of params array in JSON logs to 10 KiB
merge_request: 25158
author:
type: changed
...@@ -28,7 +28,7 @@ unless Gitlab::Runtime.sidekiq? ...@@ -28,7 +28,7 @@ unless Gitlab::Runtime.sidekiq?
payload = { payload = {
time: Time.now.utc.iso8601(3), time: Time.now.utc.iso8601(3),
params: params, params: Gitlab::Utils::LogLimitedArray.log_limited_array(params),
remote_ip: event.payload[:remote_ip], remote_ip: event.payload[:remote_ip],
user_id: event.payload[:user_id], user_id: event.payload[:user_id],
username: event.payload[:username], username: event.payload[:username],
......
...@@ -25,9 +25,12 @@ module Gitlab ...@@ -25,9 +25,12 @@ module Gitlab
def process_params(data) def process_params(data)
return [] unless data.has_key?(:params) return [] unless data.has_key?(:params)
data[:params] params_array =
.each_pair data[:params]
.map { |k, v| { key: k, value: utf8_encode_values(v) } } .each_pair
.map { |k, v| { key: k, value: utf8_encode_values(v) } }
Gitlab::Utils::LogLimitedArray.log_limited_array(params_array)
end end
def utf8_encode_values(data) def utf8_encode_values(data)
......
...@@ -6,8 +6,6 @@ require 'active_record/log_subscriber' ...@@ -6,8 +6,6 @@ require 'active_record/log_subscriber'
module Gitlab module Gitlab
module SidekiqLogging module SidekiqLogging
class StructuredLogger class StructuredLogger
MAXIMUM_JOB_ARGUMENTS_LENGTH = 10.kilobytes
def call(job, queue) def call(job, queue)
started_time = get_time started_time = get_time
base_payload = parse_job(job) base_payload = parse_job(job)
...@@ -85,7 +83,7 @@ module Gitlab ...@@ -85,7 +83,7 @@ module Gitlab
job['pid'] = ::Process.pid job['pid'] = ::Process.pid
job.delete('args') unless ENV['SIDEKIQ_LOG_ARGUMENTS'] job.delete('args') unless ENV['SIDEKIQ_LOG_ARGUMENTS']
job['args'] = limited_job_args(job['args']) if job['args'] job['args'] = Gitlab::Utils::LogLimitedArray.log_limited_array(job['args']) if job['args']
job job
end end
...@@ -108,21 +106,6 @@ module Gitlab ...@@ -108,21 +106,6 @@ module Gitlab
def current_time def current_time
Gitlab::Metrics::System.monotonic_time Gitlab::Metrics::System.monotonic_time
end end
def limited_job_args(args)
return unless args.is_a?(Array)
total_length = 0
limited_args = args.take_while do |arg|
total_length += arg.to_json.length
total_length <= MAXIMUM_JOB_ARGUMENTS_LENGTH
end
limited_args.push('...') if total_length > MAXIMUM_JOB_ARGUMENTS_LENGTH
limited_args
end
end end
end end
end end
# frozen_string_literal: true
module Gitlab
module Utils
module LogLimitedArray
MAXIMUM_ARRAY_LENGTH = 10.kilobytes
# Prepare an array for logging by limiting its JSON representation
# to around 10 kilobytes. Once we hit the limit, add "..." as the
# last item in the returned array.
def self.log_limited_array(array)
return [] unless array.is_a?(Array)
total_length = 0
limited_array = array.take_while do |arg|
total_length += arg.to_json.length
total_length <= MAXIMUM_ARRAY_LENGTH
end
limited_array.push('...') if total_length > MAXIMUM_ARRAY_LENGTH
limited_array
end
end
end
end
...@@ -5,16 +5,37 @@ require 'spec_helper' ...@@ -5,16 +5,37 @@ require 'spec_helper'
describe 'lograge', type: :request do describe 'lograge', type: :request do
let(:headers) { { 'X-Request-ID' => 'new-correlation-id' } } let(:headers) { { 'X-Request-ID' => 'new-correlation-id' } }
context 'for API requests' do let(:large_params) do
subject { get("/api/v4/endpoint", params: {}, headers: headers) } half_limit = Gitlab::Utils::LogLimitedArray::MAXIMUM_ARRAY_LENGTH / 2
{
a: 'a',
b: 'b' * half_limit,
c: 'c' * half_limit,
d: 'd'
}
end
let(:limited_params) do
large_params.slice(:a, :b).map { |k, v| { key: k.to_s, value: v } } + ['...']
end
context 'for API requests' do
it 'logs to api_json log' do it 'logs to api_json log' do
# we assert receiving parameters by grape logger # we assert receiving parameters by grape logger
expect_any_instance_of(Gitlab::GrapeLogging::Formatters::LogrageWithTimestamp).to receive(:call) expect_any_instance_of(Gitlab::GrapeLogging::Formatters::LogrageWithTimestamp).to receive(:call)
.with(anything, anything, anything, a_hash_including("correlation_id" => "new-correlation-id")) .with(anything, anything, anything, a_hash_including("correlation_id" => "new-correlation-id"))
.and_call_original .and_call_original
subject get("/api/v4/endpoint", params: {}, headers: headers)
end
it 'limits param size' do
expect(Lograge.formatter).to receive(:call)
.with(a_hash_including(params: limited_params))
.and_call_original
get("/api/v4/endpoint", params: large_params, headers: headers)
end end
end end
...@@ -67,6 +88,14 @@ describe 'lograge', type: :request do ...@@ -67,6 +88,14 @@ describe 'lograge', type: :request do
subject subject
end end
it 'limits param size' do
expect(Lograge.formatter).to receive(:call)
.with(a_hash_including(params: limited_params))
.and_call_original
get("/", params: large_params, headers: headers)
end
end end
context 'with a log subscriber' do context 'with a log subscriber' do
......
...@@ -99,13 +99,8 @@ describe Gitlab::SidekiqLogging::StructuredLogger do ...@@ -99,13 +99,8 @@ describe Gitlab::SidekiqLogging::StructuredLogger do
context 'when the job args are bigger than the maximum allowed' do context 'when the job args are bigger than the maximum allowed' do
it 'keeps args from the front until they exceed the limit' do it 'keeps args from the front until they exceed the limit' do
Timecop.freeze(timestamp) do Timecop.freeze(timestamp) do
job['args'] = [ half_limit = Gitlab::Utils::LogLimitedArray::MAXIMUM_ARRAY_LENGTH / 2
1, job['args'] = [1, 2, 'a' * half_limit, 'b' * half_limit, 3]
2,
'a' * (described_class::MAXIMUM_JOB_ARGUMENTS_LENGTH / 2),
'b' * (described_class::MAXIMUM_JOB_ARGUMENTS_LENGTH / 2),
3
]
expected_args = job['args'].take(3) + ['...'] expected_args = job['args'].take(3) + ['...']
......
# frozen_string_literal: true
require 'fast_spec_helper'
describe Gitlab::Utils::LogLimitedArray do
describe '.log_limited_array' do
context 'when the argument is not an array' do
it 'returns an empty array' do
expect(described_class.log_limited_array('aa')).to eq([])
end
end
context 'when the argument is an array' do
context 'when the array is under the limit' do
it 'returns the array unchanged' do
expect(described_class.log_limited_array(%w(a b))).to eq(%w(a b))
end
end
context 'when the array exceeds the limit' do
it 'replaces arguments after the limit with an ellipsis string' do
half_limit = described_class::MAXIMUM_ARRAY_LENGTH / 2
long_array = ['a' * half_limit, 'b' * half_limit, 'c']
expect(described_class.log_limited_array(long_array))
.to eq(long_array.take(1) + ['...'])
end
end
context 'when the array contains arrays and hashes' do
it 'calculates the size based on the JSON representation' do
long_array = [
'a',
['b'] * 10,
{ c: 'c' * 10 },
# Each character in the array takes up four characters: the
# character itself, the two quotes, and the comma (closing
# square bracket for the last item)
['d'] * (described_class::MAXIMUM_ARRAY_LENGTH / 4),
'e'
]
expect(described_class.log_limited_array(long_array))
.to eq(long_array.take(3) + ['...'])
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment