Commit 14835eb9 authored by Brian Williams's avatar Brian Williams Committed by Alper Akgun

Fix invalid raw_metadata being persisted

`CreateServiceBase` passes a hash to `raw_metadata` when it should
actually be receiving JSON instead. This change updates
`CreateServiceBase` to pass JSON instead, and also cleans up the invalid
data in the DB with a background migration.

Changelog: fixed
parent 1f194e56
# frozen_string_literal: true
class AddTmpIndexOnReportType < Gitlab::Database::Migration[1.0]
# Temporary index to perform migration fixing invalid vulnerability_occurrences.raw_metadata rows
# Will be removed with https://gitlab.com/gitlab-org/gitlab/-/issues/349605
INDEX_NAME = 'tmp_idx_vulnerability_occurrences_on_id_where_report_type_7_99'
disable_ddl_transaction!
def up
add_concurrent_index :vulnerability_occurrences, :id, where: 'report_type IN (7, 99)', name: INDEX_NAME
end
def down
remove_concurrent_index_by_name :vulnerability_occurrences, INDEX_NAME
end
end
# frozen_string_literal: true
class ConvertStringifiedRawMetadataHashToJson < Gitlab::Database::Migration[1.0]
MIGRATION_CLASS = Gitlab::BackgroundMigration::FixVulnerabilityOccurrencesWithHashesAsRawMetadata
MODEL_CLASS = MIGRATION_CLASS::Finding
DELAY_INTERVAL = 2.minutes
BATCH_SIZE = 500
disable_ddl_transaction!
def up
queue_background_migration_jobs_by_range_at_intervals(
MODEL_CLASS.by_api_report_types,
MIGRATION_CLASS,
DELAY_INTERVAL,
batch_size: BATCH_SIZE
)
end
def down
# no-op
# up fixes invalid data by updating columns in-place.
# It is a backwards-compatible change, and reversing it in a downgrade would not be desirable.
end
end
c501bc857cef21a8657508e9a286feb3c6f5db873247707e051a8702b1e80e79
\ No newline at end of file
4e8e0917bcfcadf288425e82eeb3747d775bb301017a9b320b694cd43ed0d60a
\ No newline at end of file
...@@ -28023,6 +28023,8 @@ CREATE UNIQUE INDEX term_agreements_unique_index ON term_agreements USING btree ...@@ -28023,6 +28023,8 @@ CREATE UNIQUE INDEX term_agreements_unique_index ON term_agreements USING btree
CREATE INDEX tmp_idx_deduplicate_vulnerability_occurrences ON vulnerability_occurrences USING btree (project_id, report_type, location_fingerprint, primary_identifier_id, id); CREATE INDEX tmp_idx_deduplicate_vulnerability_occurrences ON vulnerability_occurrences USING btree (project_id, report_type, location_fingerprint, primary_identifier_id, id);
CREATE INDEX tmp_idx_vulnerability_occurrences_on_id_where_report_type_7_99 ON vulnerability_occurrences USING btree (id) WHERE (report_type = ANY (ARRAY[7, 99]));
CREATE INDEX tmp_index_namespaces_empty_traversal_ids_with_child_namespaces ON namespaces USING btree (id) WHERE ((parent_id IS NOT NULL) AND (traversal_ids = '{}'::integer[])); CREATE INDEX tmp_index_namespaces_empty_traversal_ids_with_child_namespaces ON namespaces USING btree (id) WHERE ((parent_id IS NOT NULL) AND (traversal_ids = '{}'::integer[]));
CREATE INDEX tmp_index_namespaces_empty_traversal_ids_with_root_namespaces ON namespaces USING btree (id) WHERE ((parent_id IS NULL) AND (traversal_ids = '{}'::integer[])); CREATE INDEX tmp_index_namespaces_empty_traversal_ids_with_root_namespaces ON namespaces USING btree (id) WHERE ((parent_id IS NULL) AND (traversal_ids = '{}'::integer[]));
...@@ -112,6 +112,9 @@ module Vulnerabilities ...@@ -112,6 +112,9 @@ module Vulnerabilities
project_id: @project.id project_id: @project.id
) )
raw_metadata = {}
raw_metadata['location'] = location if location
Vulnerabilities::Finding.new( Vulnerabilities::Finding.new(
project: @project, project: @project,
identifiers: identifiers, identifiers: identifiers,
...@@ -122,11 +125,14 @@ module Vulnerabilities ...@@ -122,11 +125,14 @@ module Vulnerabilities
confidence: vulnerability.confidence, confidence: vulnerability.confidence,
report_type: vulnerability.report_type, report_type: vulnerability.report_type,
project_fingerprint: Digest::SHA1.hexdigest(identifiers.first.name), project_fingerprint: Digest::SHA1.hexdigest(identifiers.first.name),
location: location,
location_fingerprint: loc_fingerprint, location_fingerprint: loc_fingerprint,
metadata_version: metadata_version, metadata_version: metadata_version,
raw_metadata: {
location: location # raw_metadata is a text field rather than jsonb,
}, # so it is important to convert data to JSON.
# It will be removed in https://gitlab.com/groups/gitlab-org/-/epics/4239.
raw_metadata: raw_metadata.to_json,
scanner: scanner, scanner: scanner,
uuid: uuid, uuid: uuid,
message: message, message: message,
......
...@@ -141,6 +141,8 @@ RSpec.describe Vulnerabilities::ManuallyCreateService do ...@@ -141,6 +141,8 @@ RSpec.describe Vulnerabilities::ManuallyCreateService do
expect(finding.message).to eq(params.dig(:vulnerability, :message)) expect(finding.message).to eq(params.dig(:vulnerability, :message))
expect(finding.description).to eq(params.dig(:vulnerability, :description)) expect(finding.description).to eq(params.dig(:vulnerability, :description))
expect(finding.solution).to eq(params.dig(:vulnerability, :solution)) expect(finding.solution).to eq(params.dig(:vulnerability, :solution))
expect(finding.location).to be_empty
expect(finding.raw_metadata).to eq("{}")
scanner = finding.scanner scanner = finding.scanner
expect(scanner.name).to eq(params.dig(:vulnerability, :scanner, :name)) expect(scanner.name).to eq(params.dig(:vulnerability, :scanner, :name))
......
...@@ -15,6 +15,13 @@ RSpec.describe Vulnerabilities::StarboardVulnerabilityCreateService do ...@@ -15,6 +15,13 @@ RSpec.describe Vulnerabilities::StarboardVulnerabilityCreateService do
severity: 'high', severity: 'high',
confidence: 'unknown', confidence: 'unknown',
location: { location: {
image: 'alpine:latest',
dependency: {
version: '0.1.0',
package: {
name: 'libc'
}
},
kubernetes_resource: { kubernetes_resource: {
namespace: 'production', namespace: 'production',
kind: 'deployment', kind: 'deployment',
...@@ -67,6 +74,19 @@ RSpec.describe Vulnerabilities::StarboardVulnerabilityCreateService do ...@@ -67,6 +74,19 @@ RSpec.describe Vulnerabilities::StarboardVulnerabilityCreateService do
expect(finding.description).to eq(params.dig(:vulnerability, :description)) expect(finding.description).to eq(params.dig(:vulnerability, :description))
expect(finding.severity).to eq(params.dig(:vulnerability, :severity)) expect(finding.severity).to eq(params.dig(:vulnerability, :severity))
expect(finding.confidence).to eq(params.dig(:vulnerability, :confidence)) expect(finding.confidence).to eq(params.dig(:vulnerability, :confidence))
expect(finding.location['image']).to eq('alpine:latest')
expect(finding.location.dig('dependency', 'package', 'name')).to eq('libc')
expect(finding.location.dig('dependency', 'version')).to eq('0.1.0')
expect(finding.location['kubernetes_resource']).to eq(
{
'namespace' => 'production',
'kind' => 'deployment',
'name' => 'nginx',
'container' => 'nginx'
}
)
expect(finding.metadata['location']).to eq(finding.location)
scanner = finding.scanner scanner = finding.scanner
expect(scanner.external_id).to eq(params.dig(:scanner, :id)) expect(scanner.external_id).to eq(params.dig(:scanner, :id))
......
# frozen_string_literal: true
require 'parser/ruby27'
module Gitlab
module BackgroundMigration
# This migration fixes raw_metadata entries which have incorrectly been passed a Ruby Hash instead of JSON data.
class FixVulnerabilityOccurrencesWithHashesAsRawMetadata
CLUSTER_IMAGE_SCANNING_REPORT_TYPE = 7
GENERIC_REPORT_TYPE = 99
# Type error is used to handle unexpected types when parsing stringified hashes.
class TypeError < ::StandardError
attr_reader :message, :type
def initialize(message, type)
@message = message
@type = type
end
end
# Migration model namespace isolated from application code.
class Finding < ActiveRecord::Base
include EachBatch
self.table_name = 'vulnerability_occurrences'
scope :by_api_report_types, -> { where(report_type: [CLUSTER_IMAGE_SCANNING_REPORT_TYPE, GENERIC_REPORT_TYPE]) }
end
def perform(start_id, end_id)
Finding.by_api_report_types.where(id: start_id..end_id).each do |finding|
next if valid_json?(finding.raw_metadata)
metadata = hash_from_s(finding.raw_metadata)
finding.update(raw_metadata: metadata.to_json) if metadata
end
mark_job_as_succeeded(start_id, end_id)
end
def hash_from_s(str_hash)
ast = Parser::Ruby27.parse(str_hash)
unless ast.type == :hash
::Gitlab::AppLogger.error(message: "expected raw_metadata to be a hash", type: ast.type)
return
end
parse_hash(ast)
rescue Parser::SyntaxError => e
::Gitlab::AppLogger.error(message: "error parsing raw_metadata", error: e.message)
nil
rescue TypeError => e
::Gitlab::AppLogger.error(message: "error parsing raw_metadata", error: e.message, type: e.type)
nil
end
private
def mark_job_as_succeeded(*arguments)
Gitlab::Database::BackgroundMigrationJob.mark_all_as_succeeded(
'FixVulnerabilityOccurrencesWithHashesAsRawMetadata',
arguments
)
end
def valid_json?(metadata)
Oj.load(metadata)
true
rescue Oj::ParseError, Encoding::UndefinedConversionError
false
end
def parse_hash(hash)
out = {}
hash.children.each do |node|
unless node.type == :pair
raise TypeError.new("expected child of hash to be a `pair`", node.type)
end
key, value = node.children
key = parse_key(key)
value = parse_value(value)
out[key] = value
end
out
end
def parse_key(key)
case key.type
when :sym, :str, :int
key.children.first
else
raise TypeError.new("expected key to be either symbol, string, or integer", key.type)
end
end
def parse_value(value)
case value.type
when :sym, :str, :int
value.children.first
# rubocop:disable Lint/BooleanSymbol
when :true
true
when :false
false
# rubocop:enable Lint/BooleanSymbol
when :nil
nil
when :array
value.children.map { |c| parse_value(c) }
when :hash
parse_hash(value)
else
raise TypeError.new("value of a pair was an unexpected type", value.type)
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::FixVulnerabilityOccurrencesWithHashesAsRawMetadata, schema: 20211209203821 do
let(:users) { table(:users) }
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:scanners) { table(:vulnerability_scanners) }
let(:identifiers) { table(:vulnerability_identifiers) }
let(:findings) { table(:vulnerability_occurrences) }
let(:user) { users.create!(name: 'Test User', projects_limit: 10, username: 'test-user', email: '1') }
let(:namespace) do
namespaces.create!(
owner_id: user.id,
name: user.name,
path: user.username
)
end
let(:project) do
projects.create!(namespace_id: namespace.id, name: 'Test Project')
end
let(:scanner) do
scanners.create!(
project_id: project.id,
external_id: 'test-scanner',
name: 'Test Scanner',
vendor: 'GitLab'
)
end
let(:primary_identifier) do
identifiers.create!(
project_id: project.id,
external_type: 'cve',
name: 'CVE-2021-1234',
external_id: 'CVE-2021-1234',
fingerprint: '4c0fe491999f94701ee437588554ef56322ae276'
)
end
let(:finding) do
findings.create!(
raw_metadata: raw_metadata,
project_id: project.id,
scanner_id: scanner.id,
primary_identifier_id: primary_identifier.id,
uuid: '4deb090a-bedf-5ccc-aa9a-ac8055a1ea81',
project_fingerprint: '1caa750a6dad769a18ad6f40b413b3b6ab1c8d77',
location_fingerprint: '6d1f35f53b065238abfcadc01336ce65d112a2bd',
name: 'name',
report_type: 7,
severity: 0,
confidence: 0,
detection_method: 'gitlab_security_report',
metadata_version: 'cluster_image_scanning:1.0',
created_at: "2021-12-10 14:27:42 -0600",
updated_at: "2021-12-10 14:27:42 -0600"
)
end
subject(:perform) { described_class.new.perform(finding.id, finding.id) }
context 'with stringified hash as raw_metadata' do
let(:raw_metadata) do
'{:location=>{"image"=>"index.docker.io/library/nginx:latest", "kubernetes_resource"=>{"namespace"=>"production", "kind"=>"deployment", "name"=>"nginx", "container_name"=>"nginx", "agent_id"=>"2"}, "dependency"=>{"package"=>{"name"=>"libc"}, "version"=>"v1.2.3"}}}'
end
it 'converts stringified hash to JSON' do
expect { perform }.not_to raise_error
result = finding.reload.raw_metadata
metadata = Oj.load(result)
expect(metadata).to eq(
{
'location' => {
'image' => 'index.docker.io/library/nginx:latest',
'kubernetes_resource' => {
'namespace' => 'production',
'kind' => 'deployment',
'name' => 'nginx',
'container_name' => 'nginx',
'agent_id' => '2'
},
'dependency' => {
'package' => { 'name' => 'libc' },
'version' => 'v1.2.3'
}
}
}
)
end
end
context 'with valid raw_metadata' do
where(:raw_metadata) do
[
'{}',
'{"location":null}',
'{"location":{"image":"index.docker.io/library/nginx:latest","kubernetes_resource":{"namespace":"production","kind":"deployment","name":"nginx","container_name":"nginx","agent_id":"2"},"dependency":{"package":{"name":"libc"},"version":"v1.2.3"}}}'
]
end
with_them do
it 'does not change the raw_metadata' do
expect { perform }.not_to raise_error
result = finding.reload.raw_metadata
expect(result).to eq(raw_metadata)
end
end
end
context 'when raw_metadata contains forbidden types' do
using RSpec::Parameterized::TableSyntax
where(:raw_metadata, :type) do
'def foo; "bar"; end' | :def
'`cat somefile`' | :xstr
'exec("cat /etc/passwd")' | :send
end
with_them do
it 'does not change the raw_metadata' do
expect(Gitlab::AppLogger).to receive(:error).with(message: "expected raw_metadata to be a hash", type: type)
expect { perform }.not_to raise_error
result = finding.reload.raw_metadata
expect(result).to eq(raw_metadata)
end
end
end
context 'when forbidden types are nested inside a hash' do
using RSpec::Parameterized::TableSyntax
where(:raw_metadata, :type) do
'{:location=>Env.fetch("SOME_VAR")}' | :send
'{:location=>{:image=>Env.fetch("SOME_VAR")}}' | :send
# rubocop:disable Lint/InterpolationCheck
'{"key"=>"value: #{send}"}' | :dstr
# rubocop:enable Lint/InterpolationCheck
end
with_them do
it 'does not change the raw_metadata' do
expect(Gitlab::AppLogger).to receive(:error).with(
message: "error parsing raw_metadata",
error: "value of a pair was an unexpected type",
type: type
)
expect { perform }.not_to raise_error
result = finding.reload.raw_metadata
expect(result).to eq(raw_metadata)
end
end
end
context 'when key is an unexpected type' do
let(:raw_metadata) { "{nil=>nil}" }
it 'logs error' do
expect(Gitlab::AppLogger).to receive(:error).with(
message: "error parsing raw_metadata",
error: "expected key to be either symbol, string, or integer",
type: :nil
)
expect { perform }.not_to raise_error
end
end
context 'when raw_metadata cannot be parsed' do
let(:raw_metadata) { "{" }
it 'logs error' do
expect(Gitlab::AppLogger).to receive(:error).with(message: "error parsing raw_metadata", error: "unexpected token $end")
expect { perform }.not_to raise_error
end
end
describe '#hash_from_s' do
subject { described_class.new.hash_from_s(input) }
context 'with valid input' do
let(:input) { '{:location=>{"image"=>"index.docker.io/library/nginx:latest", "kubernetes_resource"=>{"namespace"=>"production", "kind"=>"deployment", "name"=>"nginx", "container_name"=>"nginx", "agent_id"=>2}, "dependency"=>{"package"=>{"name"=>"libc"}, "version"=>"v1.2.3"}}}' }
it 'converts string to a hash' do
expect(subject).to eq({
location: {
'image' => 'index.docker.io/library/nginx:latest',
'kubernetes_resource' => {
'namespace' => 'production',
'kind' => 'deployment',
'name' => 'nginx',
'container_name' => 'nginx',
'agent_id' => 2
},
'dependency' => {
'package' => { 'name' => 'libc' },
'version' => 'v1.2.3'
}
}
})
end
end
using RSpec::Parameterized::TableSyntax
where(:input, :expected) do
'{}' | {}
'{"bool"=>true}' | { 'bool' => true }
'{"bool"=>false}' | { 'bool' => false }
'{"nil"=>nil}' | { 'nil' => nil }
'{"array"=>[1, "foo", nil]}' | { 'array' => [1, "foo", nil] }
'{foo: :bar}' | { foo: :bar }
'{foo: {bar: "bin"}}' | { foo: { bar: "bin" } }
end
with_them do
specify { expect(subject).to eq(expected) }
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment