Commit a47e4a01 authored by Erick Bajao's avatar Erick Bajao

Implement smart cobertura class path correction

As we parse the cobertura XML, based on the given project full path and
pipeline worktree paths, we will make some assumptions on how to
determine the correct path of filenames that are not relative to the
project root.
parent 2978c49f
......@@ -916,8 +916,20 @@ module Ci
end
def collect_coverage_reports!(coverage_report)
project_path, worktree_paths = if Feature.enabled?(:smart_cobertura_parser, project)
# If the flag is disabled, we intentionally pass nil
# for both project_path and worktree_paths to fallback
# to the non-smart behavior of the parser
[project.full_path, pipeline.all_worktree_paths]
end
each_report(Ci::JobArtifact::COVERAGE_REPORT_FILE_TYPES) do |file_type, blob|
Gitlab::Ci::Parsers.fabricate!(file_type).parse!(blob, coverage_report)
Gitlab::Ci::Parsers.fabricate!(file_type).parse!(
blob,
coverage_report,
project_path: project_path,
worktree_paths: worktree_paths
)
end
coverage_report
......
......@@ -975,7 +975,7 @@ module Ci
def coverage_reports
Gitlab::Ci::Reports::CoverageReports.new.tap do |coverage_reports|
latest_report_builds(Ci::JobArtifact.coverage_reports).each do |build|
latest_report_builds(Ci::JobArtifact.coverage_reports).includes(:project).find_each do |build|
build.collect_coverage_reports!(coverage_reports)
end
end
......
---
title: Implement smart cobertura class path correction
merge_request: 48048
author:
type: changed
---
name: smart_cobertura_parser
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/48048
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/284822
milestone: '13.7'
type: development
group: group::testing
default_enabled: false
......@@ -5,50 +5,113 @@ module Gitlab
module Parsers
module Coverage
class Cobertura
CoberturaParserError = Class.new(Gitlab::Ci::Parsers::ParserError)
InvalidXMLError = Class.new(Gitlab::Ci::Parsers::ParserError)
InvalidLineInformationError = Class.new(Gitlab::Ci::Parsers::ParserError)
def parse!(xml_data, coverage_report)
GO_SOURCE_PATTERN = '/usr/local/go/src'
MAX_SOURCES = 100
def parse!(xml_data, coverage_report, project_path: nil, worktree_paths: nil)
root = Hash.from_xml(xml_data)
parse_all(root, coverage_report)
context = {
project_path: project_path,
paths: worktree_paths&.to_set,
sources: []
}
parse_all(root, coverage_report, context)
rescue Nokogiri::XML::SyntaxError
raise CoberturaParserError, "XML parsing failed"
rescue
raise CoberturaParserError, "Cobertura parsing failed"
raise InvalidXMLError, "XML parsing failed"
end
private
def parse_all(root, coverage_report)
def parse_all(root, coverage_report, context)
return unless root.present?
root.each do |key, value|
parse_node(key, value, coverage_report)
parse_node(key, value, coverage_report, context)
end
end
def parse_node(key, value, coverage_report)
return if key == 'sources'
if key == 'class'
def parse_node(key, value, coverage_report, context)
if key == 'sources' && value['source'].present?
parse_sources(value['source'], context)
elsif key == 'package'
Array.wrap(value).each do |item|
parse_class(item, coverage_report)
parse_package(item, coverage_report, context)
end
elsif key == 'class'
# This means the cobertura XML does not have classes within package nodes.
# This is possible in some cases like in simple JS project structures
# running Jest.
Array.wrap(value).each do |item|
parse_class(item, coverage_report, context)
end
elsif value.is_a?(Hash)
parse_all(value, coverage_report)
parse_all(value, coverage_report, context)
elsif value.is_a?(Array)
value.each do |item|
parse_all(item, coverage_report)
parse_all(item, coverage_report, context)
end
end
end
def parse_class(file, coverage_report)
def parse_sources(sources, context)
return unless context[:project_path] && context[:paths]
sources = Array.wrap(sources)
# TODO: Go cobertura has a different format with how their packages
# are included in the filename. So we can't rely on the sources.
# We'll deal with this later.
return if sources.include?(GO_SOURCE_PATTERN)
sources.each do |source|
source = build_source_path(source, context)
context[:sources] << source if source.present?
end
end
def build_source_path(source, context)
# | raw source | extracted |
# |-----------------------------|------------|
# | /builds/foo/test/SampleLib/ | SampleLib/ |
# | /builds/foo/test/something | something |
# | /builds/foo/test/ | nil |
# | /builds/foo/test | nil |
source.split("#{context[:project_path]}/", 2)[1]
end
def parse_package(package, coverage_report, context)
classes = package.dig('classes', 'class')
return unless classes.present?
matched_filenames = Array.wrap(classes).map do |item|
parse_class(item, coverage_report, context)
end
# Remove these filenames from the paths to avoid conflict
# with other packages that may contain the same class filenames
remove_matched_filenames(matched_filenames, context)
end
def remove_matched_filenames(filenames, context)
return unless context[:paths]
filenames.each { |f| context[:paths].delete(f) }
end
def parse_class(file, coverage_report, context)
return unless file["filename"].present? && file["lines"].present?
parsed_lines = parse_lines(file["lines"])
filename = determine_filename(file["filename"], context)
coverage_report.add_file(filename, Hash[parsed_lines]) if filename
coverage_report.add_file(file["filename"], Hash[parsed_lines])
filename
end
def parse_lines(lines)
......@@ -58,6 +121,27 @@ module Gitlab
# Using `Integer()` here to raise exception on invalid values
[Integer(line["number"]), Integer(line["hits"])]
end
rescue
raise InvalidLineInformationError, "Line information had invalid values"
end
def determine_filename(filename, context)
return filename unless context[:sources].any?
full_filename = nil
context[:sources].each_with_index do |source, index|
break if index >= MAX_SOURCES
break if full_filename = check_source(source, filename, context)
end
full_filename
end
def check_source(source, filename, context)
full_path = File.join(source, filename)
return full_path if context[:paths].include?(full_path)
end
end
end
......
......@@ -229,6 +229,16 @@ FactoryBot.define do
end
end
trait :coverage_with_paths_not_relative_to_project_root do
file_type { :cobertura }
file_format { :gzip }
after(:build) do |artifact, evaluator|
artifact.file = fixture_file_upload(
Rails.root.join('spec/fixtures/cobertura/coverage_with_paths_not_relative_to_project_root.xml.gz'), 'application/x-gzip')
end
end
trait :coverage_with_corrupted_data do
file_type { :cobertura }
file_format { :gzip }
......
......@@ -4059,13 +4059,40 @@ RSpec.describe Ci::Build do
end
end
context 'when there is a Cobertura coverage report with class filename paths not relative to project root' do
before do
allow(build.project).to receive(:full_path).and_return('root/javademo')
allow(build.pipeline).to receive(:all_worktree_paths).and_return(['src/main/java/com/example/javademo/User.java'])
create(:ci_job_artifact, :coverage_with_paths_not_relative_to_project_root, job: build, project: build.project)
end
it 'parses blobs and add the results to the coverage report with corrected paths' do
expect { subject }.not_to raise_error
expect(coverage_report.files.keys).to match_array(['src/main/java/com/example/javademo/User.java'])
end
context 'and smart_cobertura_parser feature flag is disabled' do
before do
stub_feature_flags(smart_cobertura_parser: false)
end
it 'parses blobs and add the results to the coverage report with unmodified paths' do
expect { subject }.not_to raise_error
expect(coverage_report.files.keys).to match_array(['com/example/javademo/User.java'])
end
end
end
context 'when there is a corrupted Cobertura coverage report' do
before do
create(:ci_job_artifact, :coverage_with_corrupted_data, job: build, project: build.project)
end
it 'raises an error' do
expect { subject }.to raise_error(Gitlab::Ci::Parsers::Coverage::Cobertura::CoberturaParserError)
expect { subject }.to raise_error(Gitlab::Ci::Parsers::Coverage::Cobertura::InvalidLineInformationError)
end
end
end
......
......@@ -3426,6 +3426,16 @@ RSpec.describe Ci::Pipeline, :mailer, factory_default: :keep do
])
end
it 'does not execute N+1 queries' do
single_build_pipeline = create(:ci_empty_pipeline, status: :created, project: project)
single_rspec = create(:ci_build, :success, name: 'rspec', pipeline: single_build_pipeline, project: project)
create(:ci_job_artifact, :cobertura, job: single_rspec, project: project)
control = ActiveRecord::QueryRecorder.new { single_build_pipeline.coverage_reports }
expect { subject }.not_to exceed_query_limit(control)
end
context 'when builds are retried' do
let!(:build_rspec) { create(:ci_build, :retried, :success, name: 'rspec', pipeline: pipeline, project: project) }
let!(:build_golang) { create(:ci_build, :retried, :success, name: 'golang', pipeline: pipeline, project: project) }
......
......@@ -7,7 +7,8 @@ RSpec.describe ::Ci::Pipelines::CreateArtifactService do
subject { described_class.new.execute(pipeline) }
context 'when pipeline has coverage reports' do
let(:pipeline) { create(:ci_pipeline, :with_coverage_reports) }
let(:project) { create(:project, :repository) }
let(:pipeline) { create(:ci_pipeline, :with_coverage_reports, project: project) }
context 'when pipeline is finished' do
it 'creates a pipeline artifact' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment