Commit 66c1b902 authored by Nikola Milojevic's avatar Nikola Milojevic Committed by Alper Akgun

Utilize Nokogiri Reader

- Parse xml per class node instead of whole file
parent 884ecd76
---
name: use_cobertura_sax_parser
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/79866
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/352579
milestone: '14.9'
type: development
group: group::memory
default_enabled: false
......@@ -8,140 +8,12 @@ module Gitlab
InvalidXMLError = Class.new(Gitlab::Ci::Parsers::ParserError)
InvalidLineInformationError = Class.new(Gitlab::Ci::Parsers::ParserError)
GO_SOURCE_PATTERN = '/usr/local/go/src'
MAX_SOURCES = 100
def parse!(xml_data, coverage_report, project_path: nil, worktree_paths: nil)
root = Hash.from_xml(xml_data)
context = {
project_path: project_path,
paths: worktree_paths&.to_set,
sources: []
}
parse_all(root, coverage_report, context)
rescue Nokogiri::XML::SyntaxError
raise InvalidXMLError, "XML parsing failed"
end
private
def parse_all(root, coverage_report, context)
return unless root.present?
root.each do |key, value|
parse_node(key, value, coverage_report, context)
end
end
def parse_node(key, value, coverage_report, context)
if key == 'sources' && value && value['source'].present?
parse_sources(value['source'], context)
elsif key == 'package'
Array.wrap(value).each do |item|
parse_package(item, coverage_report, context)
end
elsif key == 'class'
# This means the cobertura XML does not have classes within package nodes.
# This is possible in some cases like in simple JS project structures
# running Jest.
Array.wrap(value).each do |item|
parse_class(item, coverage_report, context)
end
elsif value.is_a?(Hash)
parse_all(value, coverage_report, context)
elsif value.is_a?(Array)
value.each do |item|
parse_all(item, coverage_report, context)
end
end
end
def parse_sources(sources, context)
return unless context[:project_path] && context[:paths]
sources = Array.wrap(sources)
# TODO: Go cobertura has a different format with how their packages
# are included in the filename. So we can't rely on the sources.
# We'll deal with this later.
return if sources.include?(GO_SOURCE_PATTERN)
sources.each do |source|
source = build_source_path(source, context)
context[:sources] << source if source.present?
end
end
def build_source_path(source, context)
# | raw source | extracted |
# |-----------------------------|------------|
# | /builds/foo/test/SampleLib/ | SampleLib/ |
# | /builds/foo/test/something | something |
# | /builds/foo/test/ | nil |
# | /builds/foo/test | nil |
source.split("#{context[:project_path]}/", 2)[1]
end
def parse_package(package, coverage_report, context)
classes = package.dig('classes', 'class')
return unless classes.present?
matched_filenames = Array.wrap(classes).map do |item|
parse_class(item, coverage_report, context)
if Feature.enabled?(:use_cobertura_sax_parser, default_enabled: :yaml)
Nokogiri::XML::SAX::Parser.new(SaxDocument.new(coverage_report, project_path, worktree_paths)).parse(xml_data)
else
DomParser.new.parse(xml_data, coverage_report, project_path, worktree_paths)
end
# Remove these filenames from the paths to avoid conflict
# with other packages that may contain the same class filenames
remove_matched_filenames(matched_filenames, context)
end
def remove_matched_filenames(filenames, context)
return unless context[:paths]
filenames.each { |f| context[:paths].delete(f) }
end
def parse_class(file, coverage_report, context)
return unless file["filename"].present? && file["lines"].present?
parsed_lines = parse_lines(file["lines"])
filename = determine_filename(file["filename"], context)
coverage_report.add_file(filename, Hash[parsed_lines]) if filename
filename
end
def parse_lines(lines)
line_array = Array.wrap(lines["line"])
line_array.map do |line|
# Using `Integer()` here to raise exception on invalid values
[Integer(line["number"]), Integer(line["hits"])]
end
rescue StandardError
raise InvalidLineInformationError, "Line information had invalid values"
end
def determine_filename(filename, context)
return filename unless context[:sources].any?
full_filename = nil
context[:sources].each_with_index do |source, index|
break if index >= MAX_SOURCES
break if full_filename = check_source(source, filename, context)
end
full_filename
end
def check_source(source, filename, context)
full_path = File.join(source, filename)
return full_path if context[:paths].include?(full_path)
end
end
end
......
# frozen_string_literal: true
module Gitlab
module Ci
module Parsers
module Coverage
class DomParser
GO_SOURCE_PATTERN = '/usr/local/go/src'
MAX_SOURCES = 100
def parse(xml_data, coverage_report, project_path, worktree_paths)
root = Hash.from_xml(xml_data)
context = {
project_path: project_path,
paths: worktree_paths&.to_set,
sources: []
}
parse_all(root, coverage_report, context)
rescue Nokogiri::XML::SyntaxError
raise Cobertura::InvalidXMLError, "XML parsing failed"
end
private
def parse_all(root, coverage_report, context)
return unless root.present?
root.each do |key, value|
parse_node(key, value, coverage_report, context)
end
end
def parse_node(key, value, coverage_report, context)
if key == 'sources' && value && value['source'].present?
parse_sources(value['source'], context)
elsif key == 'package'
Array.wrap(value).each do |item|
parse_package(item, coverage_report, context)
end
elsif key == 'class'
# This means the cobertura XML does not have classes within package nodes.
# This is possible in some cases like in simple JS project structures
# running Jest.
Array.wrap(value).each do |item|
parse_class(item, coverage_report, context)
end
elsif value.is_a?(Hash)
parse_all(value, coverage_report, context)
elsif value.is_a?(Array)
value.each do |item|
parse_all(item, coverage_report, context)
end
end
end
def parse_sources(sources, context)
return unless context[:project_path] && context[:paths]
sources = Array.wrap(sources)
# TODO: Go cobertura has a different format with how their packages
# are included in the filename. So we can't rely on the sources.
# We'll deal with this later.
return if sources.include?(GO_SOURCE_PATTERN)
sources.each do |source|
source = build_source_path(source, context)
context[:sources] << source if source.present?
end
end
def build_source_path(source, context)
# | raw source | extracted |
# |-----------------------------|------------|
# | /builds/foo/test/SampleLib/ | SampleLib/ |
# | /builds/foo/test/something | something |
# | /builds/foo/test/ | nil |
# | /builds/foo/test | nil |
source.split("#{context[:project_path]}/", 2)[1]
end
def parse_package(package, coverage_report, context)
classes = package.dig('classes', 'class')
return unless classes.present?
matched_filenames = Array.wrap(classes).map do |item|
parse_class(item, coverage_report, context)
end
# Remove these filenames from the paths to avoid conflict
# with other packages that may contain the same class filenames
remove_matched_filenames(matched_filenames, context)
end
def remove_matched_filenames(filenames, context)
return unless context[:paths]
filenames.each { |f| context[:paths].delete(f) }
end
def parse_class(file, coverage_report, context)
return unless file["filename"].present? && file["lines"].present?
parsed_lines = parse_lines(file["lines"])
filename = determine_filename(file["filename"], context)
coverage_report.add_file(filename, Hash[parsed_lines]) if filename
filename
end
def parse_lines(lines)
line_array = Array.wrap(lines["line"])
line_array.map do |line|
# Using `Integer()` here to raise exception on invalid values
[Integer(line["number"]), Integer(line["hits"])]
end
rescue StandardError
raise Cobertura::InvalidLineInformationError, "Line information had invalid values"
end
def determine_filename(filename, context)
return filename unless context[:sources].any?
full_filename = nil
context[:sources].each_with_index do |source, index|
break if index >= MAX_SOURCES
break if full_filename = check_source(source, filename, context)
end
full_filename
end
def check_source(source, filename, context)
full_path = File.join(source, filename)
return full_path if context[:paths].include?(full_path)
end
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Ci
module Parsers
module Coverage
class SaxDocument < Nokogiri::XML::SAX::Document
GO_SOURCE_PATTERN = '/usr/local/go/src'
MAX_SOURCES = 100
def initialize(coverage_report, project_path, worktree_paths)
@coverage_report = coverage_report
@project_path = project_path
@paths = worktree_paths&.to_set
@matched_filenames = []
@parsed_lines = []
@sources = []
end
def error(error)
raise Cobertura::InvalidXMLError, "XML parsing failed with error: #{error}"
end
def start_element(node_name, attrs = [])
return unless node_name
self.node_name = node_name
node_attrs = Hash[attrs]
if node_name == 'class' && node_attrs["filename"].present?
self.filename = determine_filename(node_attrs["filename"])
self.matched_filenames << filename if filename
elsif node_name == 'line'
self.parsed_lines << parse_line(node_attrs)
end
end
def characters(node_content)
if node_name == 'source'
parse_source(node_content)
end
end
def end_element(node_name)
if node_name == "package"
remove_matched_filenames
elsif node_name == "class" && filename && parsed_lines.present?
coverage_report.add_file(filename, Hash[parsed_lines])
self.filename = nil
self.parsed_lines = []
end
end
private
attr_accessor :coverage_report, :project_path, :paths, :sources, :node_name, :filename, :parsed_lines, :matched_filenames
def parse_line(line)
[Integer(line["number"]), Integer(line["hits"])]
rescue StandardError
raise Cobertura::InvalidLineInformationError, "Line information had invalid values"
end
def parse_source(node)
return unless project_path && paths && !node.include?(GO_SOURCE_PATTERN)
source = build_source_path(node)
self.sources << source if source.present?
end
def build_source_path(node)
# | raw source | extracted |
# |-----------------------------|------------|
# | /builds/foo/test/SampleLib/ | SampleLib/ |
# | /builds/foo/test/something | something |
# | /builds/foo/test/ | nil |
# | /builds/foo/test | nil |
node.split("#{project_path}/", 2)[1]
end
def remove_matched_filenames
return unless paths
matched_filenames.each { |f| paths.delete(f) }
end
def determine_filename(filename)
return filename unless sources.any?
full_filename = nil
sources.each_with_index do |source, index|
break if index >= MAX_SOURCES
break if full_filename = check_source(source, filename)
end
full_filename
end
def check_source(source, filename)
full_path = File.join(source, filename)
return full_path if paths.include?(full_path)
end
end
end
end
end
end
# frozen_string_literal: true
require 'fast_spec_helper'
require 'support/shared_examples/lib/gitlab/ci/parsers/coverage/cobertura_xml_shared_examples'
RSpec.describe Gitlab::Ci::Parsers::Coverage::DomParser do
subject(:parse_report) { described_class.new.parse(cobertura, coverage_report, project_path, paths) }
include_examples 'parse cobertura xml'
end
# frozen_string_literal: true
require 'fast_spec_helper'
require 'support/shared_examples/lib/gitlab/ci/parsers/coverage/cobertura_xml_shared_examples'
RSpec.describe Gitlab::Ci::Parsers::Coverage::SaxDocument do
subject(:parse_report) { Nokogiri::XML::SAX::Parser.new(described_class.new(coverage_report, project_path, paths)).parse(cobertura) }
include_examples 'parse cobertura xml'
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment