Commit 8de33e31 authored by Ethan Reesor

Refactor Go logic

- Move regexes into regex.rb
- Move shared Go logic into Gitlab::Golang
parent be027f87
......@@ -3,6 +3,15 @@
module BlobViewer
class GoMod < DependencyManager
include ServerSide
include Gitlab::Utils::StrongMemoize
# Matches a `module` directive at the very start of a go.mod file and captures
# the module path as `name`. Trailing whitespace and a trailing `//` comment
# are left out of the capture, which must be followed by a newline or by the
# end of the file.
MODULE_REGEX = /
\A (?# beginning of file)
module\s+ (?# module directive)
(?<name>.*?) (?# module name)
\s*(?:\/\/.*)? (?# comment)
(?:\n|\z) (?# newline or end of file)
/x.freeze
self.file_types = %i(go_mod go_sum)
......@@ -19,21 +28,16 @@ module BlobViewer
end
# Returns the module path declared by the blob's `module` directive.
#
# Yields nil when the blob is not named `go.mod` or when its data does not
# begin with a directive that MODULE_REGEX recognises. The lookup goes through
# strong_memoize under the :package_name key, which is the only memoization
# used here (no separate @package_name bookkeeping).
def package_name
  strong_memoize(:package_name) do
    next if blob.name != 'go.mod'

    # MODULE_REGEX accepts any whitespace after `module` and keeps a trailing
    # `//` comment out of the captured name.
    match = MODULE_REGEX.match(blob.data)
    match[:name] if match
  end
end
# Returns the URL for the module named by #package_name.
#
# URL validation and the choice between a local URL and a pkg.go.dev URL are
# handled by Gitlab::Golang.package_url, so nothing is repeated here.
def package_url
  Gitlab::Golang.package_url(package_name)
end
end
end
......@@ -3,7 +3,7 @@
module Packages
module Go
class ModuleFinder
include ::API::Helpers::Packages::Go::ModuleHelpers
include Gitlab::Golang
attr_reader :project, :module_name
......@@ -15,21 +15,15 @@ module Packages
end
# Builds the Packages::GoModule for @module_name inside @project.
#
# Returns nil when the module name is blank, does not start with
# local_module_prefix, or when the path segments following the prefix do not
# begin with the project's full path.
def execute
  return if @module_name.blank? || !@module_name.start_with?(local_module_prefix)

  module_path = @module_name[local_module_prefix.length..].split('/')
  project_path = project.full_path.split('/')

  # Take as many leading segments as the project path has and compare them
  module_project_path = module_path.shift(project_path.length)
  return unless module_project_path == project_path

  # The segments left over are joined and passed as the third argument
  Packages::GoModule.new(@project, @module_name, module_path.join('/'))
end
private
# Settings.build_gitlab_go_url with a trailing slash, cached in @gitlab_go_url
# after the first call.
def gitlab_go_url
  return @gitlab_go_url if @gitlab_go_url

  @gitlab_go_url = Settings.build_gitlab_go_url + '/'
end
end
end
end
......@@ -3,7 +3,7 @@
module Packages
module Go
class VersionFinder
include ::API::Helpers::Packages::Go::ModuleHelpers
include Gitlab::Golang
attr_reader :mod
......@@ -13,7 +13,7 @@ module Packages
# Returns the versions of @mod derived from the project's repository tags.
#
# Only tags accepted by semver_tag? are considered; each one is turned into a
# version with @mod.version_by(ref: tag), and versions that do not report
# valid? are dropped.
def execute
  @mod.project.repository.tags
    .filter { |tag| semver_tag?(tag) }
    .map { |tag| @mod.version_by(ref: tag) }
    .filter(&:valid?)
end
......
......@@ -2,7 +2,6 @@
class Packages::GoModuleVersion
include Gitlab::Utils::StrongMemoize
include ::API::Helpers::Packages::Go::ModuleHelpers
VALID_TYPES = %i[ref commit pseudo].freeze
......
# frozen_string_literal: true
module API
class GoProxy < Grape::API
helpers Gitlab::Golang
helpers ::API::Helpers::PackagesHelpers
helpers ::API::Helpers::Packages::Go::ModuleHelpers
# basic semver, except case encoded (A => !a)
MODULE_VERSION_REGEX = /v(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-([-.!a-z0-9]+))?(?:\+([-.!a-z0-9]+))?/.freeze
......@@ -12,6 +12,20 @@ module API
before { require_packages_enabled! }
helpers do
def case_decode(str)
  # Undoes Go's case-encoding, e.g. "github.com/!azure" => "github.com/Azure".
  #
  # `go help goproxy` describes the encoding as follows:
  #
  # > To avoid problems when serving from case-sensitive file systems,
  # > the <module> and <version> elements are case-encoded, replacing
  # > every uppercase letter with an exclamation mark followed by the
  # > corresponding lower-case letter: github.com/Azure encodes as
  # > github.com/!azure.
  #
  # Each match is an exclamation mark plus one letter; drop the mark and
  # upcase what is left.
  str.gsub(/![[:alpha:]]/) { |escaped| escaped.delete_prefix('!').upcase }
end
def find_project!(id)
# based on API::Helpers::Packages::BasicAuthHelpers#authorized_project_find!
......
# frozen_string_literal: true
module API
  module Helpers
    module Packages
      module Go
        # Helpers for handling Go module names and versions: case-encoding of
        # module paths, semantic version checks on tags, and pseudo-version
        # recognition and validation.
        module ModuleHelpers
          # Converts "github.com/Azure" to "github.com/!azure".
          #
          # From `go help goproxy`:
          #
          # > To avoid problems when serving from case-sensitive file systems,
          # > the <module> and <version> elements are case-encoded, replacing
          # > every uppercase letter with an exclamation mark followed by the
          # > corresponding lower-case letter: github.com/Azure encodes as
          # > github.com/!azure.
          def case_encode(str)
            # The character class is required: without the brackets the
            # pattern would only match the literal text "A-Z".
            str.gsub(/[A-Z]/) { |s| "!#{s.downcase}" }
          end

          # Converts "github.com/!azure" to "github.com/Azure".
          #
          # See #case_encode
          def case_decode(str)
            str.gsub(/![[:alpha:]]/) { |s| s[1..].upcase }
          end

          # Returns false when the tag has no dereferenced target; otherwise
          # returns the result of ::Packages::SemVer.match? for the tag name.
          def semver?(tag)
            return false if tag.dereferenced_target.nil?

            ::Packages::SemVer.match?(tag.name, prefixed: true)
          end

          # Checks whether a version has the shape of a Go pseudo-version.
          #
          # version - a String (parsed with #parse_semver) or an object that
          #           responds to prerelease, minor and patch.
          #
          # Returns false for nil or for a String that does not parse.
          def pseudo_version?(version)
            return false unless version

            if version.is_a? String
              version = parse_semver version
              return false unless version
            end

            pre = version.prerelease

            # Valid pseudo-versions are:
            #   vX.0.0-yyyymmddhhmmss-sha1337beef0, when no earlier tagged commit exists for X
            #   vX.Y.Z-pre.0.yyyymmddhhmmss-sha1337beef0, when most recent prior tag is vX.Y.Z-pre
            #   vX.Y.(Z+1)-0.yyyymmddhhmmss-sha1337beef0, when most recent prior tag is vX.Y.Z

            if version.minor != 0 || version.patch != 0
              # Strip the leading "0." or "<pre>.0." so that only the
              # timestamp and SHA fragment remain
              m = /\A(.*\.)?0\./.freeze.match pre
              return false unless m

              pre = pre[m[0].length..]
            end

            # This pattern is intentionally more forgiving than the patterns
            # above. Correctness is verified by #pseudo_version_commit.
            /\A\d{14}-\h+\z/.freeze.match? pre
          end

          # Looks up and validates the commit referenced by a pseudo-version.
          #
          # project - object whose repository responds to commit_by(oid:)
          # semver  - parsed version whose prerelease carries the timestamp
          #           and SHA fragment
          #
          # Returns the commit. Raises ArgumentError when the commit is
          # unknown, the SHA fragment is not 12 characters long, or the
          # timestamp differs from the commit's committed date.
          def pseudo_version_commit(project, semver)
            # Per Go's implementation of pseudo-versions, a tag should be
            # considered a pseudo-version if it matches one of the patterns
            # listed in #pseudo_version?, regardless of the content of the
            # timestamp or the length of the SHA fragment. However, an error
            # should be returned if the timestamp is not correct or if the SHA
            # fragment is not exactly 12 characters long. See also Go's
            # implementation of:
            #
            # - [*codeRepo.validatePseudoVersion](https://github.com/golang/go/blob/daf70d6c1688a1ba1699c933b3c3f04d6f2f73d9/src/cmd/go/internal/modfetch/coderepo.go#L530)
            # - [Pseudo-version parsing](https://github.com/golang/go/blob/master/src/cmd/go/internal/modfetch/pseudo.go)
            # - [Pseudo-version request processing](https://github.com/golang/go/blob/master/src/cmd/go/internal/modfetch/coderepo.go)

            # Go ignores anything before '.' or after the second '-', so we will do the same
            timestamp, sha = semver.prerelease.split('-').last 2
            timestamp = timestamp.split('.').last
            commit = project.repository.commit_by(oid: sha)

            # Error messages are based on the responses of proxy.golang.org

            # Verify that the SHA fragment references a commit
            raise ArgumentError, 'invalid pseudo-version: unknown commit' unless commit

            # Require the SHA fragment to be 12 characters long
            raise ArgumentError, 'invalid pseudo-version: revision is shorter than canonical' unless sha.length == 12

            # Require the timestamp to match that of the commit
            raise ArgumentError, 'invalid pseudo-version: does not match version-control timestamp' unless commit.committed_date.strftime('%Y%m%d%H%M%S') == timestamp

            commit
          end

          # Parses a version string with ::Packages::SemVer.parse (prefixed: true).
          def parse_semver(str)
            ::Packages::SemVer.parse(str, prefixed: true)
          end
        end
      end
    end
  end
end
......@@ -63,10 +63,6 @@ module Gitlab
"https://github.com/#{name}"
end
# Builds the pkg.go.dev URL for a package by appending +name+ to the site root.
def pkg_go_dev_url(name)
"https://pkg.go.dev/#{name}"
end
def link_tag(name, url)
sanitize(
%{<a href="#{ERB::Util.html_escape_once(url)}" rel="nofollow noreferrer noopener" target="_blank">#{ERB::Util.html_escape_once(name)}</a>},
......
......@@ -3,32 +3,15 @@
module Gitlab
module DependencyLinker
class GoModLinker < BaseLinker
include Gitlab::Golang
self.file_type = :go_mod
private
# Semantic version pattern shared through Gitlab::Regex. It carries no `v`
# prefix, so REGEX adds one in front of it.
SEMVER = Gitlab::Regex.unbounded_semver_regex
# Go package name pattern shared through Gitlab::Regex
NAME = Gitlab::Regex.go_package_regex
# A package name, optionally followed by whitespace and a `v`-prefixed version.
# Each constant is assigned exactly once so Ruby does not warn about
# re-initialised constants.
REGEX = Regexp.new("(?<name>#{NAME.source})(?:\\s+(?<version>v#{SEMVER.source}))?", SEMVER.options | NAME.options).freeze

# Returns the URL for +name+ (and optional +version+) by delegating to the
# shared Gitlab::Golang.package_url, so local and pkg.go.dev links are built
# the same way as everywhere else.
def package_url(name, version = nil)
  Gitlab::Golang.package_url(name, version)
end
# rubocop: disable CodeReuse/ActiveRecord
def link_dependencies
......
......@@ -7,8 +7,8 @@ module Gitlab
private
# Base64 pattern shared through Gitlab::Regex, used for the `h1:` checksum
BASE64 = Gitlab::Regex.base64_regex
# One go.sum line: package name, `v`-prefixed version with an optional
# `/go.mod` suffix, then the `h1:` checksum. Leading and trailing whitespace is
# tolerated. `^` and `$` are per-line anchors. The dot in `/go.mod` is escaped
# so that it only matches a literal dot.
REGEX = Regexp.new("^\\s*(?<name>#{NAME.source})\\s+(?<version>v#{SEMVER.source})(\/go\\.mod)?\\s+h1:(?<checksum>#{BASE64.source})\\s*$", NAME.options).freeze
# rubocop: disable CodeReuse/ActiveRecord
def link_dependencies
......
# frozen_string_literal: true
module Gitlab
  # Logic shared by the Go-related features: the local module prefix, tag and
  # pseudo-version checks, and package URL construction. The module extends
  # itself, so every method can also be called as Gitlab::Golang.<method>.
  module Golang
    extend self

    # Settings.build_gitlab_go_url followed by a slash, cached in @gitlab_prefix.
    def local_module_prefix
      @gitlab_prefix ||= "#{Settings.build_gitlab_go_url}/".freeze
    end

    # False when the tag has no dereferenced target; otherwise the result of
    # Packages::SemVer.match? for the tag name (prefixed: true).
    def semver_tag?(tag)
      !tag.dereferenced_target.nil? && Packages::SemVer.match?(tag.name, prefixed: true)
    end

    # Tells whether +version+ is shaped like a Go pseudo-version.
    #
    # +version+ may be a String, which is parsed with #parse_semver, or an
    # object responding to prerelease, minor and patch. nil and unparsable
    # strings give false.
    def pseudo_version?(version)
      return false unless version

      version = parse_semver(version) if version.is_a?(String)
      return false unless version

      suffix = version.prerelease

      # Accepted shapes:
      #   vX.0.0-yyyymmddhhmmss-sha1337beef0, when no earlier tagged commit exists for X
      #   vX.Y.Z-pre.0.yyyymmddhhmmss-sha1337beef0, when most recent prior tag is vX.Y.Z-pre
      #   vX.Y.(Z+1)-0.yyyymmddhhmmss-sha1337beef0, when most recent prior tag is vX.Y.Z

      unless version.minor == 0 && version.patch == 0
        # A leading "0." or "<pre>.0." is mandatory here; keep what follows it
        lead = /\A(.*\.)?0\./.freeze.match(suffix)
        return false unless lead

        suffix = lead.post_match
      end

      # Deliberately looser than the shapes listed above; the strict checks
      # live in #pseudo_version_commit.
      /\A\d{14}-\h+\z/.freeze.match?(suffix)
    end

    # Finds the commit a pseudo-version points at and validates it.
    #
    # Returns the commit, or raises ArgumentError when the commit is unknown,
    # the SHA fragment is not 12 characters long, or the timestamp does not
    # equal the commit's committed date formatted as yyyymmddhhmmss.
    def pseudo_version_commit(project, semver)
      # Go treats anything matching the shapes in #pseudo_version? as a
      # pseudo-version, whatever the timestamp says and however long the SHA
      # fragment is, but then rejects it if the timestamp is wrong or the
      # fragment is not exactly 12 characters. References:
      #
      # - [*codeRepo.validatePseudoVersion](https://github.com/golang/go/blob/daf70d6c1688a1ba1699c933b3c3f04d6f2f73d9/src/cmd/go/internal/modfetch/coderepo.go#L530)
      # - [Pseudo-version parsing](https://github.com/golang/go/blob/master/src/cmd/go/internal/modfetch/pseudo.go)
      # - [Pseudo-version request processing](https://github.com/golang/go/blob/master/src/cmd/go/internal/modfetch/coderepo.go)

      # Like Go, ignore everything before the last '.' of the timestamp part
      # and everything except the last two '-'-separated parts.
      stamp_part, sha = semver.prerelease.split('-').last(2)
      timestamp = stamp_part.split('.').last
      commit = project.repository.commit_by(oid: sha)

      # The messages below mirror the responses of proxy.golang.org

      # The SHA fragment has to resolve to a commit
      raise ArgumentError, 'invalid pseudo-version: unknown commit' unless commit

      # The SHA fragment has to be exactly 12 characters
      raise ArgumentError, 'invalid pseudo-version: revision is shorter than canonical' unless sha.length == 12

      # The timestamp has to equal the commit's
      commit_stamp = commit.committed_date.strftime('%Y%m%d%H%M%S')
      raise ArgumentError, 'invalid pseudo-version: does not match version-control timestamp' unless commit_stamp == timestamp

      commit
    end

    # Parses +str+ with Packages::SemVer.parse (prefixed: true).
    def parse_semver(str)
      Packages::SemVer.parse(str, prefixed: true)
    end

    # pkg.go.dev URL for +name+, with "@<version>" appended when a version is given.
    def pkg_go_dev_url(name, version = nil)
      base = "https://pkg.go.dev/#{name}"
      version ? "#{base}@#{version}" : base
    end

    # URL for the package +name+: nil when "https://<name>" is not a valid
    # URL, a pkg.go.dev URL for names outside local_module_prefix, and a URL
    # on this instance (protocol from Gitlab.config) otherwise.
    def package_url(name, version = nil)
      return unless UrlSanitizer.valid?("https://#{name}")

      if name.starts_with?(local_module_prefix)
        # This will not work if `name` refers to a subdirectory of a project. This
        # could be expanded with logic similar to Gitlab::Middleware::Go to locate
        # the project, check for permissions, and return a smarter result.
        "#{Gitlab.config.gitlab.protocol}://#{name}/"
      else
        pkg_go_dev_url(name, version)
      end
    end
  end
end
......@@ -47,9 +47,43 @@ module Gitlab
maven_app_name_regex
end
def unbounded_semver_regex
# Memoized semantic version pattern with no anchors and no `v` prefix, so
# that callers can embed its source in larger expressions. The parts are
# captured by name: major, minor, patch, prerelease and build.
#
# See the official regex: https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
# The order of the alternatives in <prerelease> are intentionally
# reordered to be greedy. Without this change, the unbounded regex would
# only partially match "v0.0.0-20201230123456-abcdefabcdef".
@unbounded_semver_regex ||= /
(?<major>0|[1-9]\d*)
\.(?<minor>0|[1-9]\d*)
\.(?<patch>0|[1-9]\d*)
(?:-(?<prerelease>(?:\d*[a-zA-Z-][0-9a-zA-Z-]*|[1-9]\d*|0)(?:\.(?:\d*[a-zA-Z-][0-9a-zA-Z-]*|[1-9]\d*|0))*))?
(?:\+(?<build>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?
/x.freeze
end
# Memoized semantic version pattern that must match the whole string.
#
# It is derived from unbounded_semver_regex by wrapping that pattern's source
# in \A and \z, so the two definitions cannot drift apart. See the official
# regex: https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
def semver_regex
  @semver_regex ||= begin
    unbounded = ::Gitlab::Regex.unbounded_semver_regex
    # Reuse the options too: the unbounded pattern is written in extended mode
    Regexp.new("\\A#{unbounded.source}\\z", unbounded.options).freeze
  end
end
def go_package_regex
# Memoized, case-insensitive pattern for a Go package name, delimited by
# word boundaries. It captures `domain` (dot-separated labels ending in a
# top-level domain of at least two letters) and the optional `path` by name.
#
# A Go package name looks like a URL but is not; it:
# - Must not have a scheme, such as http:// or https://
# - Must not have a port number, such as :8080 or :8443
@go_package_regex ||= /
\b (?# word boundary)
(?<domain>
[0-9a-z](?:(?:-|[0-9a-z]){0,61}[0-9a-z])? (?# first domain)
(?:\.[0-9a-z](?:(?:-|[0-9a-z]){0,61}[0-9a-z])?)* (?# inner domains)
\.[a-z]{2,} (?# top-level domain)
)
(?<path>\/(?:
[-\/$_.+!*'(),0-9a-z] (?# plain URL character)
| %[0-9a-f]{2})* (?# URL encoded character)
)? (?# path)
\b (?# word boundary)
/ix.freeze
end
end
......@@ -216,22 +250,8 @@ module Gitlab
@issue ||= /(?<issue>\d+\b)/
end
def go_package_regex
# A Go package name looks like a URL but is not; it:
# - Must not have a scheme, such as http:// or https://
# - Must not have a port number, such as :8080 or :8443
@go_package_regex ||= /
\b (?# word boundary)
[0-9a-z]((-|[0-9a-z]){0,61}[0-9a-z])? (?# first domain)
(\.[0-9a-z]((-|[0-9a-z]){0,61}[0-9a-z])?)* (?# inner domains)
\.[a-z]{2,} (?# top-level domain)
(\/(
[-\/$_.+!*'(),0-9a-z] (?# plain URL character)
| %[0-9a-f]{2})* (?# URL encoded character)
)? (?# path)
\b (?# word boundary)
/ix.freeze
def base64_regex
# Memoized, unanchored pattern for padded Base64 text: any number of
# four-character groups, optionally followed by a final group padded with
# `==` or `=`. Every part is optional, so it also matches the empty string.
@base64_regex ||= /(?:[A-Za-z0-9+\/]{4})*(?:[A-Za-z0-9+\/]{2}==|[A-Za-z0-9+\/]{3}=)?/.freeze
end
end
end
......
......@@ -284,4 +284,15 @@ describe Gitlab::Regex do
it { is_expected.to match('example.com/foo/bar/baz') }
it { is_expected.to match('tl.dr.foo.bar.baz') }
end
describe '.unbounded_semver_regex' do
# The pattern under test has no anchors, so the positive cases pass as soon
# as any substring is a version, and the negative cases are inputs that
# contain no major.minor.patch substring at all.
subject { described_class.unbounded_semver_regex }
it { is_expected.to match('1.2.3') }
it { is_expected.to match('1.2.3-beta') }
it { is_expected.to match('1.2.3-alpha.3') }
it { is_expected.not_to match('1') }
it { is_expected.not_to match('1.2') }
it { is_expected.not_to match('1./2.3') }
end
end
......@@ -23,7 +23,7 @@ describe BlobViewer::GoMod do
describe '#package_url' do
it 'returns the package URL' do
  # Gitlab::Golang.package_url builds local module URLs with a trailing slash
  expect(subject.package_url).to eq("#{Gitlab.config.gitlab.protocol}://#{Settings.build_gitlab_go_url}/#{project.full_path}/")
end
context 'when the homepage has an invalid URL' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment