Commit 46d5ab68 authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch 'feature/gb/add-regexp-variables-expression' into 'master'

Add support for variables expression regexp syntax

Closes #43601

See merge request gitlab-org/gitlab-ce!18902
parents e330b709 afa24514
---
title: Add support for variables expression pattern matching syntax
merge_request: 18902
author:
type: added
...@@ -530,6 +530,16 @@ Below you can find supported syntax reference: ...@@ -530,6 +530,16 @@ Below you can find supported syntax reference:
`$STAGING` value needs to be a string, with length higher than zero. `$STAGING` value needs to be a string, with length higher than zero.
Variable that contains only whitespace characters is not an empty variable. Variable that contains only whitespace characters is not an empty variable.
1. Pattern matching _(added in 11.0)_
> Example: `$VARIABLE =~ /^content.*/`
It is possible to perform pattern matching against a variable and a regular
expression. An expression like this evaluates to true if matches are found.
Pattern matching is case-sensitive by default. Use `i` flag modifier, like
`/pattern/i` to make a pattern case-insensitive.
### Unsupported predefined variables ### Unsupported predefined variables
Because GitLab evaluates variables before creating jobs, we do not support a Because GitLab evaluates variables before creating jobs, we do not support a
......
...@@ -344,10 +344,11 @@ job: ...@@ -344,10 +344,11 @@ job:
kubernetes: active kubernetes: active
``` ```
Example of using variables expressions: Examples of using variables expressions:
```yaml ```yaml
deploy: deploy:
script: cap staging deploy
only: only:
refs: refs:
- branches - branches
...@@ -356,6 +357,16 @@ deploy: ...@@ -356,6 +357,16 @@ deploy:
- $STAGING - $STAGING
``` ```
Another use case is excluding jobs depending on a commit message _(added in 11.0)_:
```yaml
end-to-end:
script: rake test:end-to-end
except:
variables:
- $CI_COMMIT_MESSAGE =~ /skip-end-to-end-tests/
```
Learn more about variables expressions on [a separate page][variables-expressions]. Learn more about variables expressions on [a separate page][variables-expressions].
## `tags` ## `tags`
......
module Gitlab
  module Ci
    module Pipeline
      module Expression
        # Common ancestor of every error raised by the expression engine, so
        # callers can rescue the whole family with a single clause.
        class ExpressionError < StandardError; end

        # Raised when an expression fails while it is being evaluated.
        class RuntimeError < ExpressionError; end
      end
    end
  end
end
module Gitlab
  module Ci
    module Pipeline
      module Expression
        module Lexeme
          # Binary `=~` operator: matches the value produced by the left
          # operand against the regular expression produced by the right one.
          class Matches < Lexeme::Operator
            PATTERN = /=~/.freeze

            # Builds the operator from the lexemes surrounding the `=~` token.
            # The raw token value itself is not needed.
            def self.build(_value, behind, ahead)
              new(behind, ahead)
            end

            def initialize(left, right)
              @left, @right = left, right
            end

            # Evaluates both operands with the given variables and returns
            # true when scanning the left-hand text with the right-hand regexp
            # yields at least one match.
            def evaluate(variables = {})
              subject = @left.evaluate(variables)
              matcher = @right.evaluate(variables)

              # `to_s` lets a nil (undefined variable) be scanned as an empty string.
              matcher.scan(subject.to_s).any?
            end
          end
        end
      end
    end
  end
end
module Gitlab
  module Ci
    module Pipeline
      module Expression
        module Lexeme
          require_dependency 're2'

          # Value lexeme for a `/regexp/flags` literal in an expression.
          class Pattern < Lexeme::Value
            PATTERN = %r{^/.+/[ismU]*$}.freeze

            # Builds the lexeme from the raw token text.
            def self.build(string)
              new(string)
            end

            # Stores the raw literal and rejects it straight away, with a
            # syntax error, when it cannot be turned into an untrusted regexp.
            def initialize(regexp)
              @value = regexp

              return if Gitlab::UntrustedRegexp.valid?(@value)

              raise Lexer::SyntaxError, 'Invalid regular expression!'
            end

            # Returns the regexp object fabricated from the stored literal.
            # `variables` is accepted for interface parity with other lexemes
            # and is not used. A RegexpError raised while fabricating is
            # re-raised as an Expression::RuntimeError.
            def evaluate(variables = {})
              begin
                Gitlab::UntrustedRegexp.fabricate(@value)
              rescue RegexpError
                raise Expression::RuntimeError, 'Invalid regular expression!'
              end
            end
          end
        end
      end
    end
  end
end
...@@ -5,15 +5,17 @@ module Gitlab ...@@ -5,15 +5,17 @@ module Gitlab
class Lexer class Lexer
include ::Gitlab::Utils::StrongMemoize include ::Gitlab::Utils::StrongMemoize
SyntaxError = Class.new(Expression::ExpressionError)
LEXEMES = [ LEXEMES = [
Expression::Lexeme::Variable, Expression::Lexeme::Variable,
Expression::Lexeme::String, Expression::Lexeme::String,
Expression::Lexeme::Pattern,
Expression::Lexeme::Null, Expression::Lexeme::Null,
Expression::Lexeme::Equals Expression::Lexeme::Equals,
Expression::Lexeme::Matches
].freeze ].freeze
SyntaxError = Class.new(Statement::StatementError)
MAX_TOKENS = 100 MAX_TOKENS = 100
def initialize(statement, max_tokens: MAX_TOKENS) def initialize(statement, max_tokens: MAX_TOKENS)
......
...@@ -3,15 +3,16 @@ module Gitlab ...@@ -3,15 +3,16 @@ module Gitlab
module Pipeline module Pipeline
module Expression module Expression
class Statement class Statement
StatementError = Class.new(StandardError) StatementError = Class.new(Expression::ExpressionError)
GRAMMAR = [ GRAMMAR = [
%w[variable],
%w[variable equals string], %w[variable equals string],
%w[variable equals variable], %w[variable equals variable],
%w[variable equals null], %w[variable equals null],
%w[string equals variable], %w[string equals variable],
%w[null equals variable], %w[null equals variable],
%w[variable] %w[variable matches pattern]
].freeze ].freeze
def initialize(statement, variables = {}) def initialize(statement, variables = {})
...@@ -35,11 +36,13 @@ module Gitlab ...@@ -35,11 +36,13 @@ module Gitlab
def truthful? def truthful?
evaluate.present? evaluate.present?
rescue Expression::ExpressionError
false
end end
def valid? def valid?
parse_tree.is_a?(Lexeme::Base) parse_tree.is_a?(Lexeme::Base)
rescue StatementError rescue Expression::ExpressionError
false false
end end
end end
......
...@@ -9,7 +9,9 @@ module Gitlab ...@@ -9,7 +9,9 @@ module Gitlab
# there is a strict limit on total execution time. See the RE2 documentation # there is a strict limit on total execution time. See the RE2 documentation
# at https://github.com/google/re2/wiki/Syntax for more details. # at https://github.com/google/re2/wiki/Syntax for more details.
class UntrustedRegexp class UntrustedRegexp
delegate :===, to: :regexp require_dependency 're2'
delegate :===, :source, to: :regexp
def initialize(pattern, multiline: false) def initialize(pattern, multiline: false)
if multiline if multiline
...@@ -35,6 +37,10 @@ module Gitlab ...@@ -35,6 +37,10 @@ module Gitlab
RE2.Replace(text, regexp, rewrite) RE2.Replace(text, regexp, rewrite)
end end
# Two regexps are considered equal when their pattern sources are equal.
# Objects that do not expose `source` (nil, strings, ...) are simply not
# equal, instead of raising NoMethodError from inside an equality check.
def ==(other)
  other.respond_to?(:source) && source == other.source
end
# Handles regular expressions with the preferred RE2 library where possible # Handles regular expressions with the preferred RE2 library where possible
# via UntrustedRegexp. Falls back to Ruby's built-in regular expression library # via UntrustedRegexp. Falls back to Ruby's built-in regular expression library
# when the syntax would be invalid in RE2. # when the syntax would be invalid in RE2.
...@@ -48,6 +54,24 @@ module Gitlab ...@@ -48,6 +54,24 @@ module Gitlab
Regexp.new(pattern) Regexp.new(pattern)
end end
# Tells whether `pattern` can be fabricated into a regexp. A RegexpError
# raised while fabricating is reported as `false` rather than propagated.
def self.valid?(pattern)
  fabricate(pattern) ? true : false
rescue RegexpError
  false
end
# Builds a regexp from a `/expression/flags` literal.
#
# Supported flags are `i`, `s`, `m` and `U`; when present they are passed to
# the engine as an inline `(?flags)` prefix on the expression.
#
# Raises RegexpError when `pattern` is not a `/.../` literal.
def self.fabricate(pattern)
  # \A and \z anchor the whole input. With ^ and $ a multi-line string was
  # accepted as soon as any single line of it looked like a literal.
  matches = pattern.match(%r{\A/(?<regexp>.+)/(?<flags>[ismU]*)\z})

  raise RegexpError, 'Invalid regular expression!' if matches.nil?

  expression = matches[:regexp]
  flags = matches[:flags]

  # Build a new string rather than mutating the captured group in place.
  expression = "(?#{flags})#{expression}" if flags.present?

  new(expression, multiline: false)
end
private private
attr_reader :regexp attr_reader :regexp
......
...@@ -111,7 +111,15 @@ describe Gitlab::Ci::Config::Entry::Policy do ...@@ -111,7 +111,15 @@ describe Gitlab::Ci::Config::Entry::Policy do
context 'when specifying invalid variables expressions token' do context 'when specifying invalid variables expressions token' do
let(:config) { { variables: ['$MY_VAR == 123'] } } let(:config) { { variables: ['$MY_VAR == 123'] } }
it 'reports an error about invalid statement' do it 'reports an error about invalid expression' do
expect(entry.errors).to include /invalid expression syntax/
end
end
context 'when using invalid variables expressions regexp' do
let(:config) { { variables: ['$MY_VAR =~ /some ( thing/'] } }
it 'reports an error about invalid expression' do
expect(entry.errors).to include /invalid expression syntax/ expect(entry.errors).to include /invalid expression syntax/
end end
end end
......
require 'fast_spec_helper'
require_dependency 're2'
describe Gitlab::Ci::Pipeline::Expression::Lexeme::Matches do
  # Both operands are doubles; each example stubs what they evaluate to.
  let(:left) { double('left') }
  let(:right) { double('right') }

  describe '.build' do
    it 'creates a new instance of the token' do
      token = described_class.build('=~', left, right)

      expect(token).to be_a(described_class)
    end
  end

  describe '.type' do
    it 'is an operator' do
      expect(described_class.type).to eq :operator
    end
  end

  describe '#evaluate' do
    it 'returns false when left and right do not match' do
      pattern = Gitlab::UntrustedRegexp.new('something')
      allow(left).to receive(:evaluate).and_return('my-string')
      allow(right).to receive(:evaluate).and_return(pattern)

      result = described_class.new(left, right).evaluate

      expect(result).to eq false
    end

    it 'returns true when left and right match' do
      pattern = Gitlab::UntrustedRegexp.new('awesome.string$')
      allow(left).to receive(:evaluate).and_return('my-awesome-string')
      allow(right).to receive(:evaluate).and_return(pattern)

      result = described_class.new(left, right).evaluate

      expect(result).to eq true
    end

    it 'supports matching against a nil value' do
      pattern = Gitlab::UntrustedRegexp.new('pattern')
      allow(left).to receive(:evaluate).and_return(nil)
      allow(right).to receive(:evaluate).and_return(pattern)

      result = described_class.new(left, right).evaluate

      expect(result).to eq false
    end

    it 'supports multiline strings' do
      text = <<~TEXT
        My awesome contents
        My-text-string!
      TEXT
      pattern = Gitlab::UntrustedRegexp.new('text-string')
      allow(left).to receive(:evaluate).and_return(text)
      allow(right).to receive(:evaluate).and_return(pattern)

      result = described_class.new(left, right).evaluate

      expect(result).to eq true
    end

    it 'supports regexp flags' do
      text = <<~TEXT
        My AWESOME content
      TEXT
      pattern = Gitlab::UntrustedRegexp.new('(?i)awesome')
      allow(left).to receive(:evaluate).and_return(text)
      allow(right).to receive(:evaluate).and_return(pattern)

      result = described_class.new(left, right).evaluate

      expect(result).to eq true
    end
  end
end
require 'fast_spec_helper'
describe Gitlab::Ci::Pipeline::Expression::Lexeme::Pattern do
  describe '.build' do
    it 'creates a new instance of the token' do
      lexeme = described_class.build('/.*/')

      expect(lexeme).to be_a(described_class)
    end

    it 'raises an error if pattern is invalid' do
      syntax_error = Gitlab::Ci::Pipeline::Expression::Lexer::SyntaxError

      expect { described_class.build('/ some ( thin/i') }
        .to raise_error(syntax_error)
    end
  end

  describe '.type' do
    it 'is a value lexeme' do
      expect(described_class.type).to eq :value
    end
  end

  describe '.scan' do
    it 'correctly identifies a pattern token' do
      token = described_class.scan(StringScanner.new('/pattern/'))

      expect(token).not_to be_nil
      expect(token.build.evaluate)
        .to eq Gitlab::UntrustedRegexp.new('pattern')
    end

    it 'is a greedy scanner for regexp boundaries' do
      token = described_class.scan(StringScanner.new('/some .* / pattern/'))

      expect(token).not_to be_nil
      expect(token.build.evaluate)
        .to eq Gitlab::UntrustedRegexp.new('some .* / pattern')
    end

    it 'does not allow to use an empty pattern' do
      token = described_class.scan(StringScanner.new('//'))

      expect(token).to be_nil
    end

    it 'support single flag' do
      token = described_class.scan(StringScanner.new('/pattern/i'))

      expect(token).not_to be_nil
      expect(token.build.evaluate)
        .to eq Gitlab::UntrustedRegexp.new('(?i)pattern')
    end

    it 'support multiple flags' do
      token = described_class.scan(StringScanner.new('/pattern/im'))

      expect(token).not_to be_nil
      expect(token.build.evaluate)
        .to eq Gitlab::UntrustedRegexp.new('(?im)pattern')
    end

    it 'does not support arbitrary flags' do
      token = described_class.scan(StringScanner.new('/pattern/x'))

      expect(token).to be_nil
    end
  end

  describe '#evaluate' do
    it 'returns a regular expression' do
      evaluated = described_class.new('/abc/').evaluate

      expect(evaluated).to eq Gitlab::UntrustedRegexp.new('abc')
    end

    it 'raises error if evaluated regexp is not valid' do
      # Bypass the constructor check so the failure surfaces in #evaluate.
      allow(Gitlab::UntrustedRegexp).to receive(:valid?).and_return(true)

      lexeme = described_class.new('/invalid ( .*/')

      expect { lexeme.evaluate }
        .to raise_error(Gitlab::Ci::Pipeline::Expression::RuntimeError)
    end
  end
end
...@@ -6,7 +6,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do ...@@ -6,7 +6,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do
end end
describe '#tokens' do describe '#tokens' do
it 'tokenss single value' do it 'returns single value' do
tokens = described_class.new('$VARIABLE').tokens tokens = described_class.new('$VARIABLE').tokens
expect(tokens).to be_one expect(tokens).to be_one
...@@ -20,14 +20,14 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do ...@@ -20,14 +20,14 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do
expect(tokens).to all(be_an_instance_of(token_class)) expect(tokens).to all(be_an_instance_of(token_class))
end end
it 'tokenss multiple values of the same token' do it 'returns multiple values of the same token' do
tokens = described_class.new("$VARIABLE1 $VARIABLE2").tokens tokens = described_class.new("$VARIABLE1 $VARIABLE2").tokens
expect(tokens.size).to eq 2 expect(tokens.size).to eq 2
expect(tokens).to all(be_an_instance_of(token_class)) expect(tokens).to all(be_an_instance_of(token_class))
end end
it 'tokenss multiple values with different tokens' do it 'returns multiple values with different tokens' do
tokens = described_class.new('$VARIABLE "text" "value"').tokens tokens = described_class.new('$VARIABLE "text" "value"').tokens
expect(tokens.size).to eq 3 expect(tokens.size).to eq 3
...@@ -36,7 +36,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do ...@@ -36,7 +36,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexer do
expect(tokens.third.value).to eq '"value"' expect(tokens.third.value).to eq '"value"'
end end
it 'tokenss tokens and operators' do it 'returns tokens and operators' do
tokens = described_class.new('$VARIABLE == "text"').tokens tokens = described_class.new('$VARIABLE == "text"').tokens
expect(tokens.size).to eq 3 expect(tokens.size).to eq 3
......
require 'spec_helper' require 'fast_spec_helper'
describe Gitlab::Ci::Pipeline::Expression::Parser do describe Gitlab::Ci::Pipeline::Expression::Parser do
describe '#tree' do describe '#tree' do
......
require 'spec_helper' require 'fast_spec_helper'
require 'rspec-parameterized'
describe Gitlab::Ci::Pipeline::Expression::Statement do describe Gitlab::Ci::Pipeline::Expression::Statement do
subject do subject do
...@@ -36,7 +37,7 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do ...@@ -36,7 +37,7 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do
'== "123"', # invalid left side '== "123"', # invalid left side
'"some string"', # only string provided '"some string"', # only string provided
'$VAR ==', # invalid right side '$VAR ==', # invalid right side
'12345', # unknown syntax 'null', # missing lexemes
'' # empty statement '' # empty statement
] ]
...@@ -44,7 +45,7 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do ...@@ -44,7 +45,7 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do
context "when expression grammar is #{syntax.inspect}" do context "when expression grammar is #{syntax.inspect}" do
let(:text) { syntax } let(:text) { syntax }
it 'aises a statement error exception' do it 'raises a statement error exception' do
expect { subject.parse_tree } expect { subject.parse_tree }
.to raise_error described_class::StatementError .to raise_error described_class::StatementError
end end
...@@ -82,48 +83,66 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do ...@@ -82,48 +83,66 @@ describe Gitlab::Ci::Pipeline::Expression::Statement do
end end
describe '#evaluate' do describe '#evaluate' do
statements = [ using RSpec::Parameterized::TableSyntax
['$PRESENT_VARIABLE == "my variable"', true],
["$PRESENT_VARIABLE == 'my variable'", true], where(:expression, :value) do
['"my variable" == $PRESENT_VARIABLE', true], '$PRESENT_VARIABLE == "my variable"' | true
['$PRESENT_VARIABLE == null', false], '"my variable" == $PRESENT_VARIABLE' | true
['$EMPTY_VARIABLE == null', false], '$PRESENT_VARIABLE == null' | false
['"" == $EMPTY_VARIABLE', true], '$EMPTY_VARIABLE == null' | false
['$EMPTY_VARIABLE', ''], '"" == $EMPTY_VARIABLE' | true
['$UNDEFINED_VARIABLE == null', true], '$EMPTY_VARIABLE' | ''
['null == $UNDEFINED_VARIABLE', true], '$UNDEFINED_VARIABLE == null' | true
['$PRESENT_VARIABLE', 'my variable'], 'null == $UNDEFINED_VARIABLE' | true
['$UNDEFINED_VARIABLE', nil] '$PRESENT_VARIABLE' | 'my variable'
] '$UNDEFINED_VARIABLE' | nil
"$PRESENT_VARIABLE =~ /var.*e$/" | true
statements.each do |expression, value| "$PRESENT_VARIABLE =~ /^var.*/" | false
context "when using expression `#{expression}`" do "$EMPTY_VARIABLE =~ /var.*/" | false
let(:text) { expression } "$UNDEFINED_VARIABLE =~ /var.*/" | false
"$PRESENT_VARIABLE =~ /VAR.*/i" | true
it "evaluates to `#{value.inspect}`" do end
expect(subject.evaluate).to eq value
end with_them do
let(:text) { expression }
it "evaluates to `#{params[:value].inspect}`" do
expect(subject.evaluate).to eq value
end end
end end
end end
describe '#truthful?' do describe '#truthful?' do
statements = [ using RSpec::Parameterized::TableSyntax
['$PRESENT_VARIABLE == "my variable"', true],
["$PRESENT_VARIABLE == 'no match'", false], where(:expression, :value) do
['$UNDEFINED_VARIABLE == null', true], '$PRESENT_VARIABLE == "my variable"' | true
['$PRESENT_VARIABLE', true], "$PRESENT_VARIABLE == 'no match'" | false
['$UNDEFINED_VARIABLE', false], '$UNDEFINED_VARIABLE == null' | true
['$EMPTY_VARIABLE', false] '$PRESENT_VARIABLE' | true
] '$UNDEFINED_VARIABLE' | false
'$EMPTY_VARIABLE' | false
statements.each do |expression, value| '$INVALID = 1' | false
context "when using expression `#{expression}`" do "$PRESENT_VARIABLE =~ /var.*/" | true
let(:text) { expression } "$UNDEFINED_VARIABLE =~ /var.*/" | false
end
it "returns `#{value.inspect}`" do
expect(subject.truthful?).to eq value with_them do
end let(:text) { expression }
it "returns `#{params[:value].inspect}`" do
expect(subject.truthful?).to eq value
end
end
context 'when evaluating expression raises an error' do
let(:text) { '$PRESENT_VARIABLE' }
it 'returns false' do
allow(subject).to receive(:evaluate)
.and_raise(described_class::StatementError)
expect(subject.truthful?).to be_falsey
end end
end end
end end
......
require 'spec_helper' require 'fast_spec_helper'
describe Gitlab::Ci::Pipeline::Expression::Token do describe Gitlab::Ci::Pipeline::Expression::Token do
let(:value) { '$VARIABLE' } let(:value) { '$VARIABLE' }
......
require 'spec_helper' require 'fast_spec_helper'
require 'support/shared_examples/malicious_regexp_shared_examples'
describe Gitlab::UntrustedRegexp do describe Gitlab::UntrustedRegexp do
# The example descriptions were swapped relative to their assertions: the
# unbalanced-parenthesis pattern is the invalid one.
describe '.valid?' do
  it 'returns false if regexp is invalid' do
    expect(described_class.valid?('/some ( thing/'))
      .to be false
  end

  it 'returns true if regexp is valid' do
    expect(described_class.valid?('/some .* thing/'))
      .to be true
  end
end
describe '.fabricate' do
  context 'when regexp is using /regexp/ scheme with flags' do
    it 'fabricates regexp with a single flag' do
      fabricated = described_class.fabricate('/something/i')

      expect(fabricated).to eq described_class.new('(?i)something')
      expect(fabricated.scan('SOMETHING')).to be_one
    end

    it 'fabricates regexp with multiple flags' do
      fabricated = described_class.fabricate('/something/im')

      expect(fabricated).to eq described_class.new('(?im)something')
    end

    it 'fabricates regexp without flags' do
      fabricated = described_class.fabricate('/something/')

      expect(fabricated).to eq described_class.new('something')
    end
  end

  context 'when regexp is a raw pattern' do
    # Input without the surrounding slashes is not a literal at all.
    it 'raises an error' do
      expect { described_class.fabricate('some .* thing') }
        .to raise_error(RegexpError)
    end
  end
end
describe '#initialize' do describe '#initialize' do
subject { described_class.new(pattern) } subject { described_class.new(pattern) }
......
require 'timeout'
shared_examples 'malicious regexp' do shared_examples 'malicious regexp' do
let(:malicious_text) { 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!' } let(:malicious_text) { 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!' }
let(:malicious_regexp) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' } let(:malicious_regexp) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment