Commit 46199c0e authored by Alexander Barkov's avatar Alexander Barkov

MDEV-8102 REGEXP function fails to match hex values when expression is stored as a variable

We don't fix the bug itself, we just make regex functions display errors
returned from pcre_exec() as MariaDB warnings.
parent 0b4231e9
......@@ -845,3 +845,32 @@ SET default_regex_flags=DEFAULT;
SELECT REGEXP_REPLACE('1 foo and bar', '(\\d+) foo and (\\d+ )?bar', '\\1 this and \\2that');
REGEXP_REPLACE('1 foo and bar', '(\\d+) foo and (\\d+ )?bar', '\\1 this and \\2that')
1 this and that
#
# MDEV-8102 REGEXP function fails to match hex values when expression is stored as a variable
#
# Testing a warning
SET NAMES latin1;
SET @regCheck= '\\xE0\\x01';
SELECT 0xE001 REGEXP @regCheck;
0xE001 REGEXP @regCheck
0
Warnings:
Warning 1139 Got error 'pcre_exec: Invalid utf8 byte sequence in the subject string' from regexp
# Testing workaround N1: This makes the pattern to be a binary string:
SET NAMES latin1;
SET @regCheck= X'E001';
SELECT 0xE001 REGEXP @regCheck;
0xE001 REGEXP @regCheck
1
# Testing workaround N2: This also makes the pattern to be a binary string, using a different syntax:
SET NAMES latin1;
SET @regCheck= _binary '\\xE0\\x01';
SELECT 0xE001 REGEXP @regCheck;
0xE001 REGEXP @regCheck
1
# Testing workarond N3: This makes derivation of the subject string stronger (IMLICIT instead of COERCIBLE)
SET NAMES latin1;
SET @regCheck= '\\xE0\\x01';
SELECT CAST(0xE001 AS BINARY) REGEXP @regCheck;
CAST(0xE001 AS BINARY) REGEXP @regCheck
1
......
SET debug_dbug='+d,pcre_exec_error_123';
SELECT 'a' RLIKE 'a';
'a' RLIKE 'a'
0
Warnings:
Warning 1139 Got error 'pcre_exec: Internal error (-123)' from regexp
SET debug_dbug='';
SELECT 'a' RLIKE 'a';
'a' RLIKE 'a'
1
......@@ -402,3 +402,27 @@ SET default_regex_flags=DEFAULT;
--echo # MDEV-6965 non-captured group \2 in regexp_replace
--echo #
SELECT REGEXP_REPLACE('1 foo and bar', '(\\d+) foo and (\\d+ )?bar', '\\1 this and \\2that');
--echo #
--echo # MDEV-8102 REGEXP function fails to match hex values when expression is stored as a variable
--echo #
--echo # Testing a warning
SET NAMES latin1;
SET @regCheck= '\\xE0\\x01';
SELECT 0xE001 REGEXP @regCheck;
--echo # Testing workaround N1: This makes the pattern to be a binary string:
SET NAMES latin1;
SET @regCheck= X'E001';
SELECT 0xE001 REGEXP @regCheck;
--echo # Testing workaround N2: This also makes the pattern to be a binary string, using a different syntax:
SET NAMES latin1;
SET @regCheck= _binary '\\xE0\\x01';
SELECT 0xE001 REGEXP @regCheck;
--echo # Testing workarond N3: This makes derivation of the subject string stronger (IMLICIT instead of COERCIBLE)
SET NAMES latin1;
SET @regCheck= '\\xE0\\x01';
SELECT CAST(0xE001 AS BINARY) REGEXP @regCheck;
--source include/have_debug.inc
SET debug_dbug='+d,pcre_exec_error_123';
SELECT 'a' RLIKE 'a';
SET debug_dbug='';
SELECT 'a' RLIKE 'a';
......@@ -5160,10 +5160,65 @@ bool Regexp_processor_pcre::compile(Item *item, bool send_error)
}
/**
  Report a pcre_exec() error code to the client as a warning.

  The code is turned into a short text which is then pushed as an
  ER_REGEXP_ERROR condition of warning level for the current thread.

  @param rc  The error code returned by pcre_exec().
*/
void Regexp_processor_pcre::pcre_exec_warn(int rc) const
{
  char fallback[64];
  const char *text;
  /*
    Only the pcre_exec() error codes that can actually happen in MariaDB
    get a descriptive message of their own.
  */
  if (rc == PCRE_ERROR_NOMEMORY)
    text= "pcre_exec: Out of memory";
  else if (rc == PCRE_ERROR_BADUTF8)
    text= "pcre_exec: Invalid utf8 byte sequence in the subject string";
  else if (rc == PCRE_ERROR_RECURSELOOP)
    text= "pcre_exec: Recursion loop detected";
  else
  {
    /*
      Any other code is not expected under normal circumstances,
      so only its numeric value is reported, without a textual
      explanation.
    */
    my_snprintf(fallback, sizeof(fallback),
                "pcre_exec: Internal error (%d)", rc);
    text= fallback;
  }
  push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
                      ER_REGEXP_ERROR, ER(ER_REGEXP_ERROR), text);
}
/**
  Wrapper around pcre_exec() that reports failures as warnings.

  All arguments are forwarded to pcre_exec() unchanged. A return code
  below PCRE_ERROR_NOMATCH is treated as an error and passed to
  pcre_exec_warn(); a match or a plain "no match" is returned silently.

  @return The code returned by pcre_exec() (or the code injected by the
          "pcre_exec_error_123" debug keyword).
*/
int Regexp_processor_pcre::pcre_exec_with_warn(const pcre *code,
                                               const pcre_extra *extra,
                                               const char *subject,
                                               int length, int startoffset,
                                               int options, int *ovector,
                                               int ovecsize)
{
  int result= pcre_exec(code, extra, subject, length,
                        startoffset, options, ovector, ovecsize);
  /* Debug builds can simulate an unexpected error code for testing. */
  DBUG_EXECUTE_IF("pcre_exec_error_123", result= -123;);
  if (result >= PCRE_ERROR_NOMATCH)
    return result;
  pcre_exec_warn(result);
  return result;
}
/**
  Run the compiled pattern against a raw subject buffer.

  The match is performed exactly once, through pcre_exec_with_warn(), so
  that an error code from pcre_exec() is reported as a warning. The
  return code is stored in m_pcre_exec_rc and captured offsets are
  written to m_SubStrVec.

  @param str     Subject bytes to match against.
  @param length  Length of the subject passed to pcre_exec().
  @param offset  Start offset passed to pcre_exec().

  @return Always false.
*/
bool Regexp_processor_pcre::exec(const char *str, int length, int offset)
{
  /*
    The superseded direct pcre_exec() call is removed: its result was
    overwritten immediately, so it only ran the match a second time.
  */
  m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, NULL, str, length, offset, 0,
                                      m_SubStrVec, m_subpatterns_needed * 3);
  return false;
}
......@@ -5173,8 +5228,10 @@ bool Regexp_processor_pcre::exec(String *str, int offset,
{
if (!(str= convert_if_needed(str, &subject_converter)))
return true;
m_pcre_exec_rc= pcre_exec(m_pcre, NULL, str->c_ptr_safe(), str->length(),
offset, 0, m_SubStrVec, m_subpatterns_needed * 3);
m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, NULL,
str->c_ptr_safe(), str->length(),
offset, 0,
m_SubStrVec, m_subpatterns_needed * 3);
if (m_pcre_exec_rc > 0)
{
uint i;
......
......@@ -1549,6 +1549,10 @@ class Regexp_processor_pcre
int m_pcre_exec_rc;
int m_SubStrVec[30];
uint m_subpatterns_needed;
void pcre_exec_warn(int rc) const;
int pcre_exec_with_warn(const pcre *code, const pcre_extra *extra,
const char *subject, int length, int startoffset,
int options, int *ovector, int ovecsize);
public:
String *convert_if_needed(String *src, String *converter);
String subject_converter;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment