Commit 8d4b4160 authored by Alexander Barkov's avatar Alexander Barkov

Bug#38227 EXTRACTVALUE doesn't work with DTD declarations

Problem:
 XML syntax parser allowed to use quoted strings as attribute names,
 and tried to put them into parser state stack instead of identifiers.
 After that parser failed, if quoted string contained some slash characters.
Fix:
 - Disallowing quoted strings in regular tags.
 - Allowing quoted string in DOCTYPE declararion, but
 don't push it into parse state stack (just skip it).
parent e2fef3d1
......@@ -1029,4 +1029,28 @@ SELECT 1 FROM t1 ORDER BY(UPDATEXML(a, '1', '1'));
1
1
DROP TABLE t1;
SET @xml=
'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title> Title - document with document declaration</title>
</head>
<body> Hi, Im a webpage with document a declaration </body>
</html>';
SELECT ExtractValue(@xml, 'html/head/title');
ExtractValue(@xml, 'html/head/title')
Title - document with document declaration
SELECT ExtractValue(@xml, 'html/body');
ExtractValue(@xml, 'html/body')
Hi, Im a webpage with document a declaration
SELECT ExtractValue('<xml "xxx" "yyy">CharData</xml>', '/xml');
ExtractValue('<xml "xxx" "yyy">CharData</xml>', '/xml')
NULL
Warnings:
Warning 1525 Incorrect XML value: 'parse error at line 1 pos 11: STRING unexpected ('>' wanted)'
SELECT ExtractValue('<xml xxx "yyy">CharData</xml>', '/xml');
ExtractValue('<xml xxx "yyy">CharData</xml>', '/xml')
NULL
Warnings:
Warning 1525 Incorrect XML value: 'parse error at line 1 pos 17: STRING unexpected ('>' wanted)'
End of 5.1 tests
......@@ -551,4 +551,29 @@ INSERT INTO t1 VALUES (0), (0);
SELECT 1 FROM t1 ORDER BY(UPDATEXML(a, '1', '1'));
DROP TABLE t1;
#
# BUG#38227 EXTRACTVALUE doesn't work with DTD declarations
#
# Check that quoted strings work fine in DOCTYPE declaration.
#
SET @xml=
'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title> Title - document with document declaration</title>
</head>
<body> Hi, Im a webpage with document a declaration </body>
</html>';
SELECT ExtractValue(@xml, 'html/head/title');
SELECT ExtractValue(@xml, 'html/body');
# These two documents will fail.
# Quoted strings are not allowed in regular tags
#
SELECT ExtractValue('<xml "xxx" "yyy">CharData</xml>', '/xml');
SELECT ExtractValue('<xml xxx "yyy">CharData</xml>', '/xml');
--echo End of 5.1 tests
......@@ -328,7 +328,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
}
while ((MY_XML_IDENT == (lex=my_xml_scan(p,&a))) ||
(MY_XML_STRING == lex))
((MY_XML_STRING == lex && exclam)))
{
MY_XML_ATTR b;
if (MY_XML_EQ == (lex=my_xml_scan(p,&b)))
......@@ -349,13 +349,22 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
return MY_XML_ERROR;
}
}
else if ((MY_XML_STRING == lex) || (MY_XML_IDENT == lex))
else if (MY_XML_IDENT == lex)
{
p->current_node_type= MY_XML_NODE_ATTR;
if ((MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg))) ||
(MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg))))
return MY_XML_ERROR;
}
else if ((MY_XML_STRING == lex) && exclam)
{
/*
We are in <!DOCTYPE>, e.g.
<!DOCTYPE name SYSTEM "SystemLiteral">
<!DOCTYPE name PUBLIC "PublidLiteral" "SystemLiteral">
Just skip "SystemLiteral" and "PublicidLiteral"
*/
}
else
break;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment