Commit dec5bb78 authored by Arvindh Rajesh Tamilmani, committed by Russ Cox

xml: handle unexpected EOF while parsing and fix a bug in name

mustgetc reports unexpected EOF as a SyntaxError. Using
mustgetc seems to be a better approach than letting the
caller handle unexpected EOF every time.

name: the second if statement should explicitly return
ok==false.

R=rsc
https://golang.org/cl/174083
parent 19c18358
...@@ -409,7 +409,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -409,7 +409,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
return CharData(data), nil; return CharData(data), nil;
} }
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return nil, p.err return nil, p.err
} }
switch b { switch b {
...@@ -423,7 +423,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -423,7 +423,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
return nil, p.err; return nil, p.err;
} }
p.space(); p.space();
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return nil, p.err return nil, p.err
} }
if b != '>' { if b != '>' {
...@@ -438,17 +438,17 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -438,17 +438,17 @@ func (p *Parser) RawToken() (Token, os.Error) {
// the version is 1.0 and the encoding is UTF-8. // the version is 1.0 and the encoding is UTF-8.
var target string; var target string;
if target, ok = p.name(); !ok { if target, ok = p.name(); !ok {
return nil, p.err if p.err == nil {
p.err = SyntaxError("expected target name after <?")
}
return nil, p.err;
} }
p.space(); p.space();
p.buf.Reset(); p.buf.Reset();
var b0 byte; var b0 byte;
for { for {
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
if p.err == os.EOF { return nil, p.err
p.err = SyntaxError("unterminated <? directive")
}
return nil, p.err;
} }
p.buf.WriteByte(b); p.buf.WriteByte(b);
if b0 == '?' && b == '>' { if b0 == '?' && b == '>' {
...@@ -462,13 +462,13 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -462,13 +462,13 @@ func (p *Parser) RawToken() (Token, os.Error) {
case '!': case '!':
// <!: Maybe comment, maybe CDATA. // <!: Maybe comment, maybe CDATA.
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return nil, p.err return nil, p.err
} }
switch b { switch b {
case '-': // <!- case '-': // <!-
// Probably <!-- for a comment. // Probably <!-- for a comment.
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return nil, p.err return nil, p.err
} }
if b != '-' { if b != '-' {
...@@ -479,11 +479,8 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -479,11 +479,8 @@ func (p *Parser) RawToken() (Token, os.Error) {
p.buf.Reset(); p.buf.Reset();
var b0, b1 byte; var b0, b1 byte;
for { for {
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
if p.err == os.EOF { return nil, p.err
p.err = SyntaxError("unterminated <!-- comment")
}
return nil, p.err;
} }
p.buf.WriteByte(b); p.buf.WriteByte(b);
if b0 == '-' && b1 == '-' && b == '>' { if b0 == '-' && b1 == '-' && b == '>' {
...@@ -498,7 +495,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -498,7 +495,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
case '[': // <![ case '[': // <![
// Probably <![CDATA[. // Probably <![CDATA[.
for i := 0; i < 6; i++ { for i := 0; i < 6; i++ {
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return nil, p.err return nil, p.err
} }
if b != "CDATA["[i] { if b != "CDATA["[i] {
...@@ -519,7 +516,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -519,7 +516,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
p.buf.Reset(); p.buf.Reset();
p.buf.WriteByte(b); p.buf.WriteByte(b);
for { for {
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return nil, p.err return nil, p.err
} }
if b == '>' { if b == '>' {
...@@ -548,12 +545,12 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -548,12 +545,12 @@ func (p *Parser) RawToken() (Token, os.Error) {
attr = make([]Attr, 0, 4); attr = make([]Attr, 0, 4);
for { for {
p.space(); p.space();
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return nil, p.err return nil, p.err
} }
if b == '/' { if b == '/' {
empty = true; empty = true;
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return nil, p.err return nil, p.err
} }
if b != '>' { if b != '>' {
...@@ -584,7 +581,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -584,7 +581,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
return nil, p.err; return nil, p.err;
} }
p.space(); p.space();
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return nil, p.err return nil, p.err
} }
if b != '=' { if b != '=' {
...@@ -592,7 +589,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -592,7 +589,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
return nil, p.err; return nil, p.err;
} }
p.space(); p.space();
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return nil, p.err return nil, p.err
} }
if b != '"' && b != '\'' { if b != '"' && b != '\'' {
...@@ -652,6 +649,19 @@ func (p *Parser) getc() (b byte, ok bool) { ...@@ -652,6 +649,19 @@ func (p *Parser) getc() (b byte, ok bool) {
return b, true; return b, true;
} }
// mustgetc reads a single byte that the caller requires to be present.
// When getc fails and p.err is os.EOF, p.err is replaced with
// SyntaxError("unexpected EOF"); any other error is left untouched.
// The byte and ok flag obtained from getc are returned unchanged.
func (p *Parser) mustgetc() (b byte, ok bool) {
	b, ok = p.getc();
	if !ok && p.err == os.EOF {
		p.err = SyntaxError("unexpected EOF")
	}
	return b, ok;
}
// Unread a single byte. // Unread a single byte.
func (p *Parser) ungetc(b byte) { func (p *Parser) ungetc(b byte) {
if b == '\n' { if b == '\n' {
...@@ -678,7 +688,7 @@ func (p *Parser) text(quote int, cdata bool) []byte { ...@@ -678,7 +688,7 @@ func (p *Parser) text(quote int, cdata bool) []byte {
p.buf.Reset(); p.buf.Reset();
Input: Input:
for { for {
b, ok := p.getc(); b, ok := p.mustgetc();
if !ok { if !ok {
return nil return nil
} }
...@@ -717,7 +727,10 @@ Input: ...@@ -717,7 +727,10 @@ Input:
for i = 0; i < len(p.tmp); i++ { for i = 0; i < len(p.tmp); i++ {
p.tmp[i], p.err = p.r.ReadByte(); p.tmp[i], p.err = p.r.ReadByte();
if p.err != nil { if p.err != nil {
return nil if p.err == os.EOF {
p.err = SyntaxError("unexpected EOF")
}
return nil;
} }
c := p.tmp[i]; c := p.tmp[i];
if c == ';' { if c == ';' {
...@@ -819,22 +832,23 @@ func (p *Parser) nsname() (name Name, ok bool) { ...@@ -819,22 +832,23 @@ func (p *Parser) nsname() (name Name, ok bool) {
} }
// Get name: /first(first|second)*/ // Get name: /first(first|second)*/
// Do not set p.err if the name is missing: let the caller provide better context. // Do not set p.err if the name is missing (unless unexpected EOF is received):
// let the caller provide better context.
func (p *Parser) name() (s string, ok bool) { func (p *Parser) name() (s string, ok bool) {
var b byte; var b byte;
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return return
} }
// As a first approximation, we gather the bytes [A-Za-z_:.-\x80-\xFF]* // As a first approximation, we gather the bytes [A-Za-z_:.-\x80-\xFF]*
if b < utf8.RuneSelf && !isNameByte(b) { if b < utf8.RuneSelf && !isNameByte(b) {
p.ungetc(b); p.ungetc(b);
return; return "", false;
} }
p.buf.Reset(); p.buf.Reset();
p.buf.WriteByte(b); p.buf.WriteByte(b);
for { for {
if b, ok = p.getc(); !ok { if b, ok = p.mustgetc(); !ok {
return return
} }
if b < utf8.RuneSelf && !isNameByte(b) { if b < utf8.RuneSelf && !isNameByte(b) {
......
...@@ -94,6 +94,57 @@ var cookedTokens = []Token{ ...@@ -94,6 +94,57 @@ var cookedTokens = []Token{
Comment(strings.Bytes(" missing final newline ")), Comment(strings.Bytes(" missing final newline ")),
} }
// xmlInput lists malformed XML documents.  TestSyntax parses each entry
// and expects tokenizing to end with a SyntaxError.  The first group
// covers inputs that stop in the middle of a construct; the second
// group covers other syntax errors.
var xmlInput = []string{
// unexpected EOF cases
"<",
"<t",
"<t ",
"<t/",
"<t/>c",
"<!",
"<!-",
"<!--",
"<!--c-",
"<!--c--",
"<!d",
"<t></",
"<t></t",
"<?",
"<?p",
"<t a",
"<t a=",
"<t a='",
"<t a=''",
"<t/><![",
"<t/><![C",
"<t/><![CDATA[d",
"<t/><![CDATA[d]",
"<t/><![CDATA[d]]",
// other Syntax errors
" ",
">",
"<>",
"<t/a",
"<0 />",
"<?0 >",
// "<!0 >", // let the Token() caller handle
"</0>",
"<t 0=''>",
"<t a='&'>",
"<t a='<'>",
"<t>&nbspc;</t>",
"<t a>",
"<t a=>",
"<t a=v>",
// "<![CDATA[d]]>", // let the Token() caller handle
"cdata",
"<t></e>",
"<t></>",
"<t></t!",
"<t>cdata]]></t>",
}
type stringReader struct { type stringReader struct {
s string; s string;
off int; off int;
...@@ -149,3 +200,15 @@ func TestToken(t *testing.T) { ...@@ -149,3 +200,15 @@ func TestToken(t *testing.T) {
} }
} }
} }
// TestSyntax feeds every entry of xmlInput to a fresh parser and pulls
// tokens until Token reports an error.  The test fails unless that
// error is a SyntaxError.
func TestSyntax(t *testing.T) {
	for _, input := range xmlInput {
		p := NewParser(StringReader(input));
		var err os.Error;
		// Drain tokens; only the terminating error matters here.
		for err == nil {
			_, err = p.Token()
		}
		if _, isSyntax := err.(SyntaxError); !isSyntax {
			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, input)
		}
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment