Commit a65a56ec authored by Kyle Consalus's avatar Kyle Consalus Committed by Russ Cox

xml: add line numbers to syntax errors.

R=rsc
CC=golang-dev
https://golang.org/cl/699041
parent e781303e
...@@ -12,7 +12,6 @@ package xml ...@@ -12,7 +12,6 @@ package xml
// TODO(rsc): // TODO(rsc):
// Test error handling. // Test error handling.
// Expose parser line number in errors.
import ( import (
"bufio" "bufio"
...@@ -26,9 +25,14 @@ import ( ...@@ -26,9 +25,14 @@ import (
) )
// A SyntaxError represents a syntax error in the XML input stream. // A SyntaxError represents a syntax error in the XML input stream.
type SyntaxError string type SyntaxError struct {
Msg string
Line int
}
func (e SyntaxError) String() string { return "XML syntax error: " + string(e) } func (e *SyntaxError) String() string {
return "XML syntax error on line " + strconv.Itoa(e.Line) + ": " + e.Msg
}
// A Name represents an XML name (Local) annotated // A Name represents an XML name (Local) annotated
// with a name space identifier (Space). // with a name space identifier (Space).
...@@ -344,6 +348,11 @@ func (p *Parser) pushNs(local string, url string, ok bool) { ...@@ -344,6 +348,11 @@ func (p *Parser) pushNs(local string, url string, ok bool) {
s.ok = ok s.ok = ok
} }
// Creates a SyntaxError with the current line number.
func (p *Parser) syntaxError(msg string) os.Error {
return &SyntaxError{Msg: msg, Line: p.line}
}
// Record that we are ending an element with the given name. // Record that we are ending an element with the given name.
// The name must match the record at the top of the stack, // The name must match the record at the top of the stack,
// which must be a pushElement record. // which must be a pushElement record.
...@@ -355,7 +364,7 @@ func (p *Parser) popElement(t *EndElement) bool { ...@@ -355,7 +364,7 @@ func (p *Parser) popElement(t *EndElement) bool {
name := t.Name name := t.Name
switch { switch {
case s == nil || s.kind != stkStart: case s == nil || s.kind != stkStart:
p.err = SyntaxError("unexpected end element </" + name.Local + ">") p.err = p.syntaxError("unexpected end element </" + name.Local + ">")
return false return false
case s.name.Local != name.Local: case s.name.Local != name.Local:
if !p.Strict { if !p.Strict {
...@@ -364,10 +373,10 @@ func (p *Parser) popElement(t *EndElement) bool { ...@@ -364,10 +373,10 @@ func (p *Parser) popElement(t *EndElement) bool {
t.Name = s.name t.Name = s.name
return true return true
} }
p.err = SyntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">") p.err = p.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">")
return false return false
case s.name.Space != name.Space: case s.name.Space != name.Space:
p.err = SyntaxError("element <" + s.name.Local + "> in space " + s.name.Space + p.err = p.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space +
"closed by </" + name.Local + "> in space " + name.Space) "closed by </" + name.Local + "> in space " + name.Space)
return false return false
} }
...@@ -442,7 +451,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -442,7 +451,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
var name Name var name Name
if name, ok = p.nsname(); !ok { if name, ok = p.nsname(); !ok {
if p.err == nil { if p.err == nil {
p.err = SyntaxError("expected element name after </") p.err = p.syntaxError("expected element name after </")
} }
return nil, p.err return nil, p.err
} }
...@@ -451,7 +460,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -451,7 +460,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
return nil, p.err return nil, p.err
} }
if b != '>' { if b != '>' {
p.err = SyntaxError("invalid characters between </" + name.Local + " and >") p.err = p.syntaxError("invalid characters between </" + name.Local + " and >")
return nil, p.err return nil, p.err
} }
return EndElement{name}, nil return EndElement{name}, nil
...@@ -463,7 +472,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -463,7 +472,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
var target string var target string
if target, ok = p.name(); !ok { if target, ok = p.name(); !ok {
if p.err == nil { if p.err == nil {
p.err = SyntaxError("expected target name after <?") p.err = p.syntaxError("expected target name after <?")
} }
return nil, p.err return nil, p.err
} }
...@@ -496,7 +505,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -496,7 +505,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
return nil, p.err return nil, p.err
} }
if b != '-' { if b != '-' {
p.err = SyntaxError("invalid sequence <!- not part of <!--") p.err = p.syntaxError("invalid sequence <!- not part of <!--")
return nil, p.err return nil, p.err
} }
// Look for terminator. // Look for terminator.
...@@ -523,7 +532,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -523,7 +532,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
return nil, p.err return nil, p.err
} }
if b != "CDATA["[i] { if b != "CDATA["[i] {
p.err = SyntaxError("invalid <![ sequence") p.err = p.syntaxError("invalid <![ sequence")
return nil, p.err return nil, p.err
} }
} }
...@@ -561,7 +570,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -561,7 +570,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
) )
if name, ok = p.nsname(); !ok { if name, ok = p.nsname(); !ok {
if p.err == nil { if p.err == nil {
p.err = SyntaxError("expected element name after <") p.err = p.syntaxError("expected element name after <")
} }
return nil, p.err return nil, p.err
} }
...@@ -578,7 +587,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -578,7 +587,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
return nil, p.err return nil, p.err
} }
if b != '>' { if b != '>' {
p.err = SyntaxError("expected /> in element") p.err = p.syntaxError("expected /> in element")
return nil, p.err return nil, p.err
} }
break break
...@@ -600,7 +609,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -600,7 +609,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
a := &attr[n] a := &attr[n]
if a.Name, ok = p.nsname(); !ok { if a.Name, ok = p.nsname(); !ok {
if p.err == nil { if p.err == nil {
p.err = SyntaxError("expected attribute name in element") p.err = p.syntaxError("expected attribute name in element")
} }
return nil, p.err return nil, p.err
} }
...@@ -609,7 +618,7 @@ func (p *Parser) RawToken() (Token, os.Error) { ...@@ -609,7 +618,7 @@ func (p *Parser) RawToken() (Token, os.Error) {
return nil, p.err return nil, p.err
} }
if b != '=' { if b != '=' {
p.err = SyntaxError("attribute name without = in element") p.err = p.syntaxError("attribute name without = in element")
return nil, p.err return nil, p.err
} }
p.space() p.space()
...@@ -638,7 +647,7 @@ func (p *Parser) attrval() []byte { ...@@ -638,7 +647,7 @@ func (p *Parser) attrval() []byte {
} }
// Handle unquoted attribute values for strict parsers // Handle unquoted attribute values for strict parsers
if p.Strict { if p.Strict {
p.err = SyntaxError("unquoted or missing attribute value in element") p.err = p.syntaxError("unquoted or missing attribute value in element")
return nil return nil
} }
// Handle unquoted attribute values for unstrict parsers // Handle unquoted attribute values for unstrict parsers
...@@ -707,7 +716,7 @@ func (p *Parser) getc() (b byte, ok bool) { ...@@ -707,7 +716,7 @@ func (p *Parser) getc() (b byte, ok bool) {
func (p *Parser) mustgetc() (b byte, ok bool) { func (p *Parser) mustgetc() (b byte, ok bool) {
if b, ok = p.getc(); !ok { if b, ok = p.getc(); !ok {
if p.err == os.EOF { if p.err == os.EOF {
p.err = SyntaxError("unexpected EOF") p.err = p.syntaxError("unexpected EOF")
} }
} }
return return
...@@ -751,14 +760,14 @@ Input: ...@@ -751,14 +760,14 @@ Input:
trunc = 2 trunc = 2
break Input break Input
} }
p.err = SyntaxError("unescaped ]]> not in CDATA section") p.err = p.syntaxError("unescaped ]]> not in CDATA section")
return nil return nil
} }
// Stop reading text if we see a <. // Stop reading text if we see a <.
if b == '<' && !cdata { if b == '<' && !cdata {
if quote >= 0 { if quote >= 0 {
p.err = SyntaxError("unescaped < inside quoted string") p.err = p.syntaxError("unescaped < inside quoted string")
return nil return nil
} }
p.ungetc('<') p.ungetc('<')
...@@ -779,7 +788,7 @@ Input: ...@@ -779,7 +788,7 @@ Input:
p.tmp[i], p.err = p.r.ReadByte() p.tmp[i], p.err = p.r.ReadByte()
if p.err != nil { if p.err != nil {
if p.err == os.EOF { if p.err == os.EOF {
p.err = SyntaxError("unexpected EOF") p.err = p.syntaxError("unexpected EOF")
} }
return nil return nil
} }
...@@ -804,7 +813,7 @@ Input: ...@@ -804,7 +813,7 @@ Input:
p.buf.Write(p.tmp[0:i]) p.buf.Write(p.tmp[0:i])
continue Input continue Input
} }
p.err = SyntaxError("character entity expression &" + s + "... too long") p.err = p.syntaxError("character entity expression &" + s + "... too long")
return nil return nil
} }
var haveText bool var haveText bool
...@@ -836,7 +845,7 @@ Input: ...@@ -836,7 +845,7 @@ Input:
p.buf.Write(p.tmp[0:i]) p.buf.Write(p.tmp[0:i])
continue Input continue Input
} }
p.err = SyntaxError("invalid character entity &" + s + ";") p.err = p.syntaxError("invalid character entity &" + s + ";")
return nil return nil
} }
p.buf.Write([]byte(text)) p.buf.Write([]byte(text))
...@@ -913,7 +922,7 @@ func (p *Parser) name() (s string, ok bool) { ...@@ -913,7 +922,7 @@ func (p *Parser) name() (s string, ok bool) {
s = p.buf.String() s = p.buf.String()
for i, c := range s { for i, c := range s {
if !unicode.Is(first, c) && (i == 0 || !unicode.Is(second, c)) { if !unicode.Is(first, c) && (i == 0 || !unicode.Is(second, c)) {
p.err = SyntaxError("invalid XML name: " + s) p.err = p.syntaxError("invalid XML name: " + s)
return "", false return "", false
} }
} }
......
...@@ -209,7 +209,7 @@ func TestSyntax(t *testing.T) { ...@@ -209,7 +209,7 @@ func TestSyntax(t *testing.T) {
var err os.Error var err os.Error
for _, err = p.Token(); err == nil; _, err = p.Token() { for _, err = p.Token(); err == nil; _, err = p.Token() {
} }
if _, ok := err.(SyntaxError); !ok { if _, ok := err.(*SyntaxError); !ok {
t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
} }
} }
...@@ -314,7 +314,7 @@ func TestUnquotedAttrs(t *testing.T) { ...@@ -314,7 +314,7 @@ func TestUnquotedAttrs(t *testing.T) {
p := NewParser(StringReader(data)) p := NewParser(StringReader(data))
p.Strict = false p.Strict = false
token, err := p.Token() token, err := p.Token()
if _, ok := err.(SyntaxError); ok { if _, ok := err.(*SyntaxError); ok {
t.Errorf("Unexpected error: %v", err) t.Errorf("Unexpected error: %v", err)
} }
if token.(StartElement).Name.Local != "tag" { if token.(StartElement).Name.Local != "tag" {
...@@ -354,3 +354,18 @@ func TestCopyTokenStartElement(t *testing.T) { ...@@ -354,3 +354,18 @@ func TestCopyTokenStartElement(t *testing.T) {
t.Error("CopyToken(CharData) uses same buffer.") t.Error("CopyToken(CharData) uses same buffer.")
} }
} }
func TestSyntaxErrorLineNum(t *testing.T) {
testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
p := NewParser(StringReader(testInput))
var err os.Error
for _, err = p.Token(); err == nil; _, err = p.Token() {
}
synerr, ok := err.(*SyntaxError)
if !ok {
t.Error("Expected SyntaxError.")
}
if synerr.Line != 3 {
t.Error("SyntaxError didn't have correct line number.")
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment