Commit fc80ce81 authored by Robert Griesemer's avatar Robert Griesemer

go/scanner: report too short escape sequences

Generally improve error messages for escape sequences.

R=adonovan
CC=golang-codereviews
https://golang.org/cl/49430046
parent fdbf3d90
......@@ -358,60 +358,77 @@ exit:
return tok, string(s.src[offs:s.offset])
}
func (s *Scanner) scanEscape(quote rune) {
// scanEscape parses an escape sequence where rune is the accepted
// escaped quote. In case of a syntax error, it stops at the offending
// character (without consuming it) and returns false. Otherwise
// it returns true.
func (s *Scanner) scanEscape(quote rune) bool {
offs := s.offset
var i, base, max uint32
var n int
var base, max uint32
switch s.ch {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
s.next()
return
return true
case '0', '1', '2', '3', '4', '5', '6', '7':
i, base, max = 3, 8, 255
n, base, max = 3, 8, 255
case 'x':
s.next()
i, base, max = 2, 16, 255
n, base, max = 2, 16, 255
case 'u':
s.next()
i, base, max = 4, 16, unicode.MaxRune
n, base, max = 4, 16, unicode.MaxRune
case 'U':
s.next()
i, base, max = 8, 16, unicode.MaxRune
n, base, max = 8, 16, unicode.MaxRune
default:
s.next() // always make progress
s.error(offs, "unknown escape sequence")
return
msg := "unknown escape sequence"
if s.ch < 0 {
msg = "escape sequence not terminated"
}
s.error(offs, msg)
return false
}
var x uint32
for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
for n > 0 {
d := uint32(digitVal(s.ch))
if d >= base {
s.error(s.offset, "illegal character in escape sequence")
break
msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch)
if s.ch < 0 {
msg = "escape sequence not terminated"
}
s.error(s.offset, msg)
return false
}
x = x*base + d
s.next()
n--
}
// in case of an error, consume remaining chars
for ; i > 0 && s.ch != quote && s.ch >= 0; i-- {
s.next()
}
if x > max || 0xD800 <= x && x < 0xE000 {
s.error(offs, "escape sequence is invalid Unicode code point")
return false
}
return true
}
func (s *Scanner) scanRune() string {
// '\'' opening already consumed
offs := s.offset - 1
valid := true
n := 0
for {
ch := s.ch
if ch == '\n' || ch < 0 {
s.error(offs, "rune literal not terminated")
n = 1 // avoid further errors
// only report error if we don't have one already
if valid {
s.error(offs, "rune literal not terminated")
valid = false
}
break
}
s.next()
......@@ -420,11 +437,14 @@ func (s *Scanner) scanRune() string {
}
n++
if ch == '\\' {
s.scanEscape('\'')
if !s.scanEscape('\'') {
valid = false
}
// continue to read to closing quote
}
}
if n != 1 {
if valid && n != 1 {
s.error(offs, "illegal rune literal")
}
......
......@@ -641,13 +641,9 @@ func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err str
}
s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertSemis)
_, tok0, lit0 := s.Scan()
_, tok1, _ := s.Scan()
if tok0 != tok {
t.Errorf("%q: got %s, expected %s", src, tok0, tok)
}
if tok1 != token.EOF {
t.Errorf("%q: got %s, expected EOF", src, tok1)
}
if tok0 != token.ILLEGAL && lit0 != lit {
t.Errorf("%q: got literal %q, expected %q", src, lit0, lit)
}
......@@ -678,12 +674,34 @@ var errors = []struct {
{`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"},
{`' '`, token.CHAR, 0, `' '`, ""},
{`''`, token.CHAR, 0, `''`, "illegal rune literal"},
{`'12'`, token.CHAR, 0, `'12'`, "illegal rune literal"},
{`'123'`, token.CHAR, 0, `'123'`, "illegal rune literal"},
{`'\0'`, token.CHAR, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"},
{`'\07'`, token.CHAR, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"},
{`'\8'`, token.CHAR, 2, `'\8'`, "unknown escape sequence"},
{`'\08'`, token.CHAR, 3, `'\08'`, "illegal character in escape sequence"},
{`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character in escape sequence"},
{`'\08'`, token.CHAR, 3, `'\08'`, "illegal character U+0038 '8' in escape sequence"},
{`'\x'`, token.CHAR, 3, `'\x'`, "illegal character U+0027 ''' in escape sequence"},
{`'\x0'`, token.CHAR, 4, `'\x0'`, "illegal character U+0027 ''' in escape sequence"},
{`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character U+0067 'g' in escape sequence"},
{`'\u'`, token.CHAR, 3, `'\u'`, "illegal character U+0027 ''' in escape sequence"},
{`'\u0'`, token.CHAR, 4, `'\u0'`, "illegal character U+0027 ''' in escape sequence"},
{`'\u00'`, token.CHAR, 5, `'\u00'`, "illegal character U+0027 ''' in escape sequence"},
{`'\u000'`, token.CHAR, 6, `'\u000'`, "illegal character U+0027 ''' in escape sequence"},
{`'\u000`, token.CHAR, 6, `'\u000`, "escape sequence not terminated"},
{`'\u0000'`, token.CHAR, 0, `'\u0000'`, ""},
{`'\U'`, token.CHAR, 3, `'\U'`, "illegal character U+0027 ''' in escape sequence"},
{`'\U0'`, token.CHAR, 4, `'\U0'`, "illegal character U+0027 ''' in escape sequence"},
{`'\U00'`, token.CHAR, 5, `'\U00'`, "illegal character U+0027 ''' in escape sequence"},
{`'\U000'`, token.CHAR, 6, `'\U000'`, "illegal character U+0027 ''' in escape sequence"},
{`'\U0000'`, token.CHAR, 7, `'\U0000'`, "illegal character U+0027 ''' in escape sequence"},
{`'\U00000'`, token.CHAR, 8, `'\U00000'`, "illegal character U+0027 ''' in escape sequence"},
{`'\U000000'`, token.CHAR, 9, `'\U000000'`, "illegal character U+0027 ''' in escape sequence"},
{`'\U0000000'`, token.CHAR, 10, `'\U0000000'`, "illegal character U+0027 ''' in escape sequence"},
{`'\U0000000`, token.CHAR, 10, `'\U0000000`, "escape sequence not terminated"},
{`'\U00000000'`, token.CHAR, 0, `'\U00000000'`, ""},
{`'\Uffffffff'`, token.CHAR, 2, `'\Uffffffff'`, "escape sequence is invalid Unicode code point"},
{`'`, token.CHAR, 0, `'`, "rune literal not terminated"},
{`'\`, token.CHAR, 2, `'\`, "escape sequence not terminated"},
{"'\n", token.CHAR, 0, "'", "rune literal not terminated"},
{"'\n ", token.CHAR, 0, "'", "rune literal not terminated"},
{`""`, token.STRING, 0, `""`, ""},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment