Commit 1693e7b6 authored by Robert Griesemer

cmd/compile/internal/syntax: better errors and recovery for invalid character literals

Fixes #15611.

Change-Id: I352b145026466cafef8cf87addafbd30716bda24
Reviewed-on: https://go-review.googlesource.com/37138
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
parent 990124da
...@@ -466,41 +466,68 @@ done: ...@@ -466,41 +466,68 @@ done:
s.tok = _Literal s.tok = _Literal
} }
func (s *scanner) stdString() { func (s *scanner) rune() {
s.startLit() s.startLit()
for { ok := true // only report errors if we're ok so far
n := 0
for ; ; n++ {
r := s.getr() r := s.getr()
if r == '"' { if r == '\'' {
break break
} }
if r == '\\' { if r == '\\' {
s.escape('"') if !s.escape('\'') {
ok = false
}
continue continue
} }
if r == '\n' { if r == '\n' {
s.ungetr() // assume newline is not part of literal s.ungetr() // assume newline is not part of literal
s.error("newline in string") if ok {
s.error("newline in character literal")
ok = false
}
break break
} }
if r < 0 { if r < 0 {
s.errh(s.line, s.col, "string not terminated") if ok {
s.errh(s.line, s.col, "invalid character literal (missing closing ')")
ok = false
}
break break
} }
} }
if ok {
if n == 0 {
s.error("empty character literal or unescaped ' in character literal")
} else if n != 1 {
s.errh(s.line, s.col, "invalid character literal (more than one character)")
}
}
s.nlsemi = true s.nlsemi = true
s.lit = string(s.stopLit()) s.lit = string(s.stopLit())
s.kind = StringLit s.kind = RuneLit
s.tok = _Literal s.tok = _Literal
} }
func (s *scanner) rawString() { func (s *scanner) stdString() {
s.startLit() s.startLit()
for { for {
r := s.getr() r := s.getr()
if r == '`' { if r == '"' {
break
}
if r == '\\' {
s.escape('"')
continue
}
if r == '\n' {
s.ungetr() // assume newline is not part of literal
s.error("newline in string")
break break
} }
if r < 0 { if r < 0 {
...@@ -508,9 +535,6 @@ func (s *scanner) rawString() { ...@@ -508,9 +535,6 @@ func (s *scanner) rawString() {
break break
} }
} }
// We leave CRs in the string since they are part of the
// literal (even though they are not part of the literal
// value).
s.nlsemi = true s.nlsemi = true
s.lit = string(s.stopLit()) s.lit = string(s.stopLit())
...@@ -518,35 +542,26 @@ func (s *scanner) rawString() { ...@@ -518,35 +542,26 @@ func (s *scanner) rawString() {
s.tok = _Literal s.tok = _Literal
} }
func (s *scanner) rune() { func (s *scanner) rawString() {
s.startLit() s.startLit()
for {
r := s.getr() r := s.getr()
ok := false if r == '`' {
if r == '\'' { break
s.error("empty character literal or unescaped ' in character literal")
} else if r == '\n' {
s.ungetr() // assume newline is not part of literal
s.error("newline in character literal")
} else {
ok = true
if r == '\\' {
ok = s.escape('\'')
}
} }
if r < 0 {
r = s.getr() s.errh(s.line, s.col, "string not terminated")
if r != '\'' { break
// only report error if we're ok so far
if ok {
s.error("missing '")
} }
s.ungetr()
} }
// We leave CRs in the string since they are part of the
// literal (even though they are not part of the literal
// value).
s.nlsemi = true s.nlsemi = true
s.lit = string(s.stopLit()) s.lit = string(s.stopLit())
s.kind = RuneLit s.kind = StringLit
s.tok = _Literal s.tok = _Literal
} }
......
...@@ -292,9 +292,9 @@ func TestScanErrors(t *testing.T) { ...@@ -292,9 +292,9 @@ func TestScanErrors(t *testing.T) {
{`''`, "empty character literal or unescaped ' in character literal", 1, 1}, {`''`, "empty character literal or unescaped ' in character literal", 1, 1},
{"'\n", "newline in character literal", 1, 1}, {"'\n", "newline in character literal", 1, 1},
{`'\`, "missing '", 1, 2}, {`'\`, "invalid character literal (missing closing ')", 1, 0},
{`'\'`, "missing '", 1, 3}, {`'\'`, "invalid character literal (missing closing ')", 1, 0},
{`'\x`, "missing '", 1, 3}, {`'\x`, "invalid character literal (missing closing ')", 1, 0},
{`'\x'`, "non-hex character in escape sequence: '", 1, 3}, {`'\x'`, "non-hex character in escape sequence: '", 1, 3},
{`'\y'`, "unknown escape sequence", 1, 2}, {`'\y'`, "unknown escape sequence", 1, 2},
{`'\x0'`, "non-hex character in escape sequence: '", 1, 4}, {`'\x0'`, "non-hex character in escape sequence: '", 1, 4},
...@@ -302,7 +302,8 @@ func TestScanErrors(t *testing.T) { ...@@ -302,7 +302,8 @@ func TestScanErrors(t *testing.T) {
{`'\377' /*`, "comment not terminated", 1, 7}, // valid octal escape {`'\377' /*`, "comment not terminated", 1, 7}, // valid octal escape
{`'\378`, "non-octal character in escape sequence: 8", 1, 4}, {`'\378`, "non-octal character in escape sequence: 8", 1, 4},
{`'\400'`, "octal escape value > 255: 256", 1, 5}, {`'\400'`, "octal escape value > 255: 256", 1, 5},
{`'xx`, "missing '", 1, 2}, {`'xx`, "invalid character literal (missing closing ')", 1, 0},
{`'xx'`, "invalid character literal (more than one character)", 1, 0},
{"\"\n", "newline in string", 1, 1}, {"\"\n", "newline in string", 1, 1},
{`"`, "string not terminated", 1, 0}, {`"`, "string not terminated", 1, 0},
......
// errorcheck
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package p
// These error messages are for the invalid literals on lines 19 and 20:
// ERROR "newline in character literal"
// ERROR "invalid character literal \(missing closing '\)"
const (
_ = '' // ERROR "empty character literal or unescaped ' in character literal"
_ = 'f'
_ = 'foo' // ERROR "invalid character literal \(more than one character\)"
//line issue15611.go:11
_ = '
_ = '
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment