cmd/compile/internal/syntax: better errors and recovery for invalid character literals

Fixes #15611.

Change-Id: I352b145026466cafef8cf87addafbd30716bda24
Reviewed-on: https://go-review.googlesource.com/37138
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>

cmd/compile/internal/syntax: better errors and recovery for invalid character literals
Fixes #15611.

Change-Id: I352b145026466cafef8cf87addafbd30716bda24
Reviewed-on: https://go-review.googlesource.com/37138
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
1693e7b6 · Robert Griesemer · 990124da · 1693e7b6 · 1693e7b6 · 1693e7b6
Commit 1693e7b6 authored Feb 16, 2017 by Robert Griesemer
3 changed files
--- a/src/cmd/compile/internal/syntax/scanner.go
+++ b/src/cmd/compile/internal/syntax/scanner.go
@@ -466,41 +466,68 @@ done:
 	s.tok = _Literal
 }
-func (s *scanner) stdString() {
+func (s *scanner) rune() {
 	s.startLit()
-	for {
+	ok := true // only report errors if we're ok so far
+	n := 0
+	for ; ; n++ {
 		r := s.getr()
-		if r == '"' {
+		if r == '\'' {
 			break
 		}
 		if r == '\\' {
-			s.escape('"')
+			if !s.escape('\'') {
+				ok = false
+			}
 			continue
 		}
 		if r == '\n' {
 			s.ungetr() // assume newline is not part of literal
-			s.error("newline in string")
+			if ok {
+				s.error("newline in character literal")
+				ok = false
+			}
 			break
 		}
 		if r < 0 {
-			s.errh(s.line, s.col, "string not terminated")
+			if ok {
+				s.errh(s.line, s.col, "invalid character literal (missing closing ')")
+				ok = false
+			}
 			break
 		}
 	}
+	if ok {
+		if n == 0 {
+			s.error("empty character literal or unescaped ' in character literal")
+		} else if n != 1 {
+			s.errh(s.line, s.col, "invalid character literal (more than one character)")
+		}
+	}
 	s.nlsemi = true
 	s.lit = string(s.stopLit())
-	s.kind = StringLit
+	s.kind = RuneLit
 	s.tok = _Literal
 }
-func (s *scanner) rawString() {
+func (s *scanner) stdString() {
 	s.startLit()
 	for {
 		r := s.getr()
-		if r == '`' {
+		if r == '"' {
+			break
+		}
+		if r == '\\' {
+			s.escape('"')
+			continue
+		}
+		if r == '\n' {
+			s.ungetr() // assume newline is not part of literal
+			s.error("newline in string")
 			break
 		}
 		if r < 0 {
@@ -508,9 +535,6 @@ func (s *scanner) rawString() {
 			break
 		}
 	}
-	// We leave CRs in the string since they are part of the
-	// literal (even though they are not part of the literal
-	// value).
 	s.nlsemi = true
 	s.lit = string(s.stopLit())
@@ -518,35 +542,26 @@ func (s *scanner) rawString() {
 	s.tok = _Literal
 }
-func (s *scanner) rune() {
+func (s *scanner) rawString() {
 	s.startLit()
+	for {
 		r := s.getr()
-	ok := false
+		if r == '`' {
-	if r == '\'' {
+			break
-		s.error("empty character literal or unescaped ' in character literal")
-	} else if r == '\n' {
-		s.ungetr() // assume newline is not part of literal
-		s.error("newline in character literal")
-	} else {
-		ok = true
-		if r == '\\' {
-			ok = s.escape('\'')
-		}
 		}
+		if r < 0 {
-	r = s.getr()
+			s.errh(s.line, s.col, "string not terminated")
-	if r != '\'' {
+			break
-		// only report error if we're ok so far
-		if ok {
-			s.error("missing '")
 		}
-		s.ungetr()
 	}
+	// We leave CRs in the string since they are part of the
+	// literal (even though they are not part of the literal
+	// value).
 	s.nlsemi = true
 	s.lit = string(s.stopLit())
-	s.kind = RuneLit
+	s.kind = StringLit
 	s.tok = _Literal
 }

--- a/src/cmd/compile/internal/syntax/scanner_test.go
+++ b/src/cmd/compile/internal/syntax/scanner_test.go
@@ -292,9 +292,9 @@ func TestScanErrors(t *testing.T) {
 		{`''`, "empty character literal or unescaped ' in character literal", 1, 1},
 		{"'\n", "newline in character literal", 1, 1},
-		{`'\`, "missing '", 1, 2},
+		{`'\`, "invalid character literal (missing closing ')", 1, 0},
-		{`'\'`, "missing '", 1, 3},
+		{`'\'`, "invalid character literal (missing closing ')", 1, 0},
-		{`'\x`, "missing '", 1, 3},
+		{`'\x`, "invalid character literal (missing closing ')", 1, 0},
 		{`'\x'`, "non-hex character in escape sequence: '", 1, 3},
 		{`'\y'`, "unknown escape sequence", 1, 2},
 		{`'\x0'`, "non-hex character in escape sequence: '", 1, 4},
@@ -302,7 +302,8 @@ func TestScanErrors(t *testing.T) {
 		{`'\377' /*`, "comment not terminated", 1, 7}, // valid octal escape
 		{`'\378`, "non-octal character in escape sequence: 8", 1, 4},
 		{`'\400'`, "octal escape value > 255: 256", 1, 5},
-		{`'xx`, "missing '", 1, 2},
+		{`'xx`, "invalid character literal (missing closing ')", 1, 0},
+		{`'xx'`, "invalid character literal (more than one character)", 1, 0},
 		{"\"\n", "newline in string", 1, 1},
 		{`"`, "string not terminated", 1, 0},

--- a/test/fixedbugs/issue15611.go
+++ b/test/fixedbugs/issue15611.go
+// errorcheck
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package p
+// These error messages are for the invalid literals on lines 19 and 20:
+// ERROR "newline in character literal"
+// ERROR "invalid character literal \(missing closing '\)"
+const (
+	_ = ''     // ERROR "empty character literal or unescaped ' in character literal"
+	_ = 'f'
+	_ = 'foo'  // ERROR "invalid character literal \(more than one character\)"
+//line issue15611.go:11
+	_ = '
+	_ = '
\ No newline at end of file