go/scanner: don't return token.INVALID for ".." sequence

Per the spec, "...the next token is the longest sequence of characters that form a valid token." Thus, encountering a ".." sequence should return two token.PERIOD tokens rather than a single token.ILLEGAL. Fixes #28112. Change-Id: Iba5da841f40036e53f48f9be23f933f362e67f5e Reviewed-on: https://go-review.googlesource.com/c/141337Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>

go/scanner: don't return token.INVALID for ".." sequence
Per the spec, "...the next token is the longest sequence of characters that form a valid token." Thus, encountering a ".." sequence should return two token.PERIOD tokens rather than a single token.ILLEGAL. Fixes #28112. Change-Id: Iba5da841f40036e53f48f9be23f933f362e67f5e Reviewed-on: https://go-review.googlesource.com/c/141337Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
f64fd66f · Robert Griesemer · 7fb60eb1 · f64fd66f · f64fd66f
Commit f64fd66f authored Oct 10, 2018 by Robert Griesemer
Show whitespace changes
Inline Side-by-side

Showing with 32 additions and 6 deletions

src/go/scanner/scanner.go src/go/scanner/scanner.go +13 -5

src/go/scanner/scanner_test.go src/go/scanner/scanner_test.go +19 -1

No files found.
--- a/src/go/scanner/scanner.go
+++ b/src/go/scanner/scanner.go
@@ -85,6 +85,15 @@ func (s *Scanner) next() {
 	}
 }
+// peek returns the byte following the most recently read character without
+// advancing the scanner. If the scanner is at EOF, peek returns 0.
+func (s *Scanner) peek() byte {
+	if s.rdOffset < len(s.src) {
+		return s.src[s.rdOffset]
+	}
+	return 0
+}
 // A mode value is a set of flags (or 0).
 // They control scanner behavior.
 //
@@ -735,14 +744,13 @@ scanAgain:
 			if '0' <= s.ch && s.ch <= '9' {
 				insertSemi = true
 				tok, lit = s.scanNumber(true)
-			} else if s.ch == '.' {
+			} else {
-				s.next()
+				tok = token.PERIOD
-				if s.ch == '.' {
+				if s.ch == '.' && s.peek() == '.' {
 					s.next()
+					s.next() // consume last '.'
 					tok = token.ELLIPSIS
 				}
-			} else {
-				tok = token.PERIOD
 			}
 		case ',':
 			tok = token.COMMA

--- a/src/go/scanner/scanner_test.go
+++ b/src/go/scanner/scanner_test.go
@@ -757,6 +757,7 @@ var errors = []struct {
 	{"\a", token.ILLEGAL, 0, "", "illegal character U+0007"},
 	{`#`, token.ILLEGAL, 0, "", "illegal character U+0023 '#'"},
 	{`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"},
+	{"..", token.PERIOD, 0, "", ""}, // two periods, not invalid token (issue #28112)
 	{`' '`, token.CHAR, 0, `' '`, ""},
 	{`''`, token.CHAR, 0, `''`, "illegal rune literal"},
 	{`'12'`, token.CHAR, 0, `'12'`, "illegal rune literal"},
@@ -822,7 +823,7 @@ func TestScanErrors(t *testing.T) {
 // Verify that no comments show up as literal values when skipping comments.
 func TestIssue10213(t *testing.T) {
-	var src = `
+	const src = `
 		var (
 			A = 1 // foo
 		)
@@ -855,6 +856,23 @@ func TestIssue10213(t *testing.T) {
 	}
 }
+func TestIssue28112(t *testing.T) {
+	const src = "... .. 0.. .." // make sure to have stand-alone ".." immediately before EOF to test EOF behavior
+	tokens := []token.Token{token.ELLIPSIS, token.PERIOD, token.PERIOD, token.FLOAT, token.PERIOD, token.PERIOD, token.PERIOD, token.EOF}
+	var s Scanner
+	s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), nil, 0)
+	for _, want := range tokens {
+		pos, got, lit := s.Scan()
+		if got != want {
+			t.Errorf("%s: got %s, want %s", fset.Position(pos), got, want)
+		}
+		// literals expect to have a (non-empty) literal string and we don't care about other tokens for this test
+		if tokenclass(got) == literal && lit == "" {
+			t.Errorf("%s: for %s got empty literal string", fset.Position(pos), got)
+		}
+	}
+}
 func BenchmarkScan(b *testing.B) {
 	b.StopTimer()
 	fset := token.NewFileSet()