go/scanner: the position of '\n's chars must be the last position of the current line

Background: This didn't matter until recently, because '\n' don't appear as token starts in source code and thus the exact position was irrelevant (and set as was easiest in the code). With auto semicolon insertion, a virtual semicolon may be inserted when a '\n' is seen. The position of the semicolon is the position of the '\n'. Without this fix, these semicolons appeared on the next line instead of the line where they were inserted. This affected the association of comments to declarations in the parser. As a result, some lead comments where considered line comments, not collected in the ast, and not shown in godoc pages. (This affected only godoc pages, not gofmt-formatted programs). Fixes #592. R=rsc CC=golang-dev https://golang.org/cl/224068

go/scanner: the position of '\n's chars must be the last position of the current line
Background: This didn't matter until recently, because '\n' don't appear as token starts in source code and thus the exact position was irrelevant (and set as was easiest in the code). With auto semicolon insertion, a virtual semicolon may be inserted when a '\n' is seen. The position of the semicolon is the position of the '\n'. Without this fix, these semicolons appeared on the next line instead of the line where they were inserted. This affected the association of comments to declarations in the parser. As a result, some lead comments where considered line comments, not collected in the ast, and not shown in godoc pages. (This affected only godoc pages, not gofmt-formatted programs). Fixes #592. R=rsc CC=golang-dev https://golang.org/cl/224068
9520a682 · Robert Griesemer · 8c540474 · 9520a682 · 9520a682
Commit 9520a682 authored 15 years ago by Robert Griesemer
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 8 deletions

src/pkg/go/scanner/scanner.go src/pkg/go/scanner/scanner.go +8 -6

src/pkg/go/scanner/scanner_test.go src/pkg/go/scanner/scanner_test.go +4 -2

No files found.
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@@ -46,13 +46,15 @@ func (S *Scanner) next() {
 	if S.offset < len(S.src) {
 		S.pos.Offset = S.offset
 		S.pos.Column++
+		if S.ch == '\n' {
+			// next character starts a new line
+			S.pos.Line++
+			S.pos.Column = 1
+		}
 		r, w := int(S.src[S.offset]), 1
 		switch {
 		case r == 0:
 			S.error(S.pos, "illegal character NUL")
-		case r == '\n':
-			S.pos.Line++
-			S.pos.Column = 0
 		case r >= 0x80:
 			// not ASCII
 			r, w = utf8.DecodeRune(S.src[S.offset:])
@@ -168,7 +170,7 @@ func (S *Scanner) scanComment(pos token.Position) {
 								// valid //line filename:line comment;
 								// update scanner position
 								S.pos.Filename = string(text[len(prefix):i])
-								S.pos.Line = line
+								S.pos.Line = line - 1 // -1 since the '\n' has not been consumed yet
 							}
 						}
 					}
@@ -211,7 +213,7 @@ func (S *Scanner) findNewline(pos token.Position) bool {
 			newline = true
 			break
 		}
-		S.skipWhitespace()
+		S.skipWhitespace() // S.insertSemi is set
 		if S.ch == '\n' {
 			newline = true
 			break
@@ -526,7 +528,7 @@ scanAgain:
 		case -1:
 			tok = token.EOF
 		case '\n':
-			// we only reach here of S.insertSemi was
+			// we only reach here if S.insertSemi was
 			// set in the first place and exited early
 			// from S.skipWhitespace()
 			S.insertSemi = false // newline consumed

--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@@ -219,11 +219,12 @@ func TestScan(t *testing.T) {
 	for _, e := range tokens {
 		src += e.lit + whitespace
 	}
+	src_linecount := newlineCount(src)
 	whitespace_linecount := newlineCount(whitespace)
 	// verify scan
 	index := 0
-	epos := token.Position{"", 0, 1, 1}
+	epos := token.Position{"", 0, 1, 1} // expected position
 	nerrors := Tokenize("", []byte(src), &testErrorHandler{t}, ScanComments,
 		func(pos token.Position, tok token.Token, litb []byte) bool {
 			e := elt{token.EOF, "", special}
@@ -233,7 +234,8 @@ func TestScan(t *testing.T) {
 			lit := string(litb)
 			if tok == token.EOF {
 				lit = "<EOF>"
-				epos.Column = 0
+				epos.Line = src_linecount
+				epos.Column = 1
 			}
 			checkPos(t, lit, pos, epos)
 			if tok != e.tok {