Commit e64d3377 authored by Robert Griesemer

scanner: treat line comments like in Go

- don't consume '\n' as part of a line comment
(otherwise grammars in which '\n' is a token won't
see it after a line comment)

- permit line comments to end in EOF

R=r
CC=golang-dev
https://golang.org/cl/4277089
parent e008757d
...@@ -331,7 +331,7 @@ func (s *Scanner) error(msg string) { ...@@ -331,7 +331,7 @@ func (s *Scanner) error(msg string) {
s.Error(s, msg) s.Error(s, msg)
return return
} }
fmt.Fprintf(os.Stderr, "%s: %s", s.Position, msg) fmt.Fprintf(os.Stderr, "%s: %s\n", s.Position, msg)
} }
...@@ -503,41 +503,32 @@ func (s *Scanner) scanChar() { ...@@ -503,41 +503,32 @@ func (s *Scanner) scanChar() {
} }
func (s *Scanner) scanLineComment() { func (s *Scanner) scanComment(ch int) int {
ch := s.next() // read character after "//" // ch == '/' || ch == '*'
for ch != '\n' { if ch == '/' {
if ch < 0 { // line comment
s.error("comment not terminated") ch = s.next() // read character after "//"
return for ch != '\n' && ch >= 0 {
ch = s.next()
} }
ch = s.next() return ch
} }
}
func (s *Scanner) scanGeneralComment() { // general comment
ch := s.next() // read character after "/*" ch = s.next() // read character after "/*"
for { for {
if ch < 0 { if ch < 0 {
s.error("comment not terminated") s.error("comment not terminated")
return break
} }
ch0 := ch ch0 := ch
ch = s.next() ch = s.next()
if ch0 == '*' && ch == '/' { if ch0 == '*' && ch == '/' {
ch = s.next()
break break
} }
} }
} return ch
func (s *Scanner) scanComment(ch int) {
// ch == '/' || ch == '*'
if ch == '/' {
s.scanLineComment()
return
}
s.scanGeneralComment()
} }
...@@ -619,13 +610,11 @@ redo: ...@@ -619,13 +610,11 @@ redo:
if (ch == '/' || ch == '*') && s.Mode&ScanComments != 0 { if (ch == '/' || ch == '*') && s.Mode&ScanComments != 0 {
if s.Mode&SkipComments != 0 { if s.Mode&SkipComments != 0 {
s.tokPos = -1 // don't collect token text s.tokPos = -1 // don't collect token text
s.scanComment(ch) ch = s.scanComment(ch)
ch = s.next()
goto redo goto redo
} }
s.scanComment(ch) ch = s.scanComment(ch)
tok = Comment tok = Comment
ch = s.next()
} }
case '`': case '`':
if s.Mode&ScanRawStrings != 0 { if s.Mode&ScanRawStrings != 0 {
......
...@@ -77,15 +77,15 @@ type token struct { ...@@ -77,15 +77,15 @@ type token struct {
var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
var tokenList = []token{ var tokenList = []token{
{Comment, "// line comments\n"}, {Comment, "// line comments"},
{Comment, "//\n"}, {Comment, "//"},
{Comment, "////\n"}, {Comment, "////"},
{Comment, "// comment\n"}, {Comment, "// comment"},
{Comment, "// /* comment */\n"}, {Comment, "// /* comment */"},
{Comment, "// // comment //\n"}, {Comment, "// // comment //"},
{Comment, "//" + f100 + "\n"}, {Comment, "//" + f100},
{Comment, "// general comments\n"}, {Comment, "// general comments"},
{Comment, "/**/"}, {Comment, "/**/"},
{Comment, "/***/"}, {Comment, "/***/"},
{Comment, "/* comment */"}, {Comment, "/* comment */"},
...@@ -94,7 +94,7 @@ var tokenList = []token{ ...@@ -94,7 +94,7 @@ var tokenList = []token{
{Comment, "/*\n comment\n*/"}, {Comment, "/*\n comment\n*/"},
{Comment, "/*" + f100 + "*/"}, {Comment, "/*" + f100 + "*/"},
{Comment, "// identifiers\n"}, {Comment, "// identifiers"},
{Ident, "a"}, {Ident, "a"},
{Ident, "a0"}, {Ident, "a0"},
{Ident, "foobar"}, {Ident, "foobar"},
...@@ -116,21 +116,21 @@ var tokenList = []token{ ...@@ -116,21 +116,21 @@ var tokenList = []token{
{Ident, "bar9876"}, {Ident, "bar9876"},
{Ident, f100}, {Ident, f100},
{Comment, "// decimal ints\n"}, {Comment, "// decimal ints"},
{Int, "0"}, {Int, "0"},
{Int, "1"}, {Int, "1"},
{Int, "9"}, {Int, "9"},
{Int, "42"}, {Int, "42"},
{Int, "1234567890"}, {Int, "1234567890"},
{Comment, "// octal ints\n"}, {Comment, "// octal ints"},
{Int, "00"}, {Int, "00"},
{Int, "01"}, {Int, "01"},
{Int, "07"}, {Int, "07"},
{Int, "042"}, {Int, "042"},
{Int, "01234567"}, {Int, "01234567"},
{Comment, "// hexadecimal ints\n"}, {Comment, "// hexadecimal ints"},
{Int, "0x0"}, {Int, "0x0"},
{Int, "0x1"}, {Int, "0x1"},
{Int, "0xf"}, {Int, "0xf"},
...@@ -144,7 +144,7 @@ var tokenList = []token{ ...@@ -144,7 +144,7 @@ var tokenList = []token{
{Int, "0X123456789abcDEF"}, {Int, "0X123456789abcDEF"},
{Int, "0X" + f100}, {Int, "0X" + f100},
{Comment, "// floats\n"}, {Comment, "// floats"},
{Float, "0."}, {Float, "0."},
{Float, "1."}, {Float, "1."},
{Float, "42."}, {Float, "42."},
...@@ -174,7 +174,7 @@ var tokenList = []token{ ...@@ -174,7 +174,7 @@ var tokenList = []token{
{Float, "42E+10"}, {Float, "42E+10"},
{Float, "01234567890E-10"}, {Float, "01234567890E-10"},
{Comment, "// chars\n"}, {Comment, "// chars"},
{Char, `' '`}, {Char, `' '`},
{Char, `'a'`}, {Char, `'a'`},
{Char, `'本'`}, {Char, `'本'`},
...@@ -195,7 +195,7 @@ var tokenList = []token{ ...@@ -195,7 +195,7 @@ var tokenList = []token{
{Char, `'\U00000000'`}, {Char, `'\U00000000'`},
{Char, `'\U0000ffAB'`}, {Char, `'\U0000ffAB'`},
{Comment, "// strings\n"}, {Comment, "// strings"},
{String, `" "`}, {String, `" "`},
{String, `"a"`}, {String, `"a"`},
{String, `"本"`}, {String, `"本"`},
...@@ -217,13 +217,13 @@ var tokenList = []token{ ...@@ -217,13 +217,13 @@ var tokenList = []token{
{String, `"\U0000ffAB"`}, {String, `"\U0000ffAB"`},
{String, `"` + f100 + `"`}, {String, `"` + f100 + `"`},
{Comment, "// raw strings\n"}, {Comment, "// raw strings"},
{String, "``"}, {String, "``"},
{String, "`\\`"}, {String, "`\\`"},
{String, "`" + "\n\n/* foobar */\n\n" + "`"}, {String, "`" + "\n\n/* foobar */\n\n" + "`"},
{String, "`" + f100 + "`"}, {String, "`" + f100 + "`"},
{Comment, "// individual characters\n"}, {Comment, "// individual characters"},
// NUL character is not allowed // NUL character is not allowed
{'\x01', "\x01"}, {'\x01', "\x01"},
{' ' - 1, string(' ' - 1)}, {' ' - 1, string(' ' - 1)},
...@@ -276,7 +276,7 @@ func countNewlines(s string) int { ...@@ -276,7 +276,7 @@ func countNewlines(s string) int {
func testScan(t *testing.T, mode uint) { func testScan(t *testing.T, mode uint) {
s := new(Scanner).Init(makeSource(" \t%s\t\n\r")) s := new(Scanner).Init(makeSource(" \t%s\n"))
s.Mode = mode s.Mode = mode
tok := s.Scan() tok := s.Scan()
line := 1 line := 1
...@@ -287,7 +287,7 @@ func testScan(t *testing.T, mode uint) { ...@@ -287,7 +287,7 @@ func testScan(t *testing.T, mode uint) {
} }
line += countNewlines(k.text) + 1 // each token is on a new line line += countNewlines(k.text) + 1 // each token is on a new line
} }
checkTok(t, s, line, tok, -1, "") checkTok(t, s, line, tok, EOF, "")
} }
...@@ -317,6 +317,10 @@ func TestPosition(t *testing.T) { ...@@ -317,6 +317,10 @@ func TestPosition(t *testing.T) {
pos.Line += countNewlines(k.text) + 1 // each token is on a new line pos.Line += countNewlines(k.text) + 1 // each token is on a new line
s.Scan() s.Scan()
} }
// make sure there were no token-internal errors reported by scanner
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
} }
...@@ -336,6 +340,9 @@ func TestScanZeroMode(t *testing.T) { ...@@ -336,6 +340,9 @@ func TestScanZeroMode(t *testing.T) {
if tok != EOF { if tok != EOF {
t.Fatalf("tok = %s, want EOF", TokenString(tok)) t.Fatalf("tok = %s, want EOF", TokenString(tok))
} }
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
} }
...@@ -350,6 +357,9 @@ func testScanSelectedMode(t *testing.T, mode uint, class int) { ...@@ -350,6 +357,9 @@ func testScanSelectedMode(t *testing.T, mode uint, class int) {
} }
tok = s.Scan() tok = s.Scan()
} }
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
} }
...@@ -367,7 +377,7 @@ func TestScanSelectedMask(t *testing.T) { ...@@ -367,7 +377,7 @@ func TestScanSelectedMask(t *testing.T) {
func TestScanNext(t *testing.T) { func TestScanNext(t *testing.T) {
s := new(Scanner).Init(bytes.NewBufferString("if a == bcd /* comment */ {\n\ta += c\n}")) s := new(Scanner).Init(bytes.NewBufferString("if a == bcd /* comment */ {\n\ta += c\n} // line comment ending in eof"))
checkTok(t, s, 1, s.Scan(), Ident, "if") checkTok(t, s, 1, s.Scan(), Ident, "if")
checkTok(t, s, 1, s.Scan(), Ident, "a") checkTok(t, s, 1, s.Scan(), Ident, "a")
checkTok(t, s, 1, s.Scan(), '=', "=") checkTok(t, s, 1, s.Scan(), '=', "=")
...@@ -382,6 +392,9 @@ func TestScanNext(t *testing.T) { ...@@ -382,6 +392,9 @@ func TestScanNext(t *testing.T) {
checkTok(t, s, 2, s.Scan(), Ident, "c") checkTok(t, s, 2, s.Scan(), Ident, "c")
checkTok(t, s, 3, s.Scan(), '}', "}") checkTok(t, s, 3, s.Scan(), '}', "}")
checkTok(t, s, 3, s.Scan(), -1, "") checkTok(t, s, 3, s.Scan(), -1, "")
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
} }
...@@ -441,7 +454,6 @@ func TestError(t *testing.T) { ...@@ -441,7 +454,6 @@ func TestError(t *testing.T) {
testError(t, `"\'"`, "illegal char escape", String) testError(t, `"\'"`, "illegal char escape", String)
testError(t, `"abc`, "literal not terminated", String) testError(t, `"abc`, "literal not terminated", String)
testError(t, "`abc", "literal not terminated", String) testError(t, "`abc", "literal not terminated", String)
testError(t, `//`, "comment not terminated", EOF)
testError(t, `/*/`, "comment not terminated", EOF) testError(t, `/*/`, "comment not terminated", EOF)
testError(t, `"abc`+"\x00"+`def"`, "illegal character NUL", String) testError(t, `"abc`+"\x00"+`def"`, "illegal character NUL", String)
testError(t, `"abc`+"\xff"+`def"`, "illegal UTF-8 encoding", String) testError(t, `"abc`+"\xff"+`def"`, "illegal UTF-8 encoding", String)
...@@ -493,6 +505,9 @@ func TestPos(t *testing.T) { ...@@ -493,6 +505,9 @@ func TestPos(t *testing.T) {
for i := 10; i > 0; i-- { for i := 10; i > 0; i-- {
checkScanPos(t, s, 1, 2, 1, EOF) checkScanPos(t, s, 1, 2, 1, EOF)
} }
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
// corner case: source with only a single character // corner case: source with only a single character
s = new(Scanner).Init(bytes.NewBufferString("本")) s = new(Scanner).Init(bytes.NewBufferString("本"))
...@@ -502,6 +517,9 @@ func TestPos(t *testing.T) { ...@@ -502,6 +517,9 @@ func TestPos(t *testing.T) {
for i := 10; i > 0; i-- { for i := 10; i > 0; i-- {
checkScanPos(t, s, 3, 1, 2, EOF) checkScanPos(t, s, 3, 1, 2, EOF)
} }
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
// positions after calling Next // positions after calling Next
s = new(Scanner).Init(bytes.NewBufferString(" foo६४ \n\n本語\n")) s = new(Scanner).Init(bytes.NewBufferString(" foo६४ \n\n本語\n"))
...@@ -524,6 +542,9 @@ func TestPos(t *testing.T) { ...@@ -524,6 +542,9 @@ func TestPos(t *testing.T) {
for i := 10; i > 0; i-- { for i := 10; i > 0; i-- {
checkScanPos(t, s, 22, 4, 1, EOF) checkScanPos(t, s, 22, 4, 1, EOF)
} }
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
// positions after calling Scan // positions after calling Scan
s = new(Scanner).Init(bytes.NewBufferString("abc\n本語\n\nx")) s = new(Scanner).Init(bytes.NewBufferString("abc\n本語\n\nx"))
...@@ -543,4 +564,7 @@ func TestPos(t *testing.T) { ...@@ -543,4 +564,7 @@ func TestPos(t *testing.T) {
for i := 10; i > 0; i-- { for i := 10; i > 0; i-- {
checkScanPos(t, s, 13, 4, 2, EOF) checkScanPos(t, s, 13, 4, 2, EOF)
} }
if s.ErrorCount != 0 {
t.Errorf("%d errors", s.ErrorCount)
}
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment