Commit aa5b44ae authored by Robert Griesemer

cmd/compile: set lexer nlsemi state directly

The old code used an extra function call and a switch to inspect the
current token and determine the new state of curio.nlsemi. However,
the lexer already knows the token without the need for an extra test,
and thus can set curio.nlsemi directly:

- removed need for extra function call in next
- renamed _yylex to next
- set nlsemi at the point a token is identified
- moved nlsemi from curio to lexer - it's really part of the lexer state

This change makes the lexer call sequence less convoluted and should
also speed up the lexing a bit.

Change-Id: Iaf2683081f04231cb62c94e1400d455f98f6f82a
Reviewed-on: https://go-review.googlesource.com/19765
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
parent 20ee67ac
...@@ -389,7 +389,6 @@ type Io struct { ...@@ -389,7 +389,6 @@ type Io struct {
last int last int
peekc int peekc int
peekc1 int // second peekc for ... peekc1 int // second peekc for ...
nlsemi bool
eofnl bool eofnl bool
} }
......
...@@ -869,6 +869,7 @@ func isfrog(c int) bool { ...@@ -869,6 +869,7 @@ func isfrog(c int) bool {
type lexer struct { type lexer struct {
// TODO(gri) move other lexer state here and out of global variables // TODO(gri) move other lexer state here and out of global variables
// (source, current line number, etc.) // (source, current line number, etc.)
nlsemi bool // if set, '\n' and EOF translate to ';'
// current token // current token
tok int32 tok int32
...@@ -925,7 +926,7 @@ const ( ...@@ -925,7 +926,7 @@ const (
LRSH LRSH
) )
func (yylval *lexer) _yylex() int32 { func (l *lexer) next() {
var c1 int var c1 int
var op Op var op Op
var escflag int var escflag int
...@@ -936,27 +937,31 @@ func (yylval *lexer) _yylex() int32 { ...@@ -936,27 +937,31 @@ func (yylval *lexer) _yylex() int32 {
prevlineno = lineno prevlineno = lineno
nlsemi := l.nlsemi
l.nlsemi = false
l0: l0:
// skip white space
c := getc() c := getc()
if isSpace(c) { for isSpace(c) {
if c == '\n' && curio.nlsemi { if c == '\n' && nlsemi {
ungetc(c) ungetc(c)
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: implicit semi\n") fmt.Printf("lex: implicit semi\n")
} }
return ';' l.tok = ';'
return
} }
c = getc()
goto l0
} }
lineno = lexlineno // start of token // start of token
lineno = lexlineno
if c >= utf8.RuneSelf { if c >= utf8.RuneSelf {
// all multibyte runes are alpha // all multibyte runes are alpha
cp = &lexbuf cp = &lexbuf
cp.Reset() cp.Reset()
goto talph goto talph
} }
...@@ -1049,7 +1054,17 @@ l0: ...@@ -1049,7 +1054,17 @@ l0:
case EOF: case EOF:
lineno = prevlineno lineno = prevlineno
ungetc(EOF) ungetc(EOF)
return -1 // Treat EOF as "end of line" for the purposes
// of inserting a semicolon.
if nlsemi {
if Debug['x'] != 0 {
fmt.Printf("lex: implicit semi\n")
}
l.tok = ';'
return
}
l.tok = -1
return
case '_': case '_':
cp = &lexbuf cp = &lexbuf
...@@ -1137,14 +1152,16 @@ l0: ...@@ -1137,14 +1152,16 @@ l0:
} }
x := new(Mpint) x := new(Mpint)
yylval.val.U = x l.val.U = x
Mpmovecfix(x, v) Mpmovecfix(x, v)
x.Rune = true x.Rune = true
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: codepoint literal\n") fmt.Printf("lex: codepoint literal\n")
} }
litbuf = "string literal" litbuf = "string literal"
return LLITERAL l.nlsemi = true
l.tok = LLITERAL
return
case '/': case '/':
c1 = getc() c1 = getc()
...@@ -1217,6 +1234,7 @@ l0: ...@@ -1217,6 +1234,7 @@ l0:
case '+': case '+':
c1 = getc() c1 = getc()
if c1 == '+' { if c1 == '+' {
l.nlsemi = true
c = int(LINC) c = int(LINC)
goto lx goto lx
} }
...@@ -1229,6 +1247,7 @@ l0: ...@@ -1229,6 +1247,7 @@ l0:
case '-': case '-':
c1 = getc() c1 = getc()
if c1 == '-' { if c1 == '-' {
l.nlsemi = true
c = int(LDEC) c = int(LDEC)
goto lx goto lx
} }
...@@ -1339,6 +1358,10 @@ l0: ...@@ -1339,6 +1358,10 @@ l0:
goto asop goto asop
} }
case ')', ']', '}':
l.nlsemi = true
goto lx
default: default:
goto lx goto lx
} }
...@@ -1363,14 +1386,16 @@ lx: ...@@ -1363,14 +1386,16 @@ lx:
goto l0 goto l0
} }
return int32(c) l.tok = int32(c)
return
asop: asop:
yylval.op = op l.op = op
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: TOKEN ASOP %s=\n", goopnames[op]) fmt.Printf("lex: TOKEN ASOP %s=\n", goopnames[op])
} }
return LASOP l.tok = LASOP
return
// cp is set to lexbuf and some // cp is set to lexbuf and some
// prefix has been stored // prefix has been stored
...@@ -1407,26 +1432,33 @@ talph: ...@@ -1407,26 +1432,33 @@ talph:
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: %s %s\n", s, lexname(int(s.Lexical))) fmt.Printf("lex: %s %s\n", s, lexname(int(s.Lexical)))
} }
yylval.sym_ = s l.sym_ = s
return int32(s.Lexical) switch s.Lexical {
case LNAME, LRETURN, LBREAK, LCONTINUE, LFALL:
l.nlsemi = true
}
l.tok = int32(s.Lexical)
return
ncu: ncu:
cp = nil cp = nil
ungetc(c) ungetc(c)
str = lexbuf.String() str = lexbuf.String()
yylval.val.U = new(Mpint) l.val.U = new(Mpint)
mpatofix(yylval.val.U.(*Mpint), str) mpatofix(l.val.U.(*Mpint), str)
if yylval.val.U.(*Mpint).Ovf { if l.val.U.(*Mpint).Ovf {
Yyerror("overflow in constant") Yyerror("overflow in constant")
Mpmovecfix(yylval.val.U.(*Mpint), 0) Mpmovecfix(l.val.U.(*Mpint), 0)
} }
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: integer literal\n") fmt.Printf("lex: integer literal\n")
} }
litbuf = "literal " + str litbuf = "literal " + str
return LLITERAL l.nlsemi = true
l.tok = LLITERAL
return
casedot: casedot:
for { for {
...@@ -1475,45 +1507,50 @@ casei: ...@@ -1475,45 +1507,50 @@ casei:
cp = nil cp = nil
str = lexbuf.String() str = lexbuf.String()
yylval.val.U = new(Mpcplx) l.val.U = new(Mpcplx)
Mpmovecflt(&yylval.val.U.(*Mpcplx).Real, 0.0) Mpmovecflt(&l.val.U.(*Mpcplx).Real, 0.0)
mpatoflt(&yylval.val.U.(*Mpcplx).Imag, str) mpatoflt(&l.val.U.(*Mpcplx).Imag, str)
if yylval.val.U.(*Mpcplx).Imag.Val.IsInf() { if l.val.U.(*Mpcplx).Imag.Val.IsInf() {
Yyerror("overflow in imaginary constant") Yyerror("overflow in imaginary constant")
Mpmovecflt(&yylval.val.U.(*Mpcplx).Imag, 0.0) Mpmovecflt(&l.val.U.(*Mpcplx).Imag, 0.0)
} }
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: imaginary literal\n") fmt.Printf("lex: imaginary literal\n")
} }
litbuf = "literal " + str litbuf = "literal " + str
return LLITERAL l.nlsemi = true
l.tok = LLITERAL
return
caseout: caseout:
cp = nil cp = nil
ungetc(c) ungetc(c)
str = lexbuf.String() str = lexbuf.String()
yylval.val.U = newMpflt() l.val.U = newMpflt()
mpatoflt(yylval.val.U.(*Mpflt), str) mpatoflt(l.val.U.(*Mpflt), str)
if yylval.val.U.(*Mpflt).Val.IsInf() { if l.val.U.(*Mpflt).Val.IsInf() {
Yyerror("overflow in float constant") Yyerror("overflow in float constant")
Mpmovecflt(yylval.val.U.(*Mpflt), 0.0) Mpmovecflt(l.val.U.(*Mpflt), 0.0)
} }
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: floating literal\n") fmt.Printf("lex: floating literal\n")
} }
litbuf = "literal " + str litbuf = "literal " + str
return LLITERAL l.nlsemi = true
l.tok = LLITERAL
return
strlit: strlit:
yylval.val.U = internString(cp.Bytes()) l.val.U = internString(cp.Bytes())
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: string literal\n") fmt.Printf("lex: string literal\n")
} }
litbuf = "string literal" litbuf = "string literal"
return LLITERAL l.nlsemi = true
l.tok = LLITERAL
} }
var internedStrings = map[string]string{} var internedStrings = map[string]string{}
...@@ -1833,36 +1870,6 @@ func pragcgo(text string) { ...@@ -1833,36 +1870,6 @@ func pragcgo(text string) {
} }
} }
func (l *lexer) next() {
tok := l._yylex()
if curio.nlsemi && tok == EOF {
// Treat EOF as "end of line" for the purposes
// of inserting a semicolon.
tok = ';'
}
switch tok {
case LNAME,
LLITERAL,
LBREAK,
LCONTINUE,
LFALL,
LRETURN,
LINC,
LDEC,
')',
'}',
']':
curio.nlsemi = true
default:
curio.nlsemi = false
}
l.tok = tok
}
func getc() int { func getc() int {
c := curio.peekc c := curio.peekc
if c != 0 { if c != 0 {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment