Commit f198bbc8 authored by Robert Griesemer

godoc: use scanner instead of go/scanner for ebnf processing of spec

Also: Fewer calls to flush for faster processing (once per identifier
or error instead of once per token).

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5236041
parent af207059
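
The move from go/scanner to the scanner package changes the token representation throughout: Scan returns the token as a plain integer code, where a single character such as '=' or '|' is its own token value and token classes are negative constants (scanner.Ident, scanner.String, scanner.EOF); the byte offset comes from scanner.Position.Offset and the literal text from TokenText. A minimal sketch of that pattern, written against text/scanner (where this package lives in current Go releases; illustrative, not part of this change):

	package main

	import (
		"fmt"
		"strings"
		"text/scanner"
	)

	func main() {
		var s scanner.Scanner
		s.Init(strings.NewReader(`Block = "{" StatementList "}" .`))
		for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
			// Identifiers, strings, etc. scan as negative constants;
			// any other character is itself the token value.
			fmt.Printf("%3d %-8s %s\n", s.Position.Offset,
				scanner.TokenString(tok), s.TokenText())
		}
	}
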
@@ -680,7 +680,7 @@ func serveHTMLDoc(w http.ResponseWriter, r *http.Request, abspath, relpath string
 	// if it's the language spec, add tags to EBNF productions
 	if strings.HasSuffix(abspath, "go_spec.html") {
 		var buf bytes.Buffer
-		linkify(&buf, src)
+		Linkify(&buf, src)
 		src = buf.Bytes()
 	}

@@ -2,118 +2,103 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-package main
-
 // This file contains the mechanism to "linkify" html source
 // text containing EBNF sections (as found in go_spec.html).
 // The result is the input source text with the EBNF sections
 // modified such that identifiers are linked to the respective
 // definitions.

+package main
+
 import (
 	"bytes"
 	"fmt"
-	"go/scanner"
-	"go/token"
 	"io"
+	"scanner"
 )

 type ebnfParser struct {
 	out io.Writer // parser output
-	src []byte // parser source
-	file *token.File // for position information
+	src []byte // parser input
 	scanner scanner.Scanner
 	prev int // offset of previous token
-	pos token.Pos // token position
-	tok token.Token // one token look-ahead
+	pos int // offset of current token
+	tok int // one token look-ahead
 	lit string // token literal
 }

 func (p *ebnfParser) flush() {
-	offs := p.file.Offset(p.pos)
-	p.out.Write(p.src[p.prev:offs])
-	p.prev = offs
+	p.out.Write(p.src[p.prev:p.pos])
+	p.prev = p.pos
 }
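
This is where the speedup described in the commit message comes from: flush now writes the pending source bytes src[prev:pos] directly, using the byte offsets the scanner already maintains, instead of converting a token.Pos through a token.File first. And since only printf (below) triggers a flush, the untouched text between two pieces of injected markup is copied out in one Write, once per identifier or error, rather than once per scanned token.
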
 func (p *ebnfParser) next() {
-	if p.pos.IsValid() {
-		p.flush()
-	}
-	p.pos, p.tok, p.lit = p.scanner.Scan()
-	if p.tok.IsKeyword() {
-		// TODO Should keyword mapping always happen outside scanner?
-		// Or should there be a flag to scanner to enable keyword mapping?
-		p.tok = token.IDENT
-	}
+	p.tok = p.scanner.Scan()
+	p.pos = p.scanner.Position.Offset
+	p.lit = p.scanner.TokenText()
 }

-func (p *ebnfParser) Error(pos token.Position, msg string) {
-	fmt.Fprintf(p.out, `<span class="alert">error: %s</span>`, msg)
+func (p *ebnfParser) printf(format string, args ...interface{}) {
+	p.flush()
+	fmt.Fprintf(p.out, format, args...)
 }

-func (p *ebnfParser) errorExpected(pos token.Pos, msg string) {
-	msg = "expected " + msg
-	if pos == p.pos {
-		// the error happened at the current position;
-		// make the error message more specific
-		msg += ", found '" + p.tok.String() + "'"
-		if p.tok.IsLiteral() {
-			msg += " " + p.lit
-		}
-	}
-	p.Error(p.file.Position(pos), msg)
+func (p *ebnfParser) errorExpected(msg string) {
+	p.printf(`<span class="highlight">error: expected %s, found %s</span>`, msg, scanner.TokenString(p.tok))
 }

-func (p *ebnfParser) expect(tok token.Token) token.Pos {
-	pos := p.pos
+func (p *ebnfParser) expect(tok int) {
 	if p.tok != tok {
-		p.errorExpected(pos, "'"+tok.String()+"'")
+		p.errorExpected(scanner.TokenString(tok))
 	}
 	p.next() // make progress in any case
-	return pos
 }

 func (p *ebnfParser) parseIdentifier(def bool) {
-	name := p.lit
-	p.expect(token.IDENT)
-	if def {
-		fmt.Fprintf(p.out, `<a id="%s">%s</a>`, name, name)
-	} else {
-		fmt.Fprintf(p.out, `<a href="#%s" class="noline">%s</a>`, name, name)
-	}
-	p.prev += len(name) // skip identifier when calling flush
+	if p.tok == scanner.Ident {
+		name := p.lit
+		if def {
+			p.printf(`<a id="%s">%s</a>`, name, name)
+		} else {
+			p.printf(`<a href="#%s" class="noline">%s</a>`, name, name)
+		}
+		p.prev += len(name) // skip identifier when printing next time
+		p.next()
+	} else {
+		p.expect(scanner.Ident)
+	}
 }
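
For a production name such as Block (the name here is just an example), the two printf calls emit <a id="Block">Block</a> at a defining occurrence and <a href="#Block" class="noline">Block</a> at a use; bumping p.prev past the identifier afterwards keeps the next flush from copying the raw name out of the source a second time.
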
 func (p *ebnfParser) parseTerm() bool {
 	switch p.tok {
-	case token.IDENT:
+	case scanner.Ident:
 		p.parseIdentifier(false)
-	case token.STRING:
+	case scanner.String:
 		p.next()
-		const ellipsis = "…" // U+2026, the horizontal ellipsis character
-		if p.tok == token.ILLEGAL && p.lit == ellipsis {
+		const ellipsis = '…' // U+2026, the horizontal ellipsis character
+		if p.tok == ellipsis {
 			p.next()
-			p.expect(token.STRING)
+			p.expect(scanner.String)
 		}
-	case token.LPAREN:
+	case '(':
 		p.next()
 		p.parseExpression()
-		p.expect(token.RPAREN)
-	case token.LBRACK:
+		p.expect(')')
+	case '[':
 		p.next()
 		p.parseExpression()
-		p.expect(token.RBRACK)
-	case token.LBRACE:
+		p.expect(']')
+	case '{':
 		p.next()
 		p.parseExpression()
-		p.expect(token.RBRACE)
+		p.expect('}')
 	default:
-		return false
+		return false // no term found
 	}
 	return true

@@ -121,7 +106,7 @@ func (p *ebnfParser) parseTerm() bool {
 func (p *ebnfParser) parseSequence() {
 	if !p.parseTerm() {
-		p.errorExpected(p.pos, "term")
+		p.errorExpected("term")
 	}
 	for p.parseTerm() {
 	}

@@ -130,7 +115,7 @@ func (p *ebnfParser) parseSequence() {
 func (p *ebnfParser) parseExpression() {
 	for {
 		p.parseSequence()
-		if p.tok != token.OR {
+		if p.tok != '|' {
 			break
 		}
 		p.next()

@@ -139,23 +124,22 @@ func (p *ebnfParser) parseExpression() {
 func (p *ebnfParser) parseProduction() {
 	p.parseIdentifier(true)
-	p.expect(token.ASSIGN)
-	if p.tok != token.PERIOD {
+	p.expect('=')
+	if p.tok != '.' {
 		p.parseExpression()
 	}
-	p.expect(token.PERIOD)
+	p.expect('.')
 }
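
Read back from the parse functions, the grammar this recursive-descent parser accepts is, in EBNF itself (identifier and string being the scanner's token classes):

	Production = identifier "=" [ Expression ] "." .
	Expression = Sequence { "|" Sequence } .
	Sequence   = Term { Term } .
	Term       = identifier | string [ "…" string ] |
	             "(" Expression ")" | "[" Expression "]" | "{" Expression "}" .
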
-func (p *ebnfParser) parse(fset *token.FileSet, out io.Writer, src []byte) {
+func (p *ebnfParser) parse(out io.Writer, src []byte) {
 	// initialize ebnfParser
 	p.out = out
 	p.src = src
-	p.file = fset.AddFile("", fset.Base(), len(src))
-	p.scanner.Init(p.file, src, p, scanner.AllowIllegalChars)
+	p.scanner.Init(bytes.NewBuffer(src))
 	p.next() // initializes pos, tok, lit

 	// process source
-	for p.tok != token.EOF {
+	for p.tok != scanner.EOF {
 		p.parseProduction()
 	}
 	p.flush()

@@ -167,32 +151,29 @@ var (
 	closeTag = []byte(`</pre>`)
 )

-func linkify(out io.Writer, src []byte) {
-	fset := token.NewFileSet()
+func Linkify(out io.Writer, src []byte) {
 	for len(src) > 0 {
-		n := len(src)
-
 		// i: beginning of EBNF text (or end of source)
 		i := bytes.Index(src, openTag)
 		if i < 0 {
-			i = n - len(openTag)
+			i = len(src) - len(openTag)
 		}
 		i += len(openTag)

 		// j: end of EBNF text (or end of source)
-		j := bytes.Index(src[i:n], closeTag) // close marker
+		j := bytes.Index(src[i:], closeTag) // close marker
 		if j < 0 {
-			j = n - i
+			j = len(src) - i
 		}
 		j += i

 		// write text before EBNF
 		out.Write(src[0:i])

-		// parse and write EBNF
+		// process EBNF
 		var p ebnfParser
-		p.parse(fset, out, src[i:j])
+		p.parse(out, src[i:j])

 		// advance
-		src = src[j:n]
+		src = src[j:]
 	}
 }
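
A hypothetical standalone driver for the renamed, now-exported entry point (the file name and error handling are illustrative, not part of this change, and assume Linkify is in scope):

	package main

	import (
		"io/ioutil"
		"log"
		"os"
	)

	func main() {
		// Read the spec and write the linkified HTML to stdout.
		src, err := ioutil.ReadFile("go_spec.html") // illustrative path
		if err != nil {
			log.Fatal(err)
		}
		Linkify(os.Stdout, src)
	}
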