Commit 24a088d2 authored by Rob Pike

text/template: efficient reporting of line numbers

Instead of scanning the text to count newlines, which is n², keep track as we go
and store the line number in the token.

benchmark                 old ns/op      new ns/op     delta
BenchmarkParseLarge-4     1589721293     38783310      -97.56%

Fixes #17851

Change-Id: I231225c61e667535e2ce55cd2facea6d279cc59d
Reviewed-on: https://go-review.googlesource.com/33234
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent bb00a8d9
...@@ -1152,7 +1152,7 @@ func TestUnterminatedStringError(t *testing.T) { ...@@ -1152,7 +1152,7 @@ func TestUnterminatedStringError(t *testing.T) {
t.Fatal("expected error") t.Fatal("expected error")
} }
str := err.Error() str := err.Error()
if !strings.Contains(str, "X:3: unexpected unterminated raw quoted strin") { if !strings.Contains(str, "X:3: unexpected unterminated raw quoted string") {
t.Fatalf("unexpected error: %s", str) t.Fatalf("unexpected error: %s", str)
} }
} }
......
...@@ -16,6 +16,7 @@ type item struct { ...@@ -16,6 +16,7 @@ type item struct {
typ itemType // The type of this item. typ itemType // The type of this item.
pos Pos // The starting position, in bytes, of this item in the input string. pos Pos // The starting position, in bytes, of this item in the input string.
val string // The value of this item. val string // The value of this item.
line int // The line number at the start of this item.
} }
func (i item) String() string { func (i item) String() string {
...@@ -116,6 +117,7 @@ type lexer struct { ...@@ -116,6 +117,7 @@ type lexer struct {
lastPos Pos // position of most recent item returned by nextItem lastPos Pos // position of most recent item returned by nextItem
items chan item // channel of scanned items items chan item // channel of scanned items
parenDepth int // nesting depth of ( ) exprs parenDepth int // nesting depth of ( ) exprs
line int // 1+number of newlines seen
} }
// next returns the next rune in the input. // next returns the next rune in the input.
...@@ -127,6 +129,9 @@ func (l *lexer) next() rune { ...@@ -127,6 +129,9 @@ func (l *lexer) next() rune {
r, w := utf8.DecodeRuneInString(l.input[l.pos:]) r, w := utf8.DecodeRuneInString(l.input[l.pos:])
l.width = Pos(w) l.width = Pos(w)
l.pos += l.width l.pos += l.width
if r == '\n' {
l.line++
}
return r return r
} }
...@@ -140,11 +145,20 @@ func (l *lexer) peek() rune { ...@@ -140,11 +145,20 @@ func (l *lexer) peek() rune {
// backup steps back one rune. Can only be called once per call of next. // backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() { func (l *lexer) backup() {
l.pos -= l.width l.pos -= l.width
// Correct newline count.
if l.width == 1 && l.input[l.pos] == '\n' {
l.line--
}
} }
// emit passes an item back to the client. // emit passes an item back to the client.
func (l *lexer) emit(t itemType) { func (l *lexer) emit(t itemType) {
l.items <- item{t, l.start, l.input[l.start:l.pos]} l.items <- item{t, l.start, l.input[l.start:l.pos], l.line}
// Some items contain text internally. If so, count their newlines.
switch t {
case itemText, itemRawString, itemLeftDelim, itemRightDelim:
l.line += strings.Count(l.input[l.start:l.pos], "\n")
}
l.start = l.pos l.start = l.pos
} }
...@@ -169,17 +183,10 @@ func (l *lexer) acceptRun(valid string) { ...@@ -169,17 +183,10 @@ func (l *lexer) acceptRun(valid string) {
l.backup() l.backup()
} }
// lineNumber reports which line we're on, based on the position of
// the previous item returned by nextItem. Doing it this way
// means we don't have to worry about peek double counting.
func (l *lexer) lineNumber() int {
return 1 + strings.Count(l.input[:l.lastPos], "\n")
}
// errorf returns an error token and terminates the scan by passing // errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem. // back a nil pointer that will be the next state, terminating l.nextItem.
func (l *lexer) errorf(format string, args ...interface{}) stateFn { func (l *lexer) errorf(format string, args ...interface{}) stateFn {
l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.line}
return nil return nil
} }
...@@ -212,6 +219,7 @@ func lex(name, input, left, right string) *lexer { ...@@ -212,6 +219,7 @@ func lex(name, input, left, right string) *lexer {
leftDelim: left, leftDelim: left,
rightDelim: right, rightDelim: right,
items: make(chan item), items: make(chan item),
line: 1,
} }
go l.run() go l.run()
return l return l
...@@ -602,10 +610,14 @@ Loop: ...@@ -602,10 +610,14 @@ Loop:
// lexRawQuote scans a raw quoted string. // lexRawQuote scans a raw quoted string.
func lexRawQuote(l *lexer) stateFn { func lexRawQuote(l *lexer) stateFn {
startLine := l.line
Loop: Loop:
for { for {
switch l.next() { switch l.next() {
case eof: case eof:
// Restore line number to location of opening quote.
// We will error out so it's ok just to overwrite the field.
l.line = startLine
return l.errorf("unterminated raw quoted string") return l.errorf("unterminated raw quoted string")
case '`': case '`':
break Loop break Loop
......
This diff is collapsed.
...@@ -157,7 +157,7 @@ func (t *Tree) ErrorContext(n Node) (location, context string) { ...@@ -157,7 +157,7 @@ func (t *Tree) ErrorContext(n Node) (location, context string) {
// errorf formats the error and terminates processing. // errorf formats the error and terminates processing.
func (t *Tree) errorf(format string, args ...interface{}) { func (t *Tree) errorf(format string, args ...interface{}) {
t.Root = nil t.Root = nil
format = fmt.Sprintf("template: %s:%d: %s", t.ParseName, t.lex.lineNumber(), format) format = fmt.Sprintf("template: %s:%d: %s", t.ParseName, t.token[0].line, format)
panic(fmt.Errorf(format, args...)) panic(fmt.Errorf(format, args...))
} }
...@@ -376,15 +376,17 @@ func (t *Tree) action() (n Node) { ...@@ -376,15 +376,17 @@ func (t *Tree) action() (n Node) {
return t.withControl() return t.withControl()
} }
t.backup() t.backup()
token := t.peek()
// Do not pop variables; they persist until "end". // Do not pop variables; they persist until "end".
return t.newAction(t.peek().pos, t.lex.lineNumber(), t.pipeline("command")) return t.newAction(token.pos, token.line, t.pipeline("command"))
} }
// Pipeline: // Pipeline:
// declarations? command ('|' command)* // declarations? command ('|' command)*
func (t *Tree) pipeline(context string) (pipe *PipeNode) { func (t *Tree) pipeline(context string) (pipe *PipeNode) {
var decl []*VariableNode var decl []*VariableNode
pos := t.peekNonSpace().pos token := t.peekNonSpace()
pos := token.pos
// Are there declarations? // Are there declarations?
for { for {
if v := t.peekNonSpace(); v.typ == itemVariable { if v := t.peekNonSpace(); v.typ == itemVariable {
...@@ -413,7 +415,7 @@ func (t *Tree) pipeline(context string) (pipe *PipeNode) { ...@@ -413,7 +415,7 @@ func (t *Tree) pipeline(context string) (pipe *PipeNode) {
} }
break break
} }
pipe = t.newPipeline(pos, t.lex.lineNumber(), decl) pipe = t.newPipeline(pos, token.line, decl)
for { for {
switch token := t.nextNonSpace(); token.typ { switch token := t.nextNonSpace(); token.typ {
case itemRightDelim, itemRightParen: case itemRightDelim, itemRightParen:
...@@ -450,7 +452,6 @@ func (t *Tree) checkPipeline(pipe *PipeNode, context string) { ...@@ -450,7 +452,6 @@ func (t *Tree) checkPipeline(pipe *PipeNode, context string) {
func (t *Tree) parseControl(allowElseIf bool, context string) (pos Pos, line int, pipe *PipeNode, list, elseList *ListNode) { func (t *Tree) parseControl(allowElseIf bool, context string) (pos Pos, line int, pipe *PipeNode, list, elseList *ListNode) {
defer t.popVars(len(t.vars)) defer t.popVars(len(t.vars))
line = t.lex.lineNumber()
pipe = t.pipeline(context) pipe = t.pipeline(context)
var next Node var next Node
list, next = t.itemList() list, next = t.itemList()
...@@ -479,7 +480,7 @@ func (t *Tree) parseControl(allowElseIf bool, context string) (pos Pos, line int ...@@ -479,7 +480,7 @@ func (t *Tree) parseControl(allowElseIf bool, context string) (pos Pos, line int
t.errorf("expected end; found %s", next) t.errorf("expected end; found %s", next)
} }
} }
return pipe.Position(), line, pipe, list, elseList return pipe.Position(), pipe.Line, pipe, list, elseList
} }
// If: // If:
...@@ -521,9 +522,10 @@ func (t *Tree) elseControl() Node { ...@@ -521,9 +522,10 @@ func (t *Tree) elseControl() Node {
peek := t.peekNonSpace() peek := t.peekNonSpace()
if peek.typ == itemIf { if peek.typ == itemIf {
// We see "{{else if ... " but in effect rewrite it to {{else}}{{if ... ". // We see "{{else if ... " but in effect rewrite it to {{else}}{{if ... ".
return t.newElse(peek.pos, t.lex.lineNumber()) return t.newElse(peek.pos, peek.line)
} }
return t.newElse(t.expect(itemRightDelim, "else").pos, t.lex.lineNumber()) token := t.expect(itemRightDelim, "else")
return t.newElse(token.pos, token.line)
} }
// Block: // Block:
...@@ -550,7 +552,7 @@ func (t *Tree) blockControl() Node { ...@@ -550,7 +552,7 @@ func (t *Tree) blockControl() Node {
block.add() block.add()
block.stopParse() block.stopParse()
return t.newTemplate(token.pos, t.lex.lineNumber(), name, pipe) return t.newTemplate(token.pos, token.line, name, pipe)
} }
// Template: // Template:
...@@ -567,7 +569,7 @@ func (t *Tree) templateControl() Node { ...@@ -567,7 +569,7 @@ func (t *Tree) templateControl() Node {
// Do not pop variables; they persist until "end". // Do not pop variables; they persist until "end".
pipe = t.pipeline(context) pipe = t.pipeline(context)
} }
return t.newTemplate(token.pos, t.lex.lineNumber(), name, pipe) return t.newTemplate(token.pos, token.line, name, pipe)
} }
func (t *Tree) parseTemplateName(token item, context string) (name string) { func (t *Tree) parseTemplateName(token item, context string) (name string) {
......
...@@ -484,3 +484,37 @@ func TestBlock(t *testing.T) { ...@@ -484,3 +484,37 @@ func TestBlock(t *testing.T) {
t.Errorf("inner template = %q, want %q", g, w) t.Errorf("inner template = %q, want %q", g, w)
} }
} }
// TestLineNum checks that the parser records the correct source line on
// action and pipeline nodes. The input is count copies of a one-line action,
// so the action on input line N must report line N.
func TestLineNum(t *testing.T) {
	const count = 100
	text := strings.Repeat("{{printf 1234}}\n", count)
	tree, err := New("bench").Parse(text, "", "", make(map[string]*Tree), builtins)
	if err != nil {
		t.Fatal(err)
	}
	// Check the line numbers. Each line is an action containing a pipeline,
	// followed by text. That's two nodes per line.
	nodes := tree.Root.Nodes
	// Without this check the loop below would pass vacuously on an empty
	// or truncated tree.
	if len(nodes) != 2*count {
		t.Fatalf("got %d nodes, want %d", len(nodes), 2*count)
	}
	for i := 0; i < len(nodes); i += 2 {
		line := 1 + i/2
		// Action first. Use a checked assertion so a wrong node type is
		// reported as a test failure rather than a panic.
		action, ok := nodes[i].(*ActionNode)
		if !ok {
			t.Fatalf("line %d: node is %T, want *ActionNode", line, nodes[i])
		}
		if action.Line != line {
			t.Fatalf("line %d: action is line %d", line, action.Line)
		}
		pipe := action.Pipe
		if pipe.Line != line {
			t.Fatalf("line %d: pipe is line %d", line, pipe.Line)
		}
	}
}
// BenchmarkParseLarge times parsing of a template made of 10000 one-line
// actions; any parse error aborts the benchmark.
func BenchmarkParseLarge(b *testing.B) {
	const lines = 10000
	input := strings.Repeat("{{1234}}\n", lines)
	for iter := 0; iter < b.N; iter++ {
		// A fresh tree set is supplied on every iteration.
		if _, err := New("bench").Parse(input, "", "", make(map[string]*Tree), builtins); err != nil {
			b.Fatal(err)
		}
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment