Commit 6520da6e authored by Martin Möhrmann, committed by Rob Pike

fmt: use public io.RuneScanner interface for ScanState reader

All io.Readers passed to newScanState in the standard library tests
that implement io.RuneReader also implement io.RuneScanner.

Do not check, on each call to ScanState's UnreadRune, whether the
RuneReader in use also implements the UnreadRune method through a
private interface. Instead, require the Reader in use to implement the
public RuneScanner interface.

The extra implementation logic for UnreadRune is removed from ScanState.
Instead, the readRune wrapper is extended to implement UnreadRune for the
RuneScanner interface. If the Reader passed to newScanState does not
implement RuneScanner, the readRune wrapper is used to implement the
missing functionality.

Note that a RuneReader that does not implement RuneScanner will now also
be wrapped by readRune, which was not the case before.
Performance with the readRune wrapper is now better than it was without
the wrapper before this change.

Add benchmark to compare performance with and without using the
readRune wrapper.

name                             old time/op  new time/op  delta
ScanInts-2                        704µs ± 0%   615µs ± 1%  -12.73%  (p=0.000 n=20+20)
ScanRecursiveInt-2               82.6ms ± 0%  51.4ms ± 0%  -37.71%  (p=0.000 n=20+20)
ScanRecursiveIntReaderWrapper-2  85.1ms ± 0%  52.4ms ± 0%  -38.36%  (p=0.000 n=20+20)

Change-Id: I8c6e85db9b87a8171caab12f020b6e256b498e81
Reviewed-on: https://go-review.googlesource.com/19895
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rob Pike <r@golang.org>
parent b30e1d72
...@@ -15,14 +15,6 @@ import ( ...@@ -15,14 +15,6 @@ import (
"unicode/utf8" "unicode/utf8"
) )
// runeUnreader is the interface to something that can unread runes.
// If the object provided to Scan does not satisfy this interface,
// a local buffer will be used to back up the input, but its contents
// will be lost when Scan returns.
type runeUnreader interface {
UnreadRune() error
}
// ScanState represents the scanner state passed to custom scanners. // ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState // Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token. // to discover the next space-delimited token.
...@@ -163,10 +155,8 @@ const eof = -1 ...@@ -163,10 +155,8 @@ const eof = -1
// ss is the internal implementation of ScanState. // ss is the internal implementation of ScanState.
type ss struct { type ss struct {
rr io.RuneReader // where to read input rs io.RuneScanner // where to read input
buf buffer // token accumulator buf buffer // token accumulator
peekRune rune // one-rune lookahead
prevRune rune // last rune returned by ReadRune
count int // runes consumed so far. count int // runes consumed so far.
atEOF bool // already read EOF atEOF bool // already read EOF
ssave ssave
...@@ -191,23 +181,17 @@ func (s *ss) Read(buf []byte) (n int, err error) { ...@@ -191,23 +181,17 @@ func (s *ss) Read(buf []byte) (n int, err error) {
} }
func (s *ss) ReadRune() (r rune, size int, err error) { func (s *ss) ReadRune() (r rune, size int, err error) {
if s.peekRune >= 0 { if s.atEOF || s.count >= s.argLimit {
s.count++
r = s.peekRune
size = utf8.RuneLen(r)
s.prevRune = r
s.peekRune = -1
return
}
if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.argLimit {
err = io.EOF err = io.EOF
return return
} }
r, size, err = s.rr.ReadRune() r, size, err = s.rs.ReadRune()
if err == nil { if err == nil {
s.count++ s.count++
s.prevRune = r if s.nlIsEnd && r == '\n' {
s.atEOF = true
}
} else if err == io.EOF { } else if err == io.EOF {
s.atEOF = true s.atEOF = true
} }
...@@ -246,12 +230,8 @@ func (s *ss) mustReadRune() (r rune) { ...@@ -246,12 +230,8 @@ func (s *ss) mustReadRune() (r rune) {
} }
func (s *ss) UnreadRune() error { func (s *ss) UnreadRune() error {
if u, ok := s.rr.(runeUnreader); ok { s.rs.UnreadRune()
u.UnreadRune() s.atEOF = false
} else {
s.peekRune = s.prevRune
}
s.prevRune = -1
s.count-- s.count--
return nil return nil
} }
...@@ -327,12 +307,13 @@ func (s *ss) SkipSpace() { ...@@ -327,12 +307,13 @@ func (s *ss) SkipSpace() {
// readRune is a structure to enable reading UTF-8 encoded code points // readRune is a structure to enable reading UTF-8 encoded code points
// from an io.Reader. It is used if the Reader given to the scanner does // from an io.Reader. It is used if the Reader given to the scanner does
// not already implement io.RuneReader. // not already implement io.RuneScanner.
type readRune struct { type readRune struct {
reader io.Reader reader io.Reader
buf [utf8.UTFMax]byte // used only inside ReadRune buf [utf8.UTFMax]byte // used only inside ReadRune
pending int // number of bytes in pendBuf; only >0 for bad UTF-8 pending int // number of bytes in pendBuf; only >0 for bad UTF-8
pendBuf [utf8.UTFMax]byte // bytes left over pendBuf [utf8.UTFMax]byte // bytes left over
peekRune rune // if >=0 next rune; when <0 is ^(previous Rune)
} }
// readByte returns the next byte from the input, which may be // readByte returns the next byte from the input, which may be
...@@ -344,33 +325,35 @@ func (r *readRune) readByte() (b byte, err error) { ...@@ -344,33 +325,35 @@ func (r *readRune) readByte() (b byte, err error) {
r.pending-- r.pending--
return return
} }
n, err := io.ReadFull(r.reader, r.pendBuf[0:1]) _, err = r.reader.Read(r.pendBuf[:1])
if n != 1 { if err != nil {
return 0, err return
} }
return r.pendBuf[0], err return r.pendBuf[0], err
} }
// unread saves the bytes for the next read.
func (r *readRune) unread(buf []byte) {
copy(r.pendBuf[r.pending:], buf)
r.pending += len(buf)
}
// ReadRune returns the next UTF-8 encoded code point from the // ReadRune returns the next UTF-8 encoded code point from the
// io.Reader inside r. // io.Reader inside r.
func (r *readRune) ReadRune() (rr rune, size int, err error) { func (r *readRune) ReadRune() (rr rune, size int, err error) {
if r.peekRune >= 0 {
rr = r.peekRune
r.peekRune = ^r.peekRune
size = utf8.RuneLen(rr)
return
}
r.buf[0], err = r.readByte() r.buf[0], err = r.readByte()
if err != nil { if err != nil {
return 0, 0, err return
} }
if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
rr = rune(r.buf[0]) rr = rune(r.buf[0])
size = 1 // Known to be 1. size = 1 // Known to be 1.
// Flip the bits of the rune so it's available to UnreadRune.
r.peekRune = ^rr
return return
} }
var n int var n int
for n = 1; !utf8.FullRune(r.buf[0:n]); n++ { for n = 1; !utf8.FullRune(r.buf[:n]); n++ {
r.buf[n], err = r.readByte() r.buf[n], err = r.readByte()
if err != nil { if err != nil {
if err == io.EOF { if err == io.EOF {
...@@ -380,13 +363,25 @@ func (r *readRune) ReadRune() (rr rune, size int, err error) { ...@@ -380,13 +363,25 @@ func (r *readRune) ReadRune() (rr rune, size int, err error) {
return return
} }
} }
rr, size = utf8.DecodeRune(r.buf[0:n]) rr, size = utf8.DecodeRune(r.buf[:n])
if size < n { // an error if size < n { // an error, save the bytes for the next read
r.unread(r.buf[size:n]) copy(r.pendBuf[r.pending:], r.buf[size:n])
r.pending += n - size
} }
// Flip the bits of the rune so it's available to UnreadRune.
r.peekRune = ^rr
return return
} }
// UnreadRune makes the rune most recently returned by ReadRune available
// again. ReadRune stores the bit-flipped rune (a negative value) in
// peekRune; flipping it back yields a non-negative value that the next
// ReadRune call will return. If peekRune is already non-negative, a rune
// is already pending and an error is returned.
func (r *readRune) UnreadRune() error {
	if r.peekRune < 0 {
		// Undo the bit flip to restore the previously read rune.
		r.peekRune = ^r.peekRune
		return nil
	}
	return errors.New("fmt: scanning called UnreadRune with no rune available")
}
var ssFree = sync.Pool{ var ssFree = sync.Pool{
New: func() interface{} { return new(ss) }, New: func() interface{} { return new(ss) },
} }
...@@ -394,15 +389,13 @@ var ssFree = sync.Pool{ ...@@ -394,15 +389,13 @@ var ssFree = sync.Pool{
// newScanState allocates a new ss struct or grab a cached one. // newScanState allocates a new ss struct or grab a cached one.
func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) { func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
s = ssFree.Get().(*ss) s = ssFree.Get().(*ss)
if rr, ok := r.(io.RuneReader); ok { if rs, ok := r.(io.RuneScanner); ok {
s.rr = rr s.rs = rs
} else { } else {
s.rr = &readRune{reader: r} s.rs = &readRune{reader: r, peekRune: -1}
} }
s.nlIsSpace = nlIsSpace s.nlIsSpace = nlIsSpace
s.nlIsEnd = nlIsEnd s.nlIsEnd = nlIsEnd
s.prevRune = -1
s.peekRune = -1
s.atEOF = false s.atEOF = false
s.limit = hugeWid s.limit = hugeWid
s.argLimit = hugeWid s.argLimit = hugeWid
...@@ -424,7 +417,7 @@ func (s *ss) free(old ssave) { ...@@ -424,7 +417,7 @@ func (s *ss) free(old ssave) {
return return
} }
s.buf = s.buf[:0] s.buf = s.buf[:0]
s.rr = nil s.rs = nil
ssFree.Put(s) ssFree.Put(s)
} }
......
...@@ -1001,6 +1001,18 @@ func BenchmarkScanRecursiveInt(b *testing.B) { ...@@ -1001,6 +1001,18 @@ func BenchmarkScanRecursiveInt(b *testing.B) {
} }
} }
// BenchmarkScanRecursiveIntReaderWrapper measures Fscan into a RecursiveInt
// when the input is supplied through newReader. Only the Fscan call is
// timed; input construction happens while the timer is stopped.
func BenchmarkScanRecursiveIntReaderWrapper(b *testing.B) {
	// Stop the timer before setup. ResetTimer only zeroes the counters and
	// leaves the timer running, which would charge makeInts and the first
	// newReader call to the benchmark.
	b.StopTimer()
	ints := makeInts(intCount)
	var r RecursiveInt
	for i := 0; i < b.N; i++ {
		// Build a fresh reader for every iteration, outside the timed region.
		buf := newReader(string(ints))
		b.StartTimer()
		Fscan(buf, &r)
		b.StopTimer()
	}
}
// Issue 9124. // Issue 9124.
// %x on bytes couldn't handle non-space bytes terminating the scan. // %x on bytes couldn't handle non-space bytes terminating the scan.
func TestHexBytes(t *testing.T) { func TestHexBytes(t *testing.T) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment