Commit 6520da6e authored by Martin Möhrmann's avatar Martin Möhrmann Committed by Rob Pike

fmt: use public io.RuneScanner interface for ScanState reader

All io.Reader that are passed to newScanState in all the standard
library tests that implement io.RuneReader also implement io.RuneScanner.

Do not check on each call ScanState's UnreadRune that the used RuneReader
also implements the UnreadRune method by using a private interface.
Instead require the used Reader to implement the public RuneScanner
interface.

The extra implementation logic for UnreadRune is removed from ScanState.
Instead the readRune wrapper is extended to implement UnreadRune for the
RuneScanner interface. If the Reader passed to newScanstate does not
implement RuneScanner the readRune wrapper is used to implement the
missing functionality.

Note that a RuneReader that does not implement RuneScanner will also
be wrapped by runeRead which was not the case before.
Performance with the readRune wrapper is better than without before.

Add benchmark to compare performance with and without using the
readRune wrapper.

name                             old time/op  new time/op  delta
ScanInts-2                        704µs ± 0%   615µs ± 1%  -12.73%  (p=0.000 n=20+20)
ScanRecursiveInt-2               82.6ms ± 0%  51.4ms ± 0%  -37.71%  (p=0.000 n=20+20)
ScanRecursiveIntReaderWrapper-2  85.1ms ± 0%  52.4ms ± 0%  -38.36%  (p=0.000 n=20+20)

Change-Id: I8c6e85db9b87a8171caab12f020b6e256b498e81
Reviewed-on: https://go-review.googlesource.com/19895
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarRob Pike <r@golang.org>
parent b30e1d72
......@@ -15,14 +15,6 @@ import (
"unicode/utf8"
)
// runeUnreader is the interface to something that can unread runes.
// If the object provided to Scan does not satisfy this interface,
// a local buffer will be used to back up the input, but its contents
// will be lost when Scan returns.
type runeUnreader interface {
UnreadRune() error
}
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
......@@ -163,12 +155,10 @@ const eof = -1
// ss is the internal implementation of ScanState.
type ss struct {
rr io.RuneReader // where to read input
buf buffer // token accumulator
peekRune rune // one-rune lookahead
prevRune rune // last rune returned by ReadRune
count int // runes consumed so far.
atEOF bool // already read EOF
rs io.RuneScanner // where to read input
buf buffer // token accumulator
count int // runes consumed so far.
atEOF bool // already read EOF
ssave
}
......@@ -191,23 +181,17 @@ func (s *ss) Read(buf []byte) (n int, err error) {
}
func (s *ss) ReadRune() (r rune, size int, err error) {
if s.peekRune >= 0 {
s.count++
r = s.peekRune
size = utf8.RuneLen(r)
s.prevRune = r
s.peekRune = -1
return
}
if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.argLimit {
if s.atEOF || s.count >= s.argLimit {
err = io.EOF
return
}
r, size, err = s.rr.ReadRune()
r, size, err = s.rs.ReadRune()
if err == nil {
s.count++
s.prevRune = r
if s.nlIsEnd && r == '\n' {
s.atEOF = true
}
} else if err == io.EOF {
s.atEOF = true
}
......@@ -246,12 +230,8 @@ func (s *ss) mustReadRune() (r rune) {
}
func (s *ss) UnreadRune() error {
if u, ok := s.rr.(runeUnreader); ok {
u.UnreadRune()
} else {
s.peekRune = s.prevRune
}
s.prevRune = -1
s.rs.UnreadRune()
s.atEOF = false
s.count--
return nil
}
......@@ -326,13 +306,14 @@ func (s *ss) SkipSpace() {
}
// readRune is a structure to enable reading UTF-8 encoded code points
// from an io.Reader. It is used if the Reader given to the scanner does
// not already implement io.RuneReader.
// from an io.Reader. It is used if the Reader given to the scanner does
// not already implement io.RuneScanner.
type readRune struct {
reader io.Reader
buf [utf8.UTFMax]byte // used only inside ReadRune
pending int // number of bytes in pendBuf; only >0 for bad UTF-8
pendBuf [utf8.UTFMax]byte // bytes left over
reader io.Reader
buf [utf8.UTFMax]byte // used only inside ReadRune
pending int // number of bytes in pendBuf; only >0 for bad UTF-8
pendBuf [utf8.UTFMax]byte // bytes left over
peekRune rune // if >=0 next rune; when <0 is ^(previous Rune)
}
// readByte returns the next byte from the input, which may be
......@@ -344,33 +325,35 @@ func (r *readRune) readByte() (b byte, err error) {
r.pending--
return
}
n, err := io.ReadFull(r.reader, r.pendBuf[0:1])
if n != 1 {
return 0, err
_, err = r.reader.Read(r.pendBuf[:1])
if err != nil {
return
}
return r.pendBuf[0], err
}
// unread saves the bytes for the next read.
func (r *readRune) unread(buf []byte) {
copy(r.pendBuf[r.pending:], buf)
r.pending += len(buf)
}
// ReadRune returns the next UTF-8 encoded code point from the
// io.Reader inside r.
func (r *readRune) ReadRune() (rr rune, size int, err error) {
if r.peekRune >= 0 {
rr = r.peekRune
r.peekRune = ^r.peekRune
size = utf8.RuneLen(rr)
return
}
r.buf[0], err = r.readByte()
if err != nil {
return 0, 0, err
return
}
if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
rr = rune(r.buf[0])
size = 1 // Known to be 1.
// Flip the bits of the rune so it's available to UnreadRune.
r.peekRune = ^rr
return
}
var n int
for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
for n = 1; !utf8.FullRune(r.buf[:n]); n++ {
r.buf[n], err = r.readByte()
if err != nil {
if err == io.EOF {
......@@ -380,13 +363,25 @@ func (r *readRune) ReadRune() (rr rune, size int, err error) {
return
}
}
rr, size = utf8.DecodeRune(r.buf[0:n])
if size < n { // an error
r.unread(r.buf[size:n])
rr, size = utf8.DecodeRune(r.buf[:n])
if size < n { // an error, save the bytes for the next read
copy(r.pendBuf[r.pending:], r.buf[size:n])
r.pending += n - size
}
// Flip the bits of the rune so it's available to UnreadRune.
r.peekRune = ^rr
return
}
func (r *readRune) UnreadRune() error {
if r.peekRune >= 0 {
return errors.New("fmt: scanning called UnreadRune with no rune available")
}
// Reverse bit flip of previously read rune to obtain valid >=0 state.
r.peekRune = ^r.peekRune
return nil
}
var ssFree = sync.Pool{
New: func() interface{} { return new(ss) },
}
......@@ -394,15 +389,13 @@ var ssFree = sync.Pool{
// newScanState allocates a new ss struct or grab a cached one.
func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
s = ssFree.Get().(*ss)
if rr, ok := r.(io.RuneReader); ok {
s.rr = rr
if rs, ok := r.(io.RuneScanner); ok {
s.rs = rs
} else {
s.rr = &readRune{reader: r}
s.rs = &readRune{reader: r, peekRune: -1}
}
s.nlIsSpace = nlIsSpace
s.nlIsEnd = nlIsEnd
s.prevRune = -1
s.peekRune = -1
s.atEOF = false
s.limit = hugeWid
s.argLimit = hugeWid
......@@ -424,7 +417,7 @@ func (s *ss) free(old ssave) {
return
}
s.buf = s.buf[:0]
s.rr = nil
s.rs = nil
ssFree.Put(s)
}
......@@ -1142,7 +1135,7 @@ func (s *ss) advance(format string) (i int) {
}
// doScanf does the real work when scanning with a format string.
// At the moment, it handles only pointers to basic types.
// At the moment, it handles only pointers to basic types.
func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err error) {
defer errorHandler(&err)
end := len(format) - 1
......
......@@ -1001,6 +1001,18 @@ func BenchmarkScanRecursiveInt(b *testing.B) {
}
}
func BenchmarkScanRecursiveIntReaderWrapper(b *testing.B) {
b.ResetTimer()
ints := makeInts(intCount)
var r RecursiveInt
for i := b.N - 1; i >= 0; i-- {
buf := newReader(string(ints))
b.StartTimer()
Fscan(buf, &r)
b.StopTimer()
}
}
// Issue 9124.
// %x on bytes couldn't handle non-space bytes terminating the scan.
func TestHexBytes(t *testing.T) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment