Commit fed47706 authored by Rob Pike's avatar Rob Pike

step 2 of the great buffer shift.

make strings.Buffer handle strings and bytes with comparable efficiency.
if ok, next step will be to move this code to bytes.Buffer and terminate
strings.Buffer's short happy life.

R=rsc
DELTA=292  (212 added, 0 deleted, 80 changed)
OCL=34837
CL=34849
parent de0d762a
...@@ -112,7 +112,7 @@ func TestDecode(t *testing.T) { ...@@ -112,7 +112,7 @@ func TestDecode(t *testing.T) {
func TestDecoder(t *testing.T) { func TestDecoder(t *testing.T) {
for _, p := range pairs { for _, p := range pairs {
decoder := NewDecoder(StdEncoding, strings.NewBuffer(p.encoded)); decoder := NewDecoder(StdEncoding, strings.NewBufferString(p.encoded));
dbuf := make([]byte, StdEncoding.DecodedLen(len(p.encoded))); dbuf := make([]byte, StdEncoding.DecodedLen(len(p.encoded)));
count, err := decoder.Read(dbuf); count, err := decoder.Read(dbuf);
if err != nil && err != os.EOF { if err != nil && err != os.EOF {
...@@ -129,7 +129,7 @@ func TestDecoder(t *testing.T) { ...@@ -129,7 +129,7 @@ func TestDecoder(t *testing.T) {
func TestDecoderBuffering(t *testing.T) { func TestDecoderBuffering(t *testing.T) {
for bs := 1; bs <= 12; bs++ { for bs := 1; bs <= 12; bs++ {
decoder := NewDecoder(StdEncoding, strings.NewBuffer(bigtest.encoded)); decoder := NewDecoder(StdEncoding, strings.NewBufferString(bigtest.encoded));
buf := make([]byte, len(bigtest.decoded) + 12); buf := make([]byte, len(bigtest.decoded) + 12);
var total int; var total int;
for total = 0; total < len(bigtest.decoded); { for total = 0; total < len(bigtest.decoded); {
......
...@@ -61,12 +61,12 @@ func readBytes(buf *Reader) string { ...@@ -61,12 +61,12 @@ func readBytes(buf *Reader) string {
func TestReaderSimple(t *testing.T) { func TestReaderSimple(t *testing.T) {
data := "hello world"; data := "hello world";
b := NewReader(strings.NewBuffer(data)); b := NewReader(strings.NewBufferString(data));
if s := readBytes(b); s != "hello world" { if s := readBytes(b); s != "hello world" {
t.Errorf("simple hello world test failed: got %q", s); t.Errorf("simple hello world test failed: got %q", s);
} }
b = NewReader(newRot13Reader(strings.NewBuffer(data))); b = NewReader(newRot13Reader(strings.NewBufferString(data)));
if s := readBytes(b); s != "uryyb jbeyq" { if s := readBytes(b); s != "uryyb jbeyq" {
t.Error("rot13 hello world test failed: got %q", s); t.Error("rot13 hello world test failed: got %q", s);
} }
...@@ -154,7 +154,7 @@ func TestReader(t *testing.T) { ...@@ -154,7 +154,7 @@ func TestReader(t *testing.T) {
readmaker := readMakers[i]; readmaker := readMakers[i];
bufreader := bufreaders[j]; bufreader := bufreaders[j];
bufsize := bufsizes[k]; bufsize := bufsizes[k];
read := readmaker.fn(strings.NewBuffer(text)); read := readmaker.fn(strings.NewBufferString(text));
buf, _ := NewReaderSize(read, bufsize); buf, _ := NewReaderSize(read, bufsize);
s := bufreader.fn(buf); s := bufreader.fn(buf);
if s != text { if s != text {
...@@ -308,7 +308,7 @@ func TestWriteErrors(t *testing.T) { ...@@ -308,7 +308,7 @@ func TestWriteErrors(t *testing.T) {
func TestNewReaderSizeIdempotent(t *testing.T) { func TestNewReaderSizeIdempotent(t *testing.T) {
const BufSize = 1000; const BufSize = 1000;
b, err := NewReaderSize(strings.NewBuffer("hello world"), BufSize); b, err := NewReaderSize(strings.NewBufferString("hello world"), BufSize);
if err != nil { if err != nil {
t.Error("NewReaderSize create fail", err); t.Error("NewReaderSize create fail", err);
} }
......
...@@ -228,7 +228,7 @@ func TestWrongTypeDecoder(t *testing.T) { ...@@ -228,7 +228,7 @@ func TestWrongTypeDecoder(t *testing.T) {
} }
func corruptDataCheck(s string, err os.Error, t *testing.T) { func corruptDataCheck(s string, err os.Error, t *testing.T) {
b := strings.NewBuffer(s); b := strings.NewBufferString(s);
dec := NewDecoder(b); dec := NewDecoder(b);
dec.Decode(new(ET2)); dec.Decode(new(ET2));
if dec.state.err != err { if dec.state.err != err {
......
...@@ -6,20 +6,140 @@ package strings ...@@ -6,20 +6,140 @@ package strings
import "os" import "os"
// Efficient construction of large strings. // Efficient construction of large strings and byte arrays.
// Implements io.Reader and io.Writer. // Implements io.Reader and io.Writer.
// A Buffer is a variable-sized buffer of strings // A Buffer provides efficient construction of large strings
// with Read and Write methods. Appends (writes) are efficient. // and slices of bytes. It implements io.Reader and io.Writer.
// Appends (writes) are efficient.
// The zero value for Buffer is an empty buffer ready to use. // The zero value for Buffer is an empty buffer ready to use.
type Buffer struct { type Buffer struct {
str []string; blk []block;
len int; len int;
byteBuf [1]byte; oneByte [1]byte;
}
// There are two kinds of block: a string or a []byte.
// When the user writes big strings, we add string blocks;
// when the user writes big byte slices, we add []byte blocks.
// Small writes are coalesced onto the end of the last block,
// whatever it is.
// This strategy is intended to reduce unnecessary allocation.
type block interface {
Len() int;
String() string;
appendBytes(s []byte);
appendString(s string);
setSlice(m, n int);
}
// stringBlocks represent strings. We use pointer receivers
// so append and setSlice can overwrite the receiver.
type stringBlock string
func (b *stringBlock) Len() int {
return len(*b)
}
func (b *stringBlock) String() string {
return string(*b)
}
func (b *stringBlock) appendBytes(s []byte) {
*b += stringBlock(s)
}
func (b *stringBlock) appendString(s string) {
*b = stringBlock(s)
}
func (b *stringBlock) setSlice(m, n int) {
*b = (*b)[m:n]
}
// byteBlock represent slices of bytes. We use pointer receivers
// so append and setSlice can overwrite the receiver.
type byteBlock []byte
func (b *byteBlock) Len() int {
return len(*b)
}
func (b *byteBlock) String() string {
return string(*b)
}
func (b *byteBlock) resize(max int) {
by := []byte(*b);
if cap(by) >= max {
by = by[0:max];
} else {
nby := make([]byte, max, 3*(max+10)/2);
copyBytes(nby, 0, by);
by = nby;
}
*b = by;
}
func (b *byteBlock) appendBytes(s []byte) {
curLen := b.Len();
b.resize(curLen + len(s));
copyBytes([]byte(*b), curLen, s);
}
func (b *byteBlock) appendString(s string) {
curLen := b.Len();
b.resize(curLen + len(s));
copyString([]byte(*b), curLen, s);
}
func (b *byteBlock) setSlice(m, n int) {
*b = (*b)[m:n]
}
// Because the user may overwrite the contents of byte slices, we need
// to make a copy. Allocation strategy: leave some space on the end so
// small subsequent writes can avoid another allocation. The input
// is known to be non-empty.
func newByteBlock(s []byte) *byteBlock {
l := len(s);
// Capacity with room to grow. If small, allocate a mininum. If medium,
// double the size. If huge, use the size plus epsilon (room for a newline,
// at least).
c := l;
switch {
case l < 32:
c = 64
case l < 1<<18:
c *= 2;
default:
c += 8
}
b := make([]byte, l, c);
copyBytes(b, 0, s);
return &b;
}
// Copy from block to byte array at offset doff. Assume there's room.
func copy(dst []byte, doff int, src block) {
switch s := src.(type) {
case *stringBlock:
copyString(dst, doff, string(*s));
case *byteBlock:
copyBytes(dst, doff, []byte(*s));
}
} }
// Copy from string to byte array at offset doff. Assume there's room. // Copy from string to byte array at offset doff. Assume there's room.
func copy(dst []byte, doff int, src string) { func copyString(dst []byte, doff int, str string) {
for soff := 0; soff < len(str); soff++ {
dst[doff] = str[soff];
doff++;
}
}
// Copy from bytes to byte array at offset doff. Assume there's room.
func copyBytes(dst []byte, doff int, src []byte) {
for soff := 0; soff < len(src); soff++ { for soff := 0; soff < len(src); soff++ {
dst[doff] = src[soff]; dst[doff] = src[soff];
doff++; doff++;
...@@ -32,9 +152,9 @@ func (b *Buffer) Bytes() []byte { ...@@ -32,9 +152,9 @@ func (b *Buffer) Bytes() []byte {
n := b.len; n := b.len;
bytes := make([]byte, n); bytes := make([]byte, n);
nbytes := 0; nbytes := 0;
for _, s := range b.str { for _, s := range b.blk {
copy(bytes, nbytes, s); copy(bytes, nbytes, s);
nbytes += len(s); nbytes += s.Len();
} }
return bytes; return bytes;
} }
...@@ -42,33 +162,33 @@ func (b *Buffer) Bytes() []byte { ...@@ -42,33 +162,33 @@ func (b *Buffer) Bytes() []byte {
// String returns the contents of the unread portion of the buffer // String returns the contents of the unread portion of the buffer
// as a string. // as a string.
func (b *Buffer) String() string { func (b *Buffer) String() string {
if len(b.str) == 1 { // important special case if len(b.blk) == 1 { // important special case
return b.str[0] return b.blk[0].String()
} }
return string(b.Bytes()) return string(b.Bytes())
} }
// Len returns the number of bytes in the unread portion of the buffer; // Len returns the number of bytes in the unread portion of the buffer;
// b.Len() == len(b.Bytes()) == len(b.String()). // b.Len() == len(b.Bytes()) == len(b.String()).
func (b *Buffer) Len() (n int) { func (b *Buffer) Len() int {
return b.len return b.len
} }
// Truncate discards all but the first n unread bytes from the buffer. // Truncate discards all but the first n unread bytes from the buffer.
func (b *Buffer) Truncate(n int) { func (b *Buffer) Truncate(n int) {
b.len = 0; // recompute during scan. b.len = 0; // recompute during scan.
for i, s := range b.str { for i, s := range b.blk {
if n <= 0 { if n <= 0 {
b.str = b.str[0:i]; b.blk = b.blk[0:i];
break; break;
} }
if n < len(s) { if l := s.Len(); n < l {
b.str[i] = s[0:n]; b.blk[i].setSlice(0, n);
b.len += n; b.len += n;
n = 0; n = 0;
} else { } else {
b.len += len(s); b.len += l;
n -= len(s); n -= l;
} }
} }
} }
...@@ -76,58 +196,84 @@ func (b *Buffer) Truncate(n int) { ...@@ -76,58 +196,84 @@ func (b *Buffer) Truncate(n int) {
// Reset resets the buffer so it has no content. // Reset resets the buffer so it has no content.
// b.Reset() is the same as b.Truncate(0). // b.Reset() is the same as b.Truncate(0).
func (b *Buffer) Reset() { func (b *Buffer) Reset() {
b.str = b.str[0:0]; b.blk = b.blk[0:0];
b.len = 0; b.len = 0;
} }
// Can n bytes be appended efficiently to the end of the final string? // Can n bytes be appended efficiently to the end of the final string?
func (b *Buffer) canCombine(n int) bool { func (b *Buffer) canCombine(n int) bool {
return len(b.str) > 0 && n+len(b.str[len(b.str)-1]) <= 64 return len(b.blk) > 0 && n+b.blk[len(b.blk)-1].Len() <= 64
} }
// WriteString appends string s to the buffer. The return // WriteString appends string s to the buffer. The return
// value n is the length of s; err is always nil. // value n is the length of s; err is always nil.
func (b *Buffer) WriteString(s string) (n int, err os.Error) { func (b *Buffer) WriteString(s string) (n int, err os.Error) {
n = len(s); n = len(s);
if n == 0 {
return
}
b.len += n; b.len += n;
numStr := len(b.str); numStr := len(b.blk);
// Special case: If the last string is short and this one is short, // Special case: If the last piece is short and this one is short,
// combine them and avoid growing the list. // combine them and avoid growing the list.
if b.canCombine(n) { if b.canCombine(n) {
b.str[numStr-1] += s; b.blk[numStr-1].appendString(s);
return return
} }
if cap(b.str) == numStr { if cap(b.blk) == numStr {
nstr := make([]string, numStr, 3*(numStr+10)/2); nstr := make([]block, numStr, 3*(numStr+10)/2);
for i, s := range b.str { for i, s := range b.blk {
nstr[i] = s; nstr[i] = s;
} }
b.str = nstr; b.blk = nstr;
} }
b.str = b.str[0:numStr+1]; b.blk = b.blk[0:numStr+1];
b.str[numStr] = s; // The string is immutable; no need to make a copy.
b.blk[numStr] = (*stringBlock)(&s);
return return
} }
// Write appends the contents of p to the buffer. The return // Write appends the contents of p to the buffer. The return
// value n is the length of p; err is always nil. // value n is the length of p; err is always nil.
func (b *Buffer) Write(p []byte) (n int, err os.Error) { func (b *Buffer) Write(p []byte) (n int, err os.Error) {
return b.WriteString(string(p)) n = len(p);
if n == 0 {
return
}
b.len += n;
numStr := len(b.blk);
// Special case: If the last piece is short and this one is short,
// combine them and avoid growing the list.
if b.canCombine(n) {
b.blk[numStr-1].appendBytes(p);
return
}
if cap(b.blk) == numStr {
nstr := make([]block, numStr, 3*(numStr+10)/2);
for i, s := range b.blk {
nstr[i] = s;
}
b.blk = nstr;
}
b.blk = b.blk[0:numStr+1];
// Need to copy the data - user might overwrite the data.
b.blk[numStr] = newByteBlock(p);
return
} }
// WriteByte appends the byte c to the buffer. // WriteByte appends the byte c to the buffer.
// The returned error is always nil, but is included // The returned error is always nil, but is included
// to match bufio.Writer's WriteByte. // to match bufio.Writer's WriteByte.
func (b *Buffer) WriteByte(c byte) os.Error { func (b *Buffer) WriteByte(c byte) os.Error {
s := string(c); b.oneByte[0] = c;
// For WriteByte, canCombine is almost always true so it's worth // For WriteByte, canCombine is almost always true so it's worth
// doing here. // doing here.
if b.canCombine(1) { if b.canCombine(1) {
b.str[len(b.str)-1] += s; b.blk[len(b.blk)-1].appendBytes(&b.oneByte);
b.len++; b.len++;
return nil return nil
} }
b.WriteString(s); b.Write(&b.oneByte);
return nil; return nil;
} }
...@@ -136,22 +282,27 @@ func (b *Buffer) WriteByte(c byte) os.Error { ...@@ -136,22 +282,27 @@ func (b *Buffer) WriteByte(c byte) os.Error {
// buffer has no data to return, err is os.EOF even if len(p) is zero; // buffer has no data to return, err is os.EOF even if len(p) is zero;
// otherwise it is nil. // otherwise it is nil.
func (b *Buffer) Read(p []byte) (n int, err os.Error) { func (b *Buffer) Read(p []byte) (n int, err os.Error) {
if len(b.str) == 0 { if len(b.blk) == 0 {
return 0, os.EOF return 0, os.EOF
} }
for len(b.str) > 0 { for len(b.blk) > 0 {
s := b.str[0]; blk := b.blk[0];
m := len(p) - n; m := len(p) - n;
if m >= len(s) { if l := blk.Len(); m >= l {
// consume all of this string. // consume all of this string.
copy(p, n, s); copy(p, n, blk);
n += len(s); n += l;
b.str = b.str[1:len(b.str)]; b.blk = b.blk[1:len(b.blk)];
} else { } else {
// consume some of this string; it's the last piece. // consume some of this block; it's the last piece.
copy(p, n, s[0:m]); switch b := blk.(type) {
case *stringBlock:
copyString(p, n, string(*b)[0:m]);
case *byteBlock:
copyBytes(p, n, []byte(*b)[0:m]);
}
n += m; n += m;
b.str[0] = s[m:len(s)]; b.blk[0].setSlice(m, l);
break; break;
} }
} }
...@@ -162,18 +313,28 @@ func (b *Buffer) Read(p []byte) (n int, err os.Error) { ...@@ -162,18 +313,28 @@ func (b *Buffer) Read(p []byte) (n int, err os.Error) {
// ReadByte reads and returns the next byte from the buffer. // ReadByte reads and returns the next byte from the buffer.
// If no byte is available, it returns error os.EOF. // If no byte is available, it returns error os.EOF.
func (b *Buffer) ReadByte() (c byte, err os.Error) { func (b *Buffer) ReadByte() (c byte, err os.Error) {
if _, err := b.Read(&b.byteBuf); err != nil { if _, err := b.Read(&b.oneByte); err != nil {
return 0, err return 0, err
} }
return b.byteBuf[0], nil return b.oneByte[0], nil
} }
// NewBuffer creates and initializes a new Buffer // NewBufferString creates and initializes a new Buffer
// using str as its initial contents. // using a string as its initial contents.
func NewBuffer(str string) *Buffer { func NewBufferString(str string) *Buffer {
b := new(Buffer); b := new(Buffer);
b.str = make([]string, 1, 10); // room to grow b.blk = make([]block, 1, 10); // room to grow
b.str[0] = str; b.blk[0] = (*stringBlock)(&str);
b.len = len(str); b.len = len(str);
return b; return b;
} }
// NewBuffer creates and initializes a new Buffer
// using a byte slice as its initial contents.
func NewBuffer(by []byte) *Buffer {
b := new(Buffer);
b.blk = make([]block, 1, 10); // room to grow
b.blk[0] = (*byteBlock)(&by);
b.len = len(by);
return b;
}
...@@ -13,10 +13,11 @@ import ( ...@@ -13,10 +13,11 @@ import (
const N = 10000 // make this bigger for a larger (and slower) test const N = 10000 // make this bigger for a larger (and slower) test
var data string // test data for write tests var data string // test data for write tests
var bytes []byte // test data; same as data but as a slice.
func init() { func init() {
bytes := make([]byte, N); bytes = make([]byte, N);
for i := 0; i < N; i++ { for i := 0; i < N; i++ {
bytes[i] = 'a' + byte(i % 26) bytes[i] = 'a' + byte(i % 26)
} }
...@@ -45,10 +46,10 @@ func check(t *testing.T, testname string, buf *Buffer, s string) { ...@@ -45,10 +46,10 @@ func check(t *testing.T, testname string, buf *Buffer, s string) {
} }
// Fill buf through n writes of fus. // Fill buf through n writes of string fus.
// The initial contents of buf corresponds to the string s; // The initial contents of buf corresponds to the string s;
// the result is the final contents of buf returned as a string. // the result is the final contents of buf returned as a string.
func fill(t *testing.T, testname string, buf *Buffer, s string, n int, fus string) string { func fillString(t *testing.T, testname string, buf *Buffer, s string, n int, fus string) string {
check(t, testname + " (fill 1)", buf, s); check(t, testname + " (fill 1)", buf, s);
for ; n > 0; n-- { for ; n > 0; n-- {
m, err := buf.WriteString(fus); m, err := buf.WriteString(fus);
...@@ -65,12 +66,38 @@ func fill(t *testing.T, testname string, buf *Buffer, s string, n int, fus strin ...@@ -65,12 +66,38 @@ func fill(t *testing.T, testname string, buf *Buffer, s string, n int, fus strin
} }
// Fill buf through n writes of byte slice fub.
// The initial contents of buf corresponds to the string s;
// the result is the final contents of buf returned as a string.
func fillBytes(t *testing.T, testname string, buf *Buffer, s string, n int, fub []byte) string {
check(t, testname + " (fill 1)", buf, s);
for ; n > 0; n-- {
m, err := buf.Write(fub);
if m != len(fub) {
t.Errorf(testname + " (fill 2): m == %d, expected %d\n", m, len(fub));
}
if err != nil {
t.Errorf(testname + " (fill 3): err should always be nil, found err == %s\n", err);
}
s += string(fub);
check(t, testname + " (fill 4)", buf, s);
}
return s;
}
func TestNewBuffer(t *testing.T) { func TestNewBuffer(t *testing.T) {
buf := NewBuffer(data); buf := NewBuffer(bytes);
check(t, "NewBuffer", buf, data); check(t, "NewBuffer", buf, data);
} }
func TestNewBufferString(t *testing.T) {
buf := NewBufferString(data);
check(t, "NewBufferString", buf, data);
}
// Empty buf through repeated reads into fub. // Empty buf through repeated reads into fub.
// The initial contents of buf corresponds to the string s. // The initial contents of buf corresponds to the string s.
func empty(t *testing.T, testname string, buf *Buffer, s string, fub []byte) { func empty(t *testing.T, testname string, buf *Buffer, s string, fub []byte) {
...@@ -147,23 +174,43 @@ func TestBasicOperations(t *testing.T) { ...@@ -147,23 +174,43 @@ func TestBasicOperations(t *testing.T) {
} }
func TestLargeWrites(t *testing.T) { func TestLargeStringWrites(t *testing.T) {
var buf Buffer;
for i := 3; i < 30; i += 3 {
s := fillString(t, "TestLargeWrites (1)", &buf, "", 5, data);
empty(t, "TestLargeStringWrites (2)", &buf, s, make([]byte, len(data)/i));
}
check(t, "TestLargeStringWrites (3)", &buf, "");
}
func TestLargeByteWrites(t *testing.T) {
var buf Buffer; var buf Buffer;
for i := 3; i < 30; i += 3 { for i := 3; i < 30; i += 3 {
s := fill(t, "TestLargeWrites (1)", &buf, "", 5, data); s := fillBytes(t, "TestLargeWrites (1)", &buf, "", 5, bytes);
empty(t, "TestLargeWrites (2)", &buf, s, make([]byte, len(data)/i)); empty(t, "TestLargeByteWrites (2)", &buf, s, make([]byte, len(data)/i));
} }
check(t, "TestLargeWrites (3)", &buf, ""); check(t, "TestLargeByteWrites (3)", &buf, "");
} }
func TestLargeReads(t *testing.T) { func TestLargeStringReads(t *testing.T) {
var buf Buffer; var buf Buffer;
for i := 3; i < 30; i += 3 { for i := 3; i < 30; i += 3 {
s := fill(t, "TestLargeReads (1)", &buf, "", 5, data[0 : len(data)/i]); s := fillString(t, "TestLargeReads (1)", &buf, "", 5, data[0 : len(data)/i]);
empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(data))); empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(data)));
} }
check(t, "TestLargeReads (3)", &buf, ""); check(t, "TestLargeStringReads (3)", &buf, "");
}
func TestLargeByteReads(t *testing.T) {
var buf Buffer;
for i := 3; i < 30; i += 3 {
s := fillBytes(t, "TestLargeReads (1)", &buf, "", 5, bytes[0 : len(bytes)/i]);
empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(data)));
}
check(t, "TestLargeByteReads (3)", &buf, "");
} }
...@@ -172,7 +219,11 @@ func TestMixedReadsAndWrites(t *testing.T) { ...@@ -172,7 +219,11 @@ func TestMixedReadsAndWrites(t *testing.T) {
s := ""; s := "";
for i := 0; i < 50; i++ { for i := 0; i < 50; i++ {
wlen := rand.Intn(len(data)); wlen := rand.Intn(len(data));
s = fill(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, data[0 : wlen]); if i % 2 == 0 {
s = fillString(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, data[0 : wlen]);
} else {
s = fillBytes(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, bytes[0 : wlen]);
}
rlen := rand.Intn(len(data)); rlen := rand.Intn(len(data));
fub := make([]byte, rlen); fub := make([]byte, rlen);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment