Commit e157c6bc authored by Aaron Jacobs

Eliminated an allocation and a copy for each ReadFileOp.

This adds up to a significant performance gain for gcsfuse, in its sequential
read benchmark.

Also ReadDirOp.
parents 314b93c7 ab0580a4
......@@ -86,9 +86,10 @@ type Connection struct {
// State that is maintained for each in-flight op. This is stuffed into the
// context that the user uses to reply to the op.
type opState struct {
inMsg *buffer.InMessage
op interface{}
opID uint32 // For logging
inMsg *buffer.InMessage
outMsg *buffer.OutMessage
op interface{}
opID uint32 // For logging
}
// Create a connection wrapping the supplied file descriptor connected to the
......@@ -370,15 +371,17 @@ func (c *Connection) ReadOp() (ctx context.Context, op interface{}, err error) {
// Keep going until we find a request we know how to convert.
for {
// Read the next message from the kernel.
var m *buffer.InMessage
m, err = c.readMessage()
var inMsg *buffer.InMessage
inMsg, err = c.readMessage()
if err != nil {
return
}
// Convert the message to an op.
op, err = convertInMessage(m, c.protocol)
outMsg := c.getOutMessage()
op, err = convertInMessage(inMsg, outMsg, c.protocol)
if err != nil {
c.putOutMessage(outMsg)
err = fmt.Errorf("convertInMessage: %v", err)
return
}
......@@ -396,8 +399,8 @@ func (c *Connection) ReadOp() (ctx context.Context, op interface{}, err error) {
}
// Set up a context that remembers information about this op.
ctx = c.beginOp(m.Header().Opcode, m.Header().Unique)
ctx = context.WithValue(ctx, contextKey, opState{m, op, opID})
ctx = c.beginOp(inMsg.Header().Opcode, inMsg.Header().Unique)
ctx = context.WithValue(ctx, contextKey, opState{inMsg, outMsg, op, opID})
// Special case: responding to statfs is required to make mounting work on
// OS X. We don't currently expose the capability for the file system to
......@@ -426,14 +429,16 @@ func (c *Connection) Reply(ctx context.Context, opErr error) {
}
op := state.op
m := state.inMsg
inMsg := state.inMsg
outMsg := state.outMsg
opID := state.opID
// Make sure we destroy the message when we're done.
defer c.putInMessage(m)
// Make sure we destroy the messages when we're done.
defer c.putInMessage(inMsg)
defer c.putOutMessage(outMsg)
// Clean up state for this op.
c.finishOp(m.Header().Opcode, m.Header().Unique)
c.finishOp(inMsg.Header().Opcode, inMsg.Header().Unique)
// Debug logging
if c.debugLogger != nil {
......@@ -450,11 +455,10 @@ func (c *Connection) Reply(ctx context.Context, opErr error) {
}
// Send the reply to the kernel, if one is required.
outMsg := c.kernelResponse(m.Header().Unique, op, opErr)
if outMsg != nil {
err := c.writeMessage(outMsg.Bytes())
c.putOutMessage(outMsg)
noResponse := c.kernelResponse(outMsg, inMsg.Header().Unique, op, opErr)
if !noResponse {
err := c.writeMessage(outMsg.Bytes())
if err != nil && c.errorLogger != nil {
c.errorLogger.Printf("writeMessage: %v", err)
}
......
This diff is collapsed.
......@@ -384,25 +384,29 @@ type ReadDirOp struct {
// offset, and return array offsets into that cached listing.
Offset DirOffset
// The maximum number of bytes to return in ReadDirResponse.Data. A smaller
// number is acceptable.
Size int
// Set by the file system: a buffer consisting of a sequence of FUSE
// directory entries in the format generated by fuse_add_direntry
// (http://goo.gl/qCcHCV), which is consumed by parse_dirfile
// (http://goo.gl/2WUmD2). Use fuseutil.AppendDirent to generate this data.
// The destination buffer, whose length gives the size of the read.
//
// The buffer must not exceed the length specified in ReadDirRequest.Size. It
// is okay for the final entry to be truncated; parse_dirfile copes with this
// by ignoring the partial record.
// The output data should consist of a sequence of FUSE directory entries in
// the format generated by fuse_add_direntry (http://goo.gl/qCcHCV), which is
// consumed by parse_dirfile (http://goo.gl/2WUmD2). Use fuseutil.WriteDirent
// to generate this data.
//
// Each entry returned exposes a directory offset to the user that may later
// show up in ReadDirOp.Offset. See notes on that field for more
// information.
Dst []byte
// Set by the file system: the number of bytes read into Dst.
//
// An empty buffer indicates the end of the directory has been reached.
Data []byte
// It is okay for this to be less than len(Dst) if there are not enough
// entries available or the final entry would not fit.
//
// Zero means that the end of the directory has been reached. This is
// unambiguous because NAME_MAX (https://goo.gl/ZxzKaE) plus the size of
// fuse_dirent (https://goo.gl/WO8s3F) plus the 8-byte alignment of
// FUSE_DIRENT_ALIGN (http://goo.gl/UziWvH) is less than the read size of
// PAGE_SIZE used by fuse_readdir (cf. https://goo.gl/VajtS2).
BytesRead int
}
// Release a previously-minted directory handle. The kernel sends this when
......@@ -455,20 +459,21 @@ type ReadFileOp struct {
Inode InodeID
Handle HandleID
// The range of the file to read.
// The offset within the file at which to read.
Offset int64
// The destination buffer, whose length gives the size of the read.
Dst []byte
// Set by the file system: the number of bytes read.
//
// The FUSE documentation requires that exactly the number of bytes be
// returned, except in the case of EOF or error (http://goo.gl/ZgfBkF). This
// appears to be because it uses file mmapping machinery
// The FUSE documentation requires that exactly the requested number of bytes
// be returned, except in the case of EOF or error (http://goo.gl/ZgfBkF).
// This appears to be because it uses file mmapping machinery
// (http://goo.gl/SGxnaN) to read a page at a time. It appears to understand
// where EOF is by checking the inode size (http://goo.gl/0BkqKD), returned
// by a previous call to LookUpInode, GetInodeAttributes, etc.
Offset int64
Size int
// Set by the file system: the data read. If this is less than the requested
// size, it indicates EOF. An error should not be returned in this case.
Data []byte
BytesRead int
}
// Write data to a file previously opened with CreateFile or OpenFile.
......
......@@ -35,7 +35,7 @@ const (
)
// A struct representing an entry within a directory file, describing a child.
// See notes on fuseops.ReadDirOp and on AppendDirent for details.
// See notes on fuseops.ReadDirOp and on WriteDirent for details.
type Dirent struct {
// The (opaque) offset within the directory file of the entry following this
// one. See notes on fuseops.ReadDirOp.Offset for details.
......@@ -50,10 +50,11 @@ type Dirent struct {
Type DirentType
}
// Append the supplied directory entry to the given buffer in the format
// expected in fuseops.ReadFileOp.Data, returning the resulting buffer.
func AppendDirent(input []byte, d Dirent) (output []byte) {
// We want to append bytes with the layout of fuse_dirent
// Write the supplied directory entry into the given buffer in the format
// expected in fuseops.ReadDirOp.Dst, returning the number of bytes written.
// Return zero if the entry would not fit.
func WriteDirent(buf []byte, d Dirent) (n int) {
// We want to write bytes with the layout of fuse_dirent
// (http://goo.gl/BmFxob) in host order. The struct must be aligned according
// to FUSE_DIRENT_ALIGN (http://goo.gl/UziWvH), which dictates 8-byte
// alignment.
......@@ -65,10 +66,23 @@ func AppendDirent(input []byte, d Dirent) (output []byte) {
name [0]byte
}
const alignment = 8
const nameOffset = 8 + 8 + 4 + 4
const direntAlignment = 8
const direntSize = 8 + 8 + 4 + 4
// Write the header into the buffer.
// Compute the number of bytes of padding we'll need to maintain alignment
// for the next entry.
var padLen int
if len(d.Name)%direntAlignment != 0 {
padLen = direntAlignment - (len(d.Name) % direntAlignment)
}
// Do we have enough room?
totalLen := direntSize + len(d.Name) + padLen
if totalLen > len(buf) {
return
}
// Write the header.
de := fuse_dirent{
ino: uint64(d.Inode),
off: uint64(d.Offset),
......@@ -76,17 +90,15 @@ func AppendDirent(input []byte, d Dirent) (output []byte) {
type_: uint32(d.Type),
}
output = append(input, (*[nameOffset]byte)(unsafe.Pointer(&de))[:]...)
n += copy(buf[n:], (*[direntSize]byte)(unsafe.Pointer(&de))[:])
// Write the name afterward.
output = append(output, d.Name...)
n += copy(buf[n:], d.Name)
// Add any necessary padding.
if len(d.Name)%alignment != 0 {
padLen := alignment - (len(d.Name) % alignment)
var padding [alignment]byte
output = append(output, padding[:padLen]...)
if padLen != 0 {
var padding [direntAlignment]byte
n += copy(buf[n:], padding[:padLen])
}
return
......
......@@ -25,6 +25,9 @@ import (
const outHeaderSize = unsafe.Sizeof(fusekernel.OutHeader{})
// OutMessage structs begin life with Len() == OutMessageInitialSize.
const OutMessageInitialSize = outHeaderSize
// We size out messages to be large enough to hold a header for the response
// plus the largest read that may come in.
const outMessageSize = outHeaderSize + MaxReadSize
......@@ -53,8 +56,8 @@ func init() {
// Reset the message so that it is ready to be used again. Afterward, the
// contents are solely a zeroed header.
func (m *OutMessage) Reset() {
m.offset = outHeaderSize
memclr(unsafe.Pointer(&m.storage), outHeaderSize)
m.offset = OutMessageInitialSize
memclr(unsafe.Pointer(&m.storage), OutMessageInitialSize)
}
// Return a pointer to the header at the start of the message.
......@@ -87,6 +90,15 @@ func (b *OutMessage) GrowNoZero(size uintptr) (p unsafe.Pointer) {
return
}
// Shrink discards the trailing n bytes of the message by moving b.offset back
// by n. Only bytes beyond the first OutMessageInitialSize bytes may be
// discarded; asking for more than that panics.
func (b *OutMessage) Shrink(n uintptr) {
	// The amount that can be given back is whatever lies past the initial size.
	if shrinkable := b.offset - OutMessageInitialSize; n > shrinkable {
		panic(fmt.Sprintf("Shrink(%d) out of range for offset %d", n, b.offset))
	}

	b.offset -= n
}
// Equivalent to growing by the length of p, then copying p over the new
// segment. Panics if there is not enough room available.
func (b *OutMessage) Append(src []byte) {
......
......@@ -180,8 +180,7 @@ func (fs *flushFS) ReadFile(
}
// Read what we can.
op.Data = make([]byte, op.Size)
copy(op.Data, fs.fooContents[op.Offset:])
op.BytesRead = copy(op.Dst, fs.fooContents[op.Offset:])
return
}
......@@ -298,13 +297,15 @@ func (fs *flushFS) ReadDir(
// Fill in the listing.
for _, de := range dirents {
op.Data = fuseutil.AppendDirent(op.Data, de)
}
n := fuseutil.WriteDirent(op.Dst[op.BytesRead:], de)
// We don't support doing this in anything more than one shot.
if len(op.Data) > op.Size {
err = fmt.Errorf("Couldn't fit listing in %v bytes", op.Size)
return
// We don't support doing this in anything more than one shot.
if n == 0 {
err = fmt.Errorf("Couldn't fit listing in %v bytes", len(op.Dst))
return
}
op.BytesRead += n
}
return
......
......@@ -228,11 +228,12 @@ func (fs *helloFS) ReadDir(
// Resume at the specified offset into the array.
for _, e := range entries {
op.Data = fuseutil.AppendDirent(op.Data, e)
if len(op.Data) > op.Size {
op.Data = op.Data[:op.Size]
n := fuseutil.WriteDirent(op.Dst[op.BytesRead:], e)
if n == 0 {
break
}
op.BytesRead += n
}
return
......@@ -251,9 +252,7 @@ func (fs *helloFS) ReadFile(
// Let io.ReaderAt deal with the semantics.
reader := strings.NewReader("Hello, world!")
op.Data = make([]byte, op.Size)
n, err := reader.ReadAt(op.Data, op.Offset)
op.Data = op.Data[:n]
op.BytesRead, err = reader.ReadAt(op.Dst, op.Offset)
// Special case: FUSE doesn't expect us to return io.EOF.
if err == io.EOF {
......
......@@ -278,7 +278,7 @@ func (in *inode) RemoveChild(name string) {
// Serve a ReadDir request.
//
// REQUIRES: in.isDir()
func (in *inode) ReadDir(offset int, size int) (data []byte) {
func (in *inode) ReadDir(p []byte, offset int) (n int) {
if !in.isDir() {
panic("ReadDir called on non-directory.")
}
......@@ -291,13 +291,12 @@ func (in *inode) ReadDir(offset int, size int) (data []byte) {
continue
}
data = fuseutil.AppendDirent(data, in.entries[i])
// Trim and stop early if we've exceeded the requested size.
if len(data) > size {
data = data[:size]
tmp := fuseutil.WriteDirent(p[n:], in.entries[i])
if tmp == 0 {
break
}
n += tmp
}
return
......
......@@ -428,7 +428,9 @@ func (fs *memFS) Rename(
existingID, _, ok := newParent.LookUpChild(op.NewName)
if ok {
existing := fs.getInodeOrDie(existingID)
if existing.isDir() && len(existing.ReadDir(0, 1024)) > 0 {
var buf [4096]byte
if existing.isDir() && existing.ReadDir(buf[:], 0) > 0 {
err = fuse.ENOTEMPTY
return
}
......@@ -538,7 +540,7 @@ func (fs *memFS) ReadDir(
inode := fs.getInodeOrDie(op.Inode)
// Serve the request.
op.Data = inode.ReadDir(int(op.Offset), op.Size)
op.BytesRead = inode.ReadDir(op.Dst, int(op.Offset))
return
}
......@@ -571,9 +573,7 @@ func (fs *memFS) ReadFile(
inode := fs.getInodeOrDie(op.Inode)
// Serve the request.
op.Data = make([]byte, op.Size)
n, err := inode.ReadAt(op.Data, op.Offset)
op.Data = op.Data[:n]
op.BytesRead, err = inode.ReadAt(op.Dst, op.Offset)
// Don't return EOF errors; we just indicate EOF to fuse using a short read.
if err == io.EOF {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment