Commit ef3df189 authored by Russ Cox's avatar Russ Cox

mime/multipart: simplify Part.Read

The basic structure of Part.Read should be simple:
do what you can with the current buffered data,
reading more as you need it. Make it that way.

Working entirely in the bufio.Reader's buffer eliminates
the need for an additional bytes.Buffer.

This structure should be easier to extend in the future as
more special cases arise.

Change-Id: I83cb24a755a1767c4c037f9ece6716460c3ecd01
Reviewed-on: https://go-review.googlesource.com/32092
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarBrad Fitzpatrick <bradfitz@golang.org>
parent 14f3284d
......@@ -42,9 +42,7 @@ type Part struct {
// during Read calls.
Header textproto.MIMEHeader
buffer *bytes.Buffer
mr *Reader
bytesRead int
mr *Reader
disposition string
dispositionParams map[string]string
......@@ -53,6 +51,11 @@ type Part struct {
// wrapper around such a reader, decoding the
// Content-Transfer-Encoding
r io.Reader
n int // known data bytes waiting in mr.bufReader
total int64 // total data bytes read already
err error // error to return when n == 0
readErr error // read error observed from mr.bufReader
}
// FormName returns the name parameter if p has a Content-Disposition
......@@ -126,7 +129,6 @@ func newPart(mr *Reader) (*Part, error) {
bp := &Part{
Header: make(map[string][]string),
mr: mr,
buffer: new(bytes.Buffer),
}
if err := bp.populateHeaders(); err != nil {
return nil, err
......@@ -161,65 +163,118 @@ type partReader struct {
p *Part
}
func (pr partReader) Read(d []byte) (n int, err error) {
func (pr partReader) Read(d []byte) (int, error) {
p := pr.p
defer func() {
p.bytesRead += n
}()
if p.buffer.Len() >= len(d) {
// Internal buffer of unconsumed data is large enough for
// the read request. No need to parse more at the moment.
return p.buffer.Read(d)
br := p.mr.bufReader
// Read into buffer until we identify some data to return,
// or we find a reason to stop (boundary or read error).
for p.n == 0 && p.err == nil {
peek, _ := br.Peek(br.Buffered())
p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr)
if p.n == 0 && p.err == nil {
// Force buffered I/O to read more into buffer.
_, p.readErr = br.Peek(len(peek) + 1)
if p.readErr == io.EOF {
p.readErr = io.ErrUnexpectedEOF
}
}
}
peek, err := p.mr.bufReader.Peek(peekBufferSize) // TODO(bradfitz): add buffer size accessor
// Look for an immediate empty part without a leading \r\n
// before the boundary separator. Some MIME code makes empty
// parts like this. Most browsers, however, write the \r\n
// before the subsequent boundary even for empty parts and
// won't hit this path.
if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) {
return 0, io.EOF
// Read out from "data to return" part of buffer.
if p.n == 0 {
return 0, p.err
}
unexpectedEOF := err == io.EOF
if err != nil && !unexpectedEOF {
return 0, fmt.Errorf("multipart: Part Read: %v", err)
n := len(d)
if n > p.n {
n = p.n
}
if peek == nil {
panic("nil peek buf")
n, _ = br.Read(d[:n])
p.total += int64(n)
p.n -= n
if p.n == 0 {
return n, p.err
}
// Search the peek buffer for "\r\n--boundary". If found,
// consume everything up to the boundary. If not, consume only
// as much of the peek buffer as cannot hold the boundary
// string.
nCopy := 0
foundBoundary := false
if idx, isEnd := p.mr.peekBufferSeparatorIndex(peek); idx != -1 {
nCopy = idx
foundBoundary = isEnd
if !isEnd && nCopy == 0 {
nCopy = 1 // make some progress.
return n, nil
}
// scanUntilBoundary scans buf to identify how much of it can be safely
// returned as part of the Part body.
// dashBoundary is "--boundary".
// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in.
// The comments below (and the name) assume "\n--boundary", but either is accepted.
// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized.
// readErr is the read error, if any, that followed reading the bytes in buf.
// scanUntilBoundary returns the number of data bytes from buf that can be
// returned as part of the Part body and also the error to return (if any)
// once those data bytes are done.
func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) {
if total == 0 {
// At beginning of body, allow dashBoundary.
if bytes.HasPrefix(buf, dashBoundary) {
switch matchAfterPrefix(buf, dashBoundary, readErr) {
case -1:
return len(dashBoundary), nil
case 0:
return 0, nil
case +1:
return 0, io.EOF
}
}
if bytes.HasPrefix(dashBoundary, buf) {
return 0, readErr
}
} else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 {
nCopy = safeCount
} else if unexpectedEOF {
// If we've run out of peek buffer and the boundary
// wasn't found (and can't possibly fit), we must have
// hit the end of the file unexpectedly.
return 0, io.ErrUnexpectedEOF
}
if nCopy > 0 {
if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil {
return 0, err
// Search for "\n--boundary".
if i := bytes.Index(buf, nlDashBoundary); i >= 0 {
switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) {
case -1:
return i + len(nlDashBoundary), nil
case 0:
return i, nil
case +1:
return i, io.EOF
}
}
if bytes.HasPrefix(nlDashBoundary, buf) {
return 0, readErr
}
// Otherwise, anything up to the final \n is not part of the boundary
// and so must be part of the body.
// Also if the section from the final \n onward is not a prefix of the boundary,
// it too must be part of the body.
i := bytes.LastIndexByte(buf, nlDashBoundary[0])
if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) {
return i, nil
}
return len(buf), readErr
}
// matchAfterPrefix checks whether buf should be considered to match the boundary.
// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary",
// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true.
//
// matchAfterPrefix returns +1 if the buffer does match the boundary,
// meaning the prefix is followed by a dash, space, tab, cr, nl, or end of input.
// It returns -1 if the buffer definitely does NOT match the boundary,
// meaning the prefix is followed by some other character.
// For example, "--foobar" does not match "--foo".
// It returns 0 more input needs to be read to make the decision,
// meaning that len(buf) == len(prefix) and readErr == nil.
func matchAfterPrefix(buf, prefix []byte, readErr error) int {
if len(buf) == len(prefix) {
if readErr != nil {
return +1
}
return 0
}
n, err = p.buffer.Read(d)
if err == io.EOF && !foundBoundary {
// If the boundary hasn't been reached there's more to
// read, so don't pass through an EOF from the buffer
err = nil
c := buf[len(prefix)]
if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '-' {
return +1
}
return
return -1
}
func (p *Part) Close() error {
......@@ -337,64 +392,6 @@ func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
return bytes.Equal(rest, mr.nl)
}
// peekBufferIsEmptyPart reports whether the provided peek-ahead
// buffer represents an empty part. It is called only if we've not
// already read any bytes in this part and checks for the case of MIME
// software not writing the \r\n on empty parts. Some does, some
// doesn't.
//
// This checks that what follows the "--boundary" is actually the end
// ("--boundary--" with optional whitespace) or optional whitespace
// and then a newline, so we don't catch "--boundaryFAKE", in which
// case the whole line is part of the data.
func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool {
// End of parts case.
// Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)`
if bytes.HasPrefix(peek, mr.dashBoundaryDash) {
rest := peek[len(mr.dashBoundaryDash):]
rest = skipLWSPChar(rest)
return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0
}
if !bytes.HasPrefix(peek, mr.dashBoundary) {
return false
}
// Test whether rest matches `^[ \t]*\r\n`)
rest := peek[len(mr.dashBoundary):]
rest = skipLWSPChar(rest)
return bytes.HasPrefix(rest, mr.nl)
}
// peekBufferSeparatorIndex returns the index of mr.nlDashBoundary in
// peek and whether it is a real boundary (and not a prefix of an
// unrelated separator). To be the end, the peek buffer must contain a
// newline after the boundary or contain the ending boundary (--separator--).
func (mr *Reader) peekBufferSeparatorIndex(peek []byte) (idx int, isEnd bool) {
idx = bytes.Index(peek, mr.nlDashBoundary)
if idx == -1 {
return
}
peek = peek[idx+len(mr.nlDashBoundary):]
if len(peek) == 0 || len(peek) == 1 && peek[0] == '-' {
return idx, false
}
if len(peek) > 1 && peek[0] == '-' && peek[1] == '-' {
return idx, true
}
peek = skipLWSPChar(peek)
// Don't have a complete line after the peek.
if bytes.IndexByte(peek, '\n') == -1 {
return idx, false
}
if len(peek) > 0 && peek[0] == '\n' {
return idx, true
}
if len(peek) > 1 && peek[0] == '\r' && peek[1] == '\n' {
return idx, true
}
return idx, false
}
// skipLWSPChar returns b with leading spaces and tabs removed.
// RFC 822 defines:
// LWSP-char = SPACE / HTAB
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment