Commit 79480ca0 authored by Joe Tsai's avatar Joe Tsai Committed by Andrew Gerrand

archive/tar: fix bugs with sparseFileReader

The sparseFileReader is prone to two different forms of
denial-of-service attacks:
* A malicious tar file can cause an infinite loop
* A malicious tar file can cause arbitrary panics

This results because of poor error checking/handling, which this
CL fixes. While we are at it, add a plethora of unit tests to
test for possible malicious inputs.

Change-Id: I2f9446539d189f3c1738a1608b0ad4859c1be929
Reviewed-on: https://go-review.googlesource.com/15115
Reviewed-by: Andrew Gerrand <adg@golang.org>
Run-TryBot: Andrew Gerrand <adg@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent b1797390
...@@ -12,6 +12,7 @@ import ( ...@@ -12,6 +12,7 @@ import (
"errors" "errors"
"io" "io"
"io/ioutil" "io/ioutil"
"math"
"os" "os"
"strconv" "strconv"
"strings" "strings"
...@@ -49,12 +50,36 @@ type regFileReader struct { ...@@ -49,12 +50,36 @@ type regFileReader struct {
nb int64 // number of unread bytes for current file entry nb int64 // number of unread bytes for current file entry
} }
// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive. // A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive.
type sparseFileReader struct { type sparseFileReader struct {
rfr *regFileReader // reads the sparse-encoded file data rfr numBytesReader // Reads the sparse-encoded file data
sp []sparseEntry // the sparse map for the file sp []sparseEntry // The sparse map for the file
pos int64 // keeps track of file position pos int64 // Keeps track of file position
tot int64 // total size of the file total int64 // Total size of the file
}
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 10-byte data:
// var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
// var sp = []sparseEntry{
// {offset: 2, numBytes: 5} // Data fragment for [2..7]
// {offset: 18, numBytes: 3} // Data fragment for [18..21]
// }
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
offset int64 // Starting position of the fragment
numBytes int64 // Length of the fragment
} }
// Keywords for GNU sparse files in a PAX extended header // Keywords for GNU sparse files in a PAX extended header
...@@ -128,7 +153,10 @@ func (tr *Reader) Next() (*Header, error) { ...@@ -128,7 +153,10 @@ func (tr *Reader) Next() (*Header, error) {
if sp != nil { if sp != nil {
// Current file is a PAX format GNU sparse file. // Current file is a PAX format GNU sparse file.
// Set the current file reader to a sparse file reader. // Set the current file reader to a sparse file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil, tr.err
}
} }
return hdr, nil return hdr, nil
case TypeGNULongName: case TypeGNULongName:
...@@ -541,21 +569,17 @@ func (tr *Reader) readHeader() *Header { ...@@ -541,21 +569,17 @@ func (tr *Reader) readHeader() *Header {
if tr.err != nil { if tr.err != nil {
return nil return nil
} }
// Current file is a GNU sparse file. Update the current file reader. // Current file is a GNU sparse file. Update the current file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil
}
} }
return hdr return hdr
} }
// A sparseEntry holds a single entry in a sparse file's sparse map.
// A sparse entry indicates the offset and size in a sparse file of a
// block of data.
type sparseEntry struct {
offset int64
numBytes int64
}
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
// then one or more extension headers are used to store the rest of the sparse map. // then one or more extension headers are used to store the rest of the sparse map.
...@@ -771,9 +795,33 @@ func (rfr *regFileReader) numBytes() int64 { ...@@ -771,9 +795,33 @@ func (rfr *regFileReader) numBytes() int64 {
return rfr.nb return rfr.nb
} }
// readHole reads a sparse file hole ending at offset toOffset // newSparseFileReader creates a new sparseFileReader, but validates all of the
func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { // sparse entries before doing so.
n64 := toOffset - sfr.pos func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
if total < 0 {
return nil, ErrHeader // Total size cannot be negative
}
// Validate all sparse entries. These are the same checks as performed by
// the BSD tar utility.
for i, s := range sp {
switch {
case s.offset < 0 || s.numBytes < 0:
return nil, ErrHeader // Negative values are never okay
case s.offset > math.MaxInt64-s.numBytes:
return nil, ErrHeader // Integer overflow with large length
case s.offset+s.numBytes > total:
return nil, ErrHeader // Region extends beyond the "real" size
case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
return nil, ErrHeader // Regions can't overlap and must be in order
}
}
return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
}
// readHole reads a sparse hole ending at endOffset.
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
n64 := endOffset - sfr.pos
if n64 > int64(len(b)) { if n64 > int64(len(b)) {
n64 = int64(len(b)) n64 = int64(len(b))
} }
...@@ -787,49 +835,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { ...@@ -787,49 +835,54 @@ func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
// Read reads the sparse file data in expanded form. // Read reads the sparse file data in expanded form.
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
// Skip past all empty fragments.
for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
sfr.sp = sfr.sp[1:]
}
// If there are no more fragments, then it is possible that there
// is one last sparse hole.
if len(sfr.sp) == 0 { if len(sfr.sp) == 0 {
// No more data fragments to read from. // This behavior matches the BSD tar utility.
if sfr.pos < sfr.tot { // However, GNU tar stops returning data even if sfr.total is unmet.
// We're in the last hole if sfr.pos < sfr.total {
n = sfr.readHole(b, sfr.tot) return sfr.readHole(b, sfr.total), nil
return
} }
// Otherwise, we're at the end of the file
return 0, io.EOF return 0, io.EOF
} }
if sfr.tot < sfr.sp[0].offset {
return 0, io.ErrUnexpectedEOF // In front of a data fragment, so read a hole.
}
if sfr.pos < sfr.sp[0].offset { if sfr.pos < sfr.sp[0].offset {
// We're in a hole return sfr.readHole(b, sfr.sp[0].offset), nil
n = sfr.readHole(b, sfr.sp[0].offset)
return
} }
// We're not in a hole, so we'll read from the next data fragment // In a data fragment, so read from it.
posInFragment := sfr.pos - sfr.sp[0].offset // This math is overflow free since we verify that offset and numBytes can
bytesLeft := sfr.sp[0].numBytes - posInFragment // be safely added when creating the sparseFileReader.
endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
bytesLeft := endPos - sfr.pos // Bytes left in fragment
if int64(len(b)) > bytesLeft { if int64(len(b)) > bytesLeft {
b = b[0:bytesLeft] b = b[:bytesLeft]
} }
n, err = sfr.rfr.Read(b) n, err = sfr.rfr.Read(b)
sfr.pos += int64(n) sfr.pos += int64(n)
if err == io.EOF {
if int64(n) == bytesLeft { if sfr.pos < endPos {
// We're done with this fragment err = io.ErrUnexpectedEOF // There was supposed to be more data
sfr.sp = sfr.sp[1:] } else if sfr.pos < sfr.total {
err = nil // There is still an implicit sparse hole at the end
}
} }
if err == io.EOF && sfr.pos < sfr.tot { if sfr.pos == endPos {
// We reached the end of the last fragment's data, but there's a final hole sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
err = nil
} }
return return n, err
} }
// numBytes returns the number of bytes left to read in the sparse file's // numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive. // sparse-encoded data in the tar archive.
func (sfr *sparseFileReader) numBytes() int64 { func (sfr *sparseFileReader) numBytes() int64 {
return sfr.rfr.nb return sfr.rfr.numBytes()
} }
...@@ -10,6 +10,7 @@ import ( ...@@ -10,6 +10,7 @@ import (
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"math"
"os" "os"
"reflect" "reflect"
"strings" "strings"
...@@ -560,80 +561,155 @@ func TestSparseEndToEnd(t *testing.T) { ...@@ -560,80 +561,155 @@ func TestSparseEndToEnd(t *testing.T) {
} }
} }
type sparseFileReadTest struct { func TestSparseFileReader(t *testing.T) {
sparseData []byte var vectors = []struct {
sparseMap []sparseEntry realSize int64 // Real size of the output file
realSize int64 sparseMap []sparseEntry // Input sparse map
expected []byte sparseData string // Input compact data
} expected string // Expected output data
err error // Expected error outcome
var sparseFileReadTests = []sparseFileReadTest{ }{{
{ realSize: 8,
sparseData: []byte("abcde"),
sparseMap: []sparseEntry{ sparseMap: []sparseEntry{
{offset: 0, numBytes: 2}, {offset: 0, numBytes: 2},
{offset: 5, numBytes: 3}, {offset: 5, numBytes: 3},
}, },
realSize: 8, sparseData: "abcde",
expected: []byte("ab\x00\x00\x00cde"), expected: "ab\x00\x00\x00cde",
}, }, {
{ realSize: 10,
sparseData: []byte("abcde"),
sparseMap: []sparseEntry{ sparseMap: []sparseEntry{
{offset: 0, numBytes: 2}, {offset: 0, numBytes: 2},
{offset: 5, numBytes: 3}, {offset: 5, numBytes: 3},
}, },
realSize: 10, sparseData: "abcde",
expected: []byte("ab\x00\x00\x00cde\x00\x00"), expected: "ab\x00\x00\x00cde\x00\x00",
}, }, {
{ realSize: 8,
sparseData: []byte("abcde"),
sparseMap: []sparseEntry{ sparseMap: []sparseEntry{
{offset: 1, numBytes: 3}, {offset: 1, numBytes: 3},
{offset: 6, numBytes: 2}, {offset: 6, numBytes: 2},
}, },
sparseData: "abcde",
expected: "\x00abc\x00\x00de",
}, {
realSize: 8, realSize: 8,
expected: []byte("\x00abc\x00\x00de"),
},
{
sparseData: []byte("abcde"),
sparseMap: []sparseEntry{ sparseMap: []sparseEntry{
{offset: 1, numBytes: 3}, {offset: 1, numBytes: 3},
{offset: 6, numBytes: 0},
{offset: 6, numBytes: 0},
{offset: 6, numBytes: 2}, {offset: 6, numBytes: 2},
}, },
sparseData: "abcde",
expected: "\x00abc\x00\x00de",
}, {
realSize: 10, realSize: 10,
expected: []byte("\x00abc\x00\x00de\x00\x00"), sparseMap: []sparseEntry{
}, {offset: 1, numBytes: 3},
{ {offset: 6, numBytes: 2},
sparseData: []byte(""), },
sparseMap: nil, sparseData: "abcde",
expected: "\x00abc\x00\x00de\x00\x00",
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
{offset: 8, numBytes: 0},
{offset: 8, numBytes: 0},
{offset: 8, numBytes: 0},
{offset: 8, numBytes: 0},
},
sparseData: "abcde",
expected: "\x00abc\x00\x00de\x00\x00",
}, {
realSize: 2, realSize: 2,
expected: []byte("\x00\x00"), sparseMap: []sparseEntry{},
}, sparseData: "",
} expected: "\x00\x00",
}, {
realSize: -2,
sparseMap: []sparseEntry{},
err: ErrHeader,
}, {
realSize: -10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 5},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 35,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 5},
},
sparseData: "abcde",
err: io.ErrUnexpectedEOF,
}, {
realSize: 35,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: -5},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 35,
sparseMap: []sparseEntry{
{offset: math.MaxInt64, numBytes: 3},
{offset: 6, numBytes: -5},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 2, numBytes: 2},
},
sparseData: "abcde",
err: ErrHeader,
}}
func TestSparseFileReader(t *testing.T) { for i, v := range vectors {
for i, test := range sparseFileReadTests { r := bytes.NewReader([]byte(v.sparseData))
r := bytes.NewReader(test.sparseData) rfr := &regFileReader{r: r, nb: int64(len(v.sparseData))}
nb := int64(r.Len())
sfr := &sparseFileReader{ var sfr *sparseFileReader
rfr: &regFileReader{r: r, nb: nb}, var err error
sp: test.sparseMap, var buf []byte
pos: 0,
tot: test.realSize, sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize)
if err != nil {
goto fail
} }
if sfr.numBytes() != nb { if sfr.numBytes() != int64(len(v.sparseData)) {
t.Errorf("test %d: Before reading, sfr.numBytes() = %d, want %d", i, sfr.numBytes(), nb) t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData))
} }
buf, err := ioutil.ReadAll(sfr) buf, err = ioutil.ReadAll(sfr)
if err != nil { if err != nil {
t.Errorf("test %d: Unexpected error: %v", i, err) goto fail
} }
if e := test.expected; !bytes.Equal(buf, e) { if string(buf) != v.expected {
t.Errorf("test %d: Contents = %v, want %v", i, buf, e) t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected)
} }
if sfr.numBytes() != 0 { if sfr.numBytes() != 0 {
t.Errorf("test %d: After draining the reader, numBytes() was nonzero", i) t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0)
}
fail:
if err != v.err {
t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
} }
} }
} }
...@@ -646,10 +722,10 @@ func TestSparseIncrementalRead(t *testing.T) { ...@@ -646,10 +722,10 @@ func TestSparseIncrementalRead(t *testing.T) {
r := bytes.NewReader(sparseData) r := bytes.NewReader(sparseData)
nb := int64(r.Len()) nb := int64(r.Len())
sfr := &sparseFileReader{ sfr := &sparseFileReader{
rfr: &regFileReader{r: r, nb: nb}, rfr: &regFileReader{r: r, nb: nb},
sp: sparseMap, sp: sparseMap,
pos: 0, pos: 0,
tot: int64(len(expected)), total: int64(len(expected)),
} }
// We'll read the data 6 bytes at a time, with a hole of size 10 at // We'll read the data 6 bytes at a time, with a hole of size 10 at
...@@ -747,6 +823,11 @@ func TestUninitializedRead(t *testing.T) { ...@@ -747,6 +823,11 @@ func TestUninitializedRead(t *testing.T) {
} }
// TODO(dsnet): TestNegativeHdrSize, TestIssue10968, and TestIssue11169 tests
// that Reader properly handles corrupted tar files. Given the increasing number
// of invalid/malicious files that can crash Reader, we should modify TestReader to // of invalid/malicious files that can crash Reader, we should modify TestReader to
// be able to test that intentionally corrupt tar files don't succeed or crash.
// Negative header size should not cause panic. // Negative header size should not cause panic.
// Issues 10959 and 10960. // Issues 10959 and 10960.
func TestNegativeHdrSize(t *testing.T) { func TestNegativeHdrSize(t *testing.T) {
...@@ -771,14 +852,11 @@ func TestIssue10968(t *testing.T) { ...@@ -771,14 +852,11 @@ func TestIssue10968(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
defer f.Close() defer f.Close()
r := NewReader(f) r := NewReader(f)
_, err = r.Next() _, err = r.Next()
if err != nil { if err == nil {
t.Fatal(err) t.Fatal("Unexpected success")
}
_, err = io.Copy(ioutil.Discard, r)
if err != io.ErrUnexpectedEOF {
t.Fatalf("expected %q, got %q", io.ErrUnexpectedEOF, err)
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment