Commit 3bece2fa authored by Joe Tsai's avatar Joe Tsai Committed by Joe Tsai

archive/tar: refactor Reader support for sparse files

This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.

As a result of this CL, there are some new publicly visible API changes:
	type SparseEntry struct { Offset, Length int64 }
	type Header struct { ...; SparseHoles []SparseEntry }

A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represents sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.

It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.

Some unexported helper functions were added to common.go:
	func validateSparseEntries(sp []SparseEntry, size int64) bool
	func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
	func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry

The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).

Some other high-level changes:
* skipUnread is deleted, where most of its logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support were completely rewritten.

Updates #13548

Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIan Lance Taylor <iant@golang.org>
parent b2174a16
...@@ -15,6 +15,7 @@ package tar ...@@ -15,6 +15,7 @@ package tar
import ( import (
"errors" "errors"
"fmt" "fmt"
"math"
"os" "os"
"path" "path"
"strconv" "strconv"
...@@ -30,6 +31,8 @@ var ( ...@@ -30,6 +31,8 @@ var (
ErrWriteTooLong = errors.New("tar: write too long") ErrWriteTooLong = errors.New("tar: write too long")
ErrFieldTooLong = errors.New("tar: header field too long") ErrFieldTooLong = errors.New("tar: header field too long")
ErrWriteAfterClose = errors.New("tar: write after close") ErrWriteAfterClose = errors.New("tar: write after close")
errMissData = errors.New("tar: sparse file references non-existent data")
errUnrefData = errors.New("tar: sparse file contains unreferenced data")
) )
// Header type flags. // Header type flags.
...@@ -68,6 +71,131 @@ type Header struct { ...@@ -68,6 +71,131 @@ type Header struct {
AccessTime time.Time // access time AccessTime time.Time // access time
ChangeTime time.Time // status change time ChangeTime time.Time // status change time
Xattrs map[string]string Xattrs map[string]string
// SparseHoles represents a sequence of holes in a sparse file.
//
// The regions must be sorted in ascending order, not overlap with
// each other, and not extend past the specified Size.
// The file is sparse if either len(SparseHoles) > 0 or
// the Typeflag is set to TypeGNUSparse.
SparseHoles []SparseEntry
}
// SparseEntry represents a Length-sized fragment at Offset in the file.
type SparseEntry struct{ Offset, Length int64 }
func (s SparseEntry) endOffset() int64 { return s.Offset + s.Length }
// A sparse file can be represented as either a sparseDatas or a sparseHoles.
// As long as the total size is known, they are equivalent and one can be
// converted to the other form and back. The various tar formats with sparse
// file support represent sparse files in the sparseDatas form. That is, they
// specify the fragments in the file that has data, and treat everything else as
// having zero bytes. As such, the encoding and decoding logic in this package
// deals with sparseDatas.
//
// However, the external API uses sparseHoles instead of sparseDatas because the
// zero value of sparseHoles logically represents a normal file (i.e., there are
// no holes in it). On the other hand, the zero value of sparseDatas implies
// that the file has no data in it, which is rather odd.
//
// As an example, if the underlying raw file contains the 8-byte data:
// var compactFile = "abcdefgh"
//
// And the sparse map has the following entries:
// var spd sparseDatas = []sparseEntry{
// {Offset: 2, Length: 5}, // Data fragment for 2..6
// {Offset: 18, Length: 3}, // Data fragment for 18..20
// }
// var sph sparseHoles = []SparseEntry{
// {Offset: 0, Length: 2}, // Hole fragment for 0..1
// {Offset: 7, Length: 11}, // Hole fragment for 7..17
// {Offset: 21, Length: 4}, // Hole fragment for 21..24
// }
//
// Then the content of the resulting sparse file with a Header.Size of 25 is:
// var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type (
sparseDatas []SparseEntry
sparseHoles []SparseEntry
)
// validateSparseEntries reports whether sp is a valid sparse map.
// It does not matter whether sp represents data fragments or hole fragments.
func validateSparseEntries(sp []SparseEntry, size int64) bool {
// Validate all sparse entries. These are the same checks as performed by
// the BSD tar utility.
if size < 0 {
return false
}
var pre SparseEntry
for _, cur := range sp {
switch {
case cur.Offset < 0 || cur.Length < 0:
return false // Negative values are never okay
case cur.Offset > math.MaxInt64-cur.Length:
return false // Integer overflow with large length
case cur.endOffset() > size:
return false // Region extends beyond the actual size
case pre.endOffset() > cur.Offset:
return false // Regions cannot overlap and must be in order
}
pre = cur
}
return true
}
// alignSparseEntries mutates src and returns dst where each fragment's
// starting offset is aligned up to the nearest block edge, and each
// ending offset is aligned down to the nearest block edge.
//
// Even though the Go tar Reader and the BSD tar utility can handle entries
// with arbitrary offsets and lengths, the GNU tar utility can only handle
// offsets and lengths that are multiples of blockSize.
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry {
dst := src[:0]
for _, s := range src {
pos, end := s.Offset, s.endOffset()
pos += blockPadding(+pos) // Round-up to nearest blockSize
if end != size {
end -= blockPadding(-end) // Round-down to nearest blockSize
}
if pos < end {
dst = append(dst, SparseEntry{Offset: pos, Length: end - pos})
}
}
return dst
}
// invertSparseEntries converts a sparse map from one form to the other.
// If the input is sparseHoles, then it will output sparseDatas and vice-versa.
// The input must have been already validated.
//
// This function mutates src and returns a normalized map where:
// * adjacent fragments are coalesced together
// * only the last fragment may be empty
// * the endOffset of the last fragment is the total size
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry {
dst := src[:0]
var pre SparseEntry
for _, cur := range src {
if cur.Length == 0 {
continue // Skip empty fragments
}
pre.Length = cur.Offset - pre.Offset
if pre.Length > 0 {
dst = append(dst, pre) // Only add non-empty fragments
}
pre.Offset = cur.endOffset()
}
pre.Length = size - pre.Offset // Possibly the only empty fragment
return append(dst, pre)
}
type fileState interface {
// Remaining reports the number of remaining bytes in the current file.
// This count includes any sparse holes that may exist.
Remaining() int64
} }
// FileInfo returns an os.FileInfo for the Header. // FileInfo returns an os.FileInfo for the Header.
...@@ -300,6 +428,17 @@ const ( ...@@ -300,6 +428,17 @@ const (
paxUname = "uname" paxUname = "uname"
paxXattr = "SCHILY.xattr." paxXattr = "SCHILY.xattr."
paxNone = "" paxNone = ""
// Keywords for GNU sparse files in a PAX extended header.
paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
paxGNUSparseOffset = "GNU.sparse.offset"
paxGNUSparseNumBytes = "GNU.sparse.numbytes"
paxGNUSparseMap = "GNU.sparse.map"
paxGNUSparseName = "GNU.sparse.name"
paxGNUSparseMajor = "GNU.sparse.major"
paxGNUSparseMinor = "GNU.sparse.minor"
paxGNUSparseSize = "GNU.sparse.size"
paxGNUSparseRealSize = "GNU.sparse.realsize"
) )
// FileInfoHeader creates a partially-populated Header from fi. // FileInfoHeader creates a partially-populated Header from fi.
...@@ -373,6 +512,9 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { ...@@ -373,6 +512,9 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
h.Size = 0 h.Size = 0
h.Linkname = sys.Linkname h.Linkname = sys.Linkname
} }
if sys.SparseHoles != nil {
h.SparseHoles = append([]SparseEntry{}, sys.SparseHoles...)
}
} }
if sysStat != nil { if sysStat != nil {
return h, sysStat(fi, h) return h, sysStat(fi, h)
...@@ -390,3 +532,10 @@ func isHeaderOnlyType(flag byte) bool { ...@@ -390,3 +532,10 @@ func isHeaderOnlyType(flag byte) bool {
return false return false
} }
} }
func min(a, b int64) int64 {
if a < b {
return a
}
return b
}
...@@ -50,6 +50,12 @@ const ( ...@@ -50,6 +50,12 @@ const (
prefixSize = 155 // Max length of the prefix field in USTAR format prefixSize = 155 // Max length of the prefix field in USTAR format
) )
// blockPadding computes the number of bytes needed to pad offset up to the
// nearest block edge where 0 <= n < blockSize.
func blockPadding(offset int64) (n int64) {
return -offset & (blockSize - 1)
}
var zeroBlock block var zeroBlock block
type block [blockSize]byte type block [blockSize]byte
...@@ -192,11 +198,11 @@ func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } ...@@ -192,11 +198,11 @@ func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] }
type sparseArray []byte type sparseArray []byte
func (s sparseArray) Entry(i int) sparseNode { return (sparseNode)(s[i*24:]) } func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) }
func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] }
func (s sparseArray) MaxEntries() int { return len(s) / 24 } func (s sparseArray) MaxEntries() int { return len(s) / 24 }
type sparseNode []byte type sparseElem []byte
func (s sparseNode) Offset() []byte { return s[00:][:12] } func (s sparseElem) Offset() []byte { return s[00:][:12] }
func (s sparseNode) NumBytes() []byte { return s[12:][:12] } func (s sparseElem) Length() []byte { return s[12:][:12] }
...@@ -11,7 +11,6 @@ import ( ...@@ -11,7 +11,6 @@ import (
"bytes" "bytes"
"io" "io"
"io/ioutil" "io/ioutil"
"math"
"strconv" "strconv"
"strings" "strings"
"time" "time"
...@@ -23,9 +22,9 @@ import ( ...@@ -23,9 +22,9 @@ import (
// and then it can be treated as an io.Reader to access the file's data. // and then it can be treated as an io.Reader to access the file's data.
type Reader struct { type Reader struct {
r io.Reader r io.Reader
pad int64 // amount of padding (ignored) after current file entry pad int64 // Amount of padding (ignored) after current file entry
curr numBytesReader // reader for current file entry curr fileReader // Reader for current file entry
blk block // buffer to use as temporary local storage blk block // Buffer to use as temporary local storage
// err is a persistent error. // err is a persistent error.
// It is only the responsibility of every exported method of Reader to // It is only the responsibility of every exported method of Reader to
...@@ -33,66 +32,17 @@ type Reader struct { ...@@ -33,66 +32,17 @@ type Reader struct {
err error err error
} }
// A numBytesReader is an io.Reader with a numBytes method, returning the number type fileReader interface {
// of bytes remaining in the underlying encoded data.
type numBytesReader interface {
io.Reader io.Reader
numBytes() int64 fileState
}
// A regFileReader is a numBytesReader for reading file data from a tar archive. Discard(n int64) (int64, error)
type regFileReader struct {
r io.Reader // underlying reader
nb int64 // number of unread bytes for current file entry
}
// A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive.
type sparseFileReader struct {
rfr numBytesReader // Reads the sparse-encoded file data
sp []sparseEntry // The sparse map for the file
pos int64 // Keeps track of file position
total int64 // Total size of the file
} }
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 10-byte data:
// var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
// var sp = []sparseEntry{
// {offset: 2, numBytes: 5} // Data fragment for [2..7]
// {offset: 18, numBytes: 3} // Data fragment for [18..21]
// }
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
offset int64 // Starting position of the fragment
numBytes int64 // Length of the fragment
}
// Keywords for GNU sparse files in a PAX extended header
const (
paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
paxGNUSparseOffset = "GNU.sparse.offset"
paxGNUSparseNumBytes = "GNU.sparse.numbytes"
paxGNUSparseMap = "GNU.sparse.map"
paxGNUSparseName = "GNU.sparse.name"
paxGNUSparseMajor = "GNU.sparse.major"
paxGNUSparseMinor = "GNU.sparse.minor"
paxGNUSparseSize = "GNU.sparse.size"
paxGNUSparseRealSize = "GNU.sparse.realsize"
)
// NewReader creates a new Reader reading from r. // NewReader creates a new Reader reading from r.
func NewReader(r io.Reader) *Reader { return &Reader{r: r} } func NewReader(r io.Reader) *Reader {
return &Reader{r: r, curr: &regFileReader{r, 0}}
}
// Next advances to the next entry in the tar archive. // Next advances to the next entry in the tar archive.
// //
...@@ -116,9 +66,15 @@ func (tr *Reader) next() (*Header, error) { ...@@ -116,9 +66,15 @@ func (tr *Reader) next() (*Header, error) {
// one or more "header files" until it finds a "normal file". // one or more "header files" until it finds a "normal file".
loop: loop:
for { for {
if err := tr.skipUnread(); err != nil { // Discard the remainder of the file and any padding.
if _, err := tr.curr.Discard(tr.curr.Remaining()); err != nil {
return nil, err
}
if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
return nil, err return nil, err
} }
tr.pad = 0
hdr, rawHdr, err := tr.readHeader() hdr, rawHdr, err := tr.readHeader()
if err != nil { if err != nil {
return nil, err return nil, err
...@@ -192,7 +148,7 @@ func (tr *Reader) handleRegularFile(hdr *Header) error { ...@@ -192,7 +148,7 @@ func (tr *Reader) handleRegularFile(hdr *Header) error {
return ErrHeader return ErrHeader
} }
tr.pad = -nb & (blockSize - 1) // blockSize is a power of two tr.pad = blockPadding(nb)
tr.curr = &regFileReader{r: tr.r, nb: nb} tr.curr = &regFileReader{r: tr.r, nb: nb}
return nil return nil
} }
...@@ -200,87 +156,70 @@ func (tr *Reader) handleRegularFile(hdr *Header) error { ...@@ -200,87 +156,70 @@ func (tr *Reader) handleRegularFile(hdr *Header) error {
// handleSparseFile checks if the current file is a sparse format of any type // handleSparseFile checks if the current file is a sparse format of any type
// and sets the curr reader appropriately. // and sets the curr reader appropriately.
func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error { func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error {
var sp []sparseEntry var spd sparseDatas
var err error var err error
if hdr.Typeflag == TypeGNUSparse { if hdr.Typeflag == TypeGNUSparse {
sp, err = tr.readOldGNUSparseMap(hdr, rawHdr) spd, err = tr.readOldGNUSparseMap(hdr, rawHdr)
if err != nil {
return err
}
} else { } else {
sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) spd, err = tr.readGNUSparsePAXHeaders(hdr, extHdrs)
if err != nil {
return err
}
} }
// If sp is non-nil, then this is a sparse file. // If sp is non-nil, then this is a sparse file.
// Note that it is possible for len(sp) to be zero. // Note that it is possible for len(sp) == 0.
if sp != nil { if err == nil && spd != nil {
tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size) if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) {
return ErrHeader
}
sph := invertSparseEntries(spd, hdr.Size)
tr.curr = &sparseFileReader{tr.curr, sph, 0}
hdr.SparseHoles = append([]SparseEntry{}, sph...)
} }
return err return err
} }
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then // readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers.
// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to // If they are found, then this function reads the sparse map and returns it.
// be treated as a regular file. // This assumes that 0.0 headers have already been converted to 0.1 headers
func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { // by the PAX header parsing logic.
var sparseFormat string func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header, extHdrs map[string]string) (sparseDatas, error) {
// Identify the version of GNU headers.
// Check for sparse format indicators var is1x0 bool
major, majorOk := headers[paxGNUSparseMajor] major, minor := extHdrs[paxGNUSparseMajor], extHdrs[paxGNUSparseMinor]
minor, minorOk := headers[paxGNUSparseMinor] switch {
sparseName, sparseNameOk := headers[paxGNUSparseName] case major == "0" && (minor == "0" || minor == "1"):
_, sparseMapOk := headers[paxGNUSparseMap] is1x0 = false
sparseSize, sparseSizeOk := headers[paxGNUSparseSize] case major == "1" && minor == "0":
sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] is1x0 = true
case major != "" || minor != "":
// Identify which, if any, sparse format applies from which PAX headers are set return nil, nil // Unknown GNU sparse PAX version
if majorOk && minorOk { case extHdrs[paxGNUSparseMap] != "":
sparseFormat = major + "." + minor is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess
} else if sparseNameOk && sparseMapOk { default:
sparseFormat = "0.1" return nil, nil // Not a PAX format GNU sparse file.
} else if sparseSizeOk { }
sparseFormat = "0.0"
} else { // Update hdr from GNU sparse PAX headers.
// Not a PAX format GNU sparse file. if name := extHdrs[paxGNUSparseName]; name != "" {
return nil, nil hdr.Name = name
} }
size := extHdrs[paxGNUSparseSize]
// Check for unknown sparse format if size == "" {
if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { size = extHdrs[paxGNUSparseRealSize]
return nil, nil }
} if size != "" {
n, err := strconv.ParseInt(size, 10, 64)
// Update hdr from GNU sparse PAX headers
if sparseNameOk {
hdr.Name = sparseName
}
if sparseSizeOk {
realSize, err := strconv.ParseInt(sparseSize, 10, 64)
if err != nil { if err != nil {
return nil, ErrHeader return nil, ErrHeader
} }
hdr.Size = realSize hdr.Size = n
} else if sparseRealSizeOk {
realSize, err := strconv.ParseInt(sparseRealSize, 10, 64)
if err != nil {
return nil, ErrHeader
}
hdr.Size = realSize
} }
// Set up the sparse map, according to the particular sparse format in use // Read the sparse map according to the appropriate format.
var sp []sparseEntry if is1x0 {
var err error return readGNUSparseMap1x0(tr.curr)
switch sparseFormat { } else {
case "0.0", "0.1": return readGNUSparseMap0x1(extHdrs)
sp, err = readGNUSparseMap0x1(headers)
case "1.0":
sp, err = readGNUSparseMap1x0(tr.curr)
} }
return sp, err
} }
// mergePAX merges well known headers according to PAX standard. // mergePAX merges well known headers according to PAX standard.
...@@ -376,45 +315,6 @@ func parsePAX(r io.Reader) (map[string]string, error) { ...@@ -376,45 +315,6 @@ func parsePAX(r io.Reader) (map[string]string, error) {
return extHdrs, nil return extHdrs, nil
} }
// skipUnread skips any unread bytes in the existing file entry, as well as any
// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
// encountered in the data portion; it is okay to hit io.EOF in the padding.
//
// Note that this function still works properly even when sparse files are being
// used since numBytes returns the bytes remaining in the underlying io.Reader.
func (tr *Reader) skipUnread() error {
dataSkip := tr.numBytes() // Number of data bytes to skip
totalSkip := dataSkip + tr.pad // Total number of bytes to skip
tr.curr, tr.pad = nil, 0
// If possible, Seek to the last byte before the end of the data section.
// Do this because Seek is often lazy about reporting errors; this will mask
// the fact that the tar stream may be truncated. We can rely on the
// io.CopyN done shortly afterwards to trigger any IO errors.
var seekSkipped int64 // Number of bytes skipped via Seek
if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
// io.Seeker, but calling Seek always returns an error and performs
// no action. Thus, we try an innocent seek to the current position
// to see if Seek is really supported.
pos1, err := sr.Seek(0, io.SeekCurrent)
if err == nil {
// Seek seems supported, so perform the real Seek.
pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent)
if err != nil {
return err
}
seekSkipped = pos2 - pos1
}
}
copySkipped, err := io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
if err == io.EOF && seekSkipped+copySkipped < dataSkip {
err = io.ErrUnexpectedEOF
}
return err
}
// readHeader reads the next block header and assumes that the underlying reader // readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary. It returns the raw block of the // is already aligned to a block boundary. It returns the raw block of the
// header in case further processing is required. // header in case further processing is required.
...@@ -530,7 +430,7 @@ func (tr *Reader) readHeader() (*Header, *block, error) { ...@@ -530,7 +430,7 @@ func (tr *Reader) readHeader() (*Header, *block, error) {
// The Header.Size does not reflect the size of any extended headers used. // The Header.Size does not reflect the size of any extended headers used.
// Thus, this function will read from the raw io.Reader to fetch extra headers. // Thus, this function will read from the raw io.Reader to fetch extra headers.
// This method mutates blk in the process. // This method mutates blk in the process.
func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) { func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) {
// Make sure that the input format is GNU. // Make sure that the input format is GNU.
// Unfortunately, the STAR format also has a sparse header format that uses // Unfortunately, the STAR format also has a sparse header format that uses
// the same type flag but has a completely different layout. // the same type flag but has a completely different layout.
...@@ -543,8 +443,8 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e ...@@ -543,8 +443,8 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e
if p.err != nil { if p.err != nil {
return nil, p.err return nil, p.err
} }
var s sparseArray = blk.GNU().Sparse() s := blk.GNU().Sparse()
var sp = make([]sparseEntry, 0, s.MaxEntries()) spd := make(sparseDatas, 0, s.MaxEntries())
for { for {
for i := 0; i < s.MaxEntries(); i++ { for i := 0; i < s.MaxEntries(); i++ {
// This termination condition is identical to GNU and BSD tar. // This termination condition is identical to GNU and BSD tar.
...@@ -552,25 +452,22 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e ...@@ -552,25 +452,22 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e
break // Don't return, need to process extended headers (even if empty) break // Don't return, need to process extended headers (even if empty)
} }
offset := p.parseNumeric(s.Entry(i).Offset()) offset := p.parseNumeric(s.Entry(i).Offset())
numBytes := p.parseNumeric(s.Entry(i).NumBytes()) length := p.parseNumeric(s.Entry(i).Length())
if p.err != nil { if p.err != nil {
return nil, p.err return nil, p.err
} }
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) spd = append(spd, SparseEntry{Offset: offset, Length: length})
} }
if s.IsExtended()[0] > 0 { if s.IsExtended()[0] > 0 {
// There are more entries. Read an extension header and parse its entries. // There are more entries. Read an extension header and parse its entries.
if _, err := io.ReadFull(tr.r, blk[:]); err != nil { if _, err := mustReadFull(tr.r, blk[:]); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return nil, err return nil, err
} }
s = blk.Sparse() s = blk.Sparse()
continue continue
} }
return sp, nil // Done return spd, nil // Done
} }
} }
...@@ -578,28 +475,27 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e ...@@ -578,28 +475,27 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e
// version 1.0. The format of the sparse map consists of a series of // version 1.0. The format of the sparse map consists of a series of
// newline-terminated numeric fields. The first field is the number of entries // newline-terminated numeric fields. The first field is the number of entries
// and is always present. Following this are the entries, consisting of two // and is always present. Following this are the entries, consisting of two
// fields (offset, numBytes). This function must stop reading at the end // fields (offset, length). This function must stop reading at the end
// boundary of the block containing the last newline. // boundary of the block containing the last newline.
// //
// Note that the GNU manual says that numeric values should be encoded in octal // Note that the GNU manual says that numeric values should be encoded in octal
// format. However, the GNU tar utility itself outputs these values in decimal. // format. However, the GNU tar utility itself outputs these values in decimal.
// As such, this library treats values as being encoded in decimal. // As such, this library treats values as being encoded in decimal.
func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) {
var cntNewline int64 var (
var buf bytes.Buffer cntNewline int64
var blk = make([]byte, blockSize) buf bytes.Buffer
blk block
// feedTokens copies data in numBlock chunks from r into buf until there are )
// feedTokens copies data in blocks from r into buf until there are
// at least cnt newlines in buf. It will not read more blocks than needed. // at least cnt newlines in buf. It will not read more blocks than needed.
var feedTokens = func(cnt int64) error { feedTokens := func(n int64) error {
for cntNewline < cnt { for cntNewline < n {
if _, err := io.ReadFull(r, blk); err != nil { if _, err := mustReadFull(r, blk[:]); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return err return err
} }
buf.Write(blk) buf.Write(blk[:])
for _, c := range blk { for _, c := range blk {
if c == '\n' { if c == '\n' {
cntNewline++ cntNewline++
...@@ -611,10 +507,10 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { ...@@ -611,10 +507,10 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
// nextToken gets the next token delimited by a newline. This assumes that // nextToken gets the next token delimited by a newline. This assumes that
// at least one newline exists in the buffer. // at least one newline exists in the buffer.
var nextToken = func() string { nextToken := func() string {
cntNewline-- cntNewline--
tok, _ := buf.ReadString('\n') tok, _ := buf.ReadString('\n')
return tok[:len(tok)-1] // Cut off newline return strings.TrimRight(tok, "\n")
} }
// Parse for the number of entries. // Parse for the number of entries.
...@@ -633,24 +529,21 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { ...@@ -633,24 +529,21 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
if err := feedTokens(2 * numEntries); err != nil { if err := feedTokens(2 * numEntries); err != nil {
return nil, err return nil, err
} }
sp := make([]sparseEntry, 0, numEntries) spd := make(sparseDatas, 0, numEntries)
for i := int64(0); i < numEntries; i++ { for i := int64(0); i < numEntries; i++ {
offset, err := strconv.ParseInt(nextToken(), 10, 64) offset, err1 := strconv.ParseInt(nextToken(), 10, 64)
if err != nil { length, err2 := strconv.ParseInt(nextToken(), 10, 64)
return nil, ErrHeader if err1 != nil || err2 != nil {
}
numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
if err != nil {
return nil, ErrHeader return nil, ErrHeader
} }
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) spd = append(spd, SparseEntry{Offset: offset, Length: length})
} }
return sp, nil return spd, nil
} }
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
// version 0.1. The sparse map is stored in the PAX headers. // version 0.1. The sparse map is stored in the PAX headers.
func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { func readGNUSparseMap0x1(extHdrs map[string]string) (sparseDatas, error) {
// Get number of entries. // Get number of entries.
// Use integer overflow resistant math to check this. // Use integer overflow resistant math to check this.
numEntriesStr := extHdrs[paxGNUSparseNumBlocks] numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
...@@ -661,52 +554,42 @@ func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { ...@@ -661,52 +554,42 @@ func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
// There should be two numbers in sparseMap for each entry. // There should be two numbers in sparseMap for each entry.
sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
if len(sparseMap) == 1 && sparseMap[0] == "" {
sparseMap = sparseMap[:0]
}
if int64(len(sparseMap)) != 2*numEntries { if int64(len(sparseMap)) != 2*numEntries {
return nil, ErrHeader return nil, ErrHeader
} }
// Loop through the entries in the sparse map. // Loop through the entries in the sparse map.
// numEntries is trusted now. // numEntries is trusted now.
sp := make([]sparseEntry, 0, numEntries) spd := make(sparseDatas, 0, numEntries)
for i := int64(0); i < numEntries; i++ { for len(sparseMap) >= 2 {
offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64)
if err != nil { length, err2 := strconv.ParseInt(sparseMap[1], 10, 64)
if err1 != nil || err2 != nil {
return nil, ErrHeader return nil, ErrHeader
} }
numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) spd = append(spd, SparseEntry{Offset: offset, Length: length})
if err != nil { sparseMap = sparseMap[2:]
return nil, ErrHeader
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
}
return sp, nil
}
// numBytes returns the number of bytes left to read in the current file's entry
// in the tar archive, or 0 if there is no current file.
func (tr *Reader) numBytes() int64 {
if tr.curr == nil {
// No current file, so no bytes
return 0
} }
return tr.curr.numBytes() return spd, nil
} }
// Read reads from the current entry in the tar archive. // Read reads from the current entry in the tar archive.
// It returns 0, io.EOF when it reaches the end of that entry, // It returns 0, io.EOF when it reaches the end of that entry,
// until Next is called to advance to the next entry. // until Next is called to advance to the next entry.
// //
// If the current file is sparse, then the regions marked as a sparse hole
// will read back NUL-bytes.
//
// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, // Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what // TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what
// the Header.Size claims. // the Header.Size claims.
func (tr *Reader) Read(b []byte) (int, error) { func (tr *Reader) Read(b []byte) (int, error) {
if tr.err != nil { if tr.err != nil {
return 0, tr.err return 0, tr.err
} }
if tr.curr == nil {
return 0, io.EOF
}
n, err := tr.curr.Read(b) n, err := tr.curr.Read(b)
if err != nil && err != io.EOF { if err != nil && err != io.EOF {
tr.err = err tr.err = err
...@@ -714,116 +597,210 @@ func (tr *Reader) Read(b []byte) (int, error) { ...@@ -714,116 +597,210 @@ func (tr *Reader) Read(b []byte) (int, error) {
return n, err return n, err
} }
func (rfr *regFileReader) Read(b []byte) (n int, err error) { // TODO(dsnet): Export the Reader.Discard method to assist in quickly
if rfr.nb == 0 { // skipping over sections of a file. This is especially useful:
// file consumed // * when skipping through an underlying io.Reader that is also an io.Seeker.
return 0, io.EOF // * when skipping over large holes in a sparse file.
}
if int64(len(b)) > rfr.nb {
b = b[0:rfr.nb]
}
n, err = rfr.r.Read(b)
rfr.nb -= int64(n)
if err == io.EOF && rfr.nb > 0 { // discard skips the next n bytes in the current file,
err = io.ErrUnexpectedEOF // returning the number of bytes discarded.
// If fewer than n bytes are discarded, it returns an non-nil error,
// which may be io.EOF if there are no more remaining bytes in the current file.
func (tr *Reader) discard(n int64) (int64, error) {
if tr.err != nil {
return 0, tr.err
} }
return n, err := tr.curr.Discard(n)
if err != nil && err != io.EOF {
tr.err = err
}
return n, err
} }
// numBytes returns the number of bytes left to read in the file's data in the tar archive. // regFileReader is a fileReader for reading data from a regular file entry.
func (rfr *regFileReader) numBytes() int64 { type regFileReader struct {
return rfr.nb r io.Reader // Underlying Reader
nb int64 // Number of remaining bytes to read
} }
// newSparseFileReader creates a new sparseFileReader, but validates all of the func (fr *regFileReader) Read(b []byte) (int, error) {
// sparse entries before doing so. if int64(len(b)) > fr.nb {
func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { b = b[:fr.nb]
if total < 0 { }
return nil, ErrHeader // Total size cannot be negative n, err := fr.r.Read(b)
} fr.nb -= int64(n)
switch {
// Validate all sparse entries. These are the same checks as performed by case err == io.EOF && fr.nb > 0:
// the BSD tar utility. return n, io.ErrUnexpectedEOF
for i, s := range sp { case err == nil && fr.nb == 0:
switch { return n, io.EOF
case s.offset < 0 || s.numBytes < 0: default:
return nil, ErrHeader // Negative values are never okay return n, err
case s.offset > math.MaxInt64-s.numBytes:
return nil, ErrHeader // Integer overflow with large length
case s.offset+s.numBytes > total:
return nil, ErrHeader // Region extends beyond the "real" size
case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
return nil, ErrHeader // Regions can't overlap and must be in order
}
} }
return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
} }
// readHole reads a sparse hole ending at endOffset. func (fr *regFileReader) Discard(n int64) (int64, error) {
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { overread := n > fr.Remaining()
n64 := endOffset - sfr.pos if overread {
if n64 > int64(len(b)) { n = fr.Remaining()
n64 = int64(len(b))
} }
n := int(n64)
for i := 0; i < n; i++ { // If possible, Seek to the last byte before the end of the data section.
b[i] = 0 // Do this because Seek is often lazy about reporting errors; this will mask
// the fact that the stream may be truncated. We can rely on the
// io.CopyN done shortly afterwards to trigger any IO errors.
var seekSkipped int64 // Number of bytes skipped via Seek
if sr, ok := fr.r.(io.Seeker); ok && n > 1 {
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
// io.Seeker, but calling Seek always returns an error and performs
// no action. Thus, we try an innocent seek to the current position
// to see if Seek is really supported.
pos1, err := sr.Seek(0, io.SeekCurrent)
if pos1 >= 0 && err == nil {
// Seek seems supported, so perform the real Seek.
pos2, err := sr.Seek(n-1, io.SeekCurrent)
if pos2 < 0 || err != nil {
return 0, err
}
seekSkipped = pos2 - pos1
}
} }
sfr.pos += n64
return n
}
// Read reads the sparse file data in expanded form. copySkipped, err := io.CopyN(ioutil.Discard, fr.r, n-seekSkipped)
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { discarded := seekSkipped + copySkipped
// Skip past all empty fragments. fr.nb -= discarded
for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { switch {
sfr.sp = sfr.sp[1:] case err == io.EOF && discarded < n:
return discarded, io.ErrUnexpectedEOF
case err == nil && overread:
return discarded, io.EOF
default:
return discarded, err
} }
}
// If there are no more fragments, then it is possible that there func (rf regFileReader) Remaining() int64 {
// is one last sparse hole. return rf.nb
if len(sfr.sp) == 0 { }
// This behavior matches the BSD tar utility.
// However, GNU tar stops returning data even if sfr.total is unmet. // sparseFileReader is a fileReader for reading data from a sparse file entry.
if sfr.pos < sfr.total { type sparseFileReader struct {
return sfr.readHole(b, sfr.total), nil fr fileReader // Underlying fileReader
sp sparseHoles // Normalized list of sparse holes
pos int64 // Current position in sparse file
}
func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
finished := int64(len(b)) >= sr.Remaining()
if finished {
b = b[:sr.Remaining()]
}
b0 := b
endPos := sr.pos + int64(len(b))
for endPos > sr.pos && err == nil {
var nf int // Bytes read in fragment
holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
if sr.pos < holeStart { // In a data fragment
bf := b[:min(int64(len(b)), holeStart-sr.pos)]
nf, err = tryReadFull(sr.fr, bf)
} else { // In a hole fragment
bf := b[:min(int64(len(b)), holeEnd-sr.pos)]
nf, err = tryReadFull(zeroReader{}, bf)
}
b = b[nf:]
sr.pos += int64(nf)
if sr.pos >= holeEnd && len(sr.sp) > 1 {
sr.sp = sr.sp[1:] // Ensure last fragment always remains
} }
return 0, io.EOF
} }
// In front of a data fragment, so read a hole. n = len(b0) - len(b)
if sfr.pos < sfr.sp[0].offset { switch {
return sfr.readHole(b, sfr.sp[0].offset), nil case err == io.EOF:
return n, errMissData // Less data in dense file than sparse file
case err != nil:
return n, err
case sr.Remaining() == 0 && sr.fr.Remaining() > 0:
return n, errUnrefData // More data in dense file than sparse file
case finished:
return n, io.EOF
default:
return n, nil
}
}
func (sr *sparseFileReader) Discard(n int64) (int64, error) {
overread := n > sr.Remaining()
if overread {
n = sr.Remaining()
}
var realDiscard int64 // Number of real data bytes to discard
endPos := sr.pos + n
for endPos > sr.pos {
var nf int64 // Size of fragment
holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
if sr.pos < holeStart { // In a data fragment
nf = min(endPos-sr.pos, holeStart-sr.pos)
realDiscard += nf
} else { // In a hole fragment
nf = min(endPos-sr.pos, holeEnd-sr.pos)
}
sr.pos += nf
if sr.pos >= holeEnd && len(sr.sp) > 1 {
sr.sp = sr.sp[1:] // Ensure last fragment always remains
}
} }
// In a data fragment, so read from it. _, err := sr.fr.Discard(realDiscard)
// This math is overflow free since we verify that offset and numBytes can switch {
// be safely added when creating the sparseFileReader. case err == io.EOF:
endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment return n, errMissData // Less data in dense file than sparse file
bytesLeft := endPos - sfr.pos // Bytes left in fragment case err != nil:
if int64(len(b)) > bytesLeft { return n, err
b = b[:bytesLeft] case sr.Remaining() == 0 && sr.fr.Remaining() > 0:
return n, errUnrefData // More data in dense file than sparse file
case overread:
return n, io.EOF
default:
return n, nil
} }
}
n, err = sfr.rfr.Read(b) func (sr sparseFileReader) Remaining() int64 {
sfr.pos += int64(n) return sr.sp[len(sr.sp)-1].endOffset() - sr.pos
if err == io.EOF { }
if sfr.pos < endPos {
err = io.ErrUnexpectedEOF // There was supposed to be more data type zeroReader struct{}
} else if sfr.pos < sfr.total {
err = nil // There is still an implicit sparse hole at the end func (zeroReader) Read(b []byte) (int, error) {
} for i := range b {
b[i] = 0
} }
return len(b), nil
}
if sfr.pos == endPos { // mustReadFull is like io.ReadFull except it returns
sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it // io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read.
func mustReadFull(r io.Reader, b []byte) (int, error) {
n, err := tryReadFull(r, b)
if err == io.EOF {
err = io.ErrUnexpectedEOF
} }
return n, err return n, err
} }
// numBytes returns the number of bytes left to read in the sparse file's // tryReadFull is like io.ReadFull except it returns
// sparse-encoded data in the tar archive. // io.EOF when it is hit before len(b) bytes are read.
func (sfr *sparseFileReader) numBytes() int64 { func tryReadFull(r io.Reader, b []byte) (n int, err error) {
return sfr.rfr.numBytes() for len(b) > n && err == nil {
var nn int
nn, err = r.Read(b[n:])
n += nn
}
if len(b) == n && err == io.EOF {
err = nil
}
return n, err
} }
...@@ -14,6 +14,7 @@ import ( ...@@ -14,6 +14,7 @@ import (
"os" "os"
"path" "path"
"reflect" "reflect"
"strconv"
"strings" "strings"
"testing" "testing"
"time" "time"
...@@ -67,6 +68,23 @@ func TestReader(t *testing.T) { ...@@ -67,6 +68,23 @@ func TestReader(t *testing.T) {
Gname: "david", Gname: "david",
Devmajor: 0, Devmajor: 0,
Devminor: 0, Devminor: 0,
SparseHoles: []SparseEntry{
{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1},
{16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1},
{30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1},
{44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1},
{58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1},
{72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1},
{86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1},
{100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1},
{112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1},
{124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1},
{136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1},
{148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1},
{160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1},
{172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1},
{184, 1}, {186, 1}, {188, 1}, {190, 10},
},
}, { }, {
Name: "sparse-posix-0.0", Name: "sparse-posix-0.0",
Mode: 420, Mode: 420,
...@@ -80,6 +98,23 @@ func TestReader(t *testing.T) { ...@@ -80,6 +98,23 @@ func TestReader(t *testing.T) {
Gname: "david", Gname: "david",
Devmajor: 0, Devmajor: 0,
Devminor: 0, Devminor: 0,
SparseHoles: []SparseEntry{
{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1},
{16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1},
{30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1},
{44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1},
{58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1},
{72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1},
{86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1},
{100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1},
{112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1},
{124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1},
{136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1},
{148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1},
{160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1},
{172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1},
{184, 1}, {186, 1}, {188, 1}, {190, 10},
},
}, { }, {
Name: "sparse-posix-0.1", Name: "sparse-posix-0.1",
Mode: 420, Mode: 420,
...@@ -93,6 +128,23 @@ func TestReader(t *testing.T) { ...@@ -93,6 +128,23 @@ func TestReader(t *testing.T) {
Gname: "david", Gname: "david",
Devmajor: 0, Devmajor: 0,
Devminor: 0, Devminor: 0,
SparseHoles: []SparseEntry{
{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1},
{16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1},
{30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1},
{44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1},
{58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1},
{72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1},
{86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1},
{100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1},
{112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1},
{124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1},
{136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1},
{148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1},
{160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1},
{172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1},
{184, 1}, {186, 1}, {188, 1}, {190, 10},
},
}, { }, {
Name: "sparse-posix-1.0", Name: "sparse-posix-1.0",
Mode: 420, Mode: 420,
...@@ -106,6 +158,23 @@ func TestReader(t *testing.T) { ...@@ -106,6 +158,23 @@ func TestReader(t *testing.T) {
Gname: "david", Gname: "david",
Devmajor: 0, Devmajor: 0,
Devminor: 0, Devminor: 0,
SparseHoles: []SparseEntry{
{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1},
{16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1},
{30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1},
{44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1},
{58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1},
{72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1},
{86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1},
{100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1},
{112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1},
{124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1},
{136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1},
{148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1},
{160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1},
{172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1},
{184, 1}, {186, 1}, {188, 1}, {190, 10},
},
}, { }, {
Name: "end", Name: "end",
Mode: 420, Mode: 420,
...@@ -314,17 +383,18 @@ func TestReader(t *testing.T) { ...@@ -314,17 +383,18 @@ func TestReader(t *testing.T) {
AccessTime: time.Unix(1441974501, 0), AccessTime: time.Unix(1441974501, 0),
ChangeTime: time.Unix(1441973436, 0), ChangeTime: time.Unix(1441973436, 0),
}, { }, {
Name: "test2/sparse", Name: "test2/sparse",
Mode: 33188, Mode: 33188,
Uid: 1000, Uid: 1000,
Gid: 1000, Gid: 1000,
Size: 536870912, Size: 536870912,
ModTime: time.Unix(1441973427, 0), ModTime: time.Unix(1441973427, 0),
Typeflag: 'S', Typeflag: 'S',
Uname: "rawr", Uname: "rawr",
Gname: "dsnet", Gname: "dsnet",
AccessTime: time.Unix(1441991948, 0), AccessTime: time.Unix(1441991948, 0),
ChangeTime: time.Unix(1441973436, 0), ChangeTime: time.Unix(1441973436, 0),
SparseHoles: []SparseEntry{{0, 536870912}},
}}, }},
}, { }, {
// Matches the behavior of GNU and BSD tar utilities. // Matches the behavior of GNU and BSD tar utilities.
...@@ -555,375 +625,6 @@ func TestPartialRead(t *testing.T) { ...@@ -555,375 +625,6 @@ func TestPartialRead(t *testing.T) {
} }
} }
// TestSparseFileReader verifies that sparseFileReader expands a compact data
// stream into its full sparse form, filling holes with NUL bytes, and that
// newSparseFileReader rejects malformed sparse maps (negative sizes/offsets,
// overflow, overlap, and regions past the real size).
func TestSparseFileReader(t *testing.T) {
	vectors := []struct {
		realSize   int64         // Real size of the output file
		sparseMap  []sparseEntry // Input sparse map
		sparseData string        // Input compact data
		expected   string        // Expected output data
		err        error         // Expected error outcome
	}{{
		realSize: 8,
		sparseMap: []sparseEntry{
			{offset: 0, numBytes: 2},
			{offset: 5, numBytes: 3},
		},
		sparseData: "abcde",
		expected:   "ab\x00\x00\x00cde",
	}, {
		// Trailing hole past the last data fragment.
		realSize: 10,
		sparseMap: []sparseEntry{
			{offset: 0, numBytes: 2},
			{offset: 5, numBytes: 3},
		},
		sparseData: "abcde",
		expected:   "ab\x00\x00\x00cde\x00\x00",
	}, {
		realSize: 8,
		sparseMap: []sparseEntry{
			{offset: 1, numBytes: 3},
			{offset: 6, numBytes: 2},
		},
		sparseData: "abcde",
		expected:   "\x00abc\x00\x00de",
	}, {
		// Zero-length fragments must be skipped harmlessly.
		realSize: 8,
		sparseMap: []sparseEntry{
			{offset: 1, numBytes: 3},
			{offset: 6, numBytes: 0},
			{offset: 6, numBytes: 0},
			{offset: 6, numBytes: 2},
		},
		sparseData: "abcde",
		expected:   "\x00abc\x00\x00de",
	}, {
		realSize: 10,
		sparseMap: []sparseEntry{
			{offset: 1, numBytes: 3},
			{offset: 6, numBytes: 2},
		},
		sparseData: "abcde",
		expected:   "\x00abc\x00\x00de\x00\x00",
	}, {
		realSize: 10,
		sparseMap: []sparseEntry{
			{offset: 1, numBytes: 3},
			{offset: 6, numBytes: 2},
			{offset: 8, numBytes: 0},
			{offset: 8, numBytes: 0},
			{offset: 8, numBytes: 0},
			{offset: 8, numBytes: 0},
		},
		sparseData: "abcde",
		expected:   "\x00abc\x00\x00de\x00\x00",
	}, {
		// Empty map: file is one big hole.
		realSize:   2,
		sparseMap:  []sparseEntry{},
		sparseData: "",
		expected:   "\x00\x00",
	}, {
		// Negative real size is rejected at construction.
		realSize:  -2,
		sparseMap: []sparseEntry{},
		err:       ErrHeader,
	}, {
		realSize: -10,
		sparseMap: []sparseEntry{
			{offset: 1, numBytes: 3},
			{offset: 6, numBytes: 2},
		},
		sparseData: "abcde",
		err:        ErrHeader,
	}, {
		// Fragment extends beyond realSize.
		realSize: 10,
		sparseMap: []sparseEntry{
			{offset: 1, numBytes: 3},
			{offset: 6, numBytes: 5},
		},
		sparseData: "abcde",
		err:        ErrHeader,
	}, {
		// Map is valid, but the compact stream is truncated.
		realSize: 35,
		sparseMap: []sparseEntry{
			{offset: 1, numBytes: 3},
			{offset: 6, numBytes: 5},
		},
		sparseData: "abcde",
		err:        io.ErrUnexpectedEOF,
	}, {
		realSize: 35,
		sparseMap: []sparseEntry{
			{offset: 1, numBytes: 3},
			{offset: 6, numBytes: -5},
		},
		sparseData: "abcde",
		err:        ErrHeader,
	}, {
		// offset+numBytes would overflow int64.
		realSize: 35,
		sparseMap: []sparseEntry{
			{offset: math.MaxInt64, numBytes: 3},
			{offset: 6, numBytes: -5},
		},
		sparseData: "abcde",
		err:        ErrHeader,
	}, {
		// Overlapping fragments are rejected.
		realSize: 10,
		sparseMap: []sparseEntry{
			{offset: 1, numBytes: 3},
			{offset: 2, numBytes: 2},
		},
		sparseData: "abcde",
		err:        ErrHeader,
	}}

	for i, v := range vectors {
		r := bytes.NewReader([]byte(v.sparseData))
		rfr := &regFileReader{r: r, nb: int64(len(v.sparseData))}
		var (
			sfr *sparseFileReader
			err error
			buf []byte
		)

		// goto fail funnels every early exit into the single error check
		// below, so vectors that expect an error skip the data assertions.
		sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize)
		if err != nil {
			goto fail
		}
		if sfr.numBytes() != int64(len(v.sparseData)) {
			t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData))
		}
		buf, err = ioutil.ReadAll(sfr)
		if err != nil {
			goto fail
		}
		if string(buf) != v.expected {
			t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected)
		}
		if sfr.numBytes() != 0 {
			t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0)
		}

	fail:
		if err != v.err {
			t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
		}
	}
}
// TestReadOldGNUSparseMap exercises parsing of the old GNU sparse format,
// where up to four sparse entries live inline in the header block and an
// extension flag chains additional 512-byte sparse blocks from the stream.
func TestReadOldGNUSparseMap(t *testing.T) {
	const (
		t00 = "00000000000\x0000000000000\x00"
		t11 = "00000000001\x0000000000001\x00"
		t12 = "00000000001\x0000000000002\x00"
		t21 = "00000000002\x0000000000001\x00"
	)

	// newBlock assembles a raw GNU header block: real size, four inline
	// sparse entries, the extension flag, and (optionally) a format magic.
	newBlock := func(size, sp0, sp1, sp2, sp3, ext string, format int) *block {
		var blk block
		gnu := blk.GNU()
		copy(gnu.RealSize(), size)
		sparse := gnu.Sparse()
		for idx, ent := range []string{sp0, sp1, sp2, sp3} {
			copy(sparse.Entry(idx), ent)
		}
		copy(sparse.IsExtended(), ext)
		if format != formatUnknown {
			blk.SetFormat(format)
		}
		return &blk
	}

	vectors := []struct {
		data   string        // Input data
		rawHdr *block        // Input raw header
		want   []sparseEntry // Expected sparse entries to be outputted
		err    error         // Expected error to be returned
	}{
		{"", newBlock("", "", "", "", "", "", formatUnknown), nil, ErrHeader},
		{"", newBlock("1234", "fewa", "", "", "", "", formatGNU), nil, ErrHeader},
		{"", newBlock("0031", "", "", "", "", "", formatGNU), nil, nil},
		{"", newBlock("1234", t00, t11, "", "", "", formatGNU),
			[]sparseEntry{{0, 0}, {1, 1}}, nil},
		{"", newBlock("1234", t11, t12, t21, t11, "", formatGNU),
			[]sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}}, nil},
		{"", newBlock("1234", t11, t12, t21, t11, "\x80", formatGNU),
			[]sparseEntry{}, io.ErrUnexpectedEOF},
		{t11 + t11,
			newBlock("1234", t11, t12, t21, t11, "\x80", formatGNU),
			[]sparseEntry{}, io.ErrUnexpectedEOF},
		{t11 + t21 + strings.Repeat("\x00", 512),
			newBlock("1234", t11, t12, t21, t11, "\x80", formatGNU),
			[]sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}, {1, 1}, {2, 1}}, nil},
	}

	for i := range vectors {
		v := vectors[i]
		tr := Reader{r: strings.NewReader(v.data)}
		hdr := new(Header)
		got, err := tr.readOldGNUSparseMap(hdr, v.rawHdr)
		// Treat nil and empty slices as equal expected maps.
		bothEmpty := len(got) == 0 && len(v.want) == 0
		if !bothEmpty && !reflect.DeepEqual(got, v.want) {
			t.Errorf("test %d, readOldGNUSparseMap(...): got %v, want %v", i, got, v.want)
		}
		if err != v.err {
			t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
		}
	}
}
// TestReadGNUSparseMap0x1 exercises parsing of the GNU PAX sparse format
// version 0.1, where the sparse map is a comma-separated list of
// offset/length pairs stored in the PAX extended headers.
func TestReadGNUSparseMap0x1(t *testing.T) {
	// Decimal renderings of counts near the int overflow boundary, used to
	// verify overflow-resistant validation of the block count.
	maxInt := int64(^uint(0) >> 1)
	var (
		big1 = fmt.Sprintf("%d", maxInt)
		big2 = fmt.Sprintf("%d", maxInt/2+1)
		big3 = fmt.Sprintf("%d", maxInt/3)
	)

	vectors := []struct {
		extHdrs   map[string]string // Input data
		sparseMap []sparseEntry     // Expected sparse entries to be outputted
		err       error             // Expected errors that may be raised
	}{{
		extHdrs: map[string]string{paxGNUSparseNumBlocks: "-4"},
		err:     ErrHeader,
	}, {
		extHdrs: map[string]string{paxGNUSparseNumBlocks: "fee "},
		err:     ErrHeader,
	}, {
		extHdrs: map[string]string{
			paxGNUSparseNumBlocks: big1,
			paxGNUSparseMap:       "0,5,10,5,20,5,30,5",
		},
		err: ErrHeader,
	}, {
		extHdrs: map[string]string{
			paxGNUSparseNumBlocks: big2,
			paxGNUSparseMap:       "0,5,10,5,20,5,30,5",
		},
		err: ErrHeader,
	}, {
		extHdrs: map[string]string{
			paxGNUSparseNumBlocks: big3,
			paxGNUSparseMap:       "0,5,10,5,20,5,30,5",
		},
		err: ErrHeader,
	}, {
		extHdrs: map[string]string{
			paxGNUSparseNumBlocks: "4",
			paxGNUSparseMap:       "0.5,5,10,5,20,5,30,5",
		},
		err: ErrHeader,
	}, {
		extHdrs: map[string]string{
			paxGNUSparseNumBlocks: "4",
			paxGNUSparseMap:       "0,5.5,10,5,20,5,30,5",
		},
		err: ErrHeader,
	}, {
		extHdrs: map[string]string{
			paxGNUSparseNumBlocks: "4",
			paxGNUSparseMap:       "0,fewafewa.5,fewafw,5,20,5,30,5",
		},
		err: ErrHeader,
	}, {
		extHdrs: map[string]string{
			paxGNUSparseNumBlocks: "4",
			paxGNUSparseMap:       "0,5,10,5,20,5,30,5",
		},
		sparseMap: []sparseEntry{{0, 5}, {10, 5}, {20, 5}, {30, 5}},
	}}

	for i, v := range vectors {
		got, err := readGNUSparseMap0x1(v.extHdrs)
		// nil and empty slices are interchangeable for an empty map.
		bothEmpty := len(got) == 0 && len(v.sparseMap) == 0
		if !bothEmpty && !reflect.DeepEqual(got, v.sparseMap) {
			t.Errorf("test %d, readGNUSparseMap0x1(...): got %v, want %v", i, got, v.sparseMap)
		}
		if err != v.err {
			t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
		}
	}
}
func TestReadGNUSparseMap1x0(t *testing.T) {
sp := []sparseEntry{{1, 2}, {3, 4}}
for i := 0; i < 98; i++ {
sp = append(sp, sparseEntry{54321, 12345})
}
vectors := []struct {
input string // Input data
sparseMap []sparseEntry // Expected sparse entries to be outputted
cnt int // Expected number of bytes read
err error // Expected errors that may be raised
}{{
input: "",
cnt: 0,
err: io.ErrUnexpectedEOF,
}, {
input: "ab",
cnt: 2,
err: io.ErrUnexpectedEOF,
}, {
input: strings.Repeat("\x00", 512),
cnt: 512,
err: io.ErrUnexpectedEOF,
}, {
input: strings.Repeat("\x00", 511) + "\n",
cnt: 512,
err: ErrHeader,
}, {
input: strings.Repeat("\n", 512),
cnt: 512,
err: ErrHeader,
}, {
input: "0\n" + strings.Repeat("\x00", 510) + strings.Repeat("a", 512),
sparseMap: []sparseEntry{},
cnt: 512,
}, {
input: strings.Repeat("0", 512) + "0\n" + strings.Repeat("\x00", 510),
sparseMap: []sparseEntry{},
cnt: 1024,
}, {
input: strings.Repeat("0", 1024) + "1\n2\n3\n" + strings.Repeat("\x00", 506),
sparseMap: []sparseEntry{{2, 3}},
cnt: 1536,
}, {
input: strings.Repeat("0", 1024) + "1\n2\n\n" + strings.Repeat("\x00", 509),
cnt: 1536,
err: ErrHeader,
}, {
input: strings.Repeat("0", 1024) + "1\n2\n" + strings.Repeat("\x00", 508),
cnt: 1536,
err: io.ErrUnexpectedEOF,
}, {
input: "-1\n2\n\n" + strings.Repeat("\x00", 506),
cnt: 512,
err: ErrHeader,
}, {
input: "1\nk\n2\n" + strings.Repeat("\x00", 506),
cnt: 512,
err: ErrHeader,
}, {
input: "100\n1\n2\n3\n4\n" + strings.Repeat("54321\n0000000000000012345\n", 98) + strings.Repeat("\x00", 512),
cnt: 2560,
sparseMap: sp,
}}
for i, v := range vectors {
r := strings.NewReader(v.input)
sp, err := readGNUSparseMap1x0(r)
if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) {
t.Errorf("test %d, readGNUSparseMap1x0(...): got %v, want %v", i, sp, v.sparseMap)
}
if numBytes := len(v.input) - r.Len(); numBytes != v.cnt {
t.Errorf("test %d, bytes read: got %v, want %v", i, numBytes, v.cnt)
}
if err != v.err {
t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
}
}
}
func TestUninitializedRead(t *testing.T) { func TestUninitializedRead(t *testing.T) {
f, err := os.Open("testdata/gnu.tar") f, err := os.Open("testdata/gnu.tar")
if err != nil { if err != nil {
...@@ -1192,3 +893,539 @@ func TestParsePAX(t *testing.T) { ...@@ -1192,3 +893,539 @@ func TestParsePAX(t *testing.T) {
} }
} }
} }
// TestReadOldGNUSparseMap verifies parsing of the old GNU sparse format,
// including the chaining of extended sparse blocks via the IsExtended flag,
// truncated streams, and population of Header.Size from the RealSize field.
func TestReadOldGNUSparseMap(t *testing.T) {
	// populateSparseMap copies entries into sa until it is full, setting the
	// extension flag if any entries remain, and returns the leftovers.
	populateSparseMap := func(sa sparseArray, sps []string) []string {
		for i := 0; len(sps) > 0 && i < sa.MaxEntries(); i++ {
			copy(sa.Entry(i), sps[0])
			sps = sps[1:]
		}
		if len(sps) > 0 {
			copy(sa.IsExtended(), "\x80")
		}
		return sps
	}

	// makeInput serializes a GNU header block followed by as many extended
	// sparse blocks as needed to hold all of sps.
	makeInput := func(format int, size string, sps ...string) (out []byte) {
		// Write the initial GNU header.
		var blk block
		gnu := blk.GNU()
		sparse := gnu.Sparse()
		copy(gnu.RealSize(), size)
		sps = populateSparseMap(sparse, sps)
		if format != formatUnknown {
			blk.SetFormat(format)
		}
		out = append(out, blk[:]...)

		// Write extended sparse blocks.
		for len(sps) > 0 {
			var blk block
			sps = populateSparseMap(blk.Sparse(), sps)
			out = append(out, blk[:]...)
		}
		return out
	}

	// makeSparseStrings renders each entry as two adjacent 12-byte numeric
	// fields (offset, then length), the on-disk encoding of a sparse entry.
	makeSparseStrings := func(sp []SparseEntry) (out []string) {
		var f formatter
		for _, s := range sp {
			var b [24]byte
			f.formatNumeric(b[:12], s.Offset)
			f.formatNumeric(b[12:], s.Length)
			out = append(out, string(b[:]))
		}
		return out
	}

	vectors := []struct {
		input    []byte      // Raw serialized header (+ extended blocks)
		wantMap  sparseDatas // Expected parsed sparse map
		wantSize int64       // Expected Header.Size (parsed from RealSize, octal)
		wantErr  error       // Expected error
	}{{
		input:   makeInput(formatUnknown, ""),
		wantErr: ErrHeader,
	}, {
		input:    makeInput(formatGNU, "1234", "fewa"),
		wantSize: 01234,
		wantErr:  ErrHeader,
	}, {
		input:    makeInput(formatGNU, "0031"),
		wantSize: 031,
	}, {
		input:   makeInput(formatGNU, "80"),
		wantErr: ErrHeader,
	}, {
		input: makeInput(formatGNU, "1234",
			makeSparseStrings(sparseDatas{{0, 0}, {1, 1}})...),
		wantMap:  sparseDatas{{0, 0}, {1, 1}},
		wantSize: 01234,
	}, {
		// Trailing garbage after the terminating empty entry is ignored.
		input: makeInput(formatGNU, "1234",
			append(makeSparseStrings(sparseDatas{{0, 0}, {1, 1}}), []string{"", "blah"}...)...),
		wantMap:  sparseDatas{{0, 0}, {1, 1}},
		wantSize: 01234,
	}, {
		input: makeInput(formatGNU, "3333",
			makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}})...),
		wantMap:  sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}},
		wantSize: 03333,
	}, {
		// Empty entries mid-map act as padding before an extended block.
		input: makeInput(formatGNU, "",
			append(append(
				makeSparseStrings(sparseDatas{{0, 1}, {2, 1}}),
				[]string{"", ""}...),
				makeSparseStrings(sparseDatas{{4, 1}, {6, 1}})...)...),
		wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}},
	}, {
		// Extended block promised but stream cut at one block.
		input: makeInput(formatGNU, "",
			makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:blockSize],
		wantErr: io.ErrUnexpectedEOF,
	}, {
		// Stream cut mid-way through the extended block.
		input: makeInput(formatGNU, "",
			makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:3*blockSize/2],
		wantErr: io.ErrUnexpectedEOF,
	}, {
		input: makeInput(formatGNU, "",
			makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...),
		wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}},
	}, {
		input: makeInput(formatGNU, "",
			makeSparseStrings(sparseDatas{{10 << 30, 512}, {20 << 30, 512}})...),
		wantMap: sparseDatas{{10 << 30, 512}, {20 << 30, 512}},
	}}

	for i, v := range vectors {
		var blk block
		var hdr Header
		// The first block is handed to readOldGNUSparseMap directly; the
		// remainder is served through the Reader as extended sparse blocks.
		v.input = v.input[copy(blk[:], v.input):]
		tr := Reader{r: bytes.NewReader(v.input)}
		got, err := tr.readOldGNUSparseMap(&hdr, &blk)
		if !equalSparseEntries(got, v.wantMap) {
			t.Errorf("test %d, readOldGNUSparseMap(): got %v, want %v", i, got, v.wantMap)
		}
		if err != v.wantErr {
			t.Errorf("test %d, readOldGNUSparseMap() = %v, want %v", i, err, v.wantErr)
		}
		if hdr.Size != v.wantSize {
			t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize)
		}
	}
}
// TestReadGNUSparsePAXHeaders verifies readGNUSparsePAXHeaders across all
// GNU PAX sparse variants (0.0, 0.1, and 1.0), checking the parsed sparse
// map, Header.Size, Header.Name, error results, and — via a trailing canary
// byte — that no input beyond the sparse map is consumed.
func TestReadGNUSparsePAXHeaders(t *testing.T) {
	// padInput pads s with NULs to the next 512-byte block boundary, as a
	// version-1.0 sparse map is stored in whole blocks of the data section.
	padInput := func(s string) string {
		return s + string(zeroBlock[:blockPadding(int64(len(s)))])
	}

	vectors := []struct {
		inputData string            // File data preceding the real file contents
		inputHdrs map[string]string // PAX headers describing the sparse map
		wantMap   sparseDatas       // Expected parsed sparse map
		wantSize  int64             // Expected Header.Size
		wantName  string            // Expected Header.Name
		wantErr   error             // Expected error
	}{{
		// No sparse headers at all: not a sparse file, no error.
		inputHdrs: nil,
		wantErr:   nil,
	}, {
		inputHdrs: map[string]string{
			paxGNUSparseNumBlocks: strconv.FormatInt(math.MaxInt64, 10),
			paxGNUSparseMap:       "0,1,2,3",
		},
		wantErr: ErrHeader,
	}, {
		inputHdrs: map[string]string{
			paxGNUSparseNumBlocks: "4\x00",
			paxGNUSparseMap:       "0,1,2,3",
		},
		wantErr: ErrHeader,
	}, {
		// Block count disagrees with the number of map entries.
		inputHdrs: map[string]string{
			paxGNUSparseNumBlocks: "4",
			paxGNUSparseMap:       "0,1,2,3",
		},
		wantErr: ErrHeader,
	}, {
		inputHdrs: map[string]string{
			paxGNUSparseNumBlocks: "2",
			paxGNUSparseMap:       "0,1,2,3",
		},
		wantMap: sparseDatas{{0, 1}, {2, 3}},
	}, {
		// Whitespace inside a map value is rejected.
		inputHdrs: map[string]string{
			paxGNUSparseNumBlocks: "2",
			paxGNUSparseMap:       "0, 1,2,3",
		},
		wantErr: ErrHeader,
	}, {
		inputHdrs: map[string]string{
			paxGNUSparseNumBlocks: "2",
			paxGNUSparseMap:       "0,1,02,3",
			paxGNUSparseRealSize:  "4321",
		},
		wantMap:  sparseDatas{{0, 1}, {2, 3}},
		wantSize: 4321,
	}, {
		inputHdrs: map[string]string{
			paxGNUSparseNumBlocks: "2",
			paxGNUSparseMap:       "0,one1,2,3",
		},
		wantErr: ErrHeader,
	}, {
		// Version 0.0 with explicit major/minor, size, and real name.
		inputHdrs: map[string]string{
			paxGNUSparseMajor:     "0",
			paxGNUSparseMinor:     "0",
			paxGNUSparseNumBlocks: "2",
			paxGNUSparseMap:       "0,1,2,3",
			paxGNUSparseSize:      "1234",
			paxGNUSparseRealSize:  "4321",
			paxGNUSparseName:      "realname",
		},
		wantMap:  sparseDatas{{0, 1}, {2, 3}},
		wantSize: 1234,
		wantName: "realname",
	}, {
		inputHdrs: map[string]string{
			paxGNUSparseMajor:     "0",
			paxGNUSparseMinor:     "0",
			paxGNUSparseNumBlocks: "1",
			paxGNUSparseMap:       "10737418240,512",
			paxGNUSparseSize:      "10737418240",
			paxGNUSparseName:      "realname",
		},
		wantMap:  sparseDatas{{10737418240, 512}},
		wantSize: 10737418240,
		wantName: "realname",
	}, {
		inputHdrs: map[string]string{
			paxGNUSparseMajor:     "0",
			paxGNUSparseMinor:     "0",
			paxGNUSparseNumBlocks: "0",
			paxGNUSparseMap:       "",
		},
		wantMap: sparseDatas{},
	}, {
		// Version 0.1: map still lives in the PAX headers.
		inputHdrs: map[string]string{
			paxGNUSparseMajor:     "0",
			paxGNUSparseMinor:     "1",
			paxGNUSparseNumBlocks: "4",
			paxGNUSparseMap:       "0,5,10,5,20,5,30,5",
		},
		wantMap: sparseDatas{{0, 5}, {10, 5}, {20, 5}, {30, 5}},
	}, {
		// Version 1.0 ignores the PAX map and reads it from the data
		// section; with no inputData the stream ends prematurely.
		inputHdrs: map[string]string{
			paxGNUSparseMajor:     "1",
			paxGNUSparseMinor:     "0",
			paxGNUSparseNumBlocks: "4",
			paxGNUSparseMap:       "0,5,10,5,20,5,30,5",
		},
		wantErr: io.ErrUnexpectedEOF,
	}, {
		inputData: padInput("0\n"),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantMap:   sparseDatas{},
	}, {
		// Non-NUL byte in the block padding is tolerated.
		inputData: padInput("0\n")[:blockSize-1] + "#",
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantMap:   sparseDatas{},
	}, {
		inputData: padInput("0"),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantErr:   io.ErrUnexpectedEOF,
	}, {
		inputData: padInput("ab\n"),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantErr:   ErrHeader,
	}, {
		inputData: padInput("1\n2\n3\n"),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantMap:   sparseDatas{{2, 3}},
	}, {
		inputData: padInput("1\n2\n"),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantErr:   io.ErrUnexpectedEOF,
	}, {
		inputData: padInput("1\n2\n\n"),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantErr:   ErrHeader,
	}, {
		// Leading all-zero block before the map is invalid.
		inputData: string(zeroBlock[:]) + padInput("0\n"),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantErr:   ErrHeader,
	}, {
		// Long runs of leading ASCII zeros in a number are accepted.
		inputData: strings.Repeat("0", blockSize) + padInput("1\n5\n1\n"),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantMap:   sparseDatas{{5, 1}},
	}, {
		inputData: padInput(fmt.Sprintf("%d\n", int64(math.MaxInt64))),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantErr:   ErrHeader,
	}, {
		inputData: padInput(strings.Repeat("0", 300) + "1\n" + strings.Repeat("0", 1000) + "5\n" + strings.Repeat("0", 800) + "2\n"),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantMap:   sparseDatas{{5, 2}},
	}, {
		inputData: padInput("2\n10737418240\n512\n21474836480\n512\n"),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantMap:   sparseDatas{{10737418240, 512}, {21474836480, 512}},
	}, {
		// A 100-entry map spanning many blocks.
		inputData: padInput("100\n" + func() string {
			var ss []string
			for i := 0; i < 100; i++ {
				ss = append(ss, fmt.Sprintf("%d\n%d\n", int64(i)<<30, 512))
			}
			return strings.Join(ss, "")
		}()),
		inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
		wantMap: func() (spd sparseDatas) {
			for i := 0; i < 100; i++ {
				spd = append(spd, SparseEntry{int64(i) << 30, 512})
			}
			return spd
		}(),
	}}

	for i, v := range vectors {
		var hdr Header
		r := strings.NewReader(v.inputData + "#") // Add canary byte
		tr := Reader{curr: &regFileReader{r, int64(r.Len())}}
		got, err := tr.readGNUSparsePAXHeaders(&hdr, v.inputHdrs)
		if !equalSparseEntries(got, v.wantMap) {
			t.Errorf("test %d, readGNUSparsePAXHeaders(): got %v, want %v", i, got, v.wantMap)
		}
		if err != v.wantErr {
			t.Errorf("test %d, readGNUSparsePAXHeaders() = %v, want %v", i, err, v.wantErr)
		}
		if hdr.Size != v.wantSize {
			t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize)
		}
		if hdr.Name != v.wantName {
			t.Errorf("test %d, Header.Name = %s, want %s", i, hdr.Name, v.wantName)
		}
		// On success the canary must still be unread, proving the parser
		// consumed exactly the sparse map and its padding.
		if v.wantErr == nil && r.Len() == 0 {
			t.Errorf("test %d, canary byte unexpectedly consumed", i)
		}
	}
}
// TestFileReader exercises the Read, Discard, and Remaining methods of both
// regFileReader and sparseFileReader by running scripted sequences of
// operations against each reader and checking every intermediate result.
func TestFileReader(t *testing.T) {
	type (
		testRead struct { // ReadN(cnt) == (wantStr, wantErr)
			cnt     int
			wantStr string
			wantErr error
		}
		testDiscard struct { // Discard(cnt) == (wantCnt, wantErr)
			cnt     int64
			wantCnt int64
			wantErr error
		}
		testRemaining struct { // Remaining() == wantCnt
			wantCnt int64
		}
		testFnc interface{} // testRead | testDiscard | testRemaining
	)

	// makeReg returns a regular-file reader that claims n logical bytes but
	// is backed by s, which may be shorter (truncated input) or longer.
	makeReg := func(s string, n int) fileReader {
		return &regFileReader{strings.NewReader(s), int64(n)}
	}
	// makeSparse wraps fr in a sparseFileReader for a logical file of the
	// given size, where spd lists the data fragments. The data fragments are
	// inverted into hole fragments since sparseFileReader tracks holes.
	makeSparse := func(fr fileReader, spd sparseDatas, size int64) fileReader {
		if !validateSparseEntries(spd, size) {
			t.Fatalf("invalid sparse map: %v", spd)
		}
		sph := invertSparseEntries(append([]SparseEntry{}, spd...), size)
		return &sparseFileReader{fr, sph, 0}
	}

	vectors := []struct {
		fr    fileReader // reader under test
		tests []testFnc  // operations to apply, in order
	}{{
		fr: makeReg("", 0),
		tests: []testFnc{
			testRemaining{0},
			testRead{0, "", io.EOF},
			testRead{1, "", io.EOF},
			testDiscard{0, 0, nil},
			testDiscard{1, 0, io.EOF},
			testRemaining{0},
		},
	}, {
		// Claimed size exceeds the underlying data: truncated archive.
		fr: makeReg("", 1),
		tests: []testFnc{
			testRemaining{1},
			testRead{0, "", io.ErrUnexpectedEOF},
			testRead{5, "", io.ErrUnexpectedEOF},
			testDiscard{0, 0, nil},
			testDiscard{1, 0, io.ErrUnexpectedEOF},
			testRemaining{1},
		},
	}, {
		fr: makeReg("hello", 5),
		tests: []testFnc{
			testRemaining{5},
			testRead{5, "hello", io.EOF},
			testRemaining{0},
		},
	}, {
		fr: makeReg("hello, world", 50),
		tests: []testFnc{
			testRemaining{50},
			testDiscard{7, 7, nil},
			testRemaining{43},
			testRead{5, "world", nil},
			testRemaining{38},
			testDiscard{1, 0, io.ErrUnexpectedEOF},
			testRead{1, "", io.ErrUnexpectedEOF},
			testRemaining{38},
		},
	}, {
		// Underlying data is longer than the claimed size; reads stop at size.
		fr: makeReg("hello, world", 5),
		tests: []testFnc{
			testRemaining{5},
			testRead{0, "", nil},
			testRead{4, "hell", nil},
			testRemaining{1},
			testDiscard{5, 1, io.EOF},
			testRemaining{0},
			testDiscard{5, 0, io.EOF},
			testRead{0, "", io.EOF},
		},
	}, {
		fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8),
		tests: []testFnc{
			testRemaining{8},
			testRead{3, "ab\x00", nil},
			testRead{10, "\x00\x00cde", io.EOF},
			testRemaining{0},
		},
	}, {
		fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8),
		tests: []testFnc{
			testRemaining{8},
			testDiscard{100, 8, io.EOF},
			testRemaining{0},
		},
	}, {
		// Trailing hole after the last data fragment.
		fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 10),
		tests: []testFnc{
			testRemaining{10},
			testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF},
			testRemaining{0},
		},
	}, {
		// Underlying data runs out partway through the second fragment.
		fr: makeSparse(makeReg("abc", 5), sparseDatas{{0, 2}, {5, 3}}, 10),
		tests: []testFnc{
			testRemaining{10},
			testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF},
			testRemaining{4},
		},
	}, {
		fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 8),
		tests: []testFnc{
			testRemaining{8},
			testRead{8, "\x00abc\x00\x00de", io.EOF},
			testRemaining{0},
		},
	}, {
		// Zero-length data fragments must be tolerated.
		fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8),
		tests: []testFnc{
			testRemaining{8},
			testRead{8, "\x00abc\x00\x00de", io.EOF},
			testRemaining{0},
		},
	}, {
		fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 10),
		tests: []testFnc{
			testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF},
		},
	}, {
		fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10),
		tests: []testFnc{
			testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF},
		},
	}, {
		// A sparse file with no data fragments at all is one big hole.
		fr: makeSparse(makeReg("", 0), sparseDatas{}, 2),
		tests: []testFnc{
			testRead{100, "\x00\x00", io.EOF},
		},
	}, {
		fr: makeSparse(makeReg("", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
		tests: []testFnc{
			testRead{100, "\x00", io.ErrUnexpectedEOF},
		},
	}, {
		// Underlying reader is exhausted before the sparse map is satisfied.
		fr: makeSparse(makeReg("ab", 2), sparseDatas{{1, 3}, {6, 5}}, 15),
		tests: []testFnc{
			testRead{100, "\x00ab", errMissData},
		},
	}, {
		fr: makeSparse(makeReg("ab", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
		tests: []testFnc{
			testRead{100, "\x00ab", io.ErrUnexpectedEOF},
		},
	}, {
		fr: makeSparse(makeReg("abc", 3), sparseDatas{{1, 3}, {6, 5}}, 15),
		tests: []testFnc{
			testRead{100, "\x00abc\x00\x00", errMissData},
		},
	}, {
		fr: makeSparse(makeReg("abc", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
		tests: []testFnc{
			testRead{100, "\x00abc\x00\x00", io.ErrUnexpectedEOF},
		},
	}, {
		fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 5}}, 15),
		tests: []testFnc{
			testRead{100, "\x00abc\x00\x00de", errMissData},
		},
	}, {
		fr: makeSparse(makeReg("abcde", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
		tests: []testFnc{
			testRead{100, "\x00abc\x00\x00de", io.ErrUnexpectedEOF},
		},
	}, {
		// Underlying reader holds more data than the sparse map references.
		fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15),
		tests: []testFnc{
			testRemaining{15},
			testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData},
			testDiscard{100, 0, errUnrefData},
			testRemaining{0},
		},
	}, {
		fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15),
		tests: []testFnc{
			testRemaining{15},
			testDiscard{100, 15, errUnrefData},
			testRead{100, "", errUnrefData},
			testRemaining{0},
		},
	}}

	for i, v := range vectors {
		for j, tf := range v.tests {
			// Dispatch on the operation type; each case applies one
			// operation to the reader and verifies its result.
			switch tf := tf.(type) {
			case testRead:
				b := make([]byte, tf.cnt)
				n, err := v.fr.Read(b)
				if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr {
					t.Errorf("test %d.%d, Read(%d):\ngot  (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr)
				}
			case testDiscard:
				got, err := v.fr.Discard(tf.cnt)
				if got != tf.wantCnt || err != tf.wantErr {
					t.Errorf("test %d.%d, Discard(%d) = (%d, %v), want (%d, %v)", i, j, tf.cnt, got, err, tf.wantCnt, tf.wantErr)
				}
			case testRemaining:
				got := v.fr.Remaining()
				if got != tf.wantCnt {
					t.Errorf("test %d.%d, Remaining() = %d, want %d", i, j, got, tf.wantCnt)
				}
			default:
				t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf)
			}
		}
	}
}
...@@ -19,6 +19,116 @@ import ( ...@@ -19,6 +19,116 @@ import (
"time" "time"
) )
// equalSparseEntries reports whether x and y describe the same sequence of
// sparse entries. A nil slice and an empty slice are considered equivalent.
func equalSparseEntries(x, y []SparseEntry) bool {
	if len(x) == 0 && len(y) == 0 {
		return true
	}
	return reflect.DeepEqual(x, y)
}
// TestSparseEntries checks validateSparseEntries, alignSparseEntries, and
// invertSparseEntries against a table of sparse maps. Alignment and inversion
// are only checked for maps that validate, since their behavior is undefined
// on invalid input.
//
// Fix: the inversion failure message previously named a non-existent
// "inverseSparseEntries" function; it now names invertSparseEntries.
func TestSparseEntries(t *testing.T) {
	vectors := []struct {
		in           []SparseEntry
		size         int64
		wantValid    bool          // Result of validateSparseEntries
		wantAligned  []SparseEntry // Result of alignSparseEntries
		wantInverted []SparseEntry // Result of invertSparseEntries
	}{{
		in: []SparseEntry{}, size: 0,
		wantValid:    true,
		wantInverted: []SparseEntry{{0, 0}},
	}, {
		in: []SparseEntry{}, size: 5000,
		wantValid:    true,
		wantInverted: []SparseEntry{{0, 5000}},
	}, {
		in: []SparseEntry{{0, 5000}}, size: 5000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{0, 5000}},
		wantInverted: []SparseEntry{{5000, 0}},
	}, {
		in: []SparseEntry{{1000, 4000}}, size: 5000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{1024, 3976}},
		wantInverted: []SparseEntry{{0, 1000}, {5000, 0}},
	}, {
		in: []SparseEntry{{0, 3000}}, size: 5000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{0, 2560}},
		wantInverted: []SparseEntry{{3000, 2000}},
	}, {
		in: []SparseEntry{{3000, 2000}}, size: 5000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{3072, 1928}},
		wantInverted: []SparseEntry{{0, 3000}, {5000, 0}},
	}, {
		in: []SparseEntry{{2000, 2000}}, size: 5000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{2048, 1536}},
		wantInverted: []SparseEntry{{0, 2000}, {4000, 1000}},
	}, {
		in: []SparseEntry{{0, 2000}, {8000, 2000}}, size: 10000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{0, 1536}, {8192, 1808}},
		wantInverted: []SparseEntry{{2000, 6000}, {10000, 0}},
	}, {
		in: []SparseEntry{{0, 2000}, {2000, 2000}, {4000, 0}, {4000, 3000}, {7000, 1000}, {8000, 0}, {8000, 2000}}, size: 10000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{0, 1536}, {2048, 1536}, {4096, 2560}, {7168, 512}, {8192, 1808}},
		wantInverted: []SparseEntry{{10000, 0}},
	}, {
		in: []SparseEntry{{0, 0}, {1000, 0}, {2000, 0}, {3000, 0}, {4000, 0}, {5000, 0}}, size: 5000,
		wantValid:    true,
		wantInverted: []SparseEntry{{0, 5000}},
	}, {
		// The following maps are all invalid: offsets out of range, negative
		// offsets/lengths/sizes, arithmetic overflow, entries past EOF,
		// unsorted entries, and overlapping entries.
		in: []SparseEntry{{1, 0}}, size: 0,
		wantValid: false,
	}, {
		in: []SparseEntry{{-1, 0}}, size: 100,
		wantValid: false,
	}, {
		in: []SparseEntry{{0, -1}}, size: 100,
		wantValid: false,
	}, {
		in: []SparseEntry{{0, 0}}, size: -100,
		wantValid: false,
	}, {
		in: []SparseEntry{{math.MaxInt64, 3}, {6, -5}}, size: 35,
		wantValid: false,
	}, {
		in: []SparseEntry{{1, 3}, {6, -5}}, size: 35,
		wantValid: false,
	}, {
		in: []SparseEntry{{math.MaxInt64, math.MaxInt64}}, size: math.MaxInt64,
		wantValid: false,
	}, {
		in: []SparseEntry{{3, 3}}, size: 5,
		wantValid: false,
	}, {
		in: []SparseEntry{{2, 0}, {1, 0}, {0, 0}}, size: 3,
		wantValid: false,
	}, {
		in: []SparseEntry{{1, 3}, {2, 2}}, size: 10,
		wantValid: false,
	}}

	for i, v := range vectors {
		gotValid := validateSparseEntries(v.in, v.size)
		if gotValid != v.wantValid {
			t.Errorf("test %d, validateSparseEntries() = %v, want %v", i, gotValid, v.wantValid)
		}
		if !v.wantValid {
			continue
		}
		// Pass copies so that in-place mutation by the helpers cannot
		// corrupt v.in between the two calls.
		gotAligned := alignSparseEntries(append([]SparseEntry{}, v.in...), v.size)
		if !equalSparseEntries(gotAligned, v.wantAligned) {
			t.Errorf("test %d, alignSparseEntries():\ngot  %v\nwant %v", i, gotAligned, v.wantAligned)
		}
		gotInverted := invertSparseEntries(append([]SparseEntry{}, v.in...), v.size)
		if !equalSparseEntries(gotInverted, v.wantInverted) {
			t.Errorf("test %d, invertSparseEntries():\ngot  %v\nwant %v", i, gotInverted, v.wantInverted)
		}
	}
}
func TestFileInfoHeader(t *testing.T) { func TestFileInfoHeader(t *testing.T) {
fi, err := os.Stat("testdata/small.txt") fi, err := os.Stat("testdata/small.txt")
if err != nil { if err != nil {
......
...@@ -250,7 +250,7 @@ func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error { ...@@ -250,7 +250,7 @@ func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error {
size = 0 size = 0
} }
tw.nb = size tw.nb = size
tw.pad = -size & (blockSize - 1) // blockSize is a power of two tw.pad = blockPadding(size)
return nil return nil
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment