Commit 3bece2fa authored by Joe Tsai's avatar Joe Tsai Committed by Joe Tsai

archive/tar: refactor Reader support for sparse files

This CL is the first step (of two) for adding sparse file support
to the Writer. This CL only refactors the logic of sparse-file handling
in the Reader so that common logic can be easily shared by the Writer.

As a result of this CL, there are some new publicly visible API changes:
	type SparseEntry struct { Offset, Length int64 }
	type Header struct { ...; SparseHoles []SparseEntry }

A new type is defined to represent a sparse fragment and a new field
Header.SparseHoles is added to represent the sparse holes in a file.
The API intentionally represents sparse files using hole fragments,
rather than data fragments so that the zero value of SparseHoles
naturally represents a normal file (i.e., a file without any holes).
The Reader now populates SparseHoles for sparse files.

It is necessary to export the sparse hole information, otherwise it would
be impossible for the Writer to specify that it is trying to encode
a sparse file, and what it looks like.

Some unexported helper functions were added to common.go:
	func validateSparseEntries(sp []SparseEntry, size int64) bool
	func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry
	func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry

The validation logic that used to be in newSparseFileReader is now moved
to validateSparseEntries so that the Writer can use it in the future.
alignSparseEntries is currently unused by the Reader, but will be used
by the Writer in the future. Since TAR represents sparse files by
only recording the data fragments, we add the invertSparseEntries
function to convert a list of data fragments to a normalized list
of hole fragments (and vice-versa).

Some other high-level changes:
* skipUnread is deleted, where most of its logic is moved to the
Discard methods on regFileReader and sparseFileReader.
* readGNUSparsePAXHeaders was rewritten to be simpler.
* regFileReader and sparseFileReader were completely rewritten
in simpler and easier to understand logic.
* A bug was fixed in sparseFileReader.Read where it failed to
report an error if the logical size of the file ends before
consuming all of the underlying data.
* The tests for sparse-file support were completely rewritten.

Updates #13548

Change-Id: Ic1233ae5daf3b3f4278fe1115d34a90c4aeaf0c2
Reviewed-on: https://go-review.googlesource.com/56771
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
parent b2174a16
......@@ -15,6 +15,7 @@ package tar
import (
"errors"
"fmt"
"math"
"os"
"path"
"strconv"
......@@ -30,6 +31,8 @@ var (
ErrWriteTooLong = errors.New("tar: write too long")
ErrFieldTooLong = errors.New("tar: header field too long")
ErrWriteAfterClose = errors.New("tar: write after close")
errMissData = errors.New("tar: sparse file references non-existent data")
errUnrefData = errors.New("tar: sparse file contains unreferenced data")
)
// Header type flags.
......@@ -68,6 +71,131 @@ type Header struct {
AccessTime time.Time // access time
ChangeTime time.Time // status change time
Xattrs map[string]string
// SparseHoles represents a sequence of holes in a sparse file.
//
// The regions must be sorted in ascending order, not overlap with
// each other, and not extend past the specified Size.
// The file is sparse if either len(SparseHoles) > 0 or
// the Typeflag is set to TypeGNUSparse.
SparseHoles []SparseEntry
}
// SparseEntry describes one contiguous fragment of a file: it begins at
// byte Offset and spans Length bytes.
type SparseEntry struct {
	Offset int64
	Length int64
}

// endOffset returns the offset of the first byte past the fragment.
func (s SparseEntry) endOffset() int64 {
	end := s.Offset + s.Length
	return end
}
// A sparse file can be represented as either a sparseDatas or a sparseHoles.
// As long as the total size is known, they are equivalent and one can be
// converted to the other form and back. The various tar formats with sparse
// file support represent sparse files in the sparseDatas form. That is, they
// specify the fragments in the file that have data, and treat everything else
// as having zero bytes. As such, the encoding and decoding logic in this
// package deals with sparseDatas.
//
// However, the external API uses sparseHoles instead of sparseDatas because the
// zero value of sparseHoles logically represents a normal file (i.e., there are
// no holes in it). On the other hand, the zero value of sparseDatas implies
// that the file has no data in it, which is rather odd.
//
// As an example, if the underlying raw file contains the 8-byte data:
//	var compactFile = "abcdefgh"
//
// And the sparse map has the following entries:
//	var spd sparseDatas = []SparseEntry{
//		{Offset: 2,  Length: 5},  // Data fragment for 2..6
//		{Offset: 18, Length: 3},  // Data fragment for 18..20
//	}
//	var sph sparseHoles = []SparseEntry{
//		{Offset: 0,  Length: 2},  // Hole fragment for 0..1
//		{Offset: 7,  Length: 11}, // Hole fragment for 7..17
//		{Offset: 21, Length: 4},  // Hole fragment for 21..24
//	}
//
// Then the content of the resulting sparse file with a Header.Size of 25 is:
//	var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type (
	sparseDatas []SparseEntry // Fragments of the file that contain data
	sparseHoles []SparseEntry // Fragments of the file that are holes
)
// validateSparseEntries reports whether sp forms a valid sparse map for a
// file of the given size. The check is the same regardless of whether sp
// lists data fragments or hole fragments.
//
// A map is valid when size is non-negative and every fragment has a
// non-negative offset and length, does not overflow int64 when its end is
// computed, ends at or before size, and starts at or after the end of the
// fragment that precedes it. These mirror the checks of the BSD tar utility.
func validateSparseEntries(sp []SparseEntry, size int64) bool {
	if size < 0 {
		return false
	}

	var prevEnd int64 // End offset of the preceding fragment (0 before the first)
	for i := range sp {
		off, length := sp[i].Offset, sp[i].Length
		if off < 0 || length < 0 {
			return false // Negative values are never okay
		}
		if off > math.MaxInt64-length {
			return false // Integer overflow with large length
		}
		end := off + length // Safe: overflow was ruled out above
		if end > size {
			return false // Region extends beyond the actual size
		}
		if prevEnd > off {
			return false // Regions cannot overlap and must be in order
		}
		prevEnd = end
	}
	return true
}
// alignSparseEntries shrinks every fragment in src so that it starts and ends
// on a block edge: the starting offset is rounded up and the ending offset is
// rounded down. An ending offset equal to size is left as is. Fragments that
// become empty after alignment are dropped.
//
// The result reuses the backing array of src, so src is mutated.
//
// Even though the Go tar Reader and the BSD tar utility can handle entries
// with arbitrary offsets and lengths, the GNU tar utility can only handle
// offsets and lengths that are multiples of blockSize.
func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry {
	aligned := src[:0] // Writes never get ahead of the read index below
	for i := range src {
		start := src[i].Offset
		stop := src[i].endOffset()

		start += blockPadding(start) // Round-up to nearest blockSize
		if stop != size {
			stop -= blockPadding(-stop) // Round-down to nearest blockSize
		}
		if start >= stop {
			continue // Nothing left of this fragment once aligned
		}
		aligned = append(aligned, SparseEntry{Offset: start, Length: stop - start})
	}
	return aligned
}
// invertSparseEntries returns the complement of src within [0, size): given
// sparseHoles it produces sparseDatas, and given sparseDatas it produces
// sparseHoles. The input must have been already validated.
//
// The result reuses the backing array of src where it fits, so src is
// mutated. The returned map is normalized such that:
//	* adjacent fragments are coalesced together
//	* only the last fragment may be empty
//	* the endOffset of the last fragment is the total size
func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry {
	inverted := src[:0]
	var gapStart int64 // Offset where the pending complementary fragment begins
	for i := range src {
		frag := src[i] // Copy before the slot can be overwritten below
		if frag.Length == 0 {
			continue // Skip empty fragments
		}
		if gap := frag.Offset - gapStart; gap > 0 {
			// Only add non-empty fragments
			inverted = append(inverted, SparseEntry{Offset: gapStart, Length: gap})
		}
		gapStart = frag.Offset + frag.Length
	}
	// The trailing fragment is always emitted and may be the only empty one.
	return append(inverted, SparseEntry{Offset: gapStart, Length: size - gapStart})
}
// fileState reports how much of the current file is still left to process.
type fileState interface {
	// Remaining reports the number of remaining bytes in the current file.
	// This count includes any sparse holes that may exist.
	Remaining() int64
}
// FileInfo returns an os.FileInfo for the Header.
......@@ -300,6 +428,17 @@ const (
paxUname = "uname"
paxXattr = "SCHILY.xattr."
paxNone = ""
// Keywords for GNU sparse files in a PAX extended header.
paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
paxGNUSparseOffset = "GNU.sparse.offset"
paxGNUSparseNumBytes = "GNU.sparse.numbytes"
paxGNUSparseMap = "GNU.sparse.map"
paxGNUSparseName = "GNU.sparse.name"
paxGNUSparseMajor = "GNU.sparse.major"
paxGNUSparseMinor = "GNU.sparse.minor"
paxGNUSparseSize = "GNU.sparse.size"
paxGNUSparseRealSize = "GNU.sparse.realsize"
)
// FileInfoHeader creates a partially-populated Header from fi.
......@@ -373,6 +512,9 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
h.Size = 0
h.Linkname = sys.Linkname
}
if sys.SparseHoles != nil {
h.SparseHoles = append([]SparseEntry{}, sys.SparseHoles...)
}
}
if sysStat != nil {
return h, sysStat(fi, h)
......@@ -390,3 +532,10 @@ func isHeaderOnlyType(flag byte) bool {
return false
}
}
// min returns the smaller of the two int64 values a and b.
func min(a, b int64) int64 {
	if b <= a {
		return b
	}
	return a
}
......@@ -50,6 +50,12 @@ const (
prefixSize = 155 // Max length of the prefix field in USTAR format
)
// blockPadding returns n, the number of bytes that must be added to offset
// to reach the next block edge, where 0 <= n < blockSize. An offset that is
// already on a block edge needs no padding.
func blockPadding(offset int64) (n int64) {
	const mask = blockSize - 1 // Valid as a bit mask since blockSize is a power of two
	n = -offset & mask
	return n
}
var zeroBlock block
type block [blockSize]byte
......@@ -192,11 +198,11 @@ func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] }
type sparseArray []byte
func (s sparseArray) Entry(i int) sparseNode { return (sparseNode)(s[i*24:]) }
func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) }
func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] }
func (s sparseArray) MaxEntries() int { return len(s) / 24 }
type sparseNode []byte
type sparseElem []byte
func (s sparseNode) Offset() []byte { return s[00:][:12] }
func (s sparseNode) NumBytes() []byte { return s[12:][:12] }
func (s sparseElem) Offset() []byte { return s[00:][:12] }
func (s sparseElem) Length() []byte { return s[12:][:12] }
......@@ -11,7 +11,6 @@ import (
"bytes"
"io"
"io/ioutil"
"math"
"strconv"
"strings"
"time"
......@@ -23,9 +22,9 @@ import (
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct {
r io.Reader
pad int64 // amount of padding (ignored) after current file entry
curr numBytesReader // reader for current file entry
blk block // buffer to use as temporary local storage
pad int64 // Amount of padding (ignored) after current file entry
curr fileReader // Reader for current file entry
blk block // Buffer to use as temporary local storage
// err is a persistent error.
// It is only the responsibility of every exported method of Reader to
......@@ -33,66 +32,17 @@ type Reader struct {
err error
}
// A numBytesReader is an io.Reader with a numBytes method, returning the number
// of bytes remaining in the underlying encoded data.
type numBytesReader interface {
type fileReader interface {
io.Reader
numBytes() int64
}
fileState
// A regFileReader is a numBytesReader for reading file data from a tar archive.
type regFileReader struct {
r io.Reader // underlying reader
nb int64 // number of unread bytes for current file entry
}
// A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive.
type sparseFileReader struct {
rfr numBytesReader // Reads the sparse-encoded file data
sp []sparseEntry // The sparse map for the file
pos int64 // Keeps track of file position
total int64 // Total size of the file
}
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 10-byte data:
// var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
// var sp = []sparseEntry{
// {offset: 2, numBytes: 5} // Data fragment for [2..7]
// {offset: 18, numBytes: 3} // Data fragment for [18..21]
// }
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
offset int64 // Starting position of the fragment
numBytes int64 // Length of the fragment
Discard(n int64) (int64, error)
}
// Keywords for GNU sparse files in a PAX extended header
const (
paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
paxGNUSparseOffset = "GNU.sparse.offset"
paxGNUSparseNumBytes = "GNU.sparse.numbytes"
paxGNUSparseMap = "GNU.sparse.map"
paxGNUSparseName = "GNU.sparse.name"
paxGNUSparseMajor = "GNU.sparse.major"
paxGNUSparseMinor = "GNU.sparse.minor"
paxGNUSparseSize = "GNU.sparse.size"
paxGNUSparseRealSize = "GNU.sparse.realsize"
)
// NewReader creates a new Reader reading from r.
func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
func NewReader(r io.Reader) *Reader {
	// Start with an empty regular-file reader so that curr is never nil.
	tr := &Reader{r: r}
	tr.curr = &regFileReader{r: r, nb: 0}
	return tr
}
// Next advances to the next entry in the tar archive.
//
......@@ -116,9 +66,15 @@ func (tr *Reader) next() (*Header, error) {
// one or more "header files" until it finds a "normal file".
loop:
for {
if err := tr.skipUnread(); err != nil {
// Discard the remainder of the file and any padding.
if _, err := tr.curr.Discard(tr.curr.Remaining()); err != nil {
return nil, err
}
if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
return nil, err
}
tr.pad = 0
hdr, rawHdr, err := tr.readHeader()
if err != nil {
return nil, err
......@@ -192,7 +148,7 @@ func (tr *Reader) handleRegularFile(hdr *Header) error {
return ErrHeader
}
tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
tr.pad = blockPadding(nb)
tr.curr = &regFileReader{r: tr.r, nb: nb}
return nil
}
......@@ -200,87 +156,70 @@ func (tr *Reader) handleRegularFile(hdr *Header) error {
// handleSparseFile checks if the current file is a sparse format of any type
// and sets the curr reader appropriately.
func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error {
var sp []sparseEntry
var spd sparseDatas
var err error
if hdr.Typeflag == TypeGNUSparse {
sp, err = tr.readOldGNUSparseMap(hdr, rawHdr)
if err != nil {
return err
}
spd, err = tr.readOldGNUSparseMap(hdr, rawHdr)
} else {
sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
if err != nil {
return err
}
spd, err = tr.readGNUSparsePAXHeaders(hdr, extHdrs)
}
// If sp is non-nil, then this is a sparse file.
// Note that it is possible for len(sp) to be zero.
if sp != nil {
tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size)
// Note that it is possible for len(sp) == 0.
if err == nil && spd != nil {
if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) {
return ErrHeader
}
sph := invertSparseEntries(spd, hdr.Size)
tr.curr = &sparseFileReader{tr.curr, sph, 0}
hdr.SparseHoles = append([]SparseEntry{}, sph...)
}
return err
}
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
// be treated as a regular file.
func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
var sparseFormat string
// Check for sparse format indicators
major, majorOk := headers[paxGNUSparseMajor]
minor, minorOk := headers[paxGNUSparseMinor]
sparseName, sparseNameOk := headers[paxGNUSparseName]
_, sparseMapOk := headers[paxGNUSparseMap]
sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
// Identify which, if any, sparse format applies from which PAX headers are set
if majorOk && minorOk {
sparseFormat = major + "." + minor
} else if sparseNameOk && sparseMapOk {
sparseFormat = "0.1"
} else if sparseSizeOk {
sparseFormat = "0.0"
} else {
// Not a PAX format GNU sparse file.
return nil, nil
}
// Check for unknown sparse format
if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
return nil, nil
// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers.
// If they are found, then this function reads the sparse map and returns it.
// This assumes that 0.0 headers have already been converted to 0.1 headers
// by the PAX header parsing logic.
func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header, extHdrs map[string]string) (sparseDatas, error) {
// Identify the version of GNU headers.
var is1x0 bool
major, minor := extHdrs[paxGNUSparseMajor], extHdrs[paxGNUSparseMinor]
switch {
case major == "0" && (minor == "0" || minor == "1"):
is1x0 = false
case major == "1" && minor == "0":
is1x0 = true
case major != "" || minor != "":
return nil, nil // Unknown GNU sparse PAX version
case extHdrs[paxGNUSparseMap] != "":
is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess
default:
return nil, nil // Not a PAX format GNU sparse file.
}
// Update hdr from GNU sparse PAX headers
if sparseNameOk {
hdr.Name = sparseName
// Update hdr from GNU sparse PAX headers.
if name := extHdrs[paxGNUSparseName]; name != "" {
hdr.Name = name
}
if sparseSizeOk {
realSize, err := strconv.ParseInt(sparseSize, 10, 64)
if err != nil {
return nil, ErrHeader
size := extHdrs[paxGNUSparseSize]
if size == "" {
size = extHdrs[paxGNUSparseRealSize]
}
hdr.Size = realSize
} else if sparseRealSizeOk {
realSize, err := strconv.ParseInt(sparseRealSize, 10, 64)
if size != "" {
n, err := strconv.ParseInt(size, 10, 64)
if err != nil {
return nil, ErrHeader
}
hdr.Size = realSize
hdr.Size = n
}
// Set up the sparse map, according to the particular sparse format in use
var sp []sparseEntry
var err error
switch sparseFormat {
case "0.0", "0.1":
sp, err = readGNUSparseMap0x1(headers)
case "1.0":
sp, err = readGNUSparseMap1x0(tr.curr)
// Read the sparse map according to the appropriate format.
if is1x0 {
return readGNUSparseMap1x0(tr.curr)
} else {
return readGNUSparseMap0x1(extHdrs)
}
return sp, err
}
// mergePAX merges well known headers according to PAX standard.
......@@ -376,45 +315,6 @@ func parsePAX(r io.Reader) (map[string]string, error) {
return extHdrs, nil
}
// skipUnread skips any unread bytes in the existing file entry, as well as any
// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
// encountered in the data portion; it is okay to hit io.EOF in the padding.
//
// Note that this function still works properly even when sparse files are being
// used since numBytes returns the bytes remaining in the underlying io.Reader.
func (tr *Reader) skipUnread() error {
dataSkip := tr.numBytes() // Number of data bytes to skip
totalSkip := dataSkip + tr.pad // Total number of bytes to skip
tr.curr, tr.pad = nil, 0
// If possible, Seek to the last byte before the end of the data section.
// Do this because Seek is often lazy about reporting errors; this will mask
// the fact that the tar stream may be truncated. We can rely on the
// io.CopyN done shortly afterwards to trigger any IO errors.
var seekSkipped int64 // Number of bytes skipped via Seek
if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
// io.Seeker, but calling Seek always returns an error and performs
// no action. Thus, we try an innocent seek to the current position
// to see if Seek is really supported.
pos1, err := sr.Seek(0, io.SeekCurrent)
if err == nil {
// Seek seems supported, so perform the real Seek.
pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent)
if err != nil {
return err
}
seekSkipped = pos2 - pos1
}
}
copySkipped, err := io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
if err == io.EOF && seekSkipped+copySkipped < dataSkip {
err = io.ErrUnexpectedEOF
}
return err
}
// readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary. It returns the raw block of the
// header in case further processing is required.
......@@ -530,7 +430,7 @@ func (tr *Reader) readHeader() (*Header, *block, error) {
// The Header.Size does not reflect the size of any extended headers used.
// Thus, this function will read from the raw io.Reader to fetch extra headers.
// This method mutates blk in the process.
func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) {
func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) {
// Make sure that the input format is GNU.
// Unfortunately, the STAR format also has a sparse header format that uses
// the same type flag but has a completely different layout.
......@@ -543,8 +443,8 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e
if p.err != nil {
return nil, p.err
}
var s sparseArray = blk.GNU().Sparse()
var sp = make([]sparseEntry, 0, s.MaxEntries())
s := blk.GNU().Sparse()
spd := make(sparseDatas, 0, s.MaxEntries())
for {
for i := 0; i < s.MaxEntries(); i++ {
// This termination condition is identical to GNU and BSD tar.
......@@ -552,25 +452,22 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e
break // Don't return, need to process extended headers (even if empty)
}
offset := p.parseNumeric(s.Entry(i).Offset())
numBytes := p.parseNumeric(s.Entry(i).NumBytes())
length := p.parseNumeric(s.Entry(i).Length())
if p.err != nil {
return nil, p.err
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
spd = append(spd, SparseEntry{Offset: offset, Length: length})
}
if s.IsExtended()[0] > 0 {
// There are more entries. Read an extension header and parse its entries.
if _, err := io.ReadFull(tr.r, blk[:]); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
if _, err := mustReadFull(tr.r, blk[:]); err != nil {
return nil, err
}
s = blk.Sparse()
continue
}
return sp, nil // Done
return spd, nil // Done
}
}
......@@ -578,28 +475,27 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e
// version 1.0. The format of the sparse map consists of a series of
// newline-terminated numeric fields. The first field is the number of entries
// and is always present. Following this are the entries, consisting of two
// fields (offset, numBytes). This function must stop reading at the end
// fields (offset, length). This function must stop reading at the end
// boundary of the block containing the last newline.
//
// Note that the GNU manual says that numeric values should be encoded in octal
// format. However, the GNU tar utility itself outputs these values in decimal.
// As such, this library treats values as being encoded in decimal.
func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
var cntNewline int64
var buf bytes.Buffer
var blk = make([]byte, blockSize)
// feedTokens copies data in numBlock chunks from r into buf until there are
func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) {
var (
cntNewline int64
buf bytes.Buffer
blk block
)
// feedTokens copies data in blocks from r into buf until there are
// at least cnt newlines in buf. It will not read more blocks than needed.
var feedTokens = func(cnt int64) error {
for cntNewline < cnt {
if _, err := io.ReadFull(r, blk); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
feedTokens := func(n int64) error {
for cntNewline < n {
if _, err := mustReadFull(r, blk[:]); err != nil {
return err
}
buf.Write(blk)
buf.Write(blk[:])
for _, c := range blk {
if c == '\n' {
cntNewline++
......@@ -611,10 +507,10 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
// nextToken gets the next token delimited by a newline. This assumes that
// at least one newline exists in the buffer.
var nextToken = func() string {
nextToken := func() string {
cntNewline--
tok, _ := buf.ReadString('\n')
return tok[:len(tok)-1] // Cut off newline
return strings.TrimRight(tok, "\n")
}
// Parse for the number of entries.
......@@ -633,24 +529,21 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
if err := feedTokens(2 * numEntries); err != nil {
return nil, err
}
sp := make([]sparseEntry, 0, numEntries)
spd := make(sparseDatas, 0, numEntries)
for i := int64(0); i < numEntries; i++ {
offset, err := strconv.ParseInt(nextToken(), 10, 64)
if err != nil {
offset, err1 := strconv.ParseInt(nextToken(), 10, 64)
length, err2 := strconv.ParseInt(nextToken(), 10, 64)
if err1 != nil || err2 != nil {
return nil, ErrHeader
}
numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
if err != nil {
return nil, ErrHeader
spd = append(spd, SparseEntry{Offset: offset, Length: length})
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
}
return sp, nil
return spd, nil
}
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
// version 0.1. The sparse map is stored in the PAX headers.
func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
func readGNUSparseMap0x1(extHdrs map[string]string) (sparseDatas, error) {
// Get number of entries.
// Use integer overflow resistant math to check this.
numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
......@@ -661,52 +554,42 @@ func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
// There should be two numbers in sparseMap for each entry.
sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
if len(sparseMap) == 1 && sparseMap[0] == "" {
sparseMap = sparseMap[:0]
}
if int64(len(sparseMap)) != 2*numEntries {
return nil, ErrHeader
}
// Loop through the entries in the sparse map.
// numEntries is trusted now.
sp := make([]sparseEntry, 0, numEntries)
for i := int64(0); i < numEntries; i++ {
offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
if err != nil {
spd := make(sparseDatas, 0, numEntries)
for len(sparseMap) >= 2 {
offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64)
length, err2 := strconv.ParseInt(sparseMap[1], 10, 64)
if err1 != nil || err2 != nil {
return nil, ErrHeader
}
numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
if err != nil {
return nil, ErrHeader
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
spd = append(spd, SparseEntry{Offset: offset, Length: length})
sparseMap = sparseMap[2:]
}
return sp, nil
}
// numBytes returns the number of bytes left to read in the current file's entry
// in the tar archive, or 0 if there is no current file.
func (tr *Reader) numBytes() int64 {
if tr.curr == nil {
// No current file, so no bytes
return 0
}
return tr.curr.numBytes()
return spd, nil
}
// Read reads from the current entry in the tar archive.
// It returns 0, io.EOF when it reaches the end of that entry,
// until Next is called to advance to the next entry.
//
// If the current file is sparse, then the regions marked as a sparse hole
// will read back NUL-bytes.
//
// Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what
// the Header.Size claims.
func (tr *Reader) Read(b []byte) (int, error) {
if tr.err != nil {
return 0, tr.err
}
if tr.curr == nil {
return 0, io.EOF
}
n, err := tr.curr.Read(b)
if err != nil && err != io.EOF {
tr.err = err
......@@ -714,116 +597,210 @@ func (tr *Reader) Read(b []byte) (int, error) {
return n, err
}
func (rfr *regFileReader) Read(b []byte) (n int, err error) {
if rfr.nb == 0 {
// file consumed
return 0, io.EOF
}
if int64(len(b)) > rfr.nb {
b = b[0:rfr.nb]
}
n, err = rfr.r.Read(b)
rfr.nb -= int64(n)
// TODO(dsnet): Export the Reader.Discard method to assist in quickly
// skipping over sections of a file. This is especially useful:
// * when skipping through an underlying io.Reader that is also an io.Seeker.
// * when skipping over large holes in a sparse file.
if err == io.EOF && rfr.nb > 0 {
err = io.ErrUnexpectedEOF
// discard skips the next n bytes in the current file,
// returning the number of bytes discarded.
// If fewer than n bytes are discarded, it returns a non-nil error,
// which may be io.EOF if there are no more remaining bytes in the current file.
func (tr *Reader) discard(n int64) (int64, error) {
if tr.err != nil {
return 0, tr.err
}
return
n, err := tr.curr.Discard(n)
if err != nil && err != io.EOF {
tr.err = err
}
return n, err
}
// numBytes returns the number of bytes left to read in the file's data in the tar archive.
func (rfr *regFileReader) numBytes() int64 {
return rfr.nb
// regFileReader is a fileReader for reading data from a regular file entry.
type regFileReader struct {
	r  io.Reader // Underlying Reader
	nb int64     // Number of remaining bytes to read; decremented by Read and Discard
}
// newSparseFileReader creates a new sparseFileReader, but validates all of the
// sparse entries before doing so.
func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
if total < 0 {
return nil, ErrHeader // Total size cannot be negative
func (fr *regFileReader) Read(b []byte) (int, error) {
if int64(len(b)) > fr.nb {
b = b[:fr.nb]
}
// Validate all sparse entries. These are the same checks as performed by
// the BSD tar utility.
for i, s := range sp {
n, err := fr.r.Read(b)
fr.nb -= int64(n)
switch {
case s.offset < 0 || s.numBytes < 0:
return nil, ErrHeader // Negative values are never okay
case s.offset > math.MaxInt64-s.numBytes:
return nil, ErrHeader // Integer overflow with large length
case s.offset+s.numBytes > total:
return nil, ErrHeader // Region extends beyond the "real" size
case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
return nil, ErrHeader // Regions can't overlap and must be in order
}
case err == io.EOF && fr.nb > 0:
return n, io.ErrUnexpectedEOF
case err == nil && fr.nb == 0:
return n, io.EOF
default:
return n, err
}
return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
}
// readHole reads a sparse hole ending at endOffset.
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
n64 := endOffset - sfr.pos
if n64 > int64(len(b)) {
n64 = int64(len(b))
func (fr *regFileReader) Discard(n int64) (int64, error) {
overread := n > fr.Remaining()
if overread {
n = fr.Remaining()
}
// If possible, Seek to the last byte before the end of the data section.
// Do this because Seek is often lazy about reporting errors; this will mask
// the fact that the stream may be truncated. We can rely on the
// io.CopyN done shortly afterwards to trigger any IO errors.
var seekSkipped int64 // Number of bytes skipped via Seek
if sr, ok := fr.r.(io.Seeker); ok && n > 1 {
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
// io.Seeker, but calling Seek always returns an error and performs
// no action. Thus, we try an innocent seek to the current position
// to see if Seek is really supported.
pos1, err := sr.Seek(0, io.SeekCurrent)
if pos1 >= 0 && err == nil {
// Seek seems supported, so perform the real Seek.
pos2, err := sr.Seek(n-1, io.SeekCurrent)
if pos2 < 0 || err != nil {
return 0, err
}
seekSkipped = pos2 - pos1
}
n := int(n64)
for i := 0; i < n; i++ {
b[i] = 0
}
sfr.pos += n64
return n
copySkipped, err := io.CopyN(ioutil.Discard, fr.r, n-seekSkipped)
discarded := seekSkipped + copySkipped
fr.nb -= discarded
switch {
case err == io.EOF && discarded < n:
return discarded, io.ErrUnexpectedEOF
case err == nil && overread:
return discarded, io.EOF
default:
return discarded, err
}
}
// Remaining reports the number of bytes of the current file entry that have
// not yet been read or discarded.
//
// The receiver is named fr to match the other regFileReader methods. It is
// kept as a value receiver so that the method set of both regFileReader and
// *regFileReader is unchanged.
func (fr regFileReader) Remaining() int64 {
	return fr.nb
}
// sparseFileReader is a fileReader for reading data from a sparse file entry.
// It expands the densely stored data of the underlying fileReader back into
// the logical file, filling the holes with zero bytes.
type sparseFileReader struct {
	fr  fileReader  // Underlying fileReader
	sp  sparseHoles // Normalized list of sparse holes; Read drops passed holes but always keeps the last
	pos int64       // Current position in sparse file
}
// Read reads the sparse file data in expanded form.
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
// Skip past all empty fragments.
for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
sfr.sp = sfr.sp[1:]
func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
finished := int64(len(b)) >= sr.Remaining()
if finished {
b = b[:sr.Remaining()]
}
// If there are no more fragments, then it is possible that there
// is one last sparse hole.
if len(sfr.sp) == 0 {
// This behavior matches the BSD tar utility.
// However, GNU tar stops returning data even if sfr.total is unmet.
if sfr.pos < sfr.total {
return sfr.readHole(b, sfr.total), nil
b0 := b
endPos := sr.pos + int64(len(b))
for endPos > sr.pos && err == nil {
var nf int // Bytes read in fragment
holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
if sr.pos < holeStart { // In a data fragment
bf := b[:min(int64(len(b)), holeStart-sr.pos)]
nf, err = tryReadFull(sr.fr, bf)
} else { // In a hole fragment
bf := b[:min(int64(len(b)), holeEnd-sr.pos)]
nf, err = tryReadFull(zeroReader{}, bf)
}
b = b[nf:]
sr.pos += int64(nf)
if sr.pos >= holeEnd && len(sr.sp) > 1 {
sr.sp = sr.sp[1:] // Ensure last fragment always remains
}
return 0, io.EOF
}
// In front of a data fragment, so read a hole.
if sfr.pos < sfr.sp[0].offset {
return sfr.readHole(b, sfr.sp[0].offset), nil
n = len(b0) - len(b)
switch {
case err == io.EOF:
return n, errMissData // Less data in dense file than sparse file
case err != nil:
return n, err
case sr.Remaining() == 0 && sr.fr.Remaining() > 0:
return n, errUnrefData // More data in dense file than sparse file
case finished:
return n, io.EOF
default:
return n, nil
}
}
// In a data fragment, so read from it.
// This math is overflow free since we verify that offset and numBytes can
// be safely added when creating the sparseFileReader.
endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
bytesLeft := endPos - sfr.pos // Bytes left in fragment
if int64(len(b)) > bytesLeft {
b = b[:bytesLeft]
func (sr *sparseFileReader) Discard(n int64) (int64, error) {
overread := n > sr.Remaining()
if overread {
n = sr.Remaining()
}
n, err = sfr.rfr.Read(b)
sfr.pos += int64(n)
if err == io.EOF {
if sfr.pos < endPos {
err = io.ErrUnexpectedEOF // There was supposed to be more data
} else if sfr.pos < sfr.total {
err = nil // There is still an implicit sparse hole at the end
var realDiscard int64 // Number of real data bytes to discard
endPos := sr.pos + n
for endPos > sr.pos {
var nf int64 // Size of fragment
holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
if sr.pos < holeStart { // In a data fragment
nf = min(endPos-sr.pos, holeStart-sr.pos)
realDiscard += nf
} else { // In a hole fragment
nf = min(endPos-sr.pos, holeEnd-sr.pos)
}
sr.pos += nf
if sr.pos >= holeEnd && len(sr.sp) > 1 {
sr.sp = sr.sp[1:] // Ensure last fragment always remains
}
}
if sfr.pos == endPos {
sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
_, err := sr.fr.Discard(realDiscard)
switch {
case err == io.EOF:
return n, errMissData // Less data in dense file than sparse file
case err != nil:
return n, err
case sr.Remaining() == 0 && sr.fr.Remaining() > 0:
return n, errUnrefData // More data in dense file than sparse file
case overread:
return n, io.EOF
default:
return n, nil
}
}
// Remaining reports how many bytes of the expanded sparse file lie between
// the current position and the end offset of the last entry in sp.
// It indexes the final element of sp directly, so sp must not be empty.
func (sr sparseFileReader) Remaining() int64 {
	lastIdx := len(sr.sp) - 1
	end := sr.sp[lastIdx].endOffset()
	return end - sr.pos
}
// zeroReader is a stateless reader that produces an unbounded run of
// zero bytes.
type zeroReader struct{}

// Read overwrites every byte of b with zero and reports that all len(b)
// bytes were read. It never returns an error.
func (zeroReader) Read(b []byte) (n int, err error) {
	for i := range b {
		b[i] = 0
	}
	n = len(b)
	return n, nil
}
// mustReadFull fills b from r via tryReadFull, but treats running out of
// input as a hard failure: whenever tryReadFull reports io.EOF, the error
// is replaced with io.ErrUnexpectedEOF. Any other error, and the byte
// count, are passed through unchanged.
func mustReadFull(r io.Reader, b []byte) (int, error) {
	switch n, err := tryReadFull(r, b); err {
	case io.EOF:
		return n, io.ErrUnexpectedEOF
	default:
		return n, err
	}
}
// numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive.
func (sfr *sparseFileReader) numBytes() int64 {
return sfr.rfr.numBytes()
// tryReadFull keeps calling r.Read until b is full or Read reports an
// error, and returns the number of bytes stored in b.
//
// Unlike io.ReadFull, a short read caused by end of input is reported as
// plain io.EOF. If b was filled completely, an io.EOF delivered together
// with the final bytes is suppressed and nil is returned instead.
func tryReadFull(r io.Reader, b []byte) (n int, err error) {
	for err == nil && n < len(b) {
		var got int
		got, err = r.Read(b[n:])
		n += got
	}
	// The caller got everything it asked for, so hitting the end of the
	// stream at the same moment is not worth reporting.
	if err == io.EOF && n == len(b) {
		return n, nil
	}
	return n, err
}
......@@ -14,6 +14,7 @@ import (
"os"
"path"
"reflect"
"strconv"
"strings"
"testing"
"time"
......@@ -67,6 +68,23 @@ func TestReader(t *testing.T) {
Gname: "david",
Devmajor: 0,
Devminor: 0,
SparseHoles: []SparseEntry{
{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1},
{16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1},
{30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1},
{44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1},
{58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1},
{72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1},
{86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1},
{100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1},
{112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1},
{124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1},
{136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1},
{148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1},
{160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1},
{172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1},
{184, 1}, {186, 1}, {188, 1}, {190, 10},
},
}, {
Name: "sparse-posix-0.0",
Mode: 420,
......@@ -80,6 +98,23 @@ func TestReader(t *testing.T) {
Gname: "david",
Devmajor: 0,
Devminor: 0,
SparseHoles: []SparseEntry{
{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1},
{16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1},
{30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1},
{44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1},
{58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1},
{72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1},
{86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1},
{100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1},
{112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1},
{124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1},
{136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1},
{148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1},
{160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1},
{172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1},
{184, 1}, {186, 1}, {188, 1}, {190, 10},
},
}, {
Name: "sparse-posix-0.1",
Mode: 420,
......@@ -93,6 +128,23 @@ func TestReader(t *testing.T) {
Gname: "david",
Devmajor: 0,
Devminor: 0,
SparseHoles: []SparseEntry{
{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1},
{16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1},
{30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1},
{44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1},
{58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1},
{72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1},
{86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1},
{100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1},
{112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1},
{124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1},
{136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1},
{148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1},
{160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1},
{172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1},
{184, 1}, {186, 1}, {188, 1}, {190, 10},
},
}, {
Name: "sparse-posix-1.0",
Mode: 420,
......@@ -106,6 +158,23 @@ func TestReader(t *testing.T) {
Gname: "david",
Devmajor: 0,
Devminor: 0,
SparseHoles: []SparseEntry{
{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}, {12, 1}, {14, 1},
{16, 1}, {18, 1}, {20, 1}, {22, 1}, {24, 1}, {26, 1}, {28, 1},
{30, 1}, {32, 1}, {34, 1}, {36, 1}, {38, 1}, {40, 1}, {42, 1},
{44, 1}, {46, 1}, {48, 1}, {50, 1}, {52, 1}, {54, 1}, {56, 1},
{58, 1}, {60, 1}, {62, 1}, {64, 1}, {66, 1}, {68, 1}, {70, 1},
{72, 1}, {74, 1}, {76, 1}, {78, 1}, {80, 1}, {82, 1}, {84, 1},
{86, 1}, {88, 1}, {90, 1}, {92, 1}, {94, 1}, {96, 1}, {98, 1},
{100, 1}, {102, 1}, {104, 1}, {106, 1}, {108, 1}, {110, 1},
{112, 1}, {114, 1}, {116, 1}, {118, 1}, {120, 1}, {122, 1},
{124, 1}, {126, 1}, {128, 1}, {130, 1}, {132, 1}, {134, 1},
{136, 1}, {138, 1}, {140, 1}, {142, 1}, {144, 1}, {146, 1},
{148, 1}, {150, 1}, {152, 1}, {154, 1}, {156, 1}, {158, 1},
{160, 1}, {162, 1}, {164, 1}, {166, 1}, {168, 1}, {170, 1},
{172, 1}, {174, 1}, {176, 1}, {178, 1}, {180, 1}, {182, 1},
{184, 1}, {186, 1}, {188, 1}, {190, 10},
},
}, {
Name: "end",
Mode: 420,
......@@ -325,6 +394,7 @@ func TestReader(t *testing.T) {
Gname: "dsnet",
AccessTime: time.Unix(1441991948, 0),
ChangeTime: time.Unix(1441973436, 0),
SparseHoles: []SparseEntry{{0, 536870912}},
}},
}, {
// Matches the behavior of GNU and BSD tar utilities.
......@@ -555,375 +625,6 @@ func TestPartialRead(t *testing.T) {
}
}
func TestSparseFileReader(t *testing.T) {
vectors := []struct {
realSize int64 // Real size of the output file
sparseMap []sparseEntry // Input sparse map
sparseData string // Input compact data
expected string // Expected output data
err error // Expected error outcome
}{{
realSize: 8,
sparseMap: []sparseEntry{
{offset: 0, numBytes: 2},
{offset: 5, numBytes: 3},
},
sparseData: "abcde",
expected: "ab\x00\x00\x00cde",
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 0, numBytes: 2},
{offset: 5, numBytes: 3},
},
sparseData: "abcde",
expected: "ab\x00\x00\x00cde\x00\x00",
}, {
realSize: 8,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
},
sparseData: "abcde",
expected: "\x00abc\x00\x00de",
}, {
realSize: 8,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 0},
{offset: 6, numBytes: 0},
{offset: 6, numBytes: 2},
},
sparseData: "abcde",
expected: "\x00abc\x00\x00de",
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
},
sparseData: "abcde",
expected: "\x00abc\x00\x00de\x00\x00",
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
{offset: 8, numBytes: 0},
{offset: 8, numBytes: 0},
{offset: 8, numBytes: 0},
{offset: 8, numBytes: 0},
},
sparseData: "abcde",
expected: "\x00abc\x00\x00de\x00\x00",
}, {
realSize: 2,
sparseMap: []sparseEntry{},
sparseData: "",
expected: "\x00\x00",
}, {
realSize: -2,
sparseMap: []sparseEntry{},
err: ErrHeader,
}, {
realSize: -10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 2},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 5},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 35,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: 5},
},
sparseData: "abcde",
err: io.ErrUnexpectedEOF,
}, {
realSize: 35,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 6, numBytes: -5},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 35,
sparseMap: []sparseEntry{
{offset: math.MaxInt64, numBytes: 3},
{offset: 6, numBytes: -5},
},
sparseData: "abcde",
err: ErrHeader,
}, {
realSize: 10,
sparseMap: []sparseEntry{
{offset: 1, numBytes: 3},
{offset: 2, numBytes: 2},
},
sparseData: "abcde",
err: ErrHeader,
}}
for i, v := range vectors {
r := bytes.NewReader([]byte(v.sparseData))
rfr := &regFileReader{r: r, nb: int64(len(v.sparseData))}
var (
sfr *sparseFileReader
err error
buf []byte
)
sfr, err = newSparseFileReader(rfr, v.sparseMap, v.realSize)
if err != nil {
goto fail
}
if sfr.numBytes() != int64(len(v.sparseData)) {
t.Errorf("test %d, numBytes() before reading: got %d, want %d", i, sfr.numBytes(), len(v.sparseData))
}
buf, err = ioutil.ReadAll(sfr)
if err != nil {
goto fail
}
if string(buf) != v.expected {
t.Errorf("test %d, ReadAll(): got %q, want %q", i, string(buf), v.expected)
}
if sfr.numBytes() != 0 {
t.Errorf("test %d, numBytes() after reading: got %d, want %d", i, sfr.numBytes(), 0)
}
fail:
if err != v.err {
t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
}
}
}
func TestReadOldGNUSparseMap(t *testing.T) {
const (
t00 = "00000000000\x0000000000000\x00"
t11 = "00000000001\x0000000000001\x00"
t12 = "00000000001\x0000000000002\x00"
t21 = "00000000002\x0000000000001\x00"
)
mkBlk := func(size, sp0, sp1, sp2, sp3, ext string, format int) *block {
var blk block
copy(blk.GNU().RealSize(), size)
copy(blk.GNU().Sparse().Entry(0), sp0)
copy(blk.GNU().Sparse().Entry(1), sp1)
copy(blk.GNU().Sparse().Entry(2), sp2)
copy(blk.GNU().Sparse().Entry(3), sp3)
copy(blk.GNU().Sparse().IsExtended(), ext)
if format != formatUnknown {
blk.SetFormat(format)
}
return &blk
}
vectors := []struct {
data string // Input data
rawHdr *block // Input raw header
want []sparseEntry // Expected sparse entries to be outputted
err error // Expected error to be returned
}{
{"", mkBlk("", "", "", "", "", "", formatUnknown), nil, ErrHeader},
{"", mkBlk("1234", "fewa", "", "", "", "", formatGNU), nil, ErrHeader},
{"", mkBlk("0031", "", "", "", "", "", formatGNU), nil, nil},
{"", mkBlk("1234", t00, t11, "", "", "", formatGNU),
[]sparseEntry{{0, 0}, {1, 1}}, nil},
{"", mkBlk("1234", t11, t12, t21, t11, "", formatGNU),
[]sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}}, nil},
{"", mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU),
[]sparseEntry{}, io.ErrUnexpectedEOF},
{t11 + t11,
mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU),
[]sparseEntry{}, io.ErrUnexpectedEOF},
{t11 + t21 + strings.Repeat("\x00", 512),
mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU),
[]sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}, {1, 1}, {2, 1}}, nil},
}
for i, v := range vectors {
tr := Reader{r: strings.NewReader(v.data)}
hdr := new(Header)
got, err := tr.readOldGNUSparseMap(hdr, v.rawHdr)
if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) {
t.Errorf("test %d, readOldGNUSparseMap(...): got %v, want %v", i, got, v.want)
}
if err != v.err {
t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
}
}
}
func TestReadGNUSparseMap0x1(t *testing.T) {
const (
maxUint = ^uint(0)
maxInt = int(maxUint >> 1)
)
var (
big1 = fmt.Sprintf("%d", int64(maxInt))
big2 = fmt.Sprintf("%d", (int64(maxInt)/2)+1)
big3 = fmt.Sprintf("%d", (int64(maxInt) / 3))
)
vectors := []struct {
extHdrs map[string]string // Input data
sparseMap []sparseEntry // Expected sparse entries to be outputted
err error // Expected errors that may be raised
}{{
extHdrs: map[string]string{paxGNUSparseNumBlocks: "-4"},
err: ErrHeader,
}, {
extHdrs: map[string]string{paxGNUSparseNumBlocks: "fee "},
err: ErrHeader,
}, {
extHdrs: map[string]string{
paxGNUSparseNumBlocks: big1,
paxGNUSparseMap: "0,5,10,5,20,5,30,5",
},
err: ErrHeader,
}, {
extHdrs: map[string]string{
paxGNUSparseNumBlocks: big2,
paxGNUSparseMap: "0,5,10,5,20,5,30,5",
},
err: ErrHeader,
}, {
extHdrs: map[string]string{
paxGNUSparseNumBlocks: big3,
paxGNUSparseMap: "0,5,10,5,20,5,30,5",
},
err: ErrHeader,
}, {
extHdrs: map[string]string{
paxGNUSparseNumBlocks: "4",
paxGNUSparseMap: "0.5,5,10,5,20,5,30,5",
},
err: ErrHeader,
}, {
extHdrs: map[string]string{
paxGNUSparseNumBlocks: "4",
paxGNUSparseMap: "0,5.5,10,5,20,5,30,5",
},
err: ErrHeader,
}, {
extHdrs: map[string]string{
paxGNUSparseNumBlocks: "4",
paxGNUSparseMap: "0,fewafewa.5,fewafw,5,20,5,30,5",
},
err: ErrHeader,
}, {
extHdrs: map[string]string{
paxGNUSparseNumBlocks: "4",
paxGNUSparseMap: "0,5,10,5,20,5,30,5",
},
sparseMap: []sparseEntry{{0, 5}, {10, 5}, {20, 5}, {30, 5}},
}}
for i, v := range vectors {
sp, err := readGNUSparseMap0x1(v.extHdrs)
if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) {
t.Errorf("test %d, readGNUSparseMap0x1(...): got %v, want %v", i, sp, v.sparseMap)
}
if err != v.err {
t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
}
}
}
func TestReadGNUSparseMap1x0(t *testing.T) {
sp := []sparseEntry{{1, 2}, {3, 4}}
for i := 0; i < 98; i++ {
sp = append(sp, sparseEntry{54321, 12345})
}
vectors := []struct {
input string // Input data
sparseMap []sparseEntry // Expected sparse entries to be outputted
cnt int // Expected number of bytes read
err error // Expected errors that may be raised
}{{
input: "",
cnt: 0,
err: io.ErrUnexpectedEOF,
}, {
input: "ab",
cnt: 2,
err: io.ErrUnexpectedEOF,
}, {
input: strings.Repeat("\x00", 512),
cnt: 512,
err: io.ErrUnexpectedEOF,
}, {
input: strings.Repeat("\x00", 511) + "\n",
cnt: 512,
err: ErrHeader,
}, {
input: strings.Repeat("\n", 512),
cnt: 512,
err: ErrHeader,
}, {
input: "0\n" + strings.Repeat("\x00", 510) + strings.Repeat("a", 512),
sparseMap: []sparseEntry{},
cnt: 512,
}, {
input: strings.Repeat("0", 512) + "0\n" + strings.Repeat("\x00", 510),
sparseMap: []sparseEntry{},
cnt: 1024,
}, {
input: strings.Repeat("0", 1024) + "1\n2\n3\n" + strings.Repeat("\x00", 506),
sparseMap: []sparseEntry{{2, 3}},
cnt: 1536,
}, {
input: strings.Repeat("0", 1024) + "1\n2\n\n" + strings.Repeat("\x00", 509),
cnt: 1536,
err: ErrHeader,
}, {
input: strings.Repeat("0", 1024) + "1\n2\n" + strings.Repeat("\x00", 508),
cnt: 1536,
err: io.ErrUnexpectedEOF,
}, {
input: "-1\n2\n\n" + strings.Repeat("\x00", 506),
cnt: 512,
err: ErrHeader,
}, {
input: "1\nk\n2\n" + strings.Repeat("\x00", 506),
cnt: 512,
err: ErrHeader,
}, {
input: "100\n1\n2\n3\n4\n" + strings.Repeat("54321\n0000000000000012345\n", 98) + strings.Repeat("\x00", 512),
cnt: 2560,
sparseMap: sp,
}}
for i, v := range vectors {
r := strings.NewReader(v.input)
sp, err := readGNUSparseMap1x0(r)
if !reflect.DeepEqual(sp, v.sparseMap) && !(len(sp) == 0 && len(v.sparseMap) == 0) {
t.Errorf("test %d, readGNUSparseMap1x0(...): got %v, want %v", i, sp, v.sparseMap)
}
if numBytes := len(v.input) - r.Len(); numBytes != v.cnt {
t.Errorf("test %d, bytes read: got %v, want %v", i, numBytes, v.cnt)
}
if err != v.err {
t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
}
}
}
func TestUninitializedRead(t *testing.T) {
f, err := os.Open("testdata/gnu.tar")
if err != nil {
......@@ -1192,3 +893,539 @@ func TestParsePAX(t *testing.T) {
}
}
}
// TestReadOldGNUSparseMap feeds hand-built old-GNU sparse headers to
// Reader.readOldGNUSparseMap and checks three things per vector: the sparse
// map that is returned, the error that is returned, and the Size value left
// in the Header that was passed in.
func TestReadOldGNUSparseMap(t *testing.T) {
// populateSparseMap copies entries from sps into successive slots of sa
// until sps is exhausted or sa.MaxEntries() slots have been filled.
// If entries are left over, it writes "\x80" into the IsExtended field.
// It returns the entries that did not fit.
populateSparseMap := func(sa sparseArray, sps []string) []string {
for i := 0; len(sps) > 0 && i < sa.MaxEntries(); i++ {
copy(sa.Entry(i), sps[0])
sps = sps[1:]
}
if len(sps) > 0 {
copy(sa.IsExtended(), "\x80")
}
return sps
}
// makeInput builds the raw input bytes: one header block carrying the
// size string and the first sparse entries, followed by as many extra
// blocks as are needed to hold the remaining entries. The format is only
// stamped onto the header block when it is not formatUnknown.
makeInput := func(format int, size string, sps ...string) (out []byte) {
// Write the initial GNU header.
var blk block
gnu := blk.GNU()
sparse := gnu.Sparse()
copy(gnu.RealSize(), size)
sps = populateSparseMap(sparse, sps)
if format != formatUnknown {
blk.SetFormat(format)
}
out = append(out, blk[:]...)
// Write extended sparse blocks.
for len(sps) > 0 {
var blk block
sps = populateSparseMap(blk.Sparse(), sps)
out = append(out, blk[:]...)
}
return out
}
// makeSparseStrings renders each entry as a 24-byte string: the first
// 12 bytes hold the formatted Offset and the last 12 the formatted Length.
makeSparseStrings := func(sp []SparseEntry) (out []string) {
var f formatter
for _, s := range sp {
var b [24]byte
f.formatNumeric(b[:12], s.Offset)
f.formatNumeric(b[12:], s.Length)
out = append(out, string(b[:]))
}
return out
}
// Note: the wantSize values below are Go octal literals (leading zero),
// mirroring the digit strings handed to makeInput as the size field.
vectors := []struct {
input []byte
wantMap sparseDatas
wantSize int64
wantErr error
}{{
input: makeInput(formatUnknown, ""),
wantErr: ErrHeader,
}, {
input: makeInput(formatGNU, "1234", "fewa"),
wantSize: 01234,
wantErr: ErrHeader,
}, {
input: makeInput(formatGNU, "0031"),
wantSize: 031,
}, {
input: makeInput(formatGNU, "80"),
wantErr: ErrHeader,
}, {
input: makeInput(formatGNU, "1234",
makeSparseStrings(sparseDatas{{0, 0}, {1, 1}})...),
wantMap: sparseDatas{{0, 0}, {1, 1}},
wantSize: 01234,
}, {
// Entries placed after an empty entry string are not expected in the map.
input: makeInput(formatGNU, "1234",
append(makeSparseStrings(sparseDatas{{0, 0}, {1, 1}}), []string{"", "blah"}...)...),
wantMap: sparseDatas{{0, 0}, {1, 1}},
wantSize: 01234,
}, {
input: makeInput(formatGNU, "3333",
makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}})...),
wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}},
wantSize: 03333,
}, {
// Two empty entry strings sit between the first pair and the second pair
// of entries; all four real entries are still expected in the map.
input: makeInput(formatGNU, "",
append(append(
makeSparseStrings(sparseDatas{{0, 1}, {2, 1}}),
[]string{"", ""}...),
makeSparseStrings(sparseDatas{{4, 1}, {6, 1}})...)...),
wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}},
}, {
// Input cut off after the first block although more entries were written.
input: makeInput(formatGNU, "",
makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:blockSize],
wantErr: io.ErrUnexpectedEOF,
}, {
// Input cut off halfway through the second block.
input: makeInput(formatGNU, "",
makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...)[:3*blockSize/2],
wantErr: io.ErrUnexpectedEOF,
}, {
input: makeInput(formatGNU, "",
makeSparseStrings(sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}})...),
wantMap: sparseDatas{{0, 1}, {2, 1}, {4, 1}, {6, 1}, {8, 1}, {10, 1}},
}, {
input: makeInput(formatGNU, "",
makeSparseStrings(sparseDatas{{10 << 30, 512}, {20 << 30, 512}})...),
wantMap: sparseDatas{{10 << 30, 512}, {20 << 30, 512}},
}}
for i, v := range vectors {
var blk block
var hdr Header
// The leading bytes of the input are copied into blk and used as the raw
// header; whatever is left over becomes the stream the Reader reads from.
v.input = v.input[copy(blk[:], v.input):]
tr := Reader{r: bytes.NewReader(v.input)}
got, err := tr.readOldGNUSparseMap(&hdr, &blk)
if !equalSparseEntries(got, v.wantMap) {
t.Errorf("test %d, readOldGNUSparseMap(): got %v, want %v", i, got, v.wantMap)
}
if err != v.wantErr {
t.Errorf("test %d, readOldGNUSparseMap() = %v, want %v", i, err, v.wantErr)
}
if hdr.Size != v.wantSize {
t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize)
}
}
}
// TestReadGNUSparsePAXHeaders drives Reader.readGNUSparsePAXHeaders with a
// table of PAX header maps (and, for some vectors, raw stream data) and
// checks the returned sparse map and error as well as the Size and Name
// fields left in the Header.
func TestReadGNUSparsePAXHeaders(t *testing.T) {
// padInput appends blockPadding(len(s)) zero bytes, taken from zeroBlock, to s.
padInput := func(s string) string {
return s + string(zeroBlock[:blockPadding(int64(len(s)))])
}
// inputData is the content of the stream behind the Reader; inputHdrs are
// the PAX records handed to readGNUSparsePAXHeaders. In this table, only
// vectors that declare major "1" / minor "0" supply inputData.
vectors := []struct {
inputData string
inputHdrs map[string]string
wantMap sparseDatas
wantSize int64
wantName string
wantErr error
}{{
inputHdrs: nil,
wantErr: nil,
}, {
inputHdrs: map[string]string{
paxGNUSparseNumBlocks: strconv.FormatInt(math.MaxInt64, 10),
paxGNUSparseMap: "0,1,2,3",
},
wantErr: ErrHeader,
}, {
inputHdrs: map[string]string{
paxGNUSparseNumBlocks: "4\x00",
paxGNUSparseMap: "0,1,2,3",
},
wantErr: ErrHeader,
}, {
// The map lists two offset/length pairs but four blocks are declared.
inputHdrs: map[string]string{
paxGNUSparseNumBlocks: "4",
paxGNUSparseMap: "0,1,2,3",
},
wantErr: ErrHeader,
}, {
inputHdrs: map[string]string{
paxGNUSparseNumBlocks: "2",
paxGNUSparseMap: "0,1,2,3",
},
wantMap: sparseDatas{{0, 1}, {2, 3}},
}, {
inputHdrs: map[string]string{
paxGNUSparseNumBlocks: "2",
paxGNUSparseMap: "0, 1,2,3",
},
wantErr: ErrHeader,
}, {
inputHdrs: map[string]string{
paxGNUSparseNumBlocks: "2",
paxGNUSparseMap: "0,1,02,3",
paxGNUSparseRealSize: "4321",
},
wantMap: sparseDatas{{0, 1}, {2, 3}},
wantSize: 4321,
}, {
inputHdrs: map[string]string{
paxGNUSparseNumBlocks: "2",
paxGNUSparseMap: "0,one1,2,3",
},
wantErr: ErrHeader,
}, {
// Both paxGNUSparseSize and paxGNUSparseRealSize are present; the
// expected Header.Size is the paxGNUSparseSize value.
inputHdrs: map[string]string{
paxGNUSparseMajor: "0",
paxGNUSparseMinor: "0",
paxGNUSparseNumBlocks: "2",
paxGNUSparseMap: "0,1,2,3",
paxGNUSparseSize: "1234",
paxGNUSparseRealSize: "4321",
paxGNUSparseName: "realname",
},
wantMap: sparseDatas{{0, 1}, {2, 3}},
wantSize: 1234,
wantName: "realname",
}, {
inputHdrs: map[string]string{
paxGNUSparseMajor: "0",
paxGNUSparseMinor: "0",
paxGNUSparseNumBlocks: "1",
paxGNUSparseMap: "10737418240,512",
paxGNUSparseSize: "10737418240",
paxGNUSparseName: "realname",
},
wantMap: sparseDatas{{10737418240, 512}},
wantSize: 10737418240,
wantName: "realname",
}, {
inputHdrs: map[string]string{
paxGNUSparseMajor: "0",
paxGNUSparseMinor: "0",
paxGNUSparseNumBlocks: "0",
paxGNUSparseMap: "",
},
wantMap: sparseDatas{},
}, {
inputHdrs: map[string]string{
paxGNUSparseMajor: "0",
paxGNUSparseMinor: "1",
paxGNUSparseNumBlocks: "4",
paxGNUSparseMap: "0,5,10,5,20,5,30,5",
},
wantMap: sparseDatas{{0, 5}, {10, 5}, {20, 5}, {30, 5}},
}, {
// Major "1" / minor "0" with no inputData: io.ErrUnexpectedEOF is
// expected even though a map record is present in the headers.
inputHdrs: map[string]string{
paxGNUSparseMajor: "1",
paxGNUSparseMinor: "0",
paxGNUSparseNumBlocks: "4",
paxGNUSparseMap: "0,5,10,5,20,5,30,5",
},
wantErr: io.ErrUnexpectedEOF,
}, {
inputData: padInput("0\n"),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantMap: sparseDatas{},
}, {
// Same as above, but the last padding byte is replaced with "#".
inputData: padInput("0\n")[:blockSize-1] + "#",
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantMap: sparseDatas{},
}, {
inputData: padInput("0"),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantErr: io.ErrUnexpectedEOF,
}, {
inputData: padInput("ab\n"),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantErr: ErrHeader,
}, {
inputData: padInput("1\n2\n3\n"),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantMap: sparseDatas{{2, 3}},
}, {
inputData: padInput("1\n2\n"),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantErr: io.ErrUnexpectedEOF,
}, {
inputData: padInput("1\n2\n\n"),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantErr: ErrHeader,
}, {
inputData: string(zeroBlock[:]) + padInput("0\n"),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantErr: ErrHeader,
}, {
inputData: strings.Repeat("0", blockSize) + padInput("1\n5\n1\n"),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantMap: sparseDatas{{5, 1}},
}, {
inputData: padInput(fmt.Sprintf("%d\n", int64(math.MaxInt64))),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantErr: ErrHeader,
}, {
// Numbers carrying very long runs of leading zeros.
inputData: padInput(strings.Repeat("0", 300) + "1\n" + strings.Repeat("0", 1000) + "5\n" + strings.Repeat("0", 800) + "2\n"),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantMap: sparseDatas{{5, 2}},
}, {
inputData: padInput("2\n10737418240\n512\n21474836480\n512\n"),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantMap: sparseDatas{{10737418240, 512}, {21474836480, 512}},
}, {
// One hundred entries, generated programmatically on both sides.
inputData: padInput("100\n" + func() string {
var ss []string
for i := 0; i < 100; i++ {
ss = append(ss, fmt.Sprintf("%d\n%d\n", int64(i)<<30, 512))
}
return strings.Join(ss, "")
}()),
inputHdrs: map[string]string{paxGNUSparseMajor: "1", paxGNUSparseMinor: "0"},
wantMap: func() (spd sparseDatas) {
for i := 0; i < 100; i++ {
spd = append(spd, SparseEntry{int64(i) << 30, 512})
}
return spd
}(),
}}
for i, v := range vectors {
var hdr Header
// One extra "#" byte follows the real input. On success it must still be
// unread (checked at the bottom of the loop via r.Len()).
r := strings.NewReader(v.inputData + "#") // Add canary byte
tr := Reader{curr: &regFileReader{r, int64(r.Len())}}
got, err := tr.readGNUSparsePAXHeaders(&hdr, v.inputHdrs)
if !equalSparseEntries(got, v.wantMap) {
t.Errorf("test %d, readGNUSparsePAXHeaders(): got %v, want %v", i, got, v.wantMap)
}
if err != v.wantErr {
t.Errorf("test %d, readGNUSparsePAXHeaders() = %v, want %v", i, err, v.wantErr)
}
if hdr.Size != v.wantSize {
t.Errorf("test %d, Header.Size = %d, want %d", i, hdr.Size, v.wantSize)
}
if hdr.Name != v.wantName {
t.Errorf("test %d, Header.Name = %s, want %s", i, hdr.Name, v.wantName)
}
if v.wantErr == nil && r.Len() == 0 {
t.Errorf("test %d, canary byte unexpectedly consumed", i)
}
}
}
// TestFileReader exercises the fileReader implementations through their
// Read, Discard and Remaining methods. Each vector pairs one reader with an
// ordered script of operations; the operations run in sequence against the
// same reader, so later steps observe the state left by earlier ones.
func TestFileReader(t *testing.T) {
type (
testRead struct { // ReadN(cnt) == (wantStr, wantErr)
cnt int
wantStr string
wantErr error
}
testDiscard struct { // Discard(cnt) == (wantCnt, wantErr)
cnt int64
wantCnt int64
wantErr error
}
testRemaining struct { // Remaining() == wantCnt
wantCnt int64
}
testFnc interface{} // testRead | testDiscard | testRemaining
)
// makeReg returns a regFileReader over the string s whose byte counter is
// set to n. n may differ from len(s) to model truncated or overlong input.
makeReg := func(s string, n int) fileReader {
return &regFileReader{strings.NewReader(s), int64(n)}
}
// makeSparse wraps fr in a sparseFileReader. spd lists the data fragments;
// it is validated against size, then a copy of it is inverted into the
// list of holes that the sparseFileReader works with. The test aborts if
// spd is not a valid sparse map.
makeSparse := func(fr fileReader, spd sparseDatas, size int64) fileReader {
if !validateSparseEntries(spd, size) {
t.Fatalf("invalid sparse map: %v", spd)
}
sph := invertSparseEntries(append([]SparseEntry{}, spd...), size)
return &sparseFileReader{fr, sph, 0}
}
vectors := []struct {
fr fileReader
tests []testFnc
}{{
// Regular reader: no data, zero declared size.
fr: makeReg("", 0),
tests: []testFnc{
testRemaining{0},
testRead{0, "", io.EOF},
testRead{1, "", io.EOF},
testDiscard{0, 0, nil},
testDiscard{1, 0, io.EOF},
testRemaining{0},
},
}, {
// Regular reader: declared size is larger than the available data.
fr: makeReg("", 1),
tests: []testFnc{
testRemaining{1},
testRead{0, "", io.ErrUnexpectedEOF},
testRead{5, "", io.ErrUnexpectedEOF},
testDiscard{0, 0, nil},
testDiscard{1, 0, io.ErrUnexpectedEOF},
testRemaining{1},
},
}, {
fr: makeReg("hello", 5),
tests: []testFnc{
testRemaining{5},
testRead{5, "hello", io.EOF},
testRemaining{0},
},
}, {
fr: makeReg("hello, world", 50),
tests: []testFnc{
testRemaining{50},
testDiscard{7, 7, nil},
testRemaining{43},
testRead{5, "world", nil},
testRemaining{38},
testDiscard{1, 0, io.ErrUnexpectedEOF},
testRead{1, "", io.ErrUnexpectedEOF},
testRemaining{38},
},
}, {
// Regular reader: the stream holds more bytes than the declared size.
fr: makeReg("hello, world", 5),
tests: []testFnc{
testRemaining{5},
testRead{0, "", nil},
testRead{4, "hell", nil},
testRemaining{1},
testDiscard{5, 1, io.EOF},
testRemaining{0},
testDiscard{5, 0, io.EOF},
testRead{0, "", io.EOF},
},
}, {
// Sparse readers from here on. The first argument to makeSparse is the
// dense data, the second the data fragments, the third the expanded size.
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8),
tests: []testFnc{
testRemaining{8},
testRead{3, "ab\x00", nil},
testRead{10, "\x00\x00cde", io.EOF},
testRemaining{0},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8),
tests: []testFnc{
testRemaining{8},
testDiscard{100, 8, io.EOF},
testRemaining{0},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 10),
tests: []testFnc{
testRemaining{10},
testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF},
testRemaining{0},
},
}, {
fr: makeSparse(makeReg("abc", 5), sparseDatas{{0, 2}, {5, 3}}, 10),
tests: []testFnc{
testRemaining{10},
testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF},
testRemaining{4},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 8),
tests: []testFnc{
testRemaining{8},
testRead{8, "\x00abc\x00\x00de", io.EOF},
testRemaining{0},
},
}, {
// Zero-length data fragments in the middle of the map.
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8),
tests: []testFnc{
testRemaining{8},
testRead{8, "\x00abc\x00\x00de", io.EOF},
testRemaining{0},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 10),
tests: []testFnc{
testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF},
},
}, {
// Zero-length data fragments at the end of the map.
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10),
tests: []testFnc{
testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF},
},
}, {
// No data fragments at all: the whole file is one hole.
fr: makeSparse(makeReg("", 0), sparseDatas{}, 2),
tests: []testFnc{
testRead{100, "\x00\x00", io.EOF},
},
}, {
// In the following vectors the dense data is shorter than the 8 bytes the
// sparse map refers to. When the regular reader's declared size matches
// the data actually present, errMissData is expected; when the declared
// size is 8 but the stream is shorter, io.ErrUnexpectedEOF is expected.
fr: makeSparse(makeReg("", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
tests: []testFnc{
testRead{100, "\x00", io.ErrUnexpectedEOF},
},
}, {
fr: makeSparse(makeReg("ab", 2), sparseDatas{{1, 3}, {6, 5}}, 15),
tests: []testFnc{
testRead{100, "\x00ab", errMissData},
},
}, {
fr: makeSparse(makeReg("ab", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
tests: []testFnc{
testRead{100, "\x00ab", io.ErrUnexpectedEOF},
},
}, {
fr: makeSparse(makeReg("abc", 3), sparseDatas{{1, 3}, {6, 5}}, 15),
tests: []testFnc{
testRead{100, "\x00abc\x00\x00", errMissData},
},
}, {
fr: makeSparse(makeReg("abc", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
tests: []testFnc{
testRead{100, "\x00abc\x00\x00", io.ErrUnexpectedEOF},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 5}}, 15),
tests: []testFnc{
testRead{100, "\x00abc\x00\x00de", errMissData},
},
}, {
fr: makeSparse(makeReg("abcde", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
tests: []testFnc{
testRead{100, "\x00abc\x00\x00de", io.ErrUnexpectedEOF},
},
}, {
// The dense data holds more bytes than the sparse map refers to, so
// errUnrefData is expected once the expanded file has been consumed.
fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15),
tests: []testFnc{
testRemaining{15},
testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData},
testDiscard{100, 0, errUnrefData},
testRemaining{0},
},
}, {
fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15),
tests: []testFnc{
testRemaining{15},
testDiscard{100, 15, errUnrefData},
testRead{100, "", errUnrefData},
testRemaining{0},
},
}}
for i, v := range vectors {
for j, tf := range v.tests {
// Dispatch on the concrete operation type; failures are reported with
// the vector index i and the step index j.
switch tf := tf.(type) {
case testRead:
b := make([]byte, tf.cnt)
n, err := v.fr.Read(b)
if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr {
t.Errorf("test %d.%d, Read(%d):\ngot (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr)
}
case testDiscard:
got, err := v.fr.Discard(tf.cnt)
if got != tf.wantCnt || err != tf.wantErr {
t.Errorf("test %d.%d, Discard(%d) = (%d, %v), want (%d, %v)", i, j, tf.cnt, got, err, tf.wantCnt, tf.wantErr)
}
case testRemaining:
got := v.fr.Remaining()
if got != tf.wantCnt {
t.Errorf("test %d.%d, Remaining() = %d, want %d", i, j, got, tf.wantCnt)
}
default:
t.Fatalf("test %d.%d, unknown test operation: %T", i, j, tf)
}
}
}
}
......@@ -19,6 +19,116 @@ import (
"time"
)
// equalSparseEntries reports whether x and y contain the same entries in the
// same order. Two slices of length zero always compare equal, so a nil slice
// matches an empty non-nil one (reflect.DeepEqual on its own would not).
func equalSparseEntries(x, y []SparseEntry) bool {
	if len(x) == 0 && len(y) == 0 {
		return true
	}
	return reflect.DeepEqual(x, y)
}
// TestSparseEntries drives the sparse-entry helpers from one shared table.
// Every vector is passed to validateSparseEntries; vectors that are expected
// to be valid are additionally passed to alignSparseEntries and
// invertSparseEntries, and the results are compared with equalSparseEntries.
func TestSparseEntries(t *testing.T) {
	vectors := []struct {
		in   []SparseEntry
		size int64

		wantValid    bool          // Result of validateSparseEntries
		wantAligned  []SparseEntry // Result of alignSparseEntries
		wantInverted []SparseEntry // Result of invertSparseEntries
	}{{
		// Valid inputs. A vector that leaves wantAligned unset expects an
		// empty aligned result (nil and empty compare equal below).
		in: []SparseEntry{}, size: 0,
		wantValid:    true,
		wantInverted: []SparseEntry{{0, 0}},
	}, {
		in: []SparseEntry{}, size: 5000,
		wantValid:    true,
		wantInverted: []SparseEntry{{0, 5000}},
	}, {
		in: []SparseEntry{{0, 5000}}, size: 5000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{0, 5000}},
		wantInverted: []SparseEntry{{5000, 0}},
	}, {
		in: []SparseEntry{{1000, 4000}}, size: 5000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{1024, 3976}},
		wantInverted: []SparseEntry{{0, 1000}, {5000, 0}},
	}, {
		in: []SparseEntry{{0, 3000}}, size: 5000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{0, 2560}},
		wantInverted: []SparseEntry{{3000, 2000}},
	}, {
		in: []SparseEntry{{3000, 2000}}, size: 5000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{3072, 1928}},
		wantInverted: []SparseEntry{{0, 3000}, {5000, 0}},
	}, {
		in: []SparseEntry{{2000, 2000}}, size: 5000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{2048, 1536}},
		wantInverted: []SparseEntry{{0, 2000}, {4000, 1000}},
	}, {
		in: []SparseEntry{{0, 2000}, {8000, 2000}}, size: 10000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{0, 1536}, {8192, 1808}},
		wantInverted: []SparseEntry{{2000, 6000}, {10000, 0}},
	}, {
		// Adjacent and zero-length entries that together cover the whole file.
		in: []SparseEntry{{0, 2000}, {2000, 2000}, {4000, 0}, {4000, 3000}, {7000, 1000}, {8000, 0}, {8000, 2000}}, size: 10000,
		wantValid:    true,
		wantAligned:  []SparseEntry{{0, 1536}, {2048, 1536}, {4096, 2560}, {7168, 512}, {8192, 1808}},
		wantInverted: []SparseEntry{{10000, 0}},
	}, {
		// Only zero-length entries.
		in: []SparseEntry{{0, 0}, {1000, 0}, {2000, 0}, {3000, 0}, {4000, 0}, {5000, 0}}, size: 5000,
		wantValid:    true,
		wantInverted: []SparseEntry{{0, 5000}},
	}, {
		// Invalid inputs. Validation is expected to fail, so the aligned and
		// inverted results are never computed for the vectors below.
		in: []SparseEntry{{1, 0}}, size: 0,
		wantValid: false,
	}, {
		in: []SparseEntry{{-1, 0}}, size: 100,
		wantValid: false,
	}, {
		in: []SparseEntry{{0, -1}}, size: 100,
		wantValid: false,
	}, {
		in: []SparseEntry{{0, 0}}, size: -100,
		wantValid: false,
	}, {
		in: []SparseEntry{{math.MaxInt64, 3}, {6, -5}}, size: 35,
		wantValid: false,
	}, {
		in: []SparseEntry{{1, 3}, {6, -5}}, size: 35,
		wantValid: false,
	}, {
		in: []SparseEntry{{math.MaxInt64, math.MaxInt64}}, size: math.MaxInt64,
		wantValid: false,
	}, {
		in: []SparseEntry{{3, 3}}, size: 5,
		wantValid: false,
	}, {
		in: []SparseEntry{{2, 0}, {1, 0}, {0, 0}}, size: 3,
		wantValid: false,
	}, {
		in: []SparseEntry{{1, 3}, {2, 2}}, size: 10,
		wantValid: false,
	}}

	for i, v := range vectors {
		gotValid := validateSparseEntries(v.in, v.size)
		if gotValid != v.wantValid {
			t.Errorf("test %d, validateSparseEntries() = %v, want %v", i, gotValid, v.wantValid)
		}
		if !v.wantValid {
			continue
		}

		// Each helper receives its own copy of the input, in case it modifies
		// its argument in place; the table entry itself stays untouched.
		gotAligned := alignSparseEntries(append([]SparseEntry{}, v.in...), v.size)
		if !equalSparseEntries(gotAligned, v.wantAligned) {
			t.Errorf("test %d, alignSparseEntries():\ngot %v\nwant %v", i, gotAligned, v.wantAligned)
		}
		gotInverted := invertSparseEntries(append([]SparseEntry{}, v.in...), v.size)
		if !equalSparseEntries(gotInverted, v.wantInverted) {
			// The message names the function that was actually called.
			t.Errorf("test %d, invertSparseEntries():\ngot %v\nwant %v", i, gotInverted, v.wantInverted)
		}
	}
}
func TestFileInfoHeader(t *testing.T) {
fi, err := os.Stat("testdata/small.txt")
if err != nil {
......
......@@ -250,7 +250,7 @@ func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error {
size = 0
}
tw.nb = size
tw.pad = -size & (blockSize - 1) // blockSize is a power of two
tw.pad = blockPadding(size)
return nil
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment