Commit b8a12928 authored by Joe Tsai, committed by Brad Fitzpatrick

archive/tar: convert Reader.Next to be loop based

Motivation for change:
* Recursive logic is hard to follow, since it tends to apply
things in reverse. On the other hand, the tar formats tend to
describe meta headers as affecting the next entry.
* Recursion also applies changes in the wrong order. Two test
files are attached that use multiple headers. The previous Go
behavior differs from what GNU and BSD tar do.

Change-Id: Ic1557256fc1363c5cb26570e5d0b9f65a9e57341
Reviewed-on: https://go-review.googlesource.com/14624
Run-TryBot: Joe Tsai <joetsai@digital-static.net>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent 9bad9957
...@@ -117,92 +117,82 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} } ...@@ -117,92 +117,82 @@ func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
// //
// io.EOF is returned at the end of the input. // io.EOF is returned at the end of the input.
func (tr *Reader) Next() (*Header, error) { func (tr *Reader) Next() (*Header, error) {
var p parser
var hdr *Header
if tr.err == nil {
tr.skipUnread()
}
if tr.err != nil { if tr.err != nil {
return hdr, tr.err return nil, tr.err
} }
hdr = tr.readHeader()
if hdr == nil { var hdr *Header
return hdr, tr.err var extHdrs map[string]string
}
// Check for PAX/GNU header. // Externally, Next iterates through the tar archive as if it is a series of
switch hdr.Typeflag { // files. Internally, the tar format often uses fake "files" to add meta
case TypeXHeader: // data that describes the next file. These meta data "files" should not
// PAX extended header // normally be visible to the outside. As such, this loop iterates through
headers, err := parsePAX(tr) // one or more "header files" until it finds a "normal file".
if err != nil { loop:
return nil, err for {
} tr.err = tr.skipUnread()
// We actually read the whole file,
// but this skips alignment padding
tr.skipUnread()
if tr.err != nil { if tr.err != nil {
return nil, tr.err return nil, tr.err
} }
hdr = tr.readHeader() hdr = tr.readHeader()
if hdr == nil { if tr.err != nil {
return nil, tr.err return nil, tr.err
} }
mergePAX(hdr, headers)
// Check for a PAX format sparse file // Check for PAX/GNU special headers and files.
sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) switch hdr.Typeflag {
if err != nil { case TypeXHeader:
tr.err = err extHdrs, tr.err = parsePAX(tr)
return nil, err if tr.err != nil {
}
if sp != nil {
// Sparse files do not make sense when applied to the special header
// types that never have a data section.
if isHeaderOnlyType(hdr.Typeflag) {
tr.err = ErrHeader
return nil, tr.err return nil, tr.err
} }
continue loop // This is a meta header affecting the next header
// Current file is a PAX format GNU sparse file. case TypeGNULongName, TypeGNULongLink:
// Set the current file reader to a sparse file reader. var realname []byte
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) realname, tr.err = ioutil.ReadAll(tr)
if tr.err != nil { if tr.err != nil {
return nil, tr.err return nil, tr.err
} }
// Convert GNU extensions to use PAX headers.
if extHdrs == nil {
extHdrs = make(map[string]string)
}
var p parser
switch hdr.Typeflag {
case TypeGNULongName:
extHdrs[paxPath] = p.parseString(realname)
case TypeGNULongLink:
extHdrs[paxLinkpath] = p.parseString(realname)
}
if p.err != nil {
tr.err = p.err
return nil, tr.err
}
continue loop // This is a meta header affecting the next header
default:
mergePAX(hdr, extHdrs)
// Check for a PAX format sparse file
sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
if err != nil {
tr.err = err
return nil, err
}
if sp != nil {
// Current file is a PAX format GNU sparse file.
// Set the current file reader to a sparse file reader.
tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
if tr.err != nil {
return nil, tr.err
}
}
break loop // This is a file, so stop
} }
return hdr, nil
case TypeGNULongName:
// We have a GNU long name header. Its contents are the real file name.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
hdr, tr.err = tr.Next()
if tr.err != nil {
return nil, tr.err
}
hdr.Name = p.parseString(realname)
if p.err != nil {
return nil, p.err
}
return hdr, nil
case TypeGNULongLink:
// We have a GNU long link header.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
hdr, tr.err = tr.Next()
if tr.err != nil {
return nil, tr.err
}
hdr.Linkname = p.parseString(realname)
if p.err != nil {
return nil, p.err
}
return hdr, nil
} }
return hdr, tr.err return hdr, nil
} }
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
......
...@@ -288,6 +288,30 @@ var untarTests = []*untarTest{ ...@@ -288,6 +288,30 @@ var untarTests = []*untarTest{
}, },
}, },
}, },
{
// Matches the behavior of GNU, BSD, and STAR tar utilities.
file: "testdata/gnu-multi-hdrs.tar",
headers: []*Header{
{
Name: "GNU2/GNU2/long-path-name",
Linkname: "GNU4/GNU4/long-linkpath-name",
ModTime: time.Unix(0, 0),
Typeflag: '2',
},
},
},
{
// Matches the behavior of GNU and BSD tar utilities.
file: "testdata/pax-multi-hdrs.tar",
headers: []*Header{
{
Name: "bar",
Linkname: "PAX4/PAX4/long-linkpath-name",
ModTime: time.Unix(0, 0),
Typeflag: '2',
},
},
},
{ {
file: "testdata/neg-size.tar", file: "testdata/neg-size.tar",
err: ErrHeader, err: ErrHeader,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment