Commit 865620ba authored by Jan Provaznik's avatar Jan Provaznik Committed by Nick Thomas

Remove EXIF from JPEG/TIFF images

EXIF may contain sensitive information, when uploading
any file which may be an image (based on filename suffix),
we run it through exiftool which removes any metadata.
parent e60c48b6
......@@ -14,6 +14,7 @@ verify:
GITALY_ADDRESS: "tcp://gitaly:8075"
script:
- go version
- apt-get update && apt-get -y install libimage-exiftool-perl
- make test
test using go 1.10:
......
......@@ -168,6 +168,25 @@ make install PREFIX=/foo
On some operating systems, such as FreeBSD, you may have to use
`gmake` instead of `make`.
## Dependencies
### Exiftool
Workhorse uses [exiftool](https://www.sno.phy.queensu.ca/~phil/exiftool/) for
removing EXIF data (which may contain sensitive information) from uploaded
images. If you installed GitLab:
- Using the Omnibus package, you're all set.
- From source, make sure `exiftool` is installed:
```sh
# Debian/Ubuntu
sudo apt-get install libimage-exiftool-perl
# RHEL/CentOS
sudo yum install perl-Image-ExifTool
```
## Error tracking
GitLab-Workhorse supports remote error tracking with
......
......@@ -37,7 +37,11 @@ func LogError(r *http.Request, err error) {
}
func RequestEntityTooLarge(w http.ResponseWriter, r *http.Request, err error) {
http.Error(w, "Request Entity Too Large", http.StatusRequestEntityTooLarge)
CaptureAndFail(w, r, err, "Request Entity Too Large", http.StatusRequestEntityTooLarge)
}
func CaptureAndFail(w http.ResponseWriter, r *http.Request, err error, msg string, code int) {
http.Error(w, msg, code)
captureRavenError(r, err)
printError(r, err)
}
......
package exif
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"os/exec"
"regexp"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/log"
)
var ErrRemovingExif = errors.New("error while removing EXIF")
type cleaner struct {
ctx context.Context
cmd *exec.Cmd
stdout io.Reader
stderr bytes.Buffer
waitDone chan struct{}
waitErr error
}
func NewCleaner(ctx context.Context, stdin io.Reader) (io.Reader, error) {
c := &cleaner{
ctx: ctx,
waitDone: make(chan struct{}),
}
if err := c.startProcessing(stdin); err != nil {
return nil, err
}
return c, nil
}
func (c *cleaner) Read(p []byte) (int, error) {
n, err := c.stdout.Read(p)
if err == io.EOF {
if waitErr := c.wait(); waitErr != nil {
log.WithFields(c.ctx, log.Fields{
"command": c.cmd.Args,
"stderr": c.stderr.String(),
"error": waitErr.Error(),
}).Print("exiftool command failed")
return n, ErrRemovingExif
}
}
return n, err
}
func (c *cleaner) startProcessing(stdin io.Reader) error {
var err error
whitelisted_tags := []string{
"-ResolutionUnit",
"-XResolution",
"-YResolution",
"-YCbCrSubSampling",
"-YCbCrPositioning",
"-BitsPerSample",
"-ImageHeight",
"-ImageWidth",
"-ImageSize",
"-Copyright",
"-CopyrightNotice",
}
args := append([]string{"-all=", "--IPTC:all", "--XMP-iptcExt:all", "-tagsFromFile", "@"}, whitelisted_tags...)
args = append(args, "-")
c.cmd = exec.CommandContext(c.ctx, "exiftool", args...)
c.cmd.Stderr = &c.stderr
c.cmd.Stdin = stdin
c.stdout, err = c.cmd.StdoutPipe()
if err != nil {
return fmt.Errorf("failed to create stdout pipe: %v", err)
}
if err = c.cmd.Start(); err != nil {
return fmt.Errorf("start %v: %v", c.cmd.Args, err)
}
go func() {
c.waitErr = c.cmd.Wait()
close(c.waitDone)
}()
return nil
}
func (c *cleaner) wait() error {
<-c.waitDone
return c.waitErr
}
func IsExifFile(filename string) bool {
filenameMatch := regexp.MustCompile(`(?i)\.(jpg|jpeg|tiff)$`)
return filenameMatch.MatchString(filename)
}
package exif
import (
"context"
"io"
"io/ioutil"
"os"
"strings"
"testing"
"github.com/stretchr/testify/require"
)
func TestIsExifFile(t *testing.T) {
tests := []struct {
name string
expected bool
}{
{
name: "/full/path.jpg",
expected: true,
},
{
name: "path.jpeg",
expected: true,
},
{
name: "path.tiff",
expected: true,
},
{
name: "path.JPG",
expected: true,
},
{
name: "path.tar",
expected: false,
},
{
name: "path",
expected: false,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
require.Equal(t, test.expected, IsExifFile(test.name))
})
}
}
func TestNewCleanerWithValidFile(t *testing.T) {
input, err := os.Open("testdata/sample_exif.jpg")
require.NoError(t, err)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
cleaner, err := NewCleaner(ctx, input)
require.NoError(t, err, "Expected no error when creating cleaner command")
size, err := io.Copy(ioutil.Discard, cleaner)
require.NoError(t, err, "Expected no error when reading output")
sizeAfterStrip := int64(25399)
require.Equal(t, sizeAfterStrip, size, "Different size of converted image")
}
func TestNewCleanerWithInvalidFile(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
cleaner, err := NewCleaner(ctx, strings.NewReader("invalid image"))
require.NoError(t, err, "Expected no error when creating cleaner command")
size, err := io.Copy(ioutil.Discard, cleaner)
require.Error(t, err, "Expected error when reading output")
require.Equal(t, int64(0), size, "Size of invalid image should be 0")
}
......@@ -12,6 +12,8 @@ import (
"gitlab.com/gitlab-org/gitlab-workhorse/internal/api"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/filestore"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/log"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/upload/exif"
)
var (
......@@ -113,12 +115,30 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa
opts := filestore.GetOpts(rew.preauth)
opts.TempFilePrefix = filename
fh, err := filestore.SaveFileFromReader(ctx, p, -1, opts)
var inputReader io.Reader
if exif.IsExifFile(filename) {
log.WithFields(ctx, log.Fields{
"filename": filename,
}).Print("running exiftool to remove any metadata")
cleaner, err := exif.NewCleaner(ctx, p)
if err != nil {
return fmt.Errorf("failed to start EXIF metadata cleaner: %v", err)
}
inputReader = cleaner
} else {
inputReader = p
}
fh, err := filestore.SaveFileFromReader(ctx, inputReader, -1, opts)
if err != nil {
if err == filestore.ErrEntityTooLarge {
switch err {
case filestore.ErrEntityTooLarge, exif.ErrRemovingExif:
return err
default:
return fmt.Errorf("persisting multipart file: %v", err)
}
return fmt.Errorf("persisting multipart file: %v", err)
}
for key, value := range fh.GitLabFinalizeFields(name) {
......
......@@ -11,6 +11,7 @@ import (
"gitlab.com/gitlab-org/gitlab-workhorse/internal/api"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/filestore"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/helper"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/upload/exif"
)
// These methods are allowed to have thread-unsafe implementations.
......@@ -40,6 +41,8 @@ func HandleFileUploads(w http.ResponseWriter, r *http.Request, h http.Handler, p
h.ServeHTTP(w, r)
case filestore.ErrEntityTooLarge:
helper.RequestEntityTooLarge(w, r, err)
case exif.ErrRemovingExif:
helper.CaptureAndFail(w, r, err, "Failed to process image", http.StatusUnprocessableEntity)
default:
helper.Fail500(w, r, fmt.Errorf("handleFileUploads: extract files from multipart: %v", err))
}
......
......@@ -12,10 +12,13 @@ import (
"net/http/httptest"
"os"
"regexp"
"strconv"
"strings"
"testing"
"time"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/api"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/filestore"
"gitlab.com/gitlab-org/gitlab-workhorse/internal/helper"
......@@ -323,6 +326,93 @@ func TestInvalidFileNames(t *testing.T) {
}
}
func TestUploadHandlerRemovingExif(t *testing.T) {
tempPath, err := ioutil.TempDir("", "uploads")
require.NoError(t, err)
defer os.RemoveAll(tempPath)
var buffer bytes.Buffer
content, err := ioutil.ReadFile("exif/testdata/sample_exif.jpg")
require.NoError(t, err)
writer := multipart.NewWriter(&buffer)
file, err := writer.CreateFormFile("file", "test.jpg")
require.NoError(t, err)
_, err = file.Write(content)
require.NoError(t, err)
err = writer.Close()
require.NoError(t, err)
ts := testhelper.TestServerWithHandler(regexp.MustCompile(`/url/path\z`), func(w http.ResponseWriter, r *http.Request) {
err := r.ParseMultipartForm(100000)
require.NoError(t, err)
size, err := strconv.Atoi(r.FormValue("file.size"))
require.NoError(t, err)
require.True(t, size < len(content), "Expected the file to be smaller after removal of exif")
require.True(t, size > 0, "Expected to receive not empty file")
w.WriteHeader(200)
fmt.Fprint(w, "RESPONSE")
})
defer ts.Close()
httpRequest, err := http.NewRequest("POST", ts.URL+"/url/path", &buffer)
require.NoError(t, err)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
httpRequest = httpRequest.WithContext(ctx)
httpRequest.ContentLength = int64(buffer.Len())
httpRequest.Header.Set("Content-Type", writer.FormDataContentType())
response := httptest.NewRecorder()
handler := newProxy(ts.URL)
HandleFileUploads(response, httpRequest, handler, &api.Response{TempPath: tempPath}, &testFormProcessor{})
testhelper.AssertResponseCode(t, response, 200)
}
func TestUploadHandlerRemovingInvalidExif(t *testing.T) {
tempPath, err := ioutil.TempDir("", "uploads")
require.NoError(t, err)
defer os.RemoveAll(tempPath)
var buffer bytes.Buffer
writer := multipart.NewWriter(&buffer)
file, err := writer.CreateFormFile("file", "test.jpg")
require.NoError(t, err)
fmt.Fprint(file, "this is not valid image data")
err = writer.Close()
require.NoError(t, err)
ts := testhelper.TestServerWithHandler(regexp.MustCompile(`/url/path\z`), func(w http.ResponseWriter, r *http.Request) {
err := r.ParseMultipartForm(100000)
require.Error(t, err)
})
defer ts.Close()
httpRequest, err := http.NewRequest("POST", ts.URL+"/url/path", &buffer)
require.NoError(t, err)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
httpRequest = httpRequest.WithContext(ctx)
httpRequest.ContentLength = int64(buffer.Len())
httpRequest.Header.Set("Content-Type", writer.FormDataContentType())
response := httptest.NewRecorder()
handler := newProxy(ts.URL)
HandleFileUploads(response, httpRequest, handler, &api.Response{TempPath: tempPath}, &testFormProcessor{})
testhelper.AssertResponseCode(t, response, 422)
}
func newProxy(url string) *proxy.Proxy {
parsedURL := helper.URLMustParse(url)
return proxy.NewProxy(parsedURL, "123", roundtripper.NewTestBackendRoundTripper(parsedURL))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment