Commit 5ecec3bc authored by Alessio Caiazza's avatar Alessio Caiazza

Merge branch 'eb-workhorse-javascript-content-type-text-plain' into 'master'

Handle application javascript content type to be safe

See merge request gitlab-org/gitlab!83632
parents 7482ed82 8a1dcaa6
......@@ -8,28 +8,37 @@ import (
)
var (
ImageTypeRegex = regexp.MustCompile(`^image/*`)
SvgMimeTypeRegex = regexp.MustCompile(`^image/svg\+xml$`)
javaScriptTypeRegex = regexp.MustCompile(`^(text|application)\/javascript$`)
TextTypeRegex = regexp.MustCompile(`^text/*`)
imageTypeRegex = regexp.MustCompile(`^image/*`)
svgMimeTypeRegex = regexp.MustCompile(`^image/svg\+xml$`)
VideoTypeRegex = regexp.MustCompile(`^video/*`)
textTypeRegex = regexp.MustCompile(`^text/*`)
PdfTypeRegex = regexp.MustCompile(`application\/pdf`)
videoTypeRegex = regexp.MustCompile(`^video/*`)
AttachmentRegex = regexp.MustCompile(`^attachment`)
InlineRegex = regexp.MustCompile(`^inline`)
pdfTypeRegex = regexp.MustCompile(`application\/pdf`)
attachmentRegex = regexp.MustCompile(`^attachment`)
inlineRegex = regexp.MustCompile(`^inline`)
)
// Mime types that can't be inlined. Usually subtypes of main types
var forbiddenInlineTypes = []*regexp.Regexp{SvgMimeTypeRegex}
var forbiddenInlineTypes = []*regexp.Regexp{svgMimeTypeRegex}
// Mime types that can be inlined. We can add global types like "image/" or
// specific types like "text/plain". If there is a specific type inside a global
// allowed type that can't be inlined we must add it to the forbiddenInlineTypes var.
// One example of this is the mime type "image". We allow all images to be
// inlined except for SVGs.
var allowedInlineTypes = []*regexp.Regexp{ImageTypeRegex, TextTypeRegex, VideoTypeRegex, PdfTypeRegex}
var allowedInlineTypes = []*regexp.Regexp{imageTypeRegex, textTypeRegex, videoTypeRegex, pdfTypeRegex}
const (
svgContentType = "image/svg+xml"
textPlainContentType = "text/plain; charset=utf-8"
attachmentDispositionText = "attachment"
inlineDispositionText = "inline"
)
func SafeContentHeaders(data []byte, contentDisposition string) (string, string) {
contentType := safeContentType(data)
......@@ -40,16 +49,24 @@ func SafeContentHeaders(data []byte, contentDisposition string) (string, string)
func safeContentType(data []byte) string {
// Special case for svg because DetectContentType detects it as text
if svg.Is(data) {
return "image/svg+xml"
return svgContentType
}
// Override any existing Content-Type header from other ResponseWriters
contentType := http.DetectContentType(data)
// http.DetectContentType does not support JavaScript and would only
// return text/plain. But for cautionary measures, just in case they start supporting
// it down the road and start returning application/javascript, we want to handle it now
// to avoid regressions.
if isType(contentType, javaScriptTypeRegex) {
return textPlainContentType
}
// If the content is text type, we set to plain, because we don't
// want to render it inline if they're html or javascript
if isType(contentType, TextTypeRegex) {
return "text/plain; charset=utf-8"
if isType(contentType, textTypeRegex) {
return textPlainContentType
}
return contentType
......@@ -58,7 +75,7 @@ func safeContentType(data []byte) string {
func safeContentDisposition(contentType string, contentDisposition string) string {
// If the existing disposition is attachment we return that. This allow us
// to force a download from GitLab (ie: RawController)
if AttachmentRegex.MatchString(contentDisposition) {
if attachmentRegex.MatchString(contentDisposition) {
return contentDisposition
}
......@@ -82,11 +99,11 @@ func safeContentDisposition(contentType string, contentDisposition string) strin
func attachmentDisposition(contentDisposition string) string {
if contentDisposition == "" {
return "attachment"
return attachmentDispositionText
}
if InlineRegex.MatchString(contentDisposition) {
return InlineRegex.ReplaceAllString(contentDisposition, "attachment")
if inlineRegex.MatchString(contentDisposition) {
return inlineRegex.ReplaceAllString(contentDisposition, attachmentDispositionText)
}
return contentDisposition
......@@ -94,11 +111,11 @@ func attachmentDisposition(contentDisposition string) string {
func inlineDisposition(contentDisposition string) string {
if contentDisposition == "" {
return "inline"
return inlineDispositionText
}
if AttachmentRegex.MatchString(contentDisposition) {
return AttachmentRegex.ReplaceAllString(contentDisposition, "inline")
if attachmentRegex.MatchString(contentDisposition) {
return attachmentRegex.ReplaceAllString(contentDisposition, inlineDispositionText)
}
return contentDisposition
......
......@@ -56,11 +56,17 @@ func TestSetProperContentTypeAndDisposition(t *testing.T) {
body: "<html><body>Hello world!</body></html>",
},
{
desc: "Javascript type",
desc: "Javascript within HTML type",
contentType: "text/plain; charset=utf-8",
contentDisposition: "inline",
body: "<script>alert(\"foo\")</script>",
},
{
desc: "Javascript type",
contentType: "text/plain; charset=utf-8",
contentDisposition: "inline",
body: "alert(\"foo\")",
},
{
desc: "Image type",
contentType: "image/png",
......@@ -170,25 +176,41 @@ func TestSetProperContentTypeAndDisposition(t *testing.T) {
}
func TestFailOverrideContentType(t *testing.T) {
testCase := struct {
contentType string
body string
testCases := []struct {
desc string
overrideFromUpstream string
responseContentType string
body string
}{
contentType: "text/plain; charset=utf-8",
body: "<html><body>Hello world!</body></html>",
{
desc: "Force text/html into text/plain",
responseContentType: "text/plain; charset=utf-8",
overrideFromUpstream: "text/html; charset=utf-8",
body: "<html><body>Hello world!</body></html>",
},
{
desc: "Force application/javascript into text/plain",
responseContentType: "text/plain; charset=utf-8",
overrideFromUpstream: "application/javascript; charset=utf-8",
body: "alert(1);",
},
}
h := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
// We are pretending to be upstream or an inner layer of the ResponseWriter chain
w.Header().Set(headers.GitlabWorkhorseDetectContentTypeHeader, "true")
w.Header().Set(headers.ContentTypeHeader, "text/html; charset=utf-8")
_, err := io.WriteString(w, testCase.body)
require.NoError(t, err)
})
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
h := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
// We are pretending to be upstream or an inner layer of the ResponseWriter chain
w.Header().Set(headers.GitlabWorkhorseDetectContentTypeHeader, "true")
w.Header().Set(headers.ContentTypeHeader, tc.overrideFromUpstream)
_, err := io.WriteString(w, tc.body)
require.NoError(t, err)
})
resp := makeRequest(t, h, testCase.body, "")
resp := makeRequest(t, h, tc.body, "")
require.Equal(t, testCase.contentType, resp.Header.Get(headers.ContentTypeHeader))
require.Equal(t, tc.responseContentType, resp.Header.Get(headers.ContentTypeHeader))
})
}
}
func TestSuccessOverrideContentDispositionFromInlineToAttachment(t *testing.T) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment