fs.go 17.5 KB
Newer Older
Russ Cox's avatar
Russ Cox committed
1 2 3 4 5 6 7 8 9
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// HTTP file system request handler

package http

import (
10
	"errors"
11 12
	"fmt"
	"io"
13
	"mime"
14 15
	"mime/multipart"
	"net/textproto"
16
	"net/url"
17
	"os"
18
	"path"
19
	"path/filepath"
20
	"sort"
21
	"strconv"
22
	"strings"
23
	"time"
Russ Cox's avatar
Russ Cox committed
24 25
)

26 27 28 29 30 31
// A Dir implements FileSystem using the native file system restricted to a
// specific directory tree.
//
// While the FileSystem.Open method takes '/'-separated paths, a Dir's string
// value is a filename on the native file system, not a URL, so it is separated
// by filepath.Separator, which isn't necessarily '/'.
//
// An empty Dir is treated as ".".
type Dir string

// Open implements FileSystem by opening the file called name, interpreted
// relative to the directory d.
//
// name is a '/'-separated path. It is rejected with an error if it contains
// a NUL byte or, on systems whose separator is not '/', the native path
// separator. The name is cleaned as a rooted path before it is joined to d,
// so ".." elements cannot climb lexically above d.
func (d Dir) Open(name string) (File, error) {
	if filepath.Separator != '/' && strings.ContainsRune(name, filepath.Separator) ||
		strings.Contains(name, "\x00") {
		return nil, errors.New("http: invalid character in file path")
	}
	dir := string(d)
	if dir == "" {
		dir = "."
	}
	f, err := os.Open(filepath.Join(dir, filepath.FromSlash(path.Clean("/"+name))))
	if err != nil {
		// Return a literal nil: handing back f here would wrap a nil
		// *os.File in a non-nil File interface value.
		return nil, err
	}
	return f, nil
}

// A FileSystem implements access to a collection of named files.
// The elements in a file path are separated by slash ('/', U+002F)
// characters, regardless of host operating system convention.
type FileSystem interface {
	Open(name string) (File, error)
}

// A File is returned by a FileSystem's Open method and can be
// served by the FileServer implementation.
//
// The methods should behave the same as those on an *os.File.
type File interface {
	io.Closer
	io.Reader
	Readdir(count int) ([]os.FileInfo, error)
	Seek(offset int64, whence int) (int64, error)
	Stat() (os.FileInfo, error)
}

func dirList(w ResponseWriter, f File) {
72 73 74 75 76 77 78 79 80 81
	dirs, err := f.Readdir(-1)
	if err != nil {
		// TODO: log err.Error() to the Server.ErrorLog, once it's possible
		// for a handler to get at its Server via the ResponseWriter. See
		// Issue 12438.
		Error(w, "Error reading directory", StatusInternalServerError)
		return
	}
	sort.Sort(byName(dirs))

82
	w.Header().Set("Content-Type", "text/html; charset=utf-8")
83
	fmt.Fprintf(w, "<pre>\n")
84 85 86 87
	for _, d := range dirs {
		name := d.Name()
		if d.IsDir() {
			name += "/"
Russ Cox's avatar
Russ Cox committed
88
		}
89 90 91 92 93
		// name may contain '?' or '#', which must be escaped to remain
		// part of the URL path, and not indicate the start of a query
		// string or fragment.
		url := url.URL{Path: name}
		fmt.Fprintf(w, "<a href=\"%s\">%s</a>\n", url.String(), htmlReplacer.Replace(name))
Russ Cox's avatar
Russ Cox committed
94
	}
95
	fmt.Fprintf(w, "</pre>\n")
Russ Cox's avatar
Russ Cox committed
96 97
}

98 99 100 101 102 103 104 105 106 107 108 109
// ServeContent replies to the request using the content in the
// provided ReadSeeker.  The main benefit of ServeContent over io.Copy
// is that it handles Range requests properly, sets the MIME type, and
// handles If-Modified-Since requests.
//
// If the response's Content-Type header is not set, ServeContent
// first tries to deduce the type from name's file extension and,
// if that fails, falls back to reading the first block of the content
// and passing it to DetectContentType.
// The name is otherwise unused; in particular it can be empty and is
// never sent in the response.
//
110 111 112 113
// If modtime is not the zero time or Unix epoch, ServeContent
// includes it in a Last-Modified header in the response.  If the
// request includes an If-Modified-Since header, ServeContent uses
// modtime to decide whether the content needs to be sent at all.
114 115 116 117
//
// The content's Seek method must work: ServeContent uses
// a seek to the end of the content to determine its size.
//
118 119 120
// If the caller has set w's ETag header, ServeContent uses it to
// handle requests using If-Range and If-None-Match.
//
121 122
// Note that *os.File implements the io.ReadSeeker interface.
func ServeContent(w ResponseWriter, req *Request, name string, modtime time.Time, content io.ReadSeeker) {
123 124 125 126 127 128 129 130 131 132
	sizeFunc := func() (int64, error) {
		size, err := content.Seek(0, os.SEEK_END)
		if err != nil {
			return 0, errSeeker
		}
		_, err = content.Seek(0, os.SEEK_SET)
		if err != nil {
			return 0, errSeeker
		}
		return size, nil
133
	}
134
	serveContent(w, req, name, modtime, sizeFunc, content)
135 136
}

137 138 139 140 141 142
// errSeeker is the error returned by ServeContent's sizeFunc when seeking
// the content fails. It deliberately carries a fixed message: the
// underlying Seeker's error text is left out of the sizeFunc reply so that
// it is not sent over HTTP to end users.
var errSeeker = errors.New("seeker can't seek")

143 144 145
// if name is empty, filename is unknown. (used for mime type, before sniffing)
// if modtime.IsZero(), modtime is unknown.
// content must be seeked to the beginning of the file.
146 147
// The sizeFunc is called at most once. Its error, if any, is sent in the HTTP response.
func serveContent(w ResponseWriter, r *Request, name string, modtime time.Time, sizeFunc func() (int64, error), content io.ReadSeeker) {
148 149 150
	if checkLastModified(w, r, modtime) {
		return
	}
151
	rangeReq, done := checkETag(w, r, modtime)
152 153 154
	if done {
		return
	}
155 156 157

	code := StatusOK

158 159 160 161 162
	// If Content-Type isn't set, use the file's extension to find it, but
	// if the Content-Type is unset explicitly, do not sniff the type.
	ctypes, haveType := w.Header()["Content-Type"]
	var ctype string
	if !haveType {
163
		ctype = mime.TypeByExtension(filepath.Ext(name))
164 165
		if ctype == "" {
			// read a chunk to decide between utf-8 text and binary
Brad Fitzpatrick's avatar
Brad Fitzpatrick committed
166
			var buf [sniffLen]byte
167
			n, _ := io.ReadFull(content, buf[:])
Brad Fitzpatrick's avatar
Brad Fitzpatrick committed
168
			ctype = DetectContentType(buf[:n])
169 170 171 172 173 174 175
			_, err := content.Seek(0, os.SEEK_SET) // rewind to output whole file
			if err != nil {
				Error(w, "seeker can't seek", StatusInternalServerError)
				return
			}
		}
		w.Header().Set("Content-Type", ctype)
176 177
	} else if len(ctypes) > 0 {
		ctype = ctypes[0]
178 179
	}

180 181 182 183 184 185
	size, err := sizeFunc()
	if err != nil {
		Error(w, err.Error(), StatusInternalServerError)
		return
	}

186 187
	// handle Content-Range header.
	sendSize := size
188
	var sendContent io.Reader = content
189
	if size >= 0 {
190
		ranges, err := parseRange(rangeReq, size)
191 192 193 194
		if err != nil {
			Error(w, err.Error(), StatusRequestedRangeNotSatisfiable)
			return
		}
195
		if sumRangesSize(ranges) > size {
196
			// The total number of bytes in all the ranges
197
			// is larger than the size of the file by
198 199 200 201
			// itself, so this is probably an attack, or a
			// dumb client.  Ignore the range request.
			ranges = nil
		}
202 203 204 205 206 207 208 209 210 211 212 213 214
		switch {
		case len(ranges) == 1:
			// RFC 2616, Section 14.16:
			// "When an HTTP message includes the content of a single
			// range (for example, a response to a request for a
			// single range, or to a request for a set of ranges
			// that overlap without any holes), this content is
			// transmitted with a Content-Range header, and a
			// Content-Length header showing the number of bytes
			// actually transferred.
			// ...
			// A response to a request for a single range MUST NOT
			// be sent using the multipart/byteranges media type."
215 216 217 218 219 220 221
			ra := ranges[0]
			if _, err := content.Seek(ra.start, os.SEEK_SET); err != nil {
				Error(w, err.Error(), StatusRequestedRangeNotSatisfiable)
				return
			}
			sendSize = ra.length
			code = StatusPartialContent
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
			w.Header().Set("Content-Range", ra.contentRange(size))
		case len(ranges) > 1:
			sendSize = rangesMIMESize(ranges, ctype, size)
			code = StatusPartialContent

			pr, pw := io.Pipe()
			mw := multipart.NewWriter(pw)
			w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary())
			sendContent = pr
			defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish.
			go func() {
				for _, ra := range ranges {
					part, err := mw.CreatePart(ra.mimeHeader(ctype, size))
					if err != nil {
						pw.CloseWithError(err)
						return
					}
					if _, err := content.Seek(ra.start, os.SEEK_SET); err != nil {
						pw.CloseWithError(err)
						return
					}
					if _, err := io.CopyN(part, content, ra.length); err != nil {
						pw.CloseWithError(err)
						return
					}
				}
				mw.Close()
				pw.Close()
			}()
251 252 253 254 255 256 257 258 259 260 261
		}

		w.Header().Set("Accept-Ranges", "bytes")
		if w.Header().Get("Content-Encoding") == "" {
			w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10))
		}
	}

	w.WriteHeader(code)

	if r.Method != "HEAD" {
262
		io.CopyN(w, sendContent, sendSize)
263 264 265
	}
}

266 267
// unixEpochTime is the Unix epoch, time.Unix(0, 0). checkLastModified
// treats a modtime equal to it as garbage and ignores it.
var unixEpochTime = time.Unix(0, 0)

268 269 270
// modtime is the modification time of the resource to be served, or IsZero().
// return value is whether this request is now complete.
func checkLastModified(w ResponseWriter, r *Request, modtime time.Time) bool {
271 272 273 274
	if modtime.IsZero() || modtime.Equal(unixEpochTime) {
		// If the file doesn't have a modtime (IsZero), or the modtime
		// is obviously garbage (Unix time == 0), then ignore modtimes
		// and don't process the If-Modified-Since header.
275 276
		return false
	}
277 278 279 280

	// The Date-Modified header truncates sub-second precision, so
	// use mtime < t+1s instead of mtime <= t to check for unmodified.
	if t, err := time.Parse(TimeFormat, r.Header.Get("If-Modified-Since")); err == nil && modtime.Before(t.Add(1*time.Second)) {
281 282 283
		h := w.Header()
		delete(h, "Content-Type")
		delete(h, "Content-Length")
284 285 286 287 288 289 290
		w.WriteHeader(StatusNotModified)
		return true
	}
	w.Header().Set("Last-Modified", modtime.UTC().Format(TimeFormat))
	return false
}

291
// checkETag implements If-None-Match and If-Range checks.
292 293 294 295
//
// The ETag or modtime must have been previously set in the
// ResponseWriter's headers.  The modtime is only compared at second
// granularity and may be the zero value to mean unknown.
296 297 298
//
// The return value is the effective request "Range" header to use and
// whether this request is now considered done.
299
func checkETag(w ResponseWriter, r *Request, modtime time.Time) (rangeReq string, done bool) {
300 301 302 303 304 305 306 307 308 309
	etag := w.Header().get("Etag")
	rangeReq = r.Header.get("Range")

	// Invalidate the range request if the entity doesn't match the one
	// the client was expecting.
	// "If-Range: version" means "ignore the Range: header unless version matches the
	// current file."
	// We only support ETag versions.
	// The caller must have set the ETag on the response already.
	if ir := r.Header.get("If-Range"); ir != "" && ir != etag {
310 311 312 313 314 315 316 317 318 319 320
		// The If-Range value is typically the ETag value, but it may also be
		// the modtime date. See golang.org/issue/8367.
		timeMatches := false
		if !modtime.IsZero() {
			if t, err := ParseTime(ir); err == nil && t.Unix() == modtime.Unix() {
				timeMatches = true
			}
		}
		if !timeMatches {
			rangeReq = ""
		}
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
	}

	if inm := r.Header.get("If-None-Match"); inm != "" {
		// Must know ETag.
		if etag == "" {
			return rangeReq, false
		}

		// TODO(bradfitz): non-GET/HEAD requests require more work:
		// sending a different status code on matches, and
		// also can't use weak cache validators (those with a "W/
		// prefix).  But most users of ServeContent will be using
		// it on GET or HEAD, so only support those for now.
		if r.Method != "GET" && r.Method != "HEAD" {
			return rangeReq, false
		}

		// TODO(bradfitz): deal with comma-separated or multiple-valued
		// list of If-None-match values.  For now just handle the common
		// case of a single item.
		if inm == etag || inm == "*" {
			h := w.Header()
			delete(h, "Content-Type")
			delete(h, "Content-Length")
			w.WriteHeader(StatusNotModified)
			return "", true
		}
	}
	return rangeReq, false
}

352 353
// name is '/'-separated, not filepath.Separator.
func serveFile(w ResponseWriter, r *Request, fs FileSystem, name string, redirect bool) {
354
	const indexPage = "/index.html"
Russ Cox's avatar
Russ Cox committed
355

Russ Cox's avatar
Russ Cox committed
356
	// redirect .../index.html to .../
357 358
	// can't use Redirect() because that would make the path absolute,
	// which would be a problem running under StripPrefix
Russ Cox's avatar
Russ Cox committed
359
	if strings.HasSuffix(r.URL.Path, indexPage) {
360
		localRedirect(w, r, "./")
361
		return
Russ Cox's avatar
Russ Cox committed
362 363
	}

364
	f, err := fs.Open(name)
Russ Cox's avatar
Russ Cox committed
365
	if err != nil {
366 367
		msg, code := toHTTPError(err)
		Error(w, msg, code)
368
		return
Russ Cox's avatar
Russ Cox committed
369
	}
370
	defer f.Close()
Russ Cox's avatar
Russ Cox committed
371

372 373
	d, err := f.Stat()
	if err != nil {
374 375
		msg, code := toHTTPError(err)
		Error(w, msg, code)
376
		return
Russ Cox's avatar
Russ Cox committed
377 378 379 380
	}

	if redirect {
		// redirect to canonical path: / at end of directory url
381
		// r.URL.Path always begins with /
382
		url := r.URL.Path
383
		if d.IsDir() {
Russ Cox's avatar
Russ Cox committed
384
			if url[len(url)-1] != '/' {
385
				localRedirect(w, r, path.Base(url)+"/")
386
				return
Russ Cox's avatar
Russ Cox committed
387 388 389
			}
		} else {
			if url[len(url)-1] == '/' {
390
				localRedirect(w, r, "../"+path.Base(url))
391
				return
Russ Cox's avatar
Russ Cox committed
392 393 394 395 396
			}
		}
	}

	// use contents of index.html for directory, if present
397
	if d.IsDir() {
398
		index := strings.TrimSuffix(name, "/") + indexPage
399
		ff, err := fs.Open(index)
Russ Cox's avatar
Russ Cox committed
400
		if err == nil {
401 402
			defer ff.Close()
			dd, err := ff.Stat()
Russ Cox's avatar
Russ Cox committed
403
			if err == nil {
404 405 406
				name = index
				d = dd
				f = ff
Russ Cox's avatar
Russ Cox committed
407 408 409 410
			}
		}
	}

411
	// Still a directory? (we didn't find an index.html file)
412
	if d.IsDir() {
413 414 415
		if checkLastModified(w, r, d.ModTime()) {
			return
		}
416
		dirList(w, f)
417
		return
Russ Cox's avatar
Russ Cox committed
418 419
	}

420
	// serveContent will check modification time
421 422
	sizeFunc := func() (int64, error) { return d.Size(), nil }
	serveContent(w, r, d.Name(), d.ModTime(), sizeFunc, f)
Russ Cox's avatar
Russ Cox committed
423 424
}

425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
// toHTTPError translates a non-nil error into a generic HTTP error message
// and status code. The text of err is never part of the result: msg and
// httpStatus are shown to users, and historically Go's ServeContent always
// answered just "404 Not Found" for every error, so error details must not
// start leaking through here.
func toHTTPError(err error) (msg string, httpStatus int) {
	switch {
	case os.IsNotExist(err):
		return "404 page not found", StatusNotFound
	case os.IsPermission(err):
		return "403 Forbidden", StatusForbidden
	default:
		// Anything else is reported as a server-side failure.
		return "500 Internal Server Error", StatusInternalServerError
	}
}

441 442 443 444 445 446 447 448 449 450
// localRedirect answers with Moved Permanently, pointing the client at
// newPath with the request's raw query string, if any, appended.
// Unlike Redirect it leaves a relative newPath relative.
func localRedirect(w ResponseWriter, r *Request, newPath string) {
	target := newPath
	if r.URL.RawQuery != "" {
		target = newPath + "?" + r.URL.RawQuery
	}
	hdr := w.Header()
	hdr.Set("Location", target)
	w.WriteHeader(StatusMovedPermanently)
}

451 452 453 454 455 456 457
// ServeFile replies to the request with the contents of the named
// file or directory.
//
// As a special case, ServeFile redirects any request where r.URL.Path
// ends in "/index.html" to the same path, without the final
// "index.html". To avoid such redirects either modify the path or
// use ServeContent.
458
func ServeFile(w ResponseWriter, r *Request, name string) {
459 460
	dir, file := filepath.Split(name)
	serveFile(w, r, Dir(dir), file, false)
Russ Cox's avatar
Russ Cox committed
461 462 463
}

type fileHandler struct {
464
	root FileSystem
Russ Cox's avatar
Russ Cox committed
465 466 467 468
}

// FileServer returns a handler that serves HTTP requests
// with the contents of the file system rooted at root.
469 470 471 472 473
//
// To use the operating system's file system implementation,
// use http.Dir:
//
//     http.Handle("/", http.FileServer(http.Dir("/tmp")))
474 475 476 477
//
// As a special case, the returned file server redirects any request
// ending in "/index.html" to the same path, without the final
// "index.html".
478 479
func FileServer(root FileSystem) Handler {
	return &fileHandler{root}
480
}
Russ Cox's avatar
Russ Cox committed
481

482
func (f *fileHandler) ServeHTTP(w ResponseWriter, r *Request) {
483 484 485 486 487 488
	upath := r.URL.Path
	if !strings.HasPrefix(upath, "/") {
		upath = "/" + upath
		r.URL.Path = upath
	}
	serveFile(w, r, f.root, path.Clean(upath), true)
Russ Cox's avatar
Russ Cox committed
489
}
490 491 492 493 494 495

// httpRange specifies the byte range to be sent to the client.
type httpRange struct {
	start, length int64
}

// contentRange returns the value of a Content-Range header describing r
// within a resource of the given total size, using an inclusive last-byte
// position.
func (r httpRange) contentRange(size int64) string {
	return fmt.Sprintf("bytes %d-%d/%d", r.start, r.start+r.length-1, size)
}

// mimeHeader returns the headers of the multipart/byteranges part that
// carries r: its Content-Range within a resource of the given size and
// the Content-Type of the resource.
func (r httpRange) mimeHeader(contentType string, size int64) textproto.MIMEHeader {
	return textproto.MIMEHeader{
		"Content-Range": {r.contentRange(size)},
		"Content-Type":  {contentType},
	}
}

// parseRange parses a Range header string as per RFC 2616.
//
// s is the header value and size the total size of the resource. An empty
// s yields no ranges and no error. Ranges that extend past the end of the
// resource are clipped to it. A header that is malformed, or that contains
// a range starting at or beyond size, is rejected with an "invalid range"
// error.
func parseRange(s string, size int64) ([]httpRange, error) {
	if s == "" {
		return nil, nil // header not present
	}
	const b = "bytes="
	if !strings.HasPrefix(s, b) {
		return nil, errors.New("invalid range")
	}
	var ranges []httpRange
	for _, ra := range strings.Split(s[len(b):], ",") {
		ra = strings.TrimSpace(ra)
		if ra == "" {
			continue
		}
		i := strings.Index(ra, "-")
		if i < 0 {
			return nil, errors.New("invalid range")
		}
		start, end := strings.TrimSpace(ra[:i]), strings.TrimSpace(ra[i+1:])
		var r httpRange
		if start == "" {
			// If no start is specified, end specifies the
			// range start relative to the end of the file.
			i, err := strconv.ParseInt(end, 10, 64)
			// A negative suffix length (as in "bytes=--5") must be
			// rejected: it would place the start beyond the end of
			// the file and make the length negative.
			if err != nil || i < 0 {
				return nil, errors.New("invalid range")
			}
			if i > size {
				i = size
			}
			r.start = size - i
			r.length = size - r.start
		} else {
			i, err := strconv.ParseInt(start, 10, 64)
			if err != nil || i >= size || i < 0 {
				return nil, errors.New("invalid range")
			}
			r.start = i
			if end == "" {
				// If no end is specified, range extends to end of the file.
				r.length = size - r.start
			} else {
				i, err := strconv.ParseInt(end, 10, 64)
				if err != nil || r.start > i {
					return nil, errors.New("invalid range")
				}
				if i >= size {
					i = size - 1
				}
				r.length = i - r.start + 1
			}
		}
		ranges = append(ranges, r)
	}
	return ranges, nil
}
564 565 566 567 568 569 570 571 572

// countingWriter is an io.Writer that discards what it is given and keeps
// only a running total of the number of bytes written to it.
type countingWriter int64

// Write adds len(p) to the total and reports the whole of p as written.
// It never fails.
func (w *countingWriter) Write(p []byte) (n int, err error) {
	n = len(p)
	*w = *w + countingWriter(n)
	return n, nil
}

573
// rangesMIMESize returns the number of bytes it takes to encode the
574 575 576 577 578 579 580 581 582 583 584 585
// provided ranges as a multipart response.
func rangesMIMESize(ranges []httpRange, contentType string, contentSize int64) (encSize int64) {
	var w countingWriter
	mw := multipart.NewWriter(&w)
	for _, ra := range ranges {
		mw.CreatePart(ra.mimeHeader(contentType, contentSize))
		encSize += ra.length
	}
	mw.Close()
	encSize += int64(w)
	return
}
586 587 588 589 590 591 592

// sumRangesSize returns the total of the lengths of all the given ranges.
// Overlapping ranges are counted once per range.
func sumRangesSize(ranges []httpRange) (size int64) {
	for i := range ranges {
		size = size + ranges[i].length
	}
	return size
}
593 594 595 596 597 598

// byName implements sort.Interface, ordering file infos by their Name
// using plain string comparison.
type byName []os.FileInfo

// Len returns the number of entries.
func (infos byName) Len() int {
	return len(infos)
}

// Less reports whether the name of entry i sorts before that of entry j.
func (infos byName) Less(i, j int) bool {
	left, right := infos[i].Name(), infos[j].Name()
	return left < right
}

// Swap exchanges entries i and j.
func (infos byName) Swap(i, j int) {
	tmp := infos[i]
	infos[i] = infos[j]
	infos[j] = tmp
}