Commit b42334eb authored by Matthew Holt's avatar Matthew Holt

Several improvements and bug fixes related to graceful reloads

Added a -grace flag to customize graceful shutdown period, fixed bugs related to closing file descriptors (and dup'ed fds), improved healthcheck signaling to parent, fixed a race condition with the graceful listener, etc. These improvements mainly provide better support for frequent reloading or unusual use cases of Start and Stop after a Restart (POSIX systems). This forum thread was valuable help in debugging: https://forum.golangbridge.org/t/bind-address-already-in-use-even-after-listener-closed/1510?u=matt
parent 94c746c4
...@@ -29,6 +29,7 @@ import ( ...@@ -29,6 +29,7 @@ import (
"path" "path"
"strings" "strings"
"sync" "sync"
"time"
"github.com/mholt/caddy/caddy/letsencrypt" "github.com/mholt/caddy/caddy/letsencrypt"
"github.com/mholt/caddy/server" "github.com/mholt/caddy/server"
...@@ -43,11 +44,14 @@ var ( ...@@ -43,11 +44,14 @@ var (
// If true, initialization will not show any informative output. // If true, initialization will not show any informative output.
Quiet bool Quiet bool
// HTTP2 indicates whether HTTP2 is enabled or not // HTTP2 indicates whether HTTP2 is enabled or not.
HTTP2 bool // TODO: temporary flag until http2 is standard HTTP2 bool // TODO: temporary flag until http2 is standard
// PidFile is the path to the pidfile to create // PidFile is the path to the pidfile to create.
PidFile string PidFile string
// GracefulTimeout is the maximum duration of a graceful shutdown.
GracefulTimeout time.Duration
) )
var ( var (
...@@ -91,17 +95,20 @@ const ( ...@@ -91,17 +95,20 @@ const (
) )
// Start starts Caddy with the given Caddyfile. If cdyfile // Start starts Caddy with the given Caddyfile. If cdyfile
// is nil or the process is forked from a parent as part of // is nil, the LoadCaddyfile function will be called to get
// a graceful restart, Caddy will check to see if Caddyfile // one.
// was piped from stdin and use that. It blocks until all the //
// servers are listening. // This function blocks until all the servers are listening.
// //
// If this process is a fork and no Caddyfile was piped in, // Note (POSIX): If Start is called in the child process of a
// an error will be returned (the Restart() function does this // restart more than once within the duration of the graceful
// for you automatically). If this process is NOT a fork and // cutoff (i.e. the child process called Start a first time,
// cdyfile is nil, a default configuration will be assumed. // then called Stop, then Start again within the first 5 seconds
// In any case, an error is returned if Caddy could not be // or however long GracefulTimeout is) and the Caddyfiles have
// started. // at least one listener address in common, the second Start
// may fail with "address already in use" as there's no
// guarantee that the parent process has relinquished the
// address before the grace period ends.
func Start(cdyfile Input) (err error) { func Start(cdyfile Input) (err error) {
// If we return with no errors, we must do two things: tell the // If we return with no errors, we must do two things: tell the
// parent that we succeeded and write to the pidfile. // parent that we succeeded and write to the pidfile.
...@@ -148,17 +155,6 @@ func Start(cdyfile Input) (err error) { ...@@ -148,17 +155,6 @@ func Start(cdyfile Input) (err error) {
} }
startedBefore = true startedBefore = true
// Close remaining file descriptors we may have inherited that we don't need
if IsRestart() {
for _, fdIndex := range loadedGob.ListenerFds {
file := os.NewFile(fdIndex, "")
fln, err := net.FileListener(file)
if err == nil {
fln.Close()
}
}
}
// Show initialization output // Show initialization output
if !Quiet && !IsRestart() { if !Quiet && !IsRestart() {
var checkedFdLimit bool var checkedFdLimit bool
...@@ -192,7 +188,7 @@ func startServers(groupings bindingGroup) error { ...@@ -192,7 +188,7 @@ func startServers(groupings bindingGroup) error {
errChan := make(chan error, len(groupings)) // must be buffered to allow Serve functions below to return if stopped later errChan := make(chan error, len(groupings)) // must be buffered to allow Serve functions below to return if stopped later
for _, group := range groupings { for _, group := range groupings {
s, err := server.New(group.BindAddr.String(), group.Configs) s, err := server.New(group.BindAddr.String(), group.Configs, GracefulTimeout)
if err != nil { if err != nil {
return err return err
} }
...@@ -215,7 +211,8 @@ func startServers(groupings bindingGroup) error { ...@@ -215,7 +211,8 @@ func startServers(groupings bindingGroup) error {
return errors.New("listener for " + s.Addr + " was not a ListenerFile") return errors.New("listener for " + s.Addr + " was not a ListenerFile")
} }
delete(loadedGob.ListenerFds, s.Addr) // mark it as used file.Close()
delete(loadedGob.ListenerFds, s.Addr)
} }
} }
...@@ -252,6 +249,14 @@ func startServers(groupings bindingGroup) error { ...@@ -252,6 +249,14 @@ func startServers(groupings bindingGroup) error {
serversMu.Unlock() serversMu.Unlock()
} }
// Close the remaining (unused) file descriptors to free up resources
if IsRestart() {
for key, fdIndex := range loadedGob.ListenerFds {
os.NewFile(fdIndex, "").Close()
delete(loadedGob.ListenerFds, key)
}
}
// Wait for all servers to finish starting // Wait for all servers to finish starting
startupWg.Wait() startupWg.Wait()
...@@ -276,7 +281,9 @@ func Stop() error { ...@@ -276,7 +281,9 @@ func Stop() error {
serversMu.Lock() serversMu.Lock()
for _, s := range servers { for _, s := range servers {
s.Stop() // TODO: error checking/reporting? if err := s.Stop(); err != nil {
log.Printf("[ERROR] Stopping %s: %v", s.Addr, err)
}
} }
servers = []*server.Server{} // don't reuse servers servers = []*server.Server{} // don't reuse servers
serversMu.Unlock() serversMu.Unlock()
...@@ -289,12 +296,13 @@ func Wait() { ...@@ -289,12 +296,13 @@ func Wait() {
wg.Wait() wg.Wait()
} }
// LoadCaddyfile loads a Caddyfile in a way that prioritizes // LoadCaddyfile loads a Caddyfile, prioritizing a Caddyfile
// reading from stdin pipe; otherwise it calls loader to load // piped from stdin as part of a restart (only happens on first call
// the Caddyfile. If loader does not return a Caddyfile, the // to LoadCaddyfile). If it is not a restart, this function tries
// default one will be returned. Thus, if there are no other // calling the user's loader function, and if that returns nil, then
// errors, this function always returns at least the default // this function resorts to the default configuration. Thus, if there
// Caddyfile (not the previously-used Caddyfile). // are no other errors, this function always returns at least the
// default Caddyfile.
func LoadCaddyfile(loader func() (Input, error)) (cdyfile Input, err error) { func LoadCaddyfile(loader func() (Input, error)) (cdyfile Input, err error) {
// If we are a fork, finishing the restart is highest priority; // If we are a fork, finishing the restart is highest priority;
// piped input is required in this case. // piped input is required in this case.
......
...@@ -10,6 +10,7 @@ import ( ...@@ -10,6 +10,7 @@ import (
"runtime" "runtime"
"strconv" "strconv"
"strings" "strings"
"sync"
"github.com/mholt/caddy/caddy/letsencrypt" "github.com/mholt/caddy/caddy/letsencrypt"
) )
...@@ -44,7 +45,9 @@ func checkFdlimit() { ...@@ -44,7 +45,9 @@ func checkFdlimit() {
// If this process is not a restart, this function does nothing. // If this process is not a restart, this function does nothing.
// Calling this function once this process has successfully initialized // Calling this function once this process has successfully initialized
// is vital so that the parent process can unblock and kill itself. // is vital so that the parent process can unblock and kill itself.
// This function is idempotent; it executes at most once per process.
func signalSuccessToParent() { func signalSuccessToParent() {
signalParentOnce.Do(func() {
if IsRestart() { if IsRestart() {
ppipe := os.NewFile(3, "") // parent is reading from pipe at index 3 ppipe := os.NewFile(3, "") // parent is reading from pipe at index 3
_, err := ppipe.Write([]byte("success")) // we must send some bytes to the parent _, err := ppipe.Write([]byte("success")) // we must send some bytes to the parent
...@@ -53,8 +56,17 @@ func signalSuccessToParent() { ...@@ -53,8 +56,17 @@ func signalSuccessToParent() {
} }
ppipe.Close() ppipe.Close()
} }
})
} }
// signalParentOnce is used to make sure that the parent is only
// signaled once; doing so more than once breaks whatever socket is
// at fd 4 (the reason for this is still unclear - to reproduce,
// call Stop() and Start() in succession at least once after a
// restart, then try loading first host of Caddyfile in the browser).
// Do not use this directly - call signalSuccessToParent instead.
var signalParentOnce sync.Once
// caddyfileGob maps bind address to index of the file descriptor // caddyfileGob maps bind address to index of the file descriptor
// in the Files array passed to the child process. It also contains // in the Files array passed to the child process. It also contains
// the caddyfile contents. Used only during graceful restarts. // the caddyfile contents. Used only during graceful restarts.
......
...@@ -84,6 +84,11 @@ func Restart(newCaddyfile Input) error { ...@@ -84,6 +84,11 @@ func Restart(newCaddyfile Input) error {
return err return err
} }
// Immediately close our dup'ed fds and the write end of our signal pipe
for _, f := range extraFiles {
f.Close()
}
// Feed Caddyfile to the child // Feed Caddyfile to the child
err = gob.NewEncoder(wpipe).Encode(cdyfileGob) err = gob.NewEncoder(wpipe).Encode(cdyfileGob)
if err != nil { if err != nil {
...@@ -92,7 +97,6 @@ func Restart(newCaddyfile Input) error { ...@@ -92,7 +97,6 @@ func Restart(newCaddyfile Input) error {
wpipe.Close() wpipe.Close()
// Determine whether child startup succeeded // Determine whether child startup succeeded
sigwpipe.Close() // close our copy of the write end of the pipe -- TODO: why?
answer, readErr := ioutil.ReadAll(sigrpipe) answer, readErr := ioutil.ReadAll(sigrpipe)
if answer == nil || len(answer) == 0 { if answer == nil || len(answer) == 0 {
cmdErr := cmd.Wait() // get exit status cmdErr := cmd.Wait() // get exit status
...@@ -103,6 +107,6 @@ func Restart(newCaddyfile Input) error { ...@@ -103,6 +107,6 @@ func Restart(newCaddyfile Input) error {
return errIncompleteRestart return errIncompleteRestart
} }
// Looks like child was successful; we can exit gracefully. // Looks like child is successful; we can exit gracefully.
return Stop() return Stop()
} }
...@@ -10,8 +10,11 @@ CHANGES ...@@ -10,8 +10,11 @@ CHANGES
- New -ca flag to customize ACME CA server URL - New -ca flag to customize ACME CA server URL
- New -revoke flag to revoke a certificate - New -revoke flag to revoke a certificate
- New -log flag to enable process log - New -log flag to enable process log
- New -pidfile flag to enable writing pidfile
- New -grace flag to customize the graceful shutdown timeout
- browse: Render filenames with multiple whitespace properly - browse: Render filenames with multiple whitespace properly
- markdown: Include Last-Modified header in response - markdown: Include Last-Modified header in response
- markdown: Render tables, strikethrough, and fenced code blocks
- startup, shutdown: Better Windows support - startup, shutdown: Better Windows support
- templates: Bug fix for .Host when port is absent - templates: Bug fix for .Host when port is absent
- templates: Include Last-Modified header in response - templates: Include Last-Modified header in response
......
...@@ -10,6 +10,7 @@ import ( ...@@ -10,6 +10,7 @@ import (
"runtime" "runtime"
"strconv" "strconv"
"strings" "strings"
"time"
"github.com/mholt/caddy/caddy" "github.com/mholt/caddy/caddy"
"github.com/mholt/caddy/caddy/letsencrypt" "github.com/mholt/caddy/caddy/letsencrypt"
...@@ -34,6 +35,7 @@ func init() { ...@@ -34,6 +35,7 @@ func init() {
flag.StringVar(&conf, "conf", "", "Configuration file to use (default="+caddy.DefaultConfigFile+")") flag.StringVar(&conf, "conf", "", "Configuration file to use (default="+caddy.DefaultConfigFile+")")
flag.StringVar(&cpu, "cpu", "100%", "CPU cap") flag.StringVar(&cpu, "cpu", "100%", "CPU cap")
flag.StringVar(&letsencrypt.DefaultEmail, "email", "", "Default Let's Encrypt account email address") flag.StringVar(&letsencrypt.DefaultEmail, "email", "", "Default Let's Encrypt account email address")
flag.DurationVar(&caddy.GracefulTimeout, "grace", 5*time.Second, "Maximum duration of graceful shutdown")
flag.StringVar(&caddy.Host, "host", caddy.DefaultHost, "Default host") flag.StringVar(&caddy.Host, "host", caddy.DefaultHost, "Default host")
flag.BoolVar(&caddy.HTTP2, "http2", true, "HTTP/2 support") // TODO: temporary flag until http2 merged into std lib flag.BoolVar(&caddy.HTTP2, "http2", true, "HTTP/2 support") // TODO: temporary flag until http2 merged into std lib
flag.StringVar(&logfile, "log", "", "Process log file") flag.StringVar(&logfile, "log", "", "Process log file")
......
...@@ -2,7 +2,6 @@ package server ...@@ -2,7 +2,6 @@ package server
import ( import (
"net" "net"
"os"
"sync" "sync"
"syscall" "syscall"
) )
...@@ -13,7 +12,9 @@ func newGracefulListener(l ListenerFile, wg *sync.WaitGroup) *gracefulListener { ...@@ -13,7 +12,9 @@ func newGracefulListener(l ListenerFile, wg *sync.WaitGroup) *gracefulListener {
gl := &gracefulListener{ListenerFile: l, stop: make(chan error), httpWg: wg} gl := &gracefulListener{ListenerFile: l, stop: make(chan error), httpWg: wg}
go func() { go func() {
<-gl.stop <-gl.stop
gl.Lock()
gl.stopped = true gl.stopped = true
gl.Unlock()
gl.stop <- gl.ListenerFile.Close() gl.stop <- gl.ListenerFile.Close()
}() }()
return gl return gl
...@@ -26,10 +27,11 @@ type gracefulListener struct { ...@@ -26,10 +27,11 @@ type gracefulListener struct {
ListenerFile ListenerFile
stop chan error stop chan error
stopped bool stopped bool
sync.Mutex // protects the stopped flag
httpWg *sync.WaitGroup // pointer to the host's wg used for counting connections httpWg *sync.WaitGroup // pointer to the host's wg used for counting connections
} }
// Accept accepts a connection. This type wraps // Accept accepts a connection.
func (gl *gracefulListener) Accept() (c net.Conn, err error) { func (gl *gracefulListener) Accept() (c net.Conn, err error) {
c, err = gl.ListenerFile.Accept() c, err = gl.ListenerFile.Accept()
if err != nil { if err != nil {
...@@ -42,18 +44,16 @@ func (gl *gracefulListener) Accept() (c net.Conn, err error) { ...@@ -42,18 +44,16 @@ func (gl *gracefulListener) Accept() (c net.Conn, err error) {
// Close immediately closes the listener. // Close immediately closes the listener.
func (gl *gracefulListener) Close() error { func (gl *gracefulListener) Close() error {
gl.Lock()
if gl.stopped { if gl.stopped {
gl.Unlock()
return syscall.EINVAL return syscall.EINVAL
} }
gl.Unlock()
gl.stop <- nil gl.stop <- nil
return <-gl.stop return <-gl.stop
} }
// File implements ListenerFile; it gets the file of the listening socket.
func (gl *gracefulListener) File() (*os.File, error) {
return gl.ListenerFile.File()
}
// gracefulConn represents a connection on a // gracefulConn represents a connection on a
// gracefulListener so that we can keep track // gracefulListener so that we can keep track
// of the number of connections, thus facilitating // of the number of connections, thus facilitating
......
...@@ -8,7 +8,6 @@ import ( ...@@ -8,7 +8,6 @@ import (
"crypto/x509" "crypto/x509"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"log"
"net" "net"
"net/http" "net/http"
"os" "os"
...@@ -33,6 +32,7 @@ type Server struct { ...@@ -33,6 +32,7 @@ type Server struct {
listenerMu sync.Mutex // protects listener listenerMu sync.Mutex // protects listener
httpWg sync.WaitGroup // used to wait on outstanding connections httpWg sync.WaitGroup // used to wait on outstanding connections
startChan chan struct{} // used to block until server is finished starting startChan chan struct{} // used to block until server is finished starting
connTimeout time.Duration // the maximum duration of a graceful shutdown
} }
// ListenerFile represents a listener. // ListenerFile represents a listener.
...@@ -42,14 +42,17 @@ type ListenerFile interface { ...@@ -42,14 +42,17 @@ type ListenerFile interface {
} }
// New creates a new Server which will bind to addr and serve // New creates a new Server which will bind to addr and serve
// the sites/hosts configured in configs. This function does // the sites/hosts configured in configs. Its listener will
// not start serving. // gracefully close when the server is stopped which will take
// no longer than gracefulTimeout.
//
// This function does not start serving.
// //
// Do not re-use a server (start, stop, then start again). We // Do not re-use a server (start, stop, then start again). We
// could probably add more locking to make this possible, but // could probably add more locking to make this possible, but
// as it stands, you should dispose of a server after stopping it. // as it stands, you should dispose of a server after stopping it.
// The behavior of serving with a spent server is undefined. // The behavior of serving with a spent server is undefined.
func New(addr string, configs []Config) (*Server, error) { func New(addr string, configs []Config, gracefulTimeout time.Duration) (*Server, error) {
var tls bool var tls bool
if len(configs) > 0 { if len(configs) > 0 {
tls = configs[0].TLS.Enabled tls = configs[0].TLS.Enabled
...@@ -66,6 +69,7 @@ func New(addr string, configs []Config) (*Server, error) { ...@@ -66,6 +69,7 @@ func New(addr string, configs []Config) (*Server, error) {
tls: tls, tls: tls,
vhosts: make(map[string]virtualHost), vhosts: make(map[string]virtualHost),
startChan: make(chan struct{}), startChan: make(chan struct{}),
connTimeout: gracefulTimeout,
} }
s.Handler = s // this is weird, but whatever s.Handler = s // this is weird, but whatever
...@@ -241,7 +245,7 @@ func serveTLSWithSNI(s *Server, ln net.Listener, tlsConfigs []TLSConfig) error { ...@@ -241,7 +245,7 @@ func serveTLSWithSNI(s *Server, ln net.Listener, tlsConfigs []TLSConfig) error {
// connections to close (up to a max timeout of a few // connections to close (up to a max timeout of a few
// seconds); on Windows it will close the listener // seconds); on Windows it will close the listener
// immediately. // immediately.
func (s *Server) Stop() error { func (s *Server) Stop() (err error) {
s.Server.SetKeepAlivesEnabled(false) s.Server.SetKeepAlivesEnabled(false)
if runtime.GOOS != "windows" { if runtime.GOOS != "windows" {
...@@ -256,20 +260,19 @@ func (s *Server) Stop() error { ...@@ -256,20 +260,19 @@ func (s *Server) Stop() error {
// Wait for remaining connections to finish or // Wait for remaining connections to finish or
// force them all to close after timeout // force them all to close after timeout
select { select {
case <-time.After(5 * time.Second): // TODO: make configurable case <-time.After(s.connTimeout):
case <-done: case <-done:
} }
} }
// Close the listener now; this stops the server without delay // Close the listener now; this stops the server without delay
s.listenerMu.Lock() s.listenerMu.Lock()
err := s.listener.Close() if s.listener != nil {
s.listenerMu.Unlock() err = s.listener.Close()
if err != nil {
log.Printf("[ERROR] Closing listener for %s: %v", s.Addr, err)
} }
s.listenerMu.Unlock()
return err return
} }
// WaitUntilStarted blocks until the server s is started, meaning // WaitUntilStarted blocks until the server s is started, meaning
...@@ -279,12 +282,17 @@ func (s *Server) WaitUntilStarted() { ...@@ -279,12 +282,17 @@ func (s *Server) WaitUntilStarted() {
<-s.startChan <-s.startChan
} }
// ListenerFd gets the file descriptor of the listener. // ListenerFd gets a dup'ed file of the listener. If there
// is no underlying file, the return value will be nil. It
// is the caller's responsibility to close the file.
func (s *Server) ListenerFd() *os.File { func (s *Server) ListenerFd() *os.File {
s.listenerMu.Lock() s.listenerMu.Lock()
defer s.listenerMu.Unlock() defer s.listenerMu.Unlock()
if s.listener != nil {
file, _ := s.listener.File() file, _ := s.listener.File()
return file return file
}
return nil
} }
// ServeHTTP is the entry point for every request to the address that s // ServeHTTP is the entry point for every request to the address that s
...@@ -370,7 +378,7 @@ func setupClientAuth(tlsConfigs []TLSConfig, config *tls.Config) error { ...@@ -370,7 +378,7 @@ func setupClientAuth(tlsConfigs []TLSConfig, config *tls.Config) error {
// RunFirstStartupFuncs runs all of the server's FirstStartup // RunFirstStartupFuncs runs all of the server's FirstStartup
// callback functions unless one of them returns an error first. // callback functions unless one of them returns an error first.
// It is up the caller's responsibility to call this only once and // It is the caller's responsibility to call this only once and
// at the correct time. The functions here should not be executed // at the correct time. The functions here should not be executed
// at restarts or where the user does not explicitly start a new // at restarts or where the user does not explicitly start a new
// instance of the server. // instance of the server.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment