Commit b5b31e39 authored by Matthew Holt's avatar Matthew Holt

letsencrypt: Graceful restarts

Lots of refinement still needed and runs only on POSIX systems. Windows will not get true graceful restarts (for now), but we will opt for very, very quick forceful restarts. Also, server configs are no longer put into a map; it is critical that they stay ordered so that they can be matched with their sockets in the child process after forking.

This implementation of graceful restarts is probably not perfect, but it is a good start. Lots of details to attend to now.
parent f24ecee6
......@@ -7,12 +7,15 @@ package app
import (
"errors"
"log"
"os"
"os/signal"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"syscall"
"github.com/mholt/caddy/server"
)
......@@ -42,6 +45,75 @@ var (
Quiet bool
)
func init() {
go func() {
// Wait for signal
interrupt := make(chan os.Signal, 1)
signal.Notify(interrupt, os.Interrupt, os.Kill) // TODO: syscall.SIGTERM? Or that should not run callbacks...
<-interrupt
// Run shutdown callbacks
var exitCode int
ServersMutex.Lock()
errs := server.ShutdownCallbacks(Servers)
ServersMutex.Unlock()
if len(errs) > 0 {
for _, err := range errs {
log.Println(err)
}
exitCode = 1
}
os.Exit(exitCode)
}()
}
// Restart restarts the entire application; gracefully with zero
// downtime if on a POSIX-compatible system, or forcefully if on
// Windows but with imperceptibly-short downtime.
//
// The restarted application will use caddyfile as its input
// configuration; it will not look elsewhere for the config
// to use.
func Restart(caddyfile []byte) error {
// TODO: This is POSIX-only right now; also, os.Args[0] is required!
// TODO: Pipe the Caddyfile to stdin of child!
// TODO: Before stopping this process, verify child started successfully (valid Caddyfile, etc)
// Tell the child that it's a restart
os.Setenv("CADDY_RESTART", "true")
// Pass along current environment and file descriptors to child.
// We pass along the file descriptors explicitly to ensure proper
// order, since losing the original order will break the child.
fds := []uintptr{os.Stdin.Fd(), os.Stdout.Fd(), os.Stderr.Fd()}
// Now add file descriptors of the sockets
ServersMutex.Lock()
for _, s := range Servers {
fds = append(fds, s.ListenerFd())
}
ServersMutex.Unlock()
// Fork the process with the current environment and file descriptors
execSpec := &syscall.ProcAttr{
Env: os.Environ(),
Files: fds,
}
fork, err := syscall.ForkExec(os.Args[0], os.Args, execSpec)
if err != nil {
log.Println("FORK ERR:", err, fork)
}
// Child process is listening now; we can stop all our servers here.
ServersMutex.Lock()
for _, s := range Servers {
go s.Stop() // TODO: error checking/reporting
}
ServersMutex.Unlock()
return err
}
// SetCPU parses string cpu and sets GOMAXPROCS
// according to its value. It accepts either
// a number (e.g. 3) or a percent (e.g. 50%).
......
......@@ -153,14 +153,14 @@ func makeStorages() map[string]interface{} {
// bind address to list of configs that would become VirtualHosts on that
// server. Use the keys of the returned map to create listeners, and use
// the associated values to set up the virtualhosts.
func arrangeBindings(allConfigs []server.Config) (map[*net.TCPAddr][]server.Config, error) {
addresses := make(map[*net.TCPAddr][]server.Config)
func arrangeBindings(allConfigs []server.Config) (Group, error) {
var groupings Group
// Group configs by bind address
for _, conf := range allConfigs {
newAddr, warnErr, fatalErr := resolveAddr(conf)
bindAddr, warnErr, fatalErr := resolveAddr(conf)
if fatalErr != nil {
return addresses, fatalErr
return groupings, fatalErr
}
if warnErr != nil {
log.Println("[Warning]", warnErr)
......@@ -169,37 +169,40 @@ func arrangeBindings(allConfigs []server.Config) (map[*net.TCPAddr][]server.Conf
// Make sure to compare the string representation of the address,
// not the pointer, since a new *TCPAddr is created each time.
var existing bool
for addr := range addresses {
if addr.String() == newAddr.String() {
addresses[addr] = append(addresses[addr], conf)
for i := 0; i < len(groupings); i++ {
if groupings[i].BindAddr.String() == bindAddr.String() {
groupings[i].Configs = append(groupings[i].Configs, conf)
existing = true
break
}
}
if !existing {
addresses[newAddr] = append(addresses[newAddr], conf)
groupings = append(groupings, BindingMapping{
BindAddr: bindAddr,
Configs: []server.Config{conf},
})
}
}
// Don't allow HTTP and HTTPS to be served on the same address
for _, configs := range addresses {
isTLS := configs[0].TLS.Enabled
for _, config := range configs {
for _, group := range groupings {
isTLS := group.Configs[0].TLS.Enabled
for _, config := range group.Configs {
if config.TLS.Enabled != isTLS {
thisConfigProto, otherConfigProto := "HTTP", "HTTP"
if config.TLS.Enabled {
thisConfigProto = "HTTPS"
}
if configs[0].TLS.Enabled {
if group.Configs[0].TLS.Enabled {
otherConfigProto = "HTTPS"
}
return addresses, fmt.Errorf("configuration error: Cannot multiplex %s (%s) and %s (%s) on same address",
configs[0].Address(), otherConfigProto, config.Address(), thisConfigProto)
return groupings, fmt.Errorf("configuration error: Cannot multiplex %s (%s) and %s (%s) on same address",
group.Configs[0].Address(), otherConfigProto, config.Address(), thisConfigProto)
}
}
}
return addresses, nil
return groupings, nil
}
// resolveAddr determines the address (host and port) that a config will
......@@ -291,5 +294,15 @@ var (
Port = DefaultPort
)
// BindingMapping maps a network address to configurations
// that will bind to it. The order of the configs is important.
type BindingMapping struct {
BindAddr *net.TCPAddr
Configs []server.Config
}
// Group maps network addresses to their configurations.
type Group map[*net.TCPAddr][]server.Config
// Preserving the order of the groupings is important
// (related to graceful shutdown and restart)
// so this is a slice, not a literal map.
type Group []BindingMapping
......@@ -6,12 +6,14 @@ import (
"fmt"
"io/ioutil"
"log"
"net"
"os"
"os/exec"
"path"
"runtime"
"strconv"
"strings"
"time"
"github.com/mholt/caddy/app"
"github.com/mholt/caddy/config"
......@@ -63,44 +65,70 @@ func main() {
}
// Load config from file
addresses, err := loadConfigs()
groupings, err := loadConfigs()
if err != nil {
log.Fatal(err)
}
// Start each server with its one or more configurations
for addr, configs := range addresses {
s, err := server.New(addr.String(), configs)
for i, group := range groupings {
s, err := server.New(group.BindAddr.String(), group.Configs)
if err != nil {
log.Fatal(err)
}
s.HTTP2 = app.HTTP2 // TODO: This setting is temporary
app.Wg.Add(1)
go func(s *server.Server) {
go func(s *server.Server, i int) {
defer app.Wg.Done()
err := s.Serve()
if os.Getenv("CADDY_RESTART") == "true" {
file := os.NewFile(uintptr(3+i), "")
ln, err := net.FileListener(file)
if err != nil {
log.Fatal("FILE LISTENER:", err)
}
var ok bool
ln, ok = ln.(server.ListenerFile)
if !ok {
log.Fatal("Listener was not a ListenerFile")
}
err = s.Serve(ln.(server.ListenerFile))
// TODO: Better error logging... also, is it even necessary?
if err != nil {
log.Println(err)
}
} else {
err := s.ListenAndServe()
// TODO: Better error logging... also, is it even necessary?
// For example, "use of closed network connection" is normal if doing graceful shutdown...
if err != nil {
log.Fatal(err) // kill whole process to avoid a half-alive zombie server
log.Println(err)
}
}(s)
}
}(s, i)
app.ServersMutex.Lock()
app.Servers = append(app.Servers, s)
app.ServersMutex.Unlock()
}
// Show initialization output
if !app.Quiet {
var checkedFdLimit bool
for addr, configs := range addresses {
for _, conf := range configs {
for _, group := range groupings {
for _, conf := range group.Configs {
// Print address of site
fmt.Println(conf.Address())
// Note if non-localhost site resolves to loopback interface
if addr.IP.IsLoopback() && !isLocalhost(conf.Host) {
if group.BindAddr.IP.IsLoopback() && !isLocalhost(conf.Host) {
fmt.Printf("Notice: %s is only accessible on this machine (%s)\n",
conf.Host, addr.IP.String())
conf.Host, group.BindAddr.IP.String())
}
if !checkedFdLimit && !addr.IP.IsLoopback() && !isLocalhost(conf.Host) {
if !checkedFdLimit && !group.BindAddr.IP.IsLoopback() && !isLocalhost(conf.Host) {
checkFdlimit()
checkedFdLimit = true
}
......@@ -108,7 +136,16 @@ func main() {
}
}
// Wait for all listeners to stop
// TODO: Temporary; testing restart
if os.Getenv("CADDY_RESTART") != "true" {
go func() {
time.Sleep(5 * time.Second)
fmt.Println("restarting")
log.Println("RESTART ERR:", app.Restart([]byte{}))
}()
}
// Wait for all servers to be stopped
app.Wg.Wait()
}
......
package server
import (
"net"
"os"
"sync"
"syscall"
)
// newGracefulListener returns a gracefulListener that wraps l and
// uses wg (stored in the host server) to count connections.
func newGracefulListener(l ListenerFile, wg *sync.WaitGroup) *gracefulListener {
gl := &gracefulListener{ListenerFile: l, stop: make(chan error), httpWg: wg}
go func() {
<-gl.stop
gl.stopped = true
gl.stop <- gl.ListenerFile.Close()
}()
return gl
}
// gracefuListener is a net.Listener which can
// count the number of connections on it. Its
// methods mainly wrap net.Listener to be graceful.
type gracefulListener struct {
ListenerFile
stop chan error
stopped bool
httpWg *sync.WaitGroup // pointer to the host's wg used for counting connections
}
// Accept accepts a connection. This type wraps
func (gl *gracefulListener) Accept() (c net.Conn, err error) {
c, err = gl.ListenerFile.Accept()
if err != nil {
return
}
c = gracefulConn{Conn: c, httpWg: gl.httpWg}
gl.httpWg.Add(1)
return
}
// Close immediately closes the listener.
func (gl *gracefulListener) Close() error {
if gl.stopped {
return syscall.EINVAL
}
gl.stop <- nil
return <-gl.stop
}
// File implements ListenerFile; it gets the file of the listening socket.
func (gl *gracefulListener) File() (*os.File, error) {
return gl.ListenerFile.File()
}
// gracefulConn represents a connection on a
// gracefulListener so that we can keep track
// of the number of connections, thus facilitating
// a graceful shutdown.
type gracefulConn struct {
net.Conn
httpWg *sync.WaitGroup // pointer to the host server's connection waitgroup
}
// Close closes c's underlying connection while updating the wg count.
func (c gracefulConn) Close() error {
c.httpWg.Done()
return c.Conn.Close()
}
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment