Commit 08028714 authored by Matthew Holt's avatar Matthew Holt

tls: Synchronize renewals between Caddy instances sharing file storage

Also introduce caddy.OnProcessExit which is a list of functions that
run before exiting the process cleanly; these do not count as shutdown
callbacks, so they do not return errors and must execute quickly.
parent fc2ff915
...@@ -77,6 +77,14 @@ var ( ...@@ -77,6 +77,14 @@ var (
mu sync.Mutex mu sync.Mutex
) )
func init() {
OnProcessExit = append(OnProcessExit, func() {
if PidFile != "" {
os.Remove(PidFile)
}
})
}
// Instance contains the state of servers created as a result of // Instance contains the state of servers created as a result of
// calling Start and can be used to access or control those servers. // calling Start and can be used to access or control those servers.
// It is literally an instance of a server type. Instance values // It is literally an instance of a server type. Instance values
......
...@@ -29,34 +29,6 @@ import ( ...@@ -29,34 +29,6 @@ import (
"golang.org/x/crypto/ocsp" "golang.org/x/crypto/ocsp"
) )
// Certificate is a tls.Certificate with associated metadata tacked on.
// Even if the metadata can be obtained by parsing the certificate,
// we are more efficient by extracting the metadata onto this struct.
type Certificate struct {
tls.Certificate
// Names is the list of names this certificate is written for.
// The first is the CommonName (if any), the rest are SAN.
Names []string
// NotAfter is when the certificate expires.
NotAfter time.Time
// OCSP contains the certificate's parsed OCSP response.
OCSP *ocsp.Response
// The hex-encoded hash of this cert's chain's bytes.
Hash string
// configs is the list of configs that use or refer to
// The first one is assumed to be the config that is
// "in charge" of this certificate (i.e. determines
// whether it is managed, how it is managed, etc).
// This field will be populated by cacheCertificate.
// Only meddle with it if you know what you're doing!
configs []*Config
}
// certificateCache is to be an instance-wide cache of certs // certificateCache is to be an instance-wide cache of certs
// that site-specific TLS configs can refer to. Using a // that site-specific TLS configs can refer to. Using a
// central map like this avoids duplication of certs in // central map like this avoids duplication of certs in
...@@ -127,6 +99,54 @@ func (certCache *certificateCache) replaceCertificate(oldCert, newCert Certifica ...@@ -127,6 +99,54 @@ func (certCache *certificateCache) replaceCertificate(oldCert, newCert Certifica
return nil return nil
} }
// reloadManagedCertificate reloads the certificate corresponding to the name(s)
// on oldCert into the cache, from storage. This also replaces the old certificate
// with the new one, so that all configurations that used the old cert now point
// to the new cert.
func (certCache *certificateCache) reloadManagedCertificate(oldCert Certificate) error {
// get the certificate from storage and cache it
newCert, err := oldCert.configs[0].CacheManagedCertificate(oldCert.Names[0])
if err != nil {
return fmt.Errorf("unable to reload certificate for %v into cache: %v", oldCert.Names, err)
}
// and replace the old certificate with the new one
err = certCache.replaceCertificate(oldCert, newCert)
if err != nil {
return fmt.Errorf("replacing certificate %v: %v", oldCert.Names, err)
}
return nil
}
// Certificate is a tls.Certificate with associated metadata tacked on.
// Even if the metadata can be obtained by parsing the certificate,
// we are more efficient by extracting the metadata onto this struct.
type Certificate struct {
tls.Certificate
// Names is the list of names this certificate is written for.
// The first is the CommonName (if any), the rest are SAN.
Names []string
// NotAfter is when the certificate expires.
NotAfter time.Time
// OCSP contains the certificate's parsed OCSP response.
OCSP *ocsp.Response
// The hex-encoded hash of this cert's chain's bytes.
Hash string
// configs is the list of configs that use or refer to
// The first one is assumed to be the config that is
// "in charge" of this certificate (i.e. determines
// whether it is managed, how it is managed, etc).
// This field will be populated by cacheCertificate.
// Only meddle with it if you know what you're doing!
configs []*Config
}
// CacheManagedCertificate loads the certificate for domain into the // CacheManagedCertificate loads the certificate for domain into the
// cache, from the TLS storage for managed certificates. It returns a // cache, from the TLS storage for managed certificates. It returns a
// copy of the Certificate that was put into the cache. // copy of the Certificate that was put into the cache.
......
...@@ -39,7 +39,7 @@ type ACMEClient struct { ...@@ -39,7 +39,7 @@ type ACMEClient struct {
AllowPrompts bool AllowPrompts bool
config *Config config *Config
acmeClient *acme.Client acmeClient *acme.Client
locker Locker storage Storage
} }
// newACMEClient creates a new ACMEClient given an email and whether // newACMEClient creates a new ACMEClient given an email and whether
...@@ -121,10 +121,7 @@ var newACMEClient = func(config *Config, allowPrompts bool) (*ACMEClient, error) ...@@ -121,10 +121,7 @@ var newACMEClient = func(config *Config, allowPrompts bool) (*ACMEClient, error)
AllowPrompts: allowPrompts, AllowPrompts: allowPrompts,
config: config, config: config,
acmeClient: client, acmeClient: client,
locker: &syncLock{ storage: storage,
nameLocks: make(map[string]*sync.WaitGroup),
nameLocksMu: sync.Mutex{},
},
} }
if config.DNSProvider == "" { if config.DNSProvider == "" {
...@@ -209,13 +206,7 @@ var newACMEClient = func(config *Config, allowPrompts bool) (*ACMEClient, error) ...@@ -209,13 +206,7 @@ var newACMEClient = func(config *Config, allowPrompts bool) (*ACMEClient, error)
// Callers who have access to a Config value should use the ObtainCert // Callers who have access to a Config value should use the ObtainCert
// method on that instead of this lower-level method. // method on that instead of this lower-level method.
func (c *ACMEClient) Obtain(name string) error { func (c *ACMEClient) Obtain(name string) error {
// Get access to ACME storage waiter, err := c.storage.TryLock(name)
storage, err := c.config.StorageFor(c.config.CAUrl)
if err != nil {
return err
}
waiter, err := c.locker.TryLock(name)
if err != nil { if err != nil {
return err return err
} }
...@@ -225,7 +216,7 @@ func (c *ACMEClient) Obtain(name string) error { ...@@ -225,7 +216,7 @@ func (c *ACMEClient) Obtain(name string) error {
return nil // we assume the process with the lock succeeded, rather than hammering this execution path again return nil // we assume the process with the lock succeeded, rather than hammering this execution path again
} }
defer func() { defer func() {
if err := c.locker.Unlock(name); err != nil { if err := c.storage.Unlock(name); err != nil {
log.Printf("[ERROR] Unable to unlock obtain call for %s: %v", name, err) log.Printf("[ERROR] Unable to unlock obtain call for %s: %v", name, err)
} }
}() }()
...@@ -268,7 +259,7 @@ Attempts: ...@@ -268,7 +259,7 @@ Attempts:
} }
// Success - immediately save the certificate resource // Success - immediately save the certificate resource
err = saveCertResource(storage, certificate) err = saveCertResource(c.storage, certificate)
if err != nil { if err != nil {
return fmt.Errorf("error saving assets for %v: %v", name, err) return fmt.Errorf("error saving assets for %v: %v", name, err)
} }
...@@ -279,35 +270,30 @@ Attempts: ...@@ -279,35 +270,30 @@ Attempts:
return nil return nil
} }
// Renew renews the managed certificate for name. This function is // Renew renews the managed certificate for name. It puts the renewed
// safe for concurrent use. // certificate into storage (not the cache). This function is safe for
// concurrent use.
// //
// Callers who have access to a Config value should use the RenewCert // Callers who have access to a Config value should use the RenewCert
// method on that instead of this lower-level method. // method on that instead of this lower-level method.
func (c *ACMEClient) Renew(name string) error { func (c *ACMEClient) Renew(name string) error {
// Get access to ACME storage waiter, err := c.storage.TryLock(name)
storage, err := c.config.StorageFor(c.config.CAUrl)
if err != nil {
return err
}
waiter, err := c.locker.TryLock(name)
if err != nil { if err != nil {
return err return err
} }
if waiter != nil { if waiter != nil {
log.Printf("[INFO] Certificate for %s is already being renewed elsewhere and stored; waiting", name) log.Printf("[INFO] Certificate for %s is already being renewed elsewhere and stored; waiting", name)
waiter.Wait() waiter.Wait()
return nil // we assume the process with the lock succeeded, rather than hammering this execution path again return nil // assume that the worker that renewed the cert succeeded; avoid hammering this path over and over
} }
defer func() { defer func() {
if err := c.locker.Unlock(name); err != nil { if err := c.storage.Unlock(name); err != nil {
log.Printf("[ERROR] Unable to unlock renew call for %s: %v", name, err) log.Printf("[ERROR] Unable to unlock renew call for %s: %v", name, err)
} }
}() }()
// Prepare for renewal (load PEM cert, key, and meta) // Prepare for renewal (load PEM cert, key, and meta)
siteData, err := storage.LoadSite(name) siteData, err := c.storage.LoadSite(name)
if err != nil { if err != nil {
return err return err
} }
...@@ -350,21 +336,15 @@ func (c *ACMEClient) Renew(name string) error { ...@@ -350,21 +336,15 @@ func (c *ACMEClient) Renew(name string) error {
return errors.New("too many renewal attempts; last error: " + err.Error()) return errors.New("too many renewal attempts; last error: " + err.Error())
} }
// Executes Cert renew events
caddy.EmitEvent(caddy.CertRenewEvent, name) caddy.EmitEvent(caddy.CertRenewEvent, name)
return saveCertResource(storage, newCertMeta) return saveCertResource(c.storage, newCertMeta)
} }
// Revoke revokes the certificate for name and deltes // Revoke revokes the certificate for name and deletes
// it from storage. // it from storage.
func (c *ACMEClient) Revoke(name string) error { func (c *ACMEClient) Revoke(name string) error {
storage, err := c.config.StorageFor(c.config.CAUrl) siteExists, err := c.storage.SiteExists(name)
if err != nil {
return err
}
siteExists, err := storage.SiteExists(name)
if err != nil { if err != nil {
return err return err
} }
...@@ -373,7 +353,7 @@ func (c *ACMEClient) Revoke(name string) error { ...@@ -373,7 +353,7 @@ func (c *ACMEClient) Revoke(name string) error {
return errors.New("no certificate and key for " + name) return errors.New("no certificate and key for " + name)
} }
siteData, err := storage.LoadSite(name) siteData, err := c.storage.LoadSite(name)
if err != nil { if err != nil {
return err return err
} }
...@@ -383,7 +363,7 @@ func (c *ACMEClient) Revoke(name string) error { ...@@ -383,7 +363,7 @@ func (c *ACMEClient) Revoke(name string) error {
return err return err
} }
err = storage.DeleteSite(name) err = c.storage.DeleteSite(name)
if err != nil { if err != nil {
return errors.New("certificate revoked, but unable to delete certificate file: " + err.Error()) return errors.New("certificate revoked, but unable to delete certificate file: " + err.Error())
} }
......
...@@ -38,9 +38,9 @@ var storageBasePath = filepath.Join(caddy.AssetsPath(), "acme") ...@@ -38,9 +38,9 @@ var storageBasePath = filepath.Join(caddy.AssetsPath(), "acme")
// Storage instance backed by the local disk. The resulting Storage // Storage instance backed by the local disk. The resulting Storage
// instance is guaranteed to be non-nil if there is no error. // instance is guaranteed to be non-nil if there is no error.
func NewFileStorage(caURL *url.URL) (Storage, error) { func NewFileStorage(caURL *url.URL) (Storage, error) {
return &FileStorage{ storage := &FileStorage{Path: filepath.Join(storageBasePath, caURL.Host)}
Path: filepath.Join(storageBasePath, caURL.Host), storage.Locker = &fileStorageLock{caURL: caURL.Host, storage: storage}
}, nil return storage, nil
} }
// FileStorage facilitates forming file paths derived from a root // FileStorage facilitates forming file paths derived from a root
...@@ -48,6 +48,7 @@ func NewFileStorage(caURL *url.URL) (Storage, error) { ...@@ -48,6 +48,7 @@ func NewFileStorage(caURL *url.URL) (Storage, error) {
// cross-platform way or persisting ACME assets on the file system. // cross-platform way or persisting ACME assets on the file system.
type FileStorage struct { type FileStorage struct {
Path string Path string
Locker
} }
// sites gets the directory that stores site certificate and keys. // sites gets the directory that stores site certificate and keys.
......
// Copyright 2015 Light Code Labs, LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package caddytls
import (
"fmt"
"os"
"sync"
"time"
"github.com/mholt/caddy"
)
func init() {
// be sure to remove lock files when exiting the process!
caddy.OnProcessExit = append(caddy.OnProcessExit, func() {
fileStorageNameLocksMu.Lock()
defer fileStorageNameLocksMu.Unlock()
for key, fw := range fileStorageNameLocks {
os.Remove(fw.filename)
delete(fileStorageNameLocks, key)
}
})
}
// fileStorageLock facilitates ACME-related locking by using
// the associated FileStorage, so multiple processes can coordinate
// renewals on the certificates on a shared file system.
type fileStorageLock struct {
caURL string
storage *FileStorage
}
// TryLock attempts to get a lock for name, otherwise it returns
// a Waiter value to wait until the other process is finished.
func (s *fileStorageLock) TryLock(name string) (Waiter, error) {
fileStorageNameLocksMu.Lock()
defer fileStorageNameLocksMu.Unlock()
// see if lock already exists within this process
fw, ok := fileStorageNameLocks[s.caURL+name]
if ok {
// lock already created within process, let caller wait on it
return fw, nil
}
// attempt to persist lock to disk by creating lock file
fw = &fileWaiter{
filename: s.storage.siteCertFile(name) + ".lock",
wg: new(sync.WaitGroup),
}
lf, err := os.OpenFile(fw.filename, os.O_CREATE|os.O_EXCL, 0644)
if err != nil {
if os.IsExist(err) {
// another process has the lock; use it to wait
return fw, nil
}
// otherwise, this was some unexpected error
return nil, err
}
lf.Close()
// looks like we get the lock
fw.wg.Add(1)
fileStorageNameLocks[s.caURL+name] = fw
return nil, nil
}
// Unlock unlocks name.
func (s *fileStorageLock) Unlock(name string) error {
fileStorageNameLocksMu.Lock()
defer fileStorageNameLocksMu.Unlock()
fw, ok := fileStorageNameLocks[s.caURL+name]
if !ok {
return fmt.Errorf("FileStorage: no lock to release for %s", name)
}
os.Remove(fw.filename)
fw.wg.Done()
delete(fileStorageNameLocks, s.caURL+name)
return nil
}
// fileWaiter waits for a file to disappear; it polls
// the file system to check for the existence of a file.
// It also has a WaitGroup which will be faster than
// polling, for when locking need only happen within this
// process.
type fileWaiter struct {
filename string
wg *sync.WaitGroup
}
// Wait waits until the lock is released.
func (fw *fileWaiter) Wait() {
start := time.Now()
fw.wg.Wait()
for time.Since(start) < 1*time.Hour {
_, err := os.Stat(fw.filename)
if os.IsNotExist(err) {
return
}
time.Sleep(1 * time.Second)
}
}
var fileStorageNameLocks = make(map[string]*fileWaiter) // keyed by CA + name
var fileStorageNameLocksMu sync.Mutex
var _ Locker = &fileStorageLock{}
var _ Waiter = &fileWaiter{}
...@@ -160,17 +160,12 @@ func RenewManagedCertificates(allowPrompts bool) (err error) { ...@@ -160,17 +160,12 @@ func RenewManagedCertificates(allowPrompts bool) (err error) {
log.Printf("[INFO] Certificate for %v expires in %v, but is already renewed in storage; reloading stored certificate", log.Printf("[INFO] Certificate for %v expires in %v, but is already renewed in storage; reloading stored certificate",
oldCert.Names, timeLeft) oldCert.Names, timeLeft)
// get the certificate from storage and cache it err = certCache.reloadManagedCertificate(oldCert)
newCert, err := oldCert.configs[0].CacheManagedCertificate(oldCert.Names[0])
if err != nil { if err != nil {
log.Printf("[ERROR] Unable to reload certificate for %v into cache: %v", oldCert.Names, err) if allowPrompts {
continue return err // operator is present, so report error immediately
} }
log.Printf("[ERROR] Loading renewed certificate: %v", err)
// and replace the old certificate with the new one
err = certCache.replaceCertificate(oldCert, newCert)
if err != nil {
log.Printf("[ERROR] Replacing certificate: %v", err)
} }
} }
...@@ -212,21 +207,13 @@ func RenewManagedCertificates(allowPrompts bool) (err error) { ...@@ -212,21 +207,13 @@ func RenewManagedCertificates(allowPrompts bool) (err error) {
// successful renewal, so update in-memory cache by loading // successful renewal, so update in-memory cache by loading
// renewed certificate so it will be used with handshakes // renewed certificate so it will be used with handshakes
err = certCache.reloadManagedCertificate(oldCert)
// put the certificate in the cache
newCert, err := oldCert.configs[0].CacheManagedCertificate(renewName)
if err != nil { if err != nil {
if allowPrompts { if allowPrompts {
return err // operator is present, so report error immediately return err // operator is present, so report error immediately
} }
log.Printf("[ERROR] %v", err) log.Printf("[ERROR] %v", err)
} }
// replace the old certificate with the new one
err = certCache.replaceCertificate(oldCert, newCert)
if err != nil {
log.Printf("[ERROR] Replacing certificate: %v", err)
}
} }
// Deletion queue // Deletion queue
......
...@@ -107,6 +107,10 @@ type Storage interface { ...@@ -107,6 +107,10 @@ type Storage interface {
// in StoreUser. The result is an empty string if there are no // in StoreUser. The result is an empty string if there are no
// persisted users in storage. // persisted users in storage.
MostRecentUserEmail() string MostRecentUserEmail() string
// Locker is necessary because synchronizing certificate maintenance
// depends on how storage is implemented.
Locker
} }
// ErrNotExist is returned by Storage implementations when // ErrNotExist is returned by Storage implementations when
......
// Copyright 2015 Light Code Labs, LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package caddytls
import (
"fmt"
"sync"
)
var _ Locker = &syncLock{}
type syncLock struct {
nameLocks map[string]*sync.WaitGroup
nameLocksMu sync.Mutex
}
// TryLock attempts to get a lock for name, otherwise it returns
// a Waiter value to wait until the other process is finished.
func (s *syncLock) TryLock(name string) (Waiter, error) {
s.nameLocksMu.Lock()
defer s.nameLocksMu.Unlock()
wg, ok := s.nameLocks[name]
if ok {
// lock already obtained, let caller wait on it
return wg, nil
}
// caller gets lock
wg = new(sync.WaitGroup)
wg.Add(1)
s.nameLocks[name] = wg
return nil, nil
}
// Unlock unlocks name.
func (s *syncLock) Unlock(name string) error {
s.nameLocksMu.Lock()
defer s.nameLocksMu.Unlock()
wg, ok := s.nameLocks[name]
if !ok {
return fmt.Errorf("FileStorage: no lock to release for %s", name)
}
wg.Done()
delete(s.nameLocks, name)
return nil
}
...@@ -383,6 +383,14 @@ func loadCaddyfileInput(serverType string) (Input, error) { ...@@ -383,6 +383,14 @@ func loadCaddyfileInput(serverType string) (Input, error) {
return caddyfileToUse, nil return caddyfileToUse, nil
} }
// OnProcessExit is a list of functions to run when the process
// exits -- they are ONLY for cleanup and should not block,
// return errors, or do anything fancy. They will be run with
// every signal, even if "shutdown callbacks" are not executed.
// This variable must only be modified in the main goroutine
// from init() functions.
var OnProcessExit []func()
// caddyfileLoader pairs the name of a loader to the loader. // caddyfileLoader pairs the name of a loader to the loader.
type caddyfileLoader struct { type caddyfileLoader struct {
name string name string
......
...@@ -44,16 +44,17 @@ func trapSignalsCrossPlatform() { ...@@ -44,16 +44,17 @@ func trapSignalsCrossPlatform() {
if i > 0 { if i > 0 {
log.Println("[INFO] SIGINT: Force quit") log.Println("[INFO] SIGINT: Force quit")
if PidFile != "" { for _, f := range OnProcessExit {
os.Remove(PidFile) f() // important cleanup actions only
} }
os.Exit(2) os.Exit(2)
} }
log.Println("[INFO] SIGINT: Shutting down") log.Println("[INFO] SIGINT: Shutting down")
if PidFile != "" { // important cleanup actions before shutdown callbacks
os.Remove(PidFile) for _, f := range OnProcessExit {
f()
} }
go func() { go func() {
......
...@@ -33,8 +33,8 @@ func trapSignalsPosix() { ...@@ -33,8 +33,8 @@ func trapSignalsPosix() {
switch sig { switch sig {
case syscall.SIGTERM: case syscall.SIGTERM:
log.Println("[INFO] SIGTERM: Terminating process") log.Println("[INFO] SIGTERM: Terminating process")
if PidFile != "" { for _, f := range OnProcessExit {
os.Remove(PidFile) f() // only perform important cleanup actions
} }
os.Exit(0) os.Exit(0)
...@@ -46,8 +46,8 @@ func trapSignalsPosix() { ...@@ -46,8 +46,8 @@ func trapSignalsPosix() {
log.Printf("[ERROR] SIGQUIT stop: %v", err) log.Printf("[ERROR] SIGQUIT stop: %v", err)
exitCode = 3 exitCode = 3
} }
if PidFile != "" { for _, f := range OnProcessExit {
os.Remove(PidFile) f() // only perform important cleanup actions
} }
os.Exit(exitCode) os.Exit(exitCode)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment