Commit 08028714 authored by Matthew Holt's avatar Matthew Holt

tls: Synchronize renewals between Caddy instances sharing file storage

Also introduce caddy.OnProcessExit which is a list of functions that
run before exiting the process cleanly; these do not count as shutdown
callbacks, so they do not return errors and must execute quickly.
parent fc2ff915
......@@ -77,6 +77,14 @@ var (
mu sync.Mutex
)
func init() {
OnProcessExit = append(OnProcessExit, func() {
if PidFile != "" {
os.Remove(PidFile)
}
})
}
// Instance contains the state of servers created as a result of
// calling Start and can be used to access or control those servers.
// It is literally an instance of a server type. Instance values
......
......@@ -29,34 +29,6 @@ import (
"golang.org/x/crypto/ocsp"
)
// Certificate is a tls.Certificate with associated metadata tacked on.
// Even if the metadata can be obtained by parsing the certificate,
// we are more efficient by extracting the metadata onto this struct.
type Certificate struct {
tls.Certificate
// Names is the list of names this certificate is written for.
// The first is the CommonName (if any), the rest are SAN.
Names []string
// NotAfter is when the certificate expires.
NotAfter time.Time
// OCSP contains the certificate's parsed OCSP response.
OCSP *ocsp.Response
// The hex-encoded hash of this cert's chain's bytes.
Hash string
// configs is the list of configs that use or refer to
// The first one is assumed to be the config that is
// "in charge" of this certificate (i.e. determines
// whether it is managed, how it is managed, etc).
// This field will be populated by cacheCertificate.
// Only meddle with it if you know what you're doing!
configs []*Config
}
// certificateCache is to be an instance-wide cache of certs
// that site-specific TLS configs can refer to. Using a
// central map like this avoids duplication of certs in
......@@ -127,6 +99,54 @@ func (certCache *certificateCache) replaceCertificate(oldCert, newCert Certifica
return nil
}
// reloadManagedCertificate reloads the certificate corresponding to the name(s)
// on oldCert into the cache, from storage. This also replaces the old certificate
// with the new one, so that all configurations that used the old cert now point
// to the new cert.
func (certCache *certificateCache) reloadManagedCertificate(oldCert Certificate) error {
// get the certificate from storage and cache it
newCert, err := oldCert.configs[0].CacheManagedCertificate(oldCert.Names[0])
if err != nil {
return fmt.Errorf("unable to reload certificate for %v into cache: %v", oldCert.Names, err)
}
// and replace the old certificate with the new one
err = certCache.replaceCertificate(oldCert, newCert)
if err != nil {
return fmt.Errorf("replacing certificate %v: %v", oldCert.Names, err)
}
return nil
}
// Certificate is a tls.Certificate with associated metadata tacked on.
// Even if the metadata can be obtained by parsing the certificate,
// we are more efficient by extracting the metadata onto this struct.
type Certificate struct {
tls.Certificate
// Names is the list of names this certificate is written for.
// The first is the CommonName (if any), the rest are SAN.
Names []string
// NotAfter is when the certificate expires.
NotAfter time.Time
// OCSP contains the certificate's parsed OCSP response.
OCSP *ocsp.Response
// The hex-encoded hash of this cert's chain's bytes.
Hash string
// configs is the list of configs that use or refer to
// The first one is assumed to be the config that is
// "in charge" of this certificate (i.e. determines
// whether it is managed, how it is managed, etc).
// This field will be populated by cacheCertificate.
// Only meddle with it if you know what you're doing!
configs []*Config
}
// CacheManagedCertificate loads the certificate for domain into the
// cache, from the TLS storage for managed certificates. It returns a
// copy of the Certificate that was put into the cache.
......
......@@ -39,7 +39,7 @@ type ACMEClient struct {
AllowPrompts bool
config *Config
acmeClient *acme.Client
locker Locker
storage Storage
}
// newACMEClient creates a new ACMEClient given an email and whether
......@@ -121,10 +121,7 @@ var newACMEClient = func(config *Config, allowPrompts bool) (*ACMEClient, error)
AllowPrompts: allowPrompts,
config: config,
acmeClient: client,
locker: &syncLock{
nameLocks: make(map[string]*sync.WaitGroup),
nameLocksMu: sync.Mutex{},
},
storage: storage,
}
if config.DNSProvider == "" {
......@@ -209,13 +206,7 @@ var newACMEClient = func(config *Config, allowPrompts bool) (*ACMEClient, error)
// Callers who have access to a Config value should use the ObtainCert
// method on that instead of this lower-level method.
func (c *ACMEClient) Obtain(name string) error {
// Get access to ACME storage
storage, err := c.config.StorageFor(c.config.CAUrl)
if err != nil {
return err
}
waiter, err := c.locker.TryLock(name)
waiter, err := c.storage.TryLock(name)
if err != nil {
return err
}
......@@ -225,7 +216,7 @@ func (c *ACMEClient) Obtain(name string) error {
return nil // we assume the process with the lock succeeded, rather than hammering this execution path again
}
defer func() {
if err := c.locker.Unlock(name); err != nil {
if err := c.storage.Unlock(name); err != nil {
log.Printf("[ERROR] Unable to unlock obtain call for %s: %v", name, err)
}
}()
......@@ -268,7 +259,7 @@ Attempts:
}
// Success - immediately save the certificate resource
err = saveCertResource(storage, certificate)
err = saveCertResource(c.storage, certificate)
if err != nil {
return fmt.Errorf("error saving assets for %v: %v", name, err)
}
......@@ -279,35 +270,30 @@ Attempts:
return nil
}
// Renew renews the managed certificate for name. This function is
// safe for concurrent use.
// Renew renews the managed certificate for name. It puts the renewed
// certificate into storage (not the cache). This function is safe for
// concurrent use.
//
// Callers who have access to a Config value should use the RenewCert
// method on that instead of this lower-level method.
func (c *ACMEClient) Renew(name string) error {
// Get access to ACME storage
storage, err := c.config.StorageFor(c.config.CAUrl)
if err != nil {
return err
}
waiter, err := c.locker.TryLock(name)
waiter, err := c.storage.TryLock(name)
if err != nil {
return err
}
if waiter != nil {
log.Printf("[INFO] Certificate for %s is already being renewed elsewhere and stored; waiting", name)
waiter.Wait()
return nil // we assume the process with the lock succeeded, rather than hammering this execution path again
return nil // assume that the worker that renewed the cert succeeded; avoid hammering this path over and over
}
defer func() {
if err := c.locker.Unlock(name); err != nil {
if err := c.storage.Unlock(name); err != nil {
log.Printf("[ERROR] Unable to unlock renew call for %s: %v", name, err)
}
}()
// Prepare for renewal (load PEM cert, key, and meta)
siteData, err := storage.LoadSite(name)
siteData, err := c.storage.LoadSite(name)
if err != nil {
return err
}
......@@ -350,21 +336,15 @@ func (c *ACMEClient) Renew(name string) error {
return errors.New("too many renewal attempts; last error: " + err.Error())
}
// Executes Cert renew events
caddy.EmitEvent(caddy.CertRenewEvent, name)
return saveCertResource(storage, newCertMeta)
return saveCertResource(c.storage, newCertMeta)
}
// Revoke revokes the certificate for name and deltes
// Revoke revokes the certificate for name and deletes
// it from storage.
func (c *ACMEClient) Revoke(name string) error {
storage, err := c.config.StorageFor(c.config.CAUrl)
if err != nil {
return err
}
siteExists, err := storage.SiteExists(name)
siteExists, err := c.storage.SiteExists(name)
if err != nil {
return err
}
......@@ -373,7 +353,7 @@ func (c *ACMEClient) Revoke(name string) error {
return errors.New("no certificate and key for " + name)
}
siteData, err := storage.LoadSite(name)
siteData, err := c.storage.LoadSite(name)
if err != nil {
return err
}
......@@ -383,7 +363,7 @@ func (c *ACMEClient) Revoke(name string) error {
return err
}
err = storage.DeleteSite(name)
err = c.storage.DeleteSite(name)
if err != nil {
return errors.New("certificate revoked, but unable to delete certificate file: " + err.Error())
}
......
......@@ -38,9 +38,9 @@ var storageBasePath = filepath.Join(caddy.AssetsPath(), "acme")
// Storage instance backed by the local disk. The resulting Storage
// instance is guaranteed to be non-nil if there is no error.
func NewFileStorage(caURL *url.URL) (Storage, error) {
return &FileStorage{
Path: filepath.Join(storageBasePath, caURL.Host),
}, nil
storage := &FileStorage{Path: filepath.Join(storageBasePath, caURL.Host)}
storage.Locker = &fileStorageLock{caURL: caURL.Host, storage: storage}
return storage, nil
}
// FileStorage facilitates forming file paths derived from a root
......@@ -48,6 +48,7 @@ func NewFileStorage(caURL *url.URL) (Storage, error) {
// cross-platform way or persisting ACME assets on the file system.
type FileStorage struct {
Path string
Locker
}
// sites gets the directory that stores site certificate and keys.
......
// Copyright 2015 Light Code Labs, LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package caddytls
import (
"fmt"
"os"
"sync"
"time"
"github.com/mholt/caddy"
)
func init() {
// be sure to remove lock files when exiting the process!
caddy.OnProcessExit = append(caddy.OnProcessExit, func() {
fileStorageNameLocksMu.Lock()
defer fileStorageNameLocksMu.Unlock()
for key, fw := range fileStorageNameLocks {
os.Remove(fw.filename)
delete(fileStorageNameLocks, key)
}
})
}
// fileStorageLock facilitates ACME-related locking by using
// the associated FileStorage, so multiple processes can coordinate
// renewals on the certificates on a shared file system.
type fileStorageLock struct {
caURL string
storage *FileStorage
}
// TryLock attempts to get a lock for name, otherwise it returns
// a Waiter value to wait until the other process is finished.
func (s *fileStorageLock) TryLock(name string) (Waiter, error) {
fileStorageNameLocksMu.Lock()
defer fileStorageNameLocksMu.Unlock()
// see if lock already exists within this process
fw, ok := fileStorageNameLocks[s.caURL+name]
if ok {
// lock already created within process, let caller wait on it
return fw, nil
}
// attempt to persist lock to disk by creating lock file
fw = &fileWaiter{
filename: s.storage.siteCertFile(name) + ".lock",
wg: new(sync.WaitGroup),
}
lf, err := os.OpenFile(fw.filename, os.O_CREATE|os.O_EXCL, 0644)
if err != nil {
if os.IsExist(err) {
// another process has the lock; use it to wait
return fw, nil
}
// otherwise, this was some unexpected error
return nil, err
}
lf.Close()
// looks like we get the lock
fw.wg.Add(1)
fileStorageNameLocks[s.caURL+name] = fw
return nil, nil
}
// Unlock unlocks name.
func (s *fileStorageLock) Unlock(name string) error {
fileStorageNameLocksMu.Lock()
defer fileStorageNameLocksMu.Unlock()
fw, ok := fileStorageNameLocks[s.caURL+name]
if !ok {
return fmt.Errorf("FileStorage: no lock to release for %s", name)
}
os.Remove(fw.filename)
fw.wg.Done()
delete(fileStorageNameLocks, s.caURL+name)
return nil
}
// fileWaiter waits for a file to disappear; it polls
// the file system to check for the existence of a file.
// It also has a WaitGroup which will be faster than
// polling, for when locking need only happen within this
// process.
type fileWaiter struct {
filename string
wg *sync.WaitGroup
}
// Wait waits until the lock is released.
func (fw *fileWaiter) Wait() {
start := time.Now()
fw.wg.Wait()
for time.Since(start) < 1*time.Hour {
_, err := os.Stat(fw.filename)
if os.IsNotExist(err) {
return
}
time.Sleep(1 * time.Second)
}
}
var fileStorageNameLocks = make(map[string]*fileWaiter) // keyed by CA + name
var fileStorageNameLocksMu sync.Mutex
var _ Locker = &fileStorageLock{}
var _ Waiter = &fileWaiter{}
......@@ -160,17 +160,12 @@ func RenewManagedCertificates(allowPrompts bool) (err error) {
log.Printf("[INFO] Certificate for %v expires in %v, but is already renewed in storage; reloading stored certificate",
oldCert.Names, timeLeft)
// get the certificate from storage and cache it
newCert, err := oldCert.configs[0].CacheManagedCertificate(oldCert.Names[0])
err = certCache.reloadManagedCertificate(oldCert)
if err != nil {
log.Printf("[ERROR] Unable to reload certificate for %v into cache: %v", oldCert.Names, err)
continue
if allowPrompts {
return err // operator is present, so report error immediately
}
// and replace the old certificate with the new one
err = certCache.replaceCertificate(oldCert, newCert)
if err != nil {
log.Printf("[ERROR] Replacing certificate: %v", err)
log.Printf("[ERROR] Loading renewed certificate: %v", err)
}
}
......@@ -212,21 +207,13 @@ func RenewManagedCertificates(allowPrompts bool) (err error) {
// successful renewal, so update in-memory cache by loading
// renewed certificate so it will be used with handshakes
// put the certificate in the cache
newCert, err := oldCert.configs[0].CacheManagedCertificate(renewName)
err = certCache.reloadManagedCertificate(oldCert)
if err != nil {
if allowPrompts {
return err // operator is present, so report error immediately
}
log.Printf("[ERROR] %v", err)
}
// replace the old certificate with the new one
err = certCache.replaceCertificate(oldCert, newCert)
if err != nil {
log.Printf("[ERROR] Replacing certificate: %v", err)
}
}
// Deletion queue
......
......@@ -107,6 +107,10 @@ type Storage interface {
// in StoreUser. The result is an empty string if there are no
// persisted users in storage.
MostRecentUserEmail() string
// Locker is necessary because synchronizing certificate maintenance
// depends on how storage is implemented.
Locker
}
// ErrNotExist is returned by Storage implementations when
......
// Copyright 2015 Light Code Labs, LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package caddytls
import (
"fmt"
"sync"
)
var _ Locker = &syncLock{}
type syncLock struct {
nameLocks map[string]*sync.WaitGroup
nameLocksMu sync.Mutex
}
// TryLock attempts to get a lock for name, otherwise it returns
// a Waiter value to wait until the other process is finished.
func (s *syncLock) TryLock(name string) (Waiter, error) {
s.nameLocksMu.Lock()
defer s.nameLocksMu.Unlock()
wg, ok := s.nameLocks[name]
if ok {
// lock already obtained, let caller wait on it
return wg, nil
}
// caller gets lock
wg = new(sync.WaitGroup)
wg.Add(1)
s.nameLocks[name] = wg
return nil, nil
}
// Unlock unlocks name.
func (s *syncLock) Unlock(name string) error {
s.nameLocksMu.Lock()
defer s.nameLocksMu.Unlock()
wg, ok := s.nameLocks[name]
if !ok {
return fmt.Errorf("FileStorage: no lock to release for %s", name)
}
wg.Done()
delete(s.nameLocks, name)
return nil
}
......@@ -383,6 +383,14 @@ func loadCaddyfileInput(serverType string) (Input, error) {
return caddyfileToUse, nil
}
// OnProcessExit is a list of functions to run when the process
// exits -- they are ONLY for cleanup and should not block,
// return errors, or do anything fancy. They will be run with
// every signal, even if "shutdown callbacks" are not executed.
// This variable must only be modified in the main goroutine
// from init() functions.
var OnProcessExit []func()
// caddyfileLoader pairs the name of a loader to the loader.
type caddyfileLoader struct {
name string
......
......@@ -44,16 +44,17 @@ func trapSignalsCrossPlatform() {
if i > 0 {
log.Println("[INFO] SIGINT: Force quit")
if PidFile != "" {
os.Remove(PidFile)
for _, f := range OnProcessExit {
f() // important cleanup actions only
}
os.Exit(2)
}
log.Println("[INFO] SIGINT: Shutting down")
if PidFile != "" {
os.Remove(PidFile)
// important cleanup actions before shutdown callbacks
for _, f := range OnProcessExit {
f()
}
go func() {
......
......@@ -33,8 +33,8 @@ func trapSignalsPosix() {
switch sig {
case syscall.SIGTERM:
log.Println("[INFO] SIGTERM: Terminating process")
if PidFile != "" {
os.Remove(PidFile)
for _, f := range OnProcessExit {
f() // only perform important cleanup actions
}
os.Exit(0)
......@@ -46,8 +46,8 @@ func trapSignalsPosix() {
log.Printf("[ERROR] SIGQUIT stop: %v", err)
exitCode = 3
}
if PidFile != "" {
os.Remove(PidFile)
for _, f := range OnProcessExit {
f() // only perform important cleanup actions
}
os.Exit(exitCode)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment