Commit 76872d95 authored by Vincent Pelletier's avatar Vincent Pelletier

cli.updater: Retry on network error.

Maybe name resolution is flapping, or server is unreachable or
misbehaving... This must not cause caucase-update to exit.
parent bd783f0f
......@@ -24,8 +24,10 @@ import datetime
import httplib
import json
import os
import socket
import struct
import sys
import traceback
from cryptography import x509
from cryptography.hazmat.backends import default_backend
from . import utils
......@@ -36,6 +38,35 @@ from .client import (
HTTPSOnlyCaucaseClient,
)
class RetryingCaucaseClient(CaucaseClient):
_until = utils.until
def _request(self, connection, method, url, body=None, headers=None):
while True:
try:
return super(RetryingCaucaseClient, self)._request(
connection=connection,
method=method,
url=url,
body=body,
headers=headers,
)
except (
socket.error,
# Note: all exceptions below inherit from httplib.HTTPException,
# but so do errors which are either sign of code bugs
# (ImproperConnectionState) or sign of garbage values provided by
# caller/user, and these should be let through.
httplib.BadStatusLine,
httplib.LineTooLong,
httplib.UnknownProtocol,
httplib.IncompleteRead,
):
connection.close() # Resets HTTPConnection state machine.
print 'Got a network error, retrying in a bit...'
traceback.print_exc()
self._until(datetime.datetime.now() + datetime.timedelta(0, 10))
_cryptography_backend = default_backend()
STATUS_ERROR = 1
......@@ -693,11 +724,11 @@ def updater(argv=None, until=utils.until):
}[args.mode]
threshold = datetime.timedelta(args.threshold, 0)
max_sleep = datetime.timedelta(args.max_sleep, 0)
updated = CaucaseClient.updateCAFile(
updated = RetryingCaucaseClient.updateCAFile(
cas_url,
args.cas_ca,
) and args.cas_ca == args.ca
client = CaucaseClient(
client = RetryingCaucaseClient(
ca_url=ca_url,
ca_crt_pem_list=utils.getCertList(args.cas_ca)
)
......@@ -733,15 +764,15 @@ def updater(argv=None, until=utils.until):
)
now = until(next_deadline)
next_deadline = now + max_sleep
if args.cas_ca != args.ca and CaucaseClient.updateCAFile(
if args.cas_ca != args.ca and RetryingCaucaseClient.updateCAFile(
cas_url,
args.cas_ca,
):
client = CaucaseClient(
client = RetryingCaucaseClient(
ca_url=ca_url,
ca_crt_pem_list=utils.getCertList(args.cas_ca)
)
if CaucaseClient.updateCAFile(ca_url, args.ca):
if RetryingCaucaseClient.updateCAFile(ca_url, args.ca):
print 'Got new CA'
updated = True
# Note: CRL expiration should happen several time during CA renewal
......@@ -751,7 +782,7 @@ def updater(argv=None, until=utils.until):
utils.load_ca_certificate(x)
for x in utils.getCertList(args.ca)
]
if CaucaseClient.updateCRLFile(ca_url, args.crl, ca_crt_list):
if RetryingCaucaseClient.updateCRLFile(ca_url, args.crl, ca_crt_list):
print 'Got new CRL'
updated = True
next_deadline = min(
......
......@@ -25,6 +25,7 @@ from cStringIO import StringIO
import datetime
import errno
import glob
import httplib
import ipaddress
import os
import random
......@@ -1934,6 +1935,43 @@ class CaucaseTest(unittest.TestCase):
self.assertFalse(os.path.exists(re_crt_path))
updater_event = threading.Event()
until_updater = UntilEvent(updater_event)
network_issue_event = threading.Event()
until_network_issue = UntilEvent(network_issue_event)
# pylint: disable=protected-access
cli.RetryingCaucaseClient._until = until_network_issue
until_network_issue.action = ON_EVENT_EXPIRE
# pylint: enable=protected-access
original_HTTPConnection = cli.RetryingCaucaseClient.HTTPConnection
class ErrorInjector(object):
"""
Callable instances which set a flag when called and raise provided
exception.
"""
reached = False
def __init__(self, exception):
self.exception = exception
def __call__(self, *_, **__):
self.reached = True
raise self.exception
class HTTPConnectionNetworkFaultInjector(original_HTTPConnection):
"""
Place holder for error injectors (to not poison the real HTTPConnection).
"""
pass
def reraise(*_, **__): # pragma: no cover
"""
Just reraise latest error.
For replacing RetryingCaucaseClient._until only.
"""
# pylint: disable=misplaced-bare-raise
raise
# pylint: enable=misplaced-bare-raise
cli.RetryingCaucaseClient.HTTPConnection = HTTPConnectionNetworkFaultInjector
# Prime the error pump.
HTTPConnectionNetworkFaultInjector.connect = injector = ErrorInjector(
socket.gaierror(-3, 'Temporary failure in name resolution'),
)
updater_thread = threading.Thread(
target=cli.updater,
kwargs={
......@@ -1952,6 +1990,40 @@ class CaucaseTest(unittest.TestCase):
updater_thread.daemon = True
updater_thread.start()
try:
# XXX: this error injection technique only tests the first query. Assume
# the code handle network errors consistently everywhere
# (IOW, RetryingCaucaseClient is used everywhere).
for index, (HTTPConnection_method_id, injector) in enumerate((
('connect', injector), # Primed above
('connect', ErrorInjector(socket.error(111, 'Connection refused'))),
('getresponse', ErrorInjector(httplib.BadStatusLine('blah'))),
('getresponse', ErrorInjector(httplib.LineTooLong('header line'))),
('getresponse', ErrorInjector(httplib.UnknownProtocol('HTTP/pigeon'))),
('getresponse', ErrorInjector(httplib.IncompleteRead('you are my '))),
)):
if index:
# Set next fault
setattr(
HTTPConnectionNetworkFaultInjector,
HTTPConnection_method_id,
injector,
)
# Let code reach it
network_issue_event.set()
# Wait for the code to reach the fault.
# XXX: how to fast-fail if thread dies ?
until_network_issue.wait()
# Check injector really got triggered.
self.assertTrue(injector.reached)
# Cleanup and prepare next error
delattr(HTTPConnectionNetworkFaultInjector, HTTPConnection_method_id)
cli.RetryingCaucaseClient.HTTPConnection = original_HTTPConnection
# pylint: disable=protected-access
cli.RetryingCaucaseClient._until = reraise
# pylint: enable=protected-access
# Let code carry on.
network_issue_event.set()
until_updater.wait()
# CSR must have been submitted
csr_line_list = self._runClient(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment