Commit 418af5b3 authored by Ilya Dryomov

libceph: lower exponential backoff delay

The current setting allows the backoff to climb up to 5 minutes.  This
is too high -- it becomes hard to tell whether the client is stuck on
something or just in backoff.

In userspace, ms_max_backoff defaults to 15 seconds.  Let's do the
same.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent b77f8f0e
...@@ -241,8 +241,8 @@ struct ceph_msg { ...@@ -241,8 +241,8 @@ struct ceph_msg {
}; };
/* ceph connection fault delay defaults, for exponential backoff */ /* ceph connection fault delay defaults, for exponential backoff */
#define BASE_DELAY_INTERVAL (HZ/2) #define BASE_DELAY_INTERVAL (HZ / 4)
#define MAX_DELAY_INTERVAL (5 * 60 * HZ) #define MAX_DELAY_INTERVAL (15 * HZ)
/* /*
* A single connection with another host. * A single connection with another host.
......
...@@ -2812,6 +2812,9 @@ static int queue_con_delay(struct ceph_connection *con, unsigned long delay) ...@@ -2812,6 +2812,9 @@ static int queue_con_delay(struct ceph_connection *con, unsigned long delay)
return -ENOENT; return -ENOENT;
} }
if (delay >= HZ)
delay = round_jiffies_relative(delay);
dout("%s %p %lu\n", __func__, con, delay); dout("%s %p %lu\n", __func__, con, delay);
if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) {
dout("%s %p - already queued\n", __func__, con); dout("%s %p - already queued\n", __func__, con);
...@@ -2871,7 +2874,7 @@ static bool con_backoff(struct ceph_connection *con) ...@@ -2871,7 +2874,7 @@ static bool con_backoff(struct ceph_connection *con)
if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF)) if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF))
return false; return false;
ret = queue_con_delay(con, round_jiffies_relative(con->delay)); ret = queue_con_delay(con, con->delay);
if (ret) { if (ret) {
dout("%s: con %p FAILED to back off %lu\n", __func__, dout("%s: con %p FAILED to back off %lu\n", __func__,
con, con->delay); con, con->delay);
...@@ -3018,10 +3021,13 @@ static void con_fault(struct ceph_connection *con) ...@@ -3018,10 +3021,13 @@ static void con_fault(struct ceph_connection *con)
} else { } else {
/* retry after a delay. */ /* retry after a delay. */
con->state = CON_STATE_PREOPEN; con->state = CON_STATE_PREOPEN;
if (con->delay == 0) if (!con->delay) {
con->delay = BASE_DELAY_INTERVAL; con->delay = BASE_DELAY_INTERVAL;
else if (con->delay < MAX_DELAY_INTERVAL) } else if (con->delay < MAX_DELAY_INTERVAL) {
con->delay *= 2; con->delay *= 2;
if (con->delay > MAX_DELAY_INTERVAL)
con->delay = MAX_DELAY_INTERVAL;
}
con_flag_set(con, CON_FLAG_BACKOFF); con_flag_set(con, CON_FLAG_BACKOFF);
queue_con(con); queue_con(con);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment