Commit 91e45ce3 authored by Sage Weil

ceph: cancel delayed work when closing connection

This ensures that if/when we reopen the connection, we can requeue work on
the connection immediately, without waiting for an old timer to expire.
Queue new delayed work inside con->mutex to avoid any race.

This fixes problems with clients failing to reconnect to the MDS due to
the client_reconnect message arriving too late (due to waiting for an old
delayed work timeout to expire).
Signed-off-by: Sage Weil <sage@newdream.net>
parent e2663ab6
...@@ -344,6 +344,7 @@ void ceph_con_close(struct ceph_connection *con) ...@@ -344,6 +344,7 @@ void ceph_con_close(struct ceph_connection *con)
clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
mutex_lock(&con->mutex); mutex_lock(&con->mutex);
reset_connection(con); reset_connection(con);
cancel_delayed_work(&con->work);
mutex_unlock(&con->mutex); mutex_unlock(&con->mutex);
queue_con(con); queue_con(con);
} }
...@@ -1841,6 +1842,8 @@ static void ceph_fault(struct ceph_connection *con) ...@@ -1841,6 +1842,8 @@ static void ceph_fault(struct ceph_connection *con)
clear_bit(BUSY, &con->state); /* to avoid an improbable race */ clear_bit(BUSY, &con->state); /* to avoid an improbable race */
mutex_lock(&con->mutex); mutex_lock(&con->mutex);
if (test_bit(CLOSED, &con->state))
goto out_unlock;
con_close_socket(con); con_close_socket(con);
...@@ -1876,8 +1879,6 @@ static void ceph_fault(struct ceph_connection *con) ...@@ -1876,8 +1879,6 @@ static void ceph_fault(struct ceph_connection *con)
else if (con->delay < MAX_DELAY_INTERVAL) else if (con->delay < MAX_DELAY_INTERVAL)
con->delay *= 2; con->delay *= 2;
mutex_unlock(&con->mutex);
/* explicitly schedule work to try to reconnect again later. */ /* explicitly schedule work to try to reconnect again later. */
dout("fault queueing %p delay %lu\n", con, con->delay); dout("fault queueing %p delay %lu\n", con, con->delay);
con->ops->get(con); con->ops->get(con);
...@@ -1885,6 +1886,8 @@ static void ceph_fault(struct ceph_connection *con) ...@@ -1885,6 +1886,8 @@ static void ceph_fault(struct ceph_connection *con)
round_jiffies_relative(con->delay)) == 0) round_jiffies_relative(con->delay)) == 0)
con->ops->put(con); con->ops->put(con);
out_unlock:
mutex_unlock(&con->mutex);
out: out:
if (con->ops->fault) if (con->ops->fault)
con->ops->fault(con); con->ops->fault(con);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment