Commit 1e86ac48 authored by Philipp Reisner's avatar Philipp Reisner

drbd: Bugfix for the connection behavior

If we get into the C_BROKEN_PIPE cstate once, the state engine set the
thi->t_state of the receiver thread to restarting.  But with the while loop
in drbdd_init() a new connection gets established. After the call into
drbdd() returns immediately since the thi->t_state is not RUNNING.  The
restart of drbd_init() then resets thi->t_state to RUNNING.

I.e. after entering C_BROKEN_PIPE once, the next successful established
connection gets wasted.

The two parts of the fix:
  * Do not cause the thread to restart if we detect the issue
    with the sockets while we are in C_WF_CONNECTION.

  * Make sure that all actions that would have set us to C_BROKEN_PIPE
    happen before the state change to C_WF_REPORT_PARAMS.
Signed-off-by: default avatarPhilipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: default avatarLars Ellenberg <lars.ellenberg@linbit.com>
parent 80f9fd55
...@@ -1318,7 +1318,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, ...@@ -1318,7 +1318,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
drbd_thread_stop_nowait(&mdev->receiver); drbd_thread_stop_nowait(&mdev->receiver);
/* Upon network failure, we need to restart the receiver. */ /* Upon network failure, we need to restart the receiver. */
if (os.conn > C_TEAR_DOWN && if (os.conn > C_WF_CONNECTION &&
ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
drbd_thread_restart_nowait(&mdev->receiver); drbd_thread_restart_nowait(&mdev->receiver);
......
...@@ -888,17 +888,12 @@ static int drbd_connect(struct drbd_conf *mdev) ...@@ -888,17 +888,12 @@ static int drbd_connect(struct drbd_conf *mdev)
} }
} }
if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
return 0;
sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
atomic_set(&mdev->packet_seq, 0); atomic_set(&mdev->packet_seq, 0);
mdev->peer_seq = 0; mdev->peer_seq = 0;
drbd_thread_start(&mdev->asender);
if (drbd_send_protocol(mdev) == -1) if (drbd_send_protocol(mdev) == -1)
return -1; return -1;
drbd_send_sync_param(mdev, &mdev->sync_conf); drbd_send_sync_param(mdev, &mdev->sync_conf);
...@@ -907,6 +902,11 @@ static int drbd_connect(struct drbd_conf *mdev) ...@@ -907,6 +902,11 @@ static int drbd_connect(struct drbd_conf *mdev)
drbd_send_state(mdev); drbd_send_state(mdev);
clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(USE_DEGR_WFC_T, &mdev->flags);
clear_bit(RESIZE_PENDING, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags);
if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
return 0;
drbd_thread_start(&mdev->asender);
mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
return 1; return 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment