Commit a1096a6e authored by Philipp Reisner

drbd: Delay/reject other state changes while establishing a connection

Changes to the role and disk state should be delayed or rejected
while we establish a connection.

This is necessary, since the peer will base its resync decision
on the UUIDs and the state we sent in the drbd_connect() function.

The most prominent example for this race is becoming primary after
sending state and UUIDs and before the state changes to C_WF_CONNECTION.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent 27eb13e9
...@@ -808,6 +808,7 @@ enum { ...@@ -808,6 +808,7 @@ enum {
CONN_WD_ST_CHG_FAIL, CONN_WD_ST_CHG_FAIL,
CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ CONN_DRY_RUN, /* Expect disconnect after resync handshake. */
CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */
STATE_SENT, /* Do not change state/UUIDs while this is set */
}; };
struct drbd_tconn { /* is a resource from the config file */ struct drbd_tconn { /* is a resource from the config file */
......
...@@ -502,7 +502,7 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn) ...@@ -502,7 +502,7 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
here, because we might were able to re-establish the connection in the here, because we might were able to re-establish the connection in the
meantime. */ meantime. */
spin_lock_irq(&tconn->req_lock); spin_lock_irq(&tconn->req_lock);
if (tconn->cstate < C_WF_REPORT_PARAMS) if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags))
_conn_request_state(tconn, mask, val, CS_VERBOSE); _conn_request_state(tconn, mask, val, CS_VERBOSE);
spin_unlock_irq(&tconn->req_lock); spin_unlock_irq(&tconn->req_lock);
......
...@@ -848,6 +848,7 @@ static int conn_connect(struct drbd_tconn *tconn) ...@@ -848,6 +848,7 @@ static int conn_connect(struct drbd_tconn *tconn)
struct net_conf *nc; struct net_conf *nc;
int vnr, timeout, try, h, ok; int vnr, timeout, try, h, ok;
bool discard_my_data; bool discard_my_data;
enum drbd_state_rv rv;
if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
return -2; return -2;
...@@ -1008,6 +1009,8 @@ static int conn_connect(struct drbd_tconn *tconn) ...@@ -1008,6 +1009,8 @@ static int conn_connect(struct drbd_tconn *tconn)
if (drbd_send_protocol(tconn) == -EOPNOTSUPP) if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
return -1; return -1;
set_bit(STATE_SENT, &tconn->flags);
rcu_read_lock(); rcu_read_lock();
idr_for_each_entry(&tconn->volumes, mdev, vnr) { idr_for_each_entry(&tconn->volumes, mdev, vnr) {
kref_get(&mdev->kref); kref_get(&mdev->kref);
...@@ -1024,8 +1027,11 @@ static int conn_connect(struct drbd_tconn *tconn) ...@@ -1024,8 +1027,11 @@ static int conn_connect(struct drbd_tconn *tconn)
} }
rcu_read_unlock(); rcu_read_unlock();
if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS) rv = conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
if (rv < SS_SUCCESS) {
clear_bit(STATE_SENT, &tconn->flags);
return 0; return 0;
}
drbd_thread_start(&tconn->asender); drbd_thread_start(&tconn->asender);
......
...@@ -53,7 +53,7 @@ static int w_after_state_ch(struct drbd_work *w, int unused); ...@@ -53,7 +53,7 @@ static int w_after_state_ch(struct drbd_work *w, int unused);
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
union drbd_state ns, enum chg_state_flags flags); union drbd_state ns, enum chg_state_flags flags);
static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_tconn *);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
enum sanitize_state_warnings *warn); enum sanitize_state_warnings *warn);
...@@ -267,7 +267,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask, ...@@ -267,7 +267,7 @@ _req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
if (rv == SS_UNKNOWN_ERROR) { if (rv == SS_UNKNOWN_ERROR) {
rv = is_valid_state(mdev, ns); rv = is_valid_state(mdev, ns);
if (rv == SS_SUCCESS) { if (rv == SS_SUCCESS) {
rv = is_valid_soft_transition(os, ns); rv = is_valid_soft_transition(os, ns, mdev->tconn);
if (rv == SS_SUCCESS) if (rv == SS_SUCCESS)
rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
} }
...@@ -313,7 +313,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, ...@@ -313,7 +313,7 @@ drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
if (cl_wide_st_chg(mdev, os, ns)) { if (cl_wide_st_chg(mdev, os, ns)) {
rv = is_valid_state(mdev, ns); rv = is_valid_state(mdev, ns);
if (rv == SS_SUCCESS) if (rv == SS_SUCCESS)
rv = is_valid_soft_transition(os, ns); rv = is_valid_soft_transition(os, ns, mdev->tconn);
spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
if (rv < SS_SUCCESS) { if (rv < SS_SUCCESS) {
...@@ -569,7 +569,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns) ...@@ -569,7 +569,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
* @os: old state. * @os: old state.
*/ */
static enum drbd_state_rv static enum drbd_state_rv
is_valid_soft_transition(union drbd_state os, union drbd_state ns) is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_tconn *tconn)
{ {
enum drbd_state_rv rv = SS_SUCCESS; enum drbd_state_rv rv = SS_SUCCESS;
...@@ -595,6 +595,13 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns) ...@@ -595,6 +595,13 @@ is_valid_soft_transition(union drbd_state os, union drbd_state ns)
/* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) /* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
rv = SS_IN_TRANSIENT_STATE; */ rv = SS_IN_TRANSIENT_STATE; */
/* While establishing a connection only allow cstate to change.
Delay/refuse role changes, detach attach etc... */
if (test_bit(STATE_SENT, &tconn->flags) &&
!(os.conn == C_WF_REPORT_PARAMS ||
(ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION)))
rv = SS_IN_TRANSIENT_STATE;
if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
rv = SS_NEED_CONNECTION; rv = SS_NEED_CONNECTION;
...@@ -927,9 +934,9 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, ...@@ -927,9 +934,9 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
this happen...*/ this happen...*/
if (is_valid_state(mdev, os) == rv) if (is_valid_state(mdev, os) == rv)
rv = is_valid_soft_transition(os, ns); rv = is_valid_soft_transition(os, ns, mdev->tconn);
} else } else
rv = is_valid_soft_transition(os, ns); rv = is_valid_soft_transition(os, ns, mdev->tconn);
} }
if (rv < SS_SUCCESS) { if (rv < SS_SUCCESS) {
...@@ -1393,6 +1400,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, ...@@ -1393,6 +1400,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
drbd_send_state(mdev, ns); drbd_send_state(mdev, ns);
/* Wake up role changes, that were delayed because of connection establishing */
if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) {
if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags))
wake_up(&mdev->state_wait);
}
/* This triggers bitmap writeout of potentially still unwritten pages /* This triggers bitmap writeout of potentially still unwritten pages
* if the resync finished cleanly, or aborted because of peer disk * if the resync finished cleanly, or aborted because of peer disk
* failure, or because of connection loss. * failure, or because of connection loss.
...@@ -1565,9 +1578,9 @@ conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union ...@@ -1565,9 +1578,9 @@ conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union
rv = is_valid_state(mdev, ns); rv = is_valid_state(mdev, ns);
if (rv < SS_SUCCESS) { if (rv < SS_SUCCESS) {
if (is_valid_state(mdev, os) == rv) if (is_valid_state(mdev, os) == rv)
rv = is_valid_soft_transition(os, ns); rv = is_valid_soft_transition(os, ns, tconn);
} else } else
rv = is_valid_soft_transition(os, ns); rv = is_valid_soft_transition(os, ns, tconn);
} }
if (rv < SS_SUCCESS) if (rv < SS_SUCCESS)
break; break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment