Commit 4d99b258 authored by Liang Zhen's avatar Liang Zhen Committed by Greg Kroah-Hartman

staging: lustre: avoid intensive reconnecting for ko2iblnd

When there is a connection race between two nodes and one side
of the connection is rejected by the other side, o2iblnd will
reconnect immediately. This is going to generate a lot of
thrashing if:

 - race winner is slow and can't send out connecting request
   in short time.
 - remote side leaves a cmid in TIMEWAIT state, which will reject
   future connection requests

To resolve this problem, this patch changes the reconnection
behavior: a reconnection is submitted by connd only if a zombie
connection is being destroyed and there is a pending
reconnection request for the corresponding peer.

Also, after a few rejections, reconnection will have a time
interval between each attempt.
Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-7569
Reviewed-on: http://review.whamcloud.com/17892
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 82fffff4
...@@ -364,9 +364,7 @@ void kiblnd_destroy_peer(kib_peer_t *peer) ...@@ -364,9 +364,7 @@ void kiblnd_destroy_peer(kib_peer_t *peer)
LASSERT(net); LASSERT(net);
LASSERT(!atomic_read(&peer->ibp_refcount)); LASSERT(!atomic_read(&peer->ibp_refcount));
LASSERT(!kiblnd_peer_active(peer)); LASSERT(!kiblnd_peer_active(peer));
LASSERT(!peer->ibp_connecting); LASSERT(kiblnd_peer_idle(peer));
LASSERT(!peer->ibp_accepting);
LASSERT(list_empty(&peer->ibp_conns));
LASSERT(list_empty(&peer->ibp_tx_queue)); LASSERT(list_empty(&peer->ibp_tx_queue));
LIBCFS_FREE(peer, sizeof(*peer)); LIBCFS_FREE(peer, sizeof(*peer));
...@@ -392,10 +390,7 @@ kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid) ...@@ -392,10 +390,7 @@ kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid)
list_for_each(tmp, peer_list) { list_for_each(tmp, peer_list) {
peer = list_entry(tmp, kib_peer_t, ibp_list); peer = list_entry(tmp, kib_peer_t, ibp_list);
LASSERT(!kiblnd_peer_idle(peer));
LASSERT(peer->ibp_connecting > 0 || /* creating conns */
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns)); /* active conn */
if (peer->ibp_nid != nid) if (peer->ibp_nid != nid)
continue; continue;
...@@ -432,9 +427,7 @@ static int kiblnd_get_peer_info(lnet_ni_t *ni, int index, ...@@ -432,9 +427,7 @@ static int kiblnd_get_peer_info(lnet_ni_t *ni, int index,
for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
peer = list_entry(ptmp, kib_peer_t, ibp_list); peer = list_entry(ptmp, kib_peer_t, ibp_list);
LASSERT(peer->ibp_connecting > 0 || LASSERT(!kiblnd_peer_idle(peer));
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns));
if (peer->ibp_ni != ni) if (peer->ibp_ni != ni)
continue; continue;
...@@ -502,9 +495,7 @@ static int kiblnd_del_peer(lnet_ni_t *ni, lnet_nid_t nid) ...@@ -502,9 +495,7 @@ static int kiblnd_del_peer(lnet_ni_t *ni, lnet_nid_t nid)
for (i = lo; i <= hi; i++) { for (i = lo; i <= hi; i++) {
list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
peer = list_entry(ptmp, kib_peer_t, ibp_list); peer = list_entry(ptmp, kib_peer_t, ibp_list);
LASSERT(peer->ibp_connecting > 0 || LASSERT(!kiblnd_peer_idle(peer));
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns));
if (peer->ibp_ni != ni) if (peer->ibp_ni != ni)
continue; continue;
...@@ -545,9 +536,7 @@ static kib_conn_t *kiblnd_get_conn_by_idx(lnet_ni_t *ni, int index) ...@@ -545,9 +536,7 @@ static kib_conn_t *kiblnd_get_conn_by_idx(lnet_ni_t *ni, int index)
for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
peer = list_entry(ptmp, kib_peer_t, ibp_list); peer = list_entry(ptmp, kib_peer_t, ibp_list);
LASSERT(peer->ibp_connecting > 0 || LASSERT(!kiblnd_peer_idle(peer));
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns));
if (peer->ibp_ni != ni) if (peer->ibp_ni != ni)
continue; continue;
...@@ -837,14 +826,14 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, ...@@ -837,14 +826,14 @@ kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
return conn; return conn;
failed_2: failed_2:
kiblnd_destroy_conn(conn); kiblnd_destroy_conn(conn, true);
failed_1: failed_1:
LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
failed_0: failed_0:
return NULL; return NULL;
} }
void kiblnd_destroy_conn(kib_conn_t *conn) void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn)
{ {
struct rdma_cm_id *cmid = conn->ibc_cmid; struct rdma_cm_id *cmid = conn->ibc_cmid;
kib_peer_t *peer = conn->ibc_peer; kib_peer_t *peer = conn->ibc_peer;
...@@ -984,9 +973,7 @@ static int kiblnd_close_matching_conns(lnet_ni_t *ni, lnet_nid_t nid) ...@@ -984,9 +973,7 @@ static int kiblnd_close_matching_conns(lnet_ni_t *ni, lnet_nid_t nid)
for (i = lo; i <= hi; i++) { for (i = lo; i <= hi; i++) {
list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) {
peer = list_entry(ptmp, kib_peer_t, ibp_list); peer = list_entry(ptmp, kib_peer_t, ibp_list);
LASSERT(peer->ibp_connecting > 0 || LASSERT(!kiblnd_peer_idle(peer));
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns));
if (peer->ibp_ni != ni) if (peer->ibp_ni != ni)
continue; continue;
...@@ -1071,12 +1058,8 @@ static void kiblnd_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when) ...@@ -1071,12 +1058,8 @@ static void kiblnd_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when)
read_lock_irqsave(glock, flags); read_lock_irqsave(glock, flags);
peer = kiblnd_find_peer_locked(nid); peer = kiblnd_find_peer_locked(nid);
if (peer) { if (peer)
LASSERT(peer->ibp_connecting > 0 || /* creating conns */
peer->ibp_accepting > 0 ||
!list_empty(&peer->ibp_conns)); /* active conn */
last_alive = peer->ibp_last_alive; last_alive = peer->ibp_last_alive;
}
read_unlock_irqrestore(glock, flags); read_unlock_irqrestore(glock, flags);
...@@ -2368,6 +2351,8 @@ static void kiblnd_base_shutdown(void) ...@@ -2368,6 +2351,8 @@ static void kiblnd_base_shutdown(void)
LASSERT(list_empty(&kiblnd_data.kib_peers[i])); LASSERT(list_empty(&kiblnd_data.kib_peers[i]));
LASSERT(list_empty(&kiblnd_data.kib_connd_zombies)); LASSERT(list_empty(&kiblnd_data.kib_connd_zombies));
LASSERT(list_empty(&kiblnd_data.kib_connd_conns)); LASSERT(list_empty(&kiblnd_data.kib_connd_conns));
LASSERT(list_empty(&kiblnd_data.kib_reconn_list));
LASSERT(list_empty(&kiblnd_data.kib_reconn_wait));
/* flag threads to terminate; wake and wait for them to die */ /* flag threads to terminate; wake and wait for them to die */
kiblnd_data.kib_shutdown = 1; kiblnd_data.kib_shutdown = 1;
...@@ -2506,6 +2491,9 @@ static int kiblnd_base_startup(void) ...@@ -2506,6 +2491,9 @@ static int kiblnd_base_startup(void)
spin_lock_init(&kiblnd_data.kib_connd_lock); spin_lock_init(&kiblnd_data.kib_connd_lock);
INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns); INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies); INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list);
INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait);
init_waitqueue_head(&kiblnd_data.kib_connd_waitq); init_waitqueue_head(&kiblnd_data.kib_connd_waitq);
init_waitqueue_head(&kiblnd_data.kib_failover_waitq); init_waitqueue_head(&kiblnd_data.kib_failover_waitq);
......
...@@ -348,6 +348,16 @@ typedef struct { ...@@ -348,6 +348,16 @@ typedef struct {
void *kib_connd; /* the connd task (serialisation assertions) */ void *kib_connd; /* the connd task (serialisation assertions) */
struct list_head kib_connd_conns; /* connections to setup/teardown */ struct list_head kib_connd_conns; /* connections to setup/teardown */
struct list_head kib_connd_zombies; /* connections with zero refcount */ struct list_head kib_connd_zombies; /* connections with zero refcount */
/* connections to reconnect */
struct list_head kib_reconn_list;
/* peers wait for reconnection */
struct list_head kib_reconn_wait;
/**
* The second that peers are pulled out from \a kib_reconn_wait
* for reconnection.
*/
time64_t kib_reconn_sec;
wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */ wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */
spinlock_t kib_connd_lock; /* serialise */ spinlock_t kib_connd_lock; /* serialise */
struct ib_qp_attr kib_error_qpa; /* QP->ERROR */ struct ib_qp_attr kib_error_qpa; /* QP->ERROR */
...@@ -525,6 +535,8 @@ typedef struct kib_conn { ...@@ -525,6 +535,8 @@ typedef struct kib_conn {
struct list_head ibc_list; /* stash on peer's conn list */ struct list_head ibc_list; /* stash on peer's conn list */
struct list_head ibc_sched_list; /* schedule for attention */ struct list_head ibc_sched_list; /* schedule for attention */
__u16 ibc_version; /* version of connection */ __u16 ibc_version; /* version of connection */
/* reconnect later */
__u16 ibc_reconnect:1;
__u64 ibc_incarnation; /* which instance of the peer */ __u64 ibc_incarnation; /* which instance of the peer */
atomic_t ibc_refcount; /* # users */ atomic_t ibc_refcount; /* # users */
int ibc_state; /* what's happening */ int ibc_state; /* what's happening */
...@@ -574,18 +586,25 @@ typedef struct kib_peer { ...@@ -574,18 +586,25 @@ typedef struct kib_peer {
struct list_head ibp_list; /* stash on global peer list */ struct list_head ibp_list; /* stash on global peer list */
lnet_nid_t ibp_nid; /* who's on the other end(s) */ lnet_nid_t ibp_nid; /* who's on the other end(s) */
lnet_ni_t *ibp_ni; /* LNet interface */ lnet_ni_t *ibp_ni; /* LNet interface */
atomic_t ibp_refcount; /* # users */
struct list_head ibp_conns; /* all active connections */ struct list_head ibp_conns; /* all active connections */
struct list_head ibp_tx_queue; /* msgs waiting for a conn */ struct list_head ibp_tx_queue; /* msgs waiting for a conn */
__u16 ibp_version; /* version of peer */
__u64 ibp_incarnation; /* incarnation of peer */ __u64 ibp_incarnation; /* incarnation of peer */
int ibp_connecting; /* current active connection attempts /* when (in jiffies) I was last alive */
*/ unsigned long ibp_last_alive;
int ibp_accepting; /* current passive connection attempts /* # users */
*/ atomic_t ibp_refcount;
int ibp_error; /* errno on closing this peer */ /* version of peer */
unsigned long ibp_last_alive; /* when (in jiffies) I was last alive __u16 ibp_version;
*/ /* current passive connection attempts */
unsigned short ibp_accepting;
/* current active connection attempts */
unsigned short ibp_connecting;
/* reconnect this peer later */
unsigned short ibp_reconnecting:1;
/* # consecutive reconnection attempts to this peer */
unsigned int ibp_reconnected;
/* errno on closing this peer */
int ibp_error;
/* max map_on_demand */ /* max map_on_demand */
__u16 ibp_max_frags; __u16 ibp_max_frags;
/* max_peer_credits */ /* max_peer_credits */
...@@ -667,6 +686,20 @@ do { \ ...@@ -667,6 +686,20 @@ do { \
kiblnd_destroy_peer(peer); \ kiblnd_destroy_peer(peer); \
} while (0) } while (0)
static inline bool
kiblnd_peer_connecting(kib_peer_t *peer)
{
return peer->ibp_connecting ||
peer->ibp_reconnecting ||
peer->ibp_accepting;
}
static inline bool
kiblnd_peer_idle(kib_peer_t *peer)
{
return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns);
}
static inline struct list_head * static inline struct list_head *
kiblnd_nid2peerlist(lnet_nid_t nid) kiblnd_nid2peerlist(lnet_nid_t nid)
{ {
...@@ -943,6 +976,7 @@ int kiblnd_translate_mtu(int value); ...@@ -943,6 +976,7 @@ int kiblnd_translate_mtu(int value);
int kiblnd_dev_failover(kib_dev_t *dev); int kiblnd_dev_failover(kib_dev_t *dev);
int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid); int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
void kiblnd_destroy_peer(kib_peer_t *peer); void kiblnd_destroy_peer(kib_peer_t *peer);
bool kiblnd_reconnect_peer(kib_peer_t *peer);
void kiblnd_destroy_dev(kib_dev_t *dev); void kiblnd_destroy_dev(kib_dev_t *dev);
void kiblnd_unlink_peer_locked(kib_peer_t *peer); void kiblnd_unlink_peer_locked(kib_peer_t *peer);
kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid); kib_peer_t *kiblnd_find_peer_locked(lnet_nid_t nid);
...@@ -952,7 +986,7 @@ int kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why); ...@@ -952,7 +986,7 @@ int kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why);
kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
int state, int version); int state, int version);
void kiblnd_destroy_conn(kib_conn_t *conn); void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn);
void kiblnd_close_conn(kib_conn_t *conn, int error); void kiblnd_close_conn(kib_conn_t *conn, int error);
void kiblnd_close_conn_locked(kib_conn_t *conn, int error); void kiblnd_close_conn_locked(kib_conn_t *conn, int error);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment