Commit 654490a3 authored by Jakub Kicinski

Merge branch 'net-smc-improve-termination-handling'

Karsten Graul says:

====================
net/smc: improve termination handling

First set of patches to improve termination handling.
====================
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
parents 0ea1671f d18963cf
......@@ -75,6 +75,9 @@ struct smcd_dev {
struct workqueue_struct *event_wq;
u8 pnetid[SMC_MAX_PNETID_LEN];
bool pnetid_by_user;
struct list_head lgr_list;
spinlock_t lgr_lock;
u8 going_away : 1;
};
struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
......
......@@ -65,8 +65,8 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
rc = sk_wait_event(sk, &timeout,
!smc_tx_prepared_sends(&smc->conn) ||
(sk->sk_err == ECONNABORTED) ||
(sk->sk_err == ECONNRESET),
sk->sk_err == ECONNABORTED ||
sk->sk_err == ECONNRESET,
&wait);
if (rc)
break;
......@@ -113,9 +113,6 @@ static void smc_close_active_abort(struct smc_sock *smc)
{
struct sock *sk = &smc->sk;
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
sk->sk_err = ECONNABORTED;
if (smc->clcsock && smc->clcsock->sk) {
......@@ -129,35 +126,26 @@ static void smc_close_active_abort(struct smc_sock *smc)
release_sock(sk);
cancel_delayed_work_sync(&smc->conn.tx_work);
lock_sock(sk);
sk->sk_state = SMC_CLOSED;
sock_put(sk); /* passive closing */
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
if (!smc_cdc_rxed_any_close(&smc->conn))
sk->sk_state = SMC_PEERABORTWAIT;
else
sk->sk_state = SMC_CLOSED;
release_sock(sk);
cancel_delayed_work_sync(&smc->conn.tx_work);
lock_sock(sk);
sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (!txflags->peer_conn_closed) {
/* just SHUTDOWN_SEND done */
sk->sk_state = SMC_PEERABORTWAIT;
} else {
case SMC_PEERFINCLOSEWAIT:
sk->sk_state = SMC_CLOSED;
}
sock_put(sk); /* passive closing */
break;
case SMC_PROCESSABORT:
case SMC_APPFINCLOSEWAIT:
sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERFINCLOSEWAIT:
sock_put(sk); /* passive closing */
break;
case SMC_INIT:
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
......@@ -215,8 +203,6 @@ int smc_close_active(struct smc_sock *smc)
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
rc = smc_close_final(conn);
if (rc)
break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
} else {
/* peer event has changed the state */
......@@ -229,8 +215,6 @@ int smc_close_active(struct smc_sock *smc)
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
if (rc)
break;
}
sk->sk_state = SMC_CLOSED;
break;
......@@ -246,8 +230,6 @@ int smc_close_active(struct smc_sock *smc)
goto again;
/* confirm close from peer */
rc = smc_close_final(conn);
if (rc)
break;
if (smc_cdc_rxed_any_close(conn)) {
/* peer has closed the socket already */
sk->sk_state = SMC_CLOSED;
......@@ -263,8 +245,6 @@ int smc_close_active(struct smc_sock *smc)
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
if (rc)
break;
}
/* peer sending PeerConnectionClosed will cause transition */
break;
......@@ -272,10 +252,12 @@ int smc_close_active(struct smc_sock *smc)
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
smc_close_abort(conn);
rc = smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERABORTWAIT:
sk->sk_state = SMC_CLOSED;
break;
case SMC_CLOSED:
/* nothing to do, add tracing in future patch */
break;
......@@ -451,8 +433,6 @@ int smc_close_shutdown_write(struct smc_sock *smc)
goto again;
/* send close wr request */
rc = smc_close_wr(conn);
if (rc)
break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
break;
case SMC_APPCLOSEWAIT1:
......@@ -466,8 +446,6 @@ int smc_close_shutdown_write(struct smc_sock *smc)
goto again;
/* confirm close from peer */
rc = smc_close_wr(conn);
if (rc)
break;
sk->sk_state = SMC_APPCLOSEWAIT2;
break;
case SMC_APPCLOSEWAIT2:
......
......@@ -42,6 +42,19 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc);
/* Look up the list a link group is kept on.
 *
 * Returns the list head and stores the address of the spinlock guarding
 * that list in *lgr_lock: the global smc_lgr_list unless lgr->is_smcd is
 * set, in which case the list and lock of lgr->smcd are used.
 */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (!lgr->is_smcd) {
		*lgr_lock = &smc_lgr_list.lock;
		return &smc_lgr_list.list;
	}

	*lgr_lock = &lgr->smcd->lgr_lock;
	return &lgr->smcd->lgr_list;
}
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
/* client link group creation always follows the server link group
......@@ -157,19 +170,21 @@ static void smc_lgr_free_work(struct work_struct *work)
struct smc_link_group *lgr = container_of(to_delayed_work(work),
struct smc_link_group,
free_work);
spinlock_t *lgr_lock;
bool conns;
spin_lock_bh(&smc_lgr_list.lock);
smc_lgr_list_head(lgr, &lgr_lock);
spin_lock_bh(lgr_lock);
read_lock_bh(&lgr->conns_lock);
conns = RB_EMPTY_ROOT(&lgr->conns_all);
read_unlock_bh(&lgr->conns_lock);
if (!conns) { /* number of lgr connections is no longer zero */
spin_unlock_bh(&smc_lgr_list.lock);
spin_unlock_bh(lgr_lock);
return;
}
if (!list_empty(&lgr->list))
list_del_init(&lgr->list); /* remove from smc_lgr_list */
spin_unlock_bh(&smc_lgr_list.lock);
spin_unlock_bh(lgr_lock);
if (!lgr->is_smcd && !lgr->terminating) {
struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
......@@ -198,7 +213,9 @@ static void smc_lgr_free_work(struct work_struct *work)
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_link_group *lgr;
struct list_head *lgr_list;
struct smc_link *lnk;
spinlock_t *lgr_lock;
u8 rndvec[3];
int rc = 0;
int i;
......@@ -231,10 +248,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->conns_all = RB_ROOT;
if (ini->is_smcd) {
/* SMC-D specific settings */
get_device(&ini->ism_dev->dev);
lgr->peer_gid = ini->ism_gid;
lgr->smcd = ini->ism_dev;
lgr_list = &ini->ism_dev->lgr_list;
lgr_lock = &lgr->smcd->lgr_lock;
} else {
/* SMC-R specific settings */
get_device(&ini->ib_dev->ibdev->dev);
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
SMC_SYSTEMID_LEN);
......@@ -245,6 +266,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lnk->link_id = SMC_SINGLE_LINK;
lnk->smcibdev = ini->ib_dev;
lnk->ibport = ini->ib_port;
lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock;
lnk->path_mtu =
ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
if (!ini->ib_dev->initialized)
......@@ -274,9 +297,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
goto destroy_qp;
}
smc->conn.lgr = lgr;
spin_lock_bh(&smc_lgr_list.lock);
list_add(&lgr->list, &smc_lgr_list.list);
spin_unlock_bh(&smc_lgr_list.lock);
spin_lock_bh(lgr_lock);
list_add(&lgr->list, lgr_list);
spin_unlock_bh(lgr_lock);
return 0;
destroy_qp:
......@@ -430,20 +453,27 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
/* Release everything owned by a link group and free the group itself.
 *
 * Buffers are freed first. The type-specific part then releases the VLAN
 * (is_smcd set) or clears the single link (is_smcd clear), and finally the
 * structure is kfree()d.
 *
 * NOTE(review): this hunk appears to carry both the pre-change and the
 * post-change lines (the diff +/- markers were lost), so the unbraced and
 * the braced if/else forms are both shown below.
 */
static void smc_lgr_free(struct smc_link_group *lgr)
{
smc_lgr_free_bufs(lgr);
if (lgr->is_smcd)
if (lgr->is_smcd) {
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
else
/* drop the device reference; presumably balances the get_device()
 * taken in smc_lgr_create() -- confirm against that function
 */
put_device(&lgr->smcd->dev);
} else {
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
/* same reference drop for the IB device behind the single link */
put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
}
kfree(lgr);
}
/* Take a link group out of service for new connections by unlinking it
 * from the link group list, under the lock that guards that list.
 *
 * NOTE(review): this hunk appears to carry both the pre-change and the
 * post-change lines (the diff +/- markers were lost); the variant using
 * smc_lgr_list.lock and the variant using lgr_lock are both shown.
 */
void smc_lgr_forget(struct smc_link_group *lgr)
{
spin_lock_bh(&smc_lgr_list.lock);
struct list_head *lgr_list;
spinlock_t *lgr_lock;
lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
spin_lock_bh(lgr_lock);
/* do not use this link group for new connections */
if (!list_empty(&lgr->list))
list_del_init(&lgr->list);
spin_unlock_bh(&smc_lgr_list.lock);
/* NOTE(review): lgr_list is the list HEAD returned by
 * smc_lgr_list_head(), not this group's own entry. The variant above
 * tests and unlinks &lgr->list, whereas list_del_init(lgr_list) would
 * unlink the head and leave @lgr linked -- confirm whether &lgr->list
 * was intended here.
 */
if (!list_empty(lgr_list))
list_del_init(lgr_list);
spin_unlock_bh(lgr_lock);
}
/* terminate linkgroup abnormally */
......@@ -484,9 +514,12 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
/* Locked wrapper around __smc_lgr_terminate(): runs the termination of
 * @lgr with the link group list lock held (bottom halves disabled).
 *
 * NOTE(review): this hunk appears to carry both the pre-change and the
 * post-change lines (the diff +/- markers were lost); one variant takes
 * the global smc_lgr_list.lock, the other the lock selected by
 * smc_lgr_list_head().
 */
void smc_lgr_terminate(struct smc_link_group *lgr)
{
spin_lock_bh(&smc_lgr_list.lock);
spinlock_t *lgr_lock;
/* only the lock is needed here; the returned list head is ignored */
smc_lgr_list_head(lgr, &lgr_lock);
spin_lock_bh(lgr_lock);
__smc_lgr_terminate(lgr);
spin_unlock_bh(&smc_lgr_list.lock);
spin_unlock_bh(lgr_lock);
}
/* Called when IB port is terminated */
......@@ -511,16 +544,15 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
LIST_HEAD(lgr_free_list);
/* run common cleanup function and build free list */
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (lgr->is_smcd && lgr->smcd == dev &&
(!peer_gid || lgr->peer_gid == peer_gid) &&
spin_lock_bh(&dev->lgr_lock);
list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
if ((!peer_gid || lgr->peer_gid == peer_gid) &&
(vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
__smc_lgr_terminate(lgr);
list_move(&lgr->list, &lgr_free_list);
}
}
spin_unlock_bh(&smc_lgr_list.lock);
spin_unlock_bh(&dev->lgr_lock);
/* cancel the regular free workers and actually free lgrs */
list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
......@@ -604,10 +636,14 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
struct list_head *lgr_list;
struct smc_link_group *lgr;
enum smc_lgr_role role;
spinlock_t *lgr_lock;
int rc = 0;
lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
ini->cln_first_contact = SMC_FIRST_CONTACT;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
if (role == SMC_CLNT && ini->srv_first_contact)
......@@ -615,8 +651,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
goto create;
/* determine if an existing link group can be reused */
spin_lock_bh(&smc_lgr_list.lock);
list_for_each_entry(lgr, &smc_lgr_list.list, list) {
spin_lock_bh(lgr_lock);
list_for_each_entry(lgr, lgr_list, list) {
write_lock_bh(&lgr->conns_lock);
if ((ini->is_smcd ?
smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
......@@ -636,7 +672,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
}
write_unlock_bh(&lgr->conns_lock);
}
spin_unlock_bh(&smc_lgr_list.lock);
spin_unlock_bh(lgr_lock);
if (role == SMC_CLNT && !ini->srv_first_contact &&
ini->cln_first_contact == SMC_FIRST_CONTACT) {
......@@ -1024,16 +1060,45 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
return 0;
}
static void smc_core_going_away(void)
{
struct smc_ib_device *smcibdev;
struct smcd_dev *smcd;
spin_lock(&smc_ib_devices.lock);
list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
int i;
for (i = 0; i < SMC_MAX_PORTS; i++)
set_bit(i, smcibdev->ports_going_away);
}
spin_unlock(&smc_ib_devices.lock);
spin_lock(&smcd_dev_list.lock);
list_for_each_entry(smcd, &smcd_dev_list.list, list) {
smcd->going_away = 1;
}
spin_unlock(&smcd_dev_list.lock);
}
/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_freeing_list);
struct smcd_dev *smcd;
smc_core_going_away();
spin_lock_bh(&smc_lgr_list.lock);
if (!list_empty(&smc_lgr_list.list))
list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
spin_unlock_bh(&smc_lgr_list.lock);
spin_lock(&smcd_dev_list.lock);
list_for_each_entry(smcd, &smcd_dev_list.list, list)
list_splice_init(&smcd->lgr_list, &lgr_freeing_list);
spin_unlock(&smcd_dev_list.lock);
list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
list_del_init(&lgr->list);
if (!lgr->is_smcd) {
......
......@@ -242,8 +242,12 @@ static void smc_ib_port_event_work(struct work_struct *work)
for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
smc_ib_remember_port_attr(smcibdev, port_idx + 1);
clear_bit(port_idx, &smcibdev->port_event_mask);
if (!smc_ib_port_active(smcibdev, port_idx + 1))
if (!smc_ib_port_active(smcibdev, port_idx + 1)) {
set_bit(port_idx, smcibdev->ports_going_away);
smc_port_terminate(smcibdev, port_idx + 1);
} else {
clear_bit(port_idx, smcibdev->ports_going_away);
}
}
}
......@@ -259,8 +263,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
switch (ibevent->event) {
case IB_EVENT_DEVICE_FATAL:
/* terminate all ports on device */
for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++)
for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) {
set_bit(port_idx, &smcibdev->port_event_mask);
set_bit(port_idx, smcibdev->ports_going_away);
}
schedule_work(&smcibdev->port_event_work);
break;
case IB_EVENT_PORT_ERR:
......@@ -269,6 +275,10 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
port_idx = ibevent->element.port_num - 1;
if (port_idx < SMC_MAX_PORTS) {
set_bit(port_idx, &smcibdev->port_event_mask);
if (ibevent->event == IB_EVENT_PORT_ERR)
set_bit(port_idx, smcibdev->ports_going_away);
else if (ibevent->event == IB_EVENT_PORT_ACTIVE)
clear_bit(port_idx, smcibdev->ports_going_away);
schedule_work(&smcibdev->port_event_work);
}
break;
......@@ -307,6 +317,7 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
port_idx = ibevent->element.qp->port - 1;
if (port_idx < SMC_MAX_PORTS) {
set_bit(port_idx, &smcibdev->port_event_mask);
set_bit(port_idx, smcibdev->ports_going_away);
schedule_work(&smcibdev->port_event_work);
}
break;
......
......@@ -47,6 +47,7 @@ struct smc_ib_device { /* ib-device infos for smc */
u8 initialized : 1; /* ib dev CQ, evthdl done */
struct work_struct port_event_work;
unsigned long port_event_mask;
DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS);
};
struct smc_buf_desc;
......
......@@ -286,7 +286,9 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
spin_lock_init(&smcd->lock);
spin_lock_init(&smcd->lgr_lock);
INIT_LIST_HEAD(&smcd->vlan);
INIT_LIST_HEAD(&smcd->lgr_list);
smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
WQ_MEM_RECLAIM, name);
if (!smcd->event_wq) {
......@@ -313,6 +315,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
spin_lock(&smcd_dev_list.lock);
list_del(&smcd->list);
spin_unlock(&smcd_dev_list.lock);
smcd->going_away = 1;
flush_workqueue(smcd->event_wq);
destroy_workqueue(smcd->event_wq);
smc_smcd_terminate(smcd, 0, VLAN_VID_MASK);
......@@ -342,6 +345,8 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
{
struct smc_ism_event_work *wrk;
if (smcd->going_away)
return;
/* copy event to event work queue, and let it be handled there */
wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
if (!wrk)
......
......@@ -781,6 +781,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
dev_put(ndev);
if (netdev == ndev &&
smc_ib_port_active(ibdev, i) &&
!test_bit(i - 1, ibdev->ports_going_away) &&
!smc_ib_determine_gid(ibdev, i, ini->vlan_id,
ini->ib_gid, NULL)) {
ini->ib_dev = ibdev;
......@@ -820,6 +821,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
continue;
if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
smc_ib_port_active(ibdev, i) &&
!test_bit(i - 1, ibdev->ports_going_away) &&
!smc_ib_determine_gid(ibdev, i, ini->vlan_id,
ini->ib_gid, NULL)) {
ini->ib_dev = ibdev;
......@@ -846,7 +848,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
spin_lock(&smcd_dev_list.lock);
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
!ismdev->going_away) {
ini->ism_dev = ismdev;
break;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment