Commit 25ed57b8 authored by David S. Miller's avatar David S. Miller

Merge branch 'smc-next'

Karsten Graul says:

====================
net/smc: updates 2020-09-10

Please apply the following patch series for smc to netdev's net-next tree.

This patch series is a mix of various improvements and cleanups.
The patches 1 and 10 improve the handling of large parallel workloads.
Patch 8 corrects a kernel config default for config CCWGROUP on s390.
Patch 9 allows userspace tools to retrieve socket information for more
sockets.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents b55353e2 22ef473d
......@@ -107,7 +107,7 @@ config QETH_OSX
config CCWGROUP
tristate
default (LCS || CTCM || QETH)
default (LCS || CTCM || QETH || SMC)
config ISM
tristate "Support for ISM vPCI Adapter"
......
......@@ -55,6 +55,9 @@ static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group
* creation on client
*/
struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
struct workqueue_struct *smc_close_wq; /* wq for close work */
static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);
......@@ -436,10 +439,10 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
static void smcr_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size);
smc->conn.peer_rmbe_idx = clc->rmbe_idx;
smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx;
smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token);
smc->conn.peer_rmbe_size = bufsize;
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
......@@ -448,10 +451,10 @@ static void smcr_conn_save_peer_info(struct smc_sock *smc,
static void smcd_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);
smc->conn.peer_rmbe_idx = clc->dmbe_idx;
smc->conn.peer_token = clc->token;
smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
smc->conn.peer_token = clc->d0.token;
/* msg header takes up space in the buffer */
smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
......@@ -470,11 +473,11 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
static void smc_link_save_peer_info(struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc)
{
link->peer_qpn = ntoh24(clc->qpn);
memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
link->peer_psn = ntoh24(clc->psn);
link->peer_mtu = clc->qp_mtu;
link->peer_qpn = ntoh24(clc->r0.qpn);
memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE);
memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac));
link->peer_psn = ntoh24(clc->r0.psn);
link->peer_mtu = clc->r0.qp_mtu;
}
static void smc_switch_to_fallback(struct smc_sock *smc)
......@@ -523,11 +526,11 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
/* abort connecting */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
int local_contact)
int local_first)
{
bool is_smcd = smc->conn.lgr->is_smcd;
if (local_contact == SMC_FIRST_CONTACT)
if (local_first)
smc_lgr_cleanup_early(&smc->conn);
else
smc_conn_free(&smc->conn);
......@@ -613,9 +616,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
struct smc_link *link;
ini->is_smcd = false;
ini->ib_lcl = &aclc->lcl;
ini->ib_clcqpn = ntoh24(aclc->qpn);
ini->srv_first_contact = aclc->hdr.flag;
ini->ib_lcl = &aclc->r0.lcl;
ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
ini->first_contact_peer = aclc->hdr.flag;
mutex_lock(&smc_client_lgr_pending);
reason_code = smc_conn_create(smc, ini);
......@@ -626,7 +629,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
smc_conn_save_peer_info(smc, aclc);
if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
if (ini->first_contact_local) {
link = smc->conn.lnk;
} else {
/* set link that was assigned by server */
......@@ -634,60 +637,62 @@ static int smc_connect_rdma(struct smc_sock *smc,
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *l = &smc->conn.lgr->lnk[i];
if (l->peer_qpn == ntoh24(aclc->qpn) &&
!memcmp(l->peer_gid, &aclc->lcl.gid, SMC_GID_SIZE) &&
!memcmp(l->peer_mac, &aclc->lcl.mac, sizeof(l->peer_mac))) {
if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
!memcmp(l->peer_gid, &aclc->r0.lcl.gid,
SMC_GID_SIZE) &&
!memcmp(l->peer_mac, &aclc->r0.lcl.mac,
sizeof(l->peer_mac))) {
link = l;
break;
}
}
if (!link)
return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
ini->cln_first_contact);
ini->first_contact_local);
smc->conn.lnk = link;
}
/* create send buffer and rmb */
if (smc_buf_create(smc, false))
return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
ini->cln_first_contact);
ini->first_contact_local);
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
if (ini->first_contact_local)
smc_link_save_peer_info(link, aclc);
if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
ini->cln_first_contact);
ini->first_contact_local);
smc_close_init(smc);
smc_rx_init(smc);
if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
if (ini->first_contact_local) {
if (smc_ib_ready_link(link))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
ini->cln_first_contact);
ini->first_contact_local);
} else {
if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
ini->cln_first_contact);
ini->first_contact_local);
}
smc_rmb_sync_sg_for_device(&smc->conn);
reason_code = smc_clc_send_confirm(smc);
if (reason_code)
return smc_connect_abort(smc, reason_code,
ini->cln_first_contact);
ini->first_contact_local);
smc_tx_init(smc);
if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
if (ini->first_contact_local) {
/* QP confirmation over RoCE fabric */
smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
reason_code = smcr_clnt_conf_first_link(smc);
smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
if (reason_code)
return smc_connect_abort(smc, reason_code,
ini->cln_first_contact);
ini->first_contact_local);
}
mutex_unlock(&smc_client_lgr_pending);
......@@ -707,8 +712,8 @@ static int smc_connect_ism(struct smc_sock *smc,
int rc = 0;
ini->is_smcd = true;
ini->ism_gid = aclc->gid;
ini->srv_first_contact = aclc->hdr.flag;
ini->ism_peer_gid = aclc->d0.gid;
ini->first_contact_peer = aclc->hdr.flag;
/* there is only one lgr role for SMC-D; use server lock */
mutex_lock(&smc_server_lgr_pending);
......@@ -724,7 +729,7 @@ static int smc_connect_ism(struct smc_sock *smc,
return smc_connect_abort(smc, (rc == -ENOSPC) ?
SMC_CLC_DECL_MAX_DMB :
SMC_CLC_DECL_MEM,
ini->cln_first_contact);
ini->first_contact_local);
smc_conn_save_peer_info(smc, aclc);
smc_close_init(smc);
......@@ -733,7 +738,7 @@ static int smc_connect_ism(struct smc_sock *smc,
rc = smc_clc_send_confirm(smc);
if (rc)
return smc_connect_abort(smc, rc, ini->cln_first_contact);
return smc_connect_abort(smc, rc, ini->first_contact_local);
mutex_unlock(&smc_server_lgr_pending);
smc_copy_sock_settings_to_clc(smc);
......@@ -903,7 +908,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
if (smc->use_fallback)
goto out;
if (flags & O_NONBLOCK) {
if (schedule_work(&smc->connect_work))
if (queue_work(smc_hs_wq, &smc->connect_work))
smc->connect_nonblock = 1;
rc = -EINPROGRESS;
} else {
......@@ -940,10 +945,10 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
mutex_lock(&lsmc->clcsock_release_lock);
if (lsmc->clcsock)
rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
mutex_unlock(&lsmc->clcsock_release_lock);
lock_sock(lsk);
if (rc < 0)
if (rc < 0 && rc != -EAGAIN)
lsk->sk_err = -rc;
if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
new_sk->sk_prot->unhash(new_sk);
......@@ -956,6 +961,10 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
goto out;
}
/* new clcsock has inherited the smc listen-specific sk_data_ready
* function; switch it back to the original sk_data_ready function
*/
new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
(*new_smc)->clcsock = new_clcsock;
out:
return rc;
......@@ -1123,10 +1132,10 @@ static void smc_listen_out_err(struct smc_sock *new_smc)
/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
int local_contact)
bool local_first)
{
/* RDMA setup failed, switch back to TCP */
if (local_contact == SMC_FIRST_CONTACT)
if (local_first)
smc_lgr_cleanup_early(&new_smc->conn);
else
smc_conn_free(&new_smc->conn);
......@@ -1182,30 +1191,16 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
struct smc_clc_msg_smcd *pclc_smcd;
int rc;
pclc_smcd = smc_get_clc_msg_smcd(pclc);
ini->ism_gid = pclc_smcd->gid;
rc = smc_conn_create(new_smc, ini);
if (rc)
return rc;
/* Check if peer can be reached via ISM device */
if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
new_smc->conn.lgr->vlan_id,
new_smc->conn.lgr->smcd)) {
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_lgr_cleanup_early(&new_smc->conn);
else
smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_SMCDNOTALK;
}
/* Create send and receive buffers */
rc = smc_buf_create(new_smc, true);
if (rc) {
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
if (ini->first_contact_local)
smc_lgr_cleanup_early(&new_smc->conn);
else
smc_conn_free(&new_smc->conn);
......@@ -1217,11 +1212,11 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
}
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
{
struct smc_connection *conn = &new_smc->conn;
if (local_contact != SMC_FIRST_CONTACT) {
if (!local_first) {
if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
}
......@@ -1233,35 +1228,25 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
struct smc_clc_msg_accept_confirm *cclc,
int local_contact)
bool local_first)
{
struct smc_link *link = new_smc->conn.lnk;
int reason_code = 0;
if (local_contact == SMC_FIRST_CONTACT)
if (local_first)
smc_link_save_peer_info(link, cclc);
if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
reason_code = SMC_CLC_DECL_ERR_RTOK;
goto decline;
}
if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
return SMC_CLC_DECL_ERR_RTOK;
if (local_contact == SMC_FIRST_CONTACT) {
if (smc_ib_ready_link(link)) {
reason_code = SMC_CLC_DECL_ERR_RDYLNK;
goto decline;
}
if (local_first) {
if (smc_ib_ready_link(link))
return SMC_CLC_DECL_ERR_RDYLNK;
/* QP confirmation over RoCE fabric */
smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
reason_code = smcr_serv_conf_first_link(new_smc);
smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
if (reason_code)
goto decline;
}
return 0;
decline:
smc_listen_decline(new_smc, reason_code, local_contact);
return reason_code;
}
......@@ -1272,10 +1257,10 @@ static void smc_listen_work(struct work_struct *work)
smc_listen_work);
struct socket *newclcsock = new_smc->clcsock;
struct smc_clc_msg_accept_confirm cclc;
struct smc_clc_msg_proposal_area *buf;
struct smc_clc_msg_proposal *pclc;
struct smc_init_info ini = {0};
bool ism_supported = false;
u8 buf[SMC_CLC_MAX_LEN];
int rc = 0;
if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
......@@ -1297,8 +1282,13 @@ static void smc_listen_work(struct work_struct *work)
/* do inband token exchange -
* wait for and receive SMC Proposal CLC message
*/
pclc = (struct smc_clc_msg_proposal *)&buf;
rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
buf = kzalloc(sizeof(*buf), GFP_KERNEL);
if (!buf) {
rc = SMC_CLC_DECL_MEM;
goto out_decl;
}
pclc = (struct smc_clc_msg_proposal *)buf;
rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf),
SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
if (rc)
goto out_decl;
......@@ -1327,7 +1317,10 @@ static void smc_listen_work(struct work_struct *work)
/* check if ISM is available */
if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);
ini.is_smcd = true; /* prepare ISM check */
ini.ism_peer_gid = pclc_smcd->gid;
rc = smc_find_ism_device(new_smc, &ini);
if (!rc)
rc = smc_listen_ism_init(new_smc, pclc, &ini);
......@@ -1354,13 +1347,13 @@ static void smc_listen_work(struct work_struct *work)
rc = smc_listen_rdma_init(new_smc, &ini);
if (rc)
goto out_unlock;
rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact);
rc = smc_listen_rdma_reg(new_smc, ini.first_contact_local);
if (rc)
goto out_unlock;
}
/* send SMC Accept CLC message */
rc = smc_clc_send_accept(new_smc, ini.cln_first_contact);
rc = smc_clc_send_accept(new_smc, ini.first_contact_local);
if (rc)
goto out_unlock;
......@@ -1378,12 +1371,13 @@ static void smc_listen_work(struct work_struct *work)
}
/* finish worker */
kfree(buf);
if (!ism_supported) {
rc = smc_listen_rdma_finish(new_smc, &cclc,
ini.cln_first_contact);
mutex_unlock(&smc_server_lgr_pending);
ini.first_contact_local);
if (rc)
return;
goto out_unlock;
mutex_unlock(&smc_server_lgr_pending);
}
smc_conn_save_peer_info(new_smc, &cclc);
smc_listen_out_connected(new_smc);
......@@ -1392,7 +1386,8 @@ static void smc_listen_work(struct work_struct *work)
out_unlock:
mutex_unlock(&smc_server_lgr_pending);
out_decl:
smc_listen_decline(new_smc, rc, ini.cln_first_contact);
smc_listen_decline(new_smc, rc, ini.first_contact_local);
kfree(buf);
}
static void smc_tcp_listen_work(struct work_struct *work)
......@@ -1406,7 +1401,7 @@ static void smc_tcp_listen_work(struct work_struct *work)
lock_sock(lsk);
while (lsk->sk_state == SMC_LISTEN) {
rc = smc_clcsock_accept(lsmc, &new_smc);
if (rc)
if (rc) /* clcsock accept queue empty or error */
goto out;
if (!new_smc)
continue;
......@@ -1420,13 +1415,29 @@ static void smc_tcp_listen_work(struct work_struct *work)
new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
sock_hold(&new_smc->sk); /* sock_put in passive closing */
if (!schedule_work(&new_smc->smc_listen_work))
if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
sock_put(&new_smc->sk);
}
out:
release_sock(lsk);
sock_put(&lsmc->sk); /* sock_hold in smc_listen */
sock_put(&lsmc->sk); /* sock_hold in smc_clcsock_data_ready() */
}
static void smc_clcsock_data_ready(struct sock *listen_clcsock)
{
struct smc_sock *lsmc;
lsmc = (struct smc_sock *)
((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY);
if (!lsmc)
return;
lsmc->clcsk_data_ready(listen_clcsock);
if (lsmc->sk.sk_state == SMC_LISTEN) {
sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
if (!queue_work(smc_hs_wq, &lsmc->tcp_listen_work))
sock_put(&lsmc->sk);
}
}
static int smc_listen(struct socket *sock, int backlog)
......@@ -1455,15 +1466,19 @@ static int smc_listen(struct socket *sock, int backlog)
if (!smc->use_fallback)
tcp_sk(smc->clcsock->sk)->syn_smc = 1;
/* save original sk_data_ready function and establish
* smc-specific sk_data_ready function
*/
smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready;
smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready;
smc->clcsock->sk->sk_user_data =
(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
rc = kernel_listen(smc->clcsock, backlog);
if (rc)
goto out;
sk->sk_max_ack_backlog = backlog;
sk->sk_ack_backlog = 0;
sk->sk_state = SMC_LISTEN;
sock_hold(sk); /* sock_hold in tcp_listen_worker */
if (!schedule_work(&smc->tcp_listen_work))
sock_put(sk);
out:
release_sock(sk);
......@@ -1788,8 +1803,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_state != SMC_LISTEN &&
sk->sk_state != SMC_CLOSED) {
if (val)
mod_delayed_work(system_wq, &smc->conn.tx_work,
0);
mod_delayed_work(smc->conn.lgr->tx_wq,
&smc->conn.tx_work, 0);
}
break;
case TCP_CORK:
......@@ -1797,8 +1812,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_state != SMC_LISTEN &&
sk->sk_state != SMC_CLOSED) {
if (!val)
mod_delayed_work(system_wq, &smc->conn.tx_work,
0);
mod_delayed_work(smc->conn.lgr->tx_wq,
&smc->conn.tx_work, 0);
}
break;
case TCP_DEFER_ACCEPT:
......@@ -2081,10 +2096,19 @@ static int __init smc_init(void)
if (rc)
goto out_pernet_subsys;
rc = -ENOMEM;
smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
if (!smc_hs_wq)
goto out_pnet;
smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0);
if (!smc_close_wq)
goto out_alloc_hs_wq;
rc = smc_core_init();
if (rc) {
pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
goto out_pnet;
goto out_alloc_wqs;
}
rc = smc_llc_init();
......@@ -2136,6 +2160,10 @@ static int __init smc_init(void)
proto_unregister(&smc_proto);
out_core:
smc_core_exit();
out_alloc_wqs:
destroy_workqueue(smc_close_wq);
out_alloc_hs_wq:
destroy_workqueue(smc_hs_wq);
out_pnet:
smc_pnet_exit();
out_pernet_subsys:
......@@ -2150,6 +2178,8 @@ static void __exit smc_exit(void)
sock_unregister(PF_SMC);
smc_core_exit();
smc_ib_unregister_client();
destroy_workqueue(smc_close_wq);
destroy_workqueue(smc_hs_wq);
proto_unregister(&smc_proto6);
proto_unregister(&smc_proto);
smc_pnet_exit();
......
......@@ -18,6 +18,8 @@
#include "smc_ib.h"
#define SMC_V1 1 /* SMC version V1 */
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
......@@ -201,6 +203,8 @@ struct smc_connection {
struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
void (*clcsk_data_ready)(struct sock *sk);
/* original data_ready fct. **/
struct smc_connection conn; /* smc connection */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct connect_work; /* handle non-blocking connect*/
......@@ -235,6 +239,9 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
return (struct smc_sock *)sk;
}
extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */
extern struct workqueue_struct *smc_close_wq; /* wq for close work */
#define SMC_SYSTEMID_LEN 8
extern u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */
......
......@@ -299,7 +299,7 @@ static void smc_cdc_msg_validate(struct smc_sock *smc, struct smc_cdc_msg *cdc,
conn->lnk = link;
spin_unlock_bh(&conn->send_lock);
sock_hold(&smc->sk); /* sock_put in abort_work */
if (!schedule_work(&conn->abort_work))
if (!queue_work(smc_close_wq, &conn->abort_work))
sock_put(&smc->sk);
}
}
......@@ -368,7 +368,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(&smc->sk, SOCK_DONE);
sock_hold(&smc->sk); /* sock_put in close_work */
if (!schedule_work(&conn->close_work))
if (!queue_work(smc_close_wq, &conn->close_work))
sock_put(&smc->sk);
}
}
......
......@@ -153,7 +153,6 @@ static int smc_clc_prfx_set(struct socket *clcsock,
struct sockaddr_in *addr;
int rc = -ENOENT;
memset(prop, 0, sizeof(*prop));
if (!dst) {
rc = -ENOTCONN;
goto out;
......@@ -320,7 +319,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
datlen = ntohs(clcm->length);
if ((len < sizeof(struct smc_clc_msg_hdr)) ||
(clcm->version < SMC_CLC_V1) ||
(clcm->version < SMC_V1) ||
((clcm->type != SMC_CLC_DECLINE) &&
(clcm->type != expected_type))) {
smc->sk.sk_err = EPROTO;
......@@ -389,7 +388,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
dclc.hdr.type = SMC_CLC_DECLINE;
dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
dclc.hdr.version = SMC_CLC_V1;
dclc.hdr.version = SMC_V1;
dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
if ((!smc->conn.lgr || !smc->conn.lgr->is_smcd) &&
smc_ib_is_valid_local_systemid())
......@@ -412,138 +411,171 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
struct smc_init_info *ini)
{
struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_msg_smcd pclc_smcd;
struct smc_clc_msg_proposal pclc;
struct smc_clc_msg_trail trl;
struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct smc_clc_msg_proposal *pclc_base;
struct smc_clc_msg_proposal_area *pclc;
struct smc_clc_ipv6_prefix *ipv6_prfx;
struct smc_clc_msg_smcd *pclc_smcd;
struct smc_clc_msg_trail *trl;
int len, i, plen, rc;
int reason_code = 0;
struct kvec vec[5];
struct msghdr msg;
pclc = kzalloc(sizeof(*pclc), GFP_KERNEL);
if (!pclc)
return -ENOMEM;
pclc_base = &pclc->pclc_base;
pclc_smcd = &pclc->pclc_smcd;
pclc_prfx = &pclc->pclc_prfx;
ipv6_prfx = pclc->pclc_prfx_ipv6;
trl = &pclc->pclc_trl;
/* retrieve ip prefixes for CLC proposal msg */
rc = smc_clc_prfx_set(smc->clcsock, &pclc_prfx, ipv6_prfx);
if (rc)
rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx);
if (rc) {
kfree(pclc);
return SMC_CLC_DECL_CNFERR; /* configuration error */
}
/* send SMC Proposal CLC message */
plen = sizeof(pclc) + sizeof(pclc_prfx) +
(pclc_prfx.ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
sizeof(trl);
memset(&pclc, 0, sizeof(pclc));
memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
pclc.hdr.type = SMC_CLC_PROPOSAL;
pclc.hdr.version = SMC_CLC_V1; /* SMC version */
pclc.hdr.path = smc_type;
plen = sizeof(*pclc_base) + sizeof(*pclc_prfx) +
(pclc_prfx->ipv6_prefixes_cnt * sizeof(ipv6_prfx[0])) +
sizeof(*trl);
memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
pclc_base->hdr.type = SMC_CLC_PROPOSAL;
pclc_base->hdr.version = SMC_V1; /* SMC version */
pclc_base->hdr.path = smc_type;
if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) {
/* add SMC-R specifics */
memcpy(pclc.lcl.id_for_peer, local_systemid,
memcpy(pclc_base->lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(&pclc.lcl.gid, ini->ib_gid, SMC_GID_SIZE);
memcpy(&pclc.lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE);
memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1],
ETH_ALEN);
pclc.iparea_offset = htons(0);
pclc_base->iparea_offset = htons(0);
}
if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
/* add SMC-D specifics */
memset(&pclc_smcd, 0, sizeof(pclc_smcd));
plen += sizeof(pclc_smcd);
pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET);
pclc_smcd.gid = ini->ism_dev->local_gid;
plen += sizeof(*pclc_smcd);
pclc_base->iparea_offset = htons(sizeof(*pclc_smcd));
pclc_smcd->gid = ini->ism_dev->local_gid;
}
pclc.hdr.length = htons(plen);
pclc_base->hdr.length = htons(plen);
memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
memset(&msg, 0, sizeof(msg));
i = 0;
vec[i].iov_base = &pclc;
vec[i++].iov_len = sizeof(pclc);
vec[i].iov_base = pclc_base;
vec[i++].iov_len = sizeof(*pclc_base);
if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
vec[i].iov_base = &pclc_smcd;
vec[i++].iov_len = sizeof(pclc_smcd);
vec[i].iov_base = pclc_smcd;
vec[i++].iov_len = sizeof(*pclc_smcd);
}
vec[i].iov_base = &pclc_prfx;
vec[i++].iov_len = sizeof(pclc_prfx);
if (pclc_prfx.ipv6_prefixes_cnt > 0) {
vec[i].iov_base = &ipv6_prfx[0];
vec[i++].iov_len = pclc_prfx.ipv6_prefixes_cnt *
vec[i].iov_base = pclc_prfx;
vec[i++].iov_len = sizeof(*pclc_prfx);
if (pclc_prfx->ipv6_prefixes_cnt > 0) {
vec[i].iov_base = ipv6_prfx;
vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt *
sizeof(ipv6_prfx[0]);
}
vec[i].iov_base = &trl;
vec[i++].iov_len = sizeof(trl);
vec[i].iov_base = trl;
vec[i++].iov_len = sizeof(*trl);
/* due to the few bytes needed for clc-handshake this cannot block */
len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen);
if (len < 0) {
smc->sk.sk_err = smc->clcsock->sk->sk_err;
reason_code = -smc->sk.sk_err;
} else if (len < (int)sizeof(pclc)) {
} else if (len < ntohs(pclc_base->hdr.length)) {
reason_code = -ENETUNREACH;
smc->sk.sk_err = -reason_code;
}
kfree(pclc);
return reason_code;
}
/* send CLC CONFIRM message across internal TCP socket */
int smc_clc_send_confirm(struct smc_sock *smc)
/* build and send CLC CONFIRM / ACCEPT message */
static int smc_clc_send_confirm_accept(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc,
int first_contact)
{
struct smc_connection *conn = &smc->conn;
struct smc_clc_msg_accept_confirm cclc;
struct smc_link *link;
int reason_code = 0;
struct msghdr msg;
struct kvec vec;
int len;
/* send SMC Confirm CLC msg */
memset(&cclc, 0, sizeof(cclc));
cclc.hdr.type = SMC_CLC_CONFIRM;
cclc.hdr.version = SMC_CLC_V1; /* SMC version */
if (smc->conn.lgr->is_smcd) {
clc->hdr.version = SMC_V1; /* SMC version */
if (first_contact)
clc->hdr.flag = 1;
if (conn->lgr->is_smcd) {
/* SMC-D specific settings */
memcpy(cclc.hdr.eyecatcher, SMCD_EYECATCHER,
memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
cclc.hdr.path = SMC_TYPE_D;
cclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
cclc.gid = conn->lgr->smcd->local_gid;
cclc.token = conn->rmb_desc->token;
cclc.dmbe_size = conn->rmbe_size_short;
cclc.dmbe_idx = 0;
memcpy(&cclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
memcpy(cclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
clc->hdr.path = SMC_TYPE_D;
clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
clc->d0.gid = conn->lgr->smcd->local_gid;
clc->d0.token = conn->rmb_desc->token;
clc->d0.dmbe_size = conn->rmbe_size_short;
clc->d0.dmbe_idx = 0;
memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
memcpy(clc->d0.smcd_trl.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
} else {
struct smc_link *link = conn->lnk;
/* SMC-R specific settings */
link = conn->lnk;
memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
cclc.hdr.path = SMC_TYPE_R;
cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
memcpy(cclc.lcl.id_for_peer, local_systemid,
clc->hdr.path = SMC_TYPE_R;
clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
memcpy(clc->r0.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(&cclc.lcl.gid, link->gid, SMC_GID_SIZE);
memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE);
memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
ETH_ALEN);
hton24(cclc.qpn, link->roce_qp->qp_num);
cclc.rmb_rkey =
hton24(clc->r0.qpn, link->roce_qp->qp_num);
clc->r0.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
cclc.rmbe_alert_token = htonl(conn->alert_token_local);
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
cclc.rmbe_size = conn->rmbe_size_short;
cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
switch (clc->hdr.type) {
case SMC_CLC_ACCEPT:
clc->r0.qp_mtu = link->path_mtu;
break;
case SMC_CLC_CONFIRM:
clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
break;
}
clc->r0.rmbe_size = conn->rmbe_size_short;
clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(cclc.psn, link->psn_initial);
memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
hton24(clc->r0.psn, link->psn_initial);
memcpy(clc->r0.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
}
memset(&msg, 0, sizeof(msg));
vec.iov_base = &cclc;
vec.iov_len = ntohs(cclc.hdr.length);
len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
ntohs(cclc.hdr.length));
vec.iov_base = clc;
vec.iov_len = ntohs(clc->hdr.length);
return kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
ntohs(clc->hdr.length));
}
/* send CLC CONFIRM message across internal TCP socket */
int smc_clc_send_confirm(struct smc_sock *smc)
{
struct smc_clc_msg_accept_confirm cclc;
int reason_code = 0;
int len;
/* send SMC Confirm CLC msg */
memset(&cclc, 0, sizeof(cclc));
cclc.hdr.type = SMC_CLC_CONFIRM;
len = smc_clc_send_confirm_accept(smc, &cclc, 0);
if (len < ntohs(cclc.hdr.length)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
......@@ -557,65 +589,14 @@ int smc_clc_send_confirm(struct smc_sock *smc)
}
/* send CLC ACCEPT message across internal TCP socket */
int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact)
{
struct smc_connection *conn = &new_smc->conn;
struct smc_clc_msg_accept_confirm aclc;
struct smc_link *link;
struct msghdr msg;
struct kvec vec;
int len;
memset(&aclc, 0, sizeof(aclc));
aclc.hdr.type = SMC_CLC_ACCEPT;
aclc.hdr.version = SMC_CLC_V1; /* SMC version */
if (srv_first_contact)
aclc.hdr.flag = 1;
if (new_smc->conn.lgr->is_smcd) {
/* SMC-D specific settings */
aclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
memcpy(aclc.hdr.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
aclc.hdr.path = SMC_TYPE_D;
aclc.gid = conn->lgr->smcd->local_gid;
aclc.token = conn->rmb_desc->token;
aclc.dmbe_size = conn->rmbe_size_short;
aclc.dmbe_idx = 0;
memcpy(&aclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
memcpy(aclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
} else {
/* SMC-R specific settings */
aclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
aclc.hdr.path = SMC_TYPE_R;
link = conn->lnk;
memcpy(aclc.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE);
memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1],
ETH_ALEN);
hton24(aclc.qpn, link->roce_qp->qp_num);
aclc.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */
aclc.rmbe_alert_token = htonl(conn->alert_token_local);
aclc.qp_mtu = link->path_mtu;
aclc.rmbe_size = conn->rmbe_size_short,
aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
(conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(aclc.psn, link->psn_initial);
memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
}
memset(&msg, 0, sizeof(msg));
vec.iov_base = &aclc;
vec.iov_len = ntohs(aclc.hdr.length);
len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1,
ntohs(aclc.hdr.length));
len = smc_clc_send_confirm_accept(new_smc, &aclc, srv_first_contact);
if (len < ntohs(aclc.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
......
......@@ -22,7 +22,6 @@
#define SMC_CLC_CONFIRM 0x03
#define SMC_CLC_DECLINE 0x04
#define SMC_CLC_V1 0x1 /* SMC version */
#define SMC_TYPE_R 0 /* SMC-R only */
#define SMC_TYPE_D 1 /* SMC-D only */
#define SMC_TYPE_N 2 /* neither SMC-R nor SMC-D */
......@@ -38,7 +37,6 @@
#define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found (R or D) */
#define SMC_CLC_DECL_NOSMCDDEV 0x03030001 /* no SMC-D device found */
#define SMC_CLC_DECL_NOSMCRDEV 0x03030002 /* no SMC-R device found */
#define SMC_CLC_DECL_SMCDNOTALK 0x03030003 /* SMC-D dev can't talk to peer */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
......@@ -111,55 +109,58 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
__be16 iparea_offset; /* offset to IP address information area */
} __aligned(4);
#define SMC_CLC_PROPOSAL_MAX_OFFSET 0x28
#define SMC_CLC_PROPOSAL_MAX_PREFIX (SMC_CLC_MAX_V6_PREFIX * \
sizeof(struct smc_clc_ipv6_prefix))
#define SMC_CLC_MAX_LEN (sizeof(struct smc_clc_msg_proposal) + \
SMC_CLC_PROPOSAL_MAX_OFFSET + \
sizeof(struct smc_clc_msg_proposal_prefix) + \
SMC_CLC_PROPOSAL_MAX_PREFIX + \
sizeof(struct smc_clc_msg_trail))
struct smc_clc_msg_proposal_area {
struct smc_clc_msg_proposal pclc_base;
struct smc_clc_msg_smcd pclc_smcd;
struct smc_clc_msg_proposal_prefix pclc_prfx;
struct smc_clc_ipv6_prefix pclc_prfx_ipv6[SMC_CLC_MAX_V6_PREFIX];
struct smc_clc_msg_trail pclc_trl;
};
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
struct { /* SMC-R */
struct smc_clc_msg_local lcl;
u8 qpn[3]; /* QP number */
__be32 rmb_rkey; /* RMB rkey */
u8 rmbe_idx; /* Index of RMBE in RMB */
__be32 rmbe_alert_token;/* unique connection id */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 rmbe_size : 4, /* buf size (compressed) */
qp_mtu : 4; /* QP mtu */
struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */
struct smc_clc_msg_local lcl;
u8 qpn[3]; /* QP number */
__be32 rmb_rkey; /* RMB rkey */
u8 rmbe_idx; /* Index of RMBE in RMB */
__be32 rmbe_alert_token; /* unique connection id */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 rmbe_size : 4, /* buf size (compressed) */
qp_mtu : 4; /* QP mtu */
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 qp_mtu : 4,
rmbe_size : 4;
u8 qp_mtu : 4,
rmbe_size : 4;
#endif
u8 reserved;
__be64 rmb_dma_addr; /* RMB virtual address */
u8 reserved2;
u8 psn[3]; /* packet sequence number */
struct smc_clc_msg_trail smcr_trl;
/* eye catcher "SMCR" EBCDIC */
} __packed;
struct { /* SMC-D */
u64 gid; /* Sender GID */
u64 token; /* DMB token */
u8 dmbe_idx; /* DMBE index */
u8 reserved;
__be64 rmb_dma_addr; /* RMB virtual address */
u8 reserved2;
u8 psn[3]; /* packet sequence number */
struct smc_clc_msg_trail smcr_trl;
/* eye catcher "SMCR" EBCDIC */
} __packed;
struct smcd_clc_msg_accept_confirm { /* SMCD accept/confirm */
u64 gid; /* Sender GID */
u64 token; /* DMB token */
u8 dmbe_idx; /* DMBE index */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 dmbe_size : 4, /* buf size (compressed) */
reserved3 : 4;
u8 dmbe_size : 4, /* buf size (compressed) */
reserved3 : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 reserved3 : 4,
dmbe_size : 4;
u8 reserved3 : 4,
dmbe_size : 4;
#endif
u16 reserved4;
u32 linkid; /* Link identifier */
u32 reserved5[3];
struct smc_clc_msg_trail smcd_trl;
/* eye catcher "SMCD" EBCDIC */
} __packed;
u16 reserved4;
u32 linkid; /* Link identifier */
u32 reserved5[3];
struct smc_clc_msg_trail smcd_trl;
/* eye catcher "SMCD" EBCDIC */
} __packed;
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
struct smcd_clc_msg_accept_confirm d0; /* SMC-D */
};
} __packed; /* format defined in RFC7609 */
......@@ -200,6 +201,6 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc);
int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact);
#endif
......@@ -208,12 +208,11 @@ int smc_close_active(struct smc_sock *smc)
break;
case SMC_LISTEN:
sk->sk_state = SMC_CLOSED;
smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
smc->clcsock->sk->sk_user_data = NULL;
sk->sk_state_change(sk); /* wake up accept */
if (smc->clcsock && smc->clcsock->sk) {
if (smc->clcsock && smc->clcsock->sk)
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
/* wake up kernel_accept of smc_tcp_listen_worker */
smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
}
smc_close_cleanup_listen(sk);
release_sock(sk);
flush_work(&smc->tcp_listen_work);
......
......@@ -34,7 +34,6 @@
#define SMC_LGR_NUM_INCR 256
#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST (8 * HZ)
static struct smc_lgr_list smc_lgr_list = { /* established link groups */
.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
......@@ -70,7 +69,7 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
* creation. For client use a somewhat higher removal delay time,
* otherwise there is a risk of out-of-sync link groups.
*/
if (!lgr->freeing && !lgr->freefast) {
if (!lgr->freeing) {
mod_delayed_work(system_wq, &lgr->free_work,
(!lgr->is_smcd && lgr->role == SMC_CLNT) ?
SMC_LGR_FREE_DELAY_CLNT :
......@@ -78,15 +77,6 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
}
}
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
if (!lgr->freeing && !lgr->freefast) {
lgr->freefast = 1;
mod_delayed_work(system_wq, &lgr->free_work,
SMC_LGR_FREE_DELAY_FAST);
}
}
/* Register connection's alert token in our lookup structure.
* To use rbtrees we have to implement our own insert core.
* Requires @conns_lock
......@@ -227,7 +217,7 @@ void smc_lgr_cleanup_early(struct smc_connection *conn)
if (!list_empty(lgr_list))
list_del_init(lgr_list);
spin_unlock_bh(lgr_lock);
smc_lgr_schedule_free_work_fast(lgr);
__smc_lgr_terminate(lgr, true);
}
static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
......@@ -396,10 +386,15 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
rc = SMC_CLC_DECL_MEM;
goto ism_put_vlan;
}
lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
SMC_LGR_ID_SIZE, &lgr->id);
if (!lgr->tx_wq) {
rc = -ENOMEM;
goto free_lgr;
}
lgr->is_smcd = ini->is_smcd;
lgr->sync_err = 0;
lgr->terminating = 0;
lgr->freefast = 0;
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
mutex_init(&lgr->sndbufs_lock);
......@@ -418,7 +413,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
if (ini->is_smcd) {
/* SMC-D specific settings */
get_device(&ini->ism_dev->dev);
lgr->peer_gid = ini->ism_gid;
lgr->peer_gid = ini->ism_peer_gid;
lgr->smcd = ini->ism_dev;
lgr_list = &ini->ism_dev->lgr_list;
lgr_lock = &lgr->smcd->lgr_lock;
......@@ -437,7 +432,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lnk = &lgr->lnk[link_idx];
rc = smcr_link_init(lgr, lnk, link_idx, ini);
if (rc)
goto free_lgr;
goto free_wq;
lgr_list = &smc_lgr_list.list;
lgr_lock = &smc_lgr_list.lock;
atomic_inc(&lgr_cnt);
......@@ -448,6 +443,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
spin_unlock_bh(lgr_lock);
return 0;
free_wq:
destroy_workqueue(lgr->tx_wq);
free_lgr:
kfree(lgr);
ism_put_vlan:
......@@ -517,7 +514,7 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
smc->sk.sk_state != SMC_CLOSED) {
rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
if (!rc) {
schedule_delayed_work(&conn->tx_work, 0);
queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
smc->sk.sk_data_ready(&smc->sk);
}
} else {
......@@ -824,11 +821,10 @@ static void smc_lgr_free(struct smc_link_group *lgr)
}
smc_lgr_free_bufs(lgr);
destroy_workqueue(lgr->tx_wq);
if (lgr->is_smcd) {
if (!lgr->terminating) {
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
}
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
} else {
......@@ -889,8 +885,6 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
if (lgr->is_smcd) {
smc_ism_signal_shutdown(lgr);
smcd_unregister_all_dmbs(lgr);
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
} else {
u32 rsn = lgr->llc_termination_rsn;
......@@ -1296,9 +1290,9 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
ini->cln_first_contact = SMC_FIRST_CONTACT;
ini->first_contact_local = 1;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
if (role == SMC_CLNT && ini->srv_first_contact)
if (role == SMC_CLNT && ini->first_contact_peer)
/* create new link group as well */
goto create;
......@@ -1307,14 +1301,14 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
list_for_each_entry(lgr, lgr_list, list) {
write_lock_bh(&lgr->conns_lock);
if ((ini->is_smcd ?
smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
smcd_lgr_match(lgr, ini->ism_dev, ini->ism_peer_gid) :
smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
!lgr->sync_err &&
lgr->vlan_id == ini->vlan_id &&
(role == SMC_CLNT || ini->is_smcd ||
lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
/* link group found */
ini->cln_first_contact = SMC_REUSE_CONTACT;
ini->first_contact_local = 0;
conn->lgr = lgr;
rc = smc_lgr_register_conn(conn, false);
write_unlock_bh(&lgr->conns_lock);
......@@ -1328,8 +1322,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
if (rc)
return rc;
if (role == SMC_CLNT && !ini->srv_first_contact &&
ini->cln_first_contact == SMC_FIRST_CONTACT) {
if (role == SMC_CLNT && !ini->first_contact_peer &&
ini->first_contact_local) {
/* Server reuses a link group, but Client wants to start
* a new one
* send out_of_sync decline, reason synchr. error
......@@ -1338,7 +1332,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
}
create:
if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
if (ini->first_contact_local) {
rc = smc_lgr_create(smc, ini);
if (rc)
goto out;
......@@ -1892,8 +1886,8 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_link *lnk,
struct smc_clc_msg_accept_confirm *clc)
{
conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
clc->rmb_rkey);
conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
clc->r0.rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;
return 0;
......
......@@ -137,9 +137,6 @@ struct smc_link {
#define SMC_LINKS_PER_LGR_MAX 3
#define SMC_SINGLE_LINK 0
#define SMC_FIRST_CONTACT 1 /* first contact to a peer */
#define SMC_REUSE_CONTACT 0 /* follow-on contact to a peer*/
/* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
struct smc_buf_desc {
struct list_head list;
......@@ -228,9 +225,9 @@ struct smc_link_group {
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
struct work_struct terminate_work; /* abnormal lgr termination */
struct workqueue_struct *tx_wq; /* wq for conn. tx workers */
u8 sync_err : 1; /* lgr no longer fits to peer */
u8 terminating : 1;/* lgr is terminating */
u8 freefast : 1; /* free worker scheduled fast */
u8 freeing : 1; /* lgr is being freed */
bool is_smcd; /* SMC-R or SMC-D */
......@@ -294,9 +291,9 @@ struct smc_clc_msg_local;
struct smc_init_info {
u8 is_smcd;
u8 first_contact_peer;
u8 first_contact_local;
unsigned short vlan_id;
int srv_first_contact;
int cln_first_contact;
/* SMC-R */
struct smc_clc_msg_local *ib_lcl;
struct smc_ib_device *ib_dev;
......@@ -304,7 +301,7 @@ struct smc_init_info {
u8 ib_port;
u32 ib_clcqpn;
/* SMC-D */
u64 ism_gid;
u64 ism_peer_gid;
struct smcd_dev *ism_dev;
};
......
......@@ -22,6 +22,15 @@
#include "smc.h"
#include "smc_core.h"
struct smc_diag_dump_ctx {
int pos[2];
};
static struct smc_diag_dump_ctx *smc_dump_context(struct netlink_callback *cb)
{
return (struct smc_diag_dump_ctx *)cb->ctx;
}
static void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw)
{
sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x",
......@@ -193,13 +202,15 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
}
static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb,
struct netlink_callback *cb)
struct netlink_callback *cb, int p_type)
{
struct smc_diag_dump_ctx *cb_ctx = smc_dump_context(cb);
struct net *net = sock_net(skb->sk);
int snum = cb_ctx->pos[p_type];
struct nlattr *bc = NULL;
struct hlist_head *head;
int rc = 0, num = 0;
struct sock *sk;
int rc = 0;
read_lock(&prot->h.smc_hash->lock);
head = &prot->h.smc_hash->ht;
......@@ -209,13 +220,18 @@ static int smc_diag_dump_proto(struct proto *prot, struct sk_buff *skb,
sk_for_each(sk, head) {
if (!net_eq(sock_net(sk), net))
continue;
if (num < snum)
goto next;
rc = __smc_diag_dump(sk, skb, cb, nlmsg_data(cb->nlh), bc);
if (rc)
break;
if (rc < 0)
goto out;
next:
num++;
}
out:
read_unlock(&prot->h.smc_hash->lock);
cb_ctx->pos[p_type] = num;
return rc;
}
......@@ -223,10 +239,10 @@ static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
int rc = 0;
rc = smc_diag_dump_proto(&smc_proto, skb, cb);
rc = smc_diag_dump_proto(&smc_proto, skb, cb, SMCPROTO_SMC);
if (!rc)
rc = smc_diag_dump_proto(&smc_proto6, skb, cb);
return rc;
smc_diag_dump_proto(&smc_proto6, skb, cb, SMCPROTO_SMC6);
return skb->len;
}
static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
......
......@@ -1691,7 +1691,7 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
list_add_tail(&qentry->list, &lgr->llc_event_q);
spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
schedule_work(&lgr->llc_event_work);
queue_work(system_highpri_wq, &lgr->llc_event_work);
}
/* copy received msg and add it to the event queue */
......
......@@ -928,7 +928,10 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
mutex_lock(&smcd_dev_list.mutex);
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
!ismdev->going_away) {
!ismdev->going_away &&
(!ini->ism_peer_gid ||
!smc_ism_cantalk(ini->ism_peer_gid, ini->vlan_id,
ismdev))) {
ini->ism_dev = ismdev;
break;
}
......
......@@ -228,8 +228,8 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
/* for a corked socket defer the RDMA writes if there
* is still sufficient sndbuf_space available
*/
schedule_delayed_work(&conn->tx_work,
SMC_TX_CORK_DELAY);
queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
SMC_TX_CORK_DELAY);
else
smc_tx_sndbuf_nonempty(conn);
} /* while (msg_data_left(msg)) */
......@@ -499,7 +499,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
if (conn->killed)
return -EPIPE;
rc = 0;
mod_delayed_work(system_wq, &conn->tx_work,
mod_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
SMC_TX_WORK_DELAY);
}
return rc;
......@@ -623,8 +623,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
return;
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
!conn->killed) {
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
SMC_TX_WORK_DELAY);
return;
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment