Commit dbec982c authored by David S. Miller

Merge branch 'net-smc-cleanups'

Ursula Braun says:

====================
net/smc: cleanups 2018-05-18

here are SMC patches for net-next providing restructuring and cleanup
in different areas.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d6830519 3b2dec26
...@@ -46,11 +46,6 @@ static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group ...@@ -46,11 +46,6 @@ static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
* creation * creation
*/ */
/* File-scope registry object for link groups. Both members are set up
 * statically: .lock via __SPIN_LOCK_UNLOCKED and .list via LIST_HEAD_INIT,
 * so no runtime initialisation call is needed before first use.
 * NOTE(review): smc_exit() takes .lock around its splice of .list, so the
 * lock presumably guards every access to the list - confirm at other users.
 */
struct smc_lgr_list smc_lgr_list = { /* established link groups */
.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
.list = LIST_HEAD_INIT(smc_lgr_list.list),
};
static void smc_tcp_listen_work(struct work_struct *); static void smc_tcp_listen_work(struct work_struct *);
static void smc_set_keepalive(struct sock *sk, int val) static void smc_set_keepalive(struct sock *sk, int val)
...@@ -382,10 +377,13 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) ...@@ -382,10 +377,13 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
static void smc_conn_save_peer_info(struct smc_sock *smc, static void smc_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc) struct smc_clc_msg_accept_confirm *clc)
{ {
smc->conn.peer_conn_idx = clc->conn_idx; int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
smc->conn.peer_rmbe_idx = clc->rmbe_idx;
smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token); smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size); smc->conn.peer_rmbe_size = bufsize;
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
} }
static void smc_link_save_peer_info(struct smc_link *link, static void smc_link_save_peer_info(struct smc_link *link,
...@@ -398,165 +396,186 @@ static void smc_link_save_peer_info(struct smc_link *link, ...@@ -398,165 +396,186 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu; link->peer_mtu = clc->qp_mtu;
} }
/* setup for RDMA connection of client */ /* fall back during connect */
static int smc_connect_rdma(struct smc_sock *smc) static int smc_connect_fallback(struct smc_sock *smc)
{ {
struct smc_clc_msg_accept_confirm aclc; smc->use_fallback = true;
int local_contact = SMC_FIRST_CONTACT; smc_copy_sock_settings_to_clc(smc);
struct smc_ib_device *smcibdev; if (smc->sk.sk_state == SMC_INIT)
struct smc_link *link; smc->sk.sk_state = SMC_ACTIVE;
u8 srv_first_contact; return 0;
int reason_code = 0; }
int rc = 0;
u8 ibport;
sock_hold(&smc->sk); /* sock put in passive closing */
if (smc->use_fallback) /* decline and fall back during connect */
goto out_connected; static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
int rc;
if (!tcp_sk(smc->clcsock->sk)->syn_smc) { if (reason_code < 0) /* error, fallback is not possible */
/* peer has not signalled SMC-capability */ return reason_code;
smc->use_fallback = true; if (reason_code != SMC_CLC_DECL_REPLY) {
goto out_connected; rc = smc_clc_send_decline(smc, reason_code);
if (rc < 0)
return rc;
} }
return smc_connect_fallback(smc);
}
/* IPSec connections opt out of SMC-R optimizations */ /* abort connecting */
if (using_ipsec(smc)) { static int smc_connect_abort(struct smc_sock *smc, int reason_code,
reason_code = SMC_CLC_DECL_IPSEC; int local_contact)
goto decline_rdma; {
} if (local_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
if (reason_code < 0 && smc->sk.sk_state == SMC_INIT)
sock_put(&smc->sk); /* passive closing */
return reason_code;
}
/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
u8 *ibport)
{
int reason_code = 0;
/* PNET table look up: search active ib_device and port /* PNET table look up: search active ib_device and port
* within same PNETID that also contains the ethernet device * within same PNETID that also contains the ethernet device
* used for the internal TCP socket * used for the internal TCP socket
*/ */
smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport); smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport);
if (!smcibdev) { if (!(*ibdev))
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
} return reason_code;
}
/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
struct smc_ib_device *ibdev, u8 ibport)
{
int rc = 0;
/* do inband token exchange */ /* do inband token exchange */
reason_code = smc_clc_send_proposal(smc, smcibdev, ibport); rc = smc_clc_send_proposal(smc, ibdev, ibport);
if (reason_code < 0) { if (rc)
rc = reason_code; return rc;
goto out_err;
}
if (reason_code > 0) /* configuration error */
goto decline_rdma;
/* receive SMC Accept CLC message */ /* receive SMC Accept CLC message */
reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc), return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT);
SMC_CLC_ACCEPT); }
if (reason_code < 0) {
rc = reason_code; /* setup for RDMA connection of client */
goto out_err; static int smc_connect_rdma(struct smc_sock *smc,
} struct smc_clc_msg_accept_confirm *aclc,
if (reason_code > 0) struct smc_ib_device *ibdev, u8 ibport)
goto decline_rdma; {
int local_contact = SMC_FIRST_CONTACT;
struct smc_link *link;
int reason_code = 0;
srv_first_contact = aclc.hdr.flag;
mutex_lock(&smc_create_lgr_pending); mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl, local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl,
srv_first_contact); aclc->hdr.flag);
if (local_contact < 0) { if (local_contact < 0) {
rc = local_contact; if (local_contact == -ENOMEM)
if (rc == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
else if (rc == -ENOLINK) else if (local_contact == -ENOLINK)
reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */ reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
else else
reason_code = SMC_CLC_DECL_INTERR; /* other error */ reason_code = SMC_CLC_DECL_INTERR; /* other error */
goto decline_rdma_unlock; return smc_connect_abort(smc, reason_code, 0);
} }
link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK]; link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
smc_conn_save_peer_info(smc, &aclc); smc_conn_save_peer_info(smc, aclc);
/* create send buffer and rmb */ /* create send buffer and rmb */
rc = smc_buf_create(smc); if (smc_buf_create(smc))
if (rc) { return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
reason_code = SMC_CLC_DECL_MEM;
goto decline_rdma_unlock;
}
if (local_contact == SMC_FIRST_CONTACT) if (local_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, &aclc); smc_link_save_peer_info(link, aclc);
rc = smc_rmb_rtoken_handling(&smc->conn, &aclc); if (smc_rmb_rtoken_handling(&smc->conn, aclc))
if (rc) { return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
reason_code = SMC_CLC_DECL_INTERR; local_contact);
goto decline_rdma_unlock;
}
smc_close_init(smc); smc_close_init(smc);
smc_rx_init(smc); smc_rx_init(smc);
if (local_contact == SMC_FIRST_CONTACT) { if (local_contact == SMC_FIRST_CONTACT) {
rc = smc_ib_ready_link(link); if (smc_ib_ready_link(link))
if (rc) { return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
reason_code = SMC_CLC_DECL_INTERR; local_contact);
goto decline_rdma_unlock;
}
} else { } else {
if (!smc->conn.rmb_desc->reused) { if (!smc->conn.rmb_desc->reused &&
if (smc_reg_rmb(link, smc->conn.rmb_desc, true)) { smc_reg_rmb(link, smc->conn.rmb_desc, true))
reason_code = SMC_CLC_DECL_INTERR; return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
goto decline_rdma_unlock; local_contact);
}
}
} }
smc_rmb_sync_sg_for_device(&smc->conn); smc_rmb_sync_sg_for_device(&smc->conn);
rc = smc_clc_send_confirm(smc); reason_code = smc_clc_send_confirm(smc);
if (rc) if (reason_code)
goto out_err_unlock; return smc_connect_abort(smc, reason_code, local_contact);
smc_tx_init(smc);
if (local_contact == SMC_FIRST_CONTACT) { if (local_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */ /* QP confirmation over RoCE fabric */
reason_code = smc_clnt_conf_first_link(smc); reason_code = smc_clnt_conf_first_link(smc);
if (reason_code < 0) { if (reason_code)
rc = reason_code; return smc_connect_abort(smc, reason_code,
goto out_err_unlock; local_contact);
}
if (reason_code > 0)
goto decline_rdma_unlock;
} }
mutex_unlock(&smc_create_lgr_pending); mutex_unlock(&smc_create_lgr_pending);
smc_tx_init(smc);
out_connected:
smc_copy_sock_settings_to_clc(smc); smc_copy_sock_settings_to_clc(smc);
if (smc->sk.sk_state == SMC_INIT) if (smc->sk.sk_state == SMC_INIT)
smc->sk.sk_state = SMC_ACTIVE; smc->sk.sk_state = SMC_ACTIVE;
return rc ? rc : local_contact; return 0;
}
/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
struct smc_clc_msg_accept_confirm aclc;
struct smc_ib_device *ibdev;
int rc = 0;
u8 ibport;
decline_rdma_unlock: sock_hold(&smc->sk); /* sock put in passive closing */
if (local_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
decline_rdma:
/* RDMA setup failed, switch back to TCP */
smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
rc = smc_clc_send_decline(smc, reason_code);
if (rc < 0)
goto out_err;
}
goto out_connected;
out_err_unlock: if (smc->use_fallback)
if (local_contact == SMC_FIRST_CONTACT) return smc_connect_fallback(smc);
smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending); /* if peer has not signalled SMC-capability, fall back */
smc_conn_free(&smc->conn); if (!tcp_sk(smc->clcsock->sk)->syn_smc)
out_err: return smc_connect_fallback(smc);
if (smc->sk.sk_state == SMC_INIT)
sock_put(&smc->sk); /* passive closing */ /* IPSec connections opt out of SMC-R optimizations */
return rc; if (using_ipsec(smc))
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
/* check if a RDMA device is available; if not, fall back */
if (smc_check_rdma(smc, &ibdev, &ibport))
return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
/* perform CLC handshake */
rc = smc_connect_clc(smc, &aclc, ibdev, ibport);
if (rc)
return smc_connect_decline_fallback(smc, rc);
/* connect using rdma */
rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
if (rc)
return smc_connect_decline_fallback(smc, rc);
return 0;
} }
static int smc_connect(struct socket *sock, struct sockaddr *addr, static int smc_connect(struct socket *sock, struct sockaddr *addr,
...@@ -592,8 +611,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, ...@@ -592,8 +611,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
if (rc) if (rc)
goto out; goto out;
/* setup RDMA connection */ rc = __smc_connect(smc);
rc = smc_connect_rdma(smc);
if (rc < 0) if (rc < 0)
goto out; goto out;
else else
...@@ -791,182 +809,239 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) ...@@ -791,182 +809,239 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
return 0; return 0;
} }
/* setup for RDMA connection of server */ /* listen worker: finish */
static void smc_listen_work(struct work_struct *work) static void smc_listen_out(struct smc_sock *new_smc)
{ {
struct smc_sock *new_smc = container_of(work, struct smc_sock,
smc_listen_work);
struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct socket *newclcsock = new_smc->clcsock;
struct smc_sock *lsmc = new_smc->listen_smc; struct smc_sock *lsmc = new_smc->listen_smc;
struct smc_clc_msg_accept_confirm cclc;
int local_contact = SMC_REUSE_CONTACT;
struct sock *newsmcsk = &new_smc->sk; struct sock *newsmcsk = &new_smc->sk;
struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
int rc = 0;
u8 ibport;
if (new_smc->use_fallback)
goto out_connected;
/* check if peer is smc capable */ lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
if (!tcp_sk(newclcsock->sk)->syn_smc) { if (lsmc->sk.sk_state == SMC_LISTEN) {
new_smc->use_fallback = true; smc_accept_enqueue(&lsmc->sk, newsmcsk);
goto out_connected; } else { /* no longer listening */
smc_close_non_accepted(newsmcsk);
} }
release_sock(&lsmc->sk);
/* do inband token exchange - /* Wake up accept */
*wait for and receive SMC Proposal CLC message lsmc->sk.sk_data_ready(&lsmc->sk);
*/ sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf), }
SMC_CLC_PROPOSAL);
if (reason_code < 0)
goto out_err;
if (reason_code > 0)
goto decline_rdma;
/* IPSec connections opt out of SMC-R optimizations */ /* listen worker: finish in state connected */
if (using_ipsec(new_smc)) { static void smc_listen_out_connected(struct smc_sock *new_smc)
reason_code = SMC_CLC_DECL_IPSEC; {
goto decline_rdma; struct sock *newsmcsk = &new_smc->sk;
}
/* PNET table look up: search active ib_device and port sk_refcnt_debug_inc(newsmcsk);
* within same PNETID that also contains the ethernet device if (newsmcsk->sk_state == SMC_INIT)
* used for the internal TCP socket newsmcsk->sk_state = SMC_ACTIVE;
*/
smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport); smc_listen_out(new_smc);
if (!smcibdev) { }
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma; /* listen worker: finish in error state */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
struct sock *newsmcsk = &new_smc->sk;
if (newsmcsk->sk_state == SMC_INIT)
sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn);
smc_listen_out(new_smc);
}
/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
int local_contact)
{
/* RDMA setup failed, switch back to TCP */
if (local_contact == SMC_FIRST_CONTACT)
smc_lgr_forget(new_smc->conn.lgr);
if (reason_code < 0) { /* error, no fallback possible */
smc_listen_out_err(new_smc);
return;
}
smc_conn_free(&new_smc->conn);
new_smc->use_fallback = true;
if (reason_code && reason_code != SMC_CLC_DECL_REPLY) {
if (smc_clc_send_decline(new_smc, reason_code) < 0) {
smc_listen_out_err(new_smc);
return;
}
} }
smc_listen_out_connected(new_smc);
}
/* listen worker: check prefixes */
static int smc_listen_rdma_check(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc)
{
struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct socket *newclcsock = new_smc->clcsock;
pclc = (struct smc_clc_msg_proposal *)&buf;
pclc_prfx = smc_clc_proposal_get_prefix(pclc); pclc_prfx = smc_clc_proposal_get_prefix(pclc);
if (smc_clc_prfx_match(newclcsock, pclc_prfx))
return SMC_CLC_DECL_CNFERR;
rc = smc_clc_prfx_match(newclcsock, pclc_prfx); return 0;
if (rc) { }
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
struct smc_clc_msg_proposal *pclc,
struct smc_ib_device *ibdev, u8 ibport,
int *local_contact)
{
/* allocate connection / link group */ /* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending); *local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0);
local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl, if (*local_contact < 0) {
0); if (*local_contact == -ENOMEM)
if (local_contact < 0) { return SMC_CLC_DECL_MEM;/* insufficient memory*/
rc = local_contact; return SMC_CLC_DECL_INTERR; /* other error */
if (rc == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
goto decline_rdma_unlock;
} }
link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
/* create send buffer and rmb */ /* create send buffer and rmb */
rc = smc_buf_create(new_smc); if (smc_buf_create(new_smc))
if (rc) { return SMC_CLC_DECL_MEM;
reason_code = SMC_CLC_DECL_MEM;
goto decline_rdma_unlock;
}
smc_close_init(new_smc); return 0;
smc_rx_init(new_smc); }
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
if (local_contact != SMC_FIRST_CONTACT) { if (local_contact != SMC_FIRST_CONTACT) {
if (!new_smc->conn.rmb_desc->reused) { if (!new_smc->conn.rmb_desc->reused) {
if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) { if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
reason_code = SMC_CLC_DECL_INTERR; return SMC_CLC_DECL_INTERR;
goto decline_rdma_unlock;
}
} }
} }
smc_rmb_sync_sg_for_device(&new_smc->conn); smc_rmb_sync_sg_for_device(&new_smc->conn);
rc = smc_clc_send_accept(new_smc, local_contact); return 0;
if (rc) }
goto out_err_unlock;
/* listen worker: finish RDMA setup */
static void smc_listen_rdma_finish(struct smc_sock *new_smc,
struct smc_clc_msg_accept_confirm *cclc,
int local_contact)
{
struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
int reason_code = 0;
/* receive SMC Confirm CLC message */
reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
SMC_CLC_CONFIRM);
if (reason_code < 0)
goto out_err_unlock;
if (reason_code > 0)
goto decline_rdma_unlock;
smc_conn_save_peer_info(new_smc, &cclc);
if (local_contact == SMC_FIRST_CONTACT) if (local_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, &cclc); smc_link_save_peer_info(link, cclc);
rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc); if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
if (rc) {
reason_code = SMC_CLC_DECL_INTERR; reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma_unlock; goto decline;
} }
if (local_contact == SMC_FIRST_CONTACT) { if (local_contact == SMC_FIRST_CONTACT) {
rc = smc_ib_ready_link(link); if (smc_ib_ready_link(link)) {
if (rc) {
reason_code = SMC_CLC_DECL_INTERR; reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma_unlock; goto decline;
} }
/* QP confirmation over RoCE fabric */ /* QP confirmation over RoCE fabric */
reason_code = smc_serv_conf_first_link(new_smc); reason_code = smc_serv_conf_first_link(new_smc);
if (reason_code < 0) if (reason_code)
/* peer is not aware of a problem */ goto decline;
goto out_err_unlock;
if (reason_code > 0)
goto decline_rdma_unlock;
} }
return;
smc_tx_init(new_smc); decline:
mutex_unlock(&smc_create_lgr_pending); mutex_unlock(&smc_create_lgr_pending);
smc_listen_decline(new_smc, reason_code, local_contact);
}
out_connected: /* setup for RDMA connection of server */
sk_refcnt_debug_inc(newsmcsk); static void smc_listen_work(struct work_struct *work)
if (newsmcsk->sk_state == SMC_INIT) {
newsmcsk->sk_state = SMC_ACTIVE; struct smc_sock *new_smc = container_of(work, struct smc_sock,
enqueue: smc_listen_work);
lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); struct socket *newclcsock = new_smc->clcsock;
if (lsmc->sk.sk_state == SMC_LISTEN) { struct smc_clc_msg_accept_confirm cclc;
smc_accept_enqueue(&lsmc->sk, newsmcsk); struct smc_clc_msg_proposal *pclc;
} else { /* no longer listening */ struct smc_ib_device *ibdev;
smc_close_non_accepted(newsmcsk); u8 buf[SMC_CLC_MAX_LEN];
int local_contact = 0;
int reason_code = 0;
int rc = 0;
u8 ibport;
if (new_smc->use_fallback) {
smc_listen_out_connected(new_smc);
return;
} }
release_sock(&lsmc->sk);
/* Wake up accept */ /* check if peer is smc capable */
lsmc->sk.sk_data_ready(&lsmc->sk); if (!tcp_sk(newclcsock->sk)->syn_smc) {
sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ new_smc->use_fallback = true;
return; smc_listen_out_connected(new_smc);
return;
}
decline_rdma_unlock: /* do inband token exchange -
if (local_contact == SMC_FIRST_CONTACT) * wait for and receive SMC Proposal CLC message
smc_lgr_forget(new_smc->conn.lgr); */
mutex_unlock(&smc_create_lgr_pending); pclc = (struct smc_clc_msg_proposal *)&buf;
decline_rdma: reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
/* RDMA setup failed, switch back to TCP */ SMC_CLC_PROPOSAL);
smc_conn_free(&new_smc->conn); if (reason_code) {
new_smc->use_fallback = true; smc_listen_decline(new_smc, reason_code, 0);
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { return;
if (smc_clc_send_decline(new_smc, reason_code) < 0)
goto out_err;
} }
goto out_connected;
out_err_unlock: /* IPSec connections opt out of SMC-R optimizations */
if (local_contact == SMC_FIRST_CONTACT) if (using_ipsec(new_smc)) {
smc_lgr_forget(new_smc->conn.lgr); smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
return;
}
mutex_lock(&smc_create_lgr_pending);
smc_close_init(new_smc);
smc_rx_init(new_smc);
smc_tx_init(new_smc);
/* check if RDMA is available */
if (smc_check_rdma(new_smc, &ibdev, &ibport) ||
smc_listen_rdma_check(new_smc, pclc) ||
smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
&local_contact) ||
smc_listen_rdma_reg(new_smc, local_contact)) {
/* SMC not supported, decline */
mutex_unlock(&smc_create_lgr_pending);
smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
return;
}
/* send SMC Accept CLC message */
rc = smc_clc_send_accept(new_smc, local_contact);
if (rc) {
mutex_unlock(&smc_create_lgr_pending);
smc_listen_decline(new_smc, rc, local_contact);
return;
}
/* receive SMC Confirm CLC message */
reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
SMC_CLC_CONFIRM);
if (reason_code) {
mutex_unlock(&smc_create_lgr_pending);
smc_listen_decline(new_smc, reason_code, local_contact);
return;
}
/* finish worker */
smc_listen_rdma_finish(new_smc, &cclc, local_contact);
smc_conn_save_peer_info(new_smc, &cclc);
mutex_unlock(&smc_create_lgr_pending); mutex_unlock(&smc_create_lgr_pending);
out_err: smc_listen_out_connected(new_smc);
if (newsmcsk->sk_state == SMC_INIT)
sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn);
goto enqueue; /* queue new sock with sk_err set */
} }
static void smc_tcp_listen_work(struct work_struct *work) static void smc_tcp_listen_work(struct work_struct *work)
...@@ -1227,7 +1302,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, ...@@ -1227,7 +1302,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
if (sk->sk_state == SMC_INIT && if (sk->sk_state == SMC_INIT &&
mask & EPOLLOUT && mask & EPOLLOUT &&
smc->clcsock->sk->sk_state != TCP_CLOSE) { smc->clcsock->sk->sk_state != TCP_CLOSE) {
rc = smc_connect_rdma(smc); rc = __smc_connect(smc);
if (rc < 0) if (rc < 0)
mask |= EPOLLERR; mask |= EPOLLERR;
/* success cases including fallback */ /* success cases including fallback */
...@@ -1421,7 +1496,7 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd, ...@@ -1421,7 +1496,7 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
/* output queue size (not send + not acked) */ /* output queue size (not send + not acked) */
if (smc->sk.sk_state == SMC_LISTEN) if (smc->sk.sk_state == SMC_LISTEN)
return -EINVAL; return -EINVAL;
answ = smc->conn.sndbuf_size - answ = smc->conn.sndbuf_desc->len -
atomic_read(&smc->conn.sndbuf_space); atomic_read(&smc->conn.sndbuf_space);
break; break;
case SIOCOUTQNSD: case SIOCOUTQNSD:
...@@ -1637,19 +1712,7 @@ static int __init smc_init(void) ...@@ -1637,19 +1712,7 @@ static int __init smc_init(void)
static void __exit smc_exit(void) static void __exit smc_exit(void)
{ {
struct smc_link_group *lgr, *lg; smc_core_exit();
LIST_HEAD(lgr_freeing_list);
spin_lock_bh(&smc_lgr_list.lock);
if (!list_empty(&smc_lgr_list.list))
list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
spin_unlock_bh(&smc_lgr_list.lock);
list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
list_del_init(&lgr->list);
smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
cancel_delayed_work_sync(&lgr->free_work);
smc_lgr_free(lgr); /* free link group */
}
static_branch_disable(&tcp_have_smc); static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client(); smc_ib_unregister_client();
sock_unregister(PF_SMC); sock_unregister(PF_SMC);
......
...@@ -118,7 +118,7 @@ struct smc_connection { ...@@ -118,7 +118,7 @@ struct smc_connection {
struct rb_node alert_node; struct rb_node alert_node;
struct smc_link_group *lgr; /* link group of connection */ struct smc_link_group *lgr; /* link group of connection */
u32 alert_token_local; /* unique conn. id */ u32 alert_token_local; /* unique conn. id */
u8 peer_conn_idx; /* from tcp handshake */ u8 peer_rmbe_idx; /* from tcp handshake */
int peer_rmbe_size; /* size of peer rx buffer */ int peer_rmbe_size; /* size of peer rx buffer */
atomic_t peer_rmbe_space;/* remaining free bytes in peer atomic_t peer_rmbe_space;/* remaining free bytes in peer
* rmbe * rmbe
...@@ -126,9 +126,7 @@ struct smc_connection { ...@@ -126,9 +126,7 @@ struct smc_connection {
int rtoken_idx; /* idx to peer RMB rkey/addr */ int rtoken_idx; /* idx to peer RMB rkey/addr */
struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */ struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */
int sndbuf_size; /* sndbuf size <== sock wmem */
struct smc_buf_desc *rmb_desc; /* RMBE descriptor */ struct smc_buf_desc *rmb_desc; /* RMBE descriptor */
int rmbe_size; /* RMBE size <== sock rmem */
int rmbe_size_short;/* compressed notation */ int rmbe_size_short;/* compressed notation */
int rmbe_update_limit; int rmbe_update_limit;
/* lower limit for consumer /* lower limit for consumer
...@@ -153,6 +151,7 @@ struct smc_connection { ...@@ -153,6 +151,7 @@ struct smc_connection {
u16 tx_cdc_seq; /* sequence # for CDC send */ u16 tx_cdc_seq; /* sequence # for CDC send */
spinlock_t send_lock; /* protect wr_sends */ spinlock_t send_lock; /* protect wr_sends */
struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
u32 tx_off; /* base offset in peer rmb */
struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl. struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl.
* .prod cf. TCP rcv_nxt * .prod cf. TCP rcv_nxt
...@@ -221,41 +220,6 @@ static inline u32 ntoh24(u8 *net) ...@@ -221,41 +220,6 @@ static inline u32 ntoh24(u8 *net)
return be32_to_cpu(t); return be32_to_cpu(t);
} }
#define SMC_BUF_MIN_SIZE 16384 /* minimum size of an RMB */
#define SMC_RMBE_SIZES 16 /* number of distinct sizes for an RMBE */
/* theoretically, the RFC states that largest size would be 512K,
* i.e. compressed 5 and thus 6 sizes (0..5), despite
* struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
*/
/* Translate a buffer size in bytes into the compressed notation.
 *
 * Any size up to SMC_BUF_MIN_SIZE (16K) yields 0. Larger sizes are rounded
 * up to the next power of two rather than truncated as plain ilog2() would,
 * so the socket application never ends up with less than the sndbuf / rcvbuf
 * size it asked for. The result is capped at SMC_RMBE_SIZES - 1.
 */
static inline u8 smc_compress_bufsize(int size)
{
	u8 exponent = 0;

	if (size > SMC_BUF_MIN_SIZE) {
		/* count in 16K units; "- 1" keeps exact powers of two in place */
		exponent = ilog2((size - 1) >> 14) + 1;
		if (exponent >= SMC_RMBE_SIZES)
			exponent = SMC_RMBE_SIZES - 1;
	}
	return exponent;
}
/* Expand the compressed notation back into a size in bytes.
 *
 * A compressed value c stands for 2^(c + 14) bytes, i.e. 16K shifted left
 * by c. No range check is done on the argument.
 */
static inline int smc_uncompress_bufsize(u8 compressed)
{
	int shift = ((int)compressed) + 14;
	u32 bytes = 0x00000001 << shift;

	return (int)bytes;
}
#ifdef CONFIG_XFRM #ifdef CONFIG_XFRM
static inline bool using_ipsec(struct smc_sock *smc) static inline bool using_ipsec(struct smc_sock *smc)
{ {
...@@ -269,12 +233,6 @@ static inline bool using_ipsec(struct smc_sock *smc) ...@@ -269,12 +233,6 @@ static inline bool using_ipsec(struct smc_sock *smc)
} }
#endif #endif
struct smc_clc_msg_local;
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
void smc_close_non_accepted(struct sock *sk); void smc_close_non_accepted(struct sock *sk);
......
...@@ -44,13 +44,13 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, ...@@ -44,13 +44,13 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
smc = container_of(cdcpend->conn, struct smc_sock, conn); smc = container_of(cdcpend->conn, struct smc_sock, conn);
bh_lock_sock(&smc->sk); bh_lock_sock(&smc->sk);
if (!wc_status) { if (!wc_status) {
diff = smc_curs_diff(cdcpend->conn->sndbuf_size, diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len,
&cdcpend->conn->tx_curs_fin, &cdcpend->conn->tx_curs_fin,
&cdcpend->cursor); &cdcpend->cursor);
/* sndbuf_space is decreased in smc_sendmsg */ /* sndbuf_space is decreased in smc_sendmsg */
smp_mb__before_atomic(); smp_mb__before_atomic();
atomic_add(diff, &cdcpend->conn->sndbuf_space); atomic_add(diff, &cdcpend->conn->sndbuf_space);
/* guarantee 0 <= sndbuf_space <= sndbuf_size */ /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
smp_mb__after_atomic(); smp_mb__after_atomic();
smc_curs_write(&cdcpend->conn->tx_curs_fin, smc_curs_write(&cdcpend->conn->tx_curs_fin,
smc_curs_read(&cdcpend->cursor, cdcpend->conn), smc_curs_read(&cdcpend->cursor, cdcpend->conn),
...@@ -165,19 +165,12 @@ static inline bool smc_cdc_before(u16 seq1, u16 seq2) ...@@ -165,19 +165,12 @@ static inline bool smc_cdc_before(u16 seq1, u16 seq2)
} }
static void smc_cdc_msg_recv_action(struct smc_sock *smc, static void smc_cdc_msg_recv_action(struct smc_sock *smc,
struct smc_link *link,
struct smc_cdc_msg *cdc) struct smc_cdc_msg *cdc)
{ {
union smc_host_cursor cons_old, prod_old; union smc_host_cursor cons_old, prod_old;
struct smc_connection *conn = &smc->conn; struct smc_connection *conn = &smc->conn;
int diff_cons, diff_prod; int diff_cons, diff_prod;
if (!cdc->prod_flags.failover_validation) {
if (smc_cdc_before(ntohs(cdc->seqno),
conn->local_rx_ctrl.seqno))
/* received seqno is old */
return;
}
smc_curs_write(&prod_old, smc_curs_write(&prod_old,
smc_curs_read(&conn->local_rx_ctrl.prod, conn), smc_curs_read(&conn->local_rx_ctrl.prod, conn),
conn); conn);
...@@ -198,13 +191,13 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, ...@@ -198,13 +191,13 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smp_mb__after_atomic(); smp_mb__after_atomic();
} }
diff_prod = smc_curs_diff(conn->rmbe_size, &prod_old, diff_prod = smc_curs_diff(conn->rmb_desc->len, &prod_old,
&conn->local_rx_ctrl.prod); &conn->local_rx_ctrl.prod);
if (diff_prod) { if (diff_prod) {
/* bytes_to_rcv is decreased in smc_recvmsg */ /* bytes_to_rcv is decreased in smc_recvmsg */
smp_mb__before_atomic(); smp_mb__before_atomic();
atomic_add(diff_prod, &conn->bytes_to_rcv); atomic_add(diff_prod, &conn->bytes_to_rcv);
/* guarantee 0 <= bytes_to_rcv <= rmbe_size */ /* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */
smp_mb__after_atomic(); smp_mb__after_atomic();
smc->sk.sk_data_ready(&smc->sk); smc->sk.sk_data_ready(&smc->sk);
} else if ((conn->local_rx_ctrl.prod_flags.write_blocked) || } else if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
...@@ -236,26 +229,11 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, ...@@ -236,26 +229,11 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
} }
/* called under tasklet context */ /* called under tasklet context */
static inline void smc_cdc_msg_recv(struct smc_cdc_msg *cdc, static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
struct smc_link *link, u64 wr_id)
{ {
struct smc_link_group *lgr = container_of(link, struct smc_link_group,
lnk[SMC_SINGLE_LINK]);
struct smc_connection *connection;
struct smc_sock *smc;
/* lookup connection */
read_lock_bh(&lgr->conns_lock);
connection = smc_lgr_find_conn(ntohl(cdc->token), lgr);
if (!connection) {
read_unlock_bh(&lgr->conns_lock);
return;
}
smc = container_of(connection, struct smc_sock, conn);
sock_hold(&smc->sk); sock_hold(&smc->sk);
read_unlock_bh(&lgr->conns_lock);
bh_lock_sock(&smc->sk); bh_lock_sock(&smc->sk);
smc_cdc_msg_recv_action(smc, link, cdc); smc_cdc_msg_recv_action(smc, cdc);
bh_unlock_sock(&smc->sk); bh_unlock_sock(&smc->sk);
sock_put(&smc->sk); /* no free sk in softirq-context */ sock_put(&smc->sk); /* no free sk in softirq-context */
} }
...@@ -266,12 +244,31 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) ...@@ -266,12 +244,31 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
{ {
struct smc_link *link = (struct smc_link *)wc->qp->qp_context; struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
struct smc_cdc_msg *cdc = buf; struct smc_cdc_msg *cdc = buf;
struct smc_connection *conn;
struct smc_link_group *lgr;
struct smc_sock *smc;
if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved)) if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved))
return; /* short message */ return; /* short message */
if (cdc->len != SMC_WR_TX_SIZE) if (cdc->len != SMC_WR_TX_SIZE)
return; /* invalid message */ return; /* invalid message */
smc_cdc_msg_recv(cdc, link, wc->wr_id);
/* lookup connection */
lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
read_lock_bh(&lgr->conns_lock);
conn = smc_lgr_find_conn(ntohl(cdc->token), lgr);
read_unlock_bh(&lgr->conns_lock);
if (!conn)
return;
smc = container_of(conn, struct smc_sock, conn);
if (!cdc->prod_flags.failover_validation) {
if (smc_cdc_before(ntohs(cdc->seqno),
conn->local_rx_ctrl.seqno))
/* received seqno is old */
return;
}
smc_cdc_msg_recv(smc, cdc);
} }
static struct smc_wr_rx_handler smc_cdc_rx_handlers[] = { static struct smc_wr_rx_handler smc_cdc_rx_handlers[] = {
......
...@@ -442,7 +442,7 @@ int smc_clc_send_confirm(struct smc_sock *smc) ...@@ -442,7 +442,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
hton24(cclc.qpn, link->roce_qp->qp_num); hton24(cclc.qpn, link->roce_qp->qp_num);
cclc.rmb_rkey = cclc.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
cclc.rmbe_alert_token = htonl(conn->alert_token_local); cclc.rmbe_alert_token = htonl(conn->alert_token_local);
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
cclc.rmbe_size = conn->rmbe_size_short; cclc.rmbe_size = conn->rmbe_size_short;
...@@ -494,7 +494,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) ...@@ -494,7 +494,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
hton24(aclc.qpn, link->roce_qp->qp_num); hton24(aclc.qpn, link->roce_qp->qp_num);
aclc.rmb_rkey = aclc.rmb_rkey =
htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */
aclc.rmbe_alert_token = htonl(conn->alert_token_local); aclc.rmbe_alert_token = htonl(conn->alert_token_local);
aclc.qp_mtu = link->path_mtu; aclc.qp_mtu = link->path_mtu;
aclc.rmbe_size = conn->rmbe_size_short, aclc.rmbe_size = conn->rmbe_size_short,
......
...@@ -97,7 +97,7 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ ...@@ -97,7 +97,7 @@ struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_local lcl; struct smc_clc_msg_local lcl;
u8 qpn[3]; /* QP number */ u8 qpn[3]; /* QP number */
__be32 rmb_rkey; /* RMB rkey */ __be32 rmb_rkey; /* RMB rkey */
u8 conn_idx; /* Connection index, which RMBE in RMB */ u8 rmbe_idx; /* Index of RMBE in RMB */
__be32 rmbe_alert_token;/* unique connection id */ __be32 rmbe_alert_token;/* unique connection id */
#if defined(__BIG_ENDIAN_BITFIELD) #if defined(__BIG_ENDIAN_BITFIELD)
u8 rmbe_size : 4, /* RMBE buf size (compressed notation) */ u8 rmbe_size : 4, /* RMBE buf size (compressed notation) */
......
...@@ -30,10 +30,14 @@ ...@@ -30,10 +30,14 @@
#define SMC_LGR_FREE_DELAY_SERV (600 * HZ) #define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10) #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10)
static u32 smc_lgr_num; /* unique link group number */ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
.list = LIST_HEAD_INIT(smc_lgr_list.list),
.num = 0,
};
static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
bool is_rmb); struct smc_buf_desc *buf_desc);
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{ {
...@@ -181,8 +185,8 @@ static int smc_lgr_create(struct smc_sock *smc, ...@@ -181,8 +185,8 @@ static int smc_lgr_create(struct smc_sock *smc,
INIT_LIST_HEAD(&lgr->sndbufs[i]); INIT_LIST_HEAD(&lgr->sndbufs[i]);
INIT_LIST_HEAD(&lgr->rmbs[i]); INIT_LIST_HEAD(&lgr->rmbs[i]);
} }
smc_lgr_num += SMC_LGR_NUM_INCR; smc_lgr_list.num += SMC_LGR_NUM_INCR;
memcpy(&lgr->id, (u8 *)&smc_lgr_num, SMC_LGR_ID_SIZE); memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
lgr->conns_all = RB_ROOT; lgr->conns_all = RB_ROOT;
...@@ -236,26 +240,21 @@ static int smc_lgr_create(struct smc_sock *smc, ...@@ -236,26 +240,21 @@ static int smc_lgr_create(struct smc_sock *smc,
static void smc_buf_unuse(struct smc_connection *conn) static void smc_buf_unuse(struct smc_connection *conn)
{ {
if (conn->sndbuf_desc) { if (conn->sndbuf_desc)
conn->sndbuf_desc->used = 0; conn->sndbuf_desc->used = 0;
conn->sndbuf_size = 0;
}
if (conn->rmb_desc) { if (conn->rmb_desc) {
if (!conn->rmb_desc->regerr) { if (!conn->rmb_desc->regerr) {
conn->rmb_desc->reused = 1; conn->rmb_desc->reused = 1;
conn->rmb_desc->used = 0; conn->rmb_desc->used = 0;
conn->rmbe_size = 0;
} else { } else {
/* buf registration failed, reuse not possible */ /* buf registration failed, reuse not possible */
struct smc_link_group *lgr = conn->lgr; struct smc_link_group *lgr = conn->lgr;
struct smc_link *lnk;
write_lock_bh(&lgr->rmbs_lock); write_lock_bh(&lgr->rmbs_lock);
list_del(&conn->rmb_desc->list); list_del(&conn->rmb_desc->list);
write_unlock_bh(&lgr->rmbs_lock); write_unlock_bh(&lgr->rmbs_lock);
lnk = &lgr->lnk[SMC_SINGLE_LINK]; smc_buf_free(lgr, true, conn->rmb_desc);
smc_buf_free(conn->rmb_desc, lnk, true);
} }
} }
} }
...@@ -281,9 +280,11 @@ static void smc_link_clear(struct smc_link *lnk) ...@@ -281,9 +280,11 @@ static void smc_link_clear(struct smc_link *lnk)
smc_wr_free_link_mem(lnk); smc_wr_free_link_mem(lnk);
} }
static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
bool is_rmb) struct smc_buf_desc *buf_desc)
{ {
struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
if (is_rmb) { if (is_rmb) {
if (buf_desc->mr_rx[SMC_SINGLE_LINK]) if (buf_desc->mr_rx[SMC_SINGLE_LINK])
smc_ib_put_memory_region( smc_ib_put_memory_region(
...@@ -302,7 +303,6 @@ static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, ...@@ -302,7 +303,6 @@ static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{ {
struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
struct smc_buf_desc *buf_desc, *bf_desc; struct smc_buf_desc *buf_desc, *bf_desc;
struct list_head *buf_list; struct list_head *buf_list;
int i; int i;
...@@ -315,7 +315,7 @@ static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) ...@@ -315,7 +315,7 @@ static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
list_for_each_entry_safe(buf_desc, bf_desc, buf_list, list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
list) { list) {
list_del(&buf_desc->list); list_del(&buf_desc->list);
smc_buf_free(buf_desc, lnk, is_rmb); smc_buf_free(lgr, is_rmb, buf_desc);
} }
} }
} }
...@@ -377,6 +377,18 @@ void smc_lgr_terminate(struct smc_link_group *lgr) ...@@ -377,6 +377,18 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
smc_lgr_schedule_free_work(lgr); smc_lgr_schedule_free_work(lgr);
} }
/* Called when an IB port is terminated: terminate every link group whose
 * single link is bound to the given IB device and port.
 */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *cur, *next;
	struct smc_link *lnk;

	/* _safe variant: the walk keeps its own pointer to the next entry */
	list_for_each_entry_safe(cur, next, &smc_lgr_list.list, list) {
		lnk = &cur->lnk[SMC_SINGLE_LINK];
		if (lnk->smcibdev != smcibdev || lnk->ibport != ibport)
			continue;	/* link group uses another device/port */
		smc_lgr_terminate(cur);
	}
}
/* Determine vlan of internal TCP socket. /* Determine vlan of internal TCP socket.
* @vlan_id: address to store the determined vlan id into * @vlan_id: address to store the determined vlan id into
*/ */
...@@ -461,10 +473,10 @@ int smc_conn_create(struct smc_sock *smc, ...@@ -461,10 +473,10 @@ int smc_conn_create(struct smc_sock *smc,
struct smc_clc_msg_local *lcl, int srv_first_contact) struct smc_clc_msg_local *lcl, int srv_first_contact)
{ {
struct smc_connection *conn = &smc->conn; struct smc_connection *conn = &smc->conn;
int local_contact = SMC_FIRST_CONTACT;
struct smc_link_group *lgr; struct smc_link_group *lgr;
unsigned short vlan_id; unsigned short vlan_id;
enum smc_lgr_role role; enum smc_lgr_role role;
int local_contact = SMC_FIRST_CONTACT;
int rc = 0; int rc = 0;
role = smc->listen_smc ? SMC_SERV : SMC_CLNT; role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
...@@ -530,14 +542,39 @@ int smc_conn_create(struct smc_sock *smc, ...@@ -530,14 +542,39 @@ int smc_conn_create(struct smc_sock *smc,
return rc ? rc : local_contact; return rc ? rc : local_contact;
} }
/* Translate a buffer size in bytes into the compressed size notation.
 * Sizes up to SMC_BUF_MIN_SIZE (16K) map to 0. Larger sizes are rounded
 * up to the next power of 2 (unlike a plain ilog2), so the socket
 * application gets at least the sndbuf / rcvbuf size it asked for.
 * The result is capped at SMC_RMBE_SIZES - 1.
 */
static u8 smc_compress_bufsize(int size)
{
	int exponent;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	/* number of 16K units (minus one byte), then round up in log2 */
	exponent = ilog2((size - 1) >> 14) + 1;
	if (exponent >= SMC_RMBE_SIZES)
		return SMC_RMBE_SIZES - 1;

	return exponent;
}
/* Expand a compressed buffer size into bytes: 2^(compressed + 14),
 * i.e. compressed value 0 corresponds to 16K.
 */
int smc_uncompress_bufsize(u8 compressed)
{
	int shift = ((int)compressed) + 14;
	u32 size = 0x00000001 << shift;

	return (int)size;
}
/* try to reuse a sndbuf or rmb description slot for a certain /* try to reuse a sndbuf or rmb description slot for a certain
* buffer size; if not available, return NULL * buffer size; if not available, return NULL
*/ */
static inline static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr, rwlock_t *lock,
int compressed_bufsize, struct list_head *buf_list)
rwlock_t *lock,
struct list_head *buf_list)
{ {
struct smc_buf_desc *buf_slot; struct smc_buf_desc *buf_slot;
...@@ -589,7 +626,7 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, ...@@ -589,7 +626,7 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1, rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
GFP_KERNEL); GFP_KERNEL);
if (rc) { if (rc) {
smc_buf_free(buf_desc, lnk, is_rmb); smc_buf_free(lgr, is_rmb, buf_desc);
return ERR_PTR(rc); return ERR_PTR(rc);
} }
sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl, sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
...@@ -600,7 +637,7 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, ...@@ -600,7 +637,7 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
/* SMC protocol depends on mapping to one DMA address only */ /* SMC protocol depends on mapping to one DMA address only */
if (rc != 1) { if (rc != 1) {
smc_buf_free(buf_desc, lnk, is_rmb); smc_buf_free(lgr, is_rmb, buf_desc);
return ERR_PTR(-EAGAIN); return ERR_PTR(-EAGAIN);
} }
...@@ -611,19 +648,20 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, ...@@ -611,19 +648,20 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
IB_ACCESS_LOCAL_WRITE, IB_ACCESS_LOCAL_WRITE,
buf_desc); buf_desc);
if (rc) { if (rc) {
smc_buf_free(buf_desc, lnk, is_rmb); smc_buf_free(lgr, is_rmb, buf_desc);
return ERR_PTR(rc); return ERR_PTR(rc);
} }
} }
buf_desc->len = bufsize;
return buf_desc; return buf_desc;
} }
static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
{ {
struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
struct smc_connection *conn = &smc->conn; struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr; struct smc_link_group *lgr = conn->lgr;
struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
struct list_head *buf_list; struct list_head *buf_list;
int bufsize, bufsize_short; int bufsize, bufsize_short;
int sk_buf_size; int sk_buf_size;
...@@ -651,7 +689,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) ...@@ -651,7 +689,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
continue; continue;
/* check for reusable slot in the link group */ /* check for reusable slot in the link group */
buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list); buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
if (buf_desc) { if (buf_desc) {
memset(buf_desc->cpu_addr, 0, bufsize); memset(buf_desc->cpu_addr, 0, bufsize);
break; /* found reusable slot */ break; /* found reusable slot */
...@@ -675,14 +713,12 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) ...@@ -675,14 +713,12 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
if (is_rmb) { if (is_rmb) {
conn->rmb_desc = buf_desc; conn->rmb_desc = buf_desc;
conn->rmbe_size = bufsize;
conn->rmbe_size_short = bufsize_short; conn->rmbe_size_short = bufsize_short;
smc->sk.sk_rcvbuf = bufsize * 2; smc->sk.sk_rcvbuf = bufsize * 2;
atomic_set(&conn->bytes_to_rcv, 0); atomic_set(&conn->bytes_to_rcv, 0);
conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
} else { } else {
conn->sndbuf_desc = buf_desc; conn->sndbuf_desc = buf_desc;
conn->sndbuf_size = bufsize;
smc->sk.sk_sndbuf = bufsize * 2; smc->sk.sk_sndbuf = bufsize * 2;
atomic_set(&conn->sndbuf_space, bufsize); atomic_set(&conn->sndbuf_space, bufsize);
} }
...@@ -738,8 +774,7 @@ int smc_buf_create(struct smc_sock *smc) ...@@ -738,8 +774,7 @@ int smc_buf_create(struct smc_sock *smc)
/* create rmb */ /* create rmb */
rc = __smc_buf_create(smc, true); rc = __smc_buf_create(smc, true);
if (rc) if (rc)
smc_buf_free(smc->conn.sndbuf_desc, smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
&smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
return rc; return rc;
} }
...@@ -806,3 +841,21 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn, ...@@ -806,3 +841,21 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn,
return conn->rtoken_idx; return conn->rtoken_idx;
return 0; return 0;
} }
/* Called (from smc_exit) when the module is removed.
 * All remaining link groups are moved from the global list onto a private
 * list while holding the list lock; they are then freed one by one after
 * the lock has been dropped.
 */
void smc_core_exit(void)
{
	LIST_HEAD(lgr_freeing_list);
	struct smc_link_group *cur, *next;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(cur, next, &lgr_freeing_list, list) {
		struct smc_link *lnk = &cur->lnk[SMC_SINGLE_LINK];

		list_del_init(&cur->list);
		smc_llc_link_inactive(lnk);
		/* wait for a pending delayed free worker before freeing */
		cancel_delayed_work_sync(&cur->free_work);
		smc_lgr_free(cur); /* free link group */
	}
}
...@@ -23,10 +23,9 @@ ...@@ -23,10 +23,9 @@
struct smc_lgr_list { /* list of link group definition */ struct smc_lgr_list { /* list of link group definition */
struct list_head list; struct list_head list;
spinlock_t lock; /* protects list of link groups */ spinlock_t lock; /* protects list of link groups */
u32 num; /* unique link group number */
}; };
extern struct smc_lgr_list smc_lgr_list; /* list of link groups */
enum smc_lgr_role { /* possible roles of a link group */ enum smc_lgr_role { /* possible roles of a link group */
SMC_CLNT, /* client */ SMC_CLNT, /* client */
SMC_SERV /* server */ SMC_SERV /* server */
...@@ -124,6 +123,7 @@ struct smc_buf_desc { ...@@ -124,6 +123,7 @@ struct smc_buf_desc {
struct list_head list; struct list_head list;
void *cpu_addr; /* virtual address of buffer */ void *cpu_addr; /* virtual address of buffer */
struct page *pages; struct page *pages;
int len; /* length of buffer */
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */ struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
/* for rmb only: memory region /* for rmb only: memory region
...@@ -141,6 +141,12 @@ struct smc_rtoken { /* address/key of remote RMB */ ...@@ -141,6 +141,12 @@ struct smc_rtoken { /* address/key of remote RMB */
}; };
#define SMC_LGR_ID_SIZE 4 #define SMC_LGR_ID_SIZE 4
#define SMC_BUF_MIN_SIZE 16384 /* minimum size of an RMB */
#define SMC_RMBE_SIZES 16 /* number of distinct RMBE sizes */
/* theoretically, the RFC states that largest size would be 512K,
* i.e. compressed 5 and thus 6 sizes (0..5), despite
* struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
*/
struct smc_link_group { struct smc_link_group {
struct list_head list; struct list_head list;
...@@ -205,11 +211,14 @@ static inline struct smc_connection *smc_lgr_find_conn( ...@@ -205,11 +211,14 @@ static inline struct smc_connection *smc_lgr_find_conn(
struct smc_sock; struct smc_sock;
struct smc_clc_msg_accept_confirm; struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
void smc_lgr_free(struct smc_link_group *lgr); void smc_lgr_free(struct smc_link_group *lgr);
void smc_lgr_forget(struct smc_link_group *lgr); void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
int smc_buf_create(struct smc_sock *smc); int smc_buf_create(struct smc_sock *smc);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn, int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_clc_msg_accept_confirm *clc); struct smc_clc_msg_accept_confirm *clc);
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey); int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
...@@ -218,4 +227,9 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); ...@@ -218,4 +227,9 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
void smc_rmb_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
void smc_core_exit(void);
#endif #endif
...@@ -101,8 +101,9 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, ...@@ -101,8 +101,9 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
struct smc_connection *conn = &smc->conn; struct smc_connection *conn = &smc->conn;
struct smc_diag_conninfo cinfo = { struct smc_diag_conninfo cinfo = {
.token = conn->alert_token_local, .token = conn->alert_token_local,
.sndbuf_size = conn->sndbuf_size, .sndbuf_size = conn->sndbuf_desc ?
.rmbe_size = conn->rmbe_size, conn->sndbuf_desc->len : 0,
.rmbe_size = conn->rmb_desc ? conn->rmb_desc->len : 0,
.peer_rmbe_size = conn->peer_rmbe_size, .peer_rmbe_size = conn->peer_rmbe_size,
.rx_prod.wrap = conn->local_rx_ctrl.prod.wrap, .rx_prod.wrap = conn->local_rx_ctrl.prod.wrap,
......
...@@ -143,17 +143,6 @@ int smc_ib_ready_link(struct smc_link *lnk) ...@@ -143,17 +143,6 @@ int smc_ib_ready_link(struct smc_link *lnk)
return rc; return rc;
} }
/* Terminate all link groups that use the given port of the given IB device. */
static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *grp, *tmp;

	list_for_each_entry_safe(grp, tmp, &smc_lgr_list.list, list) {
		struct smc_link *link = &grp->lnk[SMC_SINGLE_LINK];

		if (link->smcibdev != smcibdev)
			continue;	/* different IB device */
		if (link->ibport != ibport)
			continue;	/* different port on this device */
		smc_lgr_terminate(grp);
	}
}
/* process context wrapper for might_sleep smc_ib_remember_port_attr */ /* process context wrapper for might_sleep smc_ib_remember_port_attr */
static void smc_ib_port_event_work(struct work_struct *work) static void smc_ib_port_event_work(struct work_struct *work)
{ {
...@@ -165,7 +154,7 @@ static void smc_ib_port_event_work(struct work_struct *work) ...@@ -165,7 +154,7 @@ static void smc_ib_port_event_work(struct work_struct *work)
smc_ib_remember_port_attr(smcibdev, port_idx + 1); smc_ib_remember_port_attr(smcibdev, port_idx + 1);
clear_bit(port_idx, &smcibdev->port_event_mask); clear_bit(port_idx, &smcibdev->port_event_mask);
if (!smc_ib_port_active(smcibdev, port_idx + 1)) if (!smc_ib_port_active(smcibdev, port_idx + 1))
smc_ib_port_terminate(smcibdev, port_idx + 1); smc_port_terminate(smcibdev, port_idx + 1);
} }
} }
......
...@@ -51,7 +51,7 @@ static void smc_rx_wake_up(struct sock *sk) ...@@ -51,7 +51,7 @@ static void smc_rx_wake_up(struct sock *sk)
static void smc_rx_update_consumer(struct smc_connection *conn, static void smc_rx_update_consumer(struct smc_connection *conn,
union smc_host_cursor cons, size_t len) union smc_host_cursor cons, size_t len)
{ {
smc_curs_add(conn->rmbe_size, &cons, len); smc_curs_add(conn->rmb_desc->len, &cons, len);
smc_curs_write(&conn->local_tx_ctrl.cons, smc_curs_read(&cons, conn), smc_curs_write(&conn->local_tx_ctrl.cons, smc_curs_read(&cons, conn),
conn); conn);
/* send consumer cursor update if required */ /* send consumer cursor update if required */
...@@ -288,11 +288,11 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, ...@@ -288,11 +288,11 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
conn); conn);
/* subsequent splice() calls pick up where previous left */ /* subsequent splice() calls pick up where previous left */
if (splbytes) if (splbytes)
smc_curs_add(conn->rmbe_size, &cons, splbytes); smc_curs_add(conn->rmb_desc->len, &cons, splbytes);
/* determine chunks where to read from rcvbuf */ /* determine chunks where to read from rcvbuf */
/* either unwrapped case, or 1st chunk of wrapped case */ /* either unwrapped case, or 1st chunk of wrapped case */
chunk_len = min_t(size_t, chunk_len = min_t(size_t, copylen, conn->rmb_desc->len -
copylen, conn->rmbe_size - cons.count); cons.count);
chunk_len_sum = chunk_len; chunk_len_sum = chunk_len;
chunk_off = cons.count; chunk_off = cons.count;
smc_rmb_sync_sg_for_cpu(conn); smc_rmb_sync_sg_for_cpu(conn);
...@@ -331,7 +331,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, ...@@ -331,7 +331,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
/* increased in recv tasklet smc_cdc_msg_rcv() */ /* increased in recv tasklet smc_cdc_msg_rcv() */
smp_mb__before_atomic(); smp_mb__before_atomic();
atomic_sub(copylen, &conn->bytes_to_rcv); atomic_sub(copylen, &conn->bytes_to_rcv);
/* guarantee 0 <= bytes_to_rcv <= rmbe_size */ /* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */
smp_mb__after_atomic(); smp_mb__after_atomic();
if (msg) if (msg)
smc_rx_update_consumer(conn, cons, copylen); smc_rx_update_consumer(conn, cons, copylen);
......
...@@ -180,8 +180,8 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) ...@@ -180,8 +180,8 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
tx_cnt_prep = prep.count; tx_cnt_prep = prep.count;
/* determine chunks where to write into sndbuf */ /* determine chunks where to write into sndbuf */
/* either unwrapped case, or 1st chunk of wrapped case */ /* either unwrapped case, or 1st chunk of wrapped case */
chunk_len = min_t(size_t, chunk_len = min_t(size_t, copylen, conn->sndbuf_desc->len -
copylen, conn->sndbuf_size - tx_cnt_prep); tx_cnt_prep);
chunk_len_sum = chunk_len; chunk_len_sum = chunk_len;
chunk_off = tx_cnt_prep; chunk_off = tx_cnt_prep;
smc_sndbuf_sync_sg_for_cpu(conn); smc_sndbuf_sync_sg_for_cpu(conn);
...@@ -206,21 +206,21 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) ...@@ -206,21 +206,21 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
} }
smc_sndbuf_sync_sg_for_device(conn); smc_sndbuf_sync_sg_for_device(conn);
/* update cursors */ /* update cursors */
smc_curs_add(conn->sndbuf_size, &prep, copylen); smc_curs_add(conn->sndbuf_desc->len, &prep, copylen);
smc_curs_write(&conn->tx_curs_prep, smc_curs_write(&conn->tx_curs_prep,
smc_curs_read(&prep, conn), smc_curs_read(&prep, conn),
conn); conn);
/* increased in send tasklet smc_cdc_tx_handler() */ /* increased in send tasklet smc_cdc_tx_handler() */
smp_mb__before_atomic(); smp_mb__before_atomic();
atomic_sub(copylen, &conn->sndbuf_space); atomic_sub(copylen, &conn->sndbuf_space);
/* guarantee 0 <= sndbuf_space <= sndbuf_size */ /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
smp_mb__after_atomic(); smp_mb__after_atomic();
/* since we just produced more new data into sndbuf, /* since we just produced more new data into sndbuf,
* trigger sndbuf consumer: RDMA write into peer RMBE and CDC * trigger sndbuf consumer: RDMA write into peer RMBE and CDC
*/ */
if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) && if ((msg->msg_flags & MSG_MORE || smc_tx_is_corked(smc)) &&
(atomic_read(&conn->sndbuf_space) > (atomic_read(&conn->sndbuf_space) >
(conn->sndbuf_size >> 1))) (conn->sndbuf_desc->len >> 1)))
/* for a corked socket defer the RDMA writes if there /* for a corked socket defer the RDMA writes if there
* is still sufficient sndbuf_space available * is still sufficient sndbuf_space available
*/ */
...@@ -261,7 +261,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, ...@@ -261,7 +261,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
rdma_wr.remote_addr = rdma_wr.remote_addr =
lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr + lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
/* RMBE within RMB */ /* RMBE within RMB */
((conn->peer_conn_idx - 1) * conn->peer_rmbe_size) + conn->tx_off +
/* offset within RMBE */ /* offset within RMBE */
peer_rmbe_offset; peer_rmbe_offset;
rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
...@@ -286,7 +286,7 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn, ...@@ -286,7 +286,7 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
atomic_sub(len, &conn->peer_rmbe_space); atomic_sub(len, &conn->peer_rmbe_space);
/* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */ /* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */
smp_mb__after_atomic(); smp_mb__after_atomic();
smc_curs_add(conn->sndbuf_size, sent, len); smc_curs_add(conn->sndbuf_desc->len, sent, len);
} }
/* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
...@@ -309,7 +309,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) ...@@ -309,7 +309,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn); smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn);
smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn); smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn);
/* cf. wmem_alloc - (snd_max - snd_una) */ /* cf. wmem_alloc - (snd_max - snd_una) */
to_send = smc_curs_diff(conn->sndbuf_size, &sent, &prep); to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
if (to_send <= 0) if (to_send <= 0)
return 0; return 0;
...@@ -351,12 +351,12 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) ...@@ -351,12 +351,12 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
dst_len_sum = dst_len; dst_len_sum = dst_len;
src_off = sent.count; src_off = sent.count;
/* dst_len determines the maximum src_len */ /* dst_len determines the maximum src_len */
if (sent.count + dst_len <= conn->sndbuf_size) { if (sent.count + dst_len <= conn->sndbuf_desc->len) {
/* unwrapped src case: single chunk of entire dst_len */ /* unwrapped src case: single chunk of entire dst_len */
src_len = dst_len; src_len = dst_len;
} else { } else {
/* wrapped src case: 2 chunks of sum dst_len; start with 1st: */ /* wrapped src case: 2 chunks of sum dst_len; start with 1st: */
src_len = conn->sndbuf_size - sent.count; src_len = conn->sndbuf_desc->len - sent.count;
} }
src_len_sum = src_len; src_len_sum = src_len;
dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
...@@ -368,8 +368,8 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) ...@@ -368,8 +368,8 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
num_sges++; num_sges++;
src_off += src_len; src_off += src_len;
if (src_off >= conn->sndbuf_size) if (src_off >= conn->sndbuf_desc->len)
src_off -= conn->sndbuf_size; src_off -= conn->sndbuf_desc->len;
/* modulo in send ring */ /* modulo in send ring */
if (src_len_sum == dst_len) if (src_len_sum == dst_len)
break; /* either on 1st or 2nd iteration */ break; /* either on 1st or 2nd iteration */
...@@ -387,7 +387,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) ...@@ -387,7 +387,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
dst_len = len - dst_len; /* remainder */ dst_len = len - dst_len; /* remainder */
dst_len_sum += dst_len; dst_len_sum += dst_len;
src_len = min_t(int, src_len = min_t(int,
dst_len, conn->sndbuf_size - sent.count); dst_len, conn->sndbuf_desc->len - sent.count);
src_len_sum = src_len; src_len_sum = src_len;
} }
...@@ -484,11 +484,11 @@ void smc_tx_consumer_update(struct smc_connection *conn) ...@@ -484,11 +484,11 @@ void smc_tx_consumer_update(struct smc_connection *conn)
smc_curs_write(&cfed, smc_curs_write(&cfed,
smc_curs_read(&conn->rx_curs_confirmed, conn), smc_curs_read(&conn->rx_curs_confirmed, conn),
conn); conn);
to_confirm = smc_curs_diff(conn->rmbe_size, &cfed, &cons); to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);
if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
((to_confirm > conn->rmbe_update_limit) && ((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) || ((to_confirm > (conn->rmb_desc->len / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) { conn->local_rx_ctrl.prod_flags.write_blocked))) {
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
conn->alert_token_local) { /* connection healthy */ conn->alert_token_local) { /* connection healthy */
......
...@@ -24,7 +24,7 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn) ...@@ -24,7 +24,7 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn)
smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn); smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn);
smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn); smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn);
return smc_curs_diff(conn->sndbuf_size, &sent, &prep); return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
} }
void smc_tx_work(struct work_struct *work); void smc_tx_work(struct work_struct *work);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment