Commit fe4f961e authored by David S. Miller

Merge branch 'net-smc-add-event-based-framework-for-LLC-msgs'

Karsten Graul says:

====================
net/smc: add event-based framework for LLC msgs

These patches are the next step towards SMC-R link failover support. They add
a new framework to handle Link Layer Control (LLC) messages and adapt the
existing code to use the new framework.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 1569a3c4 41a211d8
......@@ -382,22 +382,24 @@ static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr,
static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
struct smc_link *link = smc->conn.lnk;
int rest;
struct smc_llc_qentry *qentry;
int rc;
link->lgr->type = SMC_LGR_SINGLE;
/* receive CONFIRM LINK request from server over RoCE fabric */
rest = wait_for_completion_interruptible_timeout(
&link->llc_confirm,
SMC_LLC_WAIT_FIRST_TIME);
if (rest <= 0) {
qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
SMC_LLC_CONFIRM_LINK);
if (!qentry) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
}
if (link->llc_confirm_rc)
rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
if (rc)
return SMC_CLC_DECL_RMBE_EC;
rc = smc_ib_modify_qp_rts(link);
......@@ -409,31 +411,30 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
return SMC_CLC_DECL_ERR_REGRMB;
/* confirm_rkey is implicit on 1st contact */
smc->conn.rmb_desc->is_conf_rkey = true;
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TIMEOUT_CL;
/* receive ADD LINK request from server over RoCE fabric */
rest = wait_for_completion_interruptible_timeout(&link->llc_add,
SMC_LLC_WAIT_TIME);
if (rest <= 0) {
smc_llc_link_active(link);
/* optional 2nd link, receive ADD LINK request from server */
qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
SMC_LLC_ADD_LINK);
if (!qentry) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
if (rc == -EAGAIN)
rc = 0; /* no DECLINE received, go with one link */
return rc;
}
/* send add link reject message, only one link supported for now */
rc = smc_llc_send_add_link(link,
link->smcibdev->mac[link->ibport - 1],
link->gid, SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TIMEOUT_AL;
smc_llc_link_active(link);
smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
/* tbd: call smc_llc_cli_add_link(link, qentry); */
return 0;
}
......@@ -613,8 +614,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
struct smc_init_info *ini)
{
int i, reason_code = 0;
struct smc_link *link;
int reason_code = 0;
ini->is_smcd = false;
ini->ib_lcl = &aclc->lcl;
......@@ -627,10 +628,28 @@ static int smc_connect_rdma(struct smc_sock *smc,
mutex_unlock(&smc_client_lgr_pending);
return reason_code;
}
link = smc->conn.lnk;
smc_conn_save_peer_info(smc, aclc);
if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
link = smc->conn.lnk;
} else {
/* set link that was assigned by server */
link = NULL;
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *l = &smc->conn.lgr->lnk[i];
if (l->peer_qpn == ntoh24(aclc->qpn)) {
link = l;
break;
}
}
if (!link)
return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
ini->cln_first_contact);
smc->conn.lnk = link;
}
/* create send buffer and rmb */
if (smc_buf_create(smc, false))
return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
......@@ -666,7 +685,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
reason_code = smcr_clnt_conf_first_link(smc);
smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
if (reason_code)
return smc_connect_abort(smc, reason_code,
ini->cln_first_contact);
......@@ -1019,9 +1040,11 @@ void smc_close_non_accepted(struct sock *sk)
static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
struct smc_link *link = smc->conn.lnk;
int rest;
struct smc_llc_qentry *qentry;
int rc;
link->lgr->type = SMC_LGR_SINGLE;
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
return SMC_CLC_DECL_ERR_REGRMB;
......@@ -1031,40 +1054,27 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
return SMC_CLC_DECL_TIMEOUT_CL;
/* receive CONFIRM LINK response from client over the RoCE fabric */
rest = wait_for_completion_interruptible_timeout(
&link->llc_confirm_resp,
SMC_LLC_WAIT_FIRST_TIME);
if (rest <= 0) {
qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
SMC_LLC_CONFIRM_LINK);
if (!qentry) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
}
if (link->llc_confirm_resp_rc)
rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
if (rc)
return SMC_CLC_DECL_RMBE_EC;
/* send ADD LINK request to client over the RoCE fabric */
rc = smc_llc_send_add_link(link,
link->smcibdev->mac[link->ibport - 1],
link->gid, SMC_LLC_REQ);
if (rc < 0)
return SMC_CLC_DECL_TIMEOUT_AL;
/* receive ADD LINK response from client over the RoCE fabric */
rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
SMC_LLC_WAIT_TIME);
if (rest <= 0) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
}
/* confirm_rkey is implicit on 1st contact */
smc->conn.rmb_desc->is_conf_rkey = true;
smc_llc_link_active(link);
/* initial contact - try to establish second link */
/* tbd: call smc_llc_srv_add_link(link); */
return 0;
}
......@@ -1240,7 +1250,9 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
goto decline;
}
/* QP confirmation over RoCE fabric */
smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
reason_code = smcr_serv_conf_first_link(new_smc);
smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
if (reason_code)
goto decline;
}
......
......@@ -45,6 +45,7 @@
#define SMC_CLC_DECL_GETVLANERR 0x03080000 /* err to get vlan id of ip device*/
#define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */
#define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */
#define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
......
......@@ -200,7 +200,6 @@ static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
{
if (lnk->state == SMC_LNK_ACTIVE &&
!smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
smc_llc_link_deleting(lnk);
return 0;
}
return -ENOTCONN;
......@@ -263,6 +262,7 @@ static void smc_lgr_free_work(struct work_struct *work)
if (smc_link_usable(lnk))
lnk->state = SMC_LNK_INACTIVE;
}
wake_up_interruptible_all(&lgr->llc_waiter);
}
smc_lgr_free(lgr);
}
......@@ -445,13 +445,11 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
}
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
struct smc_link *lnk)
struct smc_link_group *lgr)
{
struct smc_link_group *lgr = lnk->lgr;
if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
/* unregister rmb with peer */
smc_llc_do_delete_rkey(lnk, rmb_desc);
smc_llc_do_delete_rkey(lgr, rmb_desc);
rmb_desc->is_conf_rkey = false;
}
if (rmb_desc->is_reg_err) {
......@@ -474,7 +472,7 @@ static void smc_buf_unuse(struct smc_connection *conn,
if (conn->rmb_desc && lgr->is_smcd)
conn->rmb_desc->used = 0;
else if (conn->rmb_desc)
smcr_buf_unuse(conn->rmb_desc, conn->lnk);
smcr_buf_unuse(conn->rmb_desc, lgr);
}
/* remove a finished connection from its link group */
......@@ -696,6 +694,7 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
if (smc_link_usable(lnk))
lnk->state = SMC_LNK_INACTIVE;
}
wake_up_interruptible_all(&lgr->llc_waiter);
}
}
......@@ -767,8 +766,7 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
continue;
/* tbd - terminate only when no more links are active */
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_usable(&lgr->lnk[i]) ||
lgr->lnk[i].state == SMC_LNK_DELETING)
if (!smc_link_usable(&lgr->lnk[i]))
continue;
if (lgr->lnk[i].smcibdev == smcibdev &&
lgr->lnk[i].ibport == ibport) {
......@@ -1167,7 +1165,6 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
if (!smc_link_usable(lnk))
continue;
if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
smcr_buf_unuse(buf_desc, lnk);
rc = -ENOMEM;
goto out;
}
......@@ -1273,6 +1270,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (!is_smcd) {
if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
smcr_buf_unuse(buf_desc, lgr);
return -ENOMEM;
}
}
......@@ -1368,6 +1366,53 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
return -ENOSPC;
}
/* Search the rtoken table of a link group for a slot that is marked used in
 * rtokens_used_mask and whose entry for link index lnk_idx holds rkey.
 * Returns the index of the first matching slot, or -ENOENT if none matches.
 */
static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int slot;

	for (slot = 0; slot < SMC_RMBS_PER_LGR_MAX; slot++) {
		/* unused slots are never looked at */
		if (!test_bit(slot, lgr->rtokens_used_mask))
			continue;
		if (lgr->rtokens[slot][lnk_idx].rkey != rkey)
			continue;
		return slot;
	}
	return -ENOENT;
}
/* Set the rtoken of a new link for an already known rmb.
 * The rmb's slot is located through the rkey (nw_rkey_known, network byte
 * order) that is stored for the existing link link_idx; the rkey and address
 * for link_idx_new are then written into that same slot in host byte order.
 * Nothing is changed when the known rkey is not found.
 */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int slot;

	slot = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (slot != -ENOENT) {
		lgr->rtokens[slot][link_idx_new].rkey = ntohl(nw_rkey);
		lgr->rtokens[slot][link_idx_new].dma_addr =
			be64_to_cpu(nw_vaddr);
	}
}
/* Set the rtoken in slot rtok_idx for the link identified by link_id.
 * The first entry of lgr->lnk[] with a matching link_id selects the link
 * index; the rkey and address (both given in network byte order) are stored
 * for it in host byte order. Nothing is changed when no link matches.
 */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	int idx;

	for (idx = 0; idx < SMC_LINKS_PER_LGR_MAX; idx++) {
		if (lgr->lnk[idx].link_id != link_id)
			continue;
		/* first match wins, update its rtoken and stop searching */
		lgr->rtokens[rtok_idx][idx].rkey = ntohl(nw_rkey);
		lgr->rtokens[rtok_idx][idx].dma_addr = be64_to_cpu(nw_vaddr);
		return;
	}
}
/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
......
......@@ -36,7 +36,6 @@ enum smc_link_state { /* possible states of a link */
SMC_LNK_INACTIVE, /* link is inactive */
SMC_LNK_ACTIVATING, /* link is being activated */
SMC_LNK_ACTIVE, /* link is active */
SMC_LNK_DELETING, /* link is being deleted */
};
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
......@@ -120,20 +119,9 @@ struct smc_link {
struct smc_link_group *lgr; /* parent link group */
enum smc_link_state state; /* state of link */
struct completion llc_confirm; /* wait for rx of conf link */
struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
int llc_confirm_rc; /* rc from confirm link msg */
int llc_confirm_resp_rc; /* rc from conf_resp msg */
struct completion llc_add; /* wait for rx of add link */
struct completion llc_add_resp; /* wait for rx of add link rsp*/
struct delayed_work llc_testlink_wrk; /* testlink worker */
struct completion llc_testlink_resp; /* wait for rx of testlink */
int llc_testlink_time; /* testlink interval */
struct completion llc_confirm_rkey_resp; /* w4 rx of cnf rkey */
int llc_confirm_rkey_resp_rc; /* rc from cnf rkey */
struct completion llc_delete_rkey_resp; /* w4 rx of del rkey */
int llc_delete_rkey_resp_rc; /* rc from del rkey */
struct mutex llc_delete_rkey_mutex; /* serialize usage */
};
/* For now we just allow one parallel link per link group. The SMC protocol
......@@ -197,6 +185,28 @@ struct smc_rtoken { /* address/key of remote RMB */
struct smcd_dev;
/* Redundancy state of a link group, kept in smc_link_group.type.
 * Both first-link confirmation functions (smcr_clnt_conf_first_link() and
 * smcr_serv_conf_first_link()) set it to SMC_LGR_SINGLE on entry.
 */
enum smc_lgr_type { /* redundancy state of lgr */
SMC_LGR_NONE, /* no active links, lgr to be deleted */
SMC_LGR_SINGLE, /* 1 active RNIC on each peer */
SMC_LGR_SYMMETRIC, /* 2 active RNICs on each peer */
SMC_LGR_ASYMMETRIC_PEER, /* local has 2, peer 1 active RNICs */
SMC_LGR_ASYMMETRIC_LOCAL, /* local has 1, peer 2 active RNICs */
};
/* Type of an LLC flow: stored in struct smc_llc_flow.type and passed to
 * smc_llc_flow_initiate() when a flow is started.
 * NOTE(review): the non-zero values (2, 4, 6) presumably mirror the LLC
 * message type codes of the corresponding requests - confirm against the
 * LLC message type definitions.
 */
enum smc_llc_flowtype {
SMC_LLC_FLOW_NONE = 0,
SMC_LLC_FLOW_ADD_LINK = 2,
SMC_LLC_FLOW_DEL_LINK = 4,
SMC_LLC_FLOW_RKEY = 6,
};
struct smc_llc_qentry;
/* Control data of one LLC flow. struct smc_link_group embeds two instances,
 * llc_flow_lcl (local) and llc_flow_rmt (remote).
 */
struct smc_llc_flow {
/* kind of flow, one of enum smc_llc_flowtype */
enum smc_llc_flowtype type;
/* LLC queue entry attached to this flow; callers handle it through
 * smc_llc_flow_qentry_clr() and smc_llc_flow_qentry_del()
 */
struct smc_llc_qentry *qentry;
};
struct smc_link_group {
struct list_head list;
struct rb_root conns_all; /* connection tree */
......@@ -232,12 +242,24 @@ struct smc_link_group {
DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
/* used rtoken elements */
u8 next_link_id;
enum smc_lgr_type type;
/* redundancy state */
struct list_head llc_event_q;
/* queue for llc events */
spinlock_t llc_event_q_lock;
/* protects llc_event_q */
struct work_struct llc_event_work;
/* llc event worker */
wait_queue_head_t llc_waiter;
/* w4 next llc event */
struct smc_llc_flow llc_flow_lcl;
/* llc local control field */
struct smc_llc_flow llc_flow_rmt;
/* llc remote control field */
struct smc_llc_qentry *delayed_event;
/* arrived when flow active */
spinlock_t llc_flow_lock;
/* protects llc flow */
int llc_testlink_time;
/* link keep alive time */
};
......@@ -329,6 +351,10 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc);
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey);
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey);
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
__be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey);
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
__be64 nw_vaddr, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
......
This diff is collapsed.
......@@ -57,12 +57,21 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
void smc_llc_lgr_clear(struct smc_link_group *lgr);
int smc_llc_link_init(struct smc_link *link);
void smc_llc_link_active(struct smc_link *link);
void smc_llc_link_deleting(struct smc_link *link);
void smc_llc_link_clear(struct smc_link *link);
int smc_llc_do_confirm_rkey(struct smc_link *link,
int smc_llc_do_confirm_rkey(struct smc_link *send_link,
struct smc_buf_desc *rmb_desc);
int smc_llc_do_delete_rkey(struct smc_link *link,
int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
struct smc_buf_desc *rmb_desc);
int smc_llc_flow_initiate(struct smc_link_group *lgr,
enum smc_llc_flowtype type);
void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow);
int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
enum smc_llc_reqresp type);
struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
struct smc_link *lnk,
int time_out, u8 exp_msg);
struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow);
void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment