Commit b5f16484 authored by David S. Miller

Merge branch 'smc-pnetid-and-SMC-D-support'

Ursula Braun says:

====================
smc: pnetid and SMC-D support

SMC requires a configured pnet table to map Ethernet interfaces to
RoCE adapter ports. For s390 there exists hardware support to group
such devices. The first three patches cover the s390 pnetid support,
enabling SMC-R usage on s390 without configuring an extra pnet table.

SMC currently requires RoCE adapters, and uses RDMA-techniques
implemented with IB-verbs. But s390 offers another method for
intra-CEC Shared Memory communication. The following seven patches
implement a solution to run SMC traffic based on intra-CEC DMA,
called SMC-D.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents b0402f01 684b89bc
......@@ -95,4 +95,14 @@ config CCWGROUP
tristate
default (LCS || CTCM || QETH)
config ISM
tristate "Support for ISM vPCI Adapter"
depends on PCI && SMC
default n
help
Select this option if you want to use the Internal Shared Memory
vPCI Adapter.
To compile as a module choose M. The module name is ism.
If unsure, choose N.
endmenu
......@@ -15,3 +15,6 @@ qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o
obj-$(CONFIG_QETH_L2) += qeth_l2.o
qeth_l3-y += qeth_l3_main.o qeth_l3_sys.o
obj-$(CONFIG_QETH_L3) += qeth_l3.o
ism-y := ism_drv.o
obj-$(CONFIG_ISM) += ism.o
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef S390_ISM_H
#define S390_ISM_H
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <net/smc.h>
#define UTIL_STR_LEN 16
/*
* Do not use the first word of the DMB bits to ensure 8 byte aligned access.
*/
#define ISM_DMB_WORD_OFFSET 1
#define ISM_DMB_BIT_OFFSET (ISM_DMB_WORD_OFFSET * 32)
#define ISM_NR_DMBS 1920
#define ISM_REG_SBA 0x1
#define ISM_REG_IEQ 0x2
#define ISM_READ_GID 0x3
#define ISM_ADD_VLAN_ID 0x4
#define ISM_DEL_VLAN_ID 0x5
#define ISM_SET_VLAN 0x6
#define ISM_RESET_VLAN 0x7
#define ISM_QUERY_INFO 0x8
#define ISM_QUERY_RGID 0x9
#define ISM_REG_DMB 0xA
#define ISM_UNREG_DMB 0xB
#define ISM_SIGNAL_IEQ 0xE
#define ISM_UNREG_SBA 0x11
#define ISM_UNREG_IEQ 0x12
#define ISM_ERROR 0xFFFF
/* Header at the start of every ISM command request block.
 * It has the same size and field positions as struct ism_resp_hdr, with an
 * unnamed 16-bit filler where the response carries its return code.
 */
struct ism_req_hdr {
/* command code; presumably one of the ISM_* values defined above - confirm */
u32 cmd;
/* unnamed filler, occupies the position of 'ret' in struct ism_resp_hdr */
u16 : 16;
/* length field; NOTE(review): presumably the size of the request - confirm */
u16 len;
};
/* Header at the start of every ISM command response block.
 * Same size and field positions as struct ism_req_hdr.
 */
struct ism_resp_hdr {
/* command code; presumably echoes the request's command - confirm */
u32 cmd;
/* return code of the command; presumably ISM_ERROR marks a failure - confirm */
u16 ret;
/* length field; NOTE(review): presumably the size of the response - confirm */
u16 len;
};
/* Command block with a 64-bit 'sba' request operand; by its name presumably
 * used with ISM_REG_SBA - confirm against the driver.
 * Request and response overlay the same storage, which is 16-byte aligned.
 */
union ism_reg_sba {
struct {
struct ism_req_hdr hdr;
/* NOTE(review): presumably the DMA address of struct ism_sba - confirm */
u64 sba;
} request;
struct {
/* the response consists of the header only */
struct ism_resp_hdr hdr;
} response;
} __aligned(16);
/* Command block with 64-bit 'ieq' and 'len' request operands; by its name
 * presumably used with ISM_REG_IEQ - confirm against the driver.
 * Request and response overlay the same storage, which is 16-byte aligned.
 */
union ism_reg_ieq {
struct {
struct ism_req_hdr hdr;
/* NOTE(review): presumably the DMA address of struct ism_eq - confirm */
u64 ieq;
/* NOTE(review): presumably the size of the event queue - confirm */
u64 len;
} request;
struct {
/* the response consists of the header only */
struct ism_resp_hdr hdr;
} response;
} __aligned(16);
/* Command block whose response returns a 64-bit 'gid'; by its name presumably
 * used with ISM_READ_GID - confirm against the driver.
 * Request and response overlay the same storage, which is 16-byte aligned.
 */
union ism_read_gid {
struct {
/* the request consists of the header only */
struct ism_req_hdr hdr;
} request;
struct {
struct ism_resp_hdr hdr;
/* GID value returned in the response */
u64 gid;
} response;
} __aligned(16);
/* Command block whose response returns a set of device properties; by its
 * name presumably used with ISM_QUERY_INFO - confirm against the driver.
 * Request and response overlay the same storage, which is 64-byte aligned.
 * NOTE(review): the meaning of the individual response fields is taken from
 * their names only; confirm against the device documentation.
 */
union ism_qi {
struct {
/* the request consists of the header only */
struct ism_req_hdr hdr;
} request;
struct {
struct ism_resp_hdr hdr;
u32 version;
u32 max_len;
u64 ism_state;
u64 my_gid;
u64 sba;
u64 ieq;
u32 ieq_len;
/* unnamed 32-bit filler between ieq_len and dmbs_owned */
u32 : 32;
u32 dmbs_owned;
u32 dmbs_used;
u32 vlan_required;
u32 vlan_nr_ids;
/* room for 64 VLAN ids; presumably vlan_nr_ids of them are valid - confirm */
u16 vlan_id[64];
} response;
} __aligned(64);
/* Command block carrying a remote GID plus VLAN information; by its name
 * presumably used with ISM_QUERY_RGID - confirm against the driver.
 * The operands mirror the parameters of smcd_ops.query_remote_gid().
 * Request and response overlay the same storage, which is 16-byte aligned.
 */
union ism_query_rgid {
struct {
struct ism_req_hdr hdr;
/* remote GID to query */
u64 rgid;
/* NOTE(review): presumably non-zero when vlan_id is to be used - confirm */
u32 vlan_valid;
u32 vlan_id;
} request;
struct {
/* the response consists of the header only */
struct ism_resp_hdr hdr;
} response;
} __aligned(16);
/* Command block describing a DMB to the device and returning a DMB token; by
 * its name presumably used with ISM_REG_DMB - confirm against the driver.
 * The request operands have counterparts of the same name in struct smcd_dmb.
 * Request and response overlay the same storage, which is 32-byte aligned.
 */
union ism_reg_dmb {
struct {
struct ism_req_hdr hdr;
/* NOTE(review): presumably the DMA address of the buffer - confirm */
u64 dmb;
u32 dmb_len;
u32 sba_idx;
u32 vlan_valid;
u32 vlan_id;
u64 rgid;
} request;
struct {
struct ism_resp_hdr hdr;
/* token returned for the DMB */
u64 dmb_tok;
} response;
} __aligned(32);
/* Command block carrying an event for a remote GID; by its name presumably
 * used with ISM_SIGNAL_IEQ - confirm against the driver.
 * The operands mirror the parameters of smcd_ops.signal_event().
 * Request and response overlay the same storage, which is 32-byte aligned.
 */
union ism_sig_ieq {
struct {
struct ism_req_hdr hdr;
/* remote GID the event is addressed to */
u64 rgid;
u32 trigger_irq;
u32 event_code;
u64 info;
} request;
struct {
/* the response consists of the header only */
struct ism_resp_hdr hdr;
} response;
} __aligned(32);
/* Command block identifying a DMB by its token; by its name presumably used
 * with ISM_UNREG_DMB - confirm against the driver.
 * Request and response overlay the same storage, which is 16-byte aligned.
 */
union ism_unreg_dmb {
struct {
struct ism_req_hdr hdr;
/* token of the DMB, as returned in the ism_reg_dmb response */
u64 dmb_tok;
} request;
struct {
/* the response consists of the header only */
struct ism_resp_hdr hdr;
} response;
} __aligned(16);
/* Command block for commands without operands: request and response each
 * consist of the header only. The storage is 8-byte aligned.
 */
union ism_cmd_simple {
struct {
struct ism_req_hdr hdr;
} request;
struct {
struct ism_resp_hdr hdr;
} response;
} __aligned(8);
/* Command block carrying a single VLAN id operand.
 * NOTE(review): presumably shared by ISM_ADD_VLAN_ID and ISM_DEL_VLAN_ID -
 * confirm against the driver.
 * Request and response overlay the same storage, which is 16-byte aligned.
 */
union ism_set_vlan_id {
struct {
struct ism_req_hdr hdr;
/* VLAN id, carried as a 64-bit operand like smcd_ops.add_vlan_id()'s argument */
u64 vlan_id;
} request;
struct {
/* the response consists of the header only */
struct ism_resp_hdr hdr;
} response;
} __aligned(16);
/* Header placed in front of the entries of struct ism_eq: four 64-bit words,
 * the last one an unnamed filler.
 * NOTE(review): field meanings are taken from their names only - confirm.
 */
struct ism_eq_header {
/* presumably the index of the most recently written entry - confirm */
u64 idx;
u64 ieq_len;
u64 entry_len;
/* unnamed 64-bit filler */
u64 : 64;
};
/* Event queue: a header followed by a fixed array of 15 event entries. */
struct ism_eq {
struct ism_eq_header header;
/* entries use the struct smcd_event layout from <net/smc.h> */
struct smcd_event entry[15];
};
/* Layout of the SBA area: two flag bits, a bit array and a per-DMB mask array.
 * NOTE(review): which side writes which field is not visible here - confirm
 * against the driver's interrupt handler.
 */
struct ism_sba {
u32 s : 1; /* summary bit */
u32 e : 1; /* event bit */
/* remaining 30 bits of the first word are unnamed filler */
u32 : 30;
/* ISM_NR_DMBS bits in 32-bit words; presumably one bit per DMB - confirm */
u32 dmb_bits[ISM_NR_DMBS / 32];
u32 reserved[3];
/* one 16-bit mask per DMB */
u16 dmbe_mask[ISM_NR_DMBS];
};
/* Per-device state of the ISM driver. */
struct ism_dev {
/* NOTE(review): what exactly this lock protects is not visible here - confirm */
spinlock_t lock;
/* underlying PCI device; __ism_move() derives the zPCI device from it */
struct pci_dev *pdev;
/* SMC-D device associated with this ISM device */
struct smcd_dev *smcd;
void __iomem *ctl;
/* SBA area and its DMA address */
struct ism_sba *sba;
dma_addr_t sba_dma_addr;
/* bitmap with one bit per DMB (ISM_NR_DMBS bits) */
DECLARE_BITMAP(sba_bitmap, ISM_NR_DMBS);
/* event queue and its DMA address */
struct ism_eq *ieq;
dma_addr_t ieq_dma_addr;
/* NOTE(review): presumably the last processed event queue index - confirm */
int ieq_idx;
};
/* Build the request word for a data move: @idx is placed at bit 24 and @sf at
 * bit 23, and both are OR-ed together with @dmb and @offset. The shifts are
 * evaluated in the (promoted) type of the respective argument.
 */
#define ISM_CREATE_REQ(dmb, idx, sf, offset)				\
	((dmb) | ((idx) << 24) | ((sf) << 23) | (offset))
/* Write @size bytes from @data to the device with a zPCI block write.
 *
 * The request is built from the function handle of the zPCI device behind
 * @ism->pdev and @size; @dmb_req is handed to zpci_write_block() as its third
 * argument. Returns the result of zpci_write_block().
 */
static inline int __ism_move(struct ism_dev *ism, u64 dmb_req, void *data,
			     unsigned int size)
{
	struct zpci_dev *zpdev = to_zpci(ism->pdev);
	u64 pci_req;

	pci_req = ZPCI_CREATE_REQ(zpdev->fh, 0, size);

	return zpci_write_block(pci_req, data, dmb_req);
}
#endif /* S390_ISM_H */
This diff is collapsed.
......@@ -11,6 +11,8 @@
#ifndef _SMC_H
#define _SMC_H
#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */
struct smc_hashinfo {
rwlock_t lock;
struct hlist_head ht;
......@@ -18,4 +20,67 @@ struct smc_hashinfo {
int smc_hash_sk(struct sock *sk);
void smc_unhash_sk(struct sock *sk);
/* SMCD/ISM device driver interface */
/* Description of a DMB as exchanged between the SMC code and the ISM device
 * driver through smcd_ops.register_dmb() / unregister_dmb().
 */
struct smcd_dmb {
/* token identifying the DMB */
u64 dmb_tok;
/* GID of the remote side */
u64 rgid;
/* length of the buffer */
u32 dmb_len;
/* SBA index of the DMB */
u32 sba_idx;
/* NOTE(review): presumably non-zero when vlan_id is to be used - confirm */
u32 vlan_valid;
u32 vlan_id;
/* CPU (virtual) address and DMA address of the buffer */
void *cpu_addr;
dma_addr_t dma_addr;
};
#define ISM_EVENT_DMB 0
#define ISM_EVENT_GID 1
#define ISM_EVENT_SWR 2
/* Event entry passed to smcd_handle_event(); the ISM driver's event queue
 * uses the same layout for its entries.
 */
struct smcd_event {
/* event type; presumably one of the ISM_EVENT_* values above - confirm */
u32 type;
u32 code;
/* NOTE(review): meaning of tok/time/info depends on the event type - confirm */
u64 tok;
u64 time;
u64 info;
};
struct smcd_dev;
/* Operations an ISM device driver provides to the SMC-D code.
 * Every callback takes the smcd_dev it operates on and returns an int.
 * NOTE(review): the exact return conventions are not visible here; the
 * callers in smc_ism.c treat a non-zero result of add_vlan_id()/del_vlan_id()
 * as failure and a negative result of move_data() as failure.
 */
struct smcd_ops {
/* query the remote GID @rgid, optionally qualified by a VLAN id */
int (*query_remote_gid)(struct smcd_dev *dev, u64 rgid, u32 vid_valid,
u32 vid);
/* register / unregister the DMB described by @dmb */
int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
/* add / remove a VLAN id on the device */
int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
int (*set_vlan_required)(struct smcd_dev *dev);
int (*reset_vlan_required)(struct smcd_dev *dev);
/* signal an event to the remote GID @rgid */
int (*signal_event)(struct smcd_dev *dev, u64 rgid, u32 trigger_irq,
u32 event_code, u64 info);
/* move @size bytes from @data into the DMB identified by @dmb_tok */
int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
bool sf, unsigned int offset, void *data,
unsigned int size);
};
/* An SMC-D (ISM) device as seen by the SMC code. */
struct smcd_dev {
/* driver callbacks */
const struct smcd_ops *ops;
struct device dev;
/* NOTE(review): presumably driver-private data - confirm */
void *priv;
u64 local_gid;
/* NOTE(review): presumably links the device into smcd_dev_list - confirm */
struct list_head list;
/* taken by smc_ism.c around updates of 'conn' and the 'vlan' list */
spinlock_t lock;
/* connection per DMB, indexed by the SBA index of the connection's RMB */
struct smc_connection **conn;
/* list of VLAN ids registered with the device (struct smc_ism_vlanid) */
struct list_head vlan;
struct workqueue_struct *event_wq;
/* PNET id of the device, at most SMC_MAX_PNETID_LEN bytes */
u8 pnetid[SMC_MAX_PNETID_LEN];
};
struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
const struct smcd_ops *ops, int max_dmbs);
int smcd_register_dev(struct smcd_dev *smcd);
void smcd_unregister_dev(struct smcd_dev *smcd);
void smcd_free_dev(struct smcd_dev *smcd);
void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event);
void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit);
#endif /* _SMC_H */
......@@ -35,6 +35,7 @@ enum {
SMC_DIAG_CONNINFO,
SMC_DIAG_LGRINFO,
SMC_DIAG_SHUTDOWN,
SMC_DIAG_DMBINFO,
__SMC_DIAG_MAX,
};
......@@ -83,4 +84,13 @@ struct smc_diag_lgrinfo {
struct smc_diag_linkinfo lnk[1];
__u8 role;
};
/* Payload of the SMC_DIAG_DMBINFO netlink attribute.
 * NOTE(review): on ABIs that align __u64 to 8 bytes there is a 4-byte hole
 * after 'linkid', and the layout differs on ABIs that align __u64 to 4 bytes.
 * Code filling this struct for user space should zero it completely first so
 * that the hole does not carry uninitialized bytes.
 */
struct smcd_diag_dmbinfo { /* SMC-D Socket internals */
__u32 linkid; /* Link identifier */
__u64 peer_gid; /* Peer GID */
__u64 my_gid; /* My GID */
__u64 token; /* Token of DMB */
__u64 peer_token; /* Token of remote DMBE */
};
#endif /* _UAPI_SMC_DIAG_H_ */
obj-$(CONFIG_SMC) += smc.o
obj-$(CONFIG_SMC_DIAG) += smc_diag.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o
This diff is collapsed.
......@@ -21,8 +21,6 @@
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
#define SMC_MAX_PORTS 2 /* Max # of ports */
extern struct proto smc_proto;
extern struct proto smc_proto6;
......@@ -185,6 +183,11 @@ struct smc_connection {
spinlock_t acurs_lock; /* protect cursors */
#endif
struct work_struct close_work; /* peer sent some closing */
struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */
u8 rx_off; /* receive offset:
* 0 for SMC-R, 32 for SMC-D
*/
u64 peer_token; /* SMC-D token of peer */
};
struct smc_sock { /* smc sock container */
......
......@@ -117,7 +117,7 @@ int smc_cdc_msg_send(struct smc_connection *conn,
return rc;
}
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
struct smc_cdc_tx_pend *pend;
struct smc_wr_buf *wr_buf;
......@@ -130,6 +130,21 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
return smc_cdc_msg_send(conn, wr_buf, pend);
}
/* Send a CDC message for @conn over the transport of its link group.
 *
 * SMC-R link groups go through smcr_cdc_get_slot_and_msg_send(). For SMC-D
 * link groups smcd_cdc_msg_send() is called with conn->send_lock held
 * (bottom halves disabled). Returns the result of the called function.
 */
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
	int rc;

	if (!conn->lgr->is_smcd)
		return smcr_cdc_get_slot_and_msg_send(conn);

	spin_lock_bh(&conn->send_lock);
	rc = smcd_cdc_msg_send(conn);
	spin_unlock_bh(&conn->send_lock);

	return rc;
}
static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
unsigned long data)
{
......@@ -157,6 +172,45 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
(unsigned long)conn);
}
/* Send a SMC-D CDC header.
 * This increments the free space available in our send buffer.
 * Also update the confirmed receive buffer with what was sent to the peer.
 *
 * The message is built from conn->local_tx_ctrl and written with
 * smcd_tx_ism_write().
 *
 * Returns 0 on success, or the non-zero return code of smcd_tx_ism_write();
 * in the error case no cursor and no send buffer accounting is changed.
 */
int smcd_cdc_msg_send(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
struct smcd_cdc_msg cdc;
int rc, diff;
/* zero the message so that all reserved fields are sent as 0 */
memset(&cdc, 0, sizeof(cdc));
cdc.common.type = SMC_CDC_MSG_TYPE;
cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap;
cdc.prod_count = conn->local_tx_ctrl.prod.count;
cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap;
cdc.cons_count = conn->local_tx_ctrl.cons.count;
cdc.prod_flags = conn->local_tx_ctrl.prod_flags;
cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
/* NOTE(review): the trailing arguments 0 and 1 are presumably the offset
 * and the signal flag - confirm against smcd_tx_ism_write()
 */
rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1);
if (rc)
return rc;
/* the peer has now been told our consumer cursor: remember it as confirmed */
smc_curs_write(&conn->rx_curs_confirmed,
smc_curs_read(&conn->local_tx_ctrl.cons, conn), conn);
/* Calculate transmitted data and increment free send buffer space */
diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
&conn->tx_curs_sent);
/* increased by confirmed number of bytes */
smp_mb__before_atomic();
atomic_add(diff, &conn->sndbuf_space);
/* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
smp_mb__after_atomic();
/* everything sent so far counts as finished */
smc_curs_write(&conn->tx_curs_fin,
smc_curs_read(&conn->tx_curs_sent, conn), conn);
smc_tx_sndbuf_nonfull(smc);
/* rc is 0 here, a failure has returned early above */
return rc;
}
/********************************* receive ***********************************/
static inline bool smc_cdc_before(u16 seq1, u16 seq2)
......@@ -178,7 +232,7 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc,
if (!sock_flag(&smc->sk, SOCK_URGINLINE))
/* we'll skip the urgent byte, so don't account for it */
(*diff_prod)--;
base = (char *)conn->rmb_desc->cpu_addr;
base = (char *)conn->rmb_desc->cpu_addr + conn->rx_off;
if (conn->urg_curs.count)
conn->urg_rx_byte = *(base + conn->urg_curs.count - 1);
else
......@@ -276,6 +330,34 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
sock_put(&smc->sk); /* no free sk in softirq-context */
}
/* Receive tasklet body of an SMC-D connection.
 *
 * Takes a copy of the CDC message located at the start of the connection's
 * RMB and hands the copy to smc_cdc_msg_recv(). @data is the connection
 * pointer registered in smcd_cdc_rx_init(); a NULL connection is ignored.
 *
 * Context:
 * - tasklet context
 */
static void smcd_cdc_rx_tsklet(unsigned long data)
{
	struct smc_connection *conn = (struct smc_connection *)data;
	struct smcd_cdc_msg cdc;
	struct smc_sock *smc;

	if (conn) {
		smc = container_of(conn, struct smc_sock, conn);
		memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc));
		smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc);
	}
}
/* Initialize the receive tasklet of @conn: conn->rx_tsklet is set up to run
 * smcd_cdc_rx_tsklet() with the connection pointer as its argument.
 * NOTE(review): presumably the tasklet is scheduled when the ISM device
 * signals an update of the connection's DMBE - confirm against the caller of
 * tasklet_schedule().
 */
void smcd_cdc_rx_init(struct smc_connection *conn)
{
tasklet_init(&conn->rx_tsklet, smcd_cdc_rx_tsklet, (unsigned long)conn);
}
/***************************** init, exit, misc ******************************/
static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
......
......@@ -50,6 +50,20 @@ struct smc_cdc_msg {
u8 reserved[18];
} __packed; /* format defined in RFC7609 */
/* CDC message for SMC-D.
 * Packed layout; smcd_cdc_msg_send() fills it from conn->local_tx_ctrl and
 * smcd_cdc_msg_to_host() copies it back into a struct smc_host_cdc_msg.
 * The res* fields are reserved and sent as zero by smcd_cdc_msg_send().
 */
struct smcd_cdc_msg {
struct smc_wr_rx_hdr common; /* Type = 0xFE */
u8 res1[7];
/* producer cursor: wrap counter and byte count */
u16 prod_wrap;
u32 prod_count;
u8 res2[2];
/* consumer cursor: wrap counter and byte count */
u16 cons_wrap;
u32 cons_count;
struct smc_cdc_producer_flags prod_flags;
struct smc_cdc_conn_state_flags conn_state_flags;
u8 res3[8];
} __packed;
static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
{
return conn->local_rx_ctrl.conn_state_flags.peer_conn_abort ||
......@@ -204,9 +218,9 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local,
smc_curs_write(local, smc_curs_read(&temp, conn), conn);
}
static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smc_cdc_msg *peer,
struct smc_connection *conn)
static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smc_cdc_msg *peer,
struct smc_connection *conn)
{
local->common.type = peer->common.type;
local->len = peer->len;
......@@ -218,6 +232,27 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
local->conn_state_flags = peer->conn_state_flags;
}
/* Copy an SMC-D CDC message into the host representation.
 *
 * Only the producer and consumer cursors and the two flag fields of @peer are
 * transferred, without any byte order conversion; unlike
 * smcr_cdc_msg_to_host(), common.type and len of @local are left untouched.
 */
static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smcd_cdc_msg *peer)
{
local->prod.wrap = peer->prod_wrap;
local->prod.count = peer->prod_count;
local->cons.wrap = peer->cons_wrap;
local->cons.count = peer->cons_count;
local->prod_flags = peer->prod_flags;
local->conn_state_flags = peer->conn_state_flags;
}
/* Convert a received CDC message into the host representation.
 *
 * For an SMC-D link group @peer is reinterpreted as struct smcd_cdc_msg and
 * converted by smcd_cdc_msg_to_host(); otherwise smcr_cdc_msg_to_host()
 * handles it.
 */
static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
				       struct smc_cdc_msg *peer,
				       struct smc_connection *conn)
{
	if (!conn->lgr->is_smcd) {
		smcr_cdc_msg_to_host(local, peer, conn);
		return;
	}

	smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer);
}
struct smc_cdc_tx_pend;
int smc_cdc_get_free_slot(struct smc_connection *conn,
......@@ -227,6 +262,8 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
int smcd_cdc_msg_send(struct smc_connection *conn);
int smc_cdc_init(void) __init;
void smcd_cdc_rx_init(struct smc_connection *conn);
#endif /* SMC_CDC_H */
This diff is collapsed.
......@@ -23,6 +23,9 @@
#define SMC_CLC_DECLINE 0x04
#define SMC_CLC_V1 0x1 /* SMC version */
#define SMC_TYPE_R 0 /* SMC-R only */
#define SMC_TYPE_D 1 /* SMC-D only */
#define SMC_TYPE_B 3 /* SMC-R and SMC-D */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
#define SMC_CLC_DECL_TIMEOUT 0x02000000 /* timeout */
......@@ -42,9 +45,11 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 version : 4,
flag : 1,
rsvd : 3;
rsvd : 1,
path : 2;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 rsvd : 3,
u8 path : 2,
rsvd : 1,
flag : 1,
version : 4;
#endif
......@@ -77,6 +82,11 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */
} __aligned(4);
/* SMC-D part of a CLC proposal message; smc_get_clc_msg_smcd() expects it
 * directly behind struct smc_clc_msg_proposal.
 */
struct smc_clc_msg_smcd { /* SMC-D GID information */
u64 gid; /* ISM GID of requestor */
/* reserved bytes */
u8 res[32];
};
struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
struct smc_clc_msg_hdr hdr;
struct smc_clc_msg_local lcl;
......@@ -94,23 +104,45 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
struct smc_clc_msg_local lcl;
u8 qpn[3]; /* QP number */
__be32 rmb_rkey; /* RMB rkey */
u8 rmbe_idx; /* Index of RMBE in RMB */
__be32 rmbe_alert_token;/* unique connection id */
union {
struct { /* SMC-R */
struct smc_clc_msg_local lcl;
u8 qpn[3]; /* QP number */
__be32 rmb_rkey; /* RMB rkey */
u8 rmbe_idx; /* Index of RMBE in RMB */
__be32 rmbe_alert_token;/* unique connection id */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 rmbe_size : 4, /* RMBE buf size (compressed notation) */
qp_mtu : 4; /* QP mtu */
u8 rmbe_size : 4, /* buf size (compressed) */
qp_mtu : 4; /* QP mtu */
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 qp_mtu : 4,
rmbe_size : 4;
u8 qp_mtu : 4,
rmbe_size : 4;
#endif
u8 reserved;
__be64 rmb_dma_addr; /* RMB virtual address */
u8 reserved2;
u8 psn[3]; /* initial packet sequence number */
struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
u8 reserved;
__be64 rmb_dma_addr; /* RMB virtual address */
u8 reserved2;
u8 psn[3]; /* packet sequence number */
struct smc_clc_msg_trail smcr_trl;
/* eye catcher "SMCR" EBCDIC */
} __packed;
struct { /* SMC-D */
u64 gid; /* Sender GID */
u64 token; /* DMB token */
u8 dmbe_idx; /* DMBE index */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 dmbe_size : 4, /* buf size (compressed) */
reserved3 : 4;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 reserved3 : 4,
dmbe_size : 4;
#endif
u16 reserved4;
u32 linkid; /* Link identifier */
u32 reserved5[3];
struct smc_clc_msg_trail smcd_trl;
/* eye catcher "SMCD" EBCDIC */
} __packed;
};
} __packed; /* format defined in RFC7609 */
struct smc_clc_msg_decline { /* clc decline message */
......@@ -129,13 +161,26 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
}
/* Get the SMC-D info from a proposal message.
 *
 * The SMC-D area is taken to be present only if the proposal's iparea_offset
 * (network byte order) equals sizeof(struct smc_clc_msg_smcd); it then starts
 * directly behind the proposal structure. Returns NULL otherwise.
 */
static inline struct smc_clc_msg_smcd *
smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
{
	if (ntohs(prop->iparea_offset) == sizeof(struct smc_clc_msg_smcd))
		return (struct smc_clc_msg_smcd *)(prop + 1);

	return NULL;
}
struct smcd_dev;
int smc_clc_prfx_match(struct socket *clcsock,
struct smc_clc_msg_proposal_prefix *prop);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
u8 ibport);
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
struct smc_ib_device *smcibdev, u8 ibport,
struct smcd_dev *ismdev);
int smc_clc_send_confirm(struct smc_sock *smc);
int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);
......
This diff is collapsed.
......@@ -124,15 +124,28 @@ struct smc_buf_desc {
void *cpu_addr; /* virtual address of buffer */
struct page *pages;
int len; /* length of buffer */
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
/* for rmb only: memory region
* incl. rkey provided to peer
*/
u32 order; /* allocation order */
u32 used; /* currently used / unused */
u8 reused : 1; /* new created / reused */
u8 regerr : 1; /* err during registration */
union {
struct { /* SMC-R */
struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
/* virtual buffer */
struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
/* for rmb only: memory region
* incl. rkey provided to peer
*/
u32 order; /* allocation order */
};
struct { /* SMC-D */
unsigned short sba_idx;
/* SBA index number */
u64 token;
/* DMB token number */
dma_addr_t dma_addr;
/* DMA address */
};
};
};
struct smc_rtoken { /* address/key of remote RMB */
......@@ -148,12 +161,10 @@ struct smc_rtoken { /* address/key of remote RMB */
* struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
*/
struct smcd_dev;
struct smc_link_group {
struct list_head list;
enum smc_lgr_role role; /* client or server */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
struct rb_root conns_all; /* connection tree */
rwlock_t conns_lock; /* protects conns_all */
unsigned int conns_num; /* current # of connections */
......@@ -163,17 +174,35 @@ struct smc_link_group {
rwlock_t sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
rwlock_t rmbs_lock; /* protects rx buffers */
struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX]
[SMC_LINKS_PER_LGR_MAX];
/* remote addr/key pairs */
unsigned long rtokens_used_mask[BITS_TO_LONGS(
SMC_RMBS_PER_LGR_MAX)];
/* used rtoken elements */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
u8 sync_err : 1; /* lgr no longer fits to peer */
u8 terminating : 1;/* lgr is terminating */
bool is_smcd; /* SMC-R or SMC-D */
union {
struct { /* SMC-R */
enum smc_lgr_role role;
/* client or server */
struct smc_link lnk[SMC_LINKS_PER_LGR_MAX];
/* smc link */
char peer_systemid[SMC_SYSTEMID_LEN];
/* unique system_id of peer */
struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX]
[SMC_LINKS_PER_LGR_MAX];
/* remote addr/key pairs */
unsigned long rtokens_used_mask[BITS_TO_LONGS
(SMC_RMBS_PER_LGR_MAX)];
/* used rtoken elements */
};
struct { /* SMC-D */
u64 peer_gid;
/* Peer GID (remote) */
struct smcd_dev *smcd;
/* ISM device for VLAN reg. */
};
};
};
/* Find the connection associated with the given alert token in the link group.
......@@ -217,7 +246,8 @@ void smc_lgr_free(struct smc_link_group *lgr);
void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
int smc_buf_create(struct smc_sock *smc);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_clc_msg_accept_confirm *clc);
......@@ -227,9 +257,13 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc,
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
u64 peer_gid);
void smcd_conn_free(struct smc_connection *conn);
void smc_core_exit(void);
#endif
......@@ -136,7 +136,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}
if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
......@@ -155,6 +156,21 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
goto errout;
}
if (smc->conn.lgr && smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_connection *conn = &smc->conn;
struct smcd_diag_dmbinfo dinfo = {
.linkid = *((u32 *)conn->lgr->id),
.peer_gid = conn->lgr->peer_gid,
.my_gid = conn->lgr->smcd->local_gid,
.token = conn->rmb_desc->token,
.peer_token = conn->peer_token
};
if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0)
goto errout;
}
nlmsg_end(skb, nlh);
return 0;
......
......@@ -143,6 +143,62 @@ int smc_ib_ready_link(struct smc_link *lnk)
return rc;
}
/* Read GID index 0 of port @ibport into smcibdev->gid[] and copy the MAC
 * address of the net device attached to that GID into smcibdev->mac[].
 * @ibport is 1-based; the per-port arrays are indexed with ibport - 1.
 *
 * Returns 0 on success, -ENODEV if the GID query fails or the GID has no
 * net device.
 */
static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
{
struct ib_gid_attr gattr;
int rc;
rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
&smcibdev->gid[ibport - 1], &gattr);
if (rc || !gattr.ndev)
return -ENODEV;
memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
/* drop the net device reference once the MAC address has been copied */
dev_put(gattr.ndev);
return 0;
}
/* Build the system identifier of this SMC-R instance.
 * Bytes 0-1 of local_systemid are filled with random data, the bytes from
 * offset 2 on receive the MAC address stored for port @ibport (1-based) of
 * @smcibdev. The identifier is delivered to the peer during connection
 * initialization.
 */
static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
						u8 ibport)
{
	int port_idx = ibport - 1;

	get_random_bytes(&local_systemid[0], 2);
	memcpy(&local_systemid[2], smcibdev->mac[port_idx],
	       sizeof(smcibdev->mac[port_idx]));
}
/* Tell whether the remembered attributes of port @ibport (1-based) of
 * @smcibdev show the port in state IB_PORT_ACTIVE.
 */
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
{
	int port_idx = ibport - 1;

	return IB_PORT_ACTIVE == smcibdev->pattr[port_idx].state;
}
/* Refresh the cached attributes of port @ibport (1-based) of @smcibdev.
 *
 * The cached port attributes are cleared and queried again, then GID and MAC
 * address of the port are read. If local_systemid still holds its reset value
 * and the port is active, the local system identifier is created from this
 * port.
 *
 * Returns 0 on success, otherwise the error of ib_query_port() or
 * smc_ib_fill_gid_and_mac().
 */
static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
{
	int port_idx = ibport - 1;
	int rc;

	memset(&smcibdev->pattr[port_idx], 0,
	       sizeof(smcibdev->pattr[port_idx]));
	rc = ib_query_port(smcibdev->ibdev, ibport,
			   &smcibdev->pattr[port_idx]);
	if (rc)
		return rc;

	/* the SMC protocol requires specification of the RoCE MAC address */
	rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
	if (rc)
		return rc;

	if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
		     sizeof(local_systemid)) &&
	    smc_ib_port_active(smcibdev, ibport)) {
		/* create unique system identifier */
		smc_ib_define_local_systemid(smcibdev, ibport);
	}

	return 0;
}
/* process context wrapper for might_sleep smc_ib_remember_port_attr */
static void smc_ib_port_event_work(struct work_struct *work)
{
......@@ -370,62 +426,6 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
}
static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
{
struct ib_gid_attr gattr;
int rc;
rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
&smcibdev->gid[ibport - 1], &gattr);
if (rc || !gattr.ndev)
return -ENODEV;
memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
dev_put(gattr.ndev);
return 0;
}
/* Create an identifier unique for this instance of SMC-R.
* The MAC-address of the first active registered IB device
* plus a random 2-byte number is used to create this identifier.
* This name is delivered to the peer during connection initialization.
*/
static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
u8 ibport)
{
memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
sizeof(smcibdev->mac[ibport - 1]));
get_random_bytes(&local_systemid[0], 2);
}
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
{
return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
}
int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
{
int rc;
memset(&smcibdev->pattr[ibport - 1], 0,
sizeof(smcibdev->pattr[ibport - 1]));
rc = ib_query_port(smcibdev->ibdev, ibport,
&smcibdev->pattr[ibport - 1]);
if (rc)
goto out;
/* the SMC protocol requires specification of the RoCE MAC address */
rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
if (rc)
goto out;
if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
sizeof(local_systemid)) &&
smc_ib_port_active(smcibdev, ibport))
/* create unique system identifier */
smc_ib_define_local_systemid(smcibdev, ibport);
out:
return rc;
}
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
{
struct ib_cq_init_attr cqattr = {
......@@ -454,9 +454,6 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
smcibdev->roce_cq_recv = NULL;
goto err;
}
INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
smc_ib_global_event_handler);
ib_register_event_handler(&smcibdev->event_handler);
smc_wr_add_dev(smcibdev);
smcibdev->initialized = 1;
return rc;
......@@ -472,7 +469,6 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
return;
smcibdev->initialized = 0;
smc_wr_remove_dev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
ib_destroy_cq(smcibdev->roce_cq_recv);
ib_destroy_cq(smcibdev->roce_cq_send);
}
......@@ -483,6 +479,8 @@ static struct ib_client smc_ib_client;
static void smc_ib_add_dev(struct ib_device *ibdev)
{
struct smc_ib_device *smcibdev;
u8 port_cnt;
int i;
if (ibdev->node_type != RDMA_NODE_IB_CA)
return;
......@@ -498,6 +496,21 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
list_add_tail(&smcibdev->list, &smc_ib_devices.list);
spin_unlock(&smc_ib_devices.lock);
ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
smc_ib_global_event_handler);
ib_register_event_handler(&smcibdev->event_handler);
/* trigger reading of the port attributes */
port_cnt = smcibdev->ibdev->phys_port_cnt;
for (i = 0;
i < min_t(size_t, port_cnt, SMC_MAX_PORTS);
i++) {
set_bit(i, &smcibdev->port_event_mask);
/* determine pnetids of the port */
smc_pnetid_by_dev_port(ibdev->dev.parent, i,
smcibdev->pnetid[i]);
}
schedule_work(&smcibdev->port_event_work);
}
/* callback function for ib_register_client() */
......@@ -512,6 +525,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
spin_unlock(&smc_ib_devices.lock);
smc_pnet_remove_by_ibdev(smcibdev);
smc_ib_cleanup_per_ibdev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
kfree(smcibdev);
}
......
......@@ -15,6 +15,7 @@
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <rdma/ib_verbs.h>
#include <net/smc.h>
#define SMC_MAX_PORTS 2 /* Max # of ports */
#define SMC_GID_SIZE sizeof(union ib_gid)
......@@ -40,6 +41,8 @@ struct smc_ib_device { /* ib-device infos for smc */
char mac[SMC_MAX_PORTS][ETH_ALEN];
/* mac address per port*/
union ib_gid gid[SMC_MAX_PORTS]; /* gid per port */
u8 pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN];
/* pnetid per port */
u8 initialized : 1; /* ib dev CQ, evthdl done */
struct work_struct port_event_work;
unsigned long port_event_mask;
......@@ -51,7 +54,6 @@ struct smc_link;
int smc_ib_register_client(void) __init;
void smc_ib_unregister_client(void);
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);
int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
......
// SPDX-License-Identifier: GPL-2.0
/* Shared Memory Communications Direct over ISM devices (SMC-D)
*
* Functions for ISM device.
*
* Copyright IBM Corp. 2018
*/
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <asm/page.h>
#include "smc.h"
#include "smc_core.h"
#include "smc_ism.h"
#include "smc_pnet.h"
/* Global list object for SMC-D devices, statically initialized with an empty
 * list head and an unlocked spinlock.
 */
struct smcd_dev_list smcd_dev_list = {
.list = LIST_HEAD_INIT(smcd_dev_list.list),
.lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock)
};
/* Test if an ISM communication is possible.
 * Asks the device via query_remote_gid() about @peer_gid; the VLAN id is
 * flagged as valid only if @vlan_id is non-zero. Returns the result of the
 * device operation.
 */
int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
{
	u32 vid_valid = vlan_id ? 1 : 0;

	return smcd->ops->query_remote_gid(smcd, peer_gid, vid_valid, vlan_id);
}
/* Move @len bytes from @data to the position described by @pos using the
 * device's move_data() operation.
 * Returns the negative result of move_data() on failure; any non-negative
 * result is reported as 0.
 */
int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos,
		  void *data, size_t len)
{
	int rc = smcd->ops->move_data(smcd, pos->token, pos->index,
				      pos->signal, pos->offset, data, len);

	if (rc < 0)
		return rc;
	return 0;
}
/* Set a connection using this DMBE. */
void smc_ism_set_conn(struct smc_connection *conn)
{
unsigned long flags;
spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = conn;
spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
}
/* Unset a connection using this DMBE. */
void smc_ism_unset_conn(struct smc_connection *conn)
{
unsigned long flags;
if (!conn->rmb_desc)
return;
spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = NULL;
spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
}
/* Register a VLAN identifier with the ISM device. Use a reference count
 * and add a VLAN identifier only when the first DMB using this VLAN is
 * registered.
 *
 * Returns 0 on success, -EINVAL for vlanid 0, -ENOMEM if the list entry
 * cannot be allocated and -EIO if the device refuses the VLAN id.
 */
int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid)
{
struct smc_ism_vlanid *new_vlan, *vlan;
unsigned long flags;
int rc = 0;
if (!vlanid) /* No valid vlan id */
return -EINVAL;
/* create new vlan entry, in case we need it */
/* allocated before the spinlock is taken, since GFP_KERNEL may sleep */
new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL);
if (!new_vlan)
return -ENOMEM;
new_vlan->vlanid = vlanid;
refcount_set(&new_vlan->refcnt, 1);
/* if there is an existing entry, increase count and return */
spin_lock_irqsave(&smcd->lock, flags);
list_for_each_entry(vlan, &smcd->vlan, list) {
if (vlan->vlanid == vlanid) {
refcount_inc(&vlan->refcnt);
/* the preallocated entry is not needed */
kfree(new_vlan);
goto out;
}
}
/* no existing entry found.
 * add new entry to device; might fail, e.g., if HW limit reached
 */
if (smcd->ops->add_vlan_id(smcd, vlanid)) {
kfree(new_vlan);
rc = -EIO;
goto out;
}
list_add_tail(&new_vlan->list, &smcd->vlan);
out:
spin_unlock_irqrestore(&smcd->lock, flags);
return rc;
}
/* Unregister a VLAN identifier with the ISM device. Use a reference count
* and remove a VLAN identifier only when the last DMB using this VLAN is
* unregistered.
*/
int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid)
{
struct smc_ism_vlanid *vlan;
unsigned long flags;
bool found = false;
int rc = 0;
if (!vlanid) /* No valid vlan id */
return -EINVAL;
spin_lock_irqsave(&smcd->lock, flags);
list_for_each_entry(vlan, &smcd->vlan, list) {
if (vlan->vlanid == vlanid) {
if (!refcount_dec_and_test(&vlan->refcnt))
goto out;
found = true;
break;
}
}
if (!found) {
rc = -ENOENT;
goto out; /* VLAN id not in table */
}
/* Found and the last reference just gone */
if (smcd->ops->del_vlan_id(smcd, vlanid))
rc = -EIO;
list_del(&vlan->list);
kfree(vlan);
out:
spin_unlock_irqrestore(&smcd->lock, flags);
return rc;
}
int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
{
struct smcd_dmb dmb;
memset(&dmb, 0, sizeof(dmb));
dmb.dmb_tok = dmb_desc->token;
dmb.sba_idx = dmb_desc->sba_idx;
dmb.cpu_addr = dmb_desc->cpu_addr;
dmb.dma_addr = dmb_desc->dma_addr;
dmb.dmb_len = dmb_desc->len;
return smcd->ops->unregister_dmb(smcd, &dmb);
}
int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
struct smc_buf_desc *dmb_desc)
{
struct smcd_dmb dmb;
int rc;
memset(&dmb, 0, sizeof(dmb));
dmb.dmb_len = dmb_len;
dmb.sba_idx = dmb_desc->sba_idx;
dmb.vlan_id = lgr->vlan_id;
dmb.rgid = lgr->peer_gid;
rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb);
if (!rc) {
dmb_desc->sba_idx = dmb.sba_idx;
dmb_desc->token = dmb.dmb_tok;
dmb_desc->cpu_addr = dmb.cpu_addr;
dmb_desc->dma_addr = dmb.dma_addr;
dmb_desc->len = dmb.dmb_len;
}
return rc;
}
/* Work item carrying one SMC-D device event from smcd_handle_event() to the
 * event worker. Allocated per event and freed by the worker.
 */
struct smc_ism_event_work {
struct work_struct work;
/* device the event was reported for */
struct smcd_dev *smcd;
/* private copy of the event, taken when the work item is queued */
struct smcd_event event;
};
/* worker for SMC-D events
 *
 * Runs on the device's event workqueue. A GID event terminates via
 * smc_smcd_terminate() using the event token as peer GID; a DMB event
 * needs no action. The work item is freed in either case.
 */
static void smc_ism_event_work(struct work_struct *work)
{
	struct smc_ism_event_work *item;

	item = container_of(work, struct smc_ism_event_work, work);

	/* GID event, token is peer GID */
	if (item->event.type == ISM_EVENT_GID)
		smc_smcd_terminate(item->smcd, item->event.tok);

	kfree(item);
}
/* Release callback of the struct device embedded in struct smcd_dev:
 * frees the connection table and then the smcd_dev itself.
 */
static void smcd_release(struct device *dev)
{
	struct smcd_dev *smcd;

	smcd = container_of(dev, struct smcd_dev, dev);
	kfree(smcd->conn);
	kfree(smcd);
}
/* Allocate and initialize an SMC-D device.
 *
 * @parent:   parent device; also used to look up the pnetid
 * @name:     device name, also used as suffix of the event workqueue name
 * @ops:      device operations
 * @max_dmbs: number of slots in the connection table
 *
 * Returns the new device, or NULL if any allocation (device structure,
 * connection table or event workqueue) fails. All allocations are done
 * before device_initialize(), so failure paths can simply kfree() what
 * has been allocated so far.
 */
struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
				const struct smcd_ops *ops, int max_dmbs)
{
	struct smcd_dev *smcd;

	smcd = kzalloc(sizeof(*smcd), GFP_KERNEL);
	if (!smcd)
		return NULL;

	smcd->conn = kcalloc(max_dmbs, sizeof(*smcd->conn), GFP_KERNEL);
	if (!smcd->conn)
		goto free_smcd;

	/* a NULL workqueue would crash later in queue_work() and
	 * flush_workqueue(), so treat a failed allocation as fatal
	 */
	smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s",
						 WQ_MEM_RECLAIM, name);
	if (!smcd->event_wq)
		goto free_conn;

	smcd->dev.parent = parent;
	smcd->dev.release = smcd_release;
	device_initialize(&smcd->dev);
	/* never use a caller-supplied string as the format itself */
	dev_set_name(&smcd->dev, "%s", name);
	smcd->ops = ops;
	smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);

	spin_lock_init(&smcd->lock);
	INIT_LIST_HEAD(&smcd->vlan);
	return smcd;

free_conn:
	kfree(smcd->conn);
free_smcd:
	kfree(smcd);
	return NULL;
}
EXPORT_SYMBOL_GPL(smcd_alloc_dev);
/* Register an SMC-D device: put it on the global device list and add it to
 * the driver core.
 *
 * Returns the result of device_add(). If device_add() fails the device is
 * taken off the global list again, so that no list entry is left pointing
 * at a device the caller is about to free.
 */
int smcd_register_dev(struct smcd_dev *smcd)
{
	int rc;

	spin_lock(&smcd_dev_list.lock);
	list_add_tail(&smcd->list, &smcd_dev_list.list);
	spin_unlock(&smcd_dev_list.lock);

	rc = device_add(&smcd->dev);
	if (rc) {
		spin_lock(&smcd_dev_list.lock);
		/* _init variant keeps a later list_del() on this entry safe */
		list_del_init(&smcd->list);
		spin_unlock(&smcd_dev_list.lock);
	}
	return rc;
}
EXPORT_SYMBOL_GPL(smcd_register_dev);
/* Take an SMC-D device out of service.
 *
 * Removes the device from the global device list, flushes and destroys its
 * event workqueue, calls smc_smcd_terminate() with 0 as second argument
 * (NOTE(review): presumably "all peers" - confirm against
 * smc_smcd_terminate()) and finally deletes the device from the driver core.
 */
void smcd_unregister_dev(struct smcd_dev *smcd)
{
spin_lock(&smcd_dev_list.lock);
list_del(&smcd->list);
spin_unlock(&smcd_dev_list.lock);
/* let already queued events finish before the queue goes away */
flush_workqueue(smcd->event_wq);
destroy_workqueue(smcd->event_wq);
smc_smcd_terminate(smcd, 0);
device_del(&smcd->dev);
}
EXPORT_SYMBOL_GPL(smcd_unregister_dev);
/* Drop a reference on the struct device embedded in @smcd. The memory itself
 * is freed by smcd_release(), which smcd_alloc_dev() installs as the
 * device's release callback.
 */
void smcd_free_dev(struct smcd_dev *smcd)
{
put_device(&smcd->dev);
}
EXPORT_SYMBOL_GPL(smcd_free_dev);
/* SMCD Device event handler. Called from ISM device interrupt handler.
 * Parameters are smcd device pointer,
 * - event->type (0 --> DMB, 1 --> GID),
 * - event->code (event code),
 * - event->tok (either DMB token when event type 0, or GID when event type 1)
 * - event->time (time of day)
 * - event->info (debug info).
 *
 * The event is copied into a freshly allocated work item and queued on the
 * device's event workqueue; if the atomic allocation fails the event is
 * dropped silently.
 *
 * Context:
 * - Function called in IRQ context from ISM device driver event handler.
 */
void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
{
	struct smc_ism_event_work *item = kmalloc(sizeof(*item), GFP_ATOMIC);

	if (!item)
		return;

	/* hand a private copy of the event over to the worker */
	item->event = *event;
	item->smcd = smcd;
	INIT_WORK(&item->work, smc_ism_event_work);
	queue_work(smcd->event_wq, &item->work);
}
EXPORT_SYMBOL_GPL(smcd_handle_event);
/* SMCD Device interrupt handler. Called from ISM device interrupt handler.
 * Parameters are smcd device pointer and DMB number. Find the connection and
 * schedule the tasklet for this connection.
 *
 * The connection table is read under the device lock; an empty slot is
 * ignored.
 *
 * Context:
 * - Function called in IRQ context from ISM device driver IRQ handler.
 */
void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno)
{
	struct smc_connection *owner;
	unsigned long irq_flags;

	spin_lock_irqsave(&smcd->lock, irq_flags);
	owner = smcd->conn[dmbno];
	if (owner)
		tasklet_schedule(&owner->rx_tsklet);
	spin_unlock_irqrestore(&smcd->lock, irq_flags);
}
EXPORT_SYMBOL_GPL(smcd_handle_irq);
/* SPDX-License-Identifier: GPL-2.0 */
/* Shared Memory Communications Direct over ISM devices (SMC-D)
*
* SMC-D ISM device structure definitions.
*
* Copyright IBM Corp. 2018
*/
#ifndef SMCD_ISM_H
#define SMCD_ISM_H
#include <linux/uio.h>
#include "smc.h"
/* Container type for the global list of SMC-D devices: the list head plus
 * the spinlock that serializes access to it.
 */
struct smcd_dev_list { /* List of SMCD devices */
struct list_head list;
spinlock_t lock; /* Protects list of devices */
};
extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */
/* One VLAN id that has been added to an ISM device. Entries are linked into
 * the device's vlan list and reference counted by smc_ism_get_vlan() /
 * smc_ism_put_vlan().
 */
struct smc_ism_vlanid { /* VLAN id set on ISM device */
struct list_head list;
unsigned short vlanid; /* Vlan id */
refcount_t refcnt; /* Reference count */
};
/* Target description of a single ISM write; smc_ism_write() passes these
 * fields on to the device's move_data() operation.
 */
struct smc_ism_position { /* ISM device position to write to */
u64 token; /* Token of DMB */
u32 offset; /* Offset into DMBE */
u8 index; /* Index of DMBE */
u8 signal; /* Generate interrupt on owner side */
};
struct smcd_dev;
int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev);
void smc_ism_set_conn(struct smc_connection *conn);
void smc_ism_unset_conn(struct smc_connection *conn);
int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id);
int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id);
int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
struct smc_buf_desc *dmb_desc);
int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
void *data, size_t len);
#endif
......@@ -22,13 +22,12 @@
#include "smc_pnet.h"
#include "smc_ib.h"
#define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */
#include "smc_ism.h"
static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
[SMC_PNETID_NAME] = {
.type = NLA_NUL_STRING,
.len = SMC_MAX_PNET_ID_LEN - 1
.len = SMC_MAX_PNETID_LEN - 1
},
[SMC_PNETID_ETHNAME] = {
.type = NLA_NUL_STRING,
......@@ -65,7 +64,7 @@ static struct smc_pnettable {
*/
struct smc_pnetentry {
struct list_head list;
char pnet_name[SMC_MAX_PNET_ID_LEN + 1];
char pnet_name[SMC_MAX_PNETID_LEN + 1];
struct net_device *ndev;
struct smc_ib_device *smcibdev;
u8 ib_port;
......@@ -209,7 +208,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
return false;
while (--end >= bf && isspace(*end))
;
if (end - bf >= SMC_MAX_PNET_ID_LEN)
if (end - bf >= SMC_MAX_PNETID_LEN)
return false;
while (bf <= end) {
if (!isalnum(*bf))
......@@ -358,9 +357,6 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
kfree(pnetelem);
return rc;
}
rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port);
if (rc)
smc_pnet_remove_by_pnetid(pnetelem->pnet_name);
return rc;
}
......@@ -485,10 +481,10 @@ static int smc_pnet_netdev_event(struct notifier_block *this,
case NETDEV_REBOOT:
case NETDEV_UNREGISTER:
smc_pnet_remove_by_ndev(event_dev);
return NOTIFY_OK;
default:
break;
return NOTIFY_DONE;
}
return NOTIFY_DONE;
}
static struct notifier_block smc_netdev_notifier = {
......@@ -515,26 +511,91 @@ void smc_pnet_exit(void)
genl_unregister_family(&smc_pnet_nl_family);
}
/* PNET table analysis for a given sock:
* determine ib_device and port belonging to used internal TCP socket
* ethernet interface.
/* Determine one base device for stacked net devices.
* If the lower device level contains more than one device
* (for instance with bonding slaves), just the first device
* is used to reach a base device.
*/
void smc_pnet_find_roce_resource(struct sock *sk,
struct smc_ib_device **smcibdev, u8 *ibport)
static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
{
struct dst_entry *dst = sk_dst_get(sk);
struct smc_pnetentry *pnetelem;
int i, nest_lvl;
*smcibdev = NULL;
*ibport = 0;
rtnl_lock();
nest_lvl = dev_get_nest_level(ndev);
for (i = 0; i < nest_lvl; i++) {
struct list_head *lower = &ndev->adj_list.lower;
if (list_empty(lower))
break;
lower = lower->next;
ndev = netdev_lower_get_next(ndev, &lower);
}
rtnl_unlock();
return ndev;
}
/* Determine the corresponding IB device port based on the hardware PNETID.
 * Searching stops at the first matching active IB device port.
 *
 * @ndev:     net device of the connection; its base device is used to
 *            determine the pnetid
 * @smcibdev: set to the matching IB device; left untouched if none is found
 * @ibport:   set to the matching port number (1-based); left untouched if
 *            none is found
 */
static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
					 struct smc_ib_device **smcibdev,
					 u8 *ibport)
{
	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
	struct smc_ib_device *ibdev;
	int i;

	ndev = pnet_find_base_ndev(ndev);
	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
				   ndev_pnetid))
		return; /* pnetid could not be determined */

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		for (i = 1; i <= SMC_MAX_PORTS; i++) {
			if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
				    SMC_MAX_PNETID_LEN) &&
			    smc_ib_port_active(ibdev, i)) {
				*smcibdev = ibdev;
				*ibport = i;
				/* leave both loops: a plain break would only
				 * end the port loop and let a later device
				 * overwrite this first match
				 */
				goto out;
			}
		}
	}
out:
	spin_unlock(&smc_ib_devices.lock);
}
/* Determine the SMC-D device whose pnetid equals the hardware pnetid of the
 * base device of @ndev. The first match on the global SMC-D device list is
 * stored in @smcismdev; without a match, or if the pnetid cannot be
 * determined, @smcismdev is left untouched.
 */
static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
					struct smcd_dev **smcismdev)
{
	u8 wanted_pnetid[SMC_MAX_PNETID_LEN];
	struct smcd_dev *candidate;

	ndev = pnet_find_base_ndev(ndev);
	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
				   wanted_pnetid))
		return; /* pnetid could not be determined */

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(candidate, &smcd_dev_list.list, list) {
		if (memcmp(candidate->pnetid, wanted_pnetid,
			   SMC_MAX_PNETID_LEN))
			continue;
		*smcismdev = candidate;
		break;
	}
	spin_unlock(&smcd_dev_list.lock);
}
/* Lookup of coupled ib_device via SMC pnet table */
static void smc_pnet_find_roce_by_table(struct net_device *netdev,
struct smc_ib_device **smcibdev,
u8 *ibport)
{
struct smc_pnetentry *pnetelem;
if (!dst)
return;
if (!dst->dev)
goto out_rel;
read_lock(&smc_pnettable.lock);
list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
if (dst->dev == pnetelem->ndev) {
if (netdev == pnetelem->ndev) {
if (smc_ib_port_active(pnetelem->smcibdev,
pnetelem->ib_port)) {
*smcibdev = pnetelem->smcibdev;
......@@ -544,6 +605,54 @@ void smc_pnet_find_roce_resource(struct sock *sk,
}
}
read_unlock(&smc_pnettable.lock);
}
/* PNET table analysis for a given sock:
 * determine ib_device and port belonging to used internal TCP socket
 * ethernet interface.
 *
 * @smcibdev and @ibport are reset to NULL / 0 first. The hardware-defined
 * pnetid is tried first; only if that yields no device is the SMC pnet
 * table consulted.
 */
void smc_pnet_find_roce_resource(struct sock *sk,
				 struct smc_ib_device **smcibdev, u8 *ibport)
{
	struct dst_entry *route = sk_dst_get(sk);

	*smcibdev = NULL;
	*ibport = 0;
	if (!route)
		return;

	if (route->dev) {
		/* if possible, lookup via hardware-defined pnetid */
		smc_pnet_find_roce_by_pnetid(route->dev, smcibdev, ibport);
		/* otherwise lookup via SMC PNET table */
		if (!*smcibdev)
			smc_pnet_find_roce_by_table(route->dev, smcibdev,
						    ibport);
	}
	dst_release(route);
}
/* Determine the SMC-D device belonging to the ethernet interface used by
 * the internal TCP socket. @smcismdev is reset to NULL first and is only
 * set when a device with a matching hardware-defined pnetid is found.
 */
void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
{
	struct dst_entry *route = sk_dst_get(sk);

	*smcismdev = NULL;
	if (!route)
		return;

	/* if possible, lookup via hardware-defined pnetid */
	if (route->dev)
		smc_pnet_find_ism_by_pnetid(route->dev, smcismdev);

	dst_release(route);
}
......@@ -12,12 +12,28 @@
#ifndef _SMC_PNET_H
#define _SMC_PNET_H
#if IS_ENABLED(CONFIG_HAVE_PNETID)
#include <asm/pnet.h>
#endif
struct smc_ib_device;
struct smcd_dev;
/* Look up the pnetid of port @port of device @dev and store it in @pnetid.
 * Delegates to pnet_id_by_dev_port() when CONFIG_HAVE_PNETID is enabled;
 * otherwise always reports -ENOENT.
 */
static inline int smc_pnetid_by_dev_port(struct device *dev,
					 unsigned short port, u8 *pnetid)
{
	int rc = -ENOENT;

#if IS_ENABLED(CONFIG_HAVE_PNETID)
	rc = pnet_id_by_dev_port(dev, port, pnetid);
#endif
	return rc;
}
int smc_pnet_init(void) __init;
void smc_pnet_exit(void);
int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
void smc_pnet_find_roce_resource(struct sock *sk,
struct smc_ib_device **smcibdev, u8 *ibport);
void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);
#endif
......@@ -305,7 +305,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
/* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
rcvbuf_base = conn->rmb_desc->cpu_addr;
rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;
do { /* while (read_remaining) */
if (read_done >= target || (pipe && read_done))
......
......@@ -24,6 +24,7 @@
#include "smc.h"
#include "smc_wr.h"
#include "smc_cdc.h"
#include "smc_ism.h"
#include "smc_tx.h"
#define SMC_TX_WORK_DELAY HZ
......@@ -250,6 +251,24 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
/***************************** sndbuf consumer *******************************/
/* sndbuf consumer: actual data transfer of one target chunk with ISM write
 *
 * Writes @len bytes from @data to the peer's DMB, identified by the
 * connection's peer token and peer RMBE index, at the connection's tx_off
 * plus @offset. @signal is forwarded as the position's signal field.
 * On failure the peer_conn_abort flag is set in the local tx control
 * block and the error code is returned.
 */
int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
		      u32 offset, int signal)
{
	struct smc_ism_position target;
	int ret;

	memset(&target, 0, sizeof(target));
	target.signal = signal;
	target.offset = conn->tx_off + offset;
	target.index = conn->peer_rmbe_idx;
	target.token = conn->peer_token;

	ret = smc_ism_write(conn->lgr->smcd, &target, data, len);
	if (!ret)
		return 0;

	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	return ret;
}
/* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
int num_sges, struct ib_sge sges[])
......@@ -297,21 +316,104 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
smc_curs_add(conn->sndbuf_desc->len, sent, len);
}
/* SMC-R helper for smc_tx_rdma_writes()
 *
 * Transfers @len bytes from the send buffer to the peer RMBE in at most two
 * destination chunks (the second starts at RMBE offset 0), each posted as
 * one RDMA write built from at most two source chunks of the send ring.
 *
 * @src_off/@src_len: offset and length of the first source chunk
 * @dst_off/@dst_len: offset and length of the first destination chunk
 *
 * Returns 0 on success or the first error from smc_tx_rdma_write().
 */
static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
size_t src_off, size_t src_len,
size_t dst_off, size_t dst_len)
{
dma_addr_t dma_addr =
sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
int src_len_sum = src_len, dst_len_sum = dst_len;
struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
/* initial source offset, kept for sizing the second destination chunk */
int sent_count = src_off;
int srcchunk, dstchunk;
int num_sges;
int rc;
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
num_sges = 0;
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
/* one scatter/gather element per source chunk */
sges[srcchunk].addr = dma_addr + src_off;
sges[srcchunk].length = src_len;
sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
num_sges++;
src_off += src_len;
if (src_off >= conn->sndbuf_desc->len)
src_off -= conn->sndbuf_desc->len;
/* modulo in send ring */
if (src_len_sum == dst_len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
src_len = dst_len - src_len; /* remainder */
src_len_sum += src_len;
}
/* post the collected source chunks as one write to dst_off */
rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
if (rc)
return rc;
if (dst_len_sum == len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
dst_off = 0; /* modulo offset in RMBE ring buffer */
dst_len = len - dst_len; /* remainder */
dst_len_sum += dst_len;
src_len = min_t(int, dst_len, conn->sndbuf_desc->len -
sent_count);
src_len_sum = src_len;
}
return 0;
}
/* SMC-D helper for smc_tx_rdma_writes()
 *
 * Transfers @len bytes from the send buffer to the peer DMB in at most two
 * destination chunks (the second starts at offset 0), each consisting of at
 * most two source chunks of the send ring. Every source chunk is written
 * separately with smcd_tx_ism_write(), without signal, at the destination
 * offset shifted by sizeof(struct smcd_cdc_msg).
 *
 * @src_off/@src_len: offset and length of the first source chunk
 * @dst_off/@dst_len: offset and length of the first destination chunk
 *
 * Returns 0 on success or the first error from smcd_tx_ism_write().
 */
static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len,
size_t src_off, size_t src_len,
size_t dst_off, size_t dst_len)
{
int src_len_sum = src_len, dst_len_sum = dst_len;
int srcchunk, dstchunk;
int rc;
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
void *data = conn->sndbuf_desc->cpu_addr + src_off;
/* payload is placed behind a struct smcd_cdc_msg sized area */
rc = smcd_tx_ism_write(conn, data, src_len, dst_off +
sizeof(struct smcd_cdc_msg), 0);
if (rc)
return rc;
dst_off += src_len;
src_off += src_len;
if (src_off >= conn->sndbuf_desc->len)
src_off -= conn->sndbuf_desc->len;
/* modulo in send ring */
if (src_len_sum == dst_len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
src_len = dst_len - src_len; /* remainder */
src_len_sum += src_len;
}
if (dst_len_sum == len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
dst_off = 0; /* modulo offset in RMBE ring buffer */
dst_len = len - dst_len; /* remainder */
dst_len_sum += dst_len;
/* note: uses the advanced src_off here, unlike the SMC-R helper */
src_len = min_t(int, dst_len, conn->sndbuf_desc->len - src_off);
src_len_sum = src_len;
}
return 0;
}
/* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
* usable snd_wnd as max transmit
*/
static int smc_tx_rdma_writes(struct smc_connection *conn)
{
size_t src_off, src_len, dst_off, dst_len; /* current chunk values */
size_t len, dst_len_sum, src_len_sum, dstchunk, srcchunk;
size_t len, src_len, dst_off, dst_len; /* current chunk values */
union smc_host_cursor sent, prep, prod, cons;
struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
struct smc_link_group *lgr = conn->lgr;
struct smc_cdc_producer_flags *pflags;
int to_send, rmbespace;
struct smc_link *link;
dma_addr_t dma_addr;
int num_sges;
int rc;
/* source: sndbuf */
......@@ -341,7 +443,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
len = min(to_send, rmbespace);
/* initialize variables for first iteration of subsequent nested loop */
link = &lgr->lnk[SMC_SINGLE_LINK];
dst_off = prod.count;
if (prod.wrap == cons.wrap) {
/* the filled destination area is unwrapped,
......@@ -358,8 +459,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
*/
dst_len = len;
}
dst_len_sum = dst_len;
src_off = sent.count;
/* dst_len determines the maximum src_len */
if (sent.count + dst_len <= conn->sndbuf_desc->len) {
/* unwrapped src case: single chunk of entire dst_len */
......@@ -368,38 +467,15 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
/* wrapped src case: 2 chunks of sum dst_len; start with 1st: */
src_len = conn->sndbuf_desc->len - sent.count;
}
src_len_sum = src_len;
dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
num_sges = 0;
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
sges[srcchunk].addr = dma_addr + src_off;
sges[srcchunk].length = src_len;
sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
num_sges++;
src_off += src_len;
if (src_off >= conn->sndbuf_desc->len)
src_off -= conn->sndbuf_desc->len;
/* modulo in send ring */
if (src_len_sum == dst_len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
src_len = dst_len - src_len; /* remainder */
src_len_sum += src_len;
}
rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
if (rc)
return rc;
if (dst_len_sum == len)
break; /* either on 1st or 2nd iteration */
/* prepare next (== 2nd) iteration */
dst_off = 0; /* modulo offset in RMBE ring buffer */
dst_len = len - dst_len; /* remainder */
dst_len_sum += dst_len;
src_len = min_t(int,
dst_len, conn->sndbuf_desc->len - sent.count);
src_len_sum = src_len;
}
if (conn->lgr->is_smcd)
rc = smcd_tx_rdma_writes(conn, len, sent.count, src_len,
dst_off, dst_len);
else
rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
dst_off, dst_len);
if (rc)
return rc;
if (conn->urg_tx_pend && len == to_send)
pflags->urg_data_present = 1;
......@@ -420,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
/* Wakeup sndbuf consumers from any context (IRQ or process)
* since there is more data to transmit; usable snd_wnd as max transmit
*/
int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags;
struct smc_cdc_tx_pend *pend;
......@@ -467,6 +543,37 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
return rc;
}
/* SMC-D variant of smc_tx_sndbuf_nonempty().
 *
 * Under the connection's send lock: transmits pending send buffer data
 * (skipped while urg_data_present is already set), then sends a CDC
 * message. If both steps succeeded and urgent data is flagged as present,
 * the urgent-data producer flags are cleared again.
 *
 * Returns 0 on success or the first error encountered.
 */
static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
{
	struct smc_cdc_producer_flags *prod_flags;
	int ret = 0;

	prod_flags = &conn->local_tx_ctrl.prod_flags;

	spin_lock_bh(&conn->send_lock);
	if (!prod_flags->urg_data_present)
		ret = smc_tx_rdma_writes(conn);
	if (!ret)
		ret = smcd_cdc_msg_send(conn);

	if (!ret && prod_flags->urg_data_present) {
		prod_flags->urg_data_pending = 0;
		prod_flags->urg_data_present = 0;
	}
	spin_unlock_bh(&conn->send_lock);

	return ret;
}
/* Transmit pending send buffer data, dispatching to the SMC-D or SMC-R
 * implementation depending on the type of the connection's link group.
 * Returns the result of the selected implementation.
 */
int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
	if (conn->lgr->is_smcd)
		return smcd_tx_sndbuf_nonempty(conn);

	return smcr_tx_sndbuf_nonempty(conn);
}
/* Wakeup sndbuf consumers from process context
* since there is more data to transmit
*/
......@@ -495,7 +602,8 @@ void smc_tx_work(struct work_struct *work)
void smc_tx_consumer_update(struct smc_connection *conn, bool force)
{
union smc_host_cursor cfed, cons;
union smc_host_cursor cfed, cons, prod;
int sender_free = conn->rmb_desc->len;
int to_confirm;
smc_curs_write(&cons,
......@@ -505,11 +613,18 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
smc_curs_read(&conn->rx_curs_confirmed, conn),
conn);
to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);
if (to_confirm > conn->rmbe_update_limit) {
smc_curs_write(&prod,
smc_curs_read(&conn->local_rx_ctrl.prod, conn),
conn);
sender_free = conn->rmb_desc->len -
smc_curs_diff(conn->rmb_desc->len, &prod, &cfed);
}
if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
force ||
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmb_desc->len / 2)) ||
((sender_free <= (conn->rmb_desc->len / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
conn->alert_token_local) { /* connection healthy */
......
......@@ -33,5 +33,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
int smc_tx_sndbuf_nonempty(struct smc_connection *conn);
void smc_tx_sndbuf_nonfull(struct smc_sock *smc);
void smc_tx_consumer_update(struct smc_connection *conn, bool force);
int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
u32 offset, int signal);
#endif /* SMC_TX_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment