Commit 7c034dfd authored by Linus Torvalds

Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull InfiniBand/RDMA updates from Roland Dreier:

 - IPoIB fixes from Doug Ledford and Erez Shitrit

 - iSER updates from Sagi Grimberg

 - mlx4 GUID handling changes from Yishai Hadas

 - other misc fixes

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (51 commits)
  mlx5: wrong page mask if CONFIG_ARCH_DMA_ADDR_T_64BIT enabled for 32Bit architectures
  IB/iser: Rewrite bounce buffer code path
  IB/iser: Bump version to 1.6
  IB/iser: Remove code duplication for a single DMA entry
  IB/iser: Pass struct iser_mem_reg to iser_fast_reg_mr and iser_reg_sig_mr
  IB/iser: Modify struct iser_mem_reg members
  IB/iser: Make fastreg pool cache friendly
  IB/iser: Move PI context alloc/free to routines
  IB/iser: Move fastreg descriptor pool get/put to helper functions
  IB/iser: Merge build page-vec into register page-vec
  IB/iser: Get rid of struct iser_rdma_regd
  IB/iser: Remove redundant assignments in iser_reg_page_vec
  IB/iser: Move memory reg/dereg routines to iser_memory.c
  IB/iser: Don't pass ib_device to fall_to_bounce_buff routine
  IB/iser: Remove a redundant struct iser_data_buf
  IB/iser: Remove redundant cmd_data_len calculation
  IB/iser: Fix wrong calculation of protection buffer length
  IB/iser: Handle fastreg/local_inv completion errors
  IB/iser: Fix unload during ep_poll wrong dereference
  ib_srpt: convert printk's to pr_* functions
  ...
parents 1204c464 c1c2fef6
@@ -187,8 +187,10 @@ Check RDMA and NFS Setup
To further test the InfiniBand software stack, use IPoIB (this
assumes you have two IB hosts named host1 and host2):
-host1$ ifconfig ib0 a.b.c.x
-host2$ ifconfig ib0 a.b.c.y
+host1$ ip link set dev ib0 up
+host1$ ip address add dev ib0 a.b.c.x
+host2$ ip link set dev ib0 up
+host2$ ip address add dev ib0 a.b.c.y
host1$ ping a.b.c.y
host2$ ping a.b.c.x
@@ -229,7 +231,8 @@ NFS/RDMA Setup
$ modprobe ib_mthca
$ modprobe ib_ipoib
-$ ifconfig ib0 a.b.c.d
+$ ip li set dev ib0 up
+$ ip addr add dev ib0 a.b.c.d
NOTE: use unique addresses for the client and server
......
@@ -8803,6 +8803,15 @@ W: http://www.emulex.com
S: Supported
F: drivers/net/ethernet/emulex/benet/
+EMULEX ONECONNECT ROCE DRIVER
+M: Selvin Xavier <selvin.xavier@emulex.com>
+M: Devesh Sharma <devesh.sharma@emulex.com>
+M: Mitesh Ahuja <mitesh.ahuja@emulex.com>
+L: linux-rdma@vger.kernel.org
+W: http://www.emulex.com
+S: Supported
+F: drivers/infiniband/hw/ocrdma/
SFC NETWORK DRIVER
M: Solarflare linux maintainers <linux-net-drivers@solarflare.com>
M: Shradha Shah <sshah@solarflare.com>
......
@@ -99,12 +99,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (dmasync)
dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
+if (!size)
+return ERR_PTR(-EINVAL);
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
*/
-if ((PAGE_ALIGN(addr + size) <= size) ||
-(PAGE_ALIGN(addr + size) <= addr))
+if (((addr + size) < addr) ||
+PAGE_ALIGN(addr + size) < (addr + size))
return ERR_PTR(-EINVAL);
if (!can_do_mlock())
......
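The umem.c hunk above replaces the old PAGE_ALIGN-based sanity test with an explicit wrap-around check on the user-supplied (addr, size) pair, and rejects zero-length regions up front. A minimal stand-alone C sketch of the same two checks outside the kernel (PAGE_ALIGN and umem_range_ok below are illustrative re-implementations, not the kernel code):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* Mirror of the checks done on (addr, size) after this change. */
static bool umem_range_ok(unsigned long addr, size_t size)
{
	if (!size)                                 /* zero-length region */
		return false;
	if (addr + size < addr)                    /* addr + size wrapped around */
		return false;
	if (PAGE_ALIGN(addr + size) < addr + size) /* rounding up to a page wrapped */
		return false;
	return true;
}

int main(void)
{
	/* A normal range passes, a range that wraps the address space fails. */
	printf("%d\n", umem_range_ok(0x1000, 0x2000));
	printf("%d\n", umem_range_ok(~0UL - 100, 4096));
	return 0;
}

Compiled as-is this prints 1 then 0; the second request is rejected because addr + size wraps past the top of the address space.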
@@ -246,6 +246,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(uqp);
}
+list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
+struct ib_srq *srq = uobj->object;
+struct ib_uevent_object *uevent =
+container_of(uobj, struct ib_uevent_object, uobject);
+idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
+ib_destroy_srq(srq);
+ib_uverbs_release_uevent(file, uevent);
+kfree(uevent);
+}
list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
struct ib_cq *cq = uobj->object;
struct ib_uverbs_event_file *ev_file = cq->cq_context;
@@ -258,17 +269,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(ucq);
}
-list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
-struct ib_srq *srq = uobj->object;
-struct ib_uevent_object *uevent =
-container_of(uobj, struct ib_uevent_object, uobject);
-idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-ib_destroy_srq(srq);
-ib_uverbs_release_uevent(file, uevent);
-kfree(uevent);
-}
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object;
@@ -1430,6 +1430,10 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->ring[i].addr,
rx_buf_size,
DMA_FROM_DEVICE);
+if (ib_dma_mapping_error(ctx->ib_dev, tun_qp->ring[i].map)) {
+kfree(tun_qp->ring[i].addr);
+goto err;
+}
}
for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
@@ -1442,6 +1446,11 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->tx_ring[i].buf.addr,
tx_buf_size,
DMA_TO_DEVICE);
+if (ib_dma_mapping_error(ctx->ib_dev,
+tun_qp->tx_ring[i].buf.map)) {
+kfree(tun_qp->tx_ring[i].buf.addr);
+goto tx_err;
+}
tun_qp->tx_ring[i].ah = NULL;
}
spin_lock_init(&tun_qp->tx_lock);
......
@@ -66,9 +66,9 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
-int mlx4_ib_sm_guid_assign = 1;
+int mlx4_ib_sm_guid_assign = 0;
module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
-MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
+MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
@@ -2791,9 +2791,31 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_SLAVE_INIT:
/* here, p is the slave id */
do_slave_init(ibdev, p, 1);
+if (mlx4_is_master(dev)) {
+int i;
+for (i = 1; i <= ibdev->num_ports; i++) {
+if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+== IB_LINK_LAYER_INFINIBAND)
+mlx4_ib_slave_alias_guid_event(ibdev,
+p, i,
+1);
+}
+}
return;
case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+if (mlx4_is_master(dev)) {
+int i;
+for (i = 1; i <= ibdev->num_ports; i++) {
+if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+== IB_LINK_LAYER_INFINIBAND)
+mlx4_ib_slave_alias_guid_event(ibdev,
+p, i,
+0);
+}
+}
/* here, p is the slave id */
do_slave_init(ibdev, p, 0);
return;
......
@@ -342,14 +342,9 @@ struct mlx4_ib_ah {
enum mlx4_guid_alias_rec_status {
MLX4_GUID_INFO_STATUS_IDLE,
MLX4_GUID_INFO_STATUS_SET,
-MLX4_GUID_INFO_STATUS_PENDING,
};
-enum mlx4_guid_alias_rec_ownership {
-MLX4_GUID_DRIVER_ASSIGN,
-MLX4_GUID_SYSADMIN_ASSIGN,
-MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
-};
+#define GUID_STATE_NEED_PORT_INIT 0x01
enum mlx4_guid_alias_rec_method {
MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
@@ -360,8 +355,8 @@ struct mlx4_sriov_alias_guid_info_rec_det {
u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
-u8 method; /*set or delete*/
-enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
+unsigned int guids_retry_schedule[NUM_ALIAS_GUID_IN_REC];
+u64 time_to_run;
};
struct mlx4_sriov_alias_guid_port_rec_det {
@@ -369,6 +364,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
struct workqueue_struct *wq;
struct delayed_work alias_guid_work;
u8 port;
+u32 state_flags;
struct mlx4_sriov_alias_guid *parent;
struct list_head cb_list;
};
@@ -802,6 +798,8 @@ int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr);
ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+int port, int slave_init);
int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
......
@@ -566,6 +566,10 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
sizeof (struct mlx4_ib_proxy_sqp_hdr),
DMA_FROM_DEVICE);
+if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) {
+kfree(qp->sqp_proxy_rcv[i].addr);
+goto err;
+}
}
return 0;
@@ -2605,8 +2609,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
-*lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
-wr->wr.ud.hlen);
+*lso_hdr_sz = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
*lso_seg_len = halign;
return 0;
}
......
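In the build_lso_seg() hunk above, both the old and the new code pack two 16-bit quantities into one 32-bit word, with hlen in the low half; the difference is that the upper half now carries mss itself instead of mss - hlen. A quick stand-alone check of the two packings, with made-up numbers and cpu_to_be32() left out since only the bit layout is of interest:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mss = 2048, hlen = 56;
	uint32_t old_word = (mss - hlen) << 16 | hlen; /* packing before the fix */
	uint32_t new_word = mss << 16 | hlen;          /* packing after the fix  */
	printf("old: 0x%08x new: 0x%08x\n", old_word, new_word);
	return 0;
}

This prints old: 0x07c80038 new: 0x08000038, i.e. only the upper 16 bits of the descriptor word change.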
@@ -46,21 +46,17 @@
static ssize_t show_admin_alias_guid(struct device *dev,
struct device_attribute *attr, char *buf)
{
-int record_num;/*0-15*/
-int guid_index_in_rec; /*0 - 7*/
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
+__be64 sysadmin_ag_val;
-record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
-guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
-return sprintf(buf, "%llx\n",
-be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
-ports_guid[port->num - 1].
-all_rec_per_port[record_num].
-all_recs[8 * guid_index_in_rec]));
+sysadmin_ag_val = mlx4_get_admin_guid(mdev->dev,
+mlx4_ib_iov_dentry->entry_num,
+port->num);
+return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
}
/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
@@ -80,6 +76,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
u64 sysadmin_ag_val;
+unsigned long flags;
record_num = mlx4_ib_iov_dentry->entry_num / 8;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
@@ -87,6 +84,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
pr_err("GUID 0 block 0 is RO\n");
return count;
}
+spin_lock_irqsave(&mdev->sriov.alias_guid.ag_work_lock, flags);
sscanf(buf, "%llx", &sysadmin_ag_val);
*(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
all_rec_per_port[record_num].
@@ -96,33 +94,15 @@ static ssize_t store_admin_alias_guid(struct device *dev,
/* Change the state to be pending for update */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
= MLX4_GUID_INFO_STATUS_IDLE ;
-mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
-= MLX4_GUID_INFO_RECORD_SET;
-switch (sysadmin_ag_val) {
-case MLX4_GUID_FOR_DELETE_VAL:
-mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
-= MLX4_GUID_INFO_RECORD_DELETE;
-mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-= MLX4_GUID_SYSADMIN_ASSIGN;
-break;
-/* The sysadmin requests the SM to re-assign */
-case MLX4_NOT_SET_GUID:
-mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-= MLX4_GUID_DRIVER_ASSIGN;
-break;
-/* The sysadmin requests a specific value.*/
-default:
-mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-= MLX4_GUID_SYSADMIN_ASSIGN;
-break;
-}
+mlx4_set_admin_guid(mdev->dev, cpu_to_be64(sysadmin_ag_val),
+mlx4_ib_iov_dentry->entry_num,
+port->num);
/* set the record index */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
-= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
+|= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
+spin_unlock_irqrestore(&mdev->sriov.alias_guid.ag_work_lock, flags);
mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
return count;
......
@@ -87,7 +87,6 @@ enum {
IPOIB_FLAG_ADMIN_UP = 2,
IPOIB_PKEY_ASSIGNED = 3,
IPOIB_FLAG_SUBINTERFACE = 5,
-IPOIB_MCAST_RUN = 6,
IPOIB_STOP_REAPER = 7,
IPOIB_FLAG_ADMIN_CM = 9,
IPOIB_FLAG_UMCAST = 10,
@@ -98,9 +97,15 @@ enum {
IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
IPOIB_MCAST_FLAG_SENDONLY = 1,
-IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
+/*
+* For IPOIB_MCAST_FLAG_BUSY
+* When set, in flight join and mcast->mc is unreliable
+* When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
+* haven't started yet
+* When clear and mcast->mc is valid pointer, join was successful
+*/
+IPOIB_MCAST_FLAG_BUSY = 2,
IPOIB_MCAST_FLAG_ATTACHED = 3,
-IPOIB_MCAST_JOIN_STARTED = 4,
MAX_SEND_CQE = 16,
IPOIB_CM_COPYBREAK = 256,
@@ -148,6 +153,7 @@ struct ipoib_mcast {
unsigned long created;
unsigned long backoff;
+unsigned long delay_until;
unsigned long flags;
unsigned char logcount;
@@ -292,6 +298,11 @@ struct ipoib_neigh_table {
struct completion deleted;
};
+struct ipoib_qp_state_validate {
+struct work_struct work;
+struct ipoib_dev_priv *priv;
+};
/*
* Device private locking: network stack tx_lock protects members used
* in TX fast path, lock protects everything else. lock nests inside
@@ -317,6 +328,7 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
+struct workqueue_struct *wq;
struct delayed_work mcast_task;
struct work_struct carrier_on_task;
struct work_struct flush_light;
@@ -426,11 +438,6 @@ struct ipoib_neigh {
#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN)
#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES)
-static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
-{
-return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
-}
void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
{
@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
-int ipoib_ib_dev_open(struct net_device *dev, int flush);
+int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
+int ipoib_mcast_stop_thread(struct net_device *dev);
void ipoib_mcast_dev_down(struct net_device *dev);
void ipoib_mcast_dev_flush(struct net_device *dev);
......
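The new IPOIB_MCAST_FLAG_BUSY comment in the hunk above describes three situations encoded by the flag plus the mcast->mc pointer. A small stand-alone sketch of that decoding (IS_ERR_OR_NULL is re-implemented here purely for illustration, and the helper below is hypothetical, not IPoIB code):

#include <stdbool.h>
#include <stdio.h>

#define MAX_ERRNO 4095
/* Userspace stand-in for the kernel's IS_ERR_OR_NULL(). */
static bool is_err_or_null(const void *p)
{
	return !p || (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

static const char *mcast_join_state(bool busy, const void *mc)
{
	if (busy)
		return "join in flight, mc is not yet reliable";
	if (is_err_or_null(mc))
		return "join not started yet, or needs a restart";
	return "join completed, mc is a valid pointer";
}

int main(void)
{
	printf("%s\n", mcast_join_state(true, NULL));
	printf("%s\n", mcast_join_state(false, NULL));
	printf("%s\n", mcast_join_state(false, (const void *)0x1000));
	return 0;
}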
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
}
spin_lock_irq(&priv->lock);
-queue_delayed_work(ipoib_workqueue,
+queue_delayed_work(priv->wq,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
/* Add this entry to passive ids list head, but do not re-add it
* if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->lock, flags);
list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
ipoib_cm_start_rx_drain(priv);
-queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+queue_work(priv->wq, &priv->cm.rx_reap_task);
spin_unlock_irqrestore(&priv->lock, flags);
} else
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->lock, flags);
list_move(&p->list, &priv->cm.rx_reap_list);
spin_unlock_irqrestore(&priv->lock, flags);
-queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+queue_work(priv->wq, &priv->cm.rx_reap_task);
}
return;
}
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
-queue_work(ipoib_workqueue, &priv->cm.reap_task);
+queue_work(priv->wq, &priv->cm.reap_task);
}
clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
-queue_work(ipoib_workqueue, &priv->cm.reap_task);
+queue_work(priv->wq, &priv->cm.reap_task);
}
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
tx->dev = dev;
list_add(&tx->list, &priv->cm.start_list);
set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
-queue_work(ipoib_workqueue, &priv->cm.start_task);
+queue_work(priv->wq, &priv->cm.start_task);
return tx;
}
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
spin_lock_irqsave(&priv->lock, flags);
list_move(&tx->list, &priv->cm.reap_list);
-queue_work(ipoib_workqueue, &priv->cm.reap_task);
+queue_work(priv->wq, &priv->cm.reap_task);
ipoib_dbg(priv, "Reap connection for gid %pI6\n",
tx->neigh->daddr + 4);
tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
skb_queue_tail(&priv->cm.skb_queue, skb);
if (e)
-queue_work(ipoib_workqueue, &priv->cm.skb_task);
+queue_work(priv->wq, &priv->cm.skb_task);
}
static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
}
if (!list_empty(&priv->cm.passive_ids))
-queue_delayed_work(ipoib_workqueue,
+queue_delayed_work(priv->wq,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
spin_unlock_irq(&priv->lock);
}
......
@@ -94,39 +94,9 @@ void ipoib_free_ah(struct kref *kref)
static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
u64 mapping[IPOIB_UD_RX_SG])
{
-if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE,
-DMA_FROM_DEVICE);
-ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
-DMA_FROM_DEVICE);
-} else
-ib_dma_unmap_single(priv->ca, mapping[0],
-IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
-DMA_FROM_DEVICE);
-}
-static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
-struct sk_buff *skb,
-unsigned int length)
-{
-if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
-unsigned int size;
-/*
-* There is only two buffers needed for max_payload = 4K,
-* first buf size is IPOIB_UD_HEAD_SIZE
-*/
-skb->tail += IPOIB_UD_HEAD_SIZE;
-skb->len += length;
-size = length - IPOIB_UD_HEAD_SIZE;
-skb_frag_size_set(frag, size);
-skb->data_len += size;
-skb->truesize += PAGE_SIZE;
-} else
-skb_put(skb, length);
+ib_dma_unmap_single(priv->ca, mapping[0],
+IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
+DMA_FROM_DEVICE);
}
static int ipoib_ib_post_receive(struct net_device *dev, int id)
@@ -156,18 +126,11 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
int buf_size;
-int tailroom;
u64 *mapping;
-if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-buf_size = IPOIB_UD_HEAD_SIZE;
-tailroom = 128; /* reserve some tailroom for IP/TCP headers */
-} else {
-buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
-tailroom = 0;
-}
-skb = dev_alloc_skb(buf_size + tailroom + 4);
+buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN);
if (unlikely(!skb))
return NULL;
@@ -184,23 +147,8 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
goto error;
-if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-struct page *page = alloc_page(GFP_ATOMIC);
-if (!page)
-goto partial_error;
-skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
-mapping[1] =
-ib_dma_map_page(priv->ca, page,
-0, PAGE_SIZE, DMA_FROM_DEVICE);
-if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
-goto partial_error;
-}
priv->rx_ring[id].skb = skb;
return skb;
-partial_error:
-ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
error:
dev_kfree_skb_any(skb);
return NULL;
@@ -278,7 +226,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
wc->byte_len, wc->slid);
ipoib_ud_dma_unmap_rx(priv, mapping);
-ipoib_ud_skb_put_frags(priv, skb, wc->byte_len);
+skb_put(skb, wc->byte_len);
/* First byte of dgid signals multicast when 0xff */
dgid = &((struct ib_grh *)skb->data)->dgid;
@@ -296,6 +245,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
skb_reset_mac_header(skb);
skb_pull(skb, IPOIB_ENCAP_LEN);
+skb->truesize = SKB_TRUESIZE(skb->len);
++dev->stats.rx_packets;
dev->stats.rx_bytes += skb->len;
@@ -376,6 +327,51 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
}
}
+/*
+* As the result of a completion error the QP Can be transferred to SQE states.
+* The function checks if the (send)QP is in SQE state and
+* moves it back to RTS state, that in order to have it functional again.
+*/
+static void ipoib_qp_state_validate_work(struct work_struct *work)
+{
+struct ipoib_qp_state_validate *qp_work =
+container_of(work, struct ipoib_qp_state_validate, work);
+struct ipoib_dev_priv *priv = qp_work->priv;
+struct ib_qp_attr qp_attr;
+struct ib_qp_init_attr query_init_attr;
+int ret;
+ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
+if (ret) {
+ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
+__func__, ret);
+goto free_res;
+}
+pr_info("%s: QP: 0x%x is in state: %d\n",
+__func__, priv->qp->qp_num, qp_attr.qp_state);
+/* currently support only in SQE->RTS transition*/
+if (qp_attr.qp_state == IB_QPS_SQE) {
+qp_attr.qp_state = IB_QPS_RTS;
+ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
+if (ret) {
+pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
+ret, priv->qp->qp_num);
+goto free_res;
+}
+pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
+__func__, priv->qp->qp_num);
+} else {
+pr_warn("QP (%d) will stay in state: %d\n",
+priv->qp->qp_num, qp_attr.qp_state);
+}
+free_res:
+kfree(qp_work);
+}
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -407,10 +403,22 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
netif_wake_queue(dev);
if (wc->status != IB_WC_SUCCESS &&
-wc->status != IB_WC_WR_FLUSH_ERR)
+wc->status != IB_WC_WR_FLUSH_ERR) {
+struct ipoib_qp_state_validate *qp_work;
ipoib_warn(priv, "failed send event "
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
+qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
+if (!qp_work) {
+ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
+__func__, priv->qp->qp_num);
+return;
+}
+INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
+qp_work->priv = priv;
+queue_work(priv->wq, &qp_work->work);
+}
}
static int poll_tx(struct ipoib_dev_priv *priv)
@@ -655,16 +663,33 @@ void ipoib_reap_ah(struct work_struct *work)
__ipoib_reap_ah(dev);
if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
}
+static void ipoib_flush_ah(struct net_device *dev)
+{
+struct ipoib_dev_priv *priv = netdev_priv(dev);
+cancel_delayed_work(&priv->ah_reap_task);
+flush_workqueue(priv->wq);
+ipoib_reap_ah(&priv->ah_reap_task.work);
+}
+static void ipoib_stop_ah(struct net_device *dev)
+{
+struct ipoib_dev_priv *priv = netdev_priv(dev);
+set_bit(IPOIB_STOP_REAPER, &priv->flags);
+ipoib_flush_ah(dev);
+}
static void ipoib_ib_tx_timer_func(unsigned long ctx)
{
drain_tx_cq((struct net_device *)ctx);
}
-int ipoib_ib_dev_open(struct net_device *dev, int flush)
+int ipoib_ib_dev_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
@@ -696,7 +721,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +731,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
dev_stop:
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
napi_enable(&priv->napi);
-ipoib_ib_dev_stop(dev, flush);
+ipoib_ib_dev_stop(dev);
return -1;
}
@@ -738,7 +763,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
return ipoib_mcast_start_thread(dev);
}
-int ipoib_ib_dev_down(struct net_device *dev, int flush)
+int ipoib_ib_dev_down(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -747,7 +772,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
netif_carrier_off(dev);
-ipoib_mcast_stop_thread(dev, flush);
+ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev);
ipoib_flush_paths(dev);
@@ -807,7 +832,7 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable();
}
-int ipoib_ib_dev_stop(struct net_device *dev, int flush)
+int ipoib_ib_dev_stop(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
@@ -877,24 +902,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
ipoib_warn(priv, "Failed to modify QP to RESET state\n");
-/* Wait for all AHs to be reaped */
-set_bit(IPOIB_STOP_REAPER, &priv->flags);
-cancel_delayed_work(&priv->ah_reap_task);
-if (flush)
-flush_workqueue(ipoib_workqueue);
-begin = jiffies;
-while (!list_empty(&priv->dead_ahs)) {
-__ipoib_reap_ah(dev);
-if (time_after(jiffies, begin + HZ)) {
-ipoib_warn(priv, "timing out; will leak address handles\n");
-break;
-}
-msleep(1);
-}
+ipoib_flush_ah(dev);
ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
@@ -918,7 +926,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
(unsigned long) dev);
if (dev->flags & IFF_UP) {
-if (ipoib_ib_dev_open(dev, 1)) {
+if (ipoib_ib_dev_open(dev)) {
ipoib_transport_dev_cleanup(dev);
return -ENODEV;
}
@@ -1037,15 +1045,16 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
if (level == IPOIB_FLUSH_LIGHT) {
ipoib_mark_paths_invalid(dev);
ipoib_mcast_dev_flush(dev);
+ipoib_flush_ah(dev);
}
if (level >= IPOIB_FLUSH_NORMAL)
-ipoib_ib_dev_down(dev, 0);
+ipoib_ib_dev_down(dev);
if (level == IPOIB_FLUSH_HEAVY) {
if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-ipoib_ib_dev_stop(dev, 0);
-if (ipoib_ib_dev_open(dev, 0) != 0)
+ipoib_ib_dev_stop(dev);
+if (ipoib_ib_dev_open(dev) != 0)
return;
if (netif_queue_stopped(dev))
netif_start_queue(dev);
@@ -1097,9 +1106,17 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
*/
ipoib_flush_paths(dev);
-ipoib_mcast_stop_thread(dev, 1);
+ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev);
+/*
+* All of our ah references aren't free until after
+* ipoib_mcast_dev_flush(), ipoib_flush_paths, and
+* the neighbor garbage collection is stopped and reaped.
+* That should all be done now, so make a final ah flush.
+*/
+ipoib_stop_ah(dev);
ipoib_transport_dev_cleanup(dev);
}
......
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
-if (ipoib_ib_dev_open(dev, 1)) {
+if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0;
goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
return 0;
err_stop:
-ipoib_ib_dev_stop(dev, 1);
+ipoib_ib_dev_stop(dev);
err_disable:
clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
-ipoib_ib_dev_down(dev, 1);
-ipoib_ib_dev_stop(dev, 0);
+ipoib_ib_dev_down(dev);
+ipoib_ib_dev_stop(dev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -640,8 +640,10 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
if (!path->query && path_rec_start(dev, path))
goto err_path;
+if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
__skb_queue_tail(&neigh->queue, skb);
+else
+goto err_drop;
}
spin_unlock_irqrestore(&priv->lock, flags);
@@ -676,7 +678,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
new_path = 1;
}
if (path) {
-__skb_queue_tail(&path->queue, skb);
+if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+__skb_queue_tail(&path->queue, skb);
+} else {
+++dev->stats.tx_dropped;
+dev_kfree_skb_any(skb);
+}
if (!path->query && path_rec_start(dev, path)) {
spin_unlock_irqrestore(&priv->lock, flags);
@@ -839,7 +846,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
return;
}
-queue_work(ipoib_workqueue, &priv->restart_task);
+queue_work(priv->wq, &priv->restart_task);
}
static int ipoib_get_iflink(const struct net_device *dev)
@@ -966,7 +973,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
__ipoib_reap_neigh(priv);
if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
}
@@ -1145,7 +1152,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
/* start garbage collection */
clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
return 0;
@@ -1274,15 +1281,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
-if (ipoib_neigh_hash_init(priv) < 0)
-goto out;
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
GFP_KERNEL);
if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
ca->name, ipoib_recvq_size);
-goto out_neigh_hash_cleanup;
+goto out;
}
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1297,16 +1302,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
if (ipoib_ib_dev_init(dev, ca, port))
goto out_tx_ring_cleanup;
+/*
+* Must be after ipoib_ib_dev_init so we can allocate a per
+* device wq there and use it here
+*/
+if (ipoib_neigh_hash_init(priv) < 0)
+goto out_dev_uninit;
return 0;
+out_dev_uninit:
+ipoib_ib_dev_cleanup(dev);
out_tx_ring_cleanup:
vfree(priv->tx_ring);
out_rx_ring_cleanup:
kfree(priv->rx_ring);
-out_neigh_hash_cleanup:
-ipoib_neigh_hash_uninit(dev);
out:
return -ENOMEM;
}
@@ -1329,6 +1342,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
}
unregister_netdevice_many(&head);
+/*
+* Must be before ipoib_ib_dev_cleanup or we delete an in use
+* work queue
+*/
+ipoib_neigh_hash_uninit(dev);
ipoib_ib_dev_cleanup(dev);
kfree(priv->rx_ring);
@@ -1336,8 +1355,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
priv->rx_ring = NULL;
priv->tx_ring = NULL;
-ipoib_neigh_hash_uninit(dev);
}
static const struct header_ops ipoib_header_ops = {
@@ -1646,10 +1663,11 @@ static struct net_device *ipoib_add_port(const char *format,
register_failed:
ib_unregister_event_handler(&priv->event_handler);
-flush_workqueue(ipoib_workqueue);
/* Stop GC if started before flush */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
-flush_workqueue(ipoib_workqueue);
+flush_workqueue(priv->wq);
event_failed:
ipoib_dev_cleanup(priv->dev);
@@ -1712,6 +1730,7 @@ static void ipoib_remove_one(struct ib_device *device)
list_for_each_entry_safe(priv, tmp, dev_list, list) {
ib_unregister_event_handler(&priv->event_handler);
+flush_workqueue(ipoib_workqueue);
rtnl_lock();
dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
@@ -1720,7 +1739,7 @@ static void ipoib_remove_one(struct ib_device *device)
/* Stop GC */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
-flush_workqueue(ipoib_workqueue);
+flush_workqueue(priv->wq);
unregister_netdev(priv->dev);
free_netdev(priv->dev);
@@ -1755,14 +1774,16 @@ static int __init ipoib_init_module(void)
return ret;
/*
-* We create our own workqueue mainly because we want to be
-* able to flush it when devices are being removed. We can't
-* use schedule_work()/flush_scheduled_work() because both
-* unregister_netdev() and linkwatch_event take the rtnl lock,
-* so flush_scheduled_work() can deadlock during device
-* removal.
+* We create a global workqueue here that is used for all flush
+* operations. However, if you attempt to flush a workqueue
+* from a task on that same workqueue, it deadlocks the system.
+* We want to be able to flush the tasks associated with a
+* specific net device, so we also create a workqueue for each
+* netdevice. We queue up the tasks for that device only on
+* its private workqueue, and we only queue up flush events
+* on our global flush workqueue. This avoids the deadlocks.
*/
-ipoib_workqueue = create_singlethread_workqueue("ipoib");
+ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
if (!ipoib_workqueue) {
ret = -ENOMEM;
goto err_fs;
......
@@ -157,6 +157,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_pd;
}
+/*
+* the various IPoIB tasks assume they will never race against
+* themselves, so always use a single thread workqueue
+*/
+priv->wq = create_singlethread_workqueue("ipoib_wq");
+if (!priv->wq) {
+printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
+goto out_free_mr;
+}
size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret) {
@@ -165,12 +175,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
else
size += ipoib_recvq_size * ipoib_max_conn_qp;
-}
+} else
+goto out_free_wq;
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
if (IS_ERR(priv->recv_cq)) {
printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
-goto out_free_mr;
+goto out_cm_dev_cleanup;
}
priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
@@ -216,15 +227,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->tx_wr.send_flags = IB_SEND_SIGNALED;
priv->rx_sge[0].lkey = priv->mr->lkey;
-if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE;
-priv->rx_sge[1].length = PAGE_SIZE;
-priv->rx_sge[1].lkey = priv->mr->lkey;
-priv->rx_wr.num_sge = IPOIB_UD_RX_SG;
-} else {
-priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
-priv->rx_wr.num_sge = 1;
-}
+priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+priv->rx_wr.num_sge = 1;
priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge;
@@ -236,12 +242,19 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
out_free_recv_cq:
ib_destroy_cq(priv->recv_cq);
+out_cm_dev_cleanup:
+ipoib_cm_dev_cleanup(dev);
+out_free_wq:
+destroy_workqueue(priv->wq);
+priv->wq = NULL;
out_free_mr:
ib_dereg_mr(priv->mr);
-ipoib_cm_dev_cleanup(dev);
out_free_pd:
ib_dealloc_pd(priv->pd);
return -ENODEV;
}
@@ -265,11 +278,18 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
ipoib_cm_dev_cleanup(dev);
+if (priv->wq) {
+flush_workqueue(priv->wq);
+destroy_workqueue(priv->wq);
+priv->wq = NULL;
+}
if (ib_dereg_mr(priv->mr))
ipoib_warn(priv, "ib_dereg_mr failed\n");
if (ib_dealloc_pd(priv->pd))
ipoib_warn(priv, "ib_dealloc_pd failed\n");
}
void ipoib_event(struct ib_event_handler *handler,
......
...@@ -69,7 +69,7 @@ ...@@ -69,7 +69,7 @@
#define DRV_NAME "iser" #define DRV_NAME "iser"
#define PFX DRV_NAME ": " #define PFX DRV_NAME ": "
#define DRV_VER "1.5" #define DRV_VER "1.6"
#define iser_dbg(fmt, arg...) \ #define iser_dbg(fmt, arg...) \
do { \ do { \
...@@ -218,22 +218,21 @@ enum iser_data_dir { ...@@ -218,22 +218,21 @@ enum iser_data_dir {
/** /**
* struct iser_data_buf - iSER data buffer * struct iser_data_buf - iSER data buffer
* *
* @buf: pointer to the sg list * @sg: pointer to the sg list
* @size: num entries of this sg * @size: num entries of this sg
* @data_len: total beffer byte len * @data_len: total beffer byte len
* @dma_nents: returned by dma_map_sg * @dma_nents: returned by dma_map_sg
* @copy_buf: allocated copy buf for SGs unaligned * @orig_sg: pointer to the original sg list (in case
* for rdma which are copied * we used a copy)
* @sg_single: SG-ified clone of a non SG SC or * @orig_size: num entris of orig sg list
* unaligned SG
*/ */
struct iser_data_buf { struct iser_data_buf {
void *buf; struct scatterlist *sg;
unsigned int size; unsigned int size;
unsigned long data_len; unsigned long data_len;
unsigned int dma_nents; unsigned int dma_nents;
char *copy_buf; struct scatterlist *orig_sg;
struct scatterlist sg_single; unsigned int orig_size;
}; };
/* fwd declarations */ /* fwd declarations */
...@@ -244,35 +243,14 @@ struct iscsi_endpoint; ...@@ -244,35 +243,14 @@ struct iscsi_endpoint;
/** /**
* struct iser_mem_reg - iSER memory registration info * struct iser_mem_reg - iSER memory registration info
* *
* @lkey: MR local key * @sge: memory region sg element
* @rkey: MR remote key * @rkey: memory region remote key
* @va: MR start address (buffer va)
* @len: MR length
* @mem_h: pointer to registration context (FMR/Fastreg) * @mem_h: pointer to registration context (FMR/Fastreg)
*/ */
struct iser_mem_reg { struct iser_mem_reg {
u32 lkey; struct ib_sge sge;
u32 rkey; u32 rkey;
u64 va; void *mem_h;
u64 len;
void *mem_h;
};
/**
* struct iser_regd_buf - iSER buffer registration desc
*
* @reg: memory registration info
* @virt_addr: virtual address of buffer
* @device: reference to iser device
* @direction: dma direction (for dma_unmap)
* @data_size: data buffer size in bytes
*/
struct iser_regd_buf {
struct iser_mem_reg reg;
void *virt_addr;
struct iser_device *device;
enum dma_data_direction direction;
unsigned int data_size;
}; };
enum iser_desc_type { enum iser_desc_type {
...@@ -534,11 +512,9 @@ struct iser_conn { ...@@ -534,11 +512,9 @@ struct iser_conn {
* @sc: link to scsi command * @sc: link to scsi command
* @command_sent: indicate if command was sent * @command_sent: indicate if command was sent
* @dir: iser data direction * @dir: iser data direction
* @rdma_regd: task rdma registration desc * @rdma_reg: task rdma registration desc
* @data: iser data buffer desc * @data: iser data buffer desc
* @data_copy: iser data copy buffer desc (bounce buffer)
* @prot: iser protection buffer desc * @prot: iser protection buffer desc
* @prot_copy: iser protection copy buffer desc (bounce buffer)
*/ */
struct iscsi_iser_task { struct iscsi_iser_task {
struct iser_tx_desc desc; struct iser_tx_desc desc;
...@@ -547,11 +523,9 @@ struct iscsi_iser_task { ...@@ -547,11 +523,9 @@ struct iscsi_iser_task {
struct scsi_cmnd *sc; struct scsi_cmnd *sc;
int command_sent; int command_sent;
int dir[ISER_DIRS_NUM]; int dir[ISER_DIRS_NUM];
struct iser_regd_buf rdma_regd[ISER_DIRS_NUM]; struct iser_mem_reg rdma_reg[ISER_DIRS_NUM];
struct iser_data_buf data[ISER_DIRS_NUM]; struct iser_data_buf data[ISER_DIRS_NUM];
struct iser_data_buf data_copy[ISER_DIRS_NUM];
struct iser_data_buf prot[ISER_DIRS_NUM]; struct iser_data_buf prot[ISER_DIRS_NUM];
struct iser_data_buf prot_copy[ISER_DIRS_NUM];
}; };
struct iser_page_vec { struct iser_page_vec {
...@@ -621,7 +595,6 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn); ...@@ -621,7 +595,6 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn);
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem, struct iser_data_buf *mem,
struct iser_data_buf *mem_copy,
enum iser_data_dir cmd_dir); enum iser_data_dir cmd_dir);
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
...@@ -634,10 +607,6 @@ int iser_connect(struct iser_conn *iser_conn, ...@@ -634,10 +607,6 @@ int iser_connect(struct iser_conn *iser_conn,
struct sockaddr *dst_addr, struct sockaddr *dst_addr,
int non_blocking); int non_blocking);
int iser_reg_page_vec(struct ib_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg);
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir); enum iser_data_dir cmd_dir);
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
...@@ -667,4 +636,9 @@ int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max); ...@@ -667,4 +636,9 @@ int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
void iser_free_fastreg_pool(struct ib_conn *ib_conn); void iser_free_fastreg_pool(struct ib_conn *ib_conn);
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector); enum iser_data_dir cmd_dir, sector_t *sector);
struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn);
void
iser_reg_desc_put(struct ib_conn *ib_conn,
struct fast_reg_descriptor *desc);
#endif #endif
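The new iser_reg_desc_get/iser_reg_desc_put prototypes replace open-coded pool handling; a sketch of what such helpers typically look like, inferred from the inline locking pattern removed elsewhere in this series (not copied from the actual patch):

/* Hypothetical sketch of the fastreg descriptor pool helpers. */
struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn)
{
        struct fast_reg_descriptor *desc;

        spin_lock_bh(&ib_conn->lock);
        /* pool is sized to the command window, so it is assumed non-empty */
        desc = list_first_entry(&ib_conn->fastreg.pool,
                                struct fast_reg_descriptor, list);
        list_del(&desc->list);
        spin_unlock_bh(&ib_conn->lock);

        return desc;
}

void
iser_reg_desc_put(struct ib_conn *ib_conn, struct fast_reg_descriptor *desc)
{
        spin_lock_bh(&ib_conn->lock);
        list_add_tail(&desc->list, &ib_conn->fastreg.pool);
        spin_unlock_bh(&ib_conn->lock);
}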
...@@ -50,7 +50,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) ...@@ -50,7 +50,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
{ {
struct iscsi_iser_task *iser_task = task->dd_data; struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_regd_buf *regd_buf; struct iser_mem_reg *mem_reg;
int err; int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header; struct iser_hdr *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN]; struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
...@@ -78,15 +78,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) ...@@ -78,15 +78,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
iser_err("Failed to set up Data-IN RDMA\n"); iser_err("Failed to set up Data-IN RDMA\n");
return err; return err;
} }
regd_buf = &iser_task->rdma_regd[ISER_DIR_IN]; mem_reg = &iser_task->rdma_reg[ISER_DIR_IN];
hdr->flags |= ISER_RSV; hdr->flags |= ISER_RSV;
hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey); hdr->read_stag = cpu_to_be32(mem_reg->rkey);
hdr->read_va = cpu_to_be64(regd_buf->reg.va); hdr->read_va = cpu_to_be64(mem_reg->sge.addr);
iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n", iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
task->itt, regd_buf->reg.rkey, task->itt, mem_reg->rkey,
(unsigned long long)regd_buf->reg.va); (unsigned long long)mem_reg->sge.addr);
return 0; return 0;
} }
...@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, ...@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
{ {
struct iscsi_iser_task *iser_task = task->dd_data; struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_device *device = iser_task->iser_conn->ib_conn.device; struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_regd_buf *regd_buf; struct iser_mem_reg *mem_reg;
int err; int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header; struct iser_hdr *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT]; struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
...@@ -134,25 +134,25 @@ iser_prepare_write_cmd(struct iscsi_task *task, ...@@ -134,25 +134,25 @@ iser_prepare_write_cmd(struct iscsi_task *task,
return err; return err;
} }
regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT]; mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
if (unsol_sz < edtl) { if (unsol_sz < edtl) {
hdr->flags |= ISER_WSV; hdr->flags |= ISER_WSV;
hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey); hdr->write_stag = cpu_to_be32(mem_reg->rkey);
hdr->write_va = cpu_to_be64(regd_buf->reg.va + unsol_sz); hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X " iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
"VA:%#llX + unsol:%d\n", "VA:%#llX + unsol:%d\n",
task->itt, regd_buf->reg.rkey, task->itt, mem_reg->rkey,
(unsigned long long)regd_buf->reg.va, unsol_sz); (unsigned long long)mem_reg->sge.addr, unsol_sz);
} }
if (imm_sz > 0) { if (imm_sz > 0) {
iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
task->itt, imm_sz); task->itt, imm_sz);
tx_dsg->addr = regd_buf->reg.va; tx_dsg->addr = mem_reg->sge.addr;
tx_dsg->length = imm_sz; tx_dsg->length = imm_sz;
tx_dsg->lkey = regd_buf->reg.lkey; tx_dsg->lkey = mem_reg->sge.lkey;
iser_task->desc.num_sge = 2; iser_task->desc.num_sge = 2;
} }
...@@ -401,16 +401,16 @@ int iser_send_command(struct iscsi_conn *conn, ...@@ -401,16 +401,16 @@ int iser_send_command(struct iscsi_conn *conn,
} }
if (scsi_sg_count(sc)) { /* using a scatter list */ if (scsi_sg_count(sc)) { /* using a scatter list */
data_buf->buf = scsi_sglist(sc); data_buf->sg = scsi_sglist(sc);
data_buf->size = scsi_sg_count(sc); data_buf->size = scsi_sg_count(sc);
} }
data_buf->data_len = scsi_bufflen(sc); data_buf->data_len = scsi_bufflen(sc);
if (scsi_prot_sg_count(sc)) { if (scsi_prot_sg_count(sc)) {
prot_buf->buf = scsi_prot_sglist(sc); prot_buf->sg = scsi_prot_sglist(sc);
prot_buf->size = scsi_prot_sg_count(sc); prot_buf->size = scsi_prot_sg_count(sc);
prot_buf->data_len = data_buf->data_len >> prot_buf->data_len = (data_buf->data_len >>
ilog2(sc->device->sector_size) * 8; ilog2(sc->device->sector_size)) * 8;
} }
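The added parentheses fix an operator-precedence bug: in C the multiplication binds tighter than the shift, so the old expression shifted data_len by ilog2(sector_size) * 8 instead of first dividing by the sector size and then multiplying by the 8 protection bytes per sector. A worked example with assumed values (512-byte sectors, 512 KB of data):

        unsigned long data_len = 512 * 1024;    /* 512 KB transfer */
        unsigned int shift = 9;                 /* ilog2(512)      */

        /* old: parsed as data_len >> (9 * 8) -- a shift by 72, nonsense/undefined */
        unsigned long wrong = data_len >> shift * 8;
        /* new: 1024 sectors, 8 bytes of protection information each = 8192 bytes  */
        unsigned long right = (data_len >> shift) * 8;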
if (hdr->flags & ISCSI_FLAG_CMD_READ) { if (hdr->flags & ISCSI_FLAG_CMD_READ) {
...@@ -450,7 +450,7 @@ int iser_send_data_out(struct iscsi_conn *conn, ...@@ -450,7 +450,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
struct iser_conn *iser_conn = conn->dd_data; struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data; struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *tx_desc = NULL; struct iser_tx_desc *tx_desc = NULL;
struct iser_regd_buf *regd_buf; struct iser_mem_reg *mem_reg;
unsigned long buf_offset; unsigned long buf_offset;
unsigned long data_seg_len; unsigned long data_seg_len;
uint32_t itt; uint32_t itt;
...@@ -477,11 +477,11 @@ int iser_send_data_out(struct iscsi_conn *conn, ...@@ -477,11 +477,11 @@ int iser_send_data_out(struct iscsi_conn *conn,
/* build the tx desc */ /* build the tx desc */
iser_initialize_task_headers(task, tx_desc); iser_initialize_task_headers(task, tx_desc);
regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT]; mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
tx_dsg = &tx_desc->tx_sg[1]; tx_dsg = &tx_desc->tx_sg[1];
tx_dsg->addr = regd_buf->reg.va + buf_offset; tx_dsg->addr = mem_reg->sge.addr + buf_offset;
tx_dsg->length = data_seg_len; tx_dsg->length = data_seg_len;
tx_dsg->lkey = regd_buf->reg.lkey; tx_dsg->lkey = mem_reg->sge.lkey;
tx_desc->num_sge = 2; tx_desc->num_sge = 2;
if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
...@@ -658,10 +658,10 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) ...@@ -658,10 +658,10 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
iser_task->prot[ISER_DIR_IN].data_len = 0; iser_task->prot[ISER_DIR_IN].data_len = 0;
iser_task->prot[ISER_DIR_OUT].data_len = 0; iser_task->prot[ISER_DIR_OUT].data_len = 0;
memset(&iser_task->rdma_regd[ISER_DIR_IN], 0, memset(&iser_task->rdma_reg[ISER_DIR_IN], 0,
sizeof(struct iser_regd_buf)); sizeof(struct iser_mem_reg));
memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0, memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0,
sizeof(struct iser_regd_buf)); sizeof(struct iser_mem_reg));
} }
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
...@@ -674,35 +674,31 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) ...@@ -674,35 +674,31 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
/* if we were reading, copy back to unaligned sglist, /* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy * anyway dma_unmap and free the copy
*/ */
if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) { if (iser_task->data[ISER_DIR_IN].orig_sg) {
is_rdma_data_aligned = 0; is_rdma_data_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task, iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->data[ISER_DIR_IN], &iser_task->data[ISER_DIR_IN],
&iser_task->data_copy[ISER_DIR_IN],
ISER_DIR_IN); ISER_DIR_IN);
} }
if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) { if (iser_task->data[ISER_DIR_OUT].orig_sg) {
is_rdma_data_aligned = 0; is_rdma_data_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task, iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->data[ISER_DIR_OUT], &iser_task->data[ISER_DIR_OUT],
&iser_task->data_copy[ISER_DIR_OUT],
ISER_DIR_OUT); ISER_DIR_OUT);
} }
if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) { if (iser_task->prot[ISER_DIR_IN].orig_sg) {
is_rdma_prot_aligned = 0; is_rdma_prot_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task, iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->prot[ISER_DIR_IN], &iser_task->prot[ISER_DIR_IN],
&iser_task->prot_copy[ISER_DIR_IN],
ISER_DIR_IN); ISER_DIR_IN);
} }
if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) { if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
is_rdma_prot_aligned = 0; is_rdma_prot_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task, iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->prot[ISER_DIR_OUT], &iser_task->prot[ISER_DIR_OUT],
&iser_task->prot_copy[ISER_DIR_OUT],
ISER_DIR_OUT); ISER_DIR_OUT);
} }
......
...@@ -273,6 +273,65 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn) ...@@ -273,6 +273,65 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
ib_conn->fmr.page_vec = NULL; ib_conn->fmr.page_vec = NULL;
} }
static int
iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd,
struct fast_reg_descriptor *desc)
{
struct iser_pi_context *pi_ctx = NULL;
struct ib_mr_init_attr mr_init_attr = {.max_reg_descriptors = 2,
.flags = IB_MR_SIGNATURE_EN};
int ret = 0;
desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
if (!desc->pi_ctx)
return -ENOMEM;
pi_ctx = desc->pi_ctx;
pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(pi_ctx->prot_frpl)) {
ret = PTR_ERR(pi_ctx->prot_frpl);
goto prot_frpl_failure;
}
pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
ISCSI_ISER_SG_TABLESIZE + 1);
if (IS_ERR(pi_ctx->prot_mr)) {
ret = PTR_ERR(pi_ctx->prot_mr);
goto prot_mr_failure;
}
desc->reg_indicators |= ISER_PROT_KEY_VALID;
pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
if (IS_ERR(pi_ctx->sig_mr)) {
ret = PTR_ERR(pi_ctx->sig_mr);
goto sig_mr_failure;
}
desc->reg_indicators |= ISER_SIG_KEY_VALID;
desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
return 0;
sig_mr_failure:
ib_dereg_mr(desc->pi_ctx->prot_mr);
prot_mr_failure:
ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
prot_frpl_failure:
kfree(desc->pi_ctx);
return ret;
}
static void
iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
{
ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
ib_dereg_mr(pi_ctx->prot_mr);
ib_destroy_mr(pi_ctx->sig_mr);
kfree(pi_ctx);
}
static int static int
iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
bool pi_enable, struct fast_reg_descriptor *desc) bool pi_enable, struct fast_reg_descriptor *desc)
...@@ -297,59 +356,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, ...@@ -297,59 +356,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
desc->reg_indicators |= ISER_DATA_KEY_VALID; desc->reg_indicators |= ISER_DATA_KEY_VALID;
if (pi_enable) { if (pi_enable) {
struct ib_mr_init_attr mr_init_attr = {0}; ret = iser_alloc_pi_ctx(ib_device, pd, desc);
struct iser_pi_context *pi_ctx = NULL; if (ret)
desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
if (!desc->pi_ctx) {
iser_err("Failed to allocate pi context\n");
ret = -ENOMEM;
goto pi_ctx_alloc_failure; goto pi_ctx_alloc_failure;
}
pi_ctx = desc->pi_ctx;
pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(pi_ctx->prot_frpl)) {
ret = PTR_ERR(pi_ctx->prot_frpl);
iser_err("Failed to allocate prot frpl ret=%d\n",
ret);
goto prot_frpl_failure;
}
pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
ISCSI_ISER_SG_TABLESIZE + 1);
if (IS_ERR(pi_ctx->prot_mr)) {
ret = PTR_ERR(pi_ctx->prot_mr);
iser_err("Failed to allocate prot frmr ret=%d\n",
ret);
goto prot_mr_failure;
}
desc->reg_indicators |= ISER_PROT_KEY_VALID;
mr_init_attr.max_reg_descriptors = 2;
mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
if (IS_ERR(pi_ctx->sig_mr)) {
ret = PTR_ERR(pi_ctx->sig_mr);
iser_err("Failed to allocate signature enabled mr err=%d\n",
ret);
goto sig_mr_failure;
}
desc->reg_indicators |= ISER_SIG_KEY_VALID;
} }
desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
iser_dbg("Create fr_desc %p page_list %p\n",
desc, desc->data_frpl->page_list);
return 0; return 0;
sig_mr_failure:
ib_dereg_mr(desc->pi_ctx->prot_mr);
prot_mr_failure:
ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
prot_frpl_failure:
kfree(desc->pi_ctx);
pi_ctx_alloc_failure: pi_ctx_alloc_failure:
ib_dereg_mr(desc->data_mr); ib_dereg_mr(desc->data_mr);
fast_reg_mr_failure: fast_reg_mr_failure:
...@@ -416,12 +428,8 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) ...@@ -416,12 +428,8 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
list_del(&desc->list); list_del(&desc->list);
ib_free_fast_reg_page_list(desc->data_frpl); ib_free_fast_reg_page_list(desc->data_frpl);
ib_dereg_mr(desc->data_mr); ib_dereg_mr(desc->data_mr);
if (desc->pi_ctx) { if (desc->pi_ctx)
ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); iser_free_pi_ctx(desc->pi_ctx);
ib_dereg_mr(desc->pi_ctx->prot_mr);
ib_destroy_mr(desc->pi_ctx->sig_mr);
kfree(desc->pi_ctx);
}
kfree(desc); kfree(desc);
++i; ++i;
} }
...@@ -721,7 +729,7 @@ static void iser_connect_error(struct rdma_cm_id *cma_id) ...@@ -721,7 +729,7 @@ static void iser_connect_error(struct rdma_cm_id *cma_id)
struct iser_conn *iser_conn; struct iser_conn *iser_conn;
iser_conn = (struct iser_conn *)cma_id->context; iser_conn = (struct iser_conn *)cma_id->context;
iser_conn->state = ISER_CONN_DOWN; iser_conn->state = ISER_CONN_TERMINATING;
} }
/** /**
...@@ -992,93 +1000,6 @@ int iser_connect(struct iser_conn *iser_conn, ...@@ -992,93 +1000,6 @@ int iser_connect(struct iser_conn *iser_conn,
return err; return err;
} }
/**
* iser_reg_page_vec - Register physical memory
*
* returns: 0 on success, errno code on failure
*/
int iser_reg_page_vec(struct ib_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg)
{
struct ib_pool_fmr *mem;
u64 io_addr;
u64 *page_list;
int status;
page_list = page_vec->pages;
io_addr = page_list[0];
mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
page_list,
page_vec->length,
io_addr);
if (IS_ERR(mem)) {
status = (int)PTR_ERR(mem);
iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
return status;
}
mem_reg->lkey = mem->fmr->lkey;
mem_reg->rkey = mem->fmr->rkey;
mem_reg->len = page_vec->length * SIZE_4K;
mem_reg->va = io_addr;
mem_reg->mem_h = (void *)mem;
mem_reg->va += page_vec->offset;
mem_reg->len = page_vec->data_size;
iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
"entry[0]: (0x%08lx,%ld)] -> "
"[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
page_vec, page_vec->length,
(unsigned long)page_vec->pages[0],
(unsigned long)page_vec->data_size,
(unsigned int)mem_reg->lkey, mem_reg->mem_h,
(unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
return 0;
}
/**
* Unregister (previously registered using FMR) memory.
* If memory is non-FMR does nothing.
*/
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
int ret;
if (!reg->mem_h)
return;
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
if (ret)
iser_err("ib_fmr_pool_unmap failed %d\n", ret);
reg->mem_h = NULL;
}
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
struct iser_conn *iser_conn = iser_task->iser_conn;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct fast_reg_descriptor *desc = reg->mem_h;
if (!desc)
return;
reg->mem_h = NULL;
spin_lock_bh(&ib_conn->lock);
list_add_tail(&desc->list, &ib_conn->fastreg.pool);
spin_unlock_bh(&ib_conn->lock);
}
int iser_post_recvl(struct iser_conn *iser_conn) int iser_post_recvl(struct iser_conn *iser_conn)
{ {
struct ib_recv_wr rx_wr, *rx_wr_failed; struct ib_recv_wr rx_wr, *rx_wr_failed;
...@@ -1210,6 +1131,9 @@ iser_handle_comp_error(struct ib_conn *ib_conn, ...@@ -1210,6 +1131,9 @@ iser_handle_comp_error(struct ib_conn *ib_conn,
iscsi_conn_failure(iser_conn->iscsi_conn, iscsi_conn_failure(iser_conn->iscsi_conn,
ISCSI_ERR_CONN_FAILED); ISCSI_ERR_CONN_FAILED);
if (wc->wr_id == ISER_FASTREG_LI_WRID)
return;
if (is_iser_tx_desc(iser_conn, wr_id)) { if (is_iser_tx_desc(iser_conn, wr_id)) {
struct iser_tx_desc *desc = wr_id; struct iser_tx_desc *desc = wr_id;
...@@ -1254,13 +1178,11 @@ static void iser_handle_wc(struct ib_wc *wc) ...@@ -1254,13 +1178,11 @@ static void iser_handle_wc(struct ib_wc *wc)
else else
iser_dbg("flush error: wr id %llx\n", wc->wr_id); iser_dbg("flush error: wr id %llx\n", wc->wr_id);
if (wc->wr_id != ISER_FASTREG_LI_WRID &&
wc->wr_id != ISER_BEACON_WRID)
iser_handle_comp_error(ib_conn, wc);
/* complete in case all flush errors were consumed */
if (wc->wr_id == ISER_BEACON_WRID) if (wc->wr_id == ISER_BEACON_WRID)
/* all flush errors were consumed */
complete(&ib_conn->flush_comp); complete(&ib_conn->flush_comp);
else
iser_handle_comp_error(ib_conn, wc);
} }
} }
...@@ -1306,7 +1228,7 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context) ...@@ -1306,7 +1228,7 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector) enum iser_data_dir cmd_dir, sector_t *sector)
{ {
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
struct fast_reg_descriptor *desc = reg->mem_h; struct fast_reg_descriptor *desc = reg->mem_h;
unsigned long sector_size = iser_task->sc->device->sector_size; unsigned long sector_size = iser_task->sc->device->sector_size;
struct ib_mr_status mr_status; struct ib_mr_status mr_status;
......
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include <linux/parser.h> #include <linux/parser.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <rdma/ib_cache.h>
#include <linux/atomic.h> #include <linux/atomic.h>
...@@ -265,10 +266,10 @@ static int srp_init_qp(struct srp_target_port *target, ...@@ -265,10 +266,10 @@ static int srp_init_qp(struct srp_target_port *target,
if (!attr) if (!attr)
return -ENOMEM; return -ENOMEM;
ret = ib_find_pkey(target->srp_host->srp_dev->dev, ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
target->srp_host->port, target->srp_host->port,
be16_to_cpu(target->pkey), be16_to_cpu(target->pkey),
&attr->pkey_index); &attr->pkey_index);
if (ret) if (ret)
goto out; goto out;
......
...@@ -939,21 +939,34 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, ...@@ -939,21 +939,34 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
return err; return err;
} }
if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) { if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
/* compute slave's gid block */ __be64 guid = mlx4_get_admin_guid(dev, slave,
smp->attr_mod = cpu_to_be32(slave / 8); port);
/* execute cmd */
err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, /* set the PF admin guid to the FW/HW burned
vhcr->in_modifier, opcode_modifier, * GUID, if it wasn't yet set
vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); */
if (!err) { if (slave == 0 && guid == 0) {
/* if needed, move slave gid to index 0 */ smp->attr_mod = 0;
if (slave % 8) err = mlx4_cmd_box(dev,
memcpy(outsmp->data, inbox->dma,
outsmp->data + (slave % 8) * 8, 8); outbox->dma,
/* delete all other gids */ vhcr->in_modifier,
memset(outsmp->data + 8, 0, 56); opcode_modifier,
vhcr->op,
MLX4_CMD_TIME_CLASS_C,
MLX4_CMD_NATIVE);
if (err)
return err;
mlx4_set_admin_guid(dev,
*(__be64 *)outsmp->
data, slave, port);
} else {
memcpy(outsmp->data, &guid, 8);
} }
return err;
/* clean all other gids */
memset(outsmp->data + 8, 0, 56);
return 0;
} }
if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
...@@ -2350,6 +2363,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) ...@@ -2350,6 +2363,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
vf_oper->vport[port].vlan_idx = NO_INDX; vf_oper->vport[port].vlan_idx = NO_INDX;
vf_oper->vport[port].mac_idx = NO_INDX; vf_oper->vport[port].mac_idx = NO_INDX;
mlx4_set_random_admin_guid(dev, i, port);
} }
spin_lock_init(&s_state->lock); spin_lock_init(&s_state->lock);
} }
......
...@@ -702,6 +702,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) ...@@ -702,6 +702,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1; priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
} }
spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
flr_slave);
queue_work(priv->mfunc.master.comm_wq, queue_work(priv->mfunc.master.comm_wq,
&priv->mfunc.master.slave_flr_event_work); &priv->mfunc.master.slave_flr_event_work);
break; break;
......
...@@ -2260,6 +2260,37 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx) ...@@ -2260,6 +2260,37 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
} }
EXPORT_SYMBOL_GPL(mlx4_counter_free); EXPORT_SYMBOL_GPL(mlx4_counter_free);
void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
{
struct mlx4_priv *priv = mlx4_priv(dev);
priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
}
EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
{
struct mlx4_priv *priv = mlx4_priv(dev);
return priv->mfunc.master.vf_admin[entry].vport[port].guid;
}
EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
{
struct mlx4_priv *priv = mlx4_priv(dev);
__be64 guid;
/* hw GUID */
if (entry == 0)
return;
get_random_bytes((char *)&guid, sizeof(guid));
guid &= ~(cpu_to_be64(1ULL << 56));
guid |= cpu_to_be64(1ULL << 57);
priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
}
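The two bit operations follow the EUI-64 convention for the first octet of the GUID: big-endian bit 56 is the individual/group flag (cleared so the random GUID stays unicast) and bit 57 is the universal/local flag (set to mark the GUID as locally administered rather than vendor-assigned), the same trick used when generating random MAC addresses. The same flag handling expressed on raw bytes, as an illustration only:

        u8 guid[8];

        get_random_bytes(guid, sizeof(guid));
        guid[0] &= ~0x01;       /* clear I/G bit: keep the GUID unicast             */
        guid[0] |=  0x02;       /* set U/L bit: locally administered, not OUI-based */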
static int mlx4_setup_hca(struct mlx4_dev *dev) static int mlx4_setup_hca(struct mlx4_dev *dev)
{ {
struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_priv *priv = mlx4_priv(dev);
......
...@@ -499,6 +499,7 @@ struct mlx4_vport_state { ...@@ -499,6 +499,7 @@ struct mlx4_vport_state {
bool spoofchk; bool spoofchk;
u32 link_state; u32 link_state;
u8 qos_vport; u8 qos_vport;
__be64 guid;
}; };
struct mlx4_vf_admin_state { struct mlx4_vf_admin_state {
......
...@@ -211,26 +211,28 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr) ...@@ -211,26 +211,28 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
return 0; return 0;
} }
#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
static void free_4k(struct mlx5_core_dev *dev, u64 addr) static void free_4k(struct mlx5_core_dev *dev, u64 addr)
{ {
struct fw_page *fwp; struct fw_page *fwp;
int n; int n;
fwp = find_fw_page(dev, addr & PAGE_MASK); fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
if (!fwp) { if (!fwp) {
mlx5_core_warn(dev, "page not found\n"); mlx5_core_warn(dev, "page not found\n");
return; return;
} }
n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT; n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
fwp->free_count++; fwp->free_count++;
set_bit(n, &fwp->bitmask); set_bit(n, &fwp->bitmask);
if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) { if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
rb_erase(&fwp->rb_node, &dev->priv.page_root); rb_erase(&fwp->rb_node, &dev->priv.page_root);
if (fwp->free_count != 1) if (fwp->free_count != 1)
list_del(&fwp->list); list_del(&fwp->list);
dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE, dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK,
DMA_BIDIRECTIONAL); PAGE_SIZE, DMA_BIDIRECTIONAL);
__free_page(fwp->page); __free_page(fwp->page);
kfree(fwp); kfree(fwp);
} else if (fwp->free_count == 1) { } else if (fwp->free_count == 1) {
......
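The underlying bug: with CONFIG_ARCH_DMA_ADDR_T_64BIT on a 32-bit kernel, DMA addresses are 64-bit but PAGE_MASK is only an unsigned long, so the mask is zero-extended and wipes the upper half of the address. A small standalone demonstration of the effect (ordinary userspace C, 4 KB pages assumed):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t addr = 0x1f2345678ULL;                 /* DMA address above 4 GB       */
        uint32_t mask32 = ~(uint32_t)(4096 - 1);        /* PAGE_MASK on a 32-bit kernel */
        uint64_t mask64 = ~(uint64_t)0 << 12;           /* MLX5_U64_4K_PAGE_MASK idea   */

        printf("0x%llx\n", (unsigned long long)(addr & mask32)); /* 0xf2345000: high bits lost */
        printf("0x%llx\n", (unsigned long long)(addr & mask64)); /* 0x1f2345000: correct       */
        return 0;
}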
...@@ -1345,6 +1345,10 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); ...@@ -1345,6 +1345,10 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry,
int port);
__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port);
void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port);
int mlx4_flow_attach(struct mlx4_dev *dev, int mlx4_flow_attach(struct mlx4_dev *dev,
struct mlx4_net_trans_rule *rule, u64 *reg_id); struct mlx4_net_trans_rule *rule, u64 *reg_id);
int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
......