Commit 93fd8eb0 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "This is an unusually large bunch of bug fixes for the later rc cycle,
  with rxe and mlx5 both dumping a lot of things at once. rxe continues to fix
  itself, and mlx5 is fixing a bunch of "queue counters" related bugs.

  There is one highly notable bug fix regarding the qkey. This small
  security check was missed in the original 2005 implementation, and its
  omission allows some significant issues.

  Summary:

   - Two rtrs bug fixes for error unwind bugs

   - Several rxe bug fixes:
      * Incorrect Rx packet validation
      * Using memory without a refcount
      * Syzkaller found use before initialization
      * Regression fix for missing locking with the tasklet conversion
        from this merge window

   - Have bnxt report the correct link properties to userspace; this was
     a regression in v6.3

   - Several mlx5 bug fixes:
      * Kernel crash triggerable by userspace for the RAW ethernet
        profile
      * Defend against steering refcounting issues created by userspace
      * Incorrect change of QP port affinity parameters in some LAG
        configurations

   - Fix mlx5 Q counters:
      * Do not over-allocate Q counters, so that userspace can use the
        full port capacity
      * Kernel crash triggered by eswitch due to misuse of Q counters
      * Incorrect mlx5_device for Q counters in some LAG configurations

   - Properly implement the IBA spec restricting privileged qkeys to
     root

   - Always return an error when reading from a disassociated device's
     event queue

   - isert bug fixes:
      * Avoid a deadlock with the CM handler and CM ID destruction
      * Correct list corruption due to incorrect locking
      * Fix a use after free around connection tear down"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/rxe: Fix rxe_cq_post
  IB/isert: Fix incorrect release of isert connection
  IB/isert: Fix possible list corruption in CMA handler
  IB/isert: Fix dead lock in ib_isert
  RDMA/mlx5: Fix affinity assignment
  IB/uverbs: Fix to consider event queue closing also upon non-blocking mode
  RDMA/uverbs: Restrict usage of privileged QKEYs
  RDMA/cma: Always set static rate to 0 for RoCE
  RDMA/mlx5: Fix Q-counters query in LAG mode
  RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters
  RDMA/mlx5: Fix Q-counters per vport allocation
  RDMA/mlx5: Create an indirect flow table for steering anchor
  RDMA/mlx5: Initiate dropless RQ for RAW Ethernet functions
  RDMA/rxe: Fix the use-before-initialization error of resp_pkts
  RDMA/bnxt_re: Fix reporting active_{speed,width} attributes
  RDMA/rxe: Fix ref count error in check_rkey()
  RDMA/rxe: Fix packet length checks
  RDMA/rtrs: Fix rxe_dealloc_pd warning
  RDMA/rtrs: Fix the last iu->buf leak in err path
parents b7feaa49 0c7e314a
......@@ -3295,7 +3295,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
route->path_rec->rate = iboe_get_rate(ndev);
route->path_rec->rate = IB_RATE_PORT_CURRENT;
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
/* In case ACK timeout is set, use this value to calculate
......@@ -4964,7 +4964,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (!ndev)
return -ENODEV;
ib.rec.rate = iboe_get_rate(ndev);
ib.rec.rate = IB_RATE_PORT_CURRENT;
ib.rec.hop_limit = 1;
ib.rec.mtu = iboe_get_mtu(ndev->mtu);
......
......@@ -1850,8 +1850,13 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
attr->path_mtu = cmd->base.path_mtu;
if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE)
attr->path_mig_state = cmd->base.path_mig_state;
if (cmd->base.attr_mask & IB_QP_QKEY)
if (cmd->base.attr_mask & IB_QP_QKEY) {
if (cmd->base.qkey & IB_QP_SET_QKEY && !capable(CAP_NET_RAW)) {
ret = -EPERM;
goto release_qp;
}
attr->qkey = cmd->base.qkey;
}
if (cmd->base.attr_mask & IB_QP_RQ_PSN)
attr->rq_psn = cmd->base.rq_psn;
if (cmd->base.attr_mask & IB_QP_SQ_PSN)
......
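
The hunk above is the "privileged QKEYs" fix called out in the summary: per the IBA, Q_Keys with the most significant bit set are controlled (privileged) Q_Keys, and programming one into a QP now requires CAP_NET_RAW. A minimal sketch of the rule follows; the mask and helper name are chosen here for illustration, not taken from the patch:

#include <linux/capability.h>
#include <linux/errno.h>

#define PRIVILEGED_QKEY_BIT 0x80000000u  /* illustrative mask: IBA controlled Q_Key range */

/* Sketch: only a CAP_NET_RAW caller may select a controlled (privileged) Q_Key. */
static int qkey_check_privileged(u32 qkey)
{
        if ((qkey & PRIVILEGED_QKEY_BIT) && !capable(CAP_NET_RAW))
                return -EPERM;
        return 0;
}
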
......@@ -222,8 +222,12 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
spin_lock_irq(&ev_queue->lock);
while (list_empty(&ev_queue->event_list)) {
spin_unlock_irq(&ev_queue->lock);
if (ev_queue->is_closed) {
spin_unlock_irq(&ev_queue->lock);
return -EIO;
}
spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
......@@ -233,12 +237,6 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
return -ERESTARTSYS;
spin_lock_irq(&ev_queue->lock);
/* If device was disassociated and no event exists set an error */
if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
spin_unlock_irq(&ev_queue->lock);
return -EIO;
}
}
event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
......
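
With the two hunks above applied, the closed-queue test runs under the lock on every pass of the wait loop, so reads from a disassociated device fail with -EIO in both blocking and non-blocking mode instead of only after a wakeup. Roughly, the loop in ib_uverbs_event_read() ends up as follows (reconstructed and simplified, not verbatim):

        spin_lock_irq(&ev_queue->lock);
        while (list_empty(&ev_queue->event_list)) {
                if (ev_queue->is_closed) {              /* device disassociated */
                        spin_unlock_irq(&ev_queue->lock);
                        return -EIO;
                }
                spin_unlock_irq(&ev_queue->lock);

                if (filp->f_flags & O_NONBLOCK)
                        return -EAGAIN;

                /* sleep until an event arrives or the queue is closed */
                if (wait_event_interruptible(ev_queue->poll_wait,
                                             !list_empty(&ev_queue->event_list) ||
                                             ev_queue->is_closed))
                        return -ERESTARTSYS;

                spin_lock_irq(&ev_queue->lock);
        }
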
......@@ -135,8 +135,6 @@ struct bnxt_re_dev {
struct delayed_work worker;
u8 cur_prio_map;
u16 active_speed;
u8 active_width;
/* FP Notification Queue (CQ & SRQ) */
struct tasklet_struct nq_task;
......
......@@ -199,6 +199,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
int rc;
memset(port_attr, 0, sizeof(*port_attr));
......@@ -228,10 +229,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
port_attr->sm_sl = 0;
port_attr->subnet_timeout = 0;
port_attr->init_type_reply = 0;
port_attr->active_speed = rdev->active_speed;
port_attr->active_width = rdev->active_width;
rc = ib_get_eth_speed(&rdev->ibdev, port_num, &port_attr->active_speed,
&port_attr->active_width);
return 0;
return rc;
}
int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
......
......@@ -1077,8 +1077,6 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
return rc;
}
dev_info(rdev_to_dev(rdev), "Device registered with IB successfully");
ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
&rdev->active_width);
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
......
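
The bnxt_re hunks drop the active_speed/active_width values cached once at registration and instead query the Ethernet link each time userspace asks, so post-probe link changes are reported correctly. Reassembled from the hunks, the tail of bnxt_re_query_port() now reads:

        port_attr->sm_sl = 0;
        port_attr->subnet_timeout = 0;
        port_attr->init_type_reply = 0;
        /* query the live link state rather than values cached at probe time */
        rc = ib_get_eth_speed(&rdev->ibdev, port_num, &port_attr->active_speed,
                              &port_attr->active_width);

        return rc;
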
......@@ -209,7 +209,8 @@ static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
!vport_qcounters_supported(dev)) || !port_num)
return &dev->port[0].cnts;
return &dev->port[port_num - 1].cnts;
return is_mdev_switchdev_mode(dev->mdev) ?
&dev->port[1].cnts : &dev->port[port_num - 1].cnts;
}
/**
......@@ -262,7 +263,7 @@ static struct rdma_hw_stats *
mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
return do_alloc_stats(cnts);
}
......@@ -329,6 +330,7 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
{
u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
struct mlx5_core_dev *mdev;
__be32 val;
int ret, i;
......@@ -336,12 +338,16 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
return 0;
mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
if (!mdev)
return -EOPNOTSUPP;
MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
MLX5_SET(query_q_counter_in, in, other_vport, 1);
MLX5_SET(query_q_counter_in, in, vport_number,
dev->port[port_num].rep->vport);
MLX5_SET(query_q_counter_in, in, aggregate, 1);
ret = mlx5_cmd_exec_inout(dev->mdev, query_q_counter, in, out);
ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
if (ret)
return ret;
......@@ -575,43 +581,53 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
port_num != MLX5_VPORT_PF;
const struct mlx5_ib_counter *names;
int j = 0, i;
int j = 0, i, size;
names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
ARRAY_SIZE(basic_q_cnts);
for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
offsets[j] = basic_q_cnts[i].offset;
offsets[j] = names[i].offset;
}
names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
ARRAY_SIZE(out_of_seq_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
offsets[j] = out_of_seq_q_cnts[i].offset;
offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
offsets[j] = retrans_q_cnts[i].offset;
offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
ARRAY_SIZE(extended_err_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
offsets[j] = extended_err_cnts[i].offset;
offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
ARRAY_SIZE(roce_accl_cnts);
if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
offsets[j] = roce_accl_cnts[i].offset;
offsets[j] = names[i].offset;
}
}
......@@ -661,25 +677,37 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
struct mlx5_ib_counters *cnts, u32 port_num)
{
u32 num_counters, num_op_counters = 0;
bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
port_num != MLX5_VPORT_PF;
u32 num_counters, num_op_counters = 0, size;
num_counters = ARRAY_SIZE(basic_q_cnts);
size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
ARRAY_SIZE(basic_q_cnts);
num_counters = size;
size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
ARRAY_SIZE(out_of_seq_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
num_counters += size;
size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
num_counters += ARRAY_SIZE(retrans_q_cnts);
num_counters += size;
size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
ARRAY_SIZE(extended_err_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
num_counters += ARRAY_SIZE(extended_err_cnts);
num_counters += size;
size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
ARRAY_SIZE(roce_accl_cnts);
if (MLX5_CAP_GEN(dev->mdev, roce_accl))
num_counters += ARRAY_SIZE(roce_accl_cnts);
num_counters += size;
cnts->num_q_counters = num_counters;
if (is_mdev_switchdev_mode(dev->mdev) && port_num != MLX5_VPORT_PF)
if (is_vport)
goto skip_non_qcounters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
......@@ -725,11 +753,11 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
int num_cnt_ports;
int num_cnt_ports = dev->num_ports;
int i, j;
num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
vport_qcounters_supported(dev)) ? dev->num_ports : 1;
if (is_mdev_switchdev_mode(dev->mdev))
num_cnt_ports = min(2, num_cnt_ports);
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
......@@ -761,15 +789,22 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
int num_cnt_ports;
int num_cnt_ports = dev->num_ports;
int err = 0;
int i;
bool is_shared;
MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
vport_qcounters_supported(dev)) ? dev->num_ports : 1;
/*
* In switchdev we need to allocate two ports, one that is used for
* the device Q_counters and it is essentially the real Q_counters of
* this device, while the other is used as a helper for PF to be able to
* query all other vports.
*/
if (is_mdev_switchdev_mode(dev->mdev))
num_cnt_ports = min(2, num_cnt_ports);
for (i = 0; i < num_cnt_ports; i++) {
err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
......
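
Taken together, the Q-counter hunks make switchdev mode allocate exactly two counter sets, one for the device itself and one used as a helper so the PF can query the other vports, and use the vport-specific name/offset tables consistently for sizing, filling, and querying. Reassembled from the first hunk, the lookup helper ends up as:

static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
                                                   u32 port_num)
{
        if ((is_mdev_switchdev_mode(dev->mdev) &&
             !vport_qcounters_supported(dev)) || !port_num)
                return &dev->port[0].cnts;

        return is_mdev_switchdev_mode(dev->mdev) ?
               &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
}
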
......@@ -695,8 +695,6 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
if (mlx5_ib_shared_ft_allowed(&dev->ib_dev))
ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
......@@ -2025,6 +2023,237 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
return 0;
}
static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
enum mlx5_flow_namespace_type ns_type)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *ns;
struct mlx5_flow_table *ft;
if (ft_prio->anchor.ft)
return 0;
ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
if (!ns)
return -EOPNOTSUPP;
ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
ft_attr.prio = 0;
ft_attr.max_fte = 2;
ft_attr.level = 1;
ft = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft))
return PTR_ERR(ft);
ft_prio->anchor.ft = ft;
return 0;
}
static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
{
if (ft_prio->anchor.ft) {
mlx5_destroy_flow_table(ft_prio->anchor.ft);
ft_prio->anchor.ft = NULL;
}
}
static int
steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *fg;
void *flow_group_in;
int err = 0;
if (ft_prio->anchor.fg_drop)
return 0;
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (!flow_group_in)
return -ENOMEM;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
if (IS_ERR(fg)) {
err = PTR_ERR(fg);
goto out;
}
ft_prio->anchor.fg_drop = fg;
out:
kvfree(flow_group_in);
return err;
}
static void
steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
{
if (ft_prio->anchor.fg_drop) {
mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
ft_prio->anchor.fg_drop = NULL;
}
}
static int
steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_group *fg;
void *flow_group_in;
int err = 0;
if (ft_prio->anchor.fg_goto_table)
return 0;
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (!flow_group_in)
return -ENOMEM;
fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
if (IS_ERR(fg)) {
err = PTR_ERR(fg);
goto out;
}
ft_prio->anchor.fg_goto_table = fg;
out:
kvfree(flow_group_in);
return err;
}
static void
steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
{
if (ft_prio->anchor.fg_goto_table) {
mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
ft_prio->anchor.fg_goto_table = NULL;
}
}
static int
steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
{
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_handle *handle;
if (ft_prio->anchor.rule_drop)
return 0;
flow_act.fg = ft_prio->anchor.fg_drop;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
NULL, 0);
if (IS_ERR(handle))
return PTR_ERR(handle);
ft_prio->anchor.rule_drop = handle;
return 0;
}
static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
{
if (ft_prio->anchor.rule_drop) {
mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
ft_prio->anchor.rule_drop = NULL;
}
}
static int
steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
{
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_handle *handle;
if (ft_prio->anchor.rule_goto_table)
return 0;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
flow_act.fg = ft_prio->anchor.fg_goto_table;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = ft_prio->flow_table;
handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
&dest, 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
ft_prio->anchor.rule_goto_table = handle;
return 0;
}
static void
steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
{
if (ft_prio->anchor.rule_goto_table) {
mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
ft_prio->anchor.rule_goto_table = NULL;
}
}
static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
enum mlx5_flow_namespace_type ns_type)
{
int err;
err = steering_anchor_create_ft(dev, ft_prio, ns_type);
if (err)
return err;
err = steering_anchor_create_fg_drop(ft_prio);
if (err)
goto destroy_ft;
err = steering_anchor_create_fg_goto_table(ft_prio);
if (err)
goto destroy_fg_drop;
err = steering_anchor_create_rule_drop(ft_prio);
if (err)
goto destroy_fg_goto_table;
err = steering_anchor_create_rule_goto_table(ft_prio);
if (err)
goto destroy_rule_drop;
return 0;
destroy_rule_drop:
steering_anchor_destroy_rule_drop(ft_prio);
destroy_fg_goto_table:
steering_anchor_destroy_fg_goto_table(ft_prio);
destroy_fg_drop:
steering_anchor_destroy_fg_drop(ft_prio);
destroy_ft:
steering_anchor_destroy_ft(ft_prio);
return err;
}
static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
{
steering_anchor_destroy_rule_goto_table(ft_prio);
steering_anchor_destroy_rule_drop(ft_prio);
steering_anchor_destroy_fg_goto_table(ft_prio);
steering_anchor_destroy_fg_drop(ft_prio);
steering_anchor_destroy_ft(ft_prio);
}
static int steering_anchor_cleanup(struct ib_uobject *uobject,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
......@@ -2035,6 +2264,9 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject,
return -EBUSY;
mutex_lock(&obj->dev->flow_db->lock);
if (!--obj->ft_prio->anchor.rule_goto_table_ref)
steering_anchor_destroy_rule_goto_table(obj->ft_prio);
put_flow_table(obj->dev, obj->ft_prio, true);
mutex_unlock(&obj->dev->flow_db->lock);
......@@ -2042,6 +2274,24 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject,
return 0;
}
static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
int count)
{
while (count--)
mlx5_steering_anchor_destroy_res(&prio[count]);
}
void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
{
fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
}
static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
struct mlx5_ib_flow_matcher *obj)
{
......@@ -2182,21 +2432,31 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
return -ENOMEM;
mutex_lock(&dev->flow_db->lock);
ft_prio = _get_flow_table(dev, priority, ns_type, 0);
if (IS_ERR(ft_prio)) {
mutex_unlock(&dev->flow_db->lock);
err = PTR_ERR(ft_prio);
goto free_obj;
}
ft_prio->refcount++;
ft_id = mlx5_flow_table_id(ft_prio->flow_table);
mutex_unlock(&dev->flow_db->lock);
if (!ft_prio->anchor.rule_goto_table_ref) {
err = steering_anchor_create_res(dev, ft_prio, ns_type);
if (err)
goto put_flow_table;
}
ft_prio->anchor.rule_goto_table_ref++;
ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
&ft_id, sizeof(ft_id));
if (err)
goto put_flow_table;
goto destroy_res;
mutex_unlock(&dev->flow_db->lock);
uobj->object = obj;
obj->dev = dev;
......@@ -2205,8 +2465,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
return 0;
destroy_res:
--ft_prio->anchor.rule_goto_table_ref;
mlx5_steering_anchor_destroy_res(ft_prio);
put_flow_table:
mutex_lock(&dev->flow_db->lock);
put_flow_table(dev, ft_prio, true);
mutex_unlock(&dev->flow_db->lock);
free_obj:
......
......@@ -10,6 +10,7 @@
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
#else
static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
{
......@@ -21,9 +22,24 @@ static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
mutex_init(&dev->flow_db->lock);
return 0;
}
inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {}
#endif
static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
{
/* When a steering anchor is created, a special flow table is also
* created for the user to reference. Since the user can reference it,
* the kernel cannot trust that when the user destroys the steering
* anchor, they no longer reference the flow table.
*
* To address this issue, when a user destroys a steering anchor, only
* the flow steering rule in the table is destroyed, but the table
* itself is kept to deal with the above scenario. The remaining
* resources are only removed when the RDMA device is destroyed, which
* is a safe assumption that all references are gone.
*/
mlx5_ib_fs_cleanup_anchor(dev);
kfree(dev->flow_db);
}
#endif /* _MLX5_IB_FS_H */
......@@ -4275,6 +4275,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
mlx5_ib_stage_delay_drop_init,
mlx5_ib_stage_delay_drop_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
mlx5_ib_restrack_init,
NULL),
......
......@@ -237,8 +237,19 @@ enum {
#define MLX5_IB_NUM_SNIFFER_FTS 2
#define MLX5_IB_NUM_EGRESS_FTS 1
#define MLX5_IB_NUM_FDB_FTS MLX5_BY_PASS_NUM_REGULAR_PRIOS
struct mlx5_ib_anchor {
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg_goto_table;
struct mlx5_flow_group *fg_drop;
struct mlx5_flow_handle *rule_goto_table;
struct mlx5_flow_handle *rule_drop;
unsigned int rule_goto_table_ref;
};
struct mlx5_ib_flow_prio {
struct mlx5_flow_table *flow_table;
struct mlx5_ib_anchor anchor;
unsigned int refcount;
};
......@@ -1587,6 +1598,9 @@ static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass))
return 0;
if (mlx5_lag_is_lacp_owner(dev->mdev) && !dev->lag_active)
return 0;
return dev->lag_active ||
(MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 &&
MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity));
......
......@@ -1237,6 +1237,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(tisc, tisc, transport_domain, tdn);
if (!mlx5_ib_lag_should_assign_affinity(dev) &&
mlx5_lag_is_lacp_owner(dev->mdev))
MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
......
......@@ -113,8 +113,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
spin_unlock_irqrestore(&cq->cq_lock, flags);
if ((cq->notify == IB_CQ_NEXT_COMP) ||
(cq->notify == IB_CQ_SOLICITED && solicited)) {
cq->notify = 0;
......@@ -122,6 +120,8 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
spin_unlock_irqrestore(&cq->cq_lock, flags);
return 0;
}
......
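
The rxe_cq_post() fix keeps cq_lock held across the notify test, the clearing of cq->notify, and the call to the completion handler, so they are serialized with CQE posting instead of racing after an early unlock. Reassembled from the hunks, the end of the function reads:

        queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);

        if ((cq->notify == IB_CQ_NEXT_COMP) ||
            (cq->notify == IB_CQ_SOLICITED && solicited)) {
                cq->notify = 0;
                cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
        }

        spin_unlock_irqrestore(&cq->cq_lock, flags);

        return 0;
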
......@@ -159,6 +159,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
pkt->mask = RXE_GRH_MASK;
pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);
/* remove udp header */
skb_pull(skb, sizeof(struct udphdr));
rxe_rcv(skb);
return 0;
......@@ -401,6 +404,9 @@ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
return -EIO;
}
/* remove udp header */
skb_pull(skb, sizeof(struct udphdr));
rxe_rcv(skb);
return 0;
......
......@@ -176,6 +176,9 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
spin_lock_init(&qp->rq.producer_lock);
spin_lock_init(&qp->rq.consumer_lock);
skb_queue_head_init(&qp->req_pkts);
skb_queue_head_init(&qp->resp_pkts);
atomic_set(&qp->ssn, 0);
atomic_set(&qp->skb_out, 0);
}
......@@ -234,8 +237,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->req.opcode = -1;
qp->comp.opcode = -1;
skb_queue_head_init(&qp->req_pkts);
rxe_init_task(&qp->req.task, qp, rxe_requester);
rxe_init_task(&qp->comp.task, qp, rxe_completer);
......@@ -279,8 +280,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
}
}
skb_queue_head_init(&qp->resp_pkts);
rxe_init_task(&qp->resp.task, qp, rxe_responder);
qp->resp.opcode = OPCODE_NONE;
......
......@@ -489,8 +489,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (mw->access & IB_ZERO_BASED)
qp->resp.offset = mw->addr;
rxe_put(mw);
rxe_get(mr);
rxe_put(mw);
mw = NULL;
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
......
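
The check_rkey() fix is a small reordering: the MR reference is taken before the MW reference is dropped, closing the window in which releasing the MW could let the underlying MR go away. The corrected sequence, as reassembled from the hunk:

                if (mw->access & IB_ZERO_BASED)
                        qp->resp.offset = mw->addr;

                rxe_get(mr);    /* hold the MR first ...            */
                rxe_put(mw);    /* ... then dropping the MW is safe */
                mw = NULL;
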
......@@ -657,9 +657,13 @@ static int
isert_connect_error(struct rdma_cm_id *cma_id)
{
struct isert_conn *isert_conn = cma_id->qp->qp_context;
struct isert_np *isert_np = cma_id->context;
ib_drain_qp(isert_conn->qp);
mutex_lock(&isert_np->mutex);
list_del_init(&isert_conn->node);
mutex_unlock(&isert_np->mutex);
isert_conn->cm_id = NULL;
isert_put_conn(isert_conn);
......@@ -2431,6 +2435,7 @@ isert_free_np(struct iscsi_np *np)
{
struct isert_np *isert_np = np->np_context;
struct isert_conn *isert_conn, *n;
LIST_HEAD(drop_conn_list);
if (isert_np->cm_id)
rdma_destroy_id(isert_np->cm_id);
......@@ -2450,7 +2455,7 @@ isert_free_np(struct iscsi_np *np)
node) {
isert_info("cleaning isert_conn %p state (%d)\n",
isert_conn, isert_conn->state);
isert_connect_release(isert_conn);
list_move_tail(&isert_conn->node, &drop_conn_list);
}
}
......@@ -2461,11 +2466,16 @@ isert_free_np(struct iscsi_np *np)
node) {
isert_info("cleaning isert_conn %p state (%d)\n",
isert_conn, isert_conn->state);
isert_connect_release(isert_conn);
list_move_tail(&isert_conn->node, &drop_conn_list);
}
}
mutex_unlock(&isert_np->mutex);
list_for_each_entry_safe(isert_conn, n, &drop_conn_list, node) {
list_del_init(&isert_conn->node);
isert_connect_release(isert_conn);
}
np->np_context = NULL;
kfree(isert_np);
}
......@@ -2560,8 +2570,6 @@ static void isert_wait_conn(struct iscsit_conn *conn)
isert_put_unsol_pending_cmds(conn);
isert_wait4cmds(conn);
isert_wait4logout(isert_conn);
queue_work(isert_release_wq, &isert_conn->release_work);
}
static void isert_free_conn(struct iscsit_conn *conn)
......
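
The isert_free_np() change stops calling isert_connect_release() while isert_np->mutex is held: connections are first moved onto a local drop_conn_list under the mutex and released only after it is dropped. A generic sketch of the pattern is below; the source list name is a placeholder, not the driver's actual field:

        LIST_HEAD(drop_conn_list);

        mutex_lock(&isert_np->mutex);
        /* detach candidates while the lock is held */
        list_for_each_entry_safe(isert_conn, n, &np_conn_list /* placeholder */, node)
                list_move_tail(&isert_conn->node, &drop_conn_list);
        mutex_unlock(&isert_np->mutex);

        /* tear the connections down without holding isert_np->mutex */
        list_for_each_entry_safe(isert_conn, n, &drop_conn_list, node) {
                list_del_init(&isert_conn->node);
                isert_connect_release(isert_conn);
        }
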
......@@ -2040,6 +2040,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
return 0;
}
/* The caller should do the cleanup in case of error */
static int create_cm(struct rtrs_clt_con *con)
{
struct rtrs_path *s = con->c.path;
......@@ -2062,14 +2063,14 @@ static int create_cm(struct rtrs_clt_con *con)
err = rdma_set_reuseaddr(cm_id, 1);
if (err != 0) {
rtrs_err(s, "Set address reuse failed, err: %d\n", err);
goto destroy_cm;
return err;
}
err = rdma_resolve_addr(cm_id, (struct sockaddr *)&clt_path->s.src_addr,
(struct sockaddr *)&clt_path->s.dst_addr,
RTRS_CONNECT_TIMEOUT_MS);
if (err) {
rtrs_err(s, "Failed to resolve address, err: %d\n", err);
goto destroy_cm;
return err;
}
/*
* Combine connection status and session events. This is needed
......@@ -2084,29 +2085,15 @@ static int create_cm(struct rtrs_clt_con *con)
if (err == 0)
err = -ETIMEDOUT;
/* Timedout or interrupted */
goto errr;
}
if (con->cm_err < 0) {
err = con->cm_err;
goto errr;
return err;
}
if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING) {
if (con->cm_err < 0)
return con->cm_err;
if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING)
/* Device removal */
err = -ECONNABORTED;
goto errr;
}
return -ECONNABORTED;
return 0;
errr:
stop_cm(con);
mutex_lock(&con->con_mutex);
destroy_con_cq_qp(con);
mutex_unlock(&con->con_mutex);
destroy_cm:
destroy_cm(con);
return err;
}
static void rtrs_clt_path_up(struct rtrs_clt_path *clt_path)
......@@ -2334,7 +2321,7 @@ static void rtrs_clt_close_work(struct work_struct *work)
static int init_conns(struct rtrs_clt_path *clt_path)
{
unsigned int cid;
int err;
int err, i;
/*
* On every new session connections increase reconnect counter
......@@ -2350,10 +2337,8 @@ static int init_conns(struct rtrs_clt_path *clt_path)
goto destroy;
err = create_cm(to_clt_con(clt_path->s.con[cid]));
if (err) {
destroy_con(to_clt_con(clt_path->s.con[cid]));
if (err)
goto destroy;
}
}
err = alloc_path_reqs(clt_path);
if (err)
......@@ -2364,15 +2349,21 @@ static int init_conns(struct rtrs_clt_path *clt_path)
return 0;
destroy:
while (cid--) {
struct rtrs_clt_con *con = to_clt_con(clt_path->s.con[cid]);
/* Make sure we do the cleanup in the order they are created */
for (i = 0; i <= cid; i++) {
struct rtrs_clt_con *con;
stop_cm(con);
if (!clt_path->s.con[i])
break;
mutex_lock(&con->con_mutex);
destroy_con_cq_qp(con);
mutex_unlock(&con->con_mutex);
destroy_cm(con);
con = to_clt_con(clt_path->s.con[i]);
if (con->c.cm_id) {
stop_cm(con);
mutex_lock(&con->con_mutex);
destroy_con_cq_qp(con);
mutex_unlock(&con->con_mutex);
destroy_cm(con);
}
destroy_con(con);
}
/*
......
......@@ -37,8 +37,10 @@ struct rtrs_iu *rtrs_iu_alloc(u32 iu_num, size_t size, gfp_t gfp_mask,
goto err;
iu->dma_addr = ib_dma_map_single(dma_dev, iu->buf, size, dir);
if (ib_dma_mapping_error(dma_dev, iu->dma_addr))
if (ib_dma_mapping_error(dma_dev, iu->dma_addr)) {
kfree(iu->buf);
goto err;
}
iu->cqe.done = done;
iu->size = size;
......
......@@ -276,18 +276,6 @@ static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
return pci_num_vf(dev->pdev) ? true : false;
}
static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
{
/* LACP owner conditions:
* 1) Function is physical.
* 2) LAG is supported by FW.
* 3) LAG is managed by driver (currently the only option).
*/
return MLX5_CAP_GEN(dev, vport_group_manager) &&
(MLX5_CAP_GEN(dev, num_lag_ports) > 1) &&
MLX5_CAP_GEN(dev, lag_master);
}
int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev);
static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev)
{
......
......@@ -1238,6 +1238,18 @@ static inline u16 mlx5_core_max_vfs(const struct mlx5_core_dev *dev)
return dev->priv.sriov.max_vfs;
}
static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
{
/* LACP owner conditions:
* 1) Function is physical.
* 2) LAG is supported by FW.
* 3) LAG is managed by driver (currently the only option).
*/
return MLX5_CAP_GEN(dev, vport_group_manager) &&
(MLX5_CAP_GEN(dev, num_lag_ports) > 1) &&
MLX5_CAP_GEN(dev, lag_master);
}
static inline int mlx5_get_gid_table_len(u16 param)
{
if (param > 4) {
......
......@@ -194,29 +194,6 @@ static inline enum ib_mtu iboe_get_mtu(int mtu)
return 0;
}
static inline int iboe_get_rate(struct net_device *dev)
{
struct ethtool_link_ksettings cmd;
int err;
rtnl_lock();
err = __ethtool_get_link_ksettings(dev, &cmd);
rtnl_unlock();
if (err)
return IB_RATE_PORT_CURRENT;
if (cmd.base.speed >= 40000)
return IB_RATE_40_GBPS;
else if (cmd.base.speed >= 30000)
return IB_RATE_30_GBPS;
else if (cmd.base.speed >= 20000)
return IB_RATE_20_GBPS;
else if (cmd.base.speed >= 10000)
return IB_RATE_10_GBPS;
else
return IB_RATE_PORT_CURRENT;
}
static inline int rdma_link_local_addr(struct in6_addr *addr)
{
if (addr->s6_addr32[0] == htonl(0xfe800000) &&
......