Commit a0354d23 authored by Jason Gunthorpe

Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux

Saeed Mahameed says:
====================

This PR contains changes from the mlx5-next branch,
already reviewed on the netdev and rdma mailing lists, links below.

1) From Leon, Dynamically assign MSI-X vectors count
Already Acked by Bjorn Helgaas.
https://patchwork.kernel.org/project/netdevbpf/cover/20210314124256.70253-1-leon@kernel.org/

2) Cleanup series:
https://patchwork.kernel.org/project/netdevbpf/cover/20210311070915.321814-1-saeed@kernel.org/

From Mark, E-Switch cleanups and refactoring, and the addition
of the HW bits needed for single FDB mode.

From Mikhael, Remove unused struct field

From Saeed, Cleanup W=1 prototype warning

From Zheng, Esw related cleanup

From Tariq, Use order-0 page allocation for EQs

====================

* mlx5-next:
  net/mlx5: Implement sriov_get_vf_total_msix/count() callbacks
  net/mlx5: Dynamically assign MSI-X vectors count
  net/mlx5: Add dynamic MSI-X capabilities bits
  PCI/IOV: Add sysfs MSI-X vector assignment interface
  net/mlx5: Use order-0 allocations for EQs
  net/mlx5: Add IFC bits needed for single FDB mode
  net/mlx5: E-Switch, Refactor send to vport to be more generic
  RDMA/mlx5: Use representor E-Switch when getting netdev and metadata
  net/mlx5: E-Switch, Add eswitch pointer to each representor
  net/mlx5: E-Switch, Add match on vhca id to default send rules
  net/mlx5: Remove unused mlx5_core_health member recover_work
  net/mlx5: simplify the return expression of mlx5_esw_offloads_pair()
  net/mlx5: Cleanup prototype warning
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parents 69455df0 e71b75f7
......@@ -375,3 +375,32 @@ Description:
The value comes from the PCI kernel device state and can be one
of: "unknown", "error", "D0", D1", "D2", "D3hot", "D3cold".
The file is read only.
What: /sys/bus/pci/devices/.../sriov_vf_total_msix
Date: January 2021
Contact: Leon Romanovsky <leonro@nvidia.com>
Description:
This file is associated with a SR-IOV physical function (PF).
It contains the total number of MSI-X vectors available for
assignment to all virtual functions (VFs) associated with the PF.
The value will be zero if the device doesn't support this
functionality. For supported devices, the value is constant
and won't change after the MSI-X vectors are assigned.
What: /sys/bus/pci/devices/.../sriov_vf_msix_count
Date: January 2021
Contact: Leon Romanovsky <leonro@nvidia.com>
Description:
This file is associated with a SR-IOV virtual function (VF).
It allows configuration of the number of MSI-X vectors for
the VF. This allows devices that have a global pool of MSI-X
vectors to optimally divide them between VFs based on VF usage.
The values accepted are:
* > 0 - this number will be reported as the Table Size in the
VF's MSI-X capability
* < 0 - not valid
* = 0 - will reset to the device default value
The file is writable if the PF is bound to a driver that
implements ->sriov_set_msix_vec_count().
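
For illustration only, a minimal user-space sketch of how these two attributes could be exercised together; the PCI addresses and the requested vector count are placeholders, not values from this series:

#include <stdio.h>

int main(void)
{
	/* Placeholder BDFs: a PF and one of its (still unbound) VFs. */
	const char *pf_total = "/sys/bus/pci/devices/0000:08:00.0/sriov_vf_total_msix";
	const char *vf_count = "/sys/bus/pci/devices/0000:08:00.2/sriov_vf_msix_count";
	unsigned int total = 0;
	FILE *f;

	f = fopen(pf_total, "r");
	if (f) {
		if (fscanf(f, "%u", &total) != 1)
			total = 0;	/* 0 also means "no pool exposed" */
		fclose(f);
	}
	printf("PF-wide MSI-X pool: %u vectors\n", total);

	f = fopen(vf_count, "w");	/* the write fails if a driver is bound to the VF */
	if (f) {
		fprintf(f, "32\n");	/* request a 32-entry MSI-X table for this VF */
		fclose(f);
	}
	return 0;
}
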
......@@ -879,7 +879,7 @@ static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
misc_parameters_2);
MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_for_match(esw,
mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
rep->vport));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
misc_parameters_2);
......
......@@ -20,7 +20,7 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
rep->rep_data[REP_IB].priv = ibdev;
write_lock(&ibdev->port[vport_index].roce.netdev_lock);
ibdev->port[vport_index].roce.netdev =
mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
write_unlock(&ibdev->port[vport_index].roce.netdev_lock);
return 0;
......@@ -123,8 +123,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
rep = dev->port[port - 1].rep;
return mlx5_eswitch_add_send_to_vport_rule(esw, rep->vport,
sq->base.mqp.qpn);
return mlx5_eswitch_add_send_to_vport_rule(esw, rep, sq->base.mqp.qpn);
}
static int mlx5r_rep_probe(struct auxiliary_device *adev,
......
......@@ -127,7 +127,6 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
struct net_device *ndev,
u32 *port_num)
{
struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
struct net_device *rep_ndev;
struct mlx5_ib_port *port;
int i;
......@@ -138,7 +137,7 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
continue;
read_lock(&port->roce.netdev_lock);
rep_ndev = mlx5_ib_get_rep_netdev(esw,
rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
port->rep->vport);
if (rep_ndev == ndev) {
read_unlock(&port->roce.netdev_lock);
......
......@@ -128,7 +128,7 @@ int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg
if (err)
return err;
err = devlink_fmsg_u32_pair_put(fmsg, "size", eq->core.nent);
err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
if (err)
return err;
......
......@@ -411,8 +411,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
}
/* Add re-inject rule to the PF/representor sqs */
flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw,
rep->vport,
flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep,
sqns_array[i]);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
......
......@@ -271,7 +271,7 @@ static void init_eq_buf(struct mlx5_eq *eq)
struct mlx5_eqe *eqe;
int i;
for (i = 0; i < eq->nent; i++) {
for (i = 0; i < eq_get_size(eq); i++) {
eqe = get_eqe(eq, i);
eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
}
......@@ -281,8 +281,10 @@ static int
create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
struct mlx5_eq_param *param)
{
u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE);
struct mlx5_cq_table *cq_table = &eq->cq_table;
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
struct mlx5_priv *priv = &dev->priv;
u8 vecidx = param->irq_index;
__be64 *pas;
......@@ -297,16 +299,18 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
spin_lock_init(&cq_table->lock);
INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
eq->cons_index = 0;
err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride),
&eq->frag_buf, dev->priv.numa_node);
if (err)
return err;
mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
init_eq_buf(eq);
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;
MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
......@@ -315,7 +319,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
}
pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
mlx5_fill_page_array(&eq->buf, pas);
mlx5_fill_page_frag_array(&eq->frag_buf, pas);
MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
......@@ -326,11 +330,11 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
param->mask[i]);
eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz);
MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
MLX5_SET(eqc, eqc, intr, vecidx);
MLX5_SET(eqc, eqc, log_page_size,
eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
if (err)
......@@ -356,7 +360,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
kvfree(in);
err_buf:
mlx5_buf_free(dev, &eq->buf);
mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
}
......@@ -413,7 +417,7 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
eq->eqn);
synchronize_irq(eq->irqn);
mlx5_buf_free(dev, &eq->buf);
mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
}
......@@ -764,10 +768,11 @@ EXPORT_SYMBOL(mlx5_eq_destroy_generic);
struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
{
u32 ci = eq->cons_index + cc;
u32 nent = eq_get_size(eq);
struct mlx5_eqe *eqe;
eqe = get_eqe(eq, ci & (eq->nent - 1));
eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
eqe = get_eqe(eq, ci & (nent - 1));
eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe;
/* Make sure we read EQ entry contents after we've
* checked the ownership bit.
*/
......
......@@ -1036,7 +1036,8 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
}
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
struct mlx5_eswitch_rep *rep,
u32 sqn)
{
struct mlx5_flow_act flow_act = {0};
......@@ -1054,21 +1055,30 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
/* source vport is the esw manager */
MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport);
MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport);
if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
MLX5_CAP_GEN(rep->esw->dev, vhca_id));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
MLX5_SET_TO_ONES(fte_match_set_misc, misc,
source_eswitch_owner_vhca_id);
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest.vport.num = vport;
dest.vport.num = rep->vport;
dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id);
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow_rule))
esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n",
PTR_ERR(flow_rule));
out:
kvfree(spec);
return flow_rule;
......@@ -1702,6 +1712,12 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
MLX5_SET_TO_ONES(fte_match_param, match_criteria,
misc_parameters.source_eswitch_owner_vhca_id);
MLX5_SET(create_flow_group_in, flow_group_in,
source_eswitch_owner_vhca_id_valid, 1);
}
ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ;
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
......@@ -2288,13 +2304,8 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
struct mlx5_eswitch *peer_esw)
{
int err;
err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
if (err)
return err;
return 0;
return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
}
static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
......@@ -3146,6 +3157,7 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
esw->offloads.rep_ops[rep_type] = ops;
mlx5_esw_for_all_reps(esw, i, rep) {
if (likely(mlx5_eswitch_vport_has_rep(esw, i))) {
rep->esw = esw;
rep_data = &rep->rep_data[rep_type];
atomic_set(&rep_data->state, REP_REGISTERED);
}
......
......@@ -35,7 +35,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
}
/**
* Set lag port affinity
* mlx5_lag_set_port_affinity
*
* @ldev: lag device
* @port:
......
......@@ -22,15 +22,15 @@ struct mlx5_cq_table {
};
struct mlx5_eq {
struct mlx5_frag_buf_ctrl fbc;
struct mlx5_frag_buf frag_buf;
struct mlx5_core_dev *dev;
struct mlx5_cq_table cq_table;
__be32 __iomem *doorbell;
u32 cons_index;
struct mlx5_frag_buf buf;
unsigned int vecidx;
unsigned int irqn;
u8 eqn;
int nent;
struct mlx5_rsc_debug *dbg;
};
......@@ -47,16 +47,21 @@ struct mlx5_eq_comp {
struct list_head list;
};
static inline u32 eq_get_size(struct mlx5_eq *eq)
{
return eq->fbc.sz_m1 + 1;
}
static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
{
return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
return mlx5_frag_buf_get_wqe(&eq->fbc, entry);
}
static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
{
struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & eq->fbc.sz_m1);
return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
return (eqe->owner ^ (eq->cons_index >> eq->fbc.log_sz)) & 1 ? NULL : eqe;
}
static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
......
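
As a side note, a simplified sketch of the ownership convention that next_eqe_sw() and mlx5_eq_get_eqe() rely on; the types below are toys standing in for the real EQE and frag-buffer structures, not mlx5 definitions:

#include <stdbool.h>
#include <stdint.h>

struct toy_eqe {
	uint8_t owner;
};

/* The EQ ring holds 1 << log_sz entries.  Hardware flips the owner bit it
 * writes each time it wraps, so an entry is software-owned when the low
 * bit of its owner field matches the parity of the number of completed
 * passes (cons_index >> log_sz) -- the same test as in next_eqe_sw().
 */
static bool toy_eqe_is_sw_owned(const struct toy_eqe *eqe, uint32_t cons_index,
				uint8_t log_sz)
{
	uint32_t passes = cons_index >> log_sz;

	return ((eqe->owner ^ passes) & 1) == 0;
}
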
......@@ -571,6 +571,10 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
mlx5_vhca_state_cap_handle(dev, set_hca_cap);
if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix))
MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix,
MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
}
......@@ -1696,6 +1700,8 @@ static struct pci_driver mlx5_core_driver = {
.shutdown = shutdown,
.err_handler = &mlx5_err_handler,
.sriov_configure = mlx5_core_sriov_configure,
.sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix,
.sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count,
};
static void mlx5_core_verify_params(void)
......
......@@ -140,6 +140,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
int mlx5_sriov_attach(struct mlx5_core_dev *dev);
void mlx5_sriov_detach(struct mlx5_core_dev *dev);
int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count);
int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
......@@ -174,6 +175,11 @@ int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
struct notifier_block *nb);
int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
struct notifier_block *nb);
int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
int msix_vec_count);
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
struct cpumask *
mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
......@@ -273,4 +279,10 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot);
int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work);
static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
return MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
}
#endif /* __MLX5_CORE_H__ */
......@@ -61,6 +61,79 @@ static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
return &irq_table->irq[vecidx];
}
/**
* mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
* to be assigned to each VF.
* @dev: PF to work on
* @num_vfs: Number of enabled VFs
*/
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
{
int num_vf_msix, min_msix, max_msix;
num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
if (!num_vf_msix)
return 0;
min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
/* Limit maximum number of MSI-X vectors so the default configuration
* has some available in the pool. This will allow the user to increase
* the number of vectors in a VF without having to first size-down other
* VFs.
*/
return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
}
/**
* mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
* @dev: PF to work on
* @function_id: Internal PCI VF function ID
* @msix_vec_count: Number of MSI-X vectors to set
*/
int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
int msix_vec_count)
{
int sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
int num_vf_msix, min_msix, max_msix;
void *hca_cap, *cap;
int ret;
num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
if (!num_vf_msix)
return 0;
if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
return -EOPNOTSUPP;
min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
if (msix_vec_count < min_msix)
return -EINVAL;
if (msix_vec_count > max_msix)
return -EOVERFLOW;
hca_cap = kzalloc(sz, GFP_KERNEL);
if (!hca_cap)
return -ENOMEM;
cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
kfree(hca_cap);
return ret;
}
int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
struct notifier_block *nb)
{
......
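
A rough worked example of the default sizing computed by mlx5_get_default_msix_vec_count() above; all capability values are invented for illustration:

#define min(a, b)	((a) < (b) ? (a) : (b))
#define max(a, b)	((a) > (b) ? (a) : (b))

/* Invented numbers: a 2048-vector pool shared by 16 VFs, with the per-VF
 * table size bounded to [12, 512] vectors.
 */
int num_vf_msix = 2048, num_vfs = 16, min_msix = 12, max_msix = 512;

/* min(2048 / 16, 512 / 2) = min(128, 256) = 128, then max(128, 12) = 128:
 * each VF starts with 128 vectors while half of the per-VF maximum stays
 * in the pool so individual VFs can later be grown via sriov_vf_msix_count.
 */
int dflt = max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
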
......@@ -71,8 +71,7 @@ static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf)
static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int err;
int vf;
int err, vf, num_msix_count;
if (!MLX5_ESWITCH_MANAGER(dev))
goto enable_vfs_hca;
......@@ -85,12 +84,22 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
}
enable_vfs_hca:
num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs);
for (vf = 0; vf < num_vfs; vf++) {
err = mlx5_core_enable_hca(dev, vf + 1);
if (err) {
mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
continue;
}
err = mlx5_set_msix_vec_count(dev, vf + 1, num_msix_count);
if (err) {
mlx5_core_warn(dev,
"failed to set MSI-X vector counts VF %d, err %d\n",
vf, err);
continue;
}
sriov->vfs_ctx[vf].enabled = 1;
if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) {
err = sriov_restore_guids(dev, vf);
......@@ -178,6 +187,41 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
return err ? err : num_vfs;
}
int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count)
{
struct pci_dev *pf = pci_physfn(vf);
struct mlx5_core_sriov *sriov;
struct mlx5_core_dev *dev;
int num_vf_msix, id;
dev = pci_get_drvdata(pf);
num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
if (!num_vf_msix)
return -EOPNOTSUPP;
if (!msix_vec_count)
msix_vec_count =
mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf));
sriov = &dev->priv.sriov;
/* Reversed translation of PCI VF function number to the internal
* function_id, which exists in the name of virtfn symlink.
*/
for (id = 0; id < pci_num_vf(pf); id++) {
if (!sriov->vfs_ctx[id].enabled)
continue;
if (vf->devfn == pci_iov_virtfn_devfn(pf, id))
break;
}
if (id == pci_num_vf(pf) || !sriov->vfs_ctx[id].enabled)
return -EINVAL;
return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count);
}
int mlx5_sriov_attach(struct mlx5_core_dev *dev)
{
if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev))
......
......@@ -34,11 +34,6 @@
#include "wq.h"
#include "mlx5_core.h"
static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
{
return ((u32)1 << log_sz) << log_stride;
}
int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_cyc *wq,
struct mlx5_wq_ctrl *wq_ctrl)
......
......@@ -31,6 +31,7 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
return (dev->devfn + dev->sriov->offset +
dev->sriov->stride * vf_id) & 0xff;
}
EXPORT_SYMBOL_GPL(pci_iov_virtfn_devfn);
/*
* Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
......@@ -157,6 +158,92 @@ int pci_iov_sysfs_link(struct pci_dev *dev,
return rc;
}
#ifdef CONFIG_PCI_MSI
static ssize_t sriov_vf_total_msix_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct pci_dev *pdev = to_pci_dev(dev);
u32 vf_total_msix = 0;
device_lock(dev);
if (!pdev->driver || !pdev->driver->sriov_get_vf_total_msix)
goto unlock;
vf_total_msix = pdev->driver->sriov_get_vf_total_msix(pdev);
unlock:
device_unlock(dev);
return sysfs_emit(buf, "%u\n", vf_total_msix);
}
static DEVICE_ATTR_RO(sriov_vf_total_msix);
static ssize_t sriov_vf_msix_count_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct pci_dev *vf_dev = to_pci_dev(dev);
struct pci_dev *pdev = pci_physfn(vf_dev);
int val, ret;
ret = kstrtoint(buf, 0, &val);
if (ret)
return ret;
if (val < 0)
return -EINVAL;
device_lock(&pdev->dev);
if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) {
ret = -EOPNOTSUPP;
goto err_pdev;
}
device_lock(&vf_dev->dev);
if (vf_dev->driver) {
/*
* A driver is already attached to this VF and has configured
* itself based on the current MSI-X vector count. Changing
* the vector size could mess up the driver, so block it.
*/
ret = -EBUSY;
goto err_dev;
}
ret = pdev->driver->sriov_set_msix_vec_count(vf_dev, val);
err_dev:
device_unlock(&vf_dev->dev);
err_pdev:
device_unlock(&pdev->dev);
return ret ? : count;
}
static DEVICE_ATTR_WO(sriov_vf_msix_count);
#endif
static struct attribute *sriov_vf_dev_attrs[] = {
#ifdef CONFIG_PCI_MSI
&dev_attr_sriov_vf_msix_count.attr,
#endif
NULL,
};
static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
struct pci_dev *pdev = to_pci_dev(dev);
if (!pdev->is_virtfn)
return 0;
return a->mode;
}
const struct attribute_group sriov_vf_dev_attr_group = {
.attrs = sriov_vf_dev_attrs,
.is_visible = sriov_vf_attrs_are_visible,
};
int pci_iov_add_virtfn(struct pci_dev *dev, int id)
{
int i;
......@@ -400,18 +487,21 @@ static DEVICE_ATTR_RO(sriov_stride);
static DEVICE_ATTR_RO(sriov_vf_device);
static DEVICE_ATTR_RW(sriov_drivers_autoprobe);
static struct attribute *sriov_dev_attrs[] = {
static struct attribute *sriov_pf_dev_attrs[] = {
&dev_attr_sriov_totalvfs.attr,
&dev_attr_sriov_numvfs.attr,
&dev_attr_sriov_offset.attr,
&dev_attr_sriov_stride.attr,
&dev_attr_sriov_vf_device.attr,
&dev_attr_sriov_drivers_autoprobe.attr,
#ifdef CONFIG_PCI_MSI
&dev_attr_sriov_vf_total_msix.attr,
#endif
NULL,
};
static umode_t sriov_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
static umode_t sriov_pf_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
......@@ -421,9 +511,9 @@ static umode_t sriov_attrs_are_visible(struct kobject *kobj,
return a->mode;
}
const struct attribute_group sriov_dev_attr_group = {
.attrs = sriov_dev_attrs,
.is_visible = sriov_attrs_are_visible,
const struct attribute_group sriov_pf_dev_attr_group = {
.attrs = sriov_pf_dev_attrs,
.is_visible = sriov_pf_attrs_are_visible,
};
int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
......
......@@ -1567,7 +1567,8 @@ static const struct attribute_group *pci_dev_attr_groups[] = {
&pci_dev_attr_group,
&pci_dev_hp_attr_group,
#ifdef CONFIG_PCI_IOV
&sriov_dev_attr_group,
&sriov_pf_dev_attr_group,
&sriov_vf_dev_attr_group,
#endif
&pci_bridge_attr_group,
&pcie_dev_attr_group,
......
......@@ -501,7 +501,8 @@ void pci_iov_update_resource(struct pci_dev *dev, int resno);
resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
void pci_restore_iov_state(struct pci_dev *dev);
int pci_iov_bus_range(struct pci_bus *bus);
extern const struct attribute_group sriov_dev_attr_group;
extern const struct attribute_group sriov_pf_dev_attr_group;
extern const struct attribute_group sriov_vf_dev_attr_group;
#else
static inline int pci_iov_init(struct pci_dev *dev)
{
......
......@@ -438,7 +438,6 @@ struct mlx5_core_health {
unsigned long flags;
struct work_struct fatal_report_work;
struct work_struct report_work;
struct delayed_work recover_work;
struct devlink_health_reporter *fw_reporter;
struct devlink_health_reporter *fw_fatal_reporter;
};
......@@ -874,6 +873,11 @@ static inline u32 mlx5_base_mkey(const u32 key)
return key & 0xffffff00u;
}
static inline u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
{
return ((u32)1 << log_sz) << log_stride;
}
static inline void mlx5_init_fbc_offset(struct mlx5_buf_list *frags,
u8 log_stride, u8 log_sz,
u16 strides_offset,
......
......@@ -48,6 +48,7 @@ struct mlx5_eswitch_rep {
/* Only IB rep is using vport_index */
u16 vport_index;
u32 vlan_refcount;
struct mlx5_eswitch *esw;
};
void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
......@@ -61,8 +62,8 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
u16 vport_num);
void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
u16 vport_num, u32 sqn);
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
struct mlx5_eswitch_rep *rep, u32 sqn);
u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
......
......@@ -806,9 +806,11 @@ struct mlx5_ifc_e_switch_cap_bits {
u8 vport_svlan_insert[0x1];
u8 vport_cvlan_insert_if_not_exist[0x1];
u8 vport_cvlan_insert_overwrite[0x1];
u8 reserved_at_5[0x3];
u8 reserved_at_5[0x2];
u8 esw_shared_ingress_acl[0x1];
u8 esw_uplink_ingress_acl[0x1];
u8 reserved_at_9[0x10];
u8 root_ft_on_other_esw[0x1];
u8 reserved_at_a[0xf];
u8 esw_functions_changed[0x1];
u8 reserved_at_1a[0x1];
u8 ecpf_vport_exists[0x1];
......@@ -1502,7 +1504,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_270[0x6];
u8 lag_dct[0x2];
u8 lag_tx_port_affinity[0x1];
u8 reserved_at_279[0x2];
u8 lag_native_fdb_selection[0x1];
u8 reserved_at_27a[0x1];
u8 lag_master[0x1];
u8 num_lag_ports[0x4];
......@@ -1680,7 +1683,16 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_6e0[0x10];
u8 sf_base_id[0x10];
u8 reserved_at_700[0x80];
u8 reserved_at_700[0x8];
u8 num_total_dynamic_vf_msix[0x18];
u8 reserved_at_720[0x14];
u8 dynamic_msix_table_size[0xc];
u8 reserved_at_740[0xc];
u8 min_dynamic_vf_msix_table_size[0x4];
u8 reserved_at_750[0x4];
u8 max_dynamic_vf_msix_table_size[0xc];
u8 reserved_at_760[0x20];
u8 vhca_tunnel_commands[0x40];
u8 reserved_at_7c0[0x40];
};
......@@ -10036,14 +10048,19 @@ struct mlx5_ifc_set_flow_table_root_in_bits {
u8 reserved_at_60[0x20];
u8 table_type[0x8];
u8 reserved_at_88[0x18];
u8 reserved_at_88[0x7];
u8 table_of_other_vport[0x1];
u8 table_vport_number[0x10];
u8 reserved_at_a0[0x8];
u8 table_id[0x18];
u8 reserved_at_c0[0x8];
u8 underlay_qpn[0x18];
u8 reserved_at_e0[0x120];
u8 table_eswitch_owner_vhca_id_valid[0x1];
u8 reserved_at_e1[0xf];
u8 table_eswitch_owner_vhca_id[0x10];
u8 reserved_at_100[0x100];
};
enum {
......@@ -10273,7 +10290,8 @@ struct mlx5_ifc_dcbx_param_bits {
};
struct mlx5_ifc_lagc_bits {
u8 reserved_at_0[0x1d];
u8 fdb_selection_mode[0x1];
u8 reserved_at_1[0x1c];
u8 lag_state[0x3];
u8 reserved_at_20[0x14];
......
......@@ -856,6 +856,12 @@ struct module;
* e.g. drivers/net/e100.c.
* @sriov_configure: Optional driver callback to allow configuration of
* number of VFs to enable via sysfs "sriov_numvfs" file.
* @sriov_set_msix_vec_count: PF Driver callback to change number of MSI-X
* vectors on a VF. Triggered via sysfs "sriov_vf_msix_count".
* This will change MSI-X Table Size in the VF Message Control
* registers.
* @sriov_get_vf_total_msix: PF driver callback to get the total number of
* MSI-X vectors available for distribution to the VFs.
* @err_handler: See Documentation/PCI/pci-error-recovery.rst
* @groups: Sysfs attribute groups.
* @driver: Driver model structure.
......@@ -871,6 +877,8 @@ struct pci_driver {
int (*resume)(struct pci_dev *dev); /* Device woken up */
void (*shutdown)(struct pci_dev *dev);
int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */
int (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */
u32 (*sriov_get_vf_total_msix)(struct pci_dev *pf);
const struct pci_error_handlers *err_handler;
const struct attribute_group **groups;
struct device_driver driver;
......
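
For context, a hedged sketch of how a PF driver other than mlx5 might wire up these two callbacks; the "foo" driver and its firmware helpers are hypothetical, only the struct pci_driver members come from this series:

/* Hypothetical PF driver; foo_fw_set_vf_msix(), foo_fw_query_msix_pool()
 * and foo_sriov_configure() are stand-ins, not real APIs.
 */
static int foo_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count)
{
	struct pci_dev *pf = pci_physfn(vf);

	/* Translate the VF to a device-specific id and push the requested
	 * MSI-X table size to firmware before a driver binds to the VF.
	 */
	return foo_fw_set_vf_msix(pf, vf, msix_vec_count);
}

static u32 foo_sriov_get_vf_total_msix(struct pci_dev *pf)
{
	return foo_fw_query_msix_pool(pf);	/* 0 when no pool is available */
}

static struct pci_driver foo_driver = {
	.name			  = "foo",
	.sriov_configure	  = foo_sriov_configure,
	.sriov_set_msix_vec_count = foo_sriov_set_msix_vec_count,
	.sriov_get_vf_total_msix  = foo_sriov_get_vf_total_msix,
};
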