Commit 38b9e0f6 authored by David S. Miller

Merge tag 'mlx5-updates-2019-08-09' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2019-08-09

This series includes updates to the mlx5 ethernet and core drivers:

In the first 11 patches, Vlad submits part 2 of a 3-part series to allow
TC flow handling for concurrent execution.

1) TC flow handling for concurrent execution (part 2)

Vlad says:
==========

Refactor data structures that are shared between flows in tc.
Currently, all cls API hardware offload driver callbacks require the caller
to hold the rtnl lock when calling them. The cls API has already been updated
to update software filters in parallel (on classifiers that support
unlocked execution), however the hardware offload code still obtains the rtnl
lock before calling the driver tc callbacks. This set implements support for
unlocked execution of the tc hairpin, mod_hdr and encap subsystems. The
changes implemented in these subsystems are very similar in general.

The main difference is that hairpin is accessed through mlx5e_tc_table
(legacy mode), mod_hdr is accessed through both mlx5e_tc_table and
mlx5_esw_offload (legacy and switchdev modes) and encap is only accessed
through mlx5_esw_offload (switchdev mode).

1.1) Hairpin handling code and structure mlx5e_hairpin_entry are refactored
in the following way:

- The hairpin structure is extended with an atomic reference counter. This
  allows looking up a hairpin entry and taking a reference to it under
  hairpin_tbl_lock protection, and then continuing to use the entry
  unlocked (including provisioning it to hardware).

- To support unlocked provisioning of a hairpin entry to hardware, the entry
  is extended with a 'res_ready' completion and is inserted into hairpin_tbl
  before calling the firmware. With this approach, any concurrent user that
  attempts to use the same hairpin entry waits for the completion first, which
  prevents access to entries that are not fully initialized (a simplified
  sketch of this pattern follows this list).

- The hairpin entry is extended with a new flows_lock spinlock to protect the
  flow list when multiple concurrent tc instances update flows attached to
  the same hairpin entry.
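
A minimal sketch of this lookup/insert pattern, which is shared by all three
subsystems, is shown below. Every identifier here (hairpin_get,
hairpin_create_in_hw, peer_vhca_id as the hash key) is illustrative only and
does not match the driver's actual code:

#include <linux/completion.h>
#include <linux/err.h>
#include <linux/hashtable.h>
#include <linux/mutex.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/types.h>

struct hairpin_entry {
        struct hlist_node hlist;
        refcount_t refcnt;
        struct completion res_ready;   /* completed once fw provisioning ends */
        int compl_result;              /* 0 on success, -errno on failure */
        u16 peer_vhca_id;              /* hash key */
};

static DEFINE_MUTEX(hairpin_tbl_lock); /* protects hairpin_tbl */
static DEFINE_HASHTABLE(hairpin_tbl, 8);

/* stand-in for the firmware call that actually creates the hairpin pair */
static int hairpin_create_in_hw(struct hairpin_entry *hpe)
{
        return 0;
}

static struct hairpin_entry *hairpin_get(u16 peer_vhca_id)
{
        struct hairpin_entry *hpe;

        mutex_lock(&hairpin_tbl_lock);
        hash_for_each_possible(hairpin_tbl, hpe, hlist, peer_vhca_id) {
                if (hpe->peer_vhca_id == peer_vhca_id &&
                    refcount_inc_not_zero(&hpe->refcnt)) {
                        mutex_unlock(&hairpin_tbl_lock);
                        /* entry may still be provisioning: wait for its owner */
                        wait_for_completion(&hpe->res_ready);
                        return hpe;    /* caller checks hpe->compl_result */
                }
        }

        /* not found: insert a placeholder first, then provision it unlocked */
        hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
        if (!hpe) {
                mutex_unlock(&hairpin_tbl_lock);
                return ERR_PTR(-ENOMEM);
        }
        hpe->peer_vhca_id = peer_vhca_id;
        refcount_set(&hpe->refcnt, 1);
        init_completion(&hpe->res_ready);
        hash_add(hairpin_tbl, &hpe->hlist, peer_vhca_id);
        mutex_unlock(&hairpin_tbl_lock);

        hpe->compl_result = hairpin_create_in_hw(hpe); /* fw call, no locks held */
        complete_all(&hpe->res_ready); /* wake any concurrent waiters */
        return hpe;
}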

1.2) Modify header handling code and structure mlx5e_mod_hdr_entry
are refactored in the following way:

- The mod_hdr structure is extended with an atomic reference counter. This
  allows looking up a mod_hdr entry and taking a reference to it under
  mod_hdr_tbl_lock protection, and then continuing to use the entry
  unlocked (including provisioning it to hardware).

- To support unlocked provisioning of a mod_hdr entry to hardware, the entry
  is extended with a 'res_ready' completion and is inserted into mod_hdr_tbl
  before calling the firmware. With this approach, any concurrent user that
  attempts to use the same mod_hdr entry waits for the completion first, which
  prevents access to entries that are not fully initialized.

- The mod_hdr entry is extended with a new flows_lock spinlock to protect the
  flow list when multiple concurrent tc instances update flows attached to
  the same mod_hdr entry (the release path and flows_lock usage are sketched
  after this list).
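
The matching release path and the per-entry flows_lock usage can be sketched
as below; again, all names (mod_hdr_put, flow_attach_mod_hdr, ...) are made
up for the example, and the table-wide mutex is passed in explicitly only to
keep the sketch self-contained:

#include <linux/hashtable.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct mod_hdr_entry {
        struct hlist_node hlist;       /* linked into the mod_hdr hash table */
        refcount_t refcnt;
        spinlock_t flows_lock;         /* protects the flows list below */
        struct list_head flows;        /* flows reusing this mod_hdr action */
};

struct flow {
        struct list_head mod_hdr;      /* link into mod_hdr_entry::flows */
};

/* attaching/detaching a flow only takes the per-entry spinlock, so
 * concurrent tc instances do not contend on the table-wide mutex here */
static void flow_attach_mod_hdr(struct mod_hdr_entry *mh, struct flow *flow)
{
        spin_lock(&mh->flows_lock);
        list_add(&flow->mod_hdr, &mh->flows);
        spin_unlock(&mh->flows_lock);
}

static void flow_detach_mod_hdr(struct mod_hdr_entry *mh, struct flow *flow)
{
        spin_lock(&mh->flows_lock);
        list_del(&flow->mod_hdr);
        spin_unlock(&mh->flows_lock);
}

/* drop a reference; the last one removes the entry from the hash table
 * while holding the table lock, so concurrent lookups either find a live
 * entry or nothing at all */
static void mod_hdr_put(struct mutex *tbl_lock, struct mod_hdr_entry *mh)
{
        if (!refcount_dec_and_mutex_lock(&mh->refcnt, tbl_lock))
                return;
        hash_del(&mh->hlist);
        mutex_unlock(tbl_lock);

        /* hw resources can now be released without any lock held */
        kfree(mh);
}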

1.3) Encapsulation handling code and structure mlx5e_encap_entry
are refactored in the following way:

- The encap structure is extended with an atomic reference counter. This
  allows looking up an encap entry and taking a reference to it under
  encap_tbl_lock protection, and then continuing to use the entry
  unlocked (including provisioning it to hardware).

- To support unlocked provisioning of an encap entry to hardware, the entry is
  extended with a 'res_ready' completion and is inserted into encap_tbl before
  calling the firmware. With this approach, any concurrent user that
  attempts to use the same encap entry waits for the completion first, which
  prevents access to entries that are not fully initialized.

- In contrast to the approach used to refactor hairpin and mod_hdr, the
  encap entry is not extended with any per-entry fine-grained lock.
  Instead, encap_tbl_lock is used to synchronize all operations on the
  encap table and on instances of mlx5e_encap_entry. This is necessary
  because a single flow can be attached to multiple encap entries
  simultaneously. During new flow creation or a neigh update event, all of
  the encaps that the flow is attached to must be accessed together in an
  atomic manner, which makes the use of a per-entry lock infeasible.

- The encap entry is extended with a new flows_lock spinlock to protect the
  flow list when multiple concurrent tc instances update flows attached to
  the same encap entry (the take/put semantics are sketched after this list).
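
The take/put semantics behind the new mlx5e_encap_take()/mlx5e_encap_put()
helpers (declared in one of the header hunks below and used by the neigh
update hunk) can be sketched as follows; the bodies here are a simplified
approximation, not the driver's implementation, and the table mutex is
passed in only to keep the sketch self-contained:

#include <linux/mutex.h>
#include <linux/refcount.h>
#include <linux/types.h>

struct encap_entry {
        refcount_t refcnt;
};

/* returns false when the entry is concurrently being destroyed (refcount
 * already dropped to zero); the caller must then skip the entry, exactly
 * like the "if (!mlx5e_encap_take(e)) continue;" check in the neigh
 * update loop below */
static bool encap_take(struct encap_entry *e)
{
        return refcount_inc_not_zero(&e->refcnt);
}

static void encap_put(struct mutex *encap_tbl_lock, struct encap_entry *e)
{
        /* no per-entry fine-grained lock: the table-wide encap_tbl_lock
         * serializes destruction against flow creation and neigh updates */
        if (!refcount_dec_and_mutex_lock(&e->refcnt, encap_tbl_lock))
                return;
        /* remove the entry from encap_tbl and free hw encap resources here */
        mutex_unlock(encap_tbl_lock);
}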

==========

3) Parav improves the way port representors report their parent ID and
port index.

4) Use refcount_t for the refcount in the vxlan database, from Chuhong Yuan.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 62ad42ec b51c225e
@@ -16,7 +16,8 @@ struct mlx5e_tc_table {
struct rhashtable ht;
DECLARE_HASHTABLE(mod_hdr_tbl, 8);
struct mod_hdr_tbl mod_hdr;
struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
DECLARE_HASHTABLE(hairpin_tbl, 8);
struct notifier_block netdevice_nb;
......
@@ -389,24 +389,17 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
.set_pauseparam = mlx5e_uplink_rep_set_pauseparam,
};
static int mlx5e_rep_get_port_parent_id(struct net_device *dev,
struct netdev_phys_item_id *ppid)
static void mlx5e_rep_get_port_parent_id(struct net_device *dev,
struct netdev_phys_item_id *ppid)
{
struct mlx5_eswitch *esw;
struct mlx5e_priv *priv;
u64 parent_id;
priv = netdev_priv(dev);
esw = priv->mdev->priv.eswitch;
if (esw->mode == MLX5_ESWITCH_NONE)
return -EOPNOTSUPP;
parent_id = mlx5_query_nic_system_image_guid(priv->mdev);
ppid->id_len = sizeof(parent_id);
memcpy(ppid->id, &parent_id, sizeof(parent_id));
return 0;
}
static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
@@ -613,12 +606,17 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
neigh_connected = (nud_state & NUD_VALID) && !dead;
list_for_each_entry(e, &nhe->encap_list, encap_list) {
if (!mlx5e_encap_take(e))
continue;
encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
priv = netdev_priv(e->out_dev);
if (encap_connected != neigh_connected ||
!ether_addr_equal(e->h_dest, ha))
mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
mlx5e_encap_put(priv, e);
}
mlx5e_rep_neigh_entry_release(nhe);
rtnl_unlock();
@@ -1748,37 +1746,46 @@ is_devlink_port_supported(const struct mlx5_core_dev *dev,
mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport);
}
static unsigned int
vport_to_devlink_port_index(const struct mlx5_core_dev *dev, u16 vport_num)
{
return (MLX5_CAP_GEN(dev, vhca_id) << 16) | vport_num;
}
static int register_devlink_port(struct mlx5_core_dev *dev,
struct mlx5e_rep_priv *rpriv)
{
struct devlink *devlink = priv_to_devlink(dev);
struct mlx5_eswitch_rep *rep = rpriv->rep;
struct netdev_phys_item_id ppid = {};
int ret;
unsigned int dl_port_index = 0;
if (!is_devlink_port_supported(dev, rpriv))
return 0;
ret = mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid);
if (ret)
return ret;
mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid);
if (rep->vport == MLX5_VPORT_UPLINK)
if (rep->vport == MLX5_VPORT_UPLINK) {
devlink_port_attrs_set(&rpriv->dl_port,
DEVLINK_PORT_FLAVOUR_PHYSICAL,
PCI_FUNC(dev->pdev->devfn), false, 0,
&ppid.id[0], ppid.id_len);
else if (rep->vport == MLX5_VPORT_PF)
dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
} else if (rep->vport == MLX5_VPORT_PF) {
devlink_port_attrs_pci_pf_set(&rpriv->dl_port,
&ppid.id[0], ppid.id_len,
dev->pdev->devfn);
else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport))
dl_port_index = rep->vport;
} else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch,
rpriv->rep->vport)) {
devlink_port_attrs_pci_vf_set(&rpriv->dl_port,
&ppid.id[0], ppid.id_len,
dev->pdev->devfn,
rep->vport - 1);
dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
}
return devlink_port_register(devlink, &rpriv->dl_port, rep->vport);
return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index);
}
static void unregister_devlink_port(struct mlx5_core_dev *dev,
......
@@ -164,6 +164,9 @@ struct mlx5e_encap_entry {
u8 flags;
char *encap_header;
int encap_size;
refcount_t refcnt;
struct completion res_ready;
int compl_result;
};
struct mlx5e_rep_sq {
......
@@ -75,6 +75,8 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
bool mlx5e_encap_take(struct mlx5e_encap_entry *e);
void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e);
struct mlx5e_neigh_hash_entry;
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
......
@@ -1999,8 +1999,10 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
if (err)
goto abort;
mutex_init(&esw->offloads.encap_tbl_lock);
hash_init(esw->offloads.encap_tbl);
hash_init(esw->offloads.mod_hdr_tbl);
mutex_init(&esw->offloads.mod_hdr.lock);
hash_init(esw->offloads.mod_hdr.hlist);
atomic64_set(&esw->offloads.num_flows, 0);
mutex_init(&esw->state_lock);
@@ -2037,6 +2039,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
esw_offloads_cleanup_reps(esw);
mutex_destroy(&esw->offloads.mod_hdr.lock);
mutex_destroy(&esw->offloads.encap_tbl_lock);
kfree(esw->vports);
kfree(esw);
}
......
@@ -181,8 +181,9 @@ struct mlx5_esw_offload {
struct mlx5_eswitch_rep *vport_reps;
struct list_head peer_flows;
struct mutex peer_mutex;
struct mutex encap_tbl_lock; /* protects encap_tbl */
DECLARE_HASHTABLE(encap_tbl, 8);
DECLARE_HASHTABLE(mod_hdr_tbl, 8);
struct mod_hdr_tbl mod_hdr;
DECLARE_HASHTABLE(termtbl_tbl, 8);
struct mutex termtbl_mutex; /* protects termtbl hash */
const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
......
@@ -1393,10 +1393,9 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
int esw_offloads_init_reps(struct mlx5_eswitch *esw)
{
int total_vports = esw->total_vports;
struct mlx5_core_dev *dev = esw->dev;
struct mlx5_eswitch_rep *rep;
u8 hw_id[ETH_ALEN], rep_type;
int vport_index;
u8 rep_type;
esw->offloads.vport_reps = kcalloc(total_vports,
sizeof(struct mlx5_eswitch_rep),
@@ -1404,12 +1403,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
if (!esw->offloads.vport_reps)
return -ENOMEM;
mlx5_query_mac_address(dev, hw_id);
mlx5_esw_for_all_reps(esw, vport_index, rep) {
rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
rep->vport_index = vport_index;
ether_addr_copy(rep->hw_id, hw_id);
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
atomic_set(&rep->rep_data[rep_type].state,
......
@@ -32,6 +32,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/refcount.h>
#include <linux/mlx5/driver.h>
#include <net/vxlan.h>
#include "mlx5_core.h"
@@ -48,7 +49,7 @@ struct mlx5_vxlan {
struct mlx5_vxlan_port {
struct hlist_node hlist;
atomic_t refcount;
refcount_t refcount;
u16 udp_port;
};
@@ -113,7 +114,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
if (vxlanp) {
atomic_inc(&vxlanp->refcount);
refcount_inc(&vxlanp->refcount);
return 0;
}
@@ -137,7 +138,7 @@ int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
}
vxlanp->udp_port = port;
atomic_set(&vxlanp->refcount, 1);
refcount_set(&vxlanp->refcount, 1);
spin_lock_bh(&vxlan->lock);
hash_add(vxlan->htable, &vxlanp->hlist, port);
@@ -170,7 +171,7 @@ int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
goto out_unlock;
}
if (atomic_dec_and_test(&vxlanp->refcount)) {
if (refcount_dec_and_test(&vxlanp->refcount)) {
hash_del(&vxlanp->hlist);
remove = true;
}
......
@@ -44,7 +44,6 @@ struct mlx5_eswitch_rep_data {
struct mlx5_eswitch_rep {
struct mlx5_eswitch_rep_data rep_data[NUM_REP_TYPES];
u16 vport;
u8 hw_id[ETH_ALEN];
u16 vlan;
/* Only IB rep is using vport_index */
u16 vport_index;
......
@@ -126,6 +126,11 @@ struct mlx5_flow_destination {
};
};
struct mod_hdr_tbl {
struct mutex lock; /* protects hlist */
DECLARE_HASHTABLE(hlist, 8);
};
struct mlx5_flow_namespace *
mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, int n);
struct mlx5_flow_namespace *
......