Commit bcbf1be0 authored by David S. Miller

Merge branch 'udp_tunnel-convert-Intel-drivers-with-shared-tables'

Jakub Kicinski says:

====================
udp_tunnel: convert Intel drivers with shared tables

This set converts Intel drivers which have the ability to spawn
multiple netdevs, but have only one UDP tunnel port table.

Appropriate support is added to the core infra in patch 1,
followed by netdevsim support and a selftest.

The table sharing works by core attaching the same table
structure to all devices sharing the table. This means the
reference count has to accommodate potentially large values.

Once core is ready i40e and ice are converted. These are
complex drivers, but we got a tested-by from Aaron, so we
should be good :)

Compared to v1 I've made sure the selftest is executable.

Other than that, patches 8 and 9 are actually from the Mellanox
conversion series; they were kept out of it to avoid Mellanox vs Intel
conflicts.

The last patch is new: some docs to let users know ethtool
can now display UDP tunnel info.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 8744c0a8 33a1aaf8
...@@ -58,3 +58,31 @@ forwarding table using the new bridge command. ...@@ -58,3 +58,31 @@ forwarding table using the new bridge command.
3. Show forwarding table:: 3. Show forwarding table::
# bridge fdb show dev vxlan0 # bridge fdb show dev vxlan0
The following NIC features may indicate support for UDP tunnel-related
offloads (most commonly VXLAN features, but support for a particular
encapsulation protocol is NIC specific):
- `tx-udp_tnl-segmentation`
- `tx-udp_tnl-csum-segmentation`
ability to perform TCP segmentation offload of UDP encapsulated frames
- `rx-udp_tunnel-port-offload`
receive side parsing of UDP encapsulated frames which allows NICs to
perform protocol-aware offloads, like checksum validation offload of
inner frames (only needed by NICs without protocol-agnostic offloads)
For devices supporting `rx-udp_tunnel-port-offload` the list of currently
offloaded ports can be interrogated with `ethtool`::
$ ethtool --show-tunnels eth0
Tunnel information for eth0:
UDP port table 0:
Size: 4
Types: vxlan
No entries
UDP port table 1:
Size: 4
Types: geneve, vxlan-gpe
Entries (1):
port 1230, vxlan-gpe
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include <net/pkt_cls.h> #include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h>
#include <net/udp_tunnel.h>
#include <net/xdp_sock.h> #include <net/xdp_sock.h>
#include "i40e_type.h" #include "i40e_type.h"
#include "i40e_prototype.h" #include "i40e_prototype.h"
...@@ -133,7 +134,6 @@ enum i40e_state_t { ...@@ -133,7 +134,6 @@ enum i40e_state_t {
__I40E_PORT_SUSPENDED, __I40E_PORT_SUSPENDED,
__I40E_VF_DISABLE, __I40E_VF_DISABLE,
__I40E_MACVLAN_SYNC_PENDING, __I40E_MACVLAN_SYNC_PENDING,
__I40E_UDP_FILTER_SYNC_PENDING,
__I40E_TEMP_LINK_POLLING, __I40E_TEMP_LINK_POLLING,
__I40E_CLIENT_SERVICE_REQUESTED, __I40E_CLIENT_SERVICE_REQUESTED,
__I40E_CLIENT_L2_CHANGE, __I40E_CLIENT_L2_CHANGE,
...@@ -478,8 +478,8 @@ struct i40e_pf { ...@@ -478,8 +478,8 @@ struct i40e_pf {
struct list_head l3_flex_pit_list; struct list_head l3_flex_pit_list;
struct list_head l4_flex_pit_list; struct list_head l4_flex_pit_list;
struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; struct udp_tunnel_nic_shared udp_tunnel_shared;
u16 pending_udp_bitmap; struct udp_tunnel_nic_info udp_tunnel_nic;
struct hlist_head cloud_filter_list; struct hlist_head cloud_filter_list;
u16 num_cloud_filters; u16 num_cloud_filters;
......
This diff is collapsed.
...@@ -1268,8 +1268,7 @@ ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, ...@@ -1268,8 +1268,7 @@ ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input,
bool is_tun = tun == ICE_FD_HW_SEG_TUN; bool is_tun = tun == ICE_FD_HW_SEG_TUN;
int err; int err;
if (is_tun && !ice_get_open_tunnel_port(&pf->hw, TNL_ALL, if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num))
&port_num))
continue; continue;
err = ice_fdir_write_fltr(pf, input, add, is_tun); err = ice_fdir_write_fltr(pf, input, add, is_tun);
if (err) if (err)
...@@ -1647,8 +1646,7 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) ...@@ -1647,8 +1646,7 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
} }
/* return error if not an update and no available filters */ /* return error if not an update and no available filters */
fltrs_needed = ice_get_open_tunnel_port(hw, TNL_ALL, &tunnel_port) ? fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port) ? 2 : 1;
2 : 1;
if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) && if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) &&
ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) { ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) {
dev_err(dev, "Failed to add filter. The maximum number of flow director filters has been reached.\n"); dev_err(dev, "Failed to add filter. The maximum number of flow director filters has been reached.\n");
......
...@@ -556,7 +556,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, ...@@ -556,7 +556,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len); memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len);
loc = pkt; loc = pkt;
} else { } else {
if (!ice_get_open_tunnel_port(hw, TNL_ALL, &tnl_port)) if (!ice_get_open_tunnel_port(hw, &tnl_port))
return ICE_ERR_DOES_NOT_EXIST; return ICE_ERR_DOES_NOT_EXIST;
if (!ice_fdir_pkt[idx].tun_pkt) if (!ice_fdir_pkt[idx].tun_pkt)
return ICE_ERR_PARAM; return ICE_ERR_PARAM;
......
...@@ -19,12 +19,11 @@ ...@@ -19,12 +19,11 @@
#define ICE_PKG_CNT 4 #define ICE_PKG_CNT 4
bool bool
ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port);
u16 *port); int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
enum ice_status unsigned int idx, struct udp_tunnel_info *ti);
ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port); int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all); unsigned int idx, struct udp_tunnel_info *ti);
bool ice_tunnel_port_in_use(struct ice_hw *hw, u16 port, u16 *index);
enum ice_status enum ice_status
ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
......
...@@ -298,6 +298,7 @@ struct ice_pkg_enum { ...@@ -298,6 +298,7 @@ struct ice_pkg_enum {
enum ice_tunnel_type { enum ice_tunnel_type {
TNL_VXLAN = 0, TNL_VXLAN = 0,
TNL_GENEVE, TNL_GENEVE,
__TNL_TYPE_CNT,
TNL_LAST = 0xFF, TNL_LAST = 0xFF,
TNL_ALL = 0xFF, TNL_ALL = 0xFF,
}; };
...@@ -311,11 +312,8 @@ struct ice_tunnel_entry { ...@@ -311,11 +312,8 @@ struct ice_tunnel_entry {
enum ice_tunnel_type type; enum ice_tunnel_type type;
u16 boost_addr; u16 boost_addr;
u16 port; u16 port;
u16 ref;
struct ice_boost_tcam_entry *boost_entry; struct ice_boost_tcam_entry *boost_entry;
u8 valid; u8 valid;
u8 in_use;
u8 marked;
}; };
#define ICE_TUNNEL_MAX_ENTRIES 16 #define ICE_TUNNEL_MAX_ENTRIES 16
...@@ -323,6 +321,7 @@ struct ice_tunnel_entry { ...@@ -323,6 +321,7 @@ struct ice_tunnel_entry {
struct ice_tunnel_table { struct ice_tunnel_table {
struct ice_tunnel_entry tbl[ICE_TUNNEL_MAX_ENTRIES]; struct ice_tunnel_entry tbl[ICE_TUNNEL_MAX_ENTRIES];
u16 count; u16 count;
u16 valid_count[__TNL_TYPE_CNT];
}; };
struct ice_pkg_es { struct ice_pkg_es {
......
...@@ -2873,6 +2873,7 @@ static void ice_set_ops(struct net_device *netdev) ...@@ -2873,6 +2873,7 @@ static void ice_set_ops(struct net_device *netdev)
} }
netdev->netdev_ops = &ice_netdev_ops; netdev->netdev_ops = &ice_netdev_ops;
netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
ice_set_ethtool_ops(netdev); ice_set_ethtool_ops(netdev);
} }
...@@ -3978,7 +3979,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ...@@ -3978,7 +3979,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
struct device *dev = &pdev->dev; struct device *dev = &pdev->dev;
struct ice_pf *pf; struct ice_pf *pf;
struct ice_hw *hw; struct ice_hw *hw;
int err; int i, err;
/* this driver uses devres, see /* this driver uses devres, see
* Documentation/driver-api/driver-model/devres.rst * Documentation/driver-api/driver-model/devres.rst
...@@ -4073,11 +4074,37 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ...@@ -4073,11 +4074,37 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
ice_devlink_init_regions(pf); ice_devlink_init_regions(pf);
pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port;
pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port;
pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared;
i = 0;
if (pf->hw.tnl.valid_count[TNL_VXLAN]) {
pf->hw.udp_tunnel_nic.tables[i].n_entries =
pf->hw.tnl.valid_count[TNL_VXLAN];
pf->hw.udp_tunnel_nic.tables[i].tunnel_types =
UDP_TUNNEL_TYPE_VXLAN;
i++;
}
if (pf->hw.tnl.valid_count[TNL_GENEVE]) {
pf->hw.udp_tunnel_nic.tables[i].n_entries =
pf->hw.tnl.valid_count[TNL_GENEVE];
pf->hw.udp_tunnel_nic.tables[i].tunnel_types =
UDP_TUNNEL_TYPE_GENEVE;
i++;
}
pf->num_alloc_vsi = hw->func_caps.guar_num_vsi; pf->num_alloc_vsi = hw->func_caps.guar_num_vsi;
if (!pf->num_alloc_vsi) { if (!pf->num_alloc_vsi) {
err = -EIO; err = -EIO;
goto err_init_pf_unroll; goto err_init_pf_unroll;
} }
if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
dev_warn(&pf->pdev->dev,
"limiting the VSI count due to UDP tunnel limitation %d > %d\n",
pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
}
pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi), pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi),
GFP_KERNEL); GFP_KERNEL);
...@@ -6574,70 +6601,6 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) ...@@ -6574,70 +6601,6 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
pf->tx_timeout_recovery_level++; pf->tx_timeout_recovery_level++;
} }
/**
* ice_udp_tunnel_add - Get notifications about UDP tunnel ports that come up
* @netdev: This physical port's netdev
* @ti: Tunnel endpoint information
*
* Translates the tunnel type carried in @ti into the driver's own
* enum ice_tunnel_type and asks ice_create_tunnel() to add the port.
* The function returns nothing; failures are only reported through the
* netdev log.
*/
static void
ice_udp_tunnel_add(struct net_device *netdev, struct udp_tunnel_info *ti)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
enum ice_tunnel_type tnl_type;
/* ti->port is converted with ntohs() before being passed on and logged */
u16 port = ntohs(ti->port);
enum ice_status status;
/* Only VXLAN and GENEVE are mapped; any other type is rejected here */
switch (ti->type) {
case UDP_TUNNEL_TYPE_VXLAN:
tnl_type = TNL_VXLAN;
break;
case UDP_TUNNEL_TYPE_GENEVE:
tnl_type = TNL_GENEVE;
break;
default:
netdev_err(netdev, "Unknown tunnel type\n");
return;
}
status = ice_create_tunnel(&pf->hw, tnl_type, port);
/* ICE_ERR_OUT_OF_RANGE is logged at info level as "max ports reached";
* every other non-zero status is logged as an error.
*/
if (status == ICE_ERR_OUT_OF_RANGE)
netdev_info(netdev, "Max tunneled UDP ports reached, port %d not added\n",
port);
else if (status)
netdev_err(netdev, "Error adding UDP tunnel - %s\n",
ice_stat_str(status));
}
/**
* ice_udp_tunnel_del - Get notifications about UDP tunnel ports that go away
* @netdev: This physical port's netdev
* @ti: Tunnel endpoint information
*
* Checks with ice_tunnel_port_in_use() that the port is known and then
* calls ice_destroy_tunnel() for it. The function returns nothing;
* both the "not found" case and a destroy failure are only logged.
*/
static void
ice_udp_tunnel_del(struct net_device *netdev, struct udp_tunnel_info *ti)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
/* ti->port is converted with ntohs() before lookup and logging */
u16 port = ntohs(ti->port);
enum ice_status status;
bool retval;
/* The index output is not needed here, hence the NULL third argument */
retval = ice_tunnel_port_in_use(&pf->hw, port, NULL);
if (!retval) {
netdev_info(netdev, "port %d not found in UDP tunnels list\n",
port);
return;
}
/* Third argument "all" is false: only this one port is requested */
status = ice_destroy_tunnel(&pf->hw, port, false);
if (status)
netdev_err(netdev, "error deleting port %d from UDP tunnels list\n",
port);
}
/** /**
* ice_open - Called when a network interface becomes active * ice_open - Called when a network interface becomes active
* @netdev: network interface device structure * @netdev: network interface device structure
...@@ -6830,6 +6793,6 @@ static const struct net_device_ops ice_netdev_ops = { ...@@ -6830,6 +6793,6 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_bpf = ice_xdp, .ndo_bpf = ice_xdp,
.ndo_xdp_xmit = ice_xdp_xmit, .ndo_xdp_xmit = ice_xdp_xmit,
.ndo_xsk_wakeup = ice_xsk_wakeup, .ndo_xsk_wakeup = ice_xsk_wakeup,
.ndo_udp_tunnel_add = ice_udp_tunnel_add, .ndo_udp_tunnel_add = udp_tunnel_nic_add_port,
.ndo_udp_tunnel_del = ice_udp_tunnel_del, .ndo_udp_tunnel_del = udp_tunnel_nic_del_port,
}; };
...@@ -676,6 +676,9 @@ struct ice_hw { ...@@ -676,6 +676,9 @@ struct ice_hw {
struct mutex tnl_lock; struct mutex tnl_lock;
struct ice_tunnel_table tnl; struct ice_tunnel_table tnl;
struct udp_tunnel_nic_shared udp_tunnel_shared;
struct udp_tunnel_nic_info udp_tunnel_nic;
/* HW block tables */ /* HW block tables */
struct ice_blk_info blk[ICE_BLK_COUNT]; struct ice_blk_info blk[ICE_BLK_COUNT];
struct mutex fl_profs_locks[ICE_BLK_COUNT]; /* lock fltr profiles */ struct mutex fl_profs_locks[ICE_BLK_COUNT]; /* lock fltr profiles */
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/u64_stats_sync.h> #include <linux/u64_stats_sync.h>
#include <net/devlink.h> #include <net/devlink.h>
#include <net/udp_tunnel.h>
#include <net/xdp.h> #include <net/xdp.h>
#define DRV_NAME "netdevsim" #define DRV_NAME "netdevsim"
...@@ -84,7 +85,8 @@ struct netdevsim { ...@@ -84,7 +85,8 @@ struct netdevsim {
struct { struct {
u32 inject_error; u32 inject_error;
u32 sleep; u32 sleep;
u32 ports[2][NSIM_UDP_TUNNEL_N_PORTS]; u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS];
u32 (*ports)[NSIM_UDP_TUNNEL_N_PORTS];
struct debugfs_u32_array dfs_ports[2]; struct debugfs_u32_array dfs_ports[2];
} udp_ports; } udp_ports;
...@@ -209,9 +211,13 @@ struct nsim_dev { ...@@ -209,9 +211,13 @@ struct nsim_dev {
bool fail_trap_policer_set; bool fail_trap_policer_set;
bool fail_trap_policer_counter_get; bool fail_trap_policer_counter_get;
struct { struct {
struct udp_tunnel_nic_shared utn_shared;
u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS];
bool sync_all; bool sync_all;
bool open_only; bool open_only;
bool ipv4_only; bool ipv4_only;
bool shared;
bool static_iana_vxlan;
u32 sleep; u32 sleep;
} udp_ports; } udp_ports;
}; };
......
...@@ -22,11 +22,13 @@ nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table, ...@@ -22,11 +22,13 @@ nsim_udp_tunnel_set_port(struct net_device *dev, unsigned int table,
msleep(ns->udp_ports.sleep); msleep(ns->udp_ports.sleep);
if (!ret) { if (!ret) {
if (ns->udp_ports.ports[table][entry]) if (ns->udp_ports.ports[table][entry]) {
WARN(1, "entry already in use\n");
ret = -EBUSY; ret = -EBUSY;
else } else {
ns->udp_ports.ports[table][entry] = ns->udp_ports.ports[table][entry] =
be16_to_cpu(ti->port) << 16 | ti->type; be16_to_cpu(ti->port) << 16 | ti->type;
}
} }
netdev_info(dev, "set [%d, %d] type %d family %d port %d - %d\n", netdev_info(dev, "set [%d, %d] type %d family %d port %d - %d\n",
...@@ -50,10 +52,13 @@ nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table, ...@@ -50,10 +52,13 @@ nsim_udp_tunnel_unset_port(struct net_device *dev, unsigned int table,
if (!ret) { if (!ret) {
u32 val = be16_to_cpu(ti->port) << 16 | ti->type; u32 val = be16_to_cpu(ti->port) << 16 | ti->type;
if (val == ns->udp_ports.ports[table][entry]) if (val == ns->udp_ports.ports[table][entry]) {
ns->udp_ports.ports[table][entry] = 0; ns->udp_ports.ports[table][entry] = 0;
else } else {
WARN(1, "entry not installed %x vs %x\n",
val, ns->udp_ports.ports[table][entry]);
ret = -ENOENT; ret = -ENOENT;
}
} }
netdev_info(dev, "unset [%d, %d] type %d family %d port %d - %d\n", netdev_info(dev, "unset [%d, %d] type %d family %d port %d - %d\n",
...@@ -107,7 +112,7 @@ nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data, ...@@ -107,7 +112,7 @@ nsim_udp_tunnels_info_reset_write(struct file *file, const char __user *data,
struct net_device *dev = file->private_data; struct net_device *dev = file->private_data;
struct netdevsim *ns = netdev_priv(dev); struct netdevsim *ns = netdev_priv(dev);
memset(&ns->udp_ports.ports, 0, sizeof(ns->udp_ports.ports)); memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports));
rtnl_lock(); rtnl_lock();
udp_tunnel_nic_reset_ntf(dev); udp_tunnel_nic_reset_ntf(dev);
rtnl_unlock(); rtnl_unlock();
...@@ -127,6 +132,17 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, ...@@ -127,6 +132,17 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev,
struct netdevsim *ns = netdev_priv(dev); struct netdevsim *ns = netdev_priv(dev);
struct udp_tunnel_nic_info *info; struct udp_tunnel_nic_info *info;
if (nsim_dev->udp_ports.shared && nsim_dev->udp_ports.open_only) {
dev_err(&nsim_dev->nsim_bus_dev->dev,
"shared can't be used in conjunction with open_only\n");
return -EINVAL;
}
if (!nsim_dev->udp_ports.shared)
ns->udp_ports.ports = ns->udp_ports.__ports;
else
ns->udp_ports.ports = nsim_dev->udp_ports.__ports;
debugfs_create_u32("udp_ports_inject_error", 0600, debugfs_create_u32("udp_ports_inject_error", 0600,
ns->nsim_dev_port->ddir, ns->nsim_dev_port->ddir,
&ns->udp_ports.inject_error); &ns->udp_ports.inject_error);
...@@ -168,6 +184,10 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev, ...@@ -168,6 +184,10 @@ int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev,
info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY; info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY;
if (nsim_dev->udp_ports.ipv4_only) if (nsim_dev->udp_ports.ipv4_only)
info->flags |= UDP_TUNNEL_NIC_INFO_IPV4_ONLY; info->flags |= UDP_TUNNEL_NIC_INFO_IPV4_ONLY;
if (nsim_dev->udp_ports.shared)
info->shared = &nsim_dev->udp_ports.utn_shared;
if (nsim_dev->udp_ports.static_iana_vxlan)
info->flags |= UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
dev->udp_tunnel_nic_info = info; dev->udp_tunnel_nic_info = info;
return 0; return 0;
...@@ -187,6 +207,10 @@ void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev) ...@@ -187,6 +207,10 @@ void nsim_udp_tunnels_debugfs_create(struct nsim_dev *nsim_dev)
&nsim_dev->udp_ports.open_only); &nsim_dev->udp_ports.open_only);
debugfs_create_bool("udp_ports_ipv4_only", 0600, nsim_dev->ddir, debugfs_create_bool("udp_ports_ipv4_only", 0600, nsim_dev->ddir,
&nsim_dev->udp_ports.ipv4_only); &nsim_dev->udp_ports.ipv4_only);
debugfs_create_bool("udp_ports_shared", 0600, nsim_dev->ddir,
&nsim_dev->udp_ports.shared);
debugfs_create_bool("udp_ports_static_iana_vxlan", 0600, nsim_dev->ddir,
&nsim_dev->udp_ports.static_iana_vxlan);
debugfs_create_u32("udp_ports_sleep", 0600, nsim_dev->ddir, debugfs_create_u32("udp_ports_sleep", 0600, nsim_dev->ddir,
&nsim_dev->udp_ports.sleep); &nsim_dev->udp_ports.sleep);
} }
...@@ -200,11 +200,27 @@ enum udp_tunnel_nic_info_flags { ...@@ -200,11 +200,27 @@ enum udp_tunnel_nic_info_flags {
UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3), UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3),
}; };
struct udp_tunnel_nic;
#define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES (U16_MAX / 2)
/* State shared by all netdevs that use one UDP tunnel port table.
 * A driver points udp_tunnel_nic_info.shared at an instance of this.
 */
struct udp_tunnel_nic_shared {
/* The common table state; udp_tunnel_nic_register() sets it when it is
 * still NULL and udp_tunnel_nic_unregister() clears it once the device
 * list is empty.
 */
struct udp_tunnel_nic *udp_tunnel_nic_info;
/* List of struct udp_tunnel_nic_shared_node, one per registered netdev */
struct list_head devices;
};
/* One entry in udp_tunnel_nic_shared.devices; allocated in
 * udp_tunnel_nic_register() and freed in udp_tunnel_nic_unregister().
 */
struct udp_tunnel_nic_shared_node {
/* The netdev that registered against the shared table */
struct net_device *dev;
/* Linkage into udp_tunnel_nic_shared.devices */
struct list_head list;
};
/** /**
* struct udp_tunnel_nic_info - driver UDP tunnel offload information * struct udp_tunnel_nic_info - driver UDP tunnel offload information
* @set_port: callback for adding a new port * @set_port: callback for adding a new port
* @unset_port: callback for removing a port * @unset_port: callback for removing a port
* @sync_table: callback for syncing the entire port table at once * @sync_table: callback for syncing the entire port table at once
* @shared: reference to device global state (optional)
* @flags: device flags from enum udp_tunnel_nic_info_flags * @flags: device flags from enum udp_tunnel_nic_info_flags
* @tables: UDP port tables this device has * @tables: UDP port tables this device has
* @tables.n_entries: number of entries in this table * @tables.n_entries: number of entries in this table
...@@ -213,6 +229,12 @@ enum udp_tunnel_nic_info_flags { ...@@ -213,6 +229,12 @@ enum udp_tunnel_nic_info_flags {
* Drivers are expected to provide either @set_port and @unset_port callbacks * Drivers are expected to provide either @set_port and @unset_port callbacks
* or the @sync_table callback. Callbacks are invoked with rtnl lock held. * or the @sync_table callback. Callbacks are invoked with rtnl lock held.
* *
* Devices which (misguidedly) share the UDP tunnel port table across multiple
* netdevs should allocate an instance of struct udp_tunnel_nic_shared and
* point @shared at it.
* There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices
* sharing a table.
*
* Known limitations: * Known limitations:
* - UDP tunnel port notifications are fundamentally best-effort - * - UDP tunnel port notifications are fundamentally best-effort -
* it is likely the driver will both see skbs which use a UDP tunnel port, * it is likely the driver will both see skbs which use a UDP tunnel port,
...@@ -234,6 +256,8 @@ struct udp_tunnel_nic_info { ...@@ -234,6 +256,8 @@ struct udp_tunnel_nic_info {
/* all at once */ /* all at once */
int (*sync_table)(struct net_device *dev, unsigned int table); int (*sync_table)(struct net_device *dev, unsigned int table);
struct udp_tunnel_nic_shared *shared;
unsigned int flags; unsigned int flags;
struct udp_tunnel_nic_table_info { struct udp_tunnel_nic_table_info {
......
...@@ -19,8 +19,9 @@ enum udp_tunnel_nic_table_entry_flags { ...@@ -19,8 +19,9 @@ enum udp_tunnel_nic_table_entry_flags {
struct udp_tunnel_nic_table_entry { struct udp_tunnel_nic_table_entry {
__be16 port; __be16 port;
u8 type; u8 type;
u8 use_cnt;
u8 flags; u8 flags;
u16 use_cnt;
#define UDP_TUNNEL_NIC_USE_CNT_MAX U16_MAX
u8 hw_priv; u8 hw_priv;
}; };
...@@ -370,6 +371,8 @@ udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn, ...@@ -370,6 +371,8 @@ udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn,
bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL;
unsigned int from, to; unsigned int from, to;
WARN_ON(entry->use_cnt + (u32)use_cnt_adj > U16_MAX);
/* If not going from used to unused or vice versa - all done. /* If not going from used to unused or vice versa - all done.
* For dodgy entries make sure we try to sync again (queue the entry). * For dodgy entries make sure we try to sync again (queue the entry).
*/ */
...@@ -675,6 +678,7 @@ static void ...@@ -675,6 +678,7 @@ static void
udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn) udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
{ {
const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
struct udp_tunnel_nic_shared_node *node;
unsigned int i, j; unsigned int i, j;
/* Freeze all the ports we are already tracking so that the replay /* Freeze all the ports we are already tracking so that the replay
...@@ -686,7 +690,12 @@ udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn) ...@@ -686,7 +690,12 @@ udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn)
utn->missed = 0; utn->missed = 0;
utn->need_replay = 0; utn->need_replay = 0;
udp_tunnel_get_rx_info(dev); if (!info->shared) {
udp_tunnel_get_rx_info(dev);
} else {
list_for_each_entry(node, &info->shared->devices, list)
udp_tunnel_get_rx_info(node->dev);
}
for (i = 0; i < utn->n_tables; i++) for (i = 0; i < utn->n_tables; i++)
for (j = 0; j < info->tables[i].n_entries; j++) for (j = 0; j < info->tables[i].n_entries; j++)
...@@ -742,20 +751,39 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, ...@@ -742,20 +751,39 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
return NULL; return NULL;
} }
/* Free a struct udp_tunnel_nic together with its entry storage:
 * first each of the utn->n_tables per-table arrays, then the array of
 * table pointers, and finally @utn itself.
 */
static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn)
{
unsigned int i;
for (i = 0; i < utn->n_tables; i++)
kfree(utn->entries[i]);
kfree(utn->entries);
kfree(utn);
}
static int udp_tunnel_nic_register(struct net_device *dev) static int udp_tunnel_nic_register(struct net_device *dev)
{ {
const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
struct udp_tunnel_nic_shared_node *node = NULL;
struct udp_tunnel_nic *utn; struct udp_tunnel_nic *utn;
unsigned int n_tables, i; unsigned int n_tables, i;
BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE < BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE <
UDP_TUNNEL_NIC_MAX_TABLES); UDP_TUNNEL_NIC_MAX_TABLES);
/* Expect use count of at most 2 (IPv4, IPv6) per device */
BUILD_BUG_ON(UDP_TUNNEL_NIC_USE_CNT_MAX <
UDP_TUNNEL_NIC_MAX_SHARING_DEVICES * 2);
/* Check that the driver info is sane */
if (WARN_ON(!info->set_port != !info->unset_port) || if (WARN_ON(!info->set_port != !info->unset_port) ||
WARN_ON(!info->set_port == !info->sync_table) || WARN_ON(!info->set_port == !info->sync_table) ||
WARN_ON(!info->tables[0].n_entries)) WARN_ON(!info->tables[0].n_entries))
return -EINVAL; return -EINVAL;
if (WARN_ON(info->shared &&
info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
return -EINVAL;
n_tables = 1; n_tables = 1;
for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) {
if (!info->tables[i].n_entries) if (!info->tables[i].n_entries)
...@@ -766,9 +794,33 @@ static int udp_tunnel_nic_register(struct net_device *dev) ...@@ -766,9 +794,33 @@ static int udp_tunnel_nic_register(struct net_device *dev)
return -EINVAL; return -EINVAL;
} }
utn = udp_tunnel_nic_alloc(info, n_tables); /* Create UDP tunnel state structures */
if (!utn) if (info->shared) {
return -ENOMEM; node = kzalloc(sizeof(*node), GFP_KERNEL);
if (!node)
return -ENOMEM;
node->dev = dev;
}
if (info->shared && info->shared->udp_tunnel_nic_info) {
utn = info->shared->udp_tunnel_nic_info;
} else {
utn = udp_tunnel_nic_alloc(info, n_tables);
if (!utn) {
kfree(node);
return -ENOMEM;
}
}
if (info->shared) {
if (!info->shared->udp_tunnel_nic_info) {
INIT_LIST_HEAD(&info->shared->devices);
info->shared->udp_tunnel_nic_info = utn;
}
list_add_tail(&node->list, &info->shared->devices);
}
utn->dev = dev; utn->dev = dev;
dev_hold(dev); dev_hold(dev);
...@@ -783,7 +835,33 @@ static int udp_tunnel_nic_register(struct net_device *dev) ...@@ -783,7 +835,33 @@ static int udp_tunnel_nic_register(struct net_device *dev)
static void static void
udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
{ {
unsigned int i; const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
/* For a shared table remove this dev from the list of sharing devices
* and if there are other devices just detach.
*/
if (info->shared) {
struct udp_tunnel_nic_shared_node *node, *first;
list_for_each_entry(node, &info->shared->devices, list)
if (node->dev == dev)
break;
if (node->dev != dev)
return;
list_del(&node->list);
kfree(node);
first = list_first_entry_or_null(&info->shared->devices,
typeof(*first), list);
if (first) {
udp_tunnel_drop_rx_info(dev);
utn->dev = first->dev;
goto release_dev;
}
info->shared->udp_tunnel_nic_info = NULL;
}
/* Flush before we check work, so we don't waste time adding entries /* Flush before we check work, so we don't waste time adding entries
* from the work which we will boot immediately. * from the work which we will boot immediately.
...@@ -796,10 +874,8 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) ...@@ -796,10 +874,8 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
if (utn->work_pending) if (utn->work_pending)
return; return;
for (i = 0; i < utn->n_tables; i++) udp_tunnel_nic_free(utn);
kfree(utn->entries[i]); release_dev:
kfree(utn->entries);
kfree(utn);
dev->udp_tunnel_nic = NULL; dev->udp_tunnel_nic = NULL;
dev_put(dev); dev_put(dev);
} }
......
...@@ -7,6 +7,7 @@ NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID ...@@ -7,6 +7,7 @@ NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID
NSIM_NETDEV= NSIM_NETDEV=
HAS_ETHTOOL= HAS_ETHTOOL=
STATIC_ENTRIES=
EXIT_STATUS=0 EXIT_STATUS=0
num_cases=0 num_cases=0
num_errors=0 num_errors=0
...@@ -193,6 +194,21 @@ function check_tables { ...@@ -193,6 +194,21 @@ function check_tables {
sleep 0.02 sleep 0.02
((retries--)) ((retries--))
done done
if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then
fail=0
for i in "${!STATIC_ENTRIES[@]}"; do
pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}`
cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected")
if [ $cnt -ne 1 ]; then
err_cnt "ethtool static entry: $pfx - $msg"
echo " check_table: ethtool does not contain '$pp_expected'"
ethtool --show-tunnels $NSIM_NETDEV
fail=1
fi
done
[ $fail == 0 ] && pass_cnt
fi
} }
function print_table { function print_table {
...@@ -775,6 +791,157 @@ for port in 0 1; do ...@@ -775,6 +791,157 @@ for port in 0 1; do
exp1=( 0 0 0 0 ) exp1=( 0 0 0 0 )
done done
cleanup_nsim
# shared port tables
pfx="table sharing"
echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_DFS/udp_ports_shared
old_netdevs=$(ls /sys/class/net)
echo 1 > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
old_netdevs=$(ls /sys/class/net)
echo 2 > $NSIM_DEV_SYS/new_port
NSIM_NETDEV2=`get_netdev_name old_netdevs`
msg="VxLAN v4 devices"
exp0=( `mke 4789 1` 0 0 0 )
exp1=( 0 0 0 0 )
new_vxlan vxlan0 4789 $NSIM_NETDEV
new_vxlan vxlan1 4789 $NSIM_NETDEV2
msg="VxLAN v4 devices go down"
exp0=( 0 0 0 0 )
ifconfig vxlan1 down
ifconfig vxlan0 down
check_tables
for ifc in vxlan0 vxlan1; do
ifconfig $ifc up
done
msg="VxLAN v6 device"
exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
new_vxlan vxlanC 4790 $NSIM_NETDEV 6
msg="Geneve device"
exp1=( `mke 6081 2` 0 0 0 )
new_geneve gnv0 6081
msg="NIC device goes down"
ifconfig $NSIM_NETDEV down
check_tables
msg="NIC device goes up again"
ifconfig $NSIM_NETDEV up
check_tables
for i in `seq 2`; do
msg="turn feature off - 1, rep $i"
ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
check_tables
msg="turn feature off - 2, rep $i"
exp0=( 0 0 0 0 )
exp1=( 0 0 0 0 )
ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off
check_tables
msg="turn feature on - 1, rep $i"
exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
exp1=( `mke 6081 2` 0 0 0 )
ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
check_tables
msg="turn feature on - 2, rep $i"
ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on
check_tables
done
msg="tunnels destroyed 1"
cleanup_tuns
exp0=( 0 0 0 0 )
exp1=( 0 0 0 0 )
check_tables
overflow_table0 "overflow NIC table"
msg="re-add a port"
echo 2 > $NSIM_DEV_SYS/del_port
echo 2 > $NSIM_DEV_SYS/new_port
check_tables
msg="replace VxLAN in overflow table"
exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
del_dev vxlan1
msg="vacate VxLAN in overflow table"
exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
del_dev vxlan2
echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
check_tables
msg="tunnels destroyed 2"
cleanup_tuns
exp0=( 0 0 0 0 )
exp1=( 0 0 0 0 )
check_tables
echo 1 > $NSIM_DEV_SYS/del_port
echo 2 > $NSIM_DEV_SYS/del_port
cleanup_nsim
# Static IANA port
pfx="static IANA vxlan"
echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
echo 1 > $NSIM_DEV_DFS/udp_ports_static_iana_vxlan
STATIC_ENTRIES=( `mke 4789 1` )
port=1
old_netdevs=$(ls /sys/class/net)
echo $port > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
msg="check empty"
exp0=( 0 0 0 0 )
exp1=( 0 0 0 0 )
check_tables
msg="add on static port"
new_vxlan vxlan0 4789 $NSIM_NETDEV
new_vxlan vxlan1 4789 $NSIM_NETDEV
msg="add on different port"
exp0=( `mke 4790 1` 0 0 0 )
new_vxlan vxlan2 4790 $NSIM_NETDEV
cleanup_tuns
msg="tunnels destroyed"
exp0=( 0 0 0 0 )
exp1=( 0 0 0 0 )
check_tables
msg="different type"
new_geneve gnv0 4789
cleanup_tuns
cleanup_nsim
# END
modprobe -r netdevsim modprobe -r netdevsim
if [ $num_errors -eq 0 ]; then if [ $num_errors -eq 0 ]; then
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment