Commit 1ca94d79 authored by David S. Miller

Merge branch 'nfp-flower-vxlan-tunnel-offload'

Simon Horman says:

====================
nfp: flower vxlan tunnel offload

John says:

This patch set allows offloading of TC flower tunnel match and set action
fields to the NFP. The initial focus is on VXLAN traffic. Due to the
current state of the NFP firmware, only VXLAN traffic on the well-known
port 4789 is handled; match and action fields must explicitly set this
value to be supported. Tunnel end point information is also offloaded to
the NFP for both encapsulation and decapsulation, for which the NFP
expects three separate data sets to be supplied.

For decapsulation, two separate lists exist: a list of MAC addresses,
referenced by an index built from the port number, and a list of IP
addresses. The IP addresses are not tied to a MAC or port. The MAC
addresses can be written as a block or one at a time (because they have an
index, previous values can be overwritten), while the IP addresses are
always written as a complete list of all available IPs. Because the MAC
address used as a tunnel end point may belong to a physical port or to a
virtual netdev like an OVS bridge, we cannot know in advance which
addresses should be offloaded. For this reason, the MAC addresses of all
active netdevs are offloaded to the NFP, and a notifier watches for
changes to any currently offloaded MACs and for newly appearing netdevs.
For IP addresses, the tunnel end point used in a rule is always known,
because the destination IP address must be specified in the flower
classifier rule. When a new IP address appears in a rule, it is
offloaded; the IP is removed from the offloaded list when the last rule
matching on it is deleted.
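
For illustration, the offloaded IP list is plausibly a refcounted list
keyed by address: an entry is added when the first rule matching on that
IP is offloaded and dropped when the last such rule is deleted. The
sketch below is an assumption about the body of nfp_tunnel_add_ipv4_off()
(declared in main.h in this series) since the new flower/tunnel_conf.c is
not shown here; nfp_tun_write_ip_list() is a hypothetical name for the
"rewrite the whole list" step:

    struct nfp_ipv4_addr_entry {
        __be32 ipv4_addr;
        int ref_count;
        struct list_head list;
    };

    void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4)
    {
        struct nfp_flower_priv *priv = app->priv;
        struct nfp_ipv4_addr_entry *entry;

        mutex_lock(&priv->nfp_ipv4_off_lock);
        list_for_each_entry(entry, &priv->nfp_ipv4_off_list, list) {
            if (entry->ipv4_addr == ipv4) {
                /* Already offloaded; take another reference. */
                entry->ref_count++;
                mutex_unlock(&priv->nfp_ipv4_off_lock);
                return;
            }
        }

        entry = kmalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry) {
            mutex_unlock(&priv->nfp_ipv4_off_lock);
            return;
        }
        entry->ipv4_addr = ipv4;
        entry->ref_count = 1;
        list_add_tail(&entry->list, &priv->nfp_ipv4_off_list);
        mutex_unlock(&priv->nfp_ipv4_off_lock);

        /* Push the complete, updated list of IPs to the NFP. */
        nfp_tun_write_ip_list(app);
    }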

For encapsulation, a next hop table on the NFP is updated with the
source/dest IPs, MACs and egress port of each tunnel; entries are written
individually when requested. If the NFP tries to encapsulate a packet but
does not know the next hop, it sends a request to the host. The host
carries out a route lookup and populates the given entry in the NFP
table. A notifier also watches for entries changing or going down in the
kernel next hop table; if an offloaded next hop entry is removed from the
kernel, it is removed from the NFP as well.
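
A sketch of the host side of that request, assuming the NO_NEIGH control
message carries the destination IP and ingress port (the real handler,
nfp_tunnel_request_route(), lives in flower/tunnel_conf.c which is not
shown here, and nfp_tun_write_neigh() is a hypothetical helper that sends
the populated entry back to the NFP):

    void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb)
    {
        struct nfp_tun_req_route_ipv4 *payload; /* assumed layout */
        struct net_device *netdev;
        struct flowi4 flow = {};
        struct neighbour *n;
        struct rtable *rt;

        payload = nfp_flower_cmsg_get_data(skb);

        netdev = nfp_app_repr_get(app, be32_to_cpu(payload->ingress_port));
        if (!netdev)
            return;

        flow.daddr = payload->ipv4_addr;
        flow.flowi4_proto = IPPROTO_UDP;

        /* Route lookup in the namespace of the ingress port. */
        rt = ip_route_output_key(dev_net(netdev), &flow);
        if (IS_ERR(rt))
            return;

        n = dst_neigh_lookup(&rt->dst, &flow.daddr);
        ip_rt_put(rt);
        if (!n)
            return;

        /* Write src/dst IPs, MACs and egress port to the NFP table. */
        nfp_tun_write_neigh(n->dev, app, &flow, n);
        neigh_release(n);
    }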

The NFP periodically sends a message to the host listing the tunnel
ports that have packets egressing the system. The host uses this
information to update the 'used' value of the corresponding neighbour
entries. This means that, rather than letting an entry expire when it
times out, the kernel will send an ARP to check whether the link is
still live. From the NFP's perspective, valid entries will therefore not
be removed from its next hop table.
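
The handler for these keep-alive messages, nfp_tunnel_keep_alive(), is
also in the unshown tunnel_conf.c; the payload layout below is an
assumption, but neigh_event_send() is the standard way to refresh a
neighbour entry so that it is re-probed rather than silently expired:

    void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb)
    {
        struct nfp_tun_active_tuns *payload; /* assumed layout */
        struct net_device *netdev;
        struct neighbour *n;
        __be32 ipv4_addr;
        int i, count;

        payload = nfp_flower_cmsg_get_data(skb);
        count = be32_to_cpu(payload->count);

        rcu_read_lock();
        for (i = 0; i < count; i++) {
            ipv4_addr = payload->tun_info[i].ipv4;
            netdev = nfp_app_repr_get(app,
                    be32_to_cpu(payload->tun_info[i].egress_port));
            if (!netdev)
                continue;

            n = neigh_lookup(&arp_tbl, &ipv4_addr, netdev);
            if (!n)
                continue;

            /* Mark as used: triggers an ARP probe on timeout instead
             * of letting the entry (and the NFP next hop) expire.
             */
            neigh_event_send(n, NULL);
            neigh_release(n);
        }
        rcu_read_unlock();
    }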
====================
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 14a0d032 856f5b13
@@ -37,7 +37,8 @@ nfp-objs += \
flower/main.o \
flower/match.o \
flower/metadata.o \
flower/offload.o \
flower/tunnel_conf.o
endif
ifeq ($(CONFIG_BPF_SYSCALL),y)
@@ -37,6 +37,7 @@
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include "cmsg.h"
#include "main.h"
@@ -80,14 +81,27 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
}
static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev,
enum nfp_flower_tun_type tun_type)
{
if (!out_dev->rtnl_link_ops)
return false;
if (!strcmp(out_dev->rtnl_link_ops->kind, "vxlan"))
return tun_type == NFP_FL_TUNNEL_VXLAN;
return false;
}
static int
nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
struct nfp_fl_payload *nfp_flow, bool last,
struct net_device *in_dev, enum nfp_flower_tun_type tun_type,
int *tun_out_cnt)
{
size_t act_size = sizeof(struct nfp_fl_output);
u16 tmp_output_op, tmp_flags;
struct net_device *out_dev;
int ifindex;
/* Set action opcode to output action. */
@@ -97,25 +111,114 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
output->a_op = cpu_to_be16(tmp_output_op);
ifindex = tcf_mirred_ifindex(action);
out_dev = __dev_get_by_index(dev_net(in_dev), ifindex);
if (!out_dev)
return -EOPNOTSUPP;
tmp_flags = last ? NFP_FL_OUT_FLAGS_LAST : 0;
if (tun_type) {
/* Verify the egress netdev matches the tunnel type. */
if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type))
return -EOPNOTSUPP;
if (*tun_out_cnt)
return -EOPNOTSUPP;
(*tun_out_cnt)++;
output->flags = cpu_to_be16(tmp_flags |
NFP_FL_OUT_FLAGS_USE_TUN);
output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
} else {
/* Set action output parameters. */
output->flags = cpu_to_be16(tmp_flags);
/* Only offload if egress ports are on the same device as the
* ingress port.
*/
if (!switchdev_port_same_parent_id(in_dev, out_dev))
return -EOPNOTSUPP;
output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
if (!output->port)
return -EOPNOTSUPP;
}
nfp_flow->meta.shortcut = output->port;
return 0;
}
static bool nfp_fl_supported_tun_port(const struct tc_action *action)
{
struct ip_tunnel_info *tun = tcf_tunnel_info(action);
return tun->key.tp_dst == htons(NFP_FL_VXLAN_PORT);
}
static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len)
{
size_t act_size = sizeof(struct nfp_fl_pre_tunnel);
struct nfp_fl_pre_tunnel *pre_tun_act;
u16 tmp_pre_tun_op;
/* Pre_tunnel action must be first on the action list.
* If other actions already exist, they need to be pushed forward.
*/
if (act_len)
memmove(act_data + act_size, act_data, act_len);
pre_tun_act = (struct nfp_fl_pre_tunnel *)act_data;
memset(pre_tun_act, 0, act_size);
tmp_pre_tun_op =
FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PRE_TUNNEL);
pre_tun_act->a_op = cpu_to_be16(tmp_pre_tun_op);
return pre_tun_act;
}
static int
nfp_fl_set_vxlan(struct nfp_fl_set_vxlan *set_vxlan,
const struct tc_action *action,
struct nfp_fl_pre_tunnel *pre_tun)
{
struct ip_tunnel_info *vxlan = tcf_tunnel_info(action);
size_t act_size = sizeof(struct nfp_fl_set_vxlan);
u32 tmp_set_vxlan_type_index = 0;
u16 tmp_set_vxlan_op;
/* Currently support one pre-tunnel so index is always 0. */
int pretun_idx = 0;
if (vxlan->options_len) {
/* Do not support options e.g. vxlan gpe. */
return -EOPNOTSUPP;
}
tmp_set_vxlan_op =
FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
FIELD_PREP(NFP_FL_ACT_JMP_ID,
NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL);
set_vxlan->a_op = cpu_to_be16(tmp_set_vxlan_op);
/* Set tunnel type and pre-tunnel index. */
tmp_set_vxlan_type_index |=
FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, NFP_FL_TUNNEL_VXLAN) |
FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx);
set_vxlan->tun_type_index = cpu_to_be32(tmp_set_vxlan_type_index);
set_vxlan->tun_id = vxlan->key.tun_id;
set_vxlan->tun_flags = vxlan->key.tun_flags;
set_vxlan->ipv4_ttl = vxlan->key.ttl;
set_vxlan->ipv4_tos = vxlan->key.tos;
/* Complete pre_tunnel action. */
pre_tun->ipv4_dst = vxlan->key.u.ipv4.dst;
return 0;
}
@@ -123,8 +226,11 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
static int
nfp_flower_loop_action(const struct tc_action *a,
struct nfp_fl_payload *nfp_fl, int *a_len,
struct net_device *netdev)
struct net_device *netdev,
enum nfp_flower_tun_type *tun_type, int *tun_out_cnt)
{
struct nfp_fl_pre_tunnel *pre_tun;
struct nfp_fl_set_vxlan *s_vxl;
struct nfp_fl_push_vlan *psh_v;
struct nfp_fl_pop_vlan *pop_v;
struct nfp_fl_output *output;
@@ -137,7 +243,8 @@ nfp_flower_loop_action(const struct tc_action *a,
return -EOPNOTSUPP;
output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
err = nfp_fl_output(output, a, nfp_fl, true, netdev, *tun_type,
tun_out_cnt);
if (err)
return err;
@@ -147,7 +254,8 @@ nfp_flower_loop_action(const struct tc_action *a,
return -EOPNOTSUPP;
output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
err = nfp_fl_output(output, a, nfp_fl, false, netdev, *tun_type,
tun_out_cnt);
if (err)
return err;
@@ -170,6 +278,29 @@ nfp_flower_loop_action(const struct tc_action *a,
nfp_fl_push_vlan(psh_v, a);
*a_len += sizeof(struct nfp_fl_push_vlan);
} else if (is_tcf_tunnel_set(a) && nfp_fl_supported_tun_port(a)) {
/* Pre-tunnel action is required for tunnel encap.
* This checks for next hop entries on NFP.
* If none, the packet falls back before applying other actions.
*/
if (*a_len + sizeof(struct nfp_fl_pre_tunnel) +
sizeof(struct nfp_fl_set_vxlan) > NFP_FL_MAX_A_SIZ)
return -EOPNOTSUPP;
*tun_type = NFP_FL_TUNNEL_VXLAN;
pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len);
nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
*a_len += sizeof(struct nfp_fl_pre_tunnel);
s_vxl = (struct nfp_fl_set_vxlan *)&nfp_fl->action_data[*a_len];
err = nfp_fl_set_vxlan(s_vxl, a, pre_tun);
if (err)
return err;
*a_len += sizeof(struct nfp_fl_set_vxlan);
} else if (is_tcf_tunnel_release(a)) {
/* Tunnel decap is handled by default so accept action. */
return 0;
} else {
/* Currently we do not handle any other actions. */
return -EOPNOTSUPP;
@@ -182,18 +313,22 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow)
{
int act_len, act_cnt, err, tun_out_cnt;
enum nfp_flower_tun_type tun_type;
const struct tc_action *a;
LIST_HEAD(actions);
memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ);
nfp_flow->meta.act_len = 0;
tun_type = NFP_FL_TUNNEL_NONE;
act_len = 0;
act_cnt = 0;
tun_out_cnt = 0;
tcf_exts_to_list(flow->exts, &actions);
list_for_each_entry(a, &actions, list) {
err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev,
&tun_type, &tun_out_cnt);
if (err)
return err;
act_cnt++;
@@ -38,17 +38,10 @@
#include <net/dst_metadata.h>
#include "main.h"
#include "../nfpcore/nfp_cpp.h"
#include "../nfp_net.h"
#include "../nfp_net_repr.h"
#include "./cmsg.h"
static struct nfp_flower_cmsg_hdr *
nfp_flower_cmsg_get_hdr(struct sk_buff *skb)
{
@@ -188,6 +181,15 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
case NFP_FLOWER_CMSG_TYPE_FLOW_STATS:
nfp_flower_rx_flow_stats(app, skb);
break;
case NFP_FLOWER_CMSG_TYPE_NO_NEIGH:
nfp_tunnel_request_route(app, skb);
break;
case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS:
nfp_tunnel_keep_alive(app, skb);
break;
case NFP_FLOWER_CMSG_TYPE_TUN_NEIGH:
/* Acks from the NFP that the route is added - ignore. */
break;
default:
nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
type);
@@ -39,6 +39,7 @@
#include <linux/types.h>
#include "../nfp_app.h"
#include "../nfpcore/nfp_cpp.h"
#define NFP_FLOWER_LAYER_META BIT(0)
#define NFP_FLOWER_LAYER_PORT BIT(1)
@@ -67,10 +68,12 @@
#define NFP_FL_LW_SIZ 2
/* Action opcodes */
#define NFP_FL_ACTION_OPCODE_OUTPUT 0
#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1
#define NFP_FL_ACTION_OPCODE_POP_VLAN 2
#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6
#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17
#define NFP_FL_ACTION_OPCODE_NUM 32
#define NFP_FL_ACT_JMP_ID GENMASK(15, 8)
#define NFP_FL_ACT_LEN_LW GENMASK(7, 0)
@@ -83,6 +86,22 @@
#define NFP_FL_PUSH_VLAN_CFI BIT(12)
#define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0)
/* Tunnel ports */
#define NFP_FL_PORT_TYPE_TUN 0x50000000
#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4)
#define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0)
#define nfp_flower_cmsg_warn(app, fmt, args...) \
do { \
if (net_ratelimit()) \
nfp_warn((app)->cpp, fmt, ## args); \
} while (0)
enum nfp_flower_tun_type {
NFP_FL_TUNNEL_NONE = 0,
NFP_FL_TUNNEL_VXLAN = 2,
};
struct nfp_fl_output {
__be16 a_op;
__be16 flags;
@@ -115,6 +134,25 @@ struct nfp_flower_meta_one {
u16 reserved;
};
struct nfp_fl_pre_tunnel {
__be16 a_op;
__be16 reserved;
__be32 ipv4_dst;
/* reserved for use with IPv6 addresses */
__be32 extra[3];
};
struct nfp_fl_set_vxlan {
__be16 a_op;
__be16 reserved;
__be64 tun_id;
__be32 tun_type_index;
__be16 tun_flags;
u8 ipv4_ttl;
u8 ipv4_tos;
__be32 extra[2];
} __packed;
/* Metadata with L2 (1W/4B)
* ----------------------------------------------------------------
* 3 2 1
@@ -230,6 +268,36 @@ struct nfp_flower_ipv6 {
struct in6_addr ipv6_dst;
};
/* Flow Frame VXLAN --> Tunnel details (4W/16B)
* -----------------------------------------------------------------
* 3 2 1
* 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | ipv4_addr_src |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | ipv4_addr_dst |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | tun_flags | tos | ttl |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | gpe_flags | Reserved | Next Protocol |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | VNI | Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
struct nfp_flower_vxlan {
__be32 ip_src;
__be32 ip_dst;
__be16 tun_flags;
u8 tos;
u8 ttl;
u8 gpe_flags;
u8 reserved[2];
u8 nxt_proto;
__be32 tun_id;
};
#define NFP_FL_TUN_VNI_OFFSET 8
/* The base header for a control message packet.
* Defines an 8-bit version, and an 8-bit type, padded
* to a 32-bit word. Rest of the packet is type-specific.
@@ -249,6 +317,11 @@ enum nfp_flower_cmsg_type_port {
NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2,
NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7,
NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8,
NFP_FLOWER_CMSG_TYPE_NO_NEIGH = 10,
NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11,
NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS = 12,
NFP_FLOWER_CMSG_TYPE_TUN_NEIGH = 13,
NFP_FLOWER_CMSG_TYPE_TUN_IPS = 14,
NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15,
NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16,
NFP_FLOWER_CMSG_TYPE_MAX = 32,
@@ -282,6 +355,7 @@ enum nfp_flower_cmsg_port_type {
NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC = 0x0,
NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT = 0x1,
NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT = 0x2,
NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT = 0x3,
};
enum nfp_flower_cmsg_port_vnic_type {
@@ -323,6 +397,11 @@ static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb)
return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN;
}
static inline int nfp_flower_cmsg_get_data_len(struct sk_buff *skb)
{
return skb->len - NFP_FLOWER_CMSG_HLEN;
}
struct sk_buff *
nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports);
void
@@ -436,6 +436,16 @@ static void nfp_flower_clean(struct nfp_app *app)
app->priv = NULL;
}
static int nfp_flower_start(struct nfp_app *app)
{
return nfp_tunnel_config_start(app);
}
static void nfp_flower_stop(struct nfp_app *app)
{
nfp_tunnel_config_stop(app);
}
const struct nfp_app_type app_flower = {
.id = NFP_APP_FLOWER_NIC,
.name = "flower",
@@ -453,6 +463,9 @@ const struct nfp_app_type app_flower = {
.repr_open = nfp_flower_repr_netdev_open,
.repr_stop = nfp_flower_repr_netdev_stop,
.start = nfp_flower_start,
.stop = nfp_flower_stop,
.ctrl_msg_rx = nfp_flower_cmsg_rx,
.sriov_enable = nfp_flower_sriov_enable,
@@ -58,6 +58,8 @@ struct nfp_app;
#define NFP_FL_MASK_REUSE_TIME_NS 40000
#define NFP_FL_MASK_ID_LOCATION 1
#define NFP_FL_VXLAN_PORT 4789
struct nfp_fl_mask_id {
struct circ_buf mask_id_free_list;
struct timespec64 *last_used;
@@ -82,6 +84,18 @@ struct nfp_fl_stats_id {
* @flow_table: Hash table used to store flower rules
* @cmsg_work: Workqueue for control messages processing
* @cmsg_skbs: List of skbs for control message processing
* @nfp_mac_off_list: List of MAC addresses to offload
* @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs
* @nfp_ipv4_off_list: List of IPv4 addresses to offload
* @nfp_neigh_off_list: List of neighbour offloads
* @nfp_mac_off_lock: Lock for the MAC address list
* @nfp_mac_index_lock: Lock for the MAC index list
* @nfp_ipv4_off_lock: Lock for the IPv4 address list
* @nfp_neigh_off_lock: Lock for the neighbour address list
* @nfp_mac_off_ids: IDA to manage id assignment for offloaded macs
* @nfp_mac_off_count: Number of MACs in address list
* @nfp_tun_mac_nb: Notifier to monitor link state
* @nfp_tun_neigh_nb: Notifier to monitor neighbour state
*/
struct nfp_flower_priv {
struct nfp_app *app;
@@ -94,6 +108,18 @@ struct nfp_flower_priv {
DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS);
struct work_struct cmsg_work;
struct sk_buff_head cmsg_skbs;
struct list_head nfp_mac_off_list;
struct list_head nfp_mac_index_list;
struct list_head nfp_ipv4_off_list;
struct list_head nfp_neigh_off_list;
struct mutex nfp_mac_off_lock;
struct mutex nfp_mac_index_lock;
struct mutex nfp_ipv4_off_lock;
struct mutex nfp_neigh_off_lock;
struct ida nfp_mac_off_ids;
int nfp_mac_off_count;
struct notifier_block nfp_tun_mac_nb;
struct notifier_block nfp_tun_neigh_nb;
};
struct nfp_fl_key_ls {
@@ -126,6 +152,7 @@ struct nfp_fl_payload {
struct rcu_head rcu;
spinlock_t lock; /* lock stats */
struct nfp_fl_stats stats;
__be32 nfp_tun_ipv4_addr;
char *unmasked_data;
char *mask_data;
char *action_data;
@@ -163,4 +190,12 @@ nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb);
int nfp_tunnel_config_start(struct nfp_app *app);
void nfp_tunnel_config_stop(struct nfp_app *app);
void nfp_tunnel_write_macs(struct nfp_app *app);
void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4);
void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4);
void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb);
void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb);
#endif
@@ -77,14 +77,17 @@ nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type)
static int
nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
bool mask_version, enum nfp_flower_tun_type tun_type)
{
if (mask_version) {
frame->in_port = cpu_to_be32(~0);
return 0;
}
if (tun_type)
frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
else
frame->in_port = cpu_to_be32(cmsg_port);
return 0;
}
@@ -189,15 +192,56 @@
}
}
static void
nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame,
struct tc_cls_flower_offload *flow,
bool mask_version, __be32 *tun_dst)
{
struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
struct flow_dissector_key_ipv4_addrs *vxlan_ips;
struct flow_dissector_key_keyid *vni;
/* Wildcard TOS/TTL/GPE_FLAGS/NXT_PROTO for now. */
memset(frame, 0, sizeof(struct nfp_flower_vxlan));
if (dissector_uses_key(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_KEYID)) {
u32 temp_vni;
vni = skb_flow_dissector_target(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_KEYID,
target);
temp_vni = be32_to_cpu(vni->keyid) << NFP_FL_TUN_VNI_OFFSET;
frame->tun_id = cpu_to_be32(temp_vni);
}
if (dissector_uses_key(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
vxlan_ips =
skb_flow_dissector_target(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
target);
frame->ip_src = vxlan_ips->src;
frame->ip_dst = vxlan_ips->dst;
*tun_dst = vxlan_ips->dst;
}
}
int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
struct nfp_fl_key_ls *key_ls,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow)
{
enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
__be32 tun_dst, tun_dst_mask = 0;
struct nfp_repr *netdev_repr;
int err;
u8 *ext;
u8 *msk;
if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN)
tun_type = NFP_FL_TUNNEL_VXLAN;
memset(nfp_flow->unmasked_data, 0, key_ls->key_size);
memset(nfp_flow->mask_data, 0, key_ls->key_size);
@@ -216,14 +260,14 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
/* Populate Exact Port data. */
err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext,
nfp_repr_get_port_id(netdev),
false, tun_type);
if (err)
return err;
/* Populate Mask Port Data. */
err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
nfp_repr_get_port_id(netdev),
true, tun_type);
if (err)
return err;
@@ -291,5 +335,28 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
msk += sizeof(struct nfp_flower_ipv6);
}
if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) {
/* Populate Exact VXLAN Data. */
nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)ext,
flow, false, &tun_dst);
/* Populate Mask VXLAN Data. */
nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)msk,
flow, true, &tun_dst_mask);
ext += sizeof(struct nfp_flower_vxlan);
msk += sizeof(struct nfp_flower_vxlan);
/* Configure tunnel end point MAC. */
if (nfp_netdev_is_nfp_repr(netdev)) {
netdev_repr = netdev_priv(netdev);
nfp_tunnel_write_macs(netdev_repr->app);
/* Store the tunnel destination in the rule data.
* This must be present and be an exact match.
*/
nfp_flow->nfp_tun_ipv4_addr = tun_dst;
nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst);
}
}
return 0;
}
@@ -140,7 +140,7 @@ nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats)
void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb)
{
unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb);
struct nfp_fl_stats_frame *stats_frame;
unsigned char *msg;
int i;
@@ -52,8 +52,25 @@
BIT(FLOW_DISSECTOR_KEY_PORTS) | \
BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_VLAN) | \
BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \
BIT(FLOW_DISSECTOR_KEY_IP))
#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \
(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \
(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
static int
nfp_flower_xmit_flow(struct net_device *netdev,
struct nfp_fl_payload *nfp_flow, u8 mtype)
@@ -125,15 +142,58 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR)
return -EOPNOTSUPP;
/* If any tun dissector is used then the required set must be used. */
if (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR &&
(flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
!= NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
return -EOPNOTSUPP;
key_layer_two = 0;
key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
key_size = sizeof(struct nfp_flower_meta_one) +
sizeof(struct nfp_flower_in_port) +
sizeof(struct nfp_flower_mac_mpls);
if (dissector_uses_key(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL;
struct flow_dissector_key_ports *mask_enc_ports = NULL;
struct flow_dissector_key_ports *enc_ports = NULL;
struct flow_dissector_key_control *mask_enc_ctl =
skb_flow_dissector_target(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_CONTROL,
flow->mask);
struct flow_dissector_key_control *enc_ctl =
skb_flow_dissector_target(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_CONTROL,
flow->key);
if (mask_enc_ctl->addr_type != 0xffff ||
enc_ctl->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS)
return -EOPNOTSUPP;
/* These fields are already verified as used. */
mask_ipv4 =
skb_flow_dissector_target(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
flow->mask);
if (mask_ipv4->dst != cpu_to_be32(~0))
return -EOPNOTSUPP;
mask_enc_ports =
skb_flow_dissector_target(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_PORTS,
flow->mask);
enc_ports =
skb_flow_dissector_target(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_PORTS,
flow->key);
if (mask_enc_ports->dst != cpu_to_be16(~0) ||
enc_ports->dst != htons(NFP_FL_VXLAN_PORT))
return -EOPNOTSUPP;
key_layer |= NFP_FLOWER_LAYER_VXLAN;
key_size += sizeof(struct nfp_flower_vxlan);
}
if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
@@ -151,12 +211,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
FLOW_DISSECTOR_KEY_IP,
flow->mask);
if (mask_basic && mask_basic->n_proto) {
/* Ethernet type is present in the key. */
switch (key_basic->n_proto) {
@@ -252,6 +306,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
if (!flow_pay->action_data)
goto err_free_mask;
flow_pay->nfp_tun_ipv4_addr = 0;
flow_pay->meta.flags = 0;
spin_lock_init(&flow_pay->lock);
@@ -361,6 +416,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
if (err)
goto err_free_flow;
if (nfp_flow->nfp_tun_ipv4_addr)
nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr);
err = nfp_flower_xmit_flow(netdev, nfp_flow,
NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
if (err)