Commit bf3347c4 authored by David S. Miller
parents 93e61613 b8ce9037
......@@ -3570,7 +3570,8 @@ static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
misc_parameters_2);
MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
mlx5_eswitch_get_vport_metadata_mask());
} else {
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
misc_parameters);
......
......@@ -34,7 +34,7 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
en/tc_tun_geneve.o diag/en_tc_tracepoint.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2018 Mellanox Technologies */
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/xarray.h>
#include <linux/hashtable.h>
#include "mapping.h"
#define MAPPING_GRACE_PERIOD 2000
struct mapping_ctx {
struct xarray xarray;
DECLARE_HASHTABLE(ht, 8);
struct mutex lock; /* Guards hashtable and xarray */
unsigned long max_id;
size_t data_size;
bool delayed_removal;
struct delayed_work dwork;
struct list_head pending_list;
spinlock_t pending_list_lock; /* Guards pending list */
};
struct mapping_item {
struct rcu_head rcu;
struct list_head list;
unsigned long timeout;
struct hlist_node node;
int cnt;
u32 id;
char data[];
};
int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id)
{
struct mapping_item *mi;
int err = -ENOMEM;
u32 hash_key;
mutex_lock(&ctx->lock);
hash_key = jhash(data, ctx->data_size, 0);
hash_for_each_possible(ctx->ht, mi, node, hash_key) {
if (!memcmp(data, mi->data, ctx->data_size))
goto attach;
}
mi = kzalloc(sizeof(*mi) + ctx->data_size, GFP_KERNEL);
if (!mi)
goto err_alloc;
memcpy(mi->data, data, ctx->data_size);
hash_add(ctx->ht, &mi->node, hash_key);
err = xa_alloc(&ctx->xarray, &mi->id, mi, XA_LIMIT(1, ctx->max_id),
GFP_KERNEL);
if (err)
goto err_assign;
attach:
++mi->cnt;
*id = mi->id;
mutex_unlock(&ctx->lock);
return 0;
err_assign:
hash_del(&mi->node);
kfree(mi);
err_alloc:
mutex_unlock(&ctx->lock);
return err;
}
static void mapping_remove_and_free(struct mapping_ctx *ctx,
struct mapping_item *mi)
{
xa_erase(&ctx->xarray, mi->id);
kfree_rcu(mi, rcu);
}
static void mapping_free_item(struct mapping_ctx *ctx,
struct mapping_item *mi)
{
if (!ctx->delayed_removal) {
mapping_remove_and_free(ctx, mi);
return;
}
mi->timeout = jiffies + msecs_to_jiffies(MAPPING_GRACE_PERIOD);
spin_lock(&ctx->pending_list_lock);
list_add_tail(&mi->list, &ctx->pending_list);
spin_unlock(&ctx->pending_list_lock);
schedule_delayed_work(&ctx->dwork, MAPPING_GRACE_PERIOD);
}
int mapping_remove(struct mapping_ctx *ctx, u32 id)
{
unsigned long index = id;
struct mapping_item *mi;
int err = -ENOENT;
mutex_lock(&ctx->lock);
mi = xa_load(&ctx->xarray, index);
if (!mi)
goto out;
err = 0;
if (--mi->cnt > 0)
goto out;
hash_del(&mi->node);
mapping_free_item(ctx, mi);
out:
mutex_unlock(&ctx->lock);
return err;
}
int mapping_find(struct mapping_ctx *ctx, u32 id, void *data)
{
unsigned long index = id;
struct mapping_item *mi;
int err = -ENOENT;
rcu_read_lock();
mi = xa_load(&ctx->xarray, index);
if (!mi)
goto err_find;
memcpy(data, mi->data, ctx->data_size);
err = 0;
err_find:
rcu_read_unlock();
return err;
}
static void
mapping_remove_and_free_list(struct mapping_ctx *ctx, struct list_head *list)
{
struct mapping_item *mi;
list_for_each_entry(mi, list, list)
mapping_remove_and_free(ctx, mi);
}
static void mapping_work_handler(struct work_struct *work)
{
unsigned long min_timeout = 0, now = jiffies;
struct mapping_item *mi, *next;
LIST_HEAD(pending_items);
struct mapping_ctx *ctx;
ctx = container_of(work, struct mapping_ctx, dwork.work);
spin_lock(&ctx->pending_list_lock);
list_for_each_entry_safe(mi, next, &ctx->pending_list, list) {
if (time_after(now, mi->timeout))
list_move(&mi->list, &pending_items);
else if (!min_timeout ||
time_before(mi->timeout, min_timeout))
min_timeout = mi->timeout;
}
spin_unlock(&ctx->pending_list_lock);
mapping_remove_and_free_list(ctx, &pending_items);
if (min_timeout)
schedule_delayed_work(&ctx->dwork, abs(min_timeout - now));
}
static void mapping_flush_work(struct mapping_ctx *ctx)
{
if (!ctx->delayed_removal)
return;
cancel_delayed_work_sync(&ctx->dwork);
mapping_remove_and_free_list(ctx, &ctx->pending_list);
}
struct mapping_ctx *
mapping_create(size_t data_size, u32 max_id, bool delayed_removal)
{
struct mapping_ctx *ctx;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return ERR_PTR(-ENOMEM);
ctx->max_id = max_id ? max_id : UINT_MAX;
ctx->data_size = data_size;
if (delayed_removal) {
INIT_DELAYED_WORK(&ctx->dwork, mapping_work_handler);
INIT_LIST_HEAD(&ctx->pending_list);
spin_lock_init(&ctx->pending_list_lock);
ctx->delayed_removal = true;
}
mutex_init(&ctx->lock);
xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1);
return ctx;
}
void mapping_destroy(struct mapping_ctx *ctx)
{
mapping_flush_work(ctx);
xa_destroy(&ctx->xarray);
mutex_destroy(&ctx->lock);
kfree(ctx);
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies */
#ifndef __MLX5_MAPPING_H__
#define __MLX5_MAPPING_H__
struct mapping_ctx;
int mapping_add(struct mapping_ctx *ctx, void *data, u32 *id);
int mapping_remove(struct mapping_ctx *ctx, u32 id);
int mapping_find(struct mapping_ctx *ctx, u32 id, void *data);
/* mapping uses an xarray to map data to ids in add(), and for find().
 * For locking, it uses an internal xarray spin lock for add()/remove(),
 * while find() uses rcu_read_lock().
 * Choosing delayed_removal postpones the removal of a previously mapped
 * id by MAPPING_GRACE_PERIOD milliseconds.
 * This is to avoid races against hardware, where the packet is marked in
 * hardware with a previous id, and a quick remove() followed by add()
 * reuses that same id. find() would then return the new mapping instead
 * of the old one that was used to mark the packet.
 */
struct mapping_ctx *mapping_create(size_t data_size, u32 max_id,
bool delayed_removal);
void mapping_destroy(struct mapping_ctx *ctx);
#endif /* __MLX5_MAPPING_H__ */
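For context, here is a minimal usage sketch of the mapping API declared above, assuming a caller that maps a 32-bit key; the function name and key value below are illustrative and not part of this patch:

/* Map a 32-bit key to an id and look it up again. Passing max_id = 0 means
 * "no limit" (UINT_MAX), and delayed_removal = true keeps a freed id
 * reserved for MAPPING_GRACE_PERIOD ms before it can be reused.
 */
static int example_use_mapping(void)
{
	struct mapping_ctx *ctx;
	u32 key = 42, found, id;
	int err;

	ctx = mapping_create(sizeof(key), 0, true);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	err = mapping_add(ctx, &key, &id);	/* ids are allocated from 1 */
	if (err)
		goto out;

	err = mapping_find(ctx, id, &found);	/* copies the key back, found == 42 */
	if (!err)
		err = mapping_remove(ctx, id);	/* drops the refcount, frees at zero */
out:
	mapping_destroy(ctx);
	return err;
}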
......@@ -469,10 +469,15 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
void *headers_c,
void *headers_v, u8 *match_level)
u8 *match_level)
{
struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
outer_headers);
void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
outer_headers);
struct netlink_ext_ack *extack = f->common.extack;
int err = 0;
if (!tunnel) {
......@@ -499,6 +504,109 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
goto out;
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
struct flow_match_control match;
u16 addr_type;
flow_rule_match_enc_control(rule, &match);
addr_type = match.key->addr_type;
/* Tunnel addr_type uses the same key IDs as non-tunnel */
if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
struct flow_match_ipv4_addrs match;
flow_rule_match_enc_ipv4_addrs(rule, &match);
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4,
ntohl(match.mask->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv4_layout.ipv4,
ntohl(match.key->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
ntohl(match.mask->dst));
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
ntohl(match.key->dst));
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
ethertype);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
ETH_P_IP);
} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
struct flow_match_ipv6_addrs match;
flow_rule_match_enc_ipv6_addrs(rule, &match);
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
ipv6));
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
ethertype);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
ETH_P_IPV6);
}
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
struct flow_match_ip match;
flow_rule_match_enc_ip(rule, &match);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
match.mask->tos & 0x3);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
match.key->tos & 0x3);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
match.mask->tos >> 2);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
match.key->tos >> 2);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
match.mask->ttl);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
match.key->ttl);
if (match.mask->ttl &&
!MLX5_CAP_ESW_FLOWTABLE_FDB
(priv->mdev,
ft_field_support.outer_ipv4_ttl)) {
NL_SET_ERR_MSG_MOD(extack,
"Matching on TTL is not supported");
err = -EOPNOTSUPP;
goto out;
}
}
/* Enforce DMAC when offloading incoming tunneled flows.
* Flow counters require a match on the DMAC.
*/
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dmac_47_16), priv->netdev->dev_addr);
/* let software handle IP fragments */
MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
return 0;
out:
return err;
}
......
......@@ -76,8 +76,7 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct flow_cls_offload *f,
void *headers_c,
void *headers_v, u8 *match_level);
u8 *match_level);
int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
......
......@@ -1952,7 +1952,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
.update_rx = mlx5e_update_rep_rx,
.update_stats = mlx5e_update_ndo_stats,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep,
.max_tc = 1,
.rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
.stats_grps = mlx5e_rep_stats_grps,
......@@ -1972,7 +1972,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
.update_stats = mlx5e_update_ndo_stats,
.update_carrier = mlx5e_update_carrier,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep,
.max_tc = MLX5E_MAX_NUM_TC,
.rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR),
.stats_grps = mlx5e_ul_rep_stats_grps,
......
......@@ -81,6 +81,11 @@ struct mlx5_rep_uplink_priv {
struct mutex unready_flows_lock;
struct list_head unready_flows;
struct work_struct reoffload_flows_work;
/* maps tun_info to a unique id */
struct mapping_ctx *tunnel_mapping;
/* maps tun_enc_opts to a unique id */
struct mapping_ctx *tunnel_enc_opts_mapping;
};
struct mlx5e_rep_priv {
......@@ -192,6 +197,8 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq,
struct mlx5_cqe64 *cqe);
int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
......
......@@ -1195,6 +1195,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5e_tc_update_priv tc_priv = {};
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
struct mlx5e_wqe_frag_info *wi;
struct sk_buff *skb;
......@@ -1227,13 +1228,78 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
if (rep->vlan && skb_vlan_tag_present(skb))
skb_vlan_pop(skb);
if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv))
goto free_wqe;
napi_gro_receive(rq->cq.napi, skb);
mlx5_tc_rep_post_napi_receive(&tc_priv);
free_wqe:
mlx5e_free_rx_wqe(rq, wi, true);
wq_cyc_pop:
mlx5_wq_cyc_pop(wq);
}
void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq,
struct mlx5_cqe64 *cqe)
{
u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
u16 wqe_id = be16_to_cpu(cqe->wqe_id);
struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
u32 head_offset = wqe_offset & (PAGE_SIZE - 1);
u32 page_idx = wqe_offset >> PAGE_SHIFT;
struct mlx5e_tc_update_priv tc_priv = {};
struct mlx5e_rx_wqe_ll *wqe;
struct mlx5_wq_ll *wq;
struct sk_buff *skb;
u16 cqe_bcnt;
wi->consumed_strides += cstrides;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
trigger_report(rq, cqe);
rq->stats->wqe_err++;
goto mpwrq_cqe_out;
}
if (unlikely(mpwrq_is_filler_cqe(cqe))) {
struct mlx5e_rq_stats *stats = rq->stats;
stats->mpwqe_filler_cqes++;
stats->mpwqe_filler_strides += cstrides;
goto mpwrq_cqe_out;
}
cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
mlx5e_skb_from_cqe_mpwrq_linear,
mlx5e_skb_from_cqe_mpwrq_nonlinear,
rq, wi, cqe_bcnt, head_offset, page_idx);
if (!skb)
goto mpwrq_cqe_out;
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv))
goto mpwrq_cqe_out;
napi_gro_receive(rq->cq.napi, skb);
mlx5_tc_rep_post_napi_receive(&tc_priv);
mpwrq_cqe_out:
if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
return;
wq = &rq->mpwqe.wq;
wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
mlx5e_free_rx_mpwqe(rq, wi, true);
mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}
#endif
struct sk_buff *
......
......@@ -91,9 +91,54 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags);
void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
enum mlx5e_tc_attr_to_reg {
CHAIN_TO_REG,
TUNNEL_TO_REG,
};
struct mlx5e_tc_attr_to_reg_mapping {
int mfield; /* rewrite field */
int moffset; /* offset of mfield */
int mlen; /* bytes to rewrite/match */
int soffset; /* offset of spec for match */
};
extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[];
bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
struct net_device *out_dev);
struct mlx5e_tc_update_priv {
struct net_device *tun_dev;
};
bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
struct mlx5e_tc_update_priv *tc_priv);
void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv);
struct mlx5e_tc_mod_hdr_acts {
int num_actions;
int max_actions;
void *actions;
};
int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
enum mlx5e_tc_attr_to_reg type,
u32 data);
void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
enum mlx5e_tc_attr_to_reg type,
u32 data,
u32 mask);
int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
int namespace,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
......
......@@ -197,6 +197,10 @@ struct mlx5_eswitch_fdb {
};
struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads_restore;
struct mlx5_flow_group *restore_group;
struct mlx5_modify_hdr *restore_copy_hdr_id;
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
struct mlx5_eswitch_rep *vport_reps;
......@@ -636,6 +640,11 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw);
void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw);
struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag);
u32
esw_get_max_restore_tag(struct mlx5_eswitch *esw);
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
......@@ -651,6 +660,12 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {}
static struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
{
return ERR_PTR(-EOPNOTSUPP);
}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
......@@ -6,14 +6,17 @@
#include <linux/mlx5/fs.h>
#include "eswitch_offloads_chains.h"
#include "en/mapping.h"
#include "mlx5_core.h"
#include "fs_core.h"
#include "eswitch.h"
#include "en.h"
#include "en_tc.h"
#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv)
#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock)
#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht)
#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping)
#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht)
#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left)
#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb)
......@@ -43,6 +46,7 @@ struct mlx5_esw_chains_priv {
struct mutex lock;
struct mlx5_flow_table *tc_end_fdb;
struct mapping_ctx *chains_mapping;
int fdb_left[ARRAY_SIZE(ESW_POOLS)];
};
......@@ -53,9 +57,12 @@ struct fdb_chain {
u32 chain;
int ref;
int id;
struct mlx5_eswitch *esw;
struct list_head prios_list;
struct mlx5_flow_handle *restore_rule;
struct mlx5_modify_hdr *miss_modify_hdr;
};
struct fdb_prio_key {
......@@ -261,6 +268,70 @@ mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw,
mlx5_destroy_flow_table(fdb);
}
static int
create_fdb_chain_restore(struct fdb_chain *fdb_chain)
{
char modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)];
struct mlx5_eswitch *esw = fdb_chain->esw;
struct mlx5_modify_hdr *mod_hdr;
u32 index;
int err;
if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw))
return 0;
err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index);
if (err)
return err;
if (index == MLX5_FS_DEFAULT_FLOW_TAG) {
/* we got the special default flow tag id, so we won't know
* if we actually marked the packet with the restore rule
* we create.
*
* This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0.
*/
err = mapping_add(esw_chains_mapping(esw),
&fdb_chain->chain, &index);
mapping_remove(esw_chains_mapping(esw),
MLX5_FS_DEFAULT_FLOW_TAG);
if (err)
return err;
}
fdb_chain->id = index;
MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
MLX5_SET(set_action_in, modact, field,
mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield);
MLX5_SET(set_action_in, modact, offset,
mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8);
MLX5_SET(set_action_in, modact, length,
mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8);
MLX5_SET(set_action_in, modact, data, fdb_chain->id);
mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB,
1, modact);
if (IS_ERR(mod_hdr)) {
err = PTR_ERR(mod_hdr);
goto err_mod_hdr;
}
fdb_chain->miss_modify_hdr = mod_hdr;
fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id);
if (IS_ERR(fdb_chain->restore_rule)) {
err = PTR_ERR(fdb_chain->restore_rule);
goto err_rule;
}
return 0;
err_rule:
mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr);
err_mod_hdr:
/* Datapath can't find this mapping, so we can safely remove it */
mapping_remove(esw_chains_mapping(esw), fdb_chain->id);
return err;
}
static struct fdb_chain *
mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
{
......@@ -275,6 +346,10 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
fdb_chain->chain = chain;
INIT_LIST_HEAD(&fdb_chain->prios_list);
err = create_fdb_chain_restore(fdb_chain);
if (err)
goto err_restore;
err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node,
chain_params);
if (err)
......@@ -283,6 +358,12 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
return fdb_chain;
err_insert:
if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) {
mlx5_del_flow_rules(fdb_chain->restore_rule);
mlx5_modify_header_dealloc(esw->dev,
fdb_chain->miss_modify_hdr);
}
err_restore:
kvfree(fdb_chain);
return ERR_PTR(err);
}
......@@ -294,6 +375,15 @@ mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain)
rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node,
chain_params);
if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) {
mlx5_del_flow_rules(fdb_chain->restore_rule);
mlx5_modify_header_dealloc(esw->dev,
fdb_chain->miss_modify_hdr);
mapping_remove(esw_chains_mapping(esw), fdb_chain->id);
}
kvfree(fdb_chain);
}
......@@ -316,10 +406,12 @@ mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain)
}
static struct mlx5_flow_handle *
mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb,
mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain,
struct mlx5_flow_table *fdb,
struct mlx5_flow_table *next_fdb)
{
static const struct mlx5_flow_spec spec = {};
struct mlx5_eswitch *esw = fdb_chain->esw;
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act act = {};
......@@ -328,6 +420,11 @@ mlx5_esw_chains_add_miss_rule(struct mlx5_flow_table *fdb,
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = next_fdb;
if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) {
act.modify_hdr = fdb_chain->miss_modify_hdr;
act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
}
return mlx5_add_flow_rules(fdb, &spec, &act, &dest, 1);
}
......@@ -351,7 +448,8 @@ mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio,
list_for_each_entry_continue_reverse(pos,
&fdb_chain->prios_list,
list) {
miss_rules[n] = mlx5_esw_chains_add_miss_rule(pos->fdb,
miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain,
pos->fdb,
next_fdb);
if (IS_ERR(miss_rules[n])) {
err = PTR_ERR(miss_rules[n]);
......@@ -465,7 +563,7 @@ mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw,
}
/* Add miss rule to next_fdb */
miss_rule = mlx5_esw_chains_add_miss_rule(fdb, next_fdb);
miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb);
if (IS_ERR(miss_rule)) {
err = PTR_ERR(miss_rule);
goto err_miss_rule;
......@@ -630,6 +728,7 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw)
struct mlx5_esw_chains_priv *chains_priv;
struct mlx5_core_dev *dev = esw->dev;
u32 max_flow_counter, fdb_max;
struct mapping_ctx *mapping;
int err;
chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL);
......@@ -666,10 +765,20 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw)
if (err)
goto init_prios_ht_err;
mapping = mapping_create(sizeof(u32), esw_get_max_restore_tag(esw),
true);
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto mapping_err;
}
esw_chains_mapping(esw) = mapping;
mutex_init(&esw_chains_lock(esw));
return 0;
mapping_err:
rhashtable_destroy(&esw_prios_ht(esw));
init_prios_ht_err:
rhashtable_destroy(&esw_chains_ht(esw));
init_chains_ht_err:
......@@ -681,6 +790,7 @@ static void
mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw)
{
mutex_destroy(&esw_chains_lock(esw));
mapping_destroy(esw_chains_mapping(esw));
rhashtable_destroy(&esw_prios_ht(esw));
rhashtable_destroy(&esw_chains_ht(esw));
......@@ -759,3 +869,17 @@ mlx5_esw_chains_destroy(struct mlx5_eswitch *esw)
mlx5_esw_chains_close(esw);
mlx5_esw_chains_cleanup(esw);
}
int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag,
u32 *chain)
{
int err;
err = mapping_find(esw_chains_mapping(esw), tag, chain);
if (err) {
esw_warn(esw->dev, "Can't find chain for tag: %d\n", tag);
return -ENOENT;
}
return 0;
}
......@@ -28,5 +28,7 @@ mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw);
int mlx5_esw_chains_create(struct mlx5_eswitch *esw);
void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw);
#endif /* __ML5_ESW_CHAINS_H__ */
int
mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain);
#endif /* __ML5_ESW_CHAINS_H__ */
......@@ -111,8 +111,8 @@
#define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1)
#define OFFLOADS_MAX_FT 2
#define OFFLOADS_NUM_PRIOS 1
#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)
#define OFFLOADS_NUM_PRIOS 2
#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + OFFLOADS_NUM_PRIOS)
#define LAG_PRIO_NUM_LEVELS 1
#define LAG_NUM_PRIOS 1
......
......@@ -71,7 +71,28 @@ enum devlink_eswitch_encap_mode
mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
/* Reg C0 usage:
 * Reg C0 = < ESW_VHCA_ID_BITS(8) | ESW_VPORT_BITS(8) | ESW_CHAIN_TAG(16) >
 *
 * The highest 8 bits of reg c0 are the vhca_id, the next 8 bits are the
 * vport_num, and the rest (lowest 16 bits) is left for tc chain tag
 * restoration.
 * VHCA_ID + VPORT comprise the SOURCE_PORT matching.
 */
#define ESW_VHCA_ID_BITS 8
#define ESW_VPORT_BITS 8
#define ESW_SOURCE_PORT_METADATA_BITS (ESW_VHCA_ID_BITS + ESW_VPORT_BITS)
#define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS)
#define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS)
#define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\
0)
static inline u32 mlx5_eswitch_get_vport_metadata_mask(void)
{
return GENMASK(31, 32 - ESW_SOURCE_PORT_METADATA_BITS);
}
u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
u16 vport_num);
u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
#else /* CONFIG_MLX5_ESWITCH */
......@@ -94,11 +115,17 @@ mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
};
static inline u32
mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
int vport_num)
{
return 0;
};
static inline u32
mlx5_eswitch_get_vport_metadata_mask(void)
{
return 0;
}
#endif /* CONFIG_MLX5_ESWITCH */
#endif
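As a hedged illustration of the reg c0 layout documented above, a caller could split a received metadata value into its source-port and chain-tag parts as follows; the function and variable names are illustrative, only the macro and helper come from the hunk above:

/* reg_c0 = [ vhca_id:8 | vport_num:8 | chain_tag:16 ] */
static void example_split_reg_c0(u32 reg_c0)
{
	u32 chain_tag, source_port;

	/* low 16 bits: tc chain tag to be restored in software */
	chain_tag = reg_c0 & ESW_CHAIN_TAG_METADATA_MASK;

	/* high 16 bits: vhca_id + vport_num, the SOURCE_PORT match value */
	source_port = reg_c0 & mlx5_eswitch_get_vport_metadata_mask();

	/* e.g. reg_c0 == 0x01020005 -> source_port == 0x01020000, chain_tag == 5 */
	(void)chain_tag;
	(void)source_port;
}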
......@@ -72,6 +72,10 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode);
int tcf_classify_ingress(struct sk_buff *skb,
const struct tcf_block *ingress_block,
const struct tcf_proto *tp, struct tcf_result *res,
bool compat_mode);
#else
static inline bool tcf_block_shared(struct tcf_block *block)
......@@ -133,6 +137,15 @@ static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
{
return TC_ACT_UNSPEC;
}
static inline int tcf_classify_ingress(struct sk_buff *skb,
const struct tcf_block *ingress_block,
const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
return TC_ACT_UNSPEC;
}
#endif
static inline unsigned long
......
......@@ -1269,6 +1269,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
*/
struct mini_Qdisc {
struct tcf_proto *filter_list;
struct tcf_block *block;
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
struct gnet_stats_queue __percpu *cpu_qstats;
struct rcu_head rcu;
......@@ -1295,6 +1296,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
struct tcf_proto *tp_head);
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
struct mini_Qdisc __rcu **p_miniq);
void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
struct tcf_block *block);
static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
{
......
......@@ -4848,7 +4848,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list,
&cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
......
......@@ -22,6 +22,7 @@
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/jhash.h>
#include <linux/rculist.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
......@@ -354,7 +355,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (!chain)
return NULL;
list_add_tail(&chain->list, &block->chain_list);
list_add_tail_rcu(&chain->list, &block->chain_list);
mutex_init(&chain->filter_chain_lock);
chain->block = block;
chain->index = chain_index;
......@@ -394,7 +395,7 @@ static bool tcf_chain_detach(struct tcf_chain *chain)
ASSERT_BLOCK_LOCKED(block);
list_del(&chain->list);
list_del_rcu(&chain->list);
if (!chain->index)
block->chain0.chain = NULL;
......@@ -453,6 +454,20 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
return NULL;
}
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
u32 chain_index)
{
struct tcf_chain *chain;
list_for_each_entry_rcu(chain, &block->chain_list, list) {
if (chain->index == chain_index)
return chain;
}
return NULL;
}
#endif
static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
u32 seq, u16 flags, int event, bool unicast);
......@@ -1559,12 +1574,15 @@ static int tcf_block_setup(struct tcf_block *block,
* to this qdisc, (optionally) tests for protocol and asks
* specific classifiers.
*/
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
static inline int __tcf_classify(struct sk_buff *skb,
const struct tcf_proto *tp,
const struct tcf_proto *orig_tp,
struct tcf_result *res,
bool compat_mode,
u32 *last_executed_chain)
{
#ifdef CONFIG_NET_CLS_ACT
const int max_reclassify_loop = 4;
const struct tcf_proto *orig_tp = tp;
const struct tcf_proto *first_tp;
int limit = 0;
......@@ -1582,21 +1600,11 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
#ifdef CONFIG_NET_CLS_ACT
if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
first_tp = orig_tp;
*last_executed_chain = first_tp->chain->index;
goto reset;
} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
first_tp = res->goto_tp;
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
{
struct tc_skb_ext *ext;
ext = skb_ext_add(skb, TC_SKB_EXT);
if (WARN_ON_ONCE(!ext))
return TC_ACT_SHOT;
ext->chain = err & TC_ACT_EXT_VAL_MASK;
}
#endif
*last_executed_chain = err & TC_ACT_EXT_VAL_MASK;
goto reset;
}
#endif
......@@ -1619,8 +1627,64 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
goto reclassify;
#endif
}
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
u32 last_executed_chain = 0;
return __tcf_classify(skb, tp, tp, res, compat_mode,
&last_executed_chain);
}
EXPORT_SYMBOL(tcf_classify);
int tcf_classify_ingress(struct sk_buff *skb,
const struct tcf_block *ingress_block,
const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
u32 last_executed_chain = 0;
return __tcf_classify(skb, tp, tp, res, compat_mode,
&last_executed_chain);
#else
u32 last_executed_chain = tp ? tp->chain->index : 0;
const struct tcf_proto *orig_tp = tp;
struct tc_skb_ext *ext;
int ret;
ext = skb_ext_find(skb, TC_SKB_EXT);
if (ext && ext->chain) {
struct tcf_chain *fchain;
fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain);
if (!fchain)
return TC_ACT_SHOT;
/* Consume, so cloned/redirect skbs won't inherit ext */
skb_ext_del(skb, TC_SKB_EXT);
tp = rcu_dereference_bh(fchain->filter_chain);
}
ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
&last_executed_chain);
/* If we missed on some chain */
if (ret == TC_ACT_UNSPEC && last_executed_chain) {
ext = skb_ext_add(skb, TC_SKB_EXT);
if (WARN_ON_ONCE(!ext))
return TC_ACT_SHOT;
ext->chain = last_executed_chain;
}
return ret;
#endif
}
EXPORT_SYMBOL(tcf_classify_ingress);
struct tcf_chain_info {
struct tcf_proto __rcu **pprev;
struct tcf_proto __rcu *next;
......
......@@ -1391,6 +1391,14 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
}
EXPORT_SYMBOL(mini_qdisc_pair_swap);
void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
struct tcf_block *block)
{
miniqp->miniq1.block = block;
miniqp->miniq2.block = block;
}
EXPORT_SYMBOL(mini_qdisc_pair_block_init);
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
struct mini_Qdisc __rcu **p_miniq)
{
......
......@@ -78,6 +78,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
int err;
net_inc_ingress_queue();
......@@ -87,7 +88,13 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
q->block_info.chain_head_change = clsact_chain_head_change;
q->block_info.chain_head_change_priv = &q->miniqp;
return tcf_block_get_ext(&q->block, sch, &q->block_info, extack);
err = tcf_block_get_ext(&q->block, sch, &q->block_info, extack);
if (err)
return err;
mini_qdisc_pair_block_init(&q->miniqp, q->block);
return 0;
}
static void ingress_destroy(struct Qdisc *sch)
......@@ -226,6 +233,8 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
if (err)
return err;
mini_qdisc_pair_block_init(&q->miniqp_ingress, q->ingress_block);
mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
q->egress_block_info.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
......