Commit edd8b295 authored by Jakub Kicinski

Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'

Leon Romanovsky says:

====================
mlx5 IPsec packet offload support in eswitch mode

This series from Jianbo adds mlx5 IPsec packet offload support in eswitch
offloaded mode.

It works exactly like "regular" IPsec, nothing special, except that users
can now switch to switchdev mode before adding IPsec rules:

 devlink dev eswitch set pci/0000:06:00.0 mode switchdev

The configuration is the same as described here:

https://lore.kernel.org/netdev/cover.1670005543.git.leonro@nvidia.com/

Packet offload mode:
  ip xfrm state offload packet dev <if-name> dir <in|out>
  ip xfrm policy .... offload packet dev <if-name>
Crypto offload mode:
  ip xfrm state offload crypto dev <if-name> dir <in|out>
or (backward compatibility)
  ip xfrm state offload dev <if-name> dir <in|out>
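
For reference, a complete packet offload configuration (after switching to
switchdev as shown above) could look like the sketch below; the interface
name eth2, addresses, SPI and key are purely illustrative:

  ip xfrm state add src 192.168.1.1 dst 192.168.1.2 \
      proto esp spi 0x100 reqid 1 mode transport \
      aead 'rfc4106(gcm(aes))' 0x0123456789abcdef0123456789abcdef01234567 128 \
      offload packet dev eth2 dir out
  ip xfrm policy add src 192.168.1.1 dst 192.168.1.2 dir out \
      tmpl src 192.168.1.1 dst 192.168.1.2 proto esp reqid 1 mode transport \
      offload packet dev eth2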

v0: https://lore.kernel.org/all/cover.1689064922.git.leonro@nvidia.com
====================

Link: https://lore.kernel.org/r/cover.1690802064.git.leon@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 30ff01ee c8e350e6
......@@ -75,6 +75,10 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
ifneq ($(CONFIG_MLX5_EN_IPSEC),)
mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/ipsec_fs.o
endif
mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o esw/bridge_debugfs.o \
en/rep/bridge.o
......
......@@ -715,9 +715,20 @@ void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
uplink_priv = &uplink_rpriv->uplink_priv;
ct_priv = uplink_priv->ct_priv;
if (!mlx5_ipsec_is_rx_flow(cqe) &&
!mlx5e_tc_update_skb(cqe, skb, mapping_ctx, reg_c0, ct_priv, zone_restore_id, tunnel_id,
&tc_priv))
#ifdef CONFIG_MLX5_EN_IPSEC
if (!(tunnel_id >> ESW_TUN_OPTS_BITS)) {
u32 mapped_id;
u32 metadata;
mapped_id = tunnel_id & ESW_IPSEC_RX_MAPPED_ID_MASK;
if (mapped_id &&
!mlx5_esw_ipsec_rx_make_metadata(priv, mapped_id, &metadata))
mlx5e_ipsec_offload_handle_rx_skb(priv->netdev, skb, metadata);
}
#endif
if (!mlx5e_tc_update_skb(cqe, skb, mapping_ctx, reg_c0, ct_priv,
zone_restore_id, tunnel_id, &tc_priv))
goto free_skb;
forward:
......
......@@ -40,6 +40,7 @@
#include "en.h"
#include "ipsec.h"
#include "ipsec_rxtx.h"
#include "en_rep.h"
#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
......@@ -858,6 +859,7 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv)
goto clear_aso;
}
ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv);
ret = mlx5e_accel_ipsec_fs_init(ipsec);
if (ret)
goto err_fs_init;
......
......@@ -143,7 +143,7 @@ struct mlx5e_ipsec_sw_stats {
atomic64_t ipsec_tx_drop_trailer;
};
struct mlx5e_ipsec_rx;
struct mlx5e_ipsec_fc;
struct mlx5e_ipsec_tx;
struct mlx5e_ipsec_work {
......@@ -169,6 +169,58 @@ struct mlx5e_ipsec_aso {
spinlock_t lock;
};
struct mlx5e_ipsec_rx_create_attr {
struct mlx5_flow_namespace *ns;
struct mlx5_ttc_table *ttc;
u32 family;
int prio;
int pol_level;
int sa_level;
int status_level;
enum mlx5_flow_namespace_type chains_ns;
};
struct mlx5e_ipsec_ft {
struct mutex mutex; /* Protect changes to this struct */
struct mlx5_flow_table *pol;
struct mlx5_flow_table *sa;
struct mlx5_flow_table *status;
u32 refcnt;
};
struct mlx5e_ipsec_rule {
struct mlx5_flow_handle *rule;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_pkt_reformat *pkt_reformat;
struct mlx5_fc *fc;
};
struct mlx5e_ipsec_miss {
struct mlx5_flow_group *group;
struct mlx5_flow_handle *rule;
};
struct mlx5e_ipsec_rx {
struct mlx5e_ipsec_ft ft;
struct mlx5e_ipsec_miss pol;
struct mlx5e_ipsec_miss sa;
struct mlx5e_ipsec_rule status;
struct mlx5e_ipsec_miss status_drop;
struct mlx5_fc *status_drop_cnt;
struct mlx5e_ipsec_fc *fc;
struct mlx5_fs_chains *chains;
u8 allow_tunnel_mode : 1;
struct xarray ipsec_obj_id_map;
};
struct mlx5e_ipsec_tx_create_attr {
int prio;
int pol_level;
int sa_level;
int cnt_level;
enum mlx5_flow_namespace_type chains_ns;
};
struct mlx5e_ipsec {
struct mlx5_core_dev *mdev;
struct xarray sadb;
......@@ -178,11 +230,14 @@ struct mlx5e_ipsec {
struct mlx5e_flow_steering *fs;
struct mlx5e_ipsec_rx *rx_ipv4;
struct mlx5e_ipsec_rx *rx_ipv6;
struct mlx5e_ipsec_rx *rx_esw;
struct mlx5e_ipsec_tx *tx;
struct mlx5e_ipsec_tx *tx_esw;
struct mlx5e_ipsec_aso *aso;
struct notifier_block nb;
struct notifier_block netevent_nb;
struct mlx5_ipsec_fs *roce;
u8 is_uplink_rep: 1;
};
struct mlx5e_ipsec_esn_state {
......@@ -191,13 +246,6 @@ struct mlx5e_ipsec_esn_state {
u8 overlap: 1;
};
struct mlx5e_ipsec_rule {
struct mlx5_flow_handle *rule;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_pkt_reformat *pkt_reformat;
struct mlx5_fc *fc;
};
struct mlx5e_ipsec_limits {
u64 round;
u8 soft_limit_hit : 1;
......@@ -217,6 +265,7 @@ struct mlx5e_ipsec_sa_entry {
struct mlx5e_ipsec_work *work;
struct mlx5e_ipsec_dwork *dwork;
struct mlx5e_ipsec_limits limits;
u32 rx_mapped_id;
};
struct mlx5_accel_pol_xfrm_attrs {
......
......@@ -45,8 +45,9 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev)
MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap))
caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD;
if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) &&
MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level))
if ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) &&
MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) ||
MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))
caps |= MLX5_IPSEC_CAP_PRIO;
if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev,
......
......@@ -37,6 +37,7 @@
#include "ipsec.h"
#include "ipsec_rxtx.h"
#include "en.h"
#include "esw/ipsec_fs.h"
enum {
MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
......@@ -309,9 +310,8 @@ enum {
void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
struct sk_buff *skb,
struct mlx5_cqe64 *cqe)
u32 ipsec_meta_data)
{
u32 ipsec_meta_data = be32_to_cpu(cqe->ft_metadata);
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_ipsec *ipsec = priv->ipsec;
struct mlx5e_ipsec_sa_entry *sa_entry;
......@@ -356,3 +356,24 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_syndrome);
}
}
int mlx5_esw_ipsec_rx_make_metadata(struct mlx5e_priv *priv, u32 id, u32 *metadata)
{
struct mlx5e_ipsec *ipsec = priv->ipsec;
u32 ipsec_obj_id;
int err;
if (!ipsec || !ipsec->is_uplink_rep)
return -EINVAL;
err = mlx5_esw_ipsec_rx_ipsec_obj_id_search(priv, id, &ipsec_obj_id);
if (err) {
atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_sadb_miss);
return err;
}
*metadata = MLX5_IPSEC_METADATA_CREATE(ipsec_obj_id,
MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED);
return 0;
}
......@@ -43,6 +43,7 @@
#define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1)
#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(5, 0))
#define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0))
#define MLX5_IPSEC_METADATA_CREATE(id, syndrome) ((id) | ((syndrome) << 24))
struct mlx5e_accel_tx_ipsec_state {
struct xfrm_offload *xo;
......@@ -66,7 +67,8 @@ void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
struct mlx5_wqe_inline_seg *inlseg);
void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
struct sk_buff *skb,
struct mlx5_cqe64 *cqe);
u32 ipsec_meta_data);
int mlx5_esw_ipsec_rx_make_metadata(struct mlx5e_priv *priv, u32 id, u32 *metadata);
static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
{
return ipsec_st->tailen;
......@@ -145,7 +147,7 @@ mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
static inline
void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
struct sk_buff *skb,
struct mlx5_cqe64 *cqe)
u32 ipsec_meta_data)
{}
static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg)
......
......@@ -1338,6 +1338,7 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = {
&MLX5E_STATS_GRP(channels),
&MLX5E_STATS_GRP(per_port_buff_congest),
#ifdef CONFIG_MLX5_EN_IPSEC
&MLX5E_STATS_GRP(ipsec_hw),
&MLX5E_STATS_GRP(ipsec_sw),
#endif
&MLX5E_STATS_GRP(ptp),
......
......@@ -1543,7 +1543,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt);
if (unlikely(mlx5_ipsec_is_rx_flow(cqe)))
mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe);
mlx5e_ipsec_offload_handle_rx_skb(netdev, skb,
be32_to_cpu(cqe->ft_metadata));
if (unlikely(mlx5e_macsec_is_rx_flow(cqe)))
mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe);
......
......@@ -4606,6 +4606,46 @@ static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
}
/* As the IPsec and TC ordering is not aligned between software and hardware
* offload, either IPsec offload or TC offload, not both, is allowed on a
* given interface.
*/
static bool is_tc_ipsec_order_check_needed(struct net_device *filter, struct mlx5e_priv *priv)
{
if (!IS_ENABLED(CONFIG_MLX5_EN_IPSEC))
return false;
if (filter != priv->netdev)
return false;
if (mlx5e_eswitch_vf_rep(priv->netdev))
return false;
return true;
}
static int mlx5e_tc_block_ipsec_offload(struct net_device *filter, struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
if (!is_tc_ipsec_order_check_needed(filter, priv))
return 0;
if (mdev->num_block_tc)
return -EBUSY;
mdev->num_block_ipsec++;
return 0;
}
static void mlx5e_tc_unblock_ipsec_offload(struct net_device *filter, struct mlx5e_priv *priv)
{
if (!is_tc_ipsec_order_check_needed(filter, priv))
return;
priv->mdev->num_block_ipsec--;
}
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct flow_cls_offload *f, unsigned long flags)
{
......@@ -4618,6 +4658,10 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
if (!mlx5_esw_hold(priv->mdev))
return -EBUSY;
err = mlx5e_tc_block_ipsec_offload(dev, priv);
if (err)
goto esw_release;
mlx5_esw_get(priv->mdev);
rcu_read_lock();
......@@ -4663,7 +4707,9 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
err_free:
mlx5e_flow_put(priv, flow);
out:
mlx5e_tc_unblock_ipsec_offload(dev, priv);
mlx5_esw_put(priv->mdev);
esw_release:
mlx5_esw_release(priv->mdev);
return err;
}
......@@ -4704,6 +4750,7 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
trace_mlx5e_delete_flower(f);
mlx5e_flow_put(priv, flow);
mlx5e_tc_unblock_ipsec_offload(dev, priv);
mlx5_esw_put(priv->mdev);
return 0;
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#include "fs_core.h"
#include "eswitch.h"
#include "en_accel/ipsec.h"
#include "esw/ipsec_fs.h"
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
#include "en/tc_priv.h"
#endif
enum {
MLX5_ESW_IPSEC_RX_POL_FT_LEVEL,
MLX5_ESW_IPSEC_RX_ESP_FT_LEVEL,
MLX5_ESW_IPSEC_RX_ESP_FT_CHK_LEVEL,
};
enum {
MLX5_ESW_IPSEC_TX_POL_FT_LEVEL,
MLX5_ESW_IPSEC_TX_ESP_FT_LEVEL,
MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL,
};
static void esw_ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx *rx)
{
mlx5_del_flow_rules(rx->status_drop.rule);
mlx5_destroy_flow_group(rx->status_drop.group);
mlx5_fc_destroy(ipsec->mdev, rx->status_drop_cnt);
}
static void esw_ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx *rx)
{
mlx5_del_flow_rules(rx->status.rule);
mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0);
}
static int esw_ipsec_rx_status_drop_create(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx *rx)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_table *ft = rx->ft.status;
struct mlx5_core_dev *mdev = ipsec->mdev;
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_handle *rule;
struct mlx5_fc *flow_counter;
struct mlx5_flow_spec *spec;
struct mlx5_flow_group *g;
u32 *flow_group_in;
int err = 0;
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!flow_group_in || !spec) {
err = -ENOMEM;
goto err_out;
}
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1);
g = mlx5_create_flow_group(ft, flow_group_in);
if (IS_ERR(g)) {
err = PTR_ERR(g);
mlx5_core_err(mdev,
"Failed to add ipsec rx status drop flow group, err=%d\n", err);
goto err_out;
}
flow_counter = mlx5_fc_create(mdev, false);
if (IS_ERR(flow_counter)) {
err = PTR_ERR(flow_counter);
mlx5_core_err(mdev,
"Failed to add ipsec rx status drop rule counter, err=%d\n", err);
goto err_cnt;
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT;
dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
dest.counter_id = mlx5_fc_id(flow_counter);
spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
mlx5_core_err(mdev,
"Failed to add ipsec rx status drop rule, err=%d\n", err);
goto err_rule;
}
rx->status_drop.group = g;
rx->status_drop.rule = rule;
rx->status_drop_cnt = flow_counter;
kvfree(flow_group_in);
kvfree(spec);
return 0;
err_rule:
mlx5_fc_destroy(mdev, flow_counter);
err_cnt:
mlx5_destroy_flow_group(g);
err_out:
kvfree(flow_group_in);
kvfree(spec);
return err;
}
static int esw_ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx *rx,
struct mlx5_flow_destination *dest)
{
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
int err;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
return -ENOMEM;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
misc_parameters_2.ipsec_syndrome);
MLX5_SET(fte_match_param, spec->match_value,
misc_parameters_2.ipsec_syndrome, 0);
spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
flow_act.flags = FLOW_ACT_NO_APPEND;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
mlx5_core_warn(ipsec->mdev,
"Failed to add ipsec rx status pass rule, err=%d\n", err);
goto err_rule;
}
rx->status.rule = rule;
kvfree(spec);
return 0;
err_rule:
kvfree(spec);
return err;
}
void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx *rx)
{
esw_ipsec_rx_status_pass_destroy(ipsec, rx);
esw_ipsec_rx_status_drop_destroy(ipsec, rx);
}
int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx *rx,
struct mlx5_flow_destination *dest)
{
int err;
err = esw_ipsec_rx_status_drop_create(ipsec, rx);
if (err)
return err;
err = esw_ipsec_rx_status_pass_create(ipsec, rx, dest);
if (err)
goto err_pass_create;
return 0;
err_pass_create:
esw_ipsec_rx_status_drop_destroy(ipsec, rx);
return err;
}
void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx_create_attr *attr)
{
attr->prio = FDB_CRYPTO_INGRESS;
attr->pol_level = MLX5_ESW_IPSEC_RX_POL_FT_LEVEL;
attr->sa_level = MLX5_ESW_IPSEC_RX_ESP_FT_LEVEL;
attr->status_level = MLX5_ESW_IPSEC_RX_ESP_FT_CHK_LEVEL;
attr->chains_ns = MLX5_FLOW_NAMESPACE_FDB;
}
int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec,
struct mlx5_flow_destination *dest)
{
dest->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest->ft = mlx5_chains_get_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0);
return 0;
}
int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_flow_act *flow_act)
{
u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
struct mlx5_core_dev *mdev = ipsec->mdev;
struct mlx5_modify_hdr *modify_hdr;
u32 mapped_id;
int err;
err = xa_alloc_bh(&ipsec->rx_esw->ipsec_obj_id_map, &mapped_id,
xa_mk_value(sa_entry->ipsec_obj_id),
XA_LIMIT(1, ESW_IPSEC_RX_MAPPED_ID_MASK), 0);
if (err)
return err;
/* reuse tunnel bits for ipsec,
* tun_id is always 0 and tun_opts is mapped to ipsec_obj_id.
*/
MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
MLX5_SET(set_action_in, action, field,
MLX5_ACTION_IN_FIELD_METADATA_REG_C_1);
MLX5_SET(set_action_in, action, offset, ESW_ZONE_ID_BITS);
MLX5_SET(set_action_in, action, length,
ESW_TUN_ID_BITS + ESW_TUN_OPTS_BITS);
MLX5_SET(set_action_in, action, data, mapped_id);
modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
1, action);
if (IS_ERR(modify_hdr)) {
err = PTR_ERR(modify_hdr);
goto err_header_alloc;
}
sa_entry->rx_mapped_id = mapped_id;
flow_act->modify_hdr = modify_hdr;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
return 0;
err_header_alloc:
xa_erase_bh(&ipsec->rx_esw->ipsec_obj_id_map, mapped_id);
return err;
}
void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry)
{
struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
if (sa_entry->rx_mapped_id)
xa_erase_bh(&ipsec->rx_esw->ipsec_obj_id_map,
sa_entry->rx_mapped_id);
}
int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id,
u32 *ipsec_obj_id)
{
struct mlx5e_ipsec *ipsec = priv->ipsec;
void *val;
val = xa_load(&ipsec->rx_esw->ipsec_obj_id_map, id);
if (!val)
return -ENOENT;
*ipsec_obj_id = xa_to_value(val);
return 0;
}
void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_tx_create_attr *attr)
{
attr->prio = FDB_CRYPTO_EGRESS;
attr->pol_level = MLX5_ESW_IPSEC_TX_POL_FT_LEVEL;
attr->sa_level = MLX5_ESW_IPSEC_TX_ESP_FT_LEVEL;
attr->cnt_level = MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL;
attr->chains_ns = MLX5_FLOW_NAMESPACE_FDB;
}
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
static int mlx5_esw_ipsec_modify_flow_dests(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow)
{
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5_flow_attr *attr;
int err;
attr = flow->attr;
esw_attr = attr->esw_attr;
if (esw_attr->out_count - esw_attr->split_count > 1)
return 0;
err = mlx5_eswitch_restore_ipsec_rule(esw, flow->rule[0], esw_attr,
esw_attr->out_count - 1);
return err;
}
#endif
void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev)
{
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
struct mlx5_eswitch *esw = mdev->priv.eswitch;
struct mlx5_eswitch_rep *rep;
struct mlx5e_rep_priv *rpriv;
struct rhashtable_iter iter;
struct mlx5e_tc_flow *flow;
unsigned long i;
int err;
xa_for_each(&esw->offloads.vport_reps, i, rep) {
rpriv = rep->rep_data[REP_ETH].priv;
if (!rpriv || !rpriv->netdev)
continue;
rhashtable_walk_enter(&rpriv->tc_ht, &iter);
rhashtable_walk_start(&iter);
while ((flow = rhashtable_walk_next(&iter)) != NULL) {
if (IS_ERR(flow))
continue;
err = mlx5_esw_ipsec_modify_flow_dests(esw, flow);
if (err)
mlx5_core_warn_once(mdev,
"Faided to modify flow dests for IPsec");
}
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
}
#endif
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef __MLX5_ESW_IPSEC_FS_H__
#define __MLX5_ESW_IPSEC_FS_H__
struct mlx5e_ipsec;
struct mlx5e_ipsec_sa_entry;
#ifdef CONFIG_MLX5_ESWITCH
void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx *rx);
int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx *rx,
struct mlx5_flow_destination *dest);
void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx_create_attr *attr);
int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec,
struct mlx5_flow_destination *dest);
int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_flow_act *flow_act);
void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry);
int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id,
u32 *ipsec_obj_id);
void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_tx_create_attr *attr);
void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev);
#else
static inline void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx *rx) {}
static inline int mlx5_esw_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx *rx,
struct mlx5_flow_destination *dest)
{
return -EINVAL;
}
static inline void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_rx_create_attr *attr) {}
static inline int mlx5_esw_ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec,
struct mlx5_flow_destination *dest)
{
return -EINVAL;
}
static inline int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry,
struct mlx5_flow_act *flow_act)
{
return -EINVAL;
}
static inline void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry) {}
static inline int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id,
u32 *ipsec_obj_id)
{
return -EINVAL;
}
static inline void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec,
struct mlx5e_ipsec_tx_create_attr *attr) {}
static inline void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) {}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESW_IPSEC_FS_H__ */
......@@ -254,6 +254,7 @@ struct mlx5_esw_offload {
struct mlx5_flow_group *vport_rx_group;
struct mlx5_flow_group *vport_rx_drop_group;
struct mlx5_flow_handle *vport_rx_drop_rule;
struct mlx5_flow_table *ft_ipsec_tx_pol;
struct xarray vport_reps;
struct list_head peer_flows[MLX5_MAX_PORTS];
struct mutex peer_mutex;
......@@ -269,6 +270,7 @@ struct mlx5_esw_offload {
u8 inline_mode;
atomic64_t num_flows;
u64 num_block_encap;
u64 num_block_mode;
enum devlink_eswitch_encap_mode encap;
struct ida vport_metadata_ida;
unsigned int host_number; /* ECPF supports one external host */
......@@ -783,6 +785,11 @@ int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev);
void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
int mlx5_eswitch_block_mode_trylock(struct mlx5_core_dev *dev);
void mlx5_eswitch_block_mode_unlock(struct mlx5_core_dev *dev, int err);
void mlx5_eswitch_unblock_mode_lock(struct mlx5_core_dev *dev);
void mlx5_eswitch_unblock_mode_unlock(struct mlx5_core_dev *dev);
static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
{
if (mlx5_esw_allowed(esw))
......@@ -804,6 +811,8 @@ mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw)
return esw->fdb_table.offloads.slow_fdb;
}
int mlx5_eswitch_restore_ipsec_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule,
struct mlx5_esw_flow_attr *esw_attr, int attr_idx);
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
......@@ -862,6 +871,14 @@ static inline bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev)
static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
{
}
static inline int mlx5_eswitch_block_mode_trylock(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_block_mode_unlock(struct mlx5_core_dev *dev, int err) {}
static inline void mlx5_eswitch_unblock_mode_lock(struct mlx5_core_dev *dev) {}
static inline void mlx5_eswitch_unblock_mode_unlock(struct mlx5_core_dev *dev) {}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
......@@ -424,8 +424,49 @@ esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 l
mlx5_chains_put_table(chains, chain, prio, level);
}
static bool esw_same_vhca_id(struct mlx5_core_dev *mdev1, struct mlx5_core_dev *mdev2)
{
return MLX5_CAP_GEN(mdev1, vhca_id) == MLX5_CAP_GEN(mdev2, vhca_id);
}
static bool esw_setup_uplink_fwd_ipsec_needed(struct mlx5_eswitch *esw,
struct mlx5_esw_flow_attr *esw_attr,
int attr_idx)
{
if (esw->offloads.ft_ipsec_tx_pol &&
esw_attr->dests[attr_idx].rep &&
esw_attr->dests[attr_idx].rep->vport == MLX5_VPORT_UPLINK &&
/* To be aligned with software, encryption is needed only for tunnel device */
(esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) &&
esw_attr->dests[attr_idx].rep != esw_attr->in_rep &&
esw_same_vhca_id(esw_attr->dests[attr_idx].mdev, esw->dev))
return true;
return false;
}
static bool esw_flow_dests_fwd_ipsec_check(struct mlx5_eswitch *esw,
struct mlx5_esw_flow_attr *esw_attr)
{
int i;
if (!esw->offloads.ft_ipsec_tx_pol)
return true;
for (i = 0; i < esw_attr->split_count; i++)
if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, i))
return false;
for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, i) &&
(esw_attr->out_count - esw_attr->split_count > 1))
return false;
return true;
}
static void
esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
esw_setup_dest_fwd_vport(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
int attr_idx, int dest_idx, bool pkt_reformat)
{
......@@ -449,6 +490,33 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
}
}
static void
esw_setup_dest_fwd_ipsec(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
int attr_idx, int dest_idx, bool pkt_reformat)
{
dest[dest_idx].ft = esw->offloads.ft_ipsec_tx_pol;
dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
if (pkt_reformat &&
esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) {
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
}
}
static void
esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
int attr_idx, int dest_idx, bool pkt_reformat)
{
if (esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, attr_idx))
esw_setup_dest_fwd_ipsec(dest, flow_act, esw, esw_attr,
attr_idx, dest_idx, pkt_reformat);
else
esw_setup_dest_fwd_vport(dest, flow_act, esw, esw_attr,
attr_idx, dest_idx, pkt_reformat);
}
static int
esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act,
struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr,
......@@ -575,6 +643,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
return ERR_PTR(-EOPNOTSUPP);
if (!esw_flow_dests_fwd_ipsec_check(esw, esw_attr))
return ERR_PTR(-EOPNOTSUPP);
dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL);
if (!dest)
return ERR_PTR(-ENOMEM);
......@@ -884,6 +955,17 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
if (rep->vport == MLX5_VPORT_UPLINK && on_esw->offloads.ft_ipsec_tx_pol) {
dest.ft = on_esw->offloads.ft_ipsec_tx_pol;
flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
} else {
dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest.vport.num = rep->vport;
dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id);
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
}
if (MLX5_CAP_ESW_FLOWTABLE(on_esw->dev, flow_source) &&
rep->vport == MLX5_VPORT_UPLINK)
spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
......@@ -3507,6 +3589,69 @@ static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink)
return net_eq(devl_net, netdev_net);
}
int mlx5_eswitch_block_mode_trylock(struct mlx5_core_dev *dev)
{
struct devlink *devlink = priv_to_devlink(dev);
struct mlx5_eswitch *esw;
int err;
devl_lock(devlink);
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw)) {
/* Failure means no eswitch => not possible to change eswitch mode */
devl_unlock(devlink);
return 0;
}
err = mlx5_esw_try_lock(esw);
if (err < 0) {
devl_unlock(devlink);
return err;
}
return 0;
}
void mlx5_eswitch_block_mode_unlock(struct mlx5_core_dev *dev, int err)
{
struct devlink *devlink = priv_to_devlink(dev);
struct mlx5_eswitch *esw;
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw))
return;
if (!err)
esw->offloads.num_block_mode++;
mlx5_esw_unlock(esw);
devl_unlock(devlink);
}
void mlx5_eswitch_unblock_mode_lock(struct mlx5_core_dev *dev)
{
struct devlink *devlink = priv_to_devlink(dev);
struct mlx5_eswitch *esw;
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw))
return;
down_write(&esw->mode_lock);
}
void mlx5_eswitch_unblock_mode_unlock(struct mlx5_core_dev *dev)
{
struct devlink *devlink = priv_to_devlink(dev);
struct mlx5_eswitch *esw;
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw))
return;
esw->offloads.num_block_mode--;
up_write(&esw->mode_lock);
}
int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack)
{
......@@ -3540,6 +3685,13 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
if (cur_mlx5_mode == mlx5_mode)
goto unlock;
if (esw->offloads.num_block_mode) {
NL_SET_ERR_MSG_MOD(extack,
"Can't change eswitch mode when IPsec SA and/or policies are configured");
err = -EOPNOTSUPP;
goto unlock;
}
mlx5_eswitch_disable_locked(esw);
if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
if (mlx5_devlink_trap_get_num_active(esw->dev)) {
......@@ -4293,3 +4445,19 @@ int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable,
mutex_unlock(&esw->state_lock);
return err;
}
int
mlx5_eswitch_restore_ipsec_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule,
struct mlx5_esw_flow_attr *esw_attr, int attr_idx)
{
struct mlx5_flow_destination new_dest = {};
struct mlx5_flow_destination old_dest = {};
if (!esw_setup_uplink_fwd_ipsec_needed(esw, esw_attr, attr_idx))
return 0;
esw_setup_dest_fwd_ipsec(&old_dest, NULL, esw, esw_attr, attr_idx, 0, false);
esw_setup_dest_fwd_vport(&new_dest, NULL, esw, esw_attr, attr_idx, 0, false);
return mlx5_modify_rule_destination(rule, &new_dest, &old_dest);
}
......@@ -1066,7 +1066,7 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
}
for (i = 0; i < handle->num_rules; i++) {
if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr))
if (mlx5_flow_dests_cmp(old_dest, &handle->rule[i]->dest_attr))
return _mlx5_modify_rule_destination(handle->rule[i],
new_dest);
}
......@@ -2987,6 +2987,12 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
if (err)
goto out_err;
maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_CRYPTO_INGRESS, 3);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
goto out_err;
}
err = create_fdb_fast_path(steering);
if (err)
goto out_err;
......@@ -3009,6 +3015,12 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
goto out_err;
}
maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_CRYPTO_EGRESS, 3);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
goto out_err;
}
/* We put this priority last, knowing that nothing will get here
* unless explicitly forwarded to. This is possible because the
* slow path tables have catch all rules and nothing gets passed
......
......@@ -806,6 +806,8 @@ struct mlx5_core_dev {
u32 vsc_addr;
struct mlx5_hv_vhca *hv_vhca;
struct mlx5_thermal *thermal;
u64 num_block_tc;
u64 num_block_ipsec;
};
struct mlx5_db {
......
......@@ -144,6 +144,9 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \
ESW_TUN_OPTS_OFFSET + 1)
/* reuse tun_opts for the mapped ipsec obj id when tun_id is 0 (invalid) */
#define ESW_IPSEC_RX_MAPPED_ID_MASK GENMASK(ESW_TUN_OPTS_BITS - 1, 0)
u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev);
u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
......
......@@ -109,11 +109,13 @@ enum mlx5_flow_namespace_type {
enum {
FDB_BYPASS_PATH,
FDB_CRYPTO_INGRESS,
FDB_TC_OFFLOAD,
FDB_FT_OFFLOAD,
FDB_TC_MISS,
FDB_BR_OFFLOAD,
FDB_SLOW_PATH,
FDB_CRYPTO_EGRESS,
FDB_PER_VPORT,
};
......