Commit ea6119aa authored by David S. Miller

Merge tag 'mlx5-updates-2020-05-15' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2020-05-15

mlx5 core and mlx5e (netdev) updates:

1) Two fixes for release all FW pages support.
2) Improvement in calculating the send queue stop room on TX (see the
illustrative sketch after this list).
3) Flow steering auto-groups creation improvements
4) TC offload fix for Connection tracking with NAT action
5) IPoIB support for self loopback to allow communication between IPoIB
pkey child interfaces on the same host.
6) DCBNL cleanup to avoid #ifdef CONFIG_MLX5_CORE_EN_DCB all over the main mlx5e code
7) Small and trivial code cleanup
====================
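
For readers skimming the large diff below, here is a minimal sketch of the
stop-room idea behind item 2. The helper and macro names in this sketch are
illustrative only and are not part of the patch; the assumed value of 16
WQEBBs for a maximal WQE is stated explicitly in the example.

/* Illustrative sketch only -- not part of this patch.
 * A send WQE of X WQEBBs must not cross a page boundary, so besides the
 * X WQEBBs of the WQE itself, up to X - 1 WQEBBs of NOP padding may be
 * needed at the end of a page. The SQ therefore reserves 2 * X - 1 WQEBBs
 * of stop room per worst-case WQE and sums such reservations for every
 * WQE type it may post (regular, TLS resync, kTLS static/progress/DUMP).
 */
#define EXAMPLE_SEND_WQE_MAX_WQEBBS 16	/* assumed value, for illustration */

static inline unsigned int example_stop_room_for_wqe(unsigned int wqe_size)
{
	return wqe_size * 2 - 1;
}

/* e.g. example_stop_room_for_wqe(EXAMPLE_SEND_WQE_MAX_WQEBBS) == 31 WQEBBs
 * reserved for a single maximally sized WQE.
 */
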
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 2ea46dc6 3f3ab178
......@@ -36,7 +36,6 @@
#include <linux/etherdevice.h>
#include <linux/timecounter.h>
#include <linux/net_tstamp.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/crash_dump.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/qp.h>
......@@ -53,6 +52,7 @@
#include "wq.h"
#include "mlx5_core.h"
#include "en_stats.h"
#include "en/dcbnl.h"
#include "en/fs.h"
#include "lib/hv_vhca.h"
......@@ -69,8 +69,6 @@ struct page_pool;
#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
#define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
#define MLX5E_MAX_PRIORITY 8
#define MLX5E_MAX_DSCP 64
#define MLX5E_MAX_NUM_TC 8
#define MLX5_RX_HEADROOM NET_SKB_PAD
......@@ -243,10 +241,6 @@ enum mlx5e_priv_flag {
#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
#ifdef CONFIG_MLX5_CORE_EN_DCB
#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
#endif
struct mlx5e_params {
u8 log_sq_size;
u8 rq_wq_type;
......@@ -271,42 +265,6 @@ struct mlx5e_params {
int hard_mtu;
};
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_cee_config {
/* bw pct for priority group */
u8 pg_bw_pct[CEE_DCBX_MAX_PGS];
u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO];
bool pfc_setting[CEE_DCBX_MAX_PRIO];
bool pfc_enable;
};
enum {
MLX5_DCB_CHG_RESET,
MLX5_DCB_NO_CHG,
MLX5_DCB_CHG_NO_RESET,
};
struct mlx5e_dcbx {
enum mlx5_dcbx_oper_mode mode;
struct mlx5e_cee_config cee_cfg; /* pending configuration */
u8 dscp_app_cnt;
/* The only setting that cannot be read from FW */
u8 tc_tsa[IEEE_8021QAZ_MAX_TCS];
u8 cap;
/* Buffer configuration */
bool manual_buffer;
u32 cable_len;
u32 xoff;
};
struct mlx5e_dcbx_dp {
u8 dscp2prio[MLX5E_MAX_DSCP];
u8 trust_state;
};
#endif
enum {
MLX5E_RQ_STATE_ENABLED,
MLX5E_RQ_STATE_RECOVERING,
......@@ -1069,13 +1027,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
}
extern const struct ethtool_ops mlx5e_ethtool_ops;
#ifdef CONFIG_MLX5_CORE_EN_DCB
extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets);
void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
#endif
int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir,
u32 *in);
......@@ -1083,7 +1034,8 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
struct mlx5e_tir *tir);
int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb);
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
bool enable_mc_lb);
/* common netdev helpers */
void mlx5e_create_q_counters(struct mlx5e_priv *priv);
......
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2020 Mellanox Technologies. */
#ifndef __MLX5E_DCBNL_H__
#define __MLX5E_DCBNL_H__
#ifdef CONFIG_MLX5_CORE_EN_DCB
#define MLX5E_MAX_PRIORITY (8)
struct mlx5e_cee_config {
/* bw pct for priority group */
u8 pg_bw_pct[CEE_DCBX_MAX_PGS];
u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO];
bool pfc_setting[CEE_DCBX_MAX_PRIO];
bool pfc_enable;
};
struct mlx5e_dcbx {
enum mlx5_dcbx_oper_mode mode;
struct mlx5e_cee_config cee_cfg; /* pending configuration */
u8 dscp_app_cnt;
/* The only setting that cannot be read from FW */
u8 tc_tsa[IEEE_8021QAZ_MAX_TCS];
u8 cap;
/* Buffer configuration */
bool manual_buffer;
u32 cable_len;
u32 xoff;
};
#define MLX5E_MAX_DSCP (64)
struct mlx5e_dcbx_dp {
u8 dscp2prio[MLX5E_MAX_DSCP];
u8 trust_state;
};
void mlx5e_dcbnl_build_netdev(struct net_device *netdev);
void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev);
void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
#else
static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {}
static inline void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev) {}
static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {}
static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {}
static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {}
#endif
#endif /* __MLX5E_DCBNL_H__ */
......@@ -24,6 +24,7 @@
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
......@@ -61,6 +62,15 @@ struct mlx5_ct_zone_rule {
bool nat;
};
struct mlx5_tc_ct_pre {
struct mlx5_flow_table *fdb;
struct mlx5_flow_group *flow_grp;
struct mlx5_flow_group *miss_grp;
struct mlx5_flow_handle *flow_rule;
struct mlx5_flow_handle *miss_rule;
struct mlx5_modify_hdr *modify_hdr;
};
struct mlx5_ct_ft {
struct rhash_head node;
u16 zone;
......@@ -68,6 +78,8 @@ struct mlx5_ct_ft {
struct nf_flowtable *nf_ft;
struct mlx5_tc_ct_priv *ct_priv;
struct rhashtable ct_entries_ht;
struct mlx5_tc_ct_pre pre_ct;
struct mlx5_tc_ct_pre pre_ct_nat;
};
struct mlx5_ct_entry {
......@@ -426,6 +438,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_eswitch *esw = ct_priv->esw;
struct mlx5_modify_hdr *mod_hdr;
struct flow_action_entry *meta;
u16 ct_state = 0;
int err;
meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
......@@ -444,11 +457,13 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
&mod_acts);
if (err)
goto err_mapping;
ct_state |= MLX5_CT_STATE_NAT_BIT;
}
ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
(MLX5_CT_STATE_ESTABLISHED_BIT |
MLX5_CT_STATE_TRK_BIT),
ct_state,
meta->ct_metadata.mark,
meta->ct_metadata.labels[0],
tupleid);
......@@ -791,6 +806,238 @@ mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
return 0;
}
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
struct mlx5_tc_ct_pre *pre_ct,
bool nat)
{
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
struct mlx5_core_dev *dev = ct_priv->esw->dev;
struct mlx5_flow_table *fdb = pre_ct->fdb;
struct mlx5_flow_destination dest = {};
struct mlx5_flow_act flow_act = {};
struct mlx5_modify_hdr *mod_hdr;
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
u32 ctstate;
u16 zone;
int err;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
return -ENOMEM;
zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone);
if (err) {
ct_dbg("Failed to set zone register mapping");
goto err_mapping;
}
mod_hdr = mlx5_modify_header_alloc(dev,
MLX5_FLOW_NAMESPACE_FDB,
pre_mod_acts.num_actions,
pre_mod_acts.actions);
if (IS_ERR(mod_hdr)) {
err = PTR_ERR(mod_hdr);
ct_dbg("Failed to create pre ct mod hdr");
goto err_mapping;
}
pre_ct->modify_hdr = mod_hdr;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
flow_act.modify_hdr = mod_hdr;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
/* add flow rule */
mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
zone, MLX5_CT_ZONE_MASK);
ctstate = MLX5_CT_STATE_TRK_BIT;
if (nat)
ctstate |= MLX5_CT_STATE_NAT_BIT;
mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
dest.ft = ct_priv->post_ct;
rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
ct_dbg("Failed to add pre ct flow rule zone %d", zone);
goto err_flow_rule;
}
pre_ct->flow_rule = rule;
/* add miss rule */
memset(spec, 0, sizeof(*spec));
dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
ct_dbg("Failed to add pre ct miss rule zone %d", zone);
goto err_miss_rule;
}
pre_ct->miss_rule = rule;
dealloc_mod_hdr_actions(&pre_mod_acts);
kvfree(spec);
return 0;
err_miss_rule:
mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
dealloc_mod_hdr_actions(&pre_mod_acts);
kvfree(spec);
return err;
}
static void
tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
struct mlx5_tc_ct_pre *pre_ct)
{
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
struct mlx5_core_dev *dev = ct_priv->esw->dev;
mlx5_del_flow_rules(pre_ct->flow_rule);
mlx5_del_flow_rules(pre_ct->miss_rule);
mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
}
static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
struct mlx5_tc_ct_pre *pre_ct,
bool nat)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
struct mlx5_core_dev *dev = ct_priv->esw->dev;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *ns;
struct mlx5_flow_table *ft;
struct mlx5_flow_group *g;
u32 metadata_reg_c_2_mask;
u32 *flow_group_in;
void *misc;
int err;
ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
if (!ns) {
err = -EOPNOTSUPP;
ct_dbg("Failed to get FDB flow namespace");
return err;
}
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (!flow_group_in)
return -ENOMEM;
ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
ft_attr.prio = FDB_TC_OFFLOAD;
ft_attr.max_fte = 2;
ft_attr.level = 1;
ft = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft)) {
err = PTR_ERR(ft);
ct_dbg("Failed to create pre ct table");
goto out_free;
}
pre_ct->fdb = ft;
/* create flow group */
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
MLX5_MATCH_MISC_PARAMETERS_2);
misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
match_criteria.misc_parameters_2);
metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
if (nat)
metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
metadata_reg_c_2_mask);
g = mlx5_create_flow_group(ft, flow_group_in);
if (IS_ERR(g)) {
err = PTR_ERR(g);
ct_dbg("Failed to create pre ct group");
goto err_flow_grp;
}
pre_ct->flow_grp = g;
/* create miss group */
memset(flow_group_in, 0, inlen);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
g = mlx5_create_flow_group(ft, flow_group_in);
if (IS_ERR(g)) {
err = PTR_ERR(g);
ct_dbg("Failed to create pre ct miss group");
goto err_miss_grp;
}
pre_ct->miss_grp = g;
err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
if (err)
goto err_add_rules;
kvfree(flow_group_in);
return 0;
err_add_rules:
mlx5_destroy_flow_group(pre_ct->miss_grp);
err_miss_grp:
mlx5_destroy_flow_group(pre_ct->flow_grp);
err_flow_grp:
mlx5_destroy_flow_table(ft);
out_free:
kvfree(flow_group_in);
return err;
}
static void
mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
struct mlx5_tc_ct_pre *pre_ct)
{
tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
mlx5_destroy_flow_group(pre_ct->miss_grp);
mlx5_destroy_flow_group(pre_ct->flow_grp);
mlx5_destroy_flow_table(pre_ct->fdb);
}
static int
mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
{
int err;
err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
if (err)
return err;
err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
if (err)
goto err_pre_ct_nat;
return 0;
err_pre_ct_nat:
mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
return err;
}
static void
mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
{
mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
}
static struct mlx5_ct_ft *
mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
struct nf_flowtable *nf_ft)
......@@ -813,6 +1060,10 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
ft->ct_priv = ct_priv;
refcount_set(&ft->refcount, 1);
err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
if (err)
goto err_alloc_pre_ct;
err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
if (err)
goto err_init;
......@@ -834,6 +1085,8 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
err_insert:
rhashtable_destroy(&ft->ct_entries_ht);
err_init:
mlx5_tc_ct_free_pre_ct_tables(ft);
err_alloc_pre_ct:
kfree(ft);
return ERR_PTR(err);
}
......@@ -859,21 +1112,40 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
rhashtable_free_and_destroy(&ft->ct_entries_ht,
mlx5_tc_ct_flush_ft_entry,
ct_priv);
mlx5_tc_ct_free_pre_ct_tables(ft);
kfree(ft);
}
/* We translate the tc filter with CT action to the following HW model:
*
* +-------------------+      +--------------------+    +--------------+
* + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct      +----->
* + original match    +  |   + tuple + zone match + |  + fte_id match +  |
* +-------------------+  |   +--------------------+ |  +--------------+  |
*                        v                          v                    v
*        set chain miss mapping          set mark                original
*        set fte_id                      set label               filter
*        set zone                        set established         actions
*        set tunnel_id                   do nat (if needed)
*        do decap
* +---------------------+
* + fdb prio (tc chain) +
* + original match      +
* +---------------------+
*      | set chain miss mapping
*      | set fte_id
*      | set tunnel_id
*      | do decap
*      v
* +---------------------+
* + pre_ct/pre_ct_nat   +  if matches     +---------------------+
* + zone+nat match      +---------------->+ post_ct (see below) +
* +---------------------+  set zone       +---------------------+
*      | set zone
*      v
* +--------------------+
* + CT (nat or no nat) +
* + tuple + zone match +
* +--------------------+
*      | set mark
*      | set label
*      | set established
*      | do nat (if needed)
*      v
* +--------------+
* + post_ct      + original filter actions
* + fte_id match +------------------------>
* +--------------+
*/
static int
__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
......@@ -951,14 +1223,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
goto err_mapping;
}
err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG,
attr->ct_attr.zone &
MLX5_CT_ZONE_MASK);
if (err) {
ct_dbg("Failed to set zone register mapping");
goto err_mapping;
}
err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
FTEID_TO_REG, fte_id);
if (err) {
......@@ -1018,7 +1282,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
/* Change original rule point to ct table */
pre_ct_attr->dest_chain = 0;
pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb;
ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw,
orig_spec,
pre_ct_attr);
......
......@@ -6,25 +6,6 @@
#include "en.h"
#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1)
#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
MLX5E_SQ_NOPS_ROOM)
#ifndef CONFIG_MLX5_EN_TLS
#define MLX5E_SQ_TLS_ROOM (0)
#else
/* TLS offload requires additional stop_room for:
* - a resync SKB.
* kTLS offload requires fixed additional stop_room for:
* - a static params WQE, and a progress params WQE.
* The additional MTU-depending room for the resync DUMP WQEs
* will be calculated and added in runtime.
*/
#define MLX5E_SQ_TLS_ROOM \
(MLX5_SEND_WQE_MAX_WQEBBS + \
MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)
#endif
#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
enum mlx5e_icosq_wqe_type {
......@@ -331,4 +312,25 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
}
}
static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
{
BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS);
/* A WQE must not cross the page boundary, hence two conditions:
* 1. Its size must not exceed the page size.
* 2. If the WQE size is X, and the space remaining in a page is less
* than X, this space needs to be padded with NOPs. So, one WQE of
* size X may require up to X-1 WQEBBs of padding, which makes the
* stop room of X-1 + X.
* WQE size is also limited by the hardware limit.
*/
if (__builtin_constant_p(wqe_size))
BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
else
WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
return wqe_size * 2 - 1;
}
#endif
......@@ -257,8 +257,10 @@ enum {
static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
if (unlikely(!sq->mpwqe.wqe)) {
const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
MLX5E_XDPSQ_STOP_ROOM))) {
stop_room))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
......
......@@ -40,8 +40,6 @@
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM)
#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \
DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS)
......
......@@ -4,6 +4,19 @@
#include "en.h"
#include "en_accel/ktls.h"
u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq)
{
u16 num_dumps, stop_room = 0;
num_dumps = mlx5e_ktls_dumps_num_wqes(sq, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE);
stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_STATIC_WQEBBS);
stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_PROGRESS_WQEBBS);
stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS);
return stop_room;
}
static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn)
{
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
......
......@@ -102,15 +102,16 @@ bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *s
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc);
u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq);
static inline u8
mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags,
mlx5e_ktls_dumps_num_wqes(struct mlx5e_txqsq *sq, unsigned int nfrags,
unsigned int sync_len)
{
/* Given the MTU and sync_len, calculates an upper bound for the
* number of WQEBBs needed for the TX resync DUMP WQEs of a record.
* number of DUMP WQEs needed for the TX resync of a record.
*/
return MLX5E_KTLS_DUMP_WQEBBS *
(nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu));
return nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu);
}
#else
......@@ -122,7 +123,6 @@ static inline void
mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc) {}
#endif
#endif /* __MLX5E_TLS_H__ */
......@@ -240,3 +240,17 @@ void mlx5e_tls_cleanup(struct mlx5e_priv *priv)
kfree(tls);
priv->tls = NULL;
}
u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq)
{
struct mlx5_core_dev *mdev = sq->channel->mdev;
if (!mlx5_accel_is_tls_device(mdev))
return 0;
if (MLX5_CAP_GEN(mdev, tls_tx))
return mlx5e_ktls_get_stop_room(sq);
/* Resync SKB. */
return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
}
......@@ -94,6 +94,8 @@ int mlx5e_tls_get_count(struct mlx5e_priv *priv);
int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq);
#else
static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
......@@ -108,6 +110,11 @@ static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
static inline u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq)
{
return 0;
}
#endif
#endif /* __MLX5E_TLS_H__ */
......@@ -141,10 +141,12 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
memset(res, 0, sizeof(*res));
}
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
bool enable_mc_lb)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_tir *tir;
u8 lb_flags = 0;
int err = 0;
u32 tirn = 0;
int inlen;
......@@ -158,8 +160,13 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
}
if (enable_uc_lb)
MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
if (enable_mc_lb)
lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
if (lb_flags)
MLX5_SET(modify_tir_in, in, ctx.self_lb_block, lb_flags);
MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
......
......@@ -35,6 +35,8 @@
#include "en/port.h"
#include "en/port_buffer.h"
#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
#define MLX5E_100MB (100000)
#define MLX5E_1GB (1000000)
......@@ -49,6 +51,12 @@ enum {
MLX5E_LOWEST_PRIO_GROUP = 0,
};
enum {
MLX5_DCB_CHG_RESET,
MLX5_DCB_NO_CHG,
MLX5_DCB_CHG_NO_RESET,
};
#define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \
MLX5_CAP_QCAM_REG(mdev, qpts) && \
MLX5_CAP_QCAM_REG(mdev, qpdpm))
......@@ -238,7 +246,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
* Report both group #0 and #1 as ETS type.
* All the tcs in group #0 will be reported with 0% BW.
*/
int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
static int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
{
struct mlx5_core_dev *mdev = priv->mdev;
u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS];
......@@ -1009,6 +1017,24 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
.setpfcstate = mlx5e_dcbnl_setpfcstate,
};
void mlx5e_dcbnl_build_netdev(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
}
void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
if (MLX5_CAP_GEN(mdev, qos))
netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
}
static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv,
enum mlx5_dcbx_oper_mode *mode)
{
......
......@@ -66,7 +66,6 @@
#include "en/devlink.h"
#include "lib/mlx5.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
......@@ -1122,6 +1121,22 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
return 0;
}
static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size)
{
int sq_size = 1 << log_sq_size;
sq->stop_room = mlx5e_tls_get_stop_room(sq);
sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
if (WARN_ON(sq->stop_room >= sq_size)) {
netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n",
sq->stop_room, sq_size);
return -ENOSPC;
}
return 0;
}
static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int txq_ix,
......@@ -1146,20 +1161,16 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
sq->stop_room = MLX5E_SQ_STOP_ROOM;
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
#ifdef CONFIG_MLX5_EN_TLS
if (mlx5_accel_is_tls_device(c->priv->mdev)) {
if (mlx5_accel_is_tls_device(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
sq->stop_room += MLX5E_SQ_TLS_ROOM +
mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS,
TLS_MAX_PAYLOAD_SIZE);
}
#endif
err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size);
if (err)
return err;
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
......@@ -4915,10 +4926,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->netdev_ops = &mlx5e_netdev_ops;
#ifdef CONFIG_MLX5_CORE_EN_DCB
if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
#endif
mlx5e_dcbnl_build_netdev(netdev);
netdev->watchdog_timeo = 15 * HZ;
......@@ -5206,9 +5214,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
return err;
}
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_initialize(priv);
#endif
return 0;
}
......@@ -5235,9 +5241,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
mlx5e_hv_vhca_stats_create(priv);
if (netdev->reg_state != NETREG_REGISTERED)
return;
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_init_app(priv);
#endif
queue_work(priv->wq, &priv->set_rx_mode_work);
......@@ -5252,10 +5256,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
#ifdef CONFIG_MLX5_CORE_EN_DCB
if (priv->netdev->reg_state == NETREG_REGISTERED)
mlx5e_dcbnl_delete_app(priv);
#endif
rtnl_lock();
if (netif_running(priv->netdev))
......@@ -5275,7 +5277,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
{
return mlx5e_refresh_tirs(priv, false);
return mlx5e_refresh_tirs(priv, false, false);
}
static const struct mlx5e_profile mlx5e_nic_profile = {
......@@ -5552,9 +5554,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
mlx5e_devlink_port_type_eth_set(priv);
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_init_app(priv);
#endif
return priv;
err_devlink_port_unregister:
......@@ -5577,9 +5577,7 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
}
#endif
priv = vpriv;
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_delete_app(priv);
#endif
unregister_netdev(priv->netdev);
mlx5e_devlink_port_unregister(priv);
mlx5e_detach(mdev, vpriv);
......
......@@ -1544,10 +1544,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
/* we want a persistent mac for the uplink rep */
mlx5_query_mac_address(mdev, netdev->dev_addr);
netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
#ifdef CONFIG_MLX5_CORE_EN_DCB
if (MLX5_CAP_GEN(mdev, qos))
netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
#endif
mlx5e_dcbnl_build_rep_netdev(netdev);
} else {
netdev->netdev_ops = &mlx5e_netdev_ops_rep;
eth_hw_addr_random(netdev);
......@@ -1929,10 +1926,8 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
mlx5_lag_add(mdev, netdev);
priv->events_nb.notifier_call = uplink_rep_async_event;
mlx5_notifier_register(mdev, &priv->events_nb);
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_initialize(priv);
mlx5e_dcbnl_init_app(priv);
#endif
}
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
......@@ -1940,9 +1935,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_delete_app(priv);
#endif
mlx5_notifier_unregister(mdev, &priv->events_nb);
cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
mlx5_lag_remove(mdev);
......
......@@ -1489,6 +1489,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
#ifdef CONFIG_MLX5_CORE_IPOIB
#define MLX5_IB_GRH_SGID_OFFSET 8
#define MLX5_IB_GRH_DGID_OFFSET 24
#define MLX5_GID_SIZE 16
......@@ -1502,6 +1503,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
struct net_device *netdev;
struct mlx5e_priv *priv;
char *pseudo_header;
u32 flags_rqpn;
u32 qpn;
u8 *dgid;
u8 g;
......@@ -1523,7 +1525,8 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
tstamp = &priv->tstamp;
stats = &priv->channel_stats[rq->ix].rq;
g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
flags_rqpn = be32_to_cpu(cqe->flags_rqpn);
g = (flags_rqpn >> 28) & 3;
dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
if ((!g) || dgid[0] != 0xff)
skb->pkt_type = PACKET_HOST;
......@@ -1532,9 +1535,15 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
else
skb->pkt_type = PACKET_MULTICAST;
/* TODO: IB/ipoib: Allow mcast packets from other VFs
* 68996a6e760e5c74654723eeb57bf65628ae87f4
/* Drop packets that this interface sent, ie multicast packets
* that the HCA has replicated.
*/
if (g && (qpn == (flags_rqpn & 0xffffff)) &&
(memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET,
MLX5_GID_SIZE) == 0)) {
skb->dev = NULL;
return;
}
skb_pull(skb, MLX5_IB_GRH_BYTES);
......
......@@ -234,7 +234,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
return err;
}
err = mlx5e_refresh_tirs(priv, true);
err = mlx5e_refresh_tirs(priv, true, false);
if (err)
goto out;
......@@ -263,7 +263,7 @@ static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv,
mlx5_nic_vport_update_local_lb(priv->mdev, false);
dev_remove_pack(&lbtp->pt);
mlx5e_refresh_tirs(priv, false);
mlx5e_refresh_tirs(priv, false, false);
}
#define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200))
......
......@@ -1755,11 +1755,13 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
list_for_each_entry(iter, match_head, list) {
g = iter->g;
if (!g->node.active)
continue;
nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
if (!g->node.active) {
up_write_ref_node(&g->node, false);
continue;
}
err = insert_fte(g, fte);
if (err) {
up_write_ref_node(&g->node, false);
......
......@@ -262,6 +262,11 @@ void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn)
mlx5_cmd_exec_in(mdev, destroy_qp, in);
}
int mlx5i_update_nic_rx(struct mlx5e_priv *priv)
{
return mlx5e_refresh_tirs(priv, true, true);
}
int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn)
{
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
......@@ -456,7 +461,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
.cleanup_rx = mlx5i_cleanup_rx,
.enable = NULL, /* mlx5i_enable */
.disable = NULL, /* mlx5i_disable */
.update_rx = mlx5e_update_nic_rx,
.update_rx = mlx5i_update_nic_rx,
.update_stats = NULL, /* mlx5i_update_stats */
.update_carrier = NULL, /* no HW update in IB link */
.rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
......
......@@ -92,6 +92,8 @@ int mlx5i_init(struct mlx5_core_dev *mdev,
void *ppriv);
void mlx5i_cleanup(struct mlx5e_priv *priv);
int mlx5i_update_nic_rx(struct mlx5e_priv *priv);
/* Get child interface nic profile */
const struct mlx5e_profile *mlx5i_pkey_get_profile(void);
......
......@@ -347,7 +347,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
.cleanup_rx = mlx5i_pkey_cleanup_rx,
.enable = NULL,
.disable = NULL,
.update_rx = mlx5e_update_nic_rx,
.update_rx = mlx5i_update_nic_rx,
.update_stats = NULL,
.rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
......
......@@ -32,6 +32,7 @@
#include <linux/clocksource.h>
#include <linux/highmem.h>
#include <linux/ptp_clock_kernel.h>
#include <rdma/mlx5-abi.h>
#include "lib/eq.h"
#include "en.h"
......@@ -66,6 +67,26 @@ enum {
MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7),
};
static u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
struct ptp_system_timestamp *sts)
{
u32 timer_h, timer_h1, timer_l;
timer_h = ioread32be(&dev->iseg->internal_timer_h);
ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
ptp_read_system_postts(sts);
timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
if (timer_h != timer_h1) {
/* wrap around */
ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
ptp_read_system_postts(sts);
}
return (u64)timer_l | (u64)timer_h1 << 32;
}
static u64 read_internal_timer(const struct cyclecounter *cc)
{
struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles);
......
......@@ -672,26 +672,6 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
return mlx5_cmd_exec_in(dev, disable_hca, in);
}
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
struct ptp_system_timestamp *sts)
{
u32 timer_h, timer_h1, timer_l;
timer_h = ioread32be(&dev->iseg->internal_timer_h);
ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
ptp_read_system_postts(sts);
timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
if (timer_h != timer_h1) {
/* wrap around */
ptp_read_system_prets(sts);
timer_l = ioread32be(&dev->iseg->internal_timer_l);
ptp_read_system_postts(sts);
}
return (u64)timer_l | (u64)timer_h1 << 32;
}
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {};
......@@ -1217,10 +1197,9 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
mlx5_register_device(dev);
set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
out:
mutex_unlock(&dev->intf_state_mutex);
return err;
mutex_unlock(&dev->intf_state_mutex);
return 0;
err_devlink_reg:
mlx5_unload(dev);
......@@ -1230,17 +1209,15 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
function_teardown:
mlx5_function_teardown(dev, boot);
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
out:
mutex_unlock(&dev->intf_state_mutex);
return err;
}
void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
{
if (cleanup) {
if (cleanup)
mlx5_unregister_device(dev);
mlx5_drain_health_wq(dev);
}
mutex_lock(&dev->intf_state_mutex);
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
......@@ -1383,6 +1360,7 @@ static void remove_one(struct pci_dev *pdev)
mlx5_crdump_disable(dev);
mlx5_devlink_unregister(devlink);
mlx5_drain_health_wq(dev);
mlx5_unload_one(dev, true);
mlx5_pci_close(dev);
mlx5_mdev_uninit(dev);
......
......@@ -38,7 +38,6 @@
#include <linux/sched.h>
#include <linux/if_link.h>
#include <linux/firmware.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/driver.h>
......@@ -141,8 +140,6 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
u32 element_id);
int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages);
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
struct ptp_system_timestamp *sts);
void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
void mlx5_cmd_flush(struct mlx5_core_dev *dev);
......
......@@ -156,15 +156,21 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
return err;
}
static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
{
struct fw_page *fp;
struct fw_page *fp = NULL;
struct fw_page *iter;
unsigned n;
if (list_empty(&dev->priv.free_list))
list_for_each_entry(iter, &dev->priv.free_list, list) {
if (iter->func_id != func_id)
continue;
fp = iter;
}
if (list_empty(&dev->priv.free_list) || !fp)
return -ENOMEM;
fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
if (n >= MLX5_NUM_4K_IN_PAGE) {
mlx5_core_warn(dev, "alloc 4k bug\n");
......@@ -182,35 +188,35 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp)
static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp,
bool in_free_list)
{
int n = (fwp->addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
fwp->free_count++;
set_bit(n, &fwp->bitmask);
if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
rb_erase(&fwp->rb_node, &dev->priv.page_root);
if (fwp->free_count != 1)
if (in_free_list)
list_del(&fwp->list);
dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK,
PAGE_SIZE, DMA_BIDIRECTIONAL);
__free_page(fwp->page);
kfree(fwp);
} else if (fwp->free_count == 1) {
list_add(&fwp->list, &dev->priv.free_list);
}
}
static void free_addr(struct mlx5_core_dev *dev, u64 addr)
static void free_4k(struct mlx5_core_dev *dev, u64 addr)
{
struct fw_page *fwp;
int n;
fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
if (!fwp) {
mlx5_core_warn_rl(dev, "page not found\n");
return;
}
free_fwp(dev, fwp);
n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
fwp->free_count++;
set_bit(n, &fwp->bitmask);
if (fwp->free_count == MLX5_NUM_4K_IN_PAGE)
free_fwp(dev, fwp, fwp->free_count != 1);
else if (fwp->free_count == 1)
list_add(&fwp->list, &dev->priv.free_list);
}
static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
......@@ -295,7 +301,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
for (i = 0; i < npages; i++) {
retry:
err = alloc_4k(dev, &addr);
err = alloc_4k(dev, &addr, func_id);
if (err) {
if (err == -ENOMEM)
err = alloc_system_page(dev, func_id);
......@@ -334,7 +340,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
out_4k:
for (i--; i >= 0; i--)
free_addr(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
out_free:
kvfree(in);
if (notify_fail)
......@@ -355,8 +361,8 @@ static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id,
p = rb_next(p);
if (fwp->func_id != func_id)
continue;
free_fwp(dev, fwp);
npages++;
npages += (MLX5_NUM_4K_IN_PAGE - fwp->free_count);
free_fwp(dev, fwp, fwp->free_count);
}
dev->priv.fw_pages -= npages;
......@@ -440,7 +446,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
}
for (i = 0; i < num_claimed; i++)
free_addr(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
if (nclaimed)
*nclaimed = num_claimed;
......