Commit 9908aa29 authored by Tariq Toukan, committed by David S. Miller

net/mlx5e: CQE based moderation

In this mode the moderation timer will restart upon
new completion (CQE) generation rather than upon interrupt
generation.

The outcome is that for bursty traffic the period timer will never
expire and thus only the moderation frames counter will dictate
interrupt generation, thus the interrupt rate will be relative
to the incoming packet size.
If the burst ceases for a "moderation period" of time, then an interrupt
will be issued immediately.

CQE based moderation is off by default and can be controlled
via ethtool set_priv_flags.

Performance tested on ConnectX4-Lx 50G.

Less packet loss in netperf UDP and TCP tests, with no bw degradation,
for both single and multi streams, with message sizes of
64, 1024, 1472 and 32768 byte.
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Achiad Shochat <achiad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Gil Rockah <gilr@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 4e59e288
...@@ -79,6 +79,7 @@ ...@@ -79,6 +79,7 @@
#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10
#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3
#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20
#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10
#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20
...@@ -145,11 +146,11 @@ struct mlx5e_umr_wqe { ...@@ -145,11 +146,11 @@ struct mlx5e_umr_wqe {
}; };
static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
"nop", "rx_cqe_moder",
}; };
enum mlx5e_priv_flag { enum mlx5e_priv_flag {
MLX5E_PFLAG_NOP = (1 << 0), MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
}; };
#define MLX5E_SET_PRIV_FLAG(priv, pflag, enable) \ #define MLX5E_SET_PRIV_FLAG(priv, pflag, enable) \
...@@ -165,6 +166,11 @@ enum mlx5e_priv_flag { ...@@ -165,6 +166,11 @@ enum mlx5e_priv_flag {
#define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ #define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */
#endif #endif
/*
 * Pair of completion-queue moderation settings. One instance is kept per
 * direction (RX and TX) in struct mlx5e_params and handed as a unit to
 * mlx5e_open_cq(), which forwards both fields to
 * mlx5_core_modify_cq_moderation().
 */
struct mlx5e_cq_moder {
/* Moderation period; filled from ethtool's {rx,tx}_coalesce_usecs. */
u16 usec;
/* Moderation frame count; filled from ethtool's {rx,tx}_max_coalesced_frames. */
u16 pkts;
};
struct mlx5e_params { struct mlx5e_params {
u8 log_sq_size; u8 log_sq_size;
u8 rq_wq_type; u8 rq_wq_type;
...@@ -173,12 +179,11 @@ struct mlx5e_params { ...@@ -173,12 +179,11 @@ struct mlx5e_params {
u8 log_rq_size; u8 log_rq_size;
u16 num_channels; u16 num_channels;
u8 num_tc; u8 num_tc;
u8 rx_cq_period_mode;
bool rx_cqe_compress_admin; bool rx_cqe_compress_admin;
bool rx_cqe_compress; bool rx_cqe_compress;
u16 rx_cq_moderation_usec; struct mlx5e_cq_moder rx_cq_moderation;
u16 rx_cq_moderation_pkts; struct mlx5e_cq_moder tx_cq_moderation;
u16 tx_cq_moderation_usec;
u16 tx_cq_moderation_pkts;
u16 min_rx_wqes; u16 min_rx_wqes;
bool lro_en; bool lro_en;
u32 lro_wqe_sz; u32 lro_wqe_sz;
...@@ -667,6 +672,9 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, ...@@ -667,6 +672,9 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
int num_channels); int num_channels);
int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
u8 cq_period_mode);
static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz)
{ {
......
...@@ -524,10 +524,10 @@ static int mlx5e_get_coalesce(struct net_device *netdev, ...@@ -524,10 +524,10 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
return -ENOTSUPP; return -ENOTSUPP;
coal->rx_coalesce_usecs = priv->params.rx_cq_moderation_usec; coal->rx_coalesce_usecs = priv->params.rx_cq_moderation.usec;
coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation_pkts; coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts;
coal->tx_coalesce_usecs = priv->params.tx_cq_moderation_usec; coal->tx_coalesce_usecs = priv->params.tx_cq_moderation.usec;
coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation_pkts; coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation.pkts;
return 0; return 0;
} }
...@@ -545,10 +545,11 @@ static int mlx5e_set_coalesce(struct net_device *netdev, ...@@ -545,10 +545,11 @@ static int mlx5e_set_coalesce(struct net_device *netdev,
return -ENOTSUPP; return -ENOTSUPP;
mutex_lock(&priv->state_lock); mutex_lock(&priv->state_lock);
priv->params.tx_cq_moderation_usec = coal->tx_coalesce_usecs;
priv->params.tx_cq_moderation_pkts = coal->tx_max_coalesced_frames; priv->params.tx_cq_moderation.usec = coal->tx_coalesce_usecs;
priv->params.rx_cq_moderation_usec = coal->rx_coalesce_usecs; priv->params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames;
priv->params.rx_cq_moderation_pkts = coal->rx_max_coalesced_frames; priv->params.rx_cq_moderation.usec = coal->rx_coalesce_usecs;
priv->params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
goto out; goto out;
...@@ -1279,9 +1280,37 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, ...@@ -1279,9 +1280,37 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable);
/*
 * Private-flag handler for "rx_cqe_moder": switches the RX CQ moderation
 * period mode between START_FROM_CQE (enable) and START_FROM_EQE (disable).
 *
 * Returns 0 when the mode is already the requested one or the switch
 * succeeded, -ENOTSUPP when CQE mode is requested but the device does not
 * report the cq_period_start_from_cqe capability, or the error from
 * mlx5e_open_locked() if reopening the interface fails.
 *
 * NOTE(review): uses the *_locked open/close variants; the visible caller
 * (mlx5e_set_priv_flags) takes priv->state_lock before dispatching - confirm
 * no other caller invokes this without the lock.
 */
static int set_pflag_nop(struct net_device *netdev, bool enable) static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
{ {
return 0; struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
bool rx_mode_changed;
u8 rx_cq_period_mode;
int err = 0;
bool reset;
/* Map the boolean flag onto the period mode to be programmed. */
rx_cq_period_mode = enable ?
MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
rx_mode_changed = rx_cq_period_mode != priv->params.rx_cq_period_mode;
/* CQE-based mode needs explicit device support; EQE mode is never refused here. */
if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
!MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
return -ENOTSUPP;
/* Already in the requested mode: leave params and the interface untouched. */
if (!rx_mode_changed)
return 0;
/* An opened interface is closed, reconfigured, then reopened. */
reset = test_bit(MLX5E_STATE_OPENED, &priv->state);
if (reset)
mlx5e_close_locked(netdev);
/*
 * Besides storing the mode, this resets rx_cq_moderation.usec/.pkts to the
 * driver defaults for that mode, overwriting values set via set_coalesce.
 */
mlx5e_set_rx_cq_mode_params(&priv->params, rx_cq_period_mode);
if (reset)
err = mlx5e_open_locked(netdev);
return err;
} }
static int mlx5e_handle_pflag(struct net_device *netdev, static int mlx5e_handle_pflag(struct net_device *netdev,
...@@ -1315,8 +1344,9 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) ...@@ -1315,8 +1344,9 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
mutex_lock(&priv->state_lock); mutex_lock(&priv->state_lock);
err = mlx5e_handle_pflag(netdev, pflags, MLX5E_PFLAG_NOP, err = mlx5e_handle_pflag(netdev, pflags,
set_pflag_nop); MLX5E_PFLAG_RX_CQE_BASED_MODER,
set_pflag_rx_cqe_based_moder);
mutex_unlock(&priv->state_lock); mutex_unlock(&priv->state_lock);
return err ? -EINVAL : 0; return err ? -EINVAL : 0;
......
...@@ -55,6 +55,7 @@ struct mlx5e_cq_param { ...@@ -55,6 +55,7 @@ struct mlx5e_cq_param {
u32 cqc[MLX5_ST_SZ_DW(cqc)]; u32 cqc[MLX5_ST_SZ_DW(cqc)];
struct mlx5_wq_param wq; struct mlx5_wq_param wq;
u16 eq_ix; u16 eq_ix;
u8 cq_period_mode;
}; };
struct mlx5e_channel_param { struct mlx5e_channel_param {
...@@ -896,6 +897,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) ...@@ -896,6 +897,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode);
MLX5_SET(cqc, cqc, c_eqn, eqn); MLX5_SET(cqc, cqc, c_eqn, eqn);
MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
...@@ -925,8 +927,7 @@ static void mlx5e_disable_cq(struct mlx5e_cq *cq) ...@@ -925,8 +927,7 @@ static void mlx5e_disable_cq(struct mlx5e_cq *cq)
static int mlx5e_open_cq(struct mlx5e_channel *c, static int mlx5e_open_cq(struct mlx5e_channel *c,
struct mlx5e_cq_param *param, struct mlx5e_cq_param *param,
struct mlx5e_cq *cq, struct mlx5e_cq *cq,
u16 moderation_usecs, struct mlx5e_cq_moder moderation)
u16 moderation_frames)
{ {
int err; int err;
struct mlx5e_priv *priv = c->priv; struct mlx5e_priv *priv = c->priv;
...@@ -942,8 +943,8 @@ static int mlx5e_open_cq(struct mlx5e_channel *c, ...@@ -942,8 +943,8 @@ static int mlx5e_open_cq(struct mlx5e_channel *c,
if (MLX5_CAP_GEN(mdev, cq_moderation)) if (MLX5_CAP_GEN(mdev, cq_moderation))
mlx5_core_modify_cq_moderation(mdev, &cq->mcq, mlx5_core_modify_cq_moderation(mdev, &cq->mcq,
moderation_usecs, moderation.usec,
moderation_frames); moderation.pkts);
return 0; return 0;
err_destroy_cq: err_destroy_cq:
...@@ -972,8 +973,7 @@ static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, ...@@ -972,8 +973,7 @@ static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
for (tc = 0; tc < c->num_tc; tc++) { for (tc = 0; tc < c->num_tc; tc++) {
err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq, err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq,
priv->params.tx_cq_moderation_usec, priv->params.tx_cq_moderation);
priv->params.tx_cq_moderation_pkts);
if (err) if (err)
goto err_close_tx_cqs; goto err_close_tx_cqs;
} }
...@@ -1110,6 +1110,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, ...@@ -1110,6 +1110,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_channel_param *cparam, struct mlx5e_channel_param *cparam,
struct mlx5e_channel **cp) struct mlx5e_channel **cp)
{ {
struct mlx5e_cq_moder icosq_cq_moder = {0, 0};
struct net_device *netdev = priv->netdev; struct net_device *netdev = priv->netdev;
int cpu = mlx5e_get_cpu(priv, ix); int cpu = mlx5e_get_cpu(priv, ix);
struct mlx5e_channel *c; struct mlx5e_channel *c;
...@@ -1133,7 +1134,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, ...@@ -1133,7 +1134,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, 0, 0); err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, icosq_cq_moder);
if (err) if (err)
goto err_napi_del; goto err_napi_del;
...@@ -1142,8 +1143,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, ...@@ -1142,8 +1143,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
goto err_close_icosq_cq; goto err_close_icosq_cq;
err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq,
priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation);
priv->params.rx_cq_moderation_pkts);
if (err) if (err)
goto err_close_tx_cqs; goto err_close_tx_cqs;
...@@ -1308,6 +1308,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, ...@@ -1308,6 +1308,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
} }
mlx5e_build_common_cq_param(priv, param); mlx5e_build_common_cq_param(priv, param);
param->cq_period_mode = priv->params.rx_cq_period_mode;
} }
static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
...@@ -1318,6 +1320,8 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, ...@@ -1318,6 +1320,8 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
mlx5e_build_common_cq_param(priv, param); mlx5e_build_common_cq_param(priv, param);
param->cq_period_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
} }
static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
...@@ -1329,6 +1333,8 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, ...@@ -1329,6 +1333,8 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
mlx5e_build_common_cq_param(priv, param); mlx5e_build_common_cq_param(priv, param);
param->cq_period_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
} }
static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
...@@ -2856,6 +2862,20 @@ static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) ...@@ -2856,6 +2862,20 @@ static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw)
(pci_bw < 40000) && (pci_bw < link_speed)); (pci_bw < 40000) && (pci_bw < link_speed));
} }
/*
 * Record the RX CQ period mode in @params and load the default RX
 * moderation values that go with it.
 *
 * The packet threshold is the same for both modes; only the period differs:
 * START_FROM_CQE uses the dedicated (shorter) _USEC_FROM_CQE default, any
 * other mode uses the regular _USEC default.
 */
void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
{
	struct mlx5e_cq_moder *rx_moder = &params->rx_cq_moderation;

	params->rx_cq_period_mode = cq_period_mode;
	rx_moder->pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;

	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
		rx_moder->usec =
			MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
	else
		rx_moder->usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
}
static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
struct net_device *netdev, struct net_device *netdev,
int num_channels) int num_channels)
...@@ -2908,13 +2928,13 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, ...@@ -2908,13 +2928,13 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
BIT(priv->params.log_rq_size)); BIT(priv->params.log_rq_size));
priv->params.rx_cq_moderation_usec =
MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; mlx5e_set_rx_cq_mode_params(&priv->params,
priv->params.rx_cq_moderation_pkts = MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
priv->params.tx_cq_moderation_usec = priv->params.tx_cq_moderation.usec =
MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
priv->params.tx_cq_moderation_pkts = priv->params.tx_cq_moderation.pkts =
MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);
priv->params.num_tc = 1; priv->params.num_tc = 1;
...@@ -2929,6 +2949,10 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, ...@@ -2929,6 +2949,10 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
priv->params.lro_wqe_sz = priv->params.lro_wqe_sz =
MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
/* Initialize pflags */
MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER,
priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
priv->mdev = mdev; priv->mdev = mdev;
priv->netdev = netdev; priv->netdev = netdev;
priv->params.num_channels = num_channels; priv->params.num_channels = num_channels;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment