Commit 6a47a570 authored by David S. Miller

Merge branch 'mlx5-next'

Saeed Mahameed says:

====================
Mellanox 100G mlx5 CQE compression

Introducing ConnectX-4 CQE (Completion Queue Entry) compression feature
for mlx5 ethernet driver.

CQE compression reduces PCI overhead by coalescing and compressing multiple CQEs into a
single merged CQE.  Successful compression improves message rate, especially for small-packet
traffic.

CQE compression in detail:

Instead of writing full CQEs to memory, multiple almost identical CQEs are merged and compressed.
Information that is shared between the CQEs is written once, regardless of the number of
compressed CQEs.  In addition, only the unique information (small amount of bytes compared to
full CQE size) is written per CQE.

CQE Compression Block:

This block contains multiple compressed CQEs.  CQE Compression Block contains a single copy
of CQEs properties which are shared between all the compressed CQEs (called Title, see below)
and multiple mini CQEs (CQEs in compressed form).

Title:

The Title holds information which is shared between all the compressed CQEs in the CQE Compression
Block.  In each Compression Block there is only a single Title regardless of the number
of compressed CQEs.

Mini CQE:

A CQE in compressed form that holds some data needed to extract a single full CQE, for example
8 Bytes instead of 64 Bytes.
The information shared by all compressed CQEs that belong to the same CQE Compression
Block, called the Title, is written once; only the unique information of each compressed
CQE (for example, 8 bytes), called a mini CQE, is written per compressed CQE.

Since CQE Compression can add overhead to the software (CPU),
it will be only enabled on "weak/slow" PCI slots, where it can actually help.

Applied on top: c047c3b1 ('netfilter: conntrack: remove uninitialized shadow variable')
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents c1869d58 b797a684
......@@ -64,12 +64,9 @@
#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4
#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6
#define MLX5_MPWRQ_LOG_NUM_STRIDES 11 /* >= 9, HW restriction */
#define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */
#define MLX5_MPWRQ_NUM_STRIDES BIT(MLX5_MPWRQ_LOG_NUM_STRIDES)
#define MLX5_MPWRQ_STRIDE_SIZE BIT(MLX5_MPWRQ_LOG_STRIDE_SIZE)
#define MLX5_MPWRQ_LOG_WQE_SZ (MLX5_MPWRQ_LOG_NUM_STRIDES +\
MLX5_MPWRQ_LOG_STRIDE_SIZE)
#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */
#define MLX5_MPWRQ_LOG_WQE_SZ 17
#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
......@@ -154,9 +151,13 @@ struct mlx5e_umr_wqe {
struct mlx5e_params {
u8 log_sq_size;
u8 rq_wq_type;
u8 mpwqe_log_stride_sz;
u8 mpwqe_log_num_strides;
u8 log_rq_size;
u16 num_channels;
u8 num_tc;
bool rx_cqe_compress_admin;
bool rx_cqe_compress;
u16 rx_cq_moderation_usec;
u16 rx_cq_moderation_pkts;
u16 tx_cq_moderation_usec;
......@@ -202,6 +203,13 @@ struct mlx5e_cq {
struct mlx5e_channel *channel;
struct mlx5e_priv *priv;
/* cqe decompression */
struct mlx5_cqe64 title;
struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
u8 mini_arr_idx;
u16 decmprs_left;
u16 decmprs_wqe_counter;
/* control */
struct mlx5_wq_ctrl wq_ctrl;
} ____cacheline_aligned_in_smp;
......@@ -240,6 +248,8 @@ struct mlx5e_rq {
/* control */
struct mlx5_wq_ctrl wq_ctrl;
u8 wq_type;
u32 mpwqe_stride_sz;
u32 mpwqe_num_strides;
u32 rqn;
struct mlx5e_channel *channel;
struct mlx5e_priv *priv;
......@@ -263,7 +273,7 @@ struct mlx5e_mpw_info {
void (*dma_pre_sync)(struct device *pdev,
struct mlx5e_mpw_info *wi,
u32 wqe_offset, u32 len);
void (*add_skb_frag)(struct device *pdev,
void (*add_skb_frag)(struct mlx5e_rq *rq,
struct sk_buff *skb,
struct mlx5e_mpw_info *wi,
u32 page_idx, u32 frag_offset, u32 len);
......@@ -616,6 +626,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv);
void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv);
int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr);
int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr);
void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val);
int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
u16 vid);
......@@ -634,6 +645,7 @@ int mlx5e_close_locked(struct net_device *netdev);
void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
u32 *indirection_rqt, int len,
int num_channels);
int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz)
......
......@@ -93,6 +93,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
/* RX HW timestamp */
switch (config.rx_filter) {
case HWTSTAMP_FILTER_NONE:
/* Reset CQE compression to Admin default */
mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_admin);
break;
case HWTSTAMP_FILTER_ALL:
case HWTSTAMP_FILTER_SOME:
......@@ -108,6 +110,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
/* Disable CQE compression */
mlx5e_modify_rx_cqe_compression(priv, false);
config.rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
......
......@@ -613,6 +613,25 @@ static u32 ptys2ethtool_supported_port(u32 eth_proto_cap)
return 0;
}
/*
 * Report the highest speed among the link modes the port is capable of.
 *
 * The port's protocol capability mask is queried and every set bit is
 * looked up in ptys2ethtool_table; the largest .speed found is stored
 * in *speed (0 when no capability bit is set).
 *
 * Returns 0 on success, or the error from mlx5_query_port_proto_cap(),
 * in which case *speed is left untouched.
 */
int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
{
	u32 fastest = 0;
	u32 proto_cap;
	int mode;
	int err;

	err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
	if (err)
		return err;

	for (mode = 0; mode < MLX5E_LINK_MODES_NUMBER; mode++) {
		/* Skip link modes the port does not advertise. */
		if (!(proto_cap & MLX5E_PROT_MASK(mode)))
			continue;

		if (ptys2ethtool_table[mode].speed > fastest)
			fastest = ptys2ethtool_table[mode].speed;
	}

	*speed = fastest;
	return 0;
}
static void get_speed_duplex(struct net_device *netdev,
u32 eth_proto_oper,
struct ethtool_cmd *cmd)
......
......@@ -114,6 +114,8 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
s->rx_mpwqe_frag += rq_stats->mpwqe_frag;
s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
for (j = 0; j < priv->params.num_tc; j++) {
sq_stats = &priv->channel[i]->sq[j].stats;
......@@ -305,7 +307,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE;
rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
byte_count = rq->wqe_sz;
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
......@@ -1128,9 +1132,9 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
switch (priv->params.rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
MLX5_SET(wq, wq, log_wqe_num_of_strides,
MLX5_MPWRQ_LOG_NUM_STRIDES - 9);
priv->params.mpwqe_log_num_strides - 9);
MLX5_SET(wq, wq, log_wqe_stride_size,
MLX5_MPWRQ_LOG_STRIDE_SIZE - 6);
priv->params.mpwqe_log_stride_sz - 6);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
......@@ -1197,13 +1201,17 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
switch (priv->params.rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
log_cq_size = priv->params.log_rq_size +
MLX5_MPWRQ_LOG_NUM_STRIDES;
priv->params.mpwqe_log_num_strides;
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
log_cq_size = priv->params.log_rq_size;
}
MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
if (priv->params.rx_cqe_compress) {
MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
MLX5_SET(cqc, cqc, cqe_comp_en, 1);
}
mlx5e_build_common_cq_param(priv, param);
}
......@@ -2708,11 +2716,49 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
MLX5_CAP_ETH(mdev, reg_umr_sq);
}
/*
 * Estimate the PCIe bandwidth available to the device.
 *
 * The minimum link speed/width reported by pcie_get_minimum_link() is
 * turned into a single figure: per-lane rate (2500, 5000 or 8000 for
 * 2.5, 5.0 and 8.0 GT/s) multiplied by the lane count.
 * NOTE(review): the result is presumably meant to be in the same units
 * as the link speed it is later compared with - confirm.
 *
 * Returns 0 and fills *pci_bw on success; the helper's error code if the
 * query fails; -EINVAL when speed or width is unknown or the speed is not
 * one of the three handled generations. *pci_bw is not written on failure.
 */
static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
{
	enum pcie_link_width width;
	enum pci_bus_speed speed;
	u32 lane_rate;
	int err;

	err = pcie_get_minimum_link(mdev->pdev, &speed, &width);
	if (err)
		return err;

	if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN)
		return -EINVAL;

	/* Map the link generation to its per-lane rate. */
	switch (speed) {
	case PCIE_SPEED_2_5GT:
		lane_rate = 2500;
		break;
	case PCIE_SPEED_5_0GT:
		lane_rate = 5000;
		break;
	case PCIE_SPEED_8_0GT:
		lane_rate = 8000;
		break;
	default:
		return -EINVAL;
	}

	*pci_bw = lane_rate * width;
	return 0;
}
/*
 * Decide whether RX CQE compression should be on by default.
 *
 * Compression is chosen only when both figures are known (non-zero),
 * the PCI bandwidth is below 40000, and the PCI bandwidth is lower than
 * the maximum link speed - i.e. the PCI link is the bottleneck.
 */
static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw)
{
	/* A zero value means the query failed or returned nothing usable. */
	if (!link_speed || !pci_bw)
		return false;

	if (pci_bw >= 40000)
		return false;

	return pci_bw < link_speed;
}
static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
struct net_device *netdev,
int num_channels)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
u32 link_speed = 0;
u32 pci_bw = 0;
priv->params.log_sq_size =
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
......@@ -2720,15 +2766,42 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
MLX5_WQ_TYPE_LINKED_LIST;
/* set CQE compression */
priv->params.rx_cqe_compress_admin = false;
if (MLX5_CAP_GEN(mdev, cqe_compression) &&
MLX5_CAP_GEN(mdev, vport_group_manager)) {
mlx5e_get_max_linkspeed(mdev, &link_speed);
mlx5e_get_pci_bw(mdev, &pci_bw);
mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n",
link_speed, pci_bw);
priv->params.rx_cqe_compress_admin =
cqe_compress_heuristic(link_speed, pci_bw);
}
priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin;
switch (priv->params.rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
priv->params.mpwqe_log_stride_sz =
priv->params.rx_cqe_compress ?
MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
MLX5_MPWRQ_LOG_STRIDE_SIZE;
priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
priv->params.mpwqe_log_stride_sz;
priv->params.lro_en = true;
break;
default: /* MLX5_WQ_TYPE_LINKED_LIST */
priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
}
mlx5_core_info(mdev,
"MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
BIT(priv->params.log_rq_size),
BIT(priv->params.mpwqe_log_stride_sz),
priv->params.rx_cqe_compress_admin);
priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
BIT(priv->params.log_rq_size));
priv->params.rx_cq_moderation_usec =
......
......@@ -72,6 +72,8 @@ struct mlx5e_sw_stats {
u64 rx_mpwqe_filler;
u64 rx_mpwqe_frag;
u64 rx_buff_alloc_err;
u64 rx_cqe_compress_blks;
u64 rx_cqe_compress_pkts;
/* Special handling counters */
u64 link_down_events;
......@@ -101,6 +103,8 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) },
};
......@@ -283,6 +287,8 @@ struct mlx5e_rq_stats {
u64 mpwqe_filler;
u64 mpwqe_frag;
u64 buff_alloc_err;
u64 cqe_compress_blks;
u64 cqe_compress_pkts;
};
static const struct counter_desc rq_stats_desc[] = {
......@@ -297,6 +303,8 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
};
struct mlx5e_sq_stats {
......
......@@ -685,6 +685,40 @@ struct mlx5_cqe64 {
u8 op_own;
};
/*
 * Compressed ("mini") completion queue entry.
 *
 * The leading union overlays three alternative views of the same bytes:
 * an RX hash result, a checksum, or send-WQE information.
 * NOTE(review): which view is valid presumably depends on the mini CQE
 * format configured on the CQ (e.g. MLX5_CQE_FORMAT_CSUM) - confirm
 * against the device specification.
 */
struct mlx5_mini_cqe8 {
union {
/* view 1: RX hash result (big-endian) */
__be32 rx_hash_result;
/* view 2: checksum (big-endian) followed by a reserved half-word */
struct {
__be16 checksum;
__be16 rsvd;
};
/* view 3: send WQE counter and opcode */
struct {
__be16 wqe_counter;
u8 s_wqe_opcode;
u8 reserved;
} s_wqe_info;
};
/* byte count (big-endian) */
__be32 byte_cnt;
};
/*
 * Four consecutive values, 0 through 3.
 * NOTE(review): presumably the possible results of mlx5_get_cqe_format()
 * (a 2-bit field of op_own), with MLX5_COMPRESSED marking a compressed
 * CQE block - confirm against the callers.
 */
enum {
MLX5_NO_INLINE_DATA,
MLX5_INLINE_DATA32_SEG,
MLX5_INLINE_DATA64_SEG,
MLX5_COMPRESSED,
};
/* Mini CQE result format; written to the CQ context field mini_cqe_res_format. */
enum {
MLX5_CQE_FORMAT_CSUM = 0x1,
};
/* Number of entries in the mini CQE array kept per CQ (mlx5e_cq.mini_arr). */
#define MLX5_MINI_CQE_ARRAY_SIZE 8
/* Extract the 2-bit format field held in bits [3:2] of cqe->op_own. */
static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
{
	return (cqe->op_own & 0xc) >> 2;
}
static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
{
return (cqe->lro_tcppsh_abort_dupack >> 6) & 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment