Commit 907676b1 authored by Yunsheng Lin's avatar Yunsheng Lin Committed by David S. Miller

net: hns3: use tx bounce buffer for small packets

When the packet or frag size is small, it causes both security and
performance issues. As DMA can't map a sub-page, some extra
kernel data is visible to devices. On the other hand, the overhead
of DMA map and unmap is huge when the IOMMU is on.

So add a queue based tx shared bounce buffer to memcpy the small
packet when the len of the xmitted skb is below tx_copybreak.
Add tx_spare_buf_size module param to set the size of tx spare
buffer, and add set/get_tunable to set or query the tx_copybreak.

The throughput improves from 30 Gbps to 90+ Gbps when running 16
netperf threads with a 32KB UDP message size when the IOMMU is in
strict mode (tx_copybreak = 2000 and mtu = 1500).
Suggested-by: default avatarBarry Song <song.bao.hua@hisilicon.com>
Signed-off-by: default avatarYunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: default avatarGuangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 8677d78c
...@@ -392,6 +392,56 @@ static void hns3_dbg_fill_content(char *content, u16 len, ...@@ -392,6 +392,56 @@ static void hns3_dbg_fill_content(char *content, u16 len,
*pos++ = '\0'; *pos++ = '\0';
} }
/* Column layout for the tx spare buffer debugfs dump.  Each entry is a
 * column title plus the number of blank spaces padded after it when
 * hns3_dbg_fill_content() renders a row -- presumably matching the
 * hns3_dbg_item convention used by the other tables in this file.
 */
static const struct hns3_dbg_item tx_spare_info_items[] = {
{ "QUEUE_ID", 2 },
{ "COPYBREAK", 2 },
{ "LEN", 7 },
{ "NTU", 4 },
{ "NTC", 4 },
{ "LTC", 4 },
{ "DMA", 17 },
};
/* Dump the state of the tx spare (bounce) buffer of @ring into @buf,
 * advancing *@pos past the written text.  Emits a header line plus one
 * table row per tx queue (@ring_num rows), formatted according to
 * tx_spare_info_items.  Prints a short notice instead when the spare
 * buffer is not enabled.
 *
 * NOTE(review): every loop iteration reads the same @ring/@tx_spare, so
 * all rows show identical data apart from the queue id column -- confirm
 * whether per-ring indexing (ring[i]) was intended here.
 */
static void hns3_dbg_tx_spare_info(struct hns3_enet_ring *ring, char *buf,
int len, u32 ring_num, int *pos)
{
char data_str[ARRAY_SIZE(tx_spare_info_items)][HNS3_DBG_DATA_STR_LEN];
struct hns3_tx_spare *tx_spare = ring->tx_spare;
char *result[ARRAY_SIZE(tx_spare_info_items)];
char content[HNS3_DBG_INFO_LEN];
u32 i, j;
/* tx_spare is NULL when the bounce buffer was not allocated */
if (!tx_spare) {
*pos += scnprintf(buf + *pos, len - *pos,
"tx spare buffer is not enabled\n");
return;
}
/* point each result slot at its backing scratch row */
for (i = 0; i < ARRAY_SIZE(tx_spare_info_items); i++)
result[i] = &data_str[i][0];
*pos += scnprintf(buf + *pos, len - *pos, "tx spare buffer info\n");
/* header row: NULL result prints the column titles */
hns3_dbg_fill_content(content, sizeof(content), tx_spare_info_items,
NULL, ARRAY_SIZE(tx_spare_info_items));
*pos += scnprintf(buf + *pos, len - *pos, "%s", content);
for (i = 0; i < ring_num; i++) {
j = 0;
/* field widths match the tx_spare_info_items column widths */
sprintf(result[j++], "%8u", i);
sprintf(result[j++], "%9u", ring->tx_copybreak);
sprintf(result[j++], "%3u", tx_spare->len);
sprintf(result[j++], "%3u", tx_spare->next_to_use);
sprintf(result[j++], "%3u", tx_spare->next_to_clean);
sprintf(result[j++], "%3u", tx_spare->last_to_clean);
sprintf(result[j++], "%pad", &tx_spare->dma);
hns3_dbg_fill_content(content, sizeof(content),
tx_spare_info_items,
(const char **)result,
ARRAY_SIZE(tx_spare_info_items));
*pos += scnprintf(buf + *pos, len - *pos, "%s", content);
}
}
static const struct hns3_dbg_item rx_queue_info_items[] = { static const struct hns3_dbg_item rx_queue_info_items[] = {
{ "QUEUE_ID", 2 }, { "QUEUE_ID", 2 },
{ "BD_NUM", 2 }, { "BD_NUM", 2 },
...@@ -593,6 +643,8 @@ static int hns3_dbg_tx_queue_info(struct hnae3_handle *h, ...@@ -593,6 +643,8 @@ static int hns3_dbg_tx_queue_info(struct hnae3_handle *h,
pos += scnprintf(buf + pos, len - pos, "%s", content); pos += scnprintf(buf + pos, len - pos, "%s", content);
} }
hns3_dbg_tx_spare_info(ring, buf, len, h->kinfo.num_tqps, &pos);
return 0; return 0;
} }
......
...@@ -304,6 +304,8 @@ enum hns3_desc_type { ...@@ -304,6 +304,8 @@ enum hns3_desc_type {
DESC_TYPE_SKB = 1 << 0, DESC_TYPE_SKB = 1 << 0,
DESC_TYPE_FRAGLIST_SKB = 1 << 1, DESC_TYPE_FRAGLIST_SKB = 1 << 1,
DESC_TYPE_PAGE = 1 << 2, DESC_TYPE_PAGE = 1 << 2,
DESC_TYPE_BOUNCE_ALL = 1 << 3,
DESC_TYPE_BOUNCE_HEAD = 1 << 4,
}; };
struct hns3_desc_cb { struct hns3_desc_cb {
...@@ -405,6 +407,9 @@ struct ring_stats { ...@@ -405,6 +407,9 @@ struct ring_stats {
u64 tx_tso_err; u64 tx_tso_err;
u64 over_max_recursion; u64 over_max_recursion;
u64 hw_limitation; u64 hw_limitation;
u64 tx_bounce;
u64 tx_spare_full;
u64 copy_bits_err;
}; };
struct { struct {
u64 rx_pkts; u64 rx_pkts;
...@@ -423,6 +428,15 @@ struct ring_stats { ...@@ -423,6 +428,15 @@ struct ring_stats {
}; };
}; };
/* Per-ring tx bounce buffer: small packets (len below tx_copybreak) are
 * memcpy'd into this pre-mapped area instead of being DMA-mapped
 * individually, avoiding sub-page DMA exposure and per-packet map/unmap
 * overhead (see the commit description above).
 */
struct hns3_tx_spare {
dma_addr_t dma;	/* DMA address of the spare buffer */
void *buf;	/* CPU virtual address of the spare buffer */
u32 next_to_use;	/* producer offset -- next byte to copy into */
u32 next_to_clean;	/* consumer offset -- next byte to reclaim */
/* offset of the last reclaimed byte; exact wrap semantics not
 * visible here -- TODO confirm against the alloc/reclaim code
 */
u32 last_to_clean;
u32 len;	/* total size of the spare buffer in bytes */
};
struct hns3_enet_ring { struct hns3_enet_ring {
struct hns3_desc *desc; /* dma map address space */ struct hns3_desc *desc; /* dma map address space */
struct hns3_desc_cb *desc_cb; struct hns3_desc_cb *desc_cb;
...@@ -445,18 +459,28 @@ struct hns3_enet_ring { ...@@ -445,18 +459,28 @@ struct hns3_enet_ring {
* next_to_use * next_to_use
*/ */
int next_to_clean; int next_to_clean;
union {
int last_to_use; /* last idx used by xmit */
u32 pull_len; /* memcpy len for current rx packet */
};
u32 frag_num;
void *va; /* first buffer address for current packet */
u32 flag; /* ring attribute */ u32 flag; /* ring attribute */
int pending_buf; int pending_buf;
struct sk_buff *skb; union {
struct sk_buff *tail_skb; /* for Tx ring */
struct {
u32 fd_qb_tx_sample;
int last_to_use; /* last idx used by xmit */
u32 tx_copybreak;
struct hns3_tx_spare *tx_spare;
};
/* for Rx ring */
struct {
u32 pull_len; /* memcpy len for current rx packet */
u32 frag_num;
/* first buffer address for current packet */
unsigned char *va;
struct sk_buff *skb;
struct sk_buff *tail_skb;
};
};
} ____cacheline_internodealigned_in_smp; } ____cacheline_internodealigned_in_smp;
enum hns3_flow_level_range { enum hns3_flow_level_range {
...@@ -540,6 +564,7 @@ struct hns3_nic_priv { ...@@ -540,6 +564,7 @@ struct hns3_nic_priv {
struct hns3_enet_coalesce tx_coal; struct hns3_enet_coalesce tx_coal;
struct hns3_enet_coalesce rx_coal; struct hns3_enet_coalesce rx_coal;
u32 tx_copybreak;
}; };
union l3_hdr_info { union l3_hdr_info {
......
...@@ -46,6 +46,9 @@ static const struct hns3_stats hns3_txq_stats[] = { ...@@ -46,6 +46,9 @@ static const struct hns3_stats hns3_txq_stats[] = {
HNS3_TQP_STAT("tso_err", tx_tso_err), HNS3_TQP_STAT("tso_err", tx_tso_err),
HNS3_TQP_STAT("over_max_recursion", over_max_recursion), HNS3_TQP_STAT("over_max_recursion", over_max_recursion),
HNS3_TQP_STAT("hw_limitation", hw_limitation), HNS3_TQP_STAT("hw_limitation", hw_limitation),
HNS3_TQP_STAT("bounce", tx_bounce),
HNS3_TQP_STAT("spare_full", tx_spare_full),
HNS3_TQP_STAT("copy_bits_err", copy_bits_err),
}; };
#define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats) #define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)
...@@ -1592,6 +1595,50 @@ static int hns3_set_priv_flags(struct net_device *netdev, u32 pflags) ...@@ -1592,6 +1595,50 @@ static int hns3_set_priv_flags(struct net_device *netdev, u32 pflags)
return 0; return 0;
} }
/* ethtool .get_tunable callback.
 *
 * Only ETHTOOL_TX_COPYBREAK is supported: it reports the tx_copybreak
 * threshold cached in the netdev private data (all tx rings share the
 * same value, see hns3_set_tunable()).
 *
 * Return: 0 on success, -EOPNOTSUPP for an unknown tunable id.
 */
static int hns3_get_tunable(struct net_device *netdev,
			    const struct ethtool_tunable *tuna,
			    void *data)
{
	struct hns3_nic_priv *priv = netdev_priv(netdev);

	switch (tuna->id) {
	case ETHTOOL_TX_COPYBREAK:
		/* all the tx rings have the same tx_copybreak */
		*(u32 *)data = priv->tx_copybreak;
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
/* ethtool .set_tunable callback.
 *
 * Only ETHTOOL_TX_COPYBREAK is supported: the new threshold is cached
 * in the netdev private data and propagated to every tx ring so the
 * xmit path can read it without touching priv.
 *
 * Return: 0 on success, -EOPNOTSUPP for an unknown tunable id.
 */
static int hns3_set_tunable(struct net_device *netdev,
			    const struct ethtool_tunable *tuna,
			    const void *data)
{
	struct hns3_nic_priv *priv = netdev_priv(netdev);
	struct hnae3_handle *h = priv->ae_handle;
	int i;

	switch (tuna->id) {
	case ETHTOOL_TX_COPYBREAK:
		priv->tx_copybreak = *(u32 *)data;
		/* mirror the new value into each tx ring */
		for (i = 0; i < h->kinfo.num_tqps; i++)
			priv->ring[i].tx_copybreak = priv->tx_copybreak;
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
#define HNS3_ETHTOOL_COALESCE (ETHTOOL_COALESCE_USECS | \ #define HNS3_ETHTOOL_COALESCE (ETHTOOL_COALESCE_USECS | \
ETHTOOL_COALESCE_USE_ADAPTIVE | \ ETHTOOL_COALESCE_USE_ADAPTIVE | \
ETHTOOL_COALESCE_RX_USECS_HIGH | \ ETHTOOL_COALESCE_RX_USECS_HIGH | \
...@@ -1635,6 +1682,8 @@ static const struct ethtool_ops hns3vf_ethtool_ops = { ...@@ -1635,6 +1682,8 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
.set_msglevel = hns3_set_msglevel, .set_msglevel = hns3_set_msglevel,
.get_priv_flags = hns3_get_priv_flags, .get_priv_flags = hns3_get_priv_flags,
.set_priv_flags = hns3_set_priv_flags, .set_priv_flags = hns3_set_priv_flags,
.get_tunable = hns3_get_tunable,
.set_tunable = hns3_set_tunable,
}; };
static const struct ethtool_ops hns3_ethtool_ops = { static const struct ethtool_ops hns3_ethtool_ops = {
...@@ -1674,6 +1723,8 @@ static const struct ethtool_ops hns3_ethtool_ops = { ...@@ -1674,6 +1723,8 @@ static const struct ethtool_ops hns3_ethtool_ops = {
.get_priv_flags = hns3_get_priv_flags, .get_priv_flags = hns3_get_priv_flags,
.set_priv_flags = hns3_set_priv_flags, .set_priv_flags = hns3_set_priv_flags,
.get_ts_info = hns3_get_ts_info, .get_ts_info = hns3_get_ts_info,
.get_tunable = hns3_get_tunable,
.set_tunable = hns3_set_tunable,
}; };
void hns3_ethtool_set_ops(struct net_device *netdev) void hns3_ethtool_set_ops(struct net_device *netdev)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment