Commit e0155935 authored by Aleksander Jan Bajkowski, committed by Jakub Kicinski

net: lantiq_xrx200: convert to build_skb

We can increase the efficiency of rx path by using buffers to receive
packets then build SKBs around them just before passing into the network
stack. In contrast, preallocating SKBs too early reduces CPU cache
efficiency.

NAT Performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500):

	Down		Up
Before	577 Mbps	648 Mbps
After	624 Mbps	695 Mbps
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 768818d7
...@@ -63,7 +63,11 @@ struct xrx200_chan { ...@@ -63,7 +63,11 @@ struct xrx200_chan {
struct napi_struct napi; struct napi_struct napi;
struct ltq_dma_channel dma; struct ltq_dma_channel dma;
struct sk_buff *skb[LTQ_DESC_NUM];
union {
struct sk_buff *skb[LTQ_DESC_NUM];
void *rx_buff[LTQ_DESC_NUM];
};
struct sk_buff *skb_head; struct sk_buff *skb_head;
struct sk_buff *skb_tail; struct sk_buff *skb_tail;
...@@ -78,6 +82,7 @@ struct xrx200_priv { ...@@ -78,6 +82,7 @@ struct xrx200_priv {
struct xrx200_chan chan_rx; struct xrx200_chan chan_rx;
u16 rx_buf_size; u16 rx_buf_size;
u16 rx_skb_size;
struct net_device *net_dev; struct net_device *net_dev;
struct device *dev; struct device *dev;
...@@ -115,6 +120,12 @@ static int xrx200_buffer_size(int mtu) ...@@ -115,6 +120,12 @@ static int xrx200_buffer_size(int mtu)
return round_up(xrx200_max_frame_len(mtu), 4 * XRX200_DMA_BURST_LEN); return round_up(xrx200_max_frame_len(mtu), 4 * XRX200_DMA_BURST_LEN);
} }
/*
 * Compute the total allocation size for one RX buffer that will later be
 * wrapped in an skb.
 *
 * @buf_size: size of the DMA receive area in bytes.
 *
 * Returns the sum of two SKB_DATA_ALIGN()-rounded parts: the data area
 * (buf_size plus NET_SKB_PAD + NET_IP_ALIGN of headroom) and the trailing
 * struct skb_shared_info.
 */
static int xrx200_skb_size(u16 buf_size)
{
return SKB_DATA_ALIGN(buf_size + NET_SKB_PAD + NET_IP_ALIGN) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
}
/* drop all the packets from the DMA ring */ /* drop all the packets from the DMA ring */
static void xrx200_flush_dma(struct xrx200_chan *ch) static void xrx200_flush_dma(struct xrx200_chan *ch)
{ {
...@@ -173,30 +184,29 @@ static int xrx200_close(struct net_device *net_dev) ...@@ -173,30 +184,29 @@ static int xrx200_close(struct net_device *net_dev)
return 0; return 0;
} }
static int xrx200_alloc_skb(struct xrx200_chan *ch) static int xrx200_alloc_buf(struct xrx200_chan *ch, void *(*alloc)(unsigned int size))
{ {
struct sk_buff *skb = ch->skb[ch->dma.desc]; void *buf = ch->rx_buff[ch->dma.desc];
struct xrx200_priv *priv = ch->priv; struct xrx200_priv *priv = ch->priv;
dma_addr_t mapping; dma_addr_t mapping;
int ret = 0; int ret = 0;
ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(priv->net_dev, ch->rx_buff[ch->dma.desc] = alloc(priv->rx_skb_size);
priv->rx_buf_size); if (!ch->rx_buff[ch->dma.desc]) {
if (!ch->skb[ch->dma.desc]) {
ret = -ENOMEM; ret = -ENOMEM;
goto skip; goto skip;
} }
mapping = dma_map_single(priv->dev, ch->skb[ch->dma.desc]->data, mapping = dma_map_single(priv->dev, ch->rx_buff[ch->dma.desc],
priv->rx_buf_size, DMA_FROM_DEVICE); priv->rx_buf_size, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(priv->dev, mapping))) { if (unlikely(dma_mapping_error(priv->dev, mapping))) {
dev_kfree_skb_any(ch->skb[ch->dma.desc]); skb_free_frag(ch->rx_buff[ch->dma.desc]);
ch->skb[ch->dma.desc] = skb; ch->rx_buff[ch->dma.desc] = buf;
ret = -ENOMEM; ret = -ENOMEM;
goto skip; goto skip;
} }
ch->dma.desc_base[ch->dma.desc].addr = mapping; ch->dma.desc_base[ch->dma.desc].addr = mapping + NET_SKB_PAD + NET_IP_ALIGN;
/* Make sure the address is written before we give it to HW */ /* Make sure the address is written before we give it to HW */
wmb(); wmb();
skip: skip:
...@@ -210,13 +220,14 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) ...@@ -210,13 +220,14 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
{ {
struct xrx200_priv *priv = ch->priv; struct xrx200_priv *priv = ch->priv;
struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc]; struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
struct sk_buff *skb = ch->skb[ch->dma.desc]; void *buf = ch->rx_buff[ch->dma.desc];
u32 ctl = desc->ctl; u32 ctl = desc->ctl;
int len = (ctl & LTQ_DMA_SIZE_MASK); int len = (ctl & LTQ_DMA_SIZE_MASK);
struct net_device *net_dev = priv->net_dev; struct net_device *net_dev = priv->net_dev;
struct sk_buff *skb;
int ret; int ret;
ret = xrx200_alloc_skb(ch); ret = xrx200_alloc_buf(ch, napi_alloc_frag);
ch->dma.desc++; ch->dma.desc++;
ch->dma.desc %= LTQ_DESC_NUM; ch->dma.desc %= LTQ_DESC_NUM;
...@@ -227,19 +238,21 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) ...@@ -227,19 +238,21 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
return ret; return ret;
} }
skb = build_skb(buf, priv->rx_skb_size);
skb_reserve(skb, NET_SKB_PAD);
skb_put(skb, len); skb_put(skb, len);
/* add buffers to skb via skb->frag_list */ /* add buffers to skb via skb->frag_list */
if (ctl & LTQ_DMA_SOP) { if (ctl & LTQ_DMA_SOP) {
ch->skb_head = skb; ch->skb_head = skb;
ch->skb_tail = skb; ch->skb_tail = skb;
skb_reserve(skb, NET_IP_ALIGN);
} else if (ch->skb_head) { } else if (ch->skb_head) {
if (ch->skb_head == ch->skb_tail) if (ch->skb_head == ch->skb_tail)
skb_shinfo(ch->skb_tail)->frag_list = skb; skb_shinfo(ch->skb_tail)->frag_list = skb;
else else
ch->skb_tail->next = skb; ch->skb_tail->next = skb;
ch->skb_tail = skb; ch->skb_tail = skb;
skb_reserve(ch->skb_tail, -NET_IP_ALIGN);
ch->skb_head->len += skb->len; ch->skb_head->len += skb->len;
ch->skb_head->data_len += skb->len; ch->skb_head->data_len += skb->len;
ch->skb_head->truesize += skb->truesize; ch->skb_head->truesize += skb->truesize;
...@@ -395,12 +408,13 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu) ...@@ -395,12 +408,13 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu)
struct xrx200_chan *ch_rx = &priv->chan_rx; struct xrx200_chan *ch_rx = &priv->chan_rx;
int old_mtu = net_dev->mtu; int old_mtu = net_dev->mtu;
bool running = false; bool running = false;
struct sk_buff *skb; void *buff;
int curr_desc; int curr_desc;
int ret = 0; int ret = 0;
net_dev->mtu = new_mtu; net_dev->mtu = new_mtu;
priv->rx_buf_size = xrx200_buffer_size(new_mtu); priv->rx_buf_size = xrx200_buffer_size(new_mtu);
priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);
if (new_mtu <= old_mtu) if (new_mtu <= old_mtu)
return ret; return ret;
...@@ -416,14 +430,15 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu) ...@@ -416,14 +430,15 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu)
for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM; for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM;
ch_rx->dma.desc++) { ch_rx->dma.desc++) {
skb = ch_rx->skb[ch_rx->dma.desc]; buff = ch_rx->rx_buff[ch_rx->dma.desc];
ret = xrx200_alloc_skb(ch_rx); ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag);
if (ret) { if (ret) {
net_dev->mtu = old_mtu; net_dev->mtu = old_mtu;
priv->rx_buf_size = xrx200_buffer_size(old_mtu); priv->rx_buf_size = xrx200_buffer_size(old_mtu);
priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);
break; break;
} }
dev_kfree_skb_any(skb); skb_free_frag(buff);
} }
ch_rx->dma.desc = curr_desc; ch_rx->dma.desc = curr_desc;
...@@ -476,7 +491,7 @@ static int xrx200_dma_init(struct xrx200_priv *priv) ...@@ -476,7 +491,7 @@ static int xrx200_dma_init(struct xrx200_priv *priv)
ltq_dma_alloc_rx(&ch_rx->dma); ltq_dma_alloc_rx(&ch_rx->dma);
for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM; for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM;
ch_rx->dma.desc++) { ch_rx->dma.desc++) {
ret = xrx200_alloc_skb(ch_rx); ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag);
if (ret) if (ret)
goto rx_free; goto rx_free;
} }
...@@ -511,7 +526,7 @@ static int xrx200_dma_init(struct xrx200_priv *priv) ...@@ -511,7 +526,7 @@ static int xrx200_dma_init(struct xrx200_priv *priv)
/* free the allocated RX ring */ /* free the allocated RX ring */
for (i = 0; i < LTQ_DESC_NUM; i++) { for (i = 0; i < LTQ_DESC_NUM; i++) {
if (priv->chan_rx.skb[i]) if (priv->chan_rx.skb[i])
dev_kfree_skb_any(priv->chan_rx.skb[i]); skb_free_frag(priv->chan_rx.rx_buff[i]);
} }
rx_free: rx_free:
...@@ -528,7 +543,7 @@ static void xrx200_hw_cleanup(struct xrx200_priv *priv) ...@@ -528,7 +543,7 @@ static void xrx200_hw_cleanup(struct xrx200_priv *priv)
/* free the allocated RX ring */ /* free the allocated RX ring */
for (i = 0; i < LTQ_DESC_NUM; i++) for (i = 0; i < LTQ_DESC_NUM; i++)
dev_kfree_skb_any(priv->chan_rx.skb[i]); skb_free_frag(priv->chan_rx.rx_buff[i]);
} }
static int xrx200_probe(struct platform_device *pdev) static int xrx200_probe(struct platform_device *pdev)
...@@ -553,6 +568,7 @@ static int xrx200_probe(struct platform_device *pdev) ...@@ -553,6 +568,7 @@ static int xrx200_probe(struct platform_device *pdev)
net_dev->min_mtu = ETH_ZLEN; net_dev->min_mtu = ETH_ZLEN;
net_dev->max_mtu = XRX200_DMA_DATA_LEN - xrx200_max_frame_len(0); net_dev->max_mtu = XRX200_DMA_DATA_LEN - xrx200_max_frame_len(0);
priv->rx_buf_size = xrx200_buffer_size(ETH_DATA_LEN); priv->rx_buf_size = xrx200_buffer_size(ETH_DATA_LEN);
priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);
/* load the memory ranges */ /* load the memory ranges */
priv->pmac_reg = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); priv->pmac_reg = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment