Commit 689adf0d, authored by Boris Pismenny and committed by Saeed Mahameed

net/mlx5e: Add UDP GSO support

This patch enables UDP GSO support. We enable this by using two WQEs:
the first is a UDP LSO WQE for all segments of equal length, and the
second is for the last segment in case it has a different length.
Due to a HW limitation, we must adjust the packet length fields before sending.

We measured performance between two Intel(R) Xeon(R) CPU E5-2643 v2 @3.50GHz
machines connected back-to-back with ConnectX-4 Lx (40Gbps) NICs.
We compared single-stream UDP, UDP GSO, and UDP GSO with offload.
Performance:
		| MSS (bytes)	| Throughput (Gbps)	| CPU utilization (%)
UDP GSO offload	| 1472		| 35.6			| 8%
UDP GSO 	| 1472		| 25.5			| 17%
UDP 		| 1472		| 10.2			| 17%
UDP GSO offload	| 1024		| 35.6			| 8%
UDP GSO		| 1024		| 19.2			| 17%
UDP 		| 1024		| 5.7			| 17%
UDP GSO offload	| 512		| 33.8			| 16%
UDP GSO		| 512		| 10.4			| 17%
UDP 		| 512		| 3.5			| 17%
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
parent 7861552c
...@@ -14,8 +14,8 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ ...@@ -14,8 +14,8 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
fpga/ipsec.o fpga/tls.o fpga/ipsec.o fpga/tls.o
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en_stats.o vxlan.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en_accel/rxtx.o en_stats.o \
en_arfs.o en_fs_ethtool.o en_selftest.o en/port.o vxlan.o en_arfs.o en_fs_ethtool.o en_selftest.o en/port.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
......
...@@ -34,12 +34,11 @@ ...@@ -34,12 +34,11 @@
#ifndef __MLX5E_EN_ACCEL_H__ #ifndef __MLX5E_EN_ACCEL_H__
#define __MLX5E_EN_ACCEL_H__ #define __MLX5E_EN_ACCEL_H__
#ifdef CONFIG_MLX5_ACCEL
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include "en_accel/ipsec_rxtx.h" #include "en_accel/ipsec_rxtx.h"
#include "en_accel/tls_rxtx.h" #include "en_accel/tls_rxtx.h"
#include "en_accel/rxtx.h"
#include "en.h" #include "en.h"
static inline struct sk_buff *mlx5e_accel_handle_tx(struct sk_buff *skb, static inline struct sk_buff *mlx5e_accel_handle_tx(struct sk_buff *skb,
...@@ -64,9 +63,13 @@ static inline struct sk_buff *mlx5e_accel_handle_tx(struct sk_buff *skb, ...@@ -64,9 +63,13 @@ static inline struct sk_buff *mlx5e_accel_handle_tx(struct sk_buff *skb,
} }
#endif #endif
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
skb = mlx5e_udp_gso_handle_tx_skb(dev, sq, skb, wqe, pi);
if (unlikely(!skb))
return NULL;
}
return skb; return skb;
} }
#endif /* CONFIG_MLX5_ACCEL */
#endif /* __MLX5E_EN_ACCEL_H__ */ #endif /* __MLX5E_EN_ACCEL_H__ */
#include <linux/netdevice.h>
#include "en_accel/rxtx.h"
/*
 * Build @nskb as a stand-alone UDP packet carrying the last @remaining
 * payload bytes of the UDP GSO packet @skb.
 *
 * @skb:       original GSO skb; it is only read here (plus an extra page
 *             reference taken on every frag that is shared with @nskb).
 * @nskb:      freshly allocated skb whose linear area can hold the L2/L3/UDP
 *             headers plus whatever part of the tail lives in @skb's linear
 *             area.
 * @remaining: number of trailing payload bytes that form the short last
 *             segment; must be > 0.
 *
 * The headers are copied from @skb and the IP/IPv6 and UDP length fields are
 * rewritten for a single datagram of @remaining bytes.  Payload that sits in
 * page frags of @skb is shared by reference, payload that sits in the linear
 * area of @skb is copied.
 */
static void mlx5e_udp_gso_prepare_last_skb(struct sk_buff *skb,
					   struct sk_buff *nskb,
					   int remaining)
{
	int bytes_needed = remaining, remaining_headlen, remaining_page_offset;
	int headlen = skb_transport_offset(skb) + sizeof(struct udphdr);
	int payload_len = remaining + sizeof(struct udphdr);
	int k = 0, i, j;

	/* Clone the protocol headers and mirror the header offsets. */
	skb_copy_bits(skb, 0, nskb->data, headlen);
	nskb->dev = skb->dev;
	skb_reset_mac_header(nskb);
	skb_set_network_header(nskb, skb_network_offset(skb));
	skb_set_transport_header(nskb, skb_transport_offset(skb));

	/* How many frags do we need? Walk backwards from the last frag until
	 * the tail is covered or we run out of frags.
	 */
	for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
		bytes_needed -= skb_frag_size(&skb_shinfo(skb)->frags[i]);
		k++;
		if (bytes_needed <= 0)
			break;
	}

	/* k == 0 means @skb has no frags at all: frags[0] is not valid and
	 * must not be touched, the whole tail comes from the linear area.
	 */
	if (k > 0) {
		/* Fill the first frag and split it if necessary.
		 * bytes_needed < 0 is the number of leading bytes of frag j
		 * that belong to earlier segments and must be skipped.
		 * bytes_needed > 0 means every frag is used in full and the
		 * rest of the tail is in the linear area, so there is nothing
		 * to skip; the offset must never go negative.
		 */
		j = skb_shinfo(skb)->nr_frags - k;
		remaining_page_offset = max_t(int, 0, -bytes_needed);
		skb_fill_page_desc(nskb, 0,
				   skb_shinfo(skb)->frags[j].page.p,
				   skb_shinfo(skb)->frags[j].page_offset + remaining_page_offset,
				   skb_shinfo(skb)->frags[j].size - remaining_page_offset);
		skb_frag_ref(skb, j);

		/* Fill the rest of the frags */
		for (i = 1; i < k; i++) {
			j = skb_shinfo(skb)->nr_frags - k + i;

			skb_fill_page_desc(nskb, i,
					   skb_shinfo(skb)->frags[j].page.p,
					   skb_shinfo(skb)->frags[j].page_offset,
					   skb_shinfo(skb)->frags[j].size);
			skb_frag_ref(skb, j);
		}
	}
	skb_shinfo(nskb)->nr_frags = k;

	/* Part of the tail that lives in @skb's linear area (0 if none). */
	remaining_headlen = max_t(int, 0, remaining - skb->data_len);
	if (remaining_headlen > 0)
		skb_copy_bits(skb, skb->len - remaining, nskb->data + headlen,
			      remaining_headlen);

	/* Linear area = headers + copied tail bytes; everything else is in
	 * frags.  Keep len, data_len and the tail pointer consistent so that
	 * skb_headlen(nskb) == headlen + remaining_headlen.
	 */
	skb_set_tail_pointer(nskb, headlen + remaining_headlen);
	nskb->len = remaining + headlen;
	nskb->data_len = remaining - remaining_headlen;
	nskb->protocol = skb->protocol;

	/* Rewrite the L3/L4 length fields for a single short datagram. */
	if (nskb->protocol == htons(ETH_P_IP)) {
		ip_hdr(nskb)->id = htons(ntohs(ip_hdr(nskb)->id) +
					 skb_shinfo(skb)->gso_segs);
		ip_hdr(nskb)->tot_len =
			htons(payload_len + sizeof(struct iphdr));
	} else {
		ipv6_hdr(nskb)->payload_len = htons(payload_len);
	}
	udp_hdr(nskb)->len = htons(payload_len);

	/* The last segment is a plain packet, not a GSO one. */
	skb_shinfo(nskb)->gso_size = 0;
	nskb->ip_summed = skb->ip_summed;
	nskb->csum_start = skb->csum_start;
	nskb->csum_offset = skb->csum_offset;
	nskb->queue_mapping = skb->queue_mapping;
}
/*
 * Prepare a UDP GSO skb for transmission.
 *
 * The IP/IPv6 and UDP length fields of @skb are rewritten to describe a single
 * segment of gso_size payload bytes.  If the payload is not a whole multiple
 * of gso_size, the short trailing segment is moved into a newly allocated skb:
 * @skb is trimmed and sent right away on @sq using *@wqe / *@pi, a fresh WQE
 * is fetched into *@wqe / *@pi, and the new skb is returned for the caller to
 * send.
 *
 * might send skbs and update wqe and pi
 *
 * Return: the skb the caller should transmit next (@skb itself when there is
 * no short last segment), or NULL if the packet was dropped.  On NULL, @skb
 * has been freed and sq->stats->dropped has been incremented.
 */
struct sk_buff *mlx5e_udp_gso_handle_tx_skb(struct net_device *netdev,
					    struct mlx5e_txqsq *sq,
					    struct sk_buff *skb,
					    struct mlx5e_tx_wqe **wqe,
					    u16 *pi)
{
	int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr);
	int headlen = skb_transport_offset(skb) + sizeof(struct udphdr);
	int remaining = (skb->len - headlen) % skb_shinfo(skb)->gso_size;
	struct sk_buff *nskb;

	/* Length fields must describe one full-sized segment. */
	if (skb->protocol == htons(ETH_P_IP))
		ip_hdr(skb)->tot_len = htons(payload_len + sizeof(struct iphdr));
	else
		ipv6_hdr(skb)->payload_len = htons(payload_len);
	udp_hdr(skb)->len = htons(payload_len);

	/* All segments have equal length: a single WQE is enough. */
	if (!remaining)
		return skb;

	/* Linear room for the headers plus any tail bytes that are not in
	 * page frags of @skb.
	 */
	nskb = alloc_skb(max_t(int, headlen, headlen + remaining - skb->data_len), GFP_ATOMIC);
	if (unlikely(!nskb))
		goto err_drop;

	mlx5e_udp_gso_prepare_last_skb(skb, nskb, remaining);

	skb_shinfo(skb)->gso_segs--;
	/* If the trim fails, @skb still contains the tail that @nskb now also
	 * carries; sending both would duplicate data, so drop the packet.
	 */
	if (unlikely(pskb_trim(skb, skb->len - remaining))) {
		dev_kfree_skb_any(nskb);
		goto err_drop;
	}

	mlx5e_sq_xmit(sq, skb, *wqe, *pi);
	mlx5e_sq_fetch_wqe(sq, wqe, pi);
	return nskb;

err_drop:
	/* The caller treats NULL as "consumed", so @skb must be freed here. */
	sq->stats->dropped++;
	dev_kfree_skb_any(skb);
	return NULL;
}
#ifndef __MLX5E_EN_ACCEL_RX_TX_H__
#define __MLX5E_EN_ACCEL_RX_TX_H__
#include <linux/skbuff.h>
#include "en.h"
/*
 * TX-path hook for UDP GSO skbs.
 *
 * Rewrites the IP/IPv6 and UDP length fields of @skb for a single gso_size
 * segment.  When the payload is not a whole multiple of gso_size, the short
 * last segment is split off into a new skb: @skb is transmitted on @sq with
 * the WQE in *@wqe / *@pi, both are then updated to a freshly fetched WQE,
 * and the new skb is returned.
 *
 * Returns the skb the caller should send next, or NULL if the skb for the
 * last segment could not be allocated.
 */
struct sk_buff *mlx5e_udp_gso_handle_tx_skb(struct net_device *netdev,
struct mlx5e_txqsq *sq,
struct sk_buff *skb,
struct mlx5e_tx_wqe **wqe,
u16 *pi);
#endif /* __MLX5E_EN_ACCEL_RX_TX_H__ */
...@@ -4592,6 +4592,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) ...@@ -4592,6 +4592,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HIGHDMA;
netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER;
netdev->features |= NETIF_F_GSO_UDP_L4;
netdev->hw_features |= NETIF_F_GSO_UDP_L4;
netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_UNICAST_FLT;
mlx5e_set_netdev_dev_addr(netdev); mlx5e_set_netdev_dev_addr(netdev);
......
...@@ -228,6 +228,9 @@ mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb) ...@@ -228,6 +228,9 @@ mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb)
stats->tso_inner_packets++; stats->tso_inner_packets++;
stats->tso_inner_bytes += skb->len - ihs; stats->tso_inner_bytes += skb->len - ihs;
} else { } else {
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
else
ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
stats->tso_packets++; stats->tso_packets++;
stats->tso_bytes += skb->len - ihs; stats->tso_bytes += skb->len - ihs;
...@@ -443,12 +446,11 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -443,12 +446,11 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
sq = priv->txq2sq[skb_get_queue_mapping(skb)]; sq = priv->txq2sq[skb_get_queue_mapping(skb)];
mlx5e_sq_fetch_wqe(sq, &wqe, &pi); mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
#ifdef CONFIG_MLX5_ACCEL
/* might send skbs and update wqe and pi */ /* might send skbs and update wqe and pi */
skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi); skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
if (unlikely(!skb)) if (unlikely(!skb))
return NETDEV_TX_OK; return NETDEV_TX_OK;
#endif
return mlx5e_sq_xmit(sq, skb, wqe, pi); return mlx5e_sq_xmit(sq, skb, wqe, pi);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment