Commit 5eddb249 authored by Coco Li, committed by David S. Miller

gro: add support of (hw)gro packets to gro stack

The current GRO stack only supports incoming packets containing
one frame/MSS.

This patch changes GRO to also accept packets that are already
aggregated (for instance by HW-GRO).

HW-GRO (aka RSC for some vendors) is very often limited in the
presence of interleaved packets. The Linux SW GRO stack can complete
the job and provide larger GRO packets, thus reducing the rate of ACK
packets and CPU overhead.

This also means BIG TCP can still be used, even if HW-GRO/RSC was
able to cook ~64 KB GRO packets.
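
For readers skimming the diff below, the key notion is what "a packet that
is already GRO" looks like at the skb level. The following is a hedged
sketch, not kernel code, and the helper name is hypothetical; it only
restates how the gso fields describe a HW-GRO/RSC aggregate.

#include <linux/skbuff.h>

/* Hypothetical helper, for illustration only. */
static bool skb_is_pre_aggregated(const struct sk_buff *skb)
{
	/* skb_is_gso() is true when gso_size is non-zero; for a HW-GRO/RSC
	 * packet, gso_size is the MSS the device used and gso_segs is the
	 * number of frames it coalesced into this skb.
	 */
	return skb_is_gso(skb);
}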

v2: fix logic in tcp_gro_receive()

    Only support TCP for the moment (Paolo)
Co-developed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Coco Li <lixiaoyan@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 197060c1
@@ -160,6 +160,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 	unsigned int gro_max_size;
 	unsigned int new_truesize;
 	struct sk_buff *lp;
+	int segs;
 
 	/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
 	gro_max_size = READ_ONCE(p->dev->gro_max_size);
@@ -175,6 +176,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 		return -E2BIG;
 	}
 
+	segs = NAPI_GRO_CB(skb)->count;
 	lp = NAPI_GRO_CB(p)->last;
 	pinfo = skb_shinfo(lp);
@@ -265,7 +267,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 	lp = p;
 
 done:
-	NAPI_GRO_CB(p)->count++;
+	NAPI_GRO_CB(p)->count += segs;
 	p->data_len += len;
 	p->truesize += delta_truesize;
 	p->len += len;
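
The accounting change above can be read as follows: the number of segments
carried by the incoming skb is captured (segs) before the merge and added to
the held packet's counter, so an aggregate of aggregates keeps an accurate
segment count. For example, if p already accounts for 10 segments and skb is
a HW-GRO packet with gso_segs = 16, the merged packet now reports 26 segments
where the old count++ would have reported 11. A minimal sketch of that idea,
with a hypothetical helper name (the real work happens inline in
skb_gro_receive()):

#include <linux/skbuff.h>
#include <net/gro.h>

/* Hypothetical helper, for illustration only. */
static void gro_account_merge(struct sk_buff *p, struct sk_buff *skb)
{
	/* 1 for a plain frame, gso_segs for an already-aggregated skb
	 * (seeded in dev_gro_receive(), see the next hunks).
	 */
	int segs = NAPI_GRO_CB(skb)->count;

	NAPI_GRO_CB(p)->count += segs;
}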
@@ -496,8 +498,15 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
 				 sizeof(u32))); /* Avoid slow unaligned acc */
 	*(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
-	NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
+	NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
 	NAPI_GRO_CB(skb)->is_atomic = 1;
+	NAPI_GRO_CB(skb)->count = 1;
+	if (unlikely(skb_is_gso(skb))) {
+		NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
+		/* Only support TCP at the moment. */
+		if (!skb_is_gso_tcp(skb))
+			NAPI_GRO_CB(skb)->flush = 1;
+	}
 
 	/* Setup for GRO checksum validation */
 	switch (skb->ip_summed) {
@@ -545,9 +554,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	else
 		gro_list->count++;
 
-	NAPI_GRO_CB(skb)->count = 1;
 	NAPI_GRO_CB(skb)->age = jiffies;
 	NAPI_GRO_CB(skb)->last = skb;
-	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
+	if (!skb_is_gso(skb))
+		skb_shinfo(skb)->gso_size = skb_gro_len(skb);
 	list_add(&skb->list, &gro_list->list);
 	ret = GRO_HELD;
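
Two notes on the dev_gro_receive() hunks above. First, the TCP-only
restriction hinges on skb_is_gso_tcp(); for reference, it boils down to a
gso_type check, roughly paraphrased from include/linux/skbuff.h (the helper
in a given tree may differ slightly):

/* Paraphrase of skb_is_gso_tcp(), for reference. */
static inline bool skb_is_gso_tcp(const struct sk_buff *skb)
{
	return skb_is_gso(skb) &&
	       (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6));
}

Second, gso_size is now only overwritten for non-GSO skbs when a packet is
first held: an already-aggregated packet keeps the MSS recorded by the
device, which tcp_gro_receive() later compares against the flow's mss (see
the tcp_gro_receive() hunks below).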
@@ -660,6 +669,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 	skb->encapsulation = 0;
 	skb_shinfo(skb)->gso_type = 0;
+	skb_shinfo(skb)->gso_size = 0;
 	if (unlikely(skb->slow_gro)) {
 		skb_orphan(skb);
 		skb_ext_reset(skb);
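
Clearing gso_size when a napi skb is recycled keeps a stale value from a
previous use from making the next packet look like an already-aggregated one,
since skb_is_gso() is simply a test on that field. Roughly, paraphrased from
include/linux/skbuff.h:

/* Paraphrase of skb_is_gso(): non-zero gso_size means GSO/GRO packet. */
static inline bool skb_is_gso(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_size;
}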
@@ -255,7 +255,15 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 	mss = skb_shinfo(p)->gso_size;
 
-	flush |= (len - 1) >= mss;
+	/* If skb is a GRO packet, make sure its gso_size matches prior packet mss.
+	 * If it is a single frame, do not aggregate it if its length
+	 * is bigger than our mss.
+	 */
+	if (unlikely(skb_is_gso(skb)))
+		flush |= (mss != skb_shinfo(skb)->gso_size);
+	else
+		flush |= (len - 1) >= mss;
+
 	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
 #ifdef CONFIG_TLS_DEVICE
 	flush |= p->decrypted ^ skb->decrypted;
@@ -269,7 +277,12 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 		tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
 
 out_check_final:
-	flush = len < mss;
+	/* Force a flush if last segment is smaller than mss. */
+	if (unlikely(skb_is_gso(skb)))
+		flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size;
+	else
+		flush = len < mss;
+
 	flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
 					TCP_FLAG_RST | TCP_FLAG_SYN |
 					TCP_FLAG_FIN));
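
Putting the two tcp_gro_receive() checks together with hypothetical numbers:
with an established mss of 1448, an incoming HW-GRO skb is merged only if its
own gso_size is also 1448; and at out_check_final, an skb seeded with
count = 4 keeps the flow open only if its TCP payload is exactly
4 * 1448 = 5792 bytes, since a shorter payload implies a runt final segment,
the GSO analogue of the old len < mss rule. A minimal sketch of that final
test, with a hypothetical helper name:

#include <linux/skbuff.h>
#include <net/gro.h>

/* Hypothetical helper, for illustration only: an already-aggregated skb
 * must end exactly on an MSS boundary to keep the flow open for more
 * aggregation.
 */
static bool tcp_gro_gso_needs_flush(struct sk_buff *skb, unsigned int len)
{
	return len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size;
}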