Commit f7c81167 authored by Peter Oskolkov, committed by Stefan Bader

ip: add helpers to process in-order fragments faster.

BugLink: https://bugs.launchpad.net/bugs/1818806

commit 353c9cb3 upstream.

This patch introduces several helper functions/macros that will be
used in the follow-up patch. No runtime changes yet.

The new logic (fully implemented in the second patch) is as follows:

* Nodes in the rb-tree will now contain not single fragments, but lists
  of consecutive fragments ("runs").

* At each point in time, the current "active" run at the tail is
  maintained/tracked. Fragments that arrive in-order, adjacent
  to the previous tail fragment, are added to this tail run without
  triggering the re-balancing of the rb-tree.

* If a fragment arrives out of order with the offset _before_ the tail run,
  it is inserted into the rb-tree as a single fragment.

* If a fragment arrives after the current tail fragment (with a gap),
  it starts a new "tail" run, and is inserted into the rb-tree
  at the end as the head of the new run.

skb->cb is used to store additional information
needed here (suggested by Eric Dumazet).
Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Juerg Haefliger <juergh@canonical.com>
Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>
parent bee79060
......@@ -55,7 +55,9 @@ struct frag_v6_compare_key {
* @lock: spinlock protecting this frag
* @refcnt: reference count of the queue
* @fragments: received fragments head
* @rb_fragments: received fragments rb-tree root
* @fragments_tail: received fragments tail
* @last_run_head: the head of the last "run". see ip_fragment.c
* @stamp: timestamp of the last received fragment
* @len: total length of the original datagram
* @meat: length of received fragments so far
......@@ -76,6 +78,7 @@ struct inet_frag_queue {
struct sk_buff *fragments; /* Used in IPv6. */
struct rb_root rb_fragments; /* Used in IPv4. */
struct sk_buff *fragments_tail;
struct sk_buff *last_run_head;
ktime_t stamp;
int len;
int meat;
......@@ -112,6 +115,9 @@ void inet_frag_kill(struct inet_frag_queue *q);
void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
/* Free all skbs in the queue; return the sum of their truesizes. */
unsigned int inet_frag_rbtree_purge(struct rb_root *root);
static inline void inet_frag_put(struct inet_frag_queue *q)
{
if (atomic_dec_and_test(&q->refcnt))
......
......@@ -58,6 +58,57 @@
static int sysctl_ipfrag_max_dist __read_mostly = 64;
static const char ip_frag_cache_name[] = "ip4-frags";
/* Use skb->cb to track consecutive/adjacent fragments coming at
 * the end of the queue. Nodes in the rb-tree queue will
 * contain "runs" of one or more adjacent fragments.
 *
 * Invariants:
 * - next_frag is NULL at the tail of a "run";
 * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
 */
struct ipfrag_skb_cb {
/* NOTE(review): kept as the first member, presumably so that code which
 * treats skb->cb as a struct inet_skb_parm keeps working - confirm.
 */
struct inet_skb_parm h;
/* Next fragment in the same run; NULL on the last fragment of the run. */
struct sk_buff *next_frag;
/* Meaningful on the head of a run: summed skb->len of the run's fragments. */
int frag_run_len;
};
/* View the control buffer of skb (skb->cb) as a struct ipfrag_skb_cb. */
#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
static void ip4_frag_init_run(struct sk_buff *skb)
{
BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
FRAG_CB(skb)->next_frag = NULL;
FRAG_CB(skb)->frag_run_len = skb->len;
}
/* Append skb to the last "run". */
static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
struct sk_buff *skb)
{
RB_CLEAR_NODE(&skb->rbnode);
FRAG_CB(skb)->next_frag = NULL;
FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
FRAG_CB(q->fragments_tail)->next_frag = skb;
q->fragments_tail = skb;
}
/* Create a new "run" with the skb. */
static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
{
if (q->last_run_head)
rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
&q->last_run_head->rbnode.rb_right);
else
rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
rb_insert_color(&skb->rbnode, &q->rb_fragments);
ip4_frag_init_run(skb);
q->fragments_tail = skb;
q->last_run_head = skb;
}
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
......@@ -658,6 +709,28 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
}
EXPORT_SYMBOL(ip_check_defrag);
/* Release every skb reachable from the rb-tree at @root.
 *
 * Each tree node is erased from the tree, then the node's skb and all
 * fragments chained to it through FRAG_CB()->next_frag are passed to
 * kfree_skb().
 *
 * Returns the sum of skb->truesize over all skbs that were freed.
 */
unsigned int inet_frag_rbtree_purge(struct rb_root *root)
{
	struct rb_node *node, *next_node;
	unsigned int freed = 0;

	for (node = rb_first(root); node; node = next_node) {
		struct sk_buff *frag = rb_entry(node, struct sk_buff, rbnode);

		/* Pick up the successor before the node leaves the tree. */
		next_node = rb_next(node);
		rb_erase(&frag->rbnode, root);

		/* Walk the run, reading next_frag before each skb is freed. */
		while (frag) {
			struct sk_buff *doomed = frag;

			frag = FRAG_CB(doomed)->next_frag;
			freed += doomed->truesize;
			kfree_skb(doomed);
		}
	}

	return freed;
}
EXPORT_SYMBOL(inet_frag_rbtree_purge);
#ifdef CONFIG_SYSCTL
static int dist_min;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment