Commit dde0a648 authored by Eric Dumazet, committed by David S. Miller

net_sched: sch_fq: avoid touching f->next from fq_gc()

A significant number of cpu cycles is spent in fq_gc()

When fq_gc() does its lookup in the rb-tree, it needs the
following fields from struct fq_flow :

f->sk       (lookup key in the rb-tree)
f->fq_node  (anchor in the rb-tree)
f->next     (used to determine if the flow is detached)
f->age      (used to determine if the flow is candidate for gc)

This unfortunately spans two cache lines (assuming 64-byte cache lines)
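To make the two-cache-line point concrete, here is a small userspace sketch (not part of the patch) that prints which 64-byte cache line each of the four fields lands on. The struct fq_flow_sketch, fake_rb_node and SHOW names are made up for this illustration; rb_node, sock and sk_buff are approximated by plain pointers, so the offsets are only indicative of a typical 64-bit layout, where f->next is the one field of the four that falls on the second cache line.

/* Rough stand-in for struct fq_flow, same field order as the hunk below.
 * Offsets are approximate: rb_node is modelled as three pointers,
 * sock/sk_buff as opaque pointers. */
#include <stdio.h>
#include <stddef.h>

struct fake_rb_node { void *parent_color, *right, *left; };

struct fq_flow_sketch {
	void *head;
	union {
		void *tail;
		unsigned long age;
	};
	struct fake_rb_node fq_node;
	void *sk;
	int qlen;
	int credit;
	unsigned int socket_hash;
	void *next;
	struct fake_rb_node rate_node;
	unsigned long long time_next_packet;
};

#define SHOW(field)							\
	printf("%-16s offset %3zu -> cache line %zu\n", #field,	\
	       offsetof(struct fq_flow_sketch, field),			\
	       offsetof(struct fq_flow_sketch, field) / 64)

int main(void)
{
	SHOW(sk);       /* rb-tree lookup key     */
	SHOW(fq_node);  /* rb-tree anchor         */
	SHOW(age);      /* gc candidate test      */
	SHOW(next);     /* old "is detached" test */
	return 0;
}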

We can avoid using f->next if we use the low order bit of f->{age|tail} to signal that the flow is detached.

This low order bit is 0, if f->tail points to an sk_buff.
We set the low order bit to 1, if the union contains a jiffies value.

Combined with the following patch, this makes sure we only need
to bring one cache line per flow into cpu caches.
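As a standalone illustration of the tagging scheme, the sketch below is a minimal userspace rendition, not the patch itself: jiffies is stood in by a plain ticks counter, struct sk_buff by a dummy struct skb, and the flow/flow_set_detached/flow_is_detached names are invented here. Because heap (and kernel slab) objects are aligned to at least 2 bytes, a genuine tail pointer always has bit 0 clear, so an odd value in the union can only mean "detached, emptied at this age".

#include <assert.h>
#include <stdlib.h>

struct skb { char data[64]; };

struct flow {
	union {
		struct skb   *tail;   /* last skb in the list */
		unsigned long age;    /* (ticks | 1UL) when flow was emptied */
	};
};

static unsigned long ticks;           /* stand-in for jiffies */

static void flow_set_detached(struct flow *f)
{
	f->age = ticks | 1UL;         /* force bit 0: cannot be a valid pointer */
}

static int flow_is_detached(const struct flow *f)
{
	return f->age & 1UL;          /* odd => age, even => tail pointer */
}

int main(void)
{
	struct skb *skb = malloc(sizeof(*skb));
	struct flow f;

	f.tail = skb;
	assert(!flow_is_detached(&f));  /* pointer: low order bit is 0 */

	ticks = 1000;
	flow_set_detached(&f);
	assert(flow_is_detached(&f));
	assert((f.age & ~1UL) == 1000); /* the age is still recoverable */

	free(skb);
	return 0;
}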
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent ee1bd483
@@ -70,14 +70,14 @@ struct fq_flow {
 	struct sk_buff	*head;		/* list of skbs for this flow : first skb */
 	union {
 		struct sk_buff *tail;	/* last skb in the list */
-		unsigned long  age;	/* jiffies when flow was emptied, for gc */
+		unsigned long  age;	/* (jiffies | 1UL) when flow was emptied, for gc */
 	};
 	struct rb_node	fq_node;	/* anchor in fq_root[] trees */
 	struct sock	*sk;
 	int		qlen;		/* number of packets in flow queue */
 	int		credit;
 	u32		socket_hash;	/* sk_hash */
-	struct fq_flow *next;		/* next pointer in RR lists, or &detached */
+	struct fq_flow *next;		/* next pointer in RR lists */
 
 	struct rb_node	rate_node;	/* anchor in q->delayed tree */
 	u64		time_next_packet;
@@ -126,20 +126,25 @@ struct fq_sched_data {
 	struct qdisc_watchdog watchdog;
 };
 
-/* special value to mark a detached flow (not on old/new list) */
-static struct fq_flow detached, throttled;
+/*
+ * f->tail and f->age share the same location.
+ * We can use the low order bit to differentiate if this location points
+ * to a sk_buff or contains a jiffies value, if we force this value to be odd.
+ * This assumes f->tail low order bit must be 0 since alignof(struct sk_buff) >= 2
+ */
 static void fq_flow_set_detached(struct fq_flow *f)
 {
-	f->next = &detached;
-	f->age = jiffies;
+	f->age = jiffies | 1UL;
 }
 
 static bool fq_flow_is_detached(const struct fq_flow *f)
 {
-	return f->next == &detached;
+	return !!(f->age & 1UL);
 }
 
+/* special value to mark a throttled flow (not on old/new list) */
+static struct fq_flow throttled;
+
 static bool fq_flow_is_throttled(const struct fq_flow *f)
 {
 	return f->next == &throttled;
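The new comment block leans on alignof(struct sk_buff) >= 2, so a genuine tail pointer never has its low order bit set. The patch itself adds no compile-time guard for that assumption; a hypothetical way to express it, shown here as standalone C11 with a dummy skb_standin type rather than the real struct sk_buff, could look like the following (in kernel code, a check along the lines of BUILD_BUG_ON() would play the same role):

/* Hypothetical compile-time expression of the alignment assumption noted
 * in the new comment; not part of this patch.  A dummy type stands in for
 * struct sk_buff. */
#include <stdalign.h>

struct skb_standin { long a; void *b; };

_Static_assert(alignof(struct skb_standin) >= 2,
	       "tail pointers must leave bit 0 free for the detached tag");

int main(void) { return 0; }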