Commit 4d202a0d authored by Eric Dumazet, committed by David S. Miller

net_sched: generalize bulk dequeue

When qdisc bulk dequeue was added in linux-3.18 (commit
5772e9a3 "qdisc: bulk dequeue support for qdiscs
with TCQ_F_ONETXQUEUE"), it was constrained to some
specific qdiscs.

With some extra care, we can extend this to all qdiscs,
so that typical traffic shaping solutions can benefit from
small batches (8 packets in this patch).

For example, HTB is often used on multi-queue devices,
and bonding/team are multi-queue devices...

The idea is to bulk-dequeue packets that map to the same transmit queue.

This brings a 35 to 80 % performance increase for HTB
under pressure on a bonding setup:

1) NUMA node contention :   610,000 pps -> 1,110,000 pps
2) No node contention   : 1,380,000 pps -> 1,930,000 pps

Now we should work to add batches on the enqueue() side ;)
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Florian Westphal <fw@strlen.de>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 338ed9b4
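
To make the idea concrete before reading the diff: the new slow path dequeues packets one by one and chains them as long as they map to the same txq, stopping after 8 packets or at the first packet destined for a different txq, which is parked (in skb_bad_txq) for the next round. Below is a minimal userspace sketch of that loop; pkt, toy_dequeue(), bulk_dequeue_same_txq() and BATCH_LIMIT are made-up illustrative names, not kernel code — the real helper is try_bulk_dequeue_skb_slow() in the diff that follows.

/* Toy userspace sketch of the same-txq bulk dequeue idea.
 * "pkt", "toy_dequeue" and BATCH_LIMIT are illustrative stand-ins,
 * not kernel structures or functions.
 */
#include <stdio.h>

#define BATCH_LIMIT 8			/* mirrors the 8-packet batch in this patch */

struct pkt {
	int queue_mapping;		/* destination txq of this packet */
	struct pkt *next;
};

static struct pkt *fifo_head;		/* toy qdisc backlog */
static struct pkt *bad_txq_pkt;		/* parked packet, like q->skb_bad_txq */

static struct pkt *toy_dequeue(void)
{
	struct pkt *p = fifo_head;

	if (p)
		fifo_head = p->next;
	return p;
}

/* Chain up to BATCH_LIMIT packets that all map to the same txq.
 * The first packet for a different txq is parked, not requeued.
 */
static struct pkt *bulk_dequeue_same_txq(int *packets)
{
	struct pkt *head = toy_dequeue();
	struct pkt *tail = head, *nskb;
	int cnt = 1;

	if (!head)
		return NULL;
	while (cnt < BATCH_LIMIT && (nskb = toy_dequeue()) != NULL) {
		if (nskb->queue_mapping != head->queue_mapping) {
			bad_txq_pkt = nskb;	/* picked up on the next dequeue pass */
			break;
		}
		tail->next = nskb;
		tail = nskb;
		cnt++;
	}
	tail->next = NULL;
	*packets = cnt;
	return head;
}

int main(void)
{
	struct pkt pkts[5] = {
		{ .queue_mapping = 0 }, { .queue_mapping = 0 },
		{ .queue_mapping = 1 }, { .queue_mapping = 1 }, { .queue_mapping = 1 },
	};
	struct pkt *batch;
	int i, n;

	for (i = 0; i < 4; i++)		/* link the toy FIFO */
		pkts[i].next = &pkts[i + 1];
	fifo_head = &pkts[0];

	batch = bulk_dequeue_same_txq(&n);
	printf("batch of %d packet(s) for txq %d, parked pkt for txq %d\n",
	       n, batch->queue_mapping, bad_txq_pkt->queue_mapping);
	/* prints: batch of 2 packet(s) for txq 0, parked pkt for txq 1 */
	return 0;
}
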
include/net/sch_generic.h
@@ -75,13 +75,14 @@ struct Qdisc {
 	/*
 	 * For performance sake on SMP, we put highly modified fields at the end
 	 */
-	struct Qdisc		*next_sched ____cacheline_aligned_in_smp;
-	struct sk_buff		*gso_skb;
-	unsigned long		state;
+	struct sk_buff		*gso_skb ____cacheline_aligned_in_smp;
 	struct sk_buff_head	q;
 	struct gnet_stats_basic_packed bstats;
 	seqcount_t		running;
 	struct gnet_stats_queue	qstats;
+	unsigned long		state;
+	struct Qdisc		*next_sched;
+	struct sk_buff		*skb_bad_txq;
 	struct rcu_head		rcu_head;
 	int			padded;
 	atomic_t		refcnt;
net/sched/sch_generic.c
@@ -77,6 +77,34 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
 	skb->next = NULL;
 }
 
+/* This variant of try_bulk_dequeue_skb() makes sure
+ * all skbs in the chain are for the same txq
+ */
+static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
+				      struct sk_buff *skb,
+				      int *packets)
+{
+	int mapping = skb_get_queue_mapping(skb);
+	struct sk_buff *nskb;
+	int cnt = 0;
+
+	do {
+		nskb = q->dequeue(q);
+		if (!nskb)
+			break;
+		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
+			q->skb_bad_txq = nskb;
+			qdisc_qstats_backlog_inc(q, nskb);
+			q->q.qlen++;
+			break;
+		}
+		skb->next = nskb;
+		skb = nskb;
+	} while (++cnt < 8);
+	(*packets) += cnt;
+	skb->next = NULL;
+}
+
 /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
  * A requeued skb (via q->gso_skb) can also be a SKB list.
  */
@@ -87,8 +115,9 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 	const struct netdev_queue *txq = q->dev_queue;
 
 	*packets = 1;
-	*validate = true;
 	if (unlikely(skb)) {
+		/* skb in gso_skb were already validated */
+		*validate = false;
 		/* check the reason of requeuing without tx lock first */
 		txq = skb_get_tx_queue(txq->dev, skb);
 		if (!netif_xmit_frozen_or_stopped(txq)) {
@@ -97,15 +126,30 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 			q->q.qlen--;
 		} else
 			skb = NULL;
-		/* skb in gso_skb were already validated */
-		*validate = false;
-	} else {
-		if (!(q->flags & TCQ_F_ONETXQUEUE) ||
-		    !netif_xmit_frozen_or_stopped(txq)) {
-			skb = q->dequeue(q);
-			if (skb && qdisc_may_bulk(q))
-				try_bulk_dequeue_skb(q, skb, txq, packets);
+		return skb;
+	}
+	*validate = true;
+	skb = q->skb_bad_txq;
+	if (unlikely(skb)) {
+		/* check the reason of requeuing without tx lock first */
+		txq = skb_get_tx_queue(txq->dev, skb);
+		if (!netif_xmit_frozen_or_stopped(txq)) {
+			q->skb_bad_txq = NULL;
+			qdisc_qstats_backlog_dec(q, skb);
+			q->q.qlen--;
+			goto bulk;
 		}
+		return NULL;
+	}
+	if (!(q->flags & TCQ_F_ONETXQUEUE) ||
+	    !netif_xmit_frozen_or_stopped(txq)) {
+		skb = q->dequeue(q);
+		if (skb) {
+bulk:
+			if (qdisc_may_bulk(q))
+				try_bulk_dequeue_skb(q, skb, txq, packets);
+			else
+				try_bulk_dequeue_skb_slow(q, skb, packets);
+		}
 	}
 	return skb;
 }
@@ -624,11 +668,14 @@ void qdisc_reset(struct Qdisc *qdisc)
 	if (ops->reset)
 		ops->reset(qdisc);
 
+	kfree_skb(qdisc->skb_bad_txq);
+	qdisc->skb_bad_txq = NULL;
+
 	if (qdisc->gso_skb) {
 		kfree_skb_list(qdisc->gso_skb);
 		qdisc->gso_skb = NULL;
-		qdisc->q.qlen = 0;
 	}
+	qdisc->q.qlen = 0;
 }
 EXPORT_SYMBOL(qdisc_reset);
@@ -667,6 +714,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	dev_put(qdisc_dev(qdisc));
 
 	kfree_skb_list(qdisc->gso_skb);
+	kfree_skb(qdisc->skb_bad_txq);
 	/*
 	 * gen_estimator est_timer() might access qdisc->q.lock,
 	 * wait a RCU grace period before freeing qdisc.