Commit b757c933, authored by Jiri Pirko, committed by David S. Miller

tbf: improved accuracy at high rates

Current TBF uses a rate table computed by the "tc" userspace program,
which has the following issue:

The rate table has 256 entries to map packet lengths to
tokens (time units).  With TSO-sized packets, the 256-entry granularity
leads to loss/gain of rate, making the token bucket inaccurate.

Thus, instead of relying on the rate table, this patch explicitly computes
each packet's transmission time and accounts for it with nanosecond
granularity.

This is a followup to 56b765b7
("htb: improved accuracy at high rates").
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 34c5d292
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <net/netlink.h> #include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h> #include <net/pkt_sched.h>
...@@ -100,23 +101,21 @@ ...@@ -100,23 +101,21 @@
struct tbf_sched_data { struct tbf_sched_data {
/* Parameters */ /* Parameters */
u32 limit; /* Maximal length of backlog: bytes */ u32 limit; /* Maximal length of backlog: bytes */
u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
u32 mtu; s64 mtu;
u32 max_size; u32 max_size;
struct qdisc_rate_table *R_tab; struct psched_ratecfg rate;
struct qdisc_rate_table *P_tab; struct psched_ratecfg peak;
bool peak_present;
/* Variables */ /* Variables */
long tokens; /* Current number of B tokens */ s64 tokens; /* Current number of B tokens */
long ptokens; /* Current number of P tokens */ s64 ptokens; /* Current number of P tokens */
psched_time_t t_c; /* Time check-point */ s64 t_c; /* Time check-point */
struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */
struct qdisc_watchdog watchdog; /* Watchdog timer */ struct qdisc_watchdog watchdog; /* Watchdog timer */
}; };
#define L2T(q, L) qdisc_l2t((q)->R_tab, L)
#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L)
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{ {
struct tbf_sched_data *q = qdisc_priv(sch); struct tbf_sched_data *q = qdisc_priv(sch);
...@@ -156,24 +155,24 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) ...@@ -156,24 +155,24 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
skb = q->qdisc->ops->peek(q->qdisc); skb = q->qdisc->ops->peek(q->qdisc);
if (skb) { if (skb) {
psched_time_t now; s64 now;
long toks; s64 toks;
long ptoks = 0; s64 ptoks = 0;
unsigned int len = qdisc_pkt_len(skb); unsigned int len = qdisc_pkt_len(skb);
now = psched_get_time(); now = ktime_to_ns(ktime_get());
toks = psched_tdiff_bounded(now, q->t_c, q->buffer); toks = min_t(s64, now - q->t_c, q->buffer);
if (q->P_tab) { if (q->peak_present) {
ptoks = toks + q->ptokens; ptoks = toks + q->ptokens;
if (ptoks > (long)q->mtu) if (ptoks > q->mtu)
ptoks = q->mtu; ptoks = q->mtu;
ptoks -= L2T_P(q, len); ptoks -= (s64) psched_l2t_ns(&q->peak, len);
} }
toks += q->tokens; toks += q->tokens;
if (toks > (long)q->buffer) if (toks > q->buffer)
toks = q->buffer; toks = q->buffer;
toks -= L2T(q, len); toks -= (s64) psched_l2t_ns(&q->rate, len);
if ((toks|ptoks) >= 0) { if ((toks|ptoks) >= 0) {
skb = qdisc_dequeue_peeked(q->qdisc); skb = qdisc_dequeue_peeked(q->qdisc);
...@@ -189,8 +188,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) ...@@ -189,8 +188,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
return skb; return skb;
} }
qdisc_watchdog_schedule(&q->watchdog, qdisc_watchdog_schedule_ns(&q->watchdog,
now + max_t(long, -toks, -ptoks)); now + max_t(long, -toks, -ptoks));
/* Maybe we have a shorter packet in the queue, /* Maybe we have a shorter packet in the queue,
which can be sent now. It sounds cool, which can be sent now. It sounds cool,
...@@ -214,7 +213,7 @@ static void tbf_reset(struct Qdisc *sch) ...@@ -214,7 +213,7 @@ static void tbf_reset(struct Qdisc *sch)
qdisc_reset(q->qdisc); qdisc_reset(q->qdisc);
sch->q.qlen = 0; sch->q.qlen = 0;
q->t_c = psched_get_time(); q->t_c = ktime_to_ns(ktime_get());
q->tokens = q->buffer; q->tokens = q->buffer;
q->ptokens = q->mtu; q->ptokens = q->mtu;
qdisc_watchdog_cancel(&q->watchdog); qdisc_watchdog_cancel(&q->watchdog);
...@@ -293,14 +292,19 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) ...@@ -293,14 +292,19 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
q->qdisc = child; q->qdisc = child;
} }
q->limit = qopt->limit; q->limit = qopt->limit;
q->mtu = qopt->mtu; q->mtu = PSCHED_TICKS2NS(qopt->mtu);
q->max_size = max_size; q->max_size = max_size;
q->buffer = qopt->buffer; q->buffer = PSCHED_TICKS2NS(qopt->buffer);
q->tokens = q->buffer; q->tokens = q->buffer;
q->ptokens = q->mtu; q->ptokens = q->mtu;
swap(q->R_tab, rtab); psched_ratecfg_precompute(&q->rate, rtab->rate.rate);
swap(q->P_tab, ptab); if (ptab) {
psched_ratecfg_precompute(&q->peak, ptab->rate.rate);
q->peak_present = true;
} else {
q->peak_present = false;
}
sch_tree_unlock(sch); sch_tree_unlock(sch);
err = 0; err = 0;
...@@ -319,7 +323,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt) ...@@ -319,7 +323,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL) if (opt == NULL)
return -EINVAL; return -EINVAL;
q->t_c = psched_get_time(); q->t_c = ktime_to_ns(ktime_get());
qdisc_watchdog_init(&q->watchdog, sch); qdisc_watchdog_init(&q->watchdog, sch);
q->qdisc = &noop_qdisc; q->qdisc = &noop_qdisc;
...@@ -331,12 +335,6 @@ static void tbf_destroy(struct Qdisc *sch) ...@@ -331,12 +335,6 @@ static void tbf_destroy(struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch); struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_watchdog_cancel(&q->watchdog); qdisc_watchdog_cancel(&q->watchdog);
if (q->P_tab)
qdisc_put_rtab(q->P_tab);
if (q->R_tab)
qdisc_put_rtab(q->R_tab);
qdisc_destroy(q->qdisc); qdisc_destroy(q->qdisc);
} }
...@@ -352,13 +350,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) ...@@ -352,13 +350,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
goto nla_put_failure; goto nla_put_failure;
opt.limit = q->limit; opt.limit = q->limit;
opt.rate = q->R_tab->rate; opt.rate.rate = psched_ratecfg_getrate(&q->rate);
if (q->P_tab) if (q->peak_present)
opt.peakrate = q->P_tab->rate; opt.peakrate.rate = psched_ratecfg_getrate(&q->peak);
else else
memset(&opt.peakrate, 0, sizeof(opt.peakrate)); memset(&opt.peakrate, 0, sizeof(opt.peakrate));
opt.mtu = q->mtu; opt.mtu = PSCHED_NS2TICKS(q->mtu);
opt.buffer = q->buffer; opt.buffer = PSCHED_NS2TICKS(q->buffer);
if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
goto nla_put_failure; goto nla_put_failure;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment