Commit 39740872 authored by Patrick McHardy's avatar Patrick McHardy Committed by Adrian Bunk

[NET_SCHED]: Fix endless loops caused by inaccurate qlen counters

There are multiple problems related to qlen adjustment that can lead
to an upper qdisc getting out of sync with the real number of packets
queued, leading to endless dequeueing attempts by the upper layer code.

All qdiscs must maintain an accurate q.qlen counter. There are basically
two groups of operations affecting the qlen: operations that propagate
down the tree (enqueue, dequeue, requeue, drop, reset) beginning at the
root qdisc and operations only affecting a subtree or single qdisc
(change, graft, delete class). Since qlen changes during operations from
the second group don't propagate to ancestor qdiscs, their qlen values
become desynchronized.

This patch adds a function to propagate qlen changes up the qdisc tree,
optionally calling a callback function to perform qdisc-internal
maintenance when the child qdisc is deactivated, and converts all
qdiscs to use this where necessary.
Signed-off-by: default avatarPatrick McHardy <kaber@trash.net>
Signed-off-by: default avatarAdrian Bunk <bunk@stusta.de>
parent 921e8ebf
......@@ -61,6 +61,7 @@ struct Qdisc_class_ops
int (*graft)(struct Qdisc *, unsigned long cl,
struct Qdisc *, struct Qdisc **);
struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl);
void (*qlen_notify)(struct Qdisc *, unsigned long);
/* Class manipulation routines */
unsigned long (*get)(struct Qdisc *, u32 classid);
......@@ -173,9 +174,10 @@ extern void dev_activate(struct net_device *dev);
extern void dev_deactivate(struct net_device *dev);
extern void qdisc_reset(struct Qdisc *qdisc);
extern void qdisc_destroy(struct Qdisc *qdisc);
extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops);
extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
struct Qdisc_ops *ops);
struct Qdisc_ops *ops, u32 parentid);
static inline void
tcf_destroy(struct tcf_proto *tp)
......
......@@ -192,21 +192,28 @@ int unregister_qdisc(struct Qdisc_ops *qops)
(root qdisc, all its children, children of children etc.)
*/
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
read_lock(&qdisc_tree_lock);
list_for_each_entry(q, &dev->qdisc_list, list) {
if (q->handle == handle) {
read_unlock(&qdisc_tree_lock);
if (q->handle == handle)
return q;
}
}
read_unlock(&qdisc_tree_lock);
return NULL;
}
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
read_lock(&qdisc_tree_lock);
q = __qdisc_lookup(dev, handle);
read_unlock(&qdisc_tree_lock);
return q;
}
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
......@@ -349,6 +356,26 @@ dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
return oqdisc;
}
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
if (n == 0)
return;
while ((parentid = sch->parent)) {
sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
cops = sch->ops->cl_ops;
if (cops->qlen_notify) {
cl = cops->get(sch, parentid);
cops->qlen_notify(sch, cl);
cops->put(sch, cl);
}
sch->q.qlen -= n;
}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
to device "dev".
......
......@@ -317,7 +317,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
}
memset(flow,0,sizeof(*flow));
flow->filter_list = NULL;
if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops)))
if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops,classid)))
flow->q = &noop_qdisc;
DPRINTK("atm_tc_change: qdisc %p\n",flow->q);
flow->sock = sock;
......@@ -577,7 +577,8 @@ static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt)
DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
p->flows = &p->link;
if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops)))
if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops,
sch->handle)))
p->link.q = &noop_qdisc;
DPRINTK("atm_tc_init: link (%p) qdisc %p\n",&p->link,p->link.q);
p->link.filter_list = NULL;
......
......@@ -1430,7 +1430,8 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
q->link.sibling = &q->link;
q->link.classid = sch->handle;
q->link.qdisc = sch;
if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
sch->handle)))
q->link.q = &noop_qdisc;
q->link.priority = TC_CBQ_MAXPRIO-1;
......@@ -1675,7 +1676,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (cl) {
if (new == NULL) {
if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
cl->classid)) == NULL)
return -ENOBUFS;
} else {
#ifdef CONFIG_NET_CLS_POLICE
......@@ -1686,7 +1688,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
sch_tree_lock(sch);
*old = cl->q;
cl->q = new;
sch->q.qlen -= (*old)->q.qlen;
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
sch_tree_unlock(sch);
......@@ -1934,7 +1936,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t
cl->R_tab = rtab;
rtab = NULL;
cl->refcnt = 1;
if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
cl->q = &noop_qdisc;
cl->classid = classid;
cl->tparent = parent;
......
......@@ -89,15 +89,16 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
sch, p, new, old);
if (new == NULL) {
new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
sch->handle);
if (new == NULL)
new = &noop_qdisc;
}
sch_tree_lock(sch);
*old = xchg(&p->q, new);
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
sch->q.qlen = 0;
sch_tree_unlock(sch);
return 0;
......@@ -388,7 +389,7 @@ static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
p->default_index = default_index;
p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]);
p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
if (p->q == NULL)
p->q = &noop_qdisc;
......
......@@ -440,13 +440,15 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
return ERR_PTR(-err);
}
struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
unsigned int parentid)
{
struct Qdisc *sch;
sch = qdisc_alloc(dev, ops);
if (IS_ERR(sch))
goto errout;
sch->parent = parentid;
if (!ops->init || ops->init(sch, NULL) == 0)
return sch;
......@@ -510,7 +512,8 @@ void dev_activate(struct net_device *dev)
if (dev->qdisc_sleeping == &noop_qdisc) {
struct Qdisc *qdisc;
if (dev->tx_queue_len) {
qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops);
qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops,
TC_H_ROOT);
if (qdisc == NULL) {
printk(KERN_INFO "%s: activation failed\n", dev->name);
return;
......
......@@ -958,11 +958,7 @@ hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
unsigned int len = cl->qdisc->q.qlen;
qdisc_reset(cl->qdisc);
if (len > 0) {
update_vf(cl, 0, 0);
set_passive(cl);
sch->q.qlen -= len;
}
qdisc_tree_decrease_qlen(cl->qdisc, len);
}
static void
......@@ -1140,7 +1136,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->classid = classid;
cl->sched = q;
cl->cl_parent = parent;
cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
cl->stats_lock = &sch->dev->queue_lock;
......@@ -1202,10 +1198,12 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
list_del(&cl->hlist);
list_del(&cl->siblings);
hfsc_adjust_levels(cl->cl_parent);
hfsc_purge_queue(sch, cl);
list_del(&cl->hlist);
if (--cl->refcnt == 0)
hfsc_destroy_class(sch, cl);
......@@ -1273,7 +1271,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (cl->level > 0)
return -EINVAL;
if (new == NULL) {
new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
cl->classid);
if (new == NULL)
new = &noop_qdisc;
}
......@@ -1296,6 +1295,17 @@ hfsc_class_leaf(struct Qdisc *sch, unsigned long arg)
return NULL;
}
static void
hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
struct hfsc_class *cl = (struct hfsc_class *)arg;
if (cl->qdisc->q.qlen == 0) {
update_vf(cl, 0, 0);
set_passive(cl);
}
}
static unsigned long
hfsc_get_class(struct Qdisc *sch, u32 classid)
{
......@@ -1516,7 +1526,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
q->root.refcnt = 1;
q->root.classid = sch->handle;
q->root.sched = q;
q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
sch->handle);
if (q->root.qdisc == NULL)
q->root.qdisc = &noop_qdisc;
q->root.stats_lock = &sch->dev->queue_lock;
......@@ -1779,6 +1790,7 @@ static struct Qdisc_class_ops hfsc_class_ops = {
.delete = hfsc_delete_class,
.graft = hfsc_graft_class,
.leaf = hfsc_class_leaf,
.qlen_notify = hfsc_qlen_notify,
.get = hfsc_get_class,
.put = hfsc_put_class,
.bind_tcf = hfsc_bind_tcf,
......
......@@ -1385,16 +1385,13 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct htb_class *cl = (struct htb_class*)arg;
if (cl && !cl->level) {
if (new == NULL && (new = qdisc_create_dflt(sch->dev,
&pfifo_qdisc_ops)) == NULL)
if (new == NULL &&
(new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
cl->classid)) == NULL)
return -ENOBUFS;
sch_tree_lock(sch);
if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
if (cl->prio_activity)
htb_deactivate (qdisc_priv(sch),cl);
/* TODO: is it correct ? Why CBQ doesn't do it ? */
sch->q.qlen -= (*old)->q.qlen;
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
}
sch_tree_unlock(sch);
......@@ -1409,6 +1406,14 @@ static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}
static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
struct htb_class *cl = (struct htb_class *)arg;
if (cl->un.leaf.q->q.qlen == 0)
htb_deactivate(qdisc_priv(sch), cl);
}
static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
#ifdef HTB_DEBUG
......@@ -1434,10 +1439,10 @@ static void htb_destroy_filters(struct tcf_proto **fl)
static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
{
struct htb_sched *q = qdisc_priv(sch);
HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
if (!cl->level) {
BUG_TRAP(cl->un.leaf.q);
sch->q.qlen -= cl->un.leaf.q->q.qlen;
qdisc_destroy(cl->un.leaf.q);
}
qdisc_put_rtab(cl->rate);
......@@ -1489,6 +1494,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class*)arg;
unsigned int qlen;
HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
// TODO: why don't allow to delete subtree ? references ? does
......@@ -1498,7 +1505,13 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
return -EBUSY;
sch_tree_lock(sch);
if (!cl->level) {
qlen = cl->un.leaf.q->q.qlen;
qdisc_reset(cl->un.leaf.q);
qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
}
/* delete from hash and active; remainder in destroy_class */
list_del_init(&cl->hlist);
if (cl->prio_activity)
......@@ -1576,11 +1589,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
so that can't be used inside of sch_tree_lock
-- thanks to Karlis Peisenieks */
new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
sch_tree_lock(sch);
if (parent && !parent->level) {
unsigned int qlen = parent->un.leaf.q->q.qlen;
/* turn parent into inner node */
sch->q.qlen -= parent->un.leaf.q->q.qlen;
qdisc_reset(parent->un.leaf.q);
qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
qdisc_destroy (parent->un.leaf.q);
if (parent->prio_activity)
htb_deactivate (q,parent);
......@@ -1721,6 +1737,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
static struct Qdisc_class_ops htb_class_ops = {
.graft = htb_graft,
.leaf = htb_leaf,
.qlen_notify = htb_qlen_notify,
.get = htb_get,
.put = htb_put,
.change = htb_change_class,
......
......@@ -571,7 +571,8 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
q->timer.function = netem_watchdog;
q->timer.data = (unsigned long) sch;
q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops);
q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
TC_H_MAKE(sch->handle, 1));
if (!q->qdisc) {
pr_debug("netem: qdisc create failed\n");
return -ENOMEM;
......@@ -658,8 +659,8 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
sch_tree_lock(sch);
*old = xchg(&q->qdisc, new);
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
sch->q.qlen = 0;
sch_tree_unlock(sch);
return 0;
......
......@@ -223,21 +223,27 @@ static int prio_tune(struct Qdisc *sch, struct rtattr *opt)
for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
if (child != &noop_qdisc)
if (child != &noop_qdisc) {
qdisc_tree_decrease_qlen(child, child->q.qlen);
qdisc_destroy(child);
}
}
sch_tree_unlock(sch);
for (i=0; i<q->bands; i++) {
if (q->queues[i] == &noop_qdisc) {
struct Qdisc *child;
child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
TC_H_MAKE(sch->handle, i + 1));
if (child) {
sch_tree_lock(sch);
child = xchg(&q->queues[i], child);
if (child != &noop_qdisc)
if (child != &noop_qdisc) {
qdisc_tree_decrease_qlen(child,
child->q.qlen);
qdisc_destroy(child);
}
sch_tree_unlock(sch);
}
}
......@@ -295,7 +301,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
sch_tree_lock(sch);
*old = q->queues[band];
q->queues[band] = new;
sch->q.qlen -= (*old)->q.qlen;
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
sch_tree_unlock(sch);
......
......@@ -389,6 +389,7 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
{
struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = RTA_DATA(opt);
unsigned int qlen;
if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
return -EINVAL;
......@@ -399,8 +400,10 @@ static int sfq_change(struct Qdisc *sch, struct rtattr *opt)
if (ctl->limit)
q->limit = min_t(u32, ctl->limit, SFQ_DEPTH);
qlen = sch->q.qlen;
while (sch->q.qlen >= q->limit-1)
sfq_drop(sch);
qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
del_timer(&q->perturb_timer);
if (q->perturb_period) {
......
......@@ -274,12 +274,14 @@ static void tbf_reset(struct Qdisc* sch)
del_timer(&q->wd_timer);
}
static struct Qdisc *tbf_create_dflt_qdisc(struct net_device *dev, u32 limit)
static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
{
struct Qdisc *q = qdisc_create_dflt(dev, &bfifo_qdisc_ops);
struct Qdisc *q;
struct rtattr *rta;
int ret;
q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
TC_H_MAKE(sch->handle, 1));
if (q) {
rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
if (rta) {
......@@ -342,12 +344,15 @@ static int tbf_change(struct Qdisc* sch, struct rtattr *opt)
goto done;
if (q->qdisc == &noop_qdisc) {
if ((child = tbf_create_dflt_qdisc(sch->dev, qopt->limit)) == NULL)
if ((child = tbf_create_dflt_qdisc(sch, qopt->limit)) == NULL)
goto done;
}
sch_tree_lock(sch);
if (child) q->qdisc = child;
if (child) {
qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
qdisc_destroy(xchg(&q->qdisc, child));
}
q->limit = qopt->limit;
q->mtu = qopt->mtu;
q->max_size = max_size;
......@@ -449,8 +454,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
sch_tree_lock(sch);
*old = xchg(&q->qdisc, new);
qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
qdisc_reset(*old);
sch->q.qlen = 0;
sch_tree_unlock(sch);
return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment