Commit 30723673 authored by David S. Miller's avatar David S. Miller

pkt_sched: Manage qdisc list inside of root qdisc.

Idea is from Patrick McHardy.

Instead of managing the list of qdiscs on the device level, manage it
in the root qdisc of a netdev_queue.  This solves all kinds of
visibility issues during qdisc destruction.

The way to iterate over all qdiscs of a netdev_queue is to visit
the netdev_queue->qdisc, and then traverse it's list.

The only special case is to ignore builting qdiscs at the root when
dumping or doing a qdisc_lookup().  That was not needed previously
because builtin qdiscs were not added to the device's qdisc_list.
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 72b25a91
...@@ -636,8 +636,6 @@ struct net_device ...@@ -636,8 +636,6 @@ struct net_device
unsigned int real_num_tx_queues; unsigned int real_num_tx_queues;
unsigned long tx_queue_len; /* Max frames per queue allowed */ unsigned long tx_queue_len; /* Max frames per queue allowed */
spinlock_t qdisc_list_lock;
struct list_head qdisc_list;
/* /*
* One part is mostly used on xmit path (device) * One part is mostly used on xmit path (device)
......
...@@ -3888,8 +3888,6 @@ int register_netdevice(struct net_device *dev) ...@@ -3888,8 +3888,6 @@ int register_netdevice(struct net_device *dev)
net = dev_net(dev); net = dev_net(dev);
spin_lock_init(&dev->addr_list_lock); spin_lock_init(&dev->addr_list_lock);
spin_lock_init(&dev->qdisc_list_lock);
INIT_LIST_HEAD(&dev->qdisc_list);
netdev_init_queue_locks(dev); netdev_init_queue_locks(dev);
dev->iflink = -1; dev->iflink = -1;
......
...@@ -185,12 +185,21 @@ EXPORT_SYMBOL(unregister_qdisc); ...@@ -185,12 +185,21 @@ EXPORT_SYMBOL(unregister_qdisc);
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{ {
struct Qdisc *q; unsigned int i;
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
struct Qdisc *q, *txq_root = txq->qdisc;
list_for_each_entry(q, &dev->qdisc_list, list) { if (!(txq_root->flags & TCQ_F_BUILTIN) &&
txq_root->handle == handle)
return txq_root;
list_for_each_entry(q, &txq_root->list, list) {
if (q->handle == handle) if (q->handle == handle)
return q; return q;
} }
}
return NULL; return NULL;
} }
...@@ -676,9 +685,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, ...@@ -676,9 +685,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
goto err_out3; goto err_out3;
} }
} }
spin_lock_bh(&dev->qdisc_list_lock); if (parent)
list_add_tail(&sch->list, &dev->qdisc_list); list_add_tail(&sch->list, &dev_queue->qdisc->list);
spin_unlock_bh(&dev->qdisc_list_lock);
return sch; return sch;
} }
...@@ -1037,13 +1045,57 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, ...@@ -1037,13 +1045,57 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
return -EINVAL; return -EINVAL;
} }
static bool tc_qdisc_dump_ignore(struct Qdisc *q)
{
return (q->flags & TCQ_F_BUILTIN) ? true : false;
}
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
struct netlink_callback *cb,
int *q_idx_p, int s_q_idx)
{
int ret = 0, q_idx = *q_idx_p;
struct Qdisc *q;
if (!root)
return 0;
q = root;
if (q_idx < s_q_idx) {
q_idx++;
} else {
if (!tc_qdisc_dump_ignore(q) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
q_idx++;
}
list_for_each_entry(q, &root->list, list) {
if (q_idx < s_q_idx) {
q_idx++;
continue;
}
if (!tc_qdisc_dump_ignore(q) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
q_idx++;
}
out:
*q_idx_p = q_idx;
return ret;
done:
ret = -1;
goto out;
}
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{ {
struct net *net = sock_net(skb->sk); struct net *net = sock_net(skb->sk);
int idx, q_idx; int idx, q_idx;
int s_idx, s_q_idx; int s_idx, s_q_idx;
struct net_device *dev; struct net_device *dev;
struct Qdisc *q;
if (net != &init_net) if (net != &init_net)
return 0; return 0;
...@@ -1053,21 +1105,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -1053,21 +1105,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
read_lock(&dev_base_lock); read_lock(&dev_base_lock);
idx = 0; idx = 0;
for_each_netdev(&init_net, dev) { for_each_netdev(&init_net, dev) {
struct netdev_queue *dev_queue;
if (idx < s_idx) if (idx < s_idx)
goto cont; goto cont;
if (idx > s_idx) if (idx > s_idx)
s_q_idx = 0; s_q_idx = 0;
q_idx = 0; q_idx = 0;
list_for_each_entry(q, &dev->qdisc_list, list) {
if (q_idx < s_q_idx) { dev_queue = netdev_get_tx_queue(dev, 0);
q_idx++; if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
continue;
}
if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done; goto done;
q_idx++;
} dev_queue = &dev->rx_queue;
if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
goto done;
cont: cont:
idx++; idx++;
} }
...@@ -1285,35 +1338,20 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk ...@@ -1285,35 +1338,20 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
} }
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
struct tcmsg *tcm, struct netlink_callback *cb,
int *t_p, int s_t)
{ {
struct net *net = sock_net(skb->sk);
int t;
int s_t;
struct net_device *dev;
struct Qdisc *q;
struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
struct qdisc_dump_args arg; struct qdisc_dump_args arg;
if (net != &init_net) if (tc_qdisc_dump_ignore(q) ||
return 0; *t_p < s_t || !q->ops->cl_ops ||
if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
return 0;
if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return 0;
s_t = cb->args[0];
t = 0;
list_for_each_entry(q, &dev->qdisc_list, list) {
if (t < s_t || !q->ops->cl_ops ||
(tcm->tcm_parent && (tcm->tcm_parent &&
TC_H_MAJ(tcm->tcm_parent) != q->handle)) { TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
t++; (*t_p)++;
continue; return 0;
} }
if (t > s_t) if (*t_p > s_t)
memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
arg.w.fn = qdisc_class_dump; arg.w.fn = qdisc_class_dump;
arg.skb = skb; arg.skb = skb;
...@@ -1324,10 +1362,59 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -1324,10 +1362,59 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
q->ops->cl_ops->walk(q, &arg.w); q->ops->cl_ops->walk(q, &arg.w);
cb->args[1] = arg.w.count; cb->args[1] = arg.w.count;
if (arg.w.stop) if (arg.w.stop)
break; return -1;
t++; (*t_p)++;
return 0;
}
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
struct tcmsg *tcm, struct netlink_callback *cb,
int *t_p, int s_t)
{
struct Qdisc *q;
if (!root)
return 0;
if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
return -1;
list_for_each_entry(q, &root->list, list) {
if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
return -1;
} }
return 0;
}
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
struct net *net = sock_net(skb->sk);
struct netdev_queue *dev_queue;
struct net_device *dev;
int t, s_t;
if (net != &init_net)
return 0;
if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
return 0;
if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return 0;
s_t = cb->args[0];
t = 0;
dev_queue = netdev_get_tx_queue(dev, 0);
if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
goto done;
dev_queue = &dev->rx_queue;
if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
goto done;
done:
cb->args[0] = t; cb->args[0] = t;
dev_put(dev); dev_put(dev);
......
...@@ -480,15 +480,12 @@ static void __qdisc_destroy(struct rcu_head *head) ...@@ -480,15 +480,12 @@ static void __qdisc_destroy(struct rcu_head *head)
void qdisc_destroy(struct Qdisc *qdisc) void qdisc_destroy(struct Qdisc *qdisc)
{ {
struct net_device *dev = qdisc_dev(qdisc);
if (qdisc->flags & TCQ_F_BUILTIN || if (qdisc->flags & TCQ_F_BUILTIN ||
!atomic_dec_and_test(&qdisc->refcnt)) !atomic_dec_and_test(&qdisc->refcnt))
return; return;
spin_lock_bh(&dev->qdisc_list_lock); if (qdisc->parent)
list_del(&qdisc->list); list_del(&qdisc->list);
spin_unlock_bh(&dev->qdisc_list_lock);
call_rcu(&qdisc->q_rcu, __qdisc_destroy); call_rcu(&qdisc->q_rcu, __qdisc_destroy);
} }
...@@ -520,9 +517,6 @@ static void attach_one_default_qdisc(struct net_device *dev, ...@@ -520,9 +517,6 @@ static void attach_one_default_qdisc(struct net_device *dev,
printk(KERN_INFO "%s: activation failed\n", dev->name); printk(KERN_INFO "%s: activation failed\n", dev->name);
return; return;
} }
spin_lock_bh(&dev->qdisc_list_lock);
list_add_tail(&qdisc->list, &dev->qdisc_list);
spin_unlock_bh(&dev->qdisc_list_lock);
} else { } else {
qdisc = &noqueue_qdisc; qdisc = &noqueue_qdisc;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment