Commit 5891cd5e authored by Eric Dumazet, committed by David S. Miller

net_sched: add __rcu annotation to netdev->qdisc

syzbot found a data-race [1] which led me to add __rcu
annotations to netdev->qdisc, and proper accessors
to get LOCKDEP support.

[1]
BUG: KCSAN: data-race in dev_activate / qdisc_lookup_rcu

write to 0xffff888168ad6410 of 8 bytes by task 13559 on cpu 1:
 attach_default_qdiscs net/sched/sch_generic.c:1167 [inline]
 dev_activate+0x2ed/0x8f0 net/sched/sch_generic.c:1221
 __dev_open+0x2e9/0x3a0 net/core/dev.c:1416
 __dev_change_flags+0x167/0x3f0 net/core/dev.c:8139
 rtnl_configure_link+0xc2/0x150 net/core/rtnetlink.c:3150
 __rtnl_newlink net/core/rtnetlink.c:3489 [inline]
 rtnl_newlink+0xf4d/0x13e0 net/core/rtnetlink.c:3529
 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5594
 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2494
 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5612
 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
 netlink_unicast+0x602/0x6d0 net/netlink/af_netlink.c:1343
 netlink_sendmsg+0x728/0x850 net/netlink/af_netlink.c:1919
 sock_sendmsg_nosec net/socket.c:705 [inline]
 sock_sendmsg net/socket.c:725 [inline]
 ____sys_sendmsg+0x39a/0x510 net/socket.c:2413
 ___sys_sendmsg net/socket.c:2467 [inline]
 __sys_sendmsg+0x195/0x230 net/socket.c:2496
 __do_sys_sendmsg net/socket.c:2505 [inline]
 __se_sys_sendmsg net/socket.c:2503 [inline]
 __x64_sys_sendmsg+0x42/0x50 net/socket.c:2503
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff888168ad6410 of 8 bytes by task 13560 on cpu 0:
 qdisc_lookup_rcu+0x30/0x2e0 net/sched/sch_api.c:323
 __tcf_qdisc_find+0x74/0x3a0 net/sched/cls_api.c:1050
 tc_del_tfilter+0x1c7/0x1350 net/sched/cls_api.c:2211
 rtnetlink_rcv_msg+0x5ba/0x7e0 net/core/rtnetlink.c:5585
 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2494
 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5612
 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
 netlink_unicast+0x602/0x6d0 net/netlink/af_netlink.c:1343
 netlink_sendmsg+0x728/0x850 net/netlink/af_netlink.c:1919
 sock_sendmsg_nosec net/socket.c:705 [inline]
 sock_sendmsg net/socket.c:725 [inline]
 ____sys_sendmsg+0x39a/0x510 net/socket.c:2413
 ___sys_sendmsg net/socket.c:2467 [inline]
 __sys_sendmsg+0x195/0x230 net/socket.c:2496
 __do_sys_sendmsg net/socket.c:2505 [inline]
 __se_sys_sendmsg net/socket.c:2503 [inline]
 __x64_sys_sendmsg+0x42/0x50 net/socket.c:2503
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0xffffffff85dee080 -> 0xffff88815d96ec00

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 13560 Comm: syz-executor.2 Not tainted 5.17.0-rc3-syzkaller-00116-gf1baf68e-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Fixes: 470502de ("net: sched: unlock rules update API")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Vlad Buslov <vladbu@mellanox.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent a2614140
...@@ -2158,7 +2158,7 @@ struct net_device { ...@@ -2158,7 +2158,7 @@ struct net_device {
struct netdev_queue *_tx ____cacheline_aligned_in_smp; struct netdev_queue *_tx ____cacheline_aligned_in_smp;
unsigned int num_tx_queues; unsigned int num_tx_queues;
unsigned int real_num_tx_queues; unsigned int real_num_tx_queues;
struct Qdisc *qdisc; struct Qdisc __rcu *qdisc;
unsigned int tx_queue_len; unsigned int tx_queue_len;
spinlock_t tx_global_lock; spinlock_t tx_global_lock;
......
...@@ -1699,6 +1699,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ...@@ -1699,6 +1699,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
{ {
struct ifinfomsg *ifm; struct ifinfomsg *ifm;
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
struct Qdisc *qdisc;
ASSERT_RTNL(); ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
...@@ -1716,6 +1717,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ...@@ -1716,6 +1717,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid)) if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
goto nla_put_failure; goto nla_put_failure;
qdisc = rtnl_dereference(dev->qdisc);
if (nla_put_string(skb, IFLA_IFNAME, dev->name) || if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
nla_put_u8(skb, IFLA_OPERSTATE, nla_put_u8(skb, IFLA_OPERSTATE,
...@@ -1735,8 +1737,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ...@@ -1735,8 +1737,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
#endif #endif
put_master_ifindex(skb, dev) || put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
(dev->qdisc && (qdisc &&
nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) || nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES, nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) + atomic_read(&dev->carrier_up_count) +
......
...@@ -1044,7 +1044,7 @@ static int __tcf_qdisc_find(struct net *net, struct Qdisc **q, ...@@ -1044,7 +1044,7 @@ static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
/* Find qdisc */ /* Find qdisc */
if (!*parent) { if (!*parent) {
*q = dev->qdisc; *q = rcu_dereference(dev->qdisc);
*parent = (*q)->handle; *parent = (*q)->handle;
} else { } else {
*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
...@@ -2587,7 +2587,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -2587,7 +2587,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
parent = tcm->tcm_parent; parent = tcm->tcm_parent;
if (!parent) if (!parent)
q = dev->qdisc; q = rtnl_dereference(dev->qdisc);
else else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
if (!q) if (!q)
...@@ -2962,7 +2962,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -2962,7 +2962,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len; return skb->len;
if (!tcm->tcm_parent) if (!tcm->tcm_parent)
q = dev->qdisc; q = rtnl_dereference(dev->qdisc);
else else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
......
...@@ -301,7 +301,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) ...@@ -301,7 +301,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
if (!handle) if (!handle)
return NULL; return NULL;
q = qdisc_match_from_root(dev->qdisc, handle); q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
if (q) if (q)
goto out; goto out;
...@@ -320,7 +320,7 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) ...@@ -320,7 +320,7 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
if (!handle) if (!handle)
return NULL; return NULL;
q = qdisc_match_from_root(dev->qdisc, handle); q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
if (q) if (q)
goto out; goto out;
...@@ -1082,10 +1082,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, ...@@ -1082,10 +1082,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
skip: skip:
if (!ingress) { if (!ingress) {
notify_and_destroy(net, skb, n, classid, notify_and_destroy(net, skb, n, classid,
dev->qdisc, new); rtnl_dereference(dev->qdisc), new);
if (new && !new->ops->attach) if (new && !new->ops->attach)
qdisc_refcount_inc(new); qdisc_refcount_inc(new);
dev->qdisc = new ? : &noop_qdisc; rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
if (new && new->ops->attach) if (new && new->ops->attach)
new->ops->attach(new); new->ops->attach(new);
...@@ -1451,7 +1451,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, ...@@ -1451,7 +1451,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
q = dev_ingress_queue(dev)->qdisc_sleeping; q = dev_ingress_queue(dev)->qdisc_sleeping;
} }
} else { } else {
q = dev->qdisc; q = rtnl_dereference(dev->qdisc);
} }
if (!q) { if (!q) {
NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
...@@ -1540,7 +1540,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, ...@@ -1540,7 +1540,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
q = dev_ingress_queue(dev)->qdisc_sleeping; q = dev_ingress_queue(dev)->qdisc_sleeping;
} }
} else { } else {
q = dev->qdisc; q = rtnl_dereference(dev->qdisc);
} }
/* It may be default qdisc, ignore it */ /* It may be default qdisc, ignore it */
...@@ -1762,7 +1762,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -1762,7 +1762,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
s_q_idx = 0; s_q_idx = 0;
q_idx = 0; q_idx = 0;
if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
skb, cb, &q_idx, s_q_idx,
true, tca[TCA_DUMP_INVISIBLE]) < 0) true, tca[TCA_DUMP_INVISIBLE]) < 0)
goto done; goto done;
...@@ -2033,7 +2034,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, ...@@ -2033,7 +2034,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
} else if (qid1) { } else if (qid1) {
qid = qid1; qid = qid1;
} else if (qid == 0) } else if (qid == 0)
qid = dev->qdisc->handle; qid = rtnl_dereference(dev->qdisc)->handle;
/* Now qid is genuine qdisc handle consistent /* Now qid is genuine qdisc handle consistent
* both with parent and child. * both with parent and child.
...@@ -2044,7 +2045,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, ...@@ -2044,7 +2045,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
portid = TC_H_MAKE(qid, portid); portid = TC_H_MAKE(qid, portid);
} else { } else {
if (qid == 0) if (qid == 0)
qid = dev->qdisc->handle; qid = rtnl_dereference(dev->qdisc)->handle;
} }
/* OK. Locate qdisc */ /* OK. Locate qdisc */
...@@ -2205,7 +2206,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -2205,7 +2206,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0]; s_t = cb->args[0];
t = 0; t = 0;
if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0) if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
skb, tcm, cb, &t, s_t, true) < 0)
goto done; goto done;
dev_queue = dev_ingress_queue(dev); dev_queue = dev_ingress_queue(dev);
......
...@@ -1164,30 +1164,33 @@ static void attach_default_qdiscs(struct net_device *dev) ...@@ -1164,30 +1164,33 @@ static void attach_default_qdiscs(struct net_device *dev)
if (!netif_is_multiqueue(dev) || if (!netif_is_multiqueue(dev) ||
dev->priv_flags & IFF_NO_QUEUE) { dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
dev->qdisc = txq->qdisc_sleeping; qdisc = txq->qdisc_sleeping;
qdisc_refcount_inc(dev->qdisc); rcu_assign_pointer(dev->qdisc, qdisc);
qdisc_refcount_inc(qdisc);
} else { } else {
qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL); qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
if (qdisc) { if (qdisc) {
dev->qdisc = qdisc; rcu_assign_pointer(dev->qdisc, qdisc);
qdisc->ops->attach(qdisc); qdisc->ops->attach(qdisc);
} }
} }
qdisc = rtnl_dereference(dev->qdisc);
/* Detect default qdisc setup/init failed and fallback to "noqueue" */ /* Detect default qdisc setup/init failed and fallback to "noqueue" */
if (dev->qdisc == &noop_qdisc) { if (qdisc == &noop_qdisc) {
netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n", netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
default_qdisc_ops->id, noqueue_qdisc_ops.id); default_qdisc_ops->id, noqueue_qdisc_ops.id);
dev->priv_flags |= IFF_NO_QUEUE; dev->priv_flags |= IFF_NO_QUEUE;
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
dev->qdisc = txq->qdisc_sleeping; qdisc = txq->qdisc_sleeping;
qdisc_refcount_inc(dev->qdisc); rcu_assign_pointer(dev->qdisc, qdisc);
qdisc_refcount_inc(qdisc);
dev->priv_flags ^= IFF_NO_QUEUE; dev->priv_flags ^= IFF_NO_QUEUE;
} }
#ifdef CONFIG_NET_SCHED #ifdef CONFIG_NET_SCHED
if (dev->qdisc != &noop_qdisc) if (qdisc != &noop_qdisc)
qdisc_hash_add(dev->qdisc, false); qdisc_hash_add(qdisc, false);
#endif #endif
} }
...@@ -1217,7 +1220,7 @@ void dev_activate(struct net_device *dev) ...@@ -1217,7 +1220,7 @@ void dev_activate(struct net_device *dev)
* and noqueue_qdisc for virtual interfaces * and noqueue_qdisc for virtual interfaces
*/ */
if (dev->qdisc == &noop_qdisc) if (rtnl_dereference(dev->qdisc) == &noop_qdisc)
attach_default_qdiscs(dev); attach_default_qdiscs(dev);
if (!netif_carrier_ok(dev)) if (!netif_carrier_ok(dev))
...@@ -1383,7 +1386,7 @@ static int qdisc_change_tx_queue_len(struct net_device *dev, ...@@ -1383,7 +1386,7 @@ static int qdisc_change_tx_queue_len(struct net_device *dev,
void dev_qdisc_change_real_num_tx(struct net_device *dev, void dev_qdisc_change_real_num_tx(struct net_device *dev,
unsigned int new_real_tx) unsigned int new_real_tx)
{ {
struct Qdisc *qdisc = dev->qdisc; struct Qdisc *qdisc = rtnl_dereference(dev->qdisc);
if (qdisc->ops->change_real_num_tx) if (qdisc->ops->change_real_num_tx)
qdisc->ops->change_real_num_tx(qdisc, new_real_tx); qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
...@@ -1447,7 +1450,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, ...@@ -1447,7 +1450,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
void dev_init_scheduler(struct net_device *dev) void dev_init_scheduler(struct net_device *dev)
{ {
dev->qdisc = &noop_qdisc; rcu_assign_pointer(dev->qdisc, &noop_qdisc);
netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev)) if (dev_ingress_queue(dev))
dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
...@@ -1475,8 +1478,8 @@ void dev_shutdown(struct net_device *dev) ...@@ -1475,8 +1478,8 @@ void dev_shutdown(struct net_device *dev)
netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev)) if (dev_ingress_queue(dev))
shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
qdisc_put(dev->qdisc); qdisc_put(rtnl_dereference(dev->qdisc));
dev->qdisc = &noop_qdisc; rcu_assign_pointer(dev->qdisc, &noop_qdisc);
WARN_ON(timer_pending(&dev->watchdog_timer)); WARN_ON(timer_pending(&dev->watchdog_timer));
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment