Commit 7b67baf1 authored by Paolo Abeni

Merge branch 'rtnetlink-more-rcu-conversions-for-rtnl_fill_ifinfo'

Eric Dumazet says:

====================
rtnetlink: more rcu conversions for rtnl_fill_ifinfo()

We want to no longer rely on RTNL for the "ip link show" command.

This is a long road; this series takes care of some parts.
====================

Link: https://lore.kernel.org/r/20240503192059.3884225-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 25010156 9cf621bd
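
The recurring pattern throughout the diffs below: fields the dump will read locklessly keep their writers serialized by RTNL, but the writes are annotated with WRITE_ONCE() and the lockless reads with READ_ONCE(), so the accesses cannot be torn and KCSAN does not flag them as data races. A minimal sketch of the pairing (dev->foo and IFLA_FOO are made up for illustration, not real netdevice fields):

/* Writer side, still serialized by RTNL. */
static void dev_set_foo(struct net_device *dev, u32 foo)
{
        ASSERT_RTNL();
        WRITE_ONCE(dev->foo, foo);      /* pairs with READ_ONCE() below */
}

/* Reader side, e.g. a dump running without RTNL. */
static int rtnl_fill_foo(struct sk_buff *skb, const struct net_device *dev)
{
        return nla_put_u32(skb, IFLA_FOO, READ_ONCE(dev->foo));
}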
drivers/net/ppp/ppp_generic.c
@@ -1357,7 +1357,7 @@ static struct net *ppp_nl_get_link_net(const struct net_device *dev)
 {
        struct ppp *ppp = netdev_priv(dev);
 
-       return ppp->ppp_net;
+       return READ_ONCE(ppp->ppp_net);
 }
 
 static struct rtnl_link_ops ppp_link_ops __read_mostly = {
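
The same one-line conversion repeats below for vxlan, the IPv4/IPv6 tunnels and xfrm interfaces: ->get_link_net() is reachable from the RTNL-free dump path, so the pointer load gets annotated. A sketch of the shape for a hypothetical driver (struct foo_priv is made up):

/* Hypothetical driver private data; only the net pointer matters here. */
struct foo_priv {
        struct net *net;
};

static struct net *foo_get_link_net(const struct net_device *dev)
{
        struct foo_priv *priv = netdev_priv(dev);

        /* The dump path may call this without holding RTNL. */
        return READ_ONCE(priv->net);
}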
drivers/net/vxlan/vxlan_core.c
@@ -4569,7 +4569,7 @@ static struct net *vxlan_get_link_net(const struct net_device *dev)
 {
        struct vxlan_dev *vxlan = netdev_priv(dev);
 
-       return vxlan->net;
+       return READ_ONCE(vxlan->net);
 }
 
 static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
net/core/dev.c
@@ -8544,27 +8544,29 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
 static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
 {
        unsigned int old_flags = dev->flags;
+       unsigned int promiscuity, flags;
        kuid_t uid;
        kgid_t gid;
 
        ASSERT_RTNL();
 
-       dev->flags |= IFF_PROMISC;
-       dev->promiscuity += inc;
-       if (dev->promiscuity == 0) {
+       promiscuity = dev->promiscuity + inc;
+       if (promiscuity == 0) {
                /*
                 * Avoid overflow.
                 * If inc causes overflow, untouch promisc and return error.
                 */
-               if (inc < 0)
-                       dev->flags &= ~IFF_PROMISC;
-               else {
-                       dev->promiscuity -= inc;
+               if (unlikely(inc > 0)) {
                        netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n");
                        return -EOVERFLOW;
                }
+               flags = old_flags & ~IFF_PROMISC;
+       } else {
+               flags = old_flags | IFF_PROMISC;
        }
-       if (dev->flags != old_flags) {
+       WRITE_ONCE(dev->promiscuity, promiscuity);
+       if (flags != old_flags) {
+               WRITE_ONCE(dev->flags, flags);
                netdev_info(dev, "%s promiscuous mode\n",
                            dev->flags & IFF_PROMISC ? "entered" : "left");
                if (audit_enabled) {
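
Worth noting about the rewrite above: the old code set IFF_PROMISC first and undid it on overflow, so a lockless reader could briefly observe a flag flip that never really happened. The new code computes the counter and the flags in locals and publishes them with WRITE_ONCE() only once the operation is known to succeed. A condensed sketch of that compute-locally, publish-once shape (set_counted_flag() is a hypothetical helper; the real functions also log and, for promiscuity, emit an audit record):

static int set_counted_flag(struct net_device *dev, int inc,
                            unsigned int flag, unsigned int *count)
{
        unsigned int new_count = *count + inc;
        unsigned int flags;

        ASSERT_RTNL();
        /* Counter wrapped around zero on an increment: reject, untouched. */
        if (new_count == 0 && unlikely(inc > 0))
                return -EOVERFLOW;
        flags = new_count ? dev->flags | flag : dev->flags & ~flag;
        WRITE_ONCE(*count, new_count);  /* readers see old or new, never between */
        if (flags != dev->flags)
                WRITE_ONCE(dev->flags, flags);
        return 0;
}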
@@ -8615,25 +8617,27 @@ EXPORT_SYMBOL(dev_set_promiscuity);
 static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
 {
        unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
+       unsigned int allmulti, flags;
 
        ASSERT_RTNL();
 
-       dev->flags |= IFF_ALLMULTI;
-       dev->allmulti += inc;
-       if (dev->allmulti == 0) {
+       allmulti = dev->allmulti + inc;
+       if (allmulti == 0) {
                /*
                 * Avoid overflow.
                 * If inc causes overflow, untouch allmulti and return error.
                 */
-               if (inc < 0)
-                       dev->flags &= ~IFF_ALLMULTI;
-               else {
-                       dev->allmulti -= inc;
+               if (unlikely(inc > 0)) {
                        netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n");
                        return -EOVERFLOW;
                }
+               flags = old_flags & ~IFF_ALLMULTI;
+       } else {
+               flags = old_flags | IFF_ALLMULTI;
        }
-       if (dev->flags ^ old_flags) {
+       WRITE_ONCE(dev->allmulti, allmulti);
+       if (flags != old_flags) {
+               WRITE_ONCE(dev->flags, flags);
                netdev_info(dev, "%s allmulticast mode\n",
                            dev->flags & IFF_ALLMULTI ? "entered" : "left");
                dev_change_rx_flags(dev, IFF_ALLMULTI);
@@ -8959,7 +8963,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
                return -ERANGE;
 
        if (new_len != orig_len) {
-               dev->tx_queue_len = new_len;
+               WRITE_ONCE(dev->tx_queue_len, new_len);
                res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
                res = notifier_to_errno(res);
                if (res)
@@ -8973,7 +8977,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
 
 err_rollback:
        netdev_err(dev, "refused to change device tx_queue_len\n");
-       dev->tx_queue_len = orig_len;
+       WRITE_ONCE(dev->tx_queue_len, orig_len);
        return res;
 }
@@ -9219,7 +9223,7 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
                netif_carrier_off(dev);
        else
                netif_carrier_on(dev);
-       dev->proto_down = proto_down;
+       WRITE_ONCE(dev->proto_down, proto_down);
        return 0;
 }
@@ -9233,18 +9237,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
                                  u32 value)
 {
+       u32 proto_down_reason;
        int b;
 
        if (!mask) {
-               dev->proto_down_reason = value;
+               proto_down_reason = value;
        } else {
+               proto_down_reason = dev->proto_down_reason;
                for_each_set_bit(b, &mask, 32) {
                        if (value & (1 << b))
-                               dev->proto_down_reason |= BIT(b);
+                               proto_down_reason |= BIT(b);
                        else
-                               dev->proto_down_reason &= ~BIT(b);
+                               proto_down_reason &= ~BIT(b);
                }
        }
+       WRITE_ONCE(dev->proto_down_reason, proto_down_reason);
 }
 
 struct bpf_xdp_link {
net/core/rtnetlink.c
@@ -1036,7 +1036,7 @@ static size_t rtnl_proto_down_size(const struct net_device *dev)
 {
        size_t size = nla_total_size(1);
 
-       if (dev->proto_down_reason)
-               size += nla_total_size(0) + nla_total_size(4);
+       /* Assume dev->proto_down_reason is not zero. */
+       size += nla_total_size(0) + nla_total_size(4);
 
        return size;
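
The sizing change above is a consequence of the lockless dump: dev->proto_down_reason can change between the size estimate and rtnl_fill_proto_down(), so the estimate becomes an unconditional upper bound. Over-reserving only wastes a few bytes of skb space; under-reserving would make the fill fail with EMSGSIZE. A sketch of the resulting bound:

/* Upper bound on the space rtnl_fill_proto_down() can consume. */
static size_t proto_down_size_upper_bound(void)
{
        return nla_total_size(1) +      /* IFLA_PROTO_DOWN */
               nla_total_size(0) +      /* IFLA_PROTO_DOWN_REASON nest header */
               nla_total_size(4);       /* IFLA_PROTO_DOWN_REASON_VALUE (u32) */
}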
@@ -1477,13 +1477,15 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb,
 static u32 rtnl_xdp_prog_skb(struct net_device *dev)
 {
        const struct bpf_prog *generic_xdp_prog;
+       u32 res = 0;
 
-       ASSERT_RTNL();
-
-       generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
-       if (!generic_xdp_prog)
-               return 0;
-       return generic_xdp_prog->aux->id;
+       rcu_read_lock();
+       generic_xdp_prog = rcu_dereference(dev->xdp_prog);
+       if (generic_xdp_prog)
+               res = generic_xdp_prog->aux->id;
+       rcu_read_unlock();
+
+       return res;
 }
 
 static u32 rtnl_xdp_prog_drv(struct net_device *dev)
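
The conversion above swaps rtnl_dereference(), which is only legal while RTNL is held, for an rcu_read_lock() section; anything derived from the protected pointer must be copied out before rcu_read_unlock(), after which the object may be freed. The same recipe applies to any RCU-managed pointer; a self-contained sketch with made-up types:

struct my_obj {
        u32 id;
};

struct my_holder {
        struct my_obj __rcu *obj;
};

static u32 my_obj_id(struct my_holder *h)
{
        const struct my_obj *obj;
        u32 id = 0;

        rcu_read_lock();
        obj = rcu_dereference(h->obj);
        if (obj)
                id = obj->id;   /* copy out before the object can be freed */
        rcu_read_unlock();
        return id;
}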
@@ -1603,7 +1605,8 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
        upper_dev = netdev_master_upper_dev_get_rcu(dev);
        if (upper_dev)
-               ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
+               ret = nla_put_u32(skb, IFLA_MASTER,
+                                 READ_ONCE(upper_dev->ifindex));
        rcu_read_unlock();
 
        return ret;
@@ -1736,10 +1739,10 @@ static int rtnl_fill_proto_down(struct sk_buff *skb,
        struct nlattr *pr;
        u32 preason;
 
-       if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
+       if (nla_put_u8(skb, IFLA_PROTO_DOWN, READ_ONCE(dev->proto_down)))
                goto nla_put_failure;
 
-       preason = dev->proto_down_reason;
+       preason = READ_ONCE(dev->proto_down_reason);
        if (!preason)
                return 0;
@@ -1812,6 +1815,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
                            u32 event, int *new_nsid, int new_ifindex,
                            int tgt_netnsid, gfp_t gfp)
 {
+       char devname[IFNAMSIZ];
        struct ifinfomsg *ifm;
        struct nlmsghdr *nlh;
        struct Qdisc *qdisc;
@@ -1824,41 +1828,51 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
        ifm = nlmsg_data(nlh);
        ifm->ifi_family = AF_UNSPEC;
        ifm->__ifi_pad = 0;
-       ifm->ifi_type = dev->type;
-       ifm->ifi_index = dev->ifindex;
+       ifm->ifi_type = READ_ONCE(dev->type);
+       ifm->ifi_index = READ_ONCE(dev->ifindex);
        ifm->ifi_flags = dev_get_flags(dev);
        ifm->ifi_change = change;
 
        if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
                goto nla_put_failure;
 
-       qdisc = rtnl_dereference(dev->qdisc);
-       if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
-           nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
+       netdev_copy_name(dev, devname);
+       if (nla_put_string(skb, IFLA_IFNAME, devname))
+               goto nla_put_failure;
+
+       if (nla_put_u32(skb, IFLA_TXQLEN, READ_ONCE(dev->tx_queue_len)) ||
            nla_put_u8(skb, IFLA_OPERSTATE,
-                      netif_running(dev) ? dev->operstate : IF_OPER_DOWN) ||
-           nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) ||
-           nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
-           nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) ||
-           nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) ||
-           nla_put_u32(skb, IFLA_GROUP, dev->group) ||
-           nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
-           nla_put_u32(skb, IFLA_ALLMULTI, dev->allmulti) ||
-           nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
-           nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
-           nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
-           nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
-           nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) ||
-           nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) ||
-           nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
-           nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
+                      netif_running(dev) ? READ_ONCE(dev->operstate) :
+                                           IF_OPER_DOWN) ||
+           nla_put_u8(skb, IFLA_LINKMODE, READ_ONCE(dev->link_mode)) ||
+           nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) ||
+           nla_put_u32(skb, IFLA_MIN_MTU, READ_ONCE(dev->min_mtu)) ||
+           nla_put_u32(skb, IFLA_MAX_MTU, READ_ONCE(dev->max_mtu)) ||
+           nla_put_u32(skb, IFLA_GROUP, READ_ONCE(dev->group)) ||
+           nla_put_u32(skb, IFLA_PROMISCUITY, READ_ONCE(dev->promiscuity)) ||
+           nla_put_u32(skb, IFLA_ALLMULTI, READ_ONCE(dev->allmulti)) ||
+           nla_put_u32(skb, IFLA_NUM_TX_QUEUES,
+                       READ_ONCE(dev->num_tx_queues)) ||
+           nla_put_u32(skb, IFLA_GSO_MAX_SEGS,
+                       READ_ONCE(dev->gso_max_segs)) ||
+           nla_put_u32(skb, IFLA_GSO_MAX_SIZE,
+                       READ_ONCE(dev->gso_max_size)) ||
+           nla_put_u32(skb, IFLA_GRO_MAX_SIZE,
+                       READ_ONCE(dev->gro_max_size)) ||
+           nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE,
+                       READ_ONCE(dev->gso_ipv4_max_size)) ||
+           nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE,
+                       READ_ONCE(dev->gro_ipv4_max_size)) ||
+           nla_put_u32(skb, IFLA_TSO_MAX_SIZE,
+                       READ_ONCE(dev->tso_max_size)) ||
+           nla_put_u32(skb, IFLA_TSO_MAX_SEGS,
+                       READ_ONCE(dev->tso_max_segs)) ||
 #ifdef CONFIG_RPS
-           nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
+           nla_put_u32(skb, IFLA_NUM_RX_QUEUES,
+                       READ_ONCE(dev->num_rx_queues)) ||
 #endif
            put_master_ifindex(skb, dev) ||
            nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
-           (qdisc &&
-            nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) ||
            nla_put_ifalias(skb, dev) ||
            nla_put_u32(skb, IFLA_CARRIER_CHANGES,
                        atomic_read(&dev->carrier_up_count) +
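
dev->name needs more than a READ_ONCE(): a rename can be in progress while the lockless dump runs, and nla_put_string() straight from dev->name could emit a torn mix of old and new bytes. netdev_copy_name() therefore snapshots the name into the on-stack devname[] first. A sketch of the retry idea behind such a snapshot (not the kernel's exact implementation; the seqlock parameter is made up):

static void name_snapshot(char *dst, const struct net_device *dev,
                          seqlock_t *rename_seq)
{
        unsigned int seq;

        do {
                seq = read_seqbegin(rename_seq);
                strscpy(dst, dev->name, IFNAMSIZ);
        } while (read_seqretry(rename_seq, seq)); /* raced with a rename: retry */
}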
@@ -1909,9 +1923,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
                        goto nla_put_failure;
        }
 
-       if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp))
-               goto nla_put_failure;
-
        if (new_nsid &&
            nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
                goto nla_put_failure;
@@ -1924,6 +1935,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
                goto nla_put_failure;
 
        rcu_read_lock();
+       if (rtnl_fill_link_netnsid(skb, dev, src_net, GFP_ATOMIC))
+               goto nla_put_failure_rcu;
+       qdisc = rcu_dereference(dev->qdisc);
+       if (qdisc && nla_put_string(skb, IFLA_QDISC, qdisc->ops->id))
+               goto nla_put_failure_rcu;
        if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
                goto nla_put_failure_rcu;
        if (rtnl_fill_link_ifmap(skb, dev))
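
The two attributes that moved here (the link netns id and IFLA_QDISC) now sit in the same rcu_read_lock() section as the AF-specific and ifmap fills; note the gfp argument also had to become GFP_ATOMIC, since sleeping allocations are not allowed under RCU. Errors bail to nla_put_failure_rcu, which drops the lock before the message is cancelled. A sketch of that error-path shape (fill_something_rcu() is hypothetical):

static int fill_something_rcu(struct sk_buff *skb, struct net_device *dev); /* hypothetical */

static int fill_tail(struct sk_buff *skb, struct nlmsghdr *nlh,
                     struct net_device *dev)
{
        rcu_read_lock();
        if (fill_something_rcu(skb, dev))
                goto nla_put_failure_rcu;
        rcu_read_unlock();

        nlmsg_end(skb, nlh);
        return 0;

nla_put_failure_rcu:
        rcu_read_unlock();
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}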
net/ipv4/ip_tunnel.c
@@ -1120,7 +1120,7 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev)
 {
        struct ip_tunnel *tunnel = netdev_priv(dev);
 
-       return tunnel->net;
+       return READ_ONCE(tunnel->net);
 }
 EXPORT_SYMBOL(ip_tunnel_get_link_net);
net/ipv6/ip6_tunnel.c
@@ -2146,7 +2146,7 @@ struct net *ip6_tnl_get_link_net(const struct net_device *dev)
 {
        struct ip6_tnl *tunnel = netdev_priv(dev);
 
-       return tunnel->net;
+       return READ_ONCE(tunnel->net);
 }
 EXPORT_SYMBOL(ip6_tnl_get_link_net);
net/sched/sch_api.c
@@ -1334,7 +1334,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
                 * before again attaching a qdisc.
                 */
                if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
-                       dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
+                       WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
                        netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
                }
net/sched/sch_teql.c
@@ -78,7 +78,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
        struct net_device *dev = qdisc_dev(sch);
        struct teql_sched_data *q = qdisc_priv(sch);
 
-       if (q->q.qlen < dev->tx_queue_len) {
+       if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
                __skb_queue_tail(&q->q, skb);
                return NET_XMIT_SUCCESS;
        }
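
This READ_ONCE() is the reader half of the WRITE_ONCE() calls added in dev_change_tx_queue_len() and qdisc_create() above; the annotations only help if both sides of the race carry them. Any other lockless reader of tx_queue_len needs the same treatment, e.g. (hypothetical helper):

static bool dev_txq_below_limit(const struct net_device *dev,
                                unsigned int qlen)
{
        /* Pairs with WRITE_ONCE() of dev->tx_queue_len by writers. */
        return qlen < READ_ONCE(dev->tx_queue_len);
}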
net/xfrm/xfrm_interface_core.c
@@ -926,7 +926,7 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev)
 {
        struct xfrm_if *xi = netdev_priv(dev);
 
-       return xi->net;
+       return READ_ONCE(xi->net);
 }
 
 static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {