Commit b9a12601 authored by David S. Miller's avatar David S. Miller

Merge branch 'Close-race-between-un-register_netdevice_notifier-and-pernet_operations'

Kirill Tkhai says:

====================
Close race between {un, }register_netdevice_notifier and pernet_operations

the problem is {,un}register_netdevice_notifier() do not take
pernet_ops_rwsem, and they don't see network namespaces, being
initialized in setup_net() and cleanup_net(), since at this
time net is not hashed to net_namespace_list.

This may lead to imbalance, when a notifier is called at time of
setup_net()/net is alive, but it's not called at time of cleanup_net(),
for the devices, hashed to the net, and vise versa. See (3/3) for
the scheme of imbalance.

This patchset fixes the problem by acquiring pernet_ops_rwsem
at the time of {,un}register_netdevice_notifier() (3/3).
(1-2/3) are preparations in xfrm and netfilter subsystems.

The problem was introduced a long ago, but backporting won't be easy,
since every previous kernel version may have changes in netdevice
notifiers, and they all need review and testing. Otherwise, there
may be more pernet_operations, which register or unregister
netdevice notifiers, and that leads to deadlock (which is was fixed
in 1-2/3). This patchset is for net-next.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents caeeeda3 328fbe74
......@@ -1894,7 +1894,7 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
#endif
}
void __net_init xfrm_dev_init(void);
void __init xfrm_dev_init(void);
#ifdef CONFIG_XFRM_OFFLOAD
void xfrm_dev_resume(struct sk_buff *skb);
......
......@@ -1625,6 +1625,8 @@ int register_netdevice_notifier(struct notifier_block *nb)
struct net *net;
int err;
/* Close race with setup_net() and cleanup_net() */
down_write(&pernet_ops_rwsem);
rtnl_lock();
err = raw_notifier_chain_register(&netdev_chain, nb);
if (err)
......@@ -1649,6 +1651,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
unlock:
rtnl_unlock();
up_write(&pernet_ops_rwsem);
return err;
rollback:
......@@ -1694,6 +1697,8 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
struct net *net;
int err;
/* Close race with setup_net() and cleanup_net() */
down_write(&pernet_ops_rwsem);
rtnl_lock();
err = raw_notifier_chain_unregister(&netdev_chain, nb);
if (err)
......@@ -1713,6 +1718,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
up_read(&net_rwsem);
unlock:
rtnl_unlock();
up_write(&pernet_ops_rwsem);
return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);
......
......@@ -20,7 +20,7 @@
#include <linux/netfilter/xt_TEE.h>
struct xt_tee_priv {
struct notifier_block notifier;
struct list_head list;
struct xt_tee_tginfo *tginfo;
int oif;
};
......@@ -51,29 +51,35 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
}
#endif
static DEFINE_MUTEX(priv_list_mutex);
static LIST_HEAD(priv_list);
static int tee_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct xt_tee_priv *priv;
priv = container_of(this, struct xt_tee_priv, notifier);
switch (event) {
case NETDEV_REGISTER:
if (!strcmp(dev->name, priv->tginfo->oif))
priv->oif = dev->ifindex;
break;
case NETDEV_UNREGISTER:
if (dev->ifindex == priv->oif)
priv->oif = -1;
break;
case NETDEV_CHANGENAME:
if (!strcmp(dev->name, priv->tginfo->oif))
priv->oif = dev->ifindex;
else if (dev->ifindex == priv->oif)
priv->oif = -1;
break;
mutex_lock(&priv_list_mutex);
list_for_each_entry(priv, &priv_list, list) {
switch (event) {
case NETDEV_REGISTER:
if (!strcmp(dev->name, priv->tginfo->oif))
priv->oif = dev->ifindex;
break;
case NETDEV_UNREGISTER:
if (dev->ifindex == priv->oif)
priv->oif = -1;
break;
case NETDEV_CHANGENAME:
if (!strcmp(dev->name, priv->tginfo->oif))
priv->oif = dev->ifindex;
else if (dev->ifindex == priv->oif)
priv->oif = -1;
break;
}
}
mutex_unlock(&priv_list_mutex);
return NOTIFY_DONE;
}
......@@ -89,8 +95,6 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
if (info->oif[0]) {
int ret;
if (info->oif[sizeof(info->oif)-1] != '\0')
return -EINVAL;
......@@ -100,14 +104,11 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
priv->tginfo = info;
priv->oif = -1;
priv->notifier.notifier_call = tee_netdev_event;
info->priv = priv;
ret = register_netdevice_notifier(&priv->notifier);
if (ret) {
kfree(priv);
return ret;
}
mutex_lock(&priv_list_mutex);
list_add(&priv->list, &priv_list);
mutex_unlock(&priv_list_mutex);
} else
info->priv = NULL;
......@@ -120,7 +121,9 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par)
struct xt_tee_tginfo *info = par->targinfo;
if (info->priv) {
unregister_netdevice_notifier(&info->priv->notifier);
mutex_lock(&priv_list_mutex);
list_del(&info->priv->list);
mutex_unlock(&priv_list_mutex);
kfree(info->priv);
}
static_key_slow_dec(&xt_tee_enabled);
......@@ -153,13 +156,29 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
#endif
};
static struct notifier_block tee_netdev_notifier = {
.notifier_call = tee_netdev_event,
};
static int __init tee_tg_init(void)
{
return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
int ret;
ret = xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
if (ret)
return ret;
ret = register_netdevice_notifier(&tee_netdev_notifier);
if (ret) {
xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
return ret;
}
return 0;
}
static void __exit tee_tg_exit(void)
{
unregister_netdevice_notifier(&tee_netdev_notifier);
xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
}
......
......@@ -350,7 +350,7 @@ static struct notifier_block xfrm_dev_notifier = {
.notifier_call = xfrm_dev_event,
};
void __net_init xfrm_dev_init(void)
void __init xfrm_dev_init(void)
{
register_netdevice_notifier(&xfrm_dev_notifier);
}
......@@ -2895,8 +2895,6 @@ static int __net_init xfrm_policy_init(struct net *net)
INIT_LIST_HEAD(&net->xfrm.policy_all);
INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
if (net_eq(net, &init_net))
xfrm_dev_init();
return 0;
out_bydst:
......@@ -2999,6 +2997,7 @@ void __init xfrm_init(void)
INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
register_pernet_subsys(&xfrm_net_ops);
xfrm_dev_init();
seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init();
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment