Commit decbc7a6, authored by Parav Pandit, committed by Jason Gunthorpe

RDMA/core: Introduce a helper function to change net namespace of rdma device

Introduce a helper function that changes an rdma device's net namespace. It
performs a mini disable/enable sequence so that the device is visible only
in its assigned net namespace.

Device unregistration, device rename and device change net namespace
may be invoked concurrently.

(a) device unregistration needs to wait if a device change (rename or net
    namespace change) operation is in progress.
(b) device net namespace change should not proceed if the unregistration
    has started.
(c) while one cpu is changing device net namespace, other cpu should not
    be able to rename or change net namespace.

To address above concurrency,
(a) Use the device's unregistration_lock mutex to synchronize between
    ib_unregister_device() and the net namespace change operation.
(b) In cases where ib_unregister_device() has started unregistration before
    the net namespace change got a chance to acquire the lock, validate the
    refcount - if it has dropped to zero, abort the net namespace change
    operation.

Finally use the helper function to change net namespace of ib device to
move the device back to init_net when such net is deleted.
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 3042492b
...@@ -201,6 +201,9 @@ static struct notifier_block ibdev_lsm_nb = { ...@@ -201,6 +201,9 @@ static struct notifier_block ibdev_lsm_nb = {
.notifier_call = ib_security_change, .notifier_call = ib_security_change,
}; };
/*
 * Forward declaration: rdma_dev_change_netns() is defined further down in
 * this file but is called earlier, from rdma_dev_exit_net().
 */
static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
struct net *net);
/* Pointer to the RCU head at the start of the ib_port_data array */ /* Pointer to the RCU head at the start of the ib_port_data array */
struct ib_port_data_rcu { struct ib_port_data_rcu {
struct rcu_head rcu_head; struct rcu_head rcu_head;
...@@ -861,6 +864,8 @@ static int add_compat_devs(struct ib_device *device) ...@@ -861,6 +864,8 @@ static int add_compat_devs(struct ib_device *device)
unsigned long index; unsigned long index;
int ret = 0; int ret = 0;
lockdep_assert_held(&devices_rwsem);
down_read(&rdma_nets_rwsem); down_read(&rdma_nets_rwsem);
xa_for_each (&rdma_nets, index, rnet) { xa_for_each (&rdma_nets, index, rnet) {
ret = add_one_compat_dev(device, rnet); ret = add_one_compat_dev(device, rnet);
...@@ -978,6 +983,11 @@ static void rdma_dev_exit_net(struct net *net) ...@@ -978,6 +983,11 @@ static void rdma_dev_exit_net(struct net *net)
remove_one_compat_dev(dev, rnet->id); remove_one_compat_dev(dev, rnet->id);
/*
* If the real device is in the NS then move it back to init.
*/
rdma_dev_change_netns(dev, net, &init_net);
put_device(&dev->dev); put_device(&dev->dev);
down_read(&devices_rwsem); down_read(&devices_rwsem);
} }
...@@ -1428,6 +1438,73 @@ void ib_unregister_device_queued(struct ib_device *ib_dev) ...@@ -1428,6 +1438,73 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
} }
EXPORT_SYMBOL(ib_unregister_device_queued); EXPORT_SYMBOL(ib_unregister_device_queued);
/*
* The caller must pass in a device that has the kref held and the refcount
* released. If the device is in cur_net and still registered then it is moved
* into net.
*/
static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
struct net *net)
{
int ret2 = -EINVAL;
int ret;
mutex_lock(&device->unregistration_lock);
/*
* If a device not under ib_device_get() or the unregistration_lock
* the namespace can be changed, or it can be unregistered. Check
* again under the lock.
*/
if (refcount_read(&device->refcount) == 0 ||
!net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) {
ret = -ENODEV;
goto out;
}
kobject_uevent(&device->dev.kobj, KOBJ_REMOVE);
disable_device(device);
/*
* At this point no one can be using the device, so it is safe to
* change the namespace.
*/
write_pnet(&device->coredev.rdma_net, net);
/*
* Currently rdma devices are system wide unique. So the device name
* is guaranteed free in the new namespace. Publish the new namespace
* at the sysfs level.
*/
down_read(&devices_rwsem);
ret = device_rename(&device->dev, dev_name(&device->dev));
up_read(&devices_rwsem);
if (ret) {
dev_warn(&device->dev,
"%s: Couldn't rename device after namespace change\n",
__func__);
/* Try and put things back and re-enable the device */
write_pnet(&device->coredev.rdma_net, cur_net);
}
ret2 = enable_device_and_get(device);
if (ret2)
/*
* This shouldn't really happen, but if it does, let the user
* retry at later point. So don't disable the device.
*/
dev_warn(&device->dev,
"%s: Couldn't re-enable device after namespace change\n",
__func__);
kobject_uevent(&device->dev.kobj, KOBJ_ADD);
ib_device_put(device);
out:
mutex_unlock(&device->unregistration_lock);
if (ret)
return ret;
return ret2;
}
static struct pernet_operations rdma_dev_net_ops = { static struct pernet_operations rdma_dev_net_ops = {
.init = rdma_dev_init_net, .init = rdma_dev_init_net,
.exit = rdma_dev_exit_net, .exit = rdma_dev_exit_net,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment