Commit 4be3a4fa authored by Parav Pandit, committed by Doug Ledford

IB/core: Fix kernel crash during fail to initialize device

This patch fixes the kernel crash that occurs in ib_dealloc_device()
when it is called because the provider driver fails with an error after
ib_alloc_device() but before it can register using ib_register_device().

This crash was seen in the lab, as shown below, and can occur with any
IB device that fails to complete its device initialization before
invoking ib_register_device().

This patch avoids touching the cache and port immutable structures if the
device is not yet initialized.
It also releases the related memory when initialization of the cache or
port immutable data structures fails during ib_register_device().

[81416.561946] BUG: unable to handle kernel NULL pointer dereference at (null)
[81416.570340] IP: ib_cache_release_one+0x29/0x80 [ib_core]
[81416.576222] PGD 78da66067
[81416.576223] PUD 7f2d7c067
[81416.579484] PMD 0
[81416.582720]
[81416.587242] Oops: 0000 [#1] SMP
[81416.722395] task: ffff8807887515c0 task.stack: ffffc900062c0000
[81416.729148] RIP: 0010:ib_cache_release_one+0x29/0x80 [ib_core]
[81416.735793] RSP: 0018:ffffc900062c3a90 EFLAGS: 00010202
[81416.741823] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
[81416.749785] RDX: 0000000000000000 RSI: 0000000000000282 RDI: ffff880859fec000
[81416.757757] RBP: ffffc900062c3aa0 R08: ffff8808536e5ac0 R09: ffff880859fec5b0
[81416.765708] R10: 00000000536e5c01 R11: ffff8808536e5ac0 R12: ffff880859fec000
[81416.773672] R13: 0000000000000000 R14: ffff8808536e5ac0 R15: ffff88084ebc0060
[81416.781621] FS:  00007fd879fab740(0000) GS:ffff88085fac0000(0000) knlGS:0000000000000000
[81416.790522] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[81416.797094] CR2: 0000000000000000 CR3: 00000007eb215000 CR4: 00000000003406e0
[81416.805051] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[81416.812997] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[81416.820950] Call Trace:
[81416.824226]  ib_device_release+0x1e/0x40 [ib_core]
[81416.829858]  device_release+0x32/0xa0
[81416.834370]  kobject_cleanup+0x63/0x170
[81416.839058]  kobject_put+0x25/0x50
[81416.843319]  ib_dealloc_device+0x25/0x40 [ib_core]
[81416.848986]  mlx5_ib_add+0x163/0x1990 [mlx5_ib]
[81416.854414]  mlx5_add_device+0x5a/0x160 [mlx5_core]
[81416.860191]  mlx5_register_interface+0x8d/0xc0 [mlx5_core]
[81416.866587]  ? 0xffffffffa09e9000
[81416.870816]  mlx5_ib_init+0x15/0x17 [mlx5_ib]
[81416.876094]  do_one_initcall+0x51/0x1b0
[81416.880861]  ? __vunmap+0x85/0xd0
[81416.885113]  ? kmem_cache_alloc_trace+0x14b/0x1b0
[81416.890768]  ? vfree+0x2e/0x70
[81416.894762]  do_init_module+0x60/0x1fa
[81416.899441]  load_module+0x15f6/0x1af0
[81416.904114]  ? __symbol_put+0x60/0x60
[81416.908709]  ? ima_post_read_file+0x3d/0x80
[81416.913828]  ? security_kernel_post_read_file+0x6b/0x80
[81416.920006]  SYSC_finit_module+0xa6/0xf0
[81416.924888]  SyS_finit_module+0xe/0x10
[81416.929568]  entry_SYSCALL_64_fastpath+0x1a/0xa9
[81416.935089] RIP: 0033:0x7fd879494949
[81416.939543] RSP: 002b:00007ffdbc1b4e58 EFLAGS: 00000202 ORIG_RAX: 0000000000000139
[81416.947982] RAX: ffffffffffffffda RBX: 0000000001b66f00 RCX: 00007fd879494949
[81416.955965] RDX: 0000000000000000 RSI: 000000000041a13c RDI: 0000000000000003
[81416.963926] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000001b652a0
[81416.971861] R10: 0000000000000003 R11: 0000000000000202 R12: 00007ffdbc1b3e70
[81416.979763] R13: 00007ffdbc1b3e50 R14: 0000000000000005 R15: 0000000000000000
[81417.008005] RIP: ib_cache_release_one+0x29/0x80 [ib_core] RSP: ffffc900062c3a90
[81417.016045] CR2: 0000000000000000

Fixes: 55aeed06 ("IB/core: Make ib_alloc_device init the kobject")
Fixes: 7738613e ("IB/core: Add per port immutable struct to ib_device")
Cc: <stable@vger.kernel.org> # v4.2+
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 3e31a490
...@@ -172,8 +172,16 @@ static void ib_device_release(struct device *device) ...@@ -172,8 +172,16 @@ static void ib_device_release(struct device *device)
{ {
struct ib_device *dev = container_of(device, struct ib_device, dev); struct ib_device *dev = container_of(device, struct ib_device, dev);
WARN_ON(dev->reg_state == IB_DEV_REGISTERED);
if (dev->reg_state == IB_DEV_UNREGISTERED) {
/*
* In IB_DEV_UNINITIALIZED state, cache or port table
* is not even created. Free cache and port table only when
* device reaches UNREGISTERED state.
*/
ib_cache_release_one(dev); ib_cache_release_one(dev);
kfree(dev->port_immutable); kfree(dev->port_immutable);
}
kfree(dev); kfree(dev);
} }
...@@ -380,32 +388,27 @@ int ib_register_device(struct ib_device *device, ...@@ -380,32 +388,27 @@ int ib_register_device(struct ib_device *device,
ret = ib_cache_setup_one(device); ret = ib_cache_setup_one(device);
if (ret) { if (ret) {
pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n"); pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
goto out; goto port_cleanup;
} }
ret = ib_device_register_rdmacg(device); ret = ib_device_register_rdmacg(device);
if (ret) { if (ret) {
pr_warn("Couldn't register device with rdma cgroup\n"); pr_warn("Couldn't register device with rdma cgroup\n");
ib_cache_cleanup_one(device); goto cache_cleanup;
goto out;
} }
memset(&device->attrs, 0, sizeof(device->attrs)); memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw); ret = device->query_device(device, &device->attrs, &uhw);
if (ret) { if (ret) {
pr_warn("Couldn't query the device attributes\n"); pr_warn("Couldn't query the device attributes\n");
ib_device_unregister_rdmacg(device); goto cache_cleanup;
ib_cache_cleanup_one(device);
goto out;
} }
ret = ib_device_register_sysfs(device, port_callback); ret = ib_device_register_sysfs(device, port_callback);
if (ret) { if (ret) {
pr_warn("Couldn't register device %s with driver model\n", pr_warn("Couldn't register device %s with driver model\n",
device->name); device->name);
ib_device_unregister_rdmacg(device); goto cache_cleanup;
ib_cache_cleanup_one(device);
goto out;
} }
device->reg_state = IB_DEV_REGISTERED; device->reg_state = IB_DEV_REGISTERED;
...@@ -417,6 +420,14 @@ int ib_register_device(struct ib_device *device, ...@@ -417,6 +420,14 @@ int ib_register_device(struct ib_device *device,
down_write(&lists_rwsem); down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list); list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem); up_write(&lists_rwsem);
mutex_unlock(&device_mutex);
return 0;
cache_cleanup:
ib_cache_cleanup_one(device);
ib_cache_release_one(device);
port_cleanup:
kfree(device->port_immutable);
out: out:
mutex_unlock(&device_mutex); mutex_unlock(&device_mutex);
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment