Commit 6b57cea9 authored by Parav Pandit's avatar Parav Pandit Committed by Jason Gunthorpe

IB/core: Let IB core distribute cache update events

Currently when the low level driver notifies Pkey, GID, and port change
events they are notified to the registered handlers in the order they are
registered.

IB core and other ULPs such as IPoIB are interested in GID, LID, Pkey
change events.

Since all GID queries done by ULPs are serviced by IB core, and the IB
core deferes cache updates to a work queue, it is possible for other
clients to see stale cache data when they handle their own events.

For example, the below call tree shows how ipoib will call
rdma_query_gid() concurrently with the update to the cache sitting in the
WQ.

mlx5_ib_handle_event()
  ib_dispatch_event()
    ib_cache_event()
       queue_work() -> slow cache update

    [..]
    ipoib_event()
     queue_work()
       [..]
       work handler
         ipoib_ib_dev_flush_light()
           __ipoib_ib_dev_flush()
              ipoib_dev_addr_changed_valid()
                rdma_query_gid() <- Returns old GID, cache not updated.

Move all the event dispatch to a work queue so that the cache update is
always done before any clients are notified.

Fixes: f35faa4b ("IB/core: Simplify ib_query_gid to always refer to cache")
Link: https://lore.kernel.org/r/20191212113024.336702-3-leon@kernel.orgSigned-off-by: default avatarParav Pandit <parav@mellanox.com>
Signed-off-by: default avatarLeon Romanovsky <leonro@mellanox.com>
Reviewed-by: default avatarJason Gunthorpe <jgg@mellanox.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent 4cca96a8
...@@ -51,8 +51,7 @@ struct ib_pkey_cache { ...@@ -51,8 +51,7 @@ struct ib_pkey_cache {
struct ib_update_work { struct ib_update_work {
struct work_struct work; struct work_struct work;
struct ib_device *device; struct ib_event event;
u8 port_num;
bool enforce_security; bool enforce_security;
}; };
...@@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) ...@@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
event.element.port_num = port; event.element.port_num = port;
event.event = IB_EVENT_GID_CHANGE; event.event = IB_EVENT_GID_CHANGE;
ib_dispatch_event(&event); ib_dispatch_event_clients(&event);
} }
static const char * const gid_type_str[] = { static const char * const gid_type_str[] = {
...@@ -1381,9 +1380,8 @@ static int config_non_roce_gid_cache(struct ib_device *device, ...@@ -1381,9 +1380,8 @@ static int config_non_roce_gid_cache(struct ib_device *device,
return ret; return ret;
} }
static void ib_cache_update(struct ib_device *device, static int
u8 port, ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
bool enforce_security)
{ {
struct ib_port_attr *tprops = NULL; struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
...@@ -1391,11 +1389,11 @@ static void ib_cache_update(struct ib_device *device, ...@@ -1391,11 +1389,11 @@ static void ib_cache_update(struct ib_device *device,
int ret; int ret;
if (!rdma_is_port_valid(device, port)) if (!rdma_is_port_valid(device, port))
return; return -EINVAL;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL); tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops) if (!tprops)
return; return -ENOMEM;
ret = ib_query_port(device, port, tprops); ret = ib_query_port(device, port, tprops);
if (ret) { if (ret) {
...@@ -1413,8 +1411,10 @@ static void ib_cache_update(struct ib_device *device, ...@@ -1413,8 +1411,10 @@ static void ib_cache_update(struct ib_device *device,
pkey_cache = kmalloc(struct_size(pkey_cache, table, pkey_cache = kmalloc(struct_size(pkey_cache, table,
tprops->pkey_tbl_len), tprops->pkey_tbl_len),
GFP_KERNEL); GFP_KERNEL);
if (!pkey_cache) if (!pkey_cache) {
ret = -ENOMEM;
goto err; goto err;
}
pkey_cache->table_len = tprops->pkey_tbl_len; pkey_cache->table_len = tprops->pkey_tbl_len;
...@@ -1446,50 +1446,84 @@ static void ib_cache_update(struct ib_device *device, ...@@ -1446,50 +1446,84 @@ static void ib_cache_update(struct ib_device *device,
kfree(old_pkey_cache); kfree(old_pkey_cache);
kfree(tprops); kfree(tprops);
return; return 0;
err: err:
kfree(pkey_cache); kfree(pkey_cache);
kfree(tprops); kfree(tprops);
return ret;
} }
static void ib_cache_task(struct work_struct *_work) static void ib_cache_event_task(struct work_struct *_work)
{ {
struct ib_update_work *work = struct ib_update_work *work =
container_of(_work, struct ib_update_work, work); container_of(_work, struct ib_update_work, work);
int ret;
ib_cache_update(work->device, /* Before distributing the cache update event, first sync
work->port_num, * the cache.
*/
ret = ib_cache_update(work->event.device, work->event.element.port_num,
work->enforce_security); work->enforce_security);
/* GID event is notified already for individual GID entries by
* dispatch_gid_change_event(). Hence, notifiy for rest of the
* events.
*/
if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
ib_dispatch_event_clients(&work->event);
kfree(work); kfree(work);
} }
static void ib_cache_event(struct ib_event_handler *handler, static void ib_generic_event_task(struct work_struct *_work)
struct ib_event *event)
{ {
struct ib_update_work *work; struct ib_update_work *work =
container_of(_work, struct ib_update_work, work);
if (event->event == IB_EVENT_PORT_ERR || ib_dispatch_event_clients(&work->event);
kfree(work);
}
static bool is_cache_update_event(const struct ib_event *event)
{
return (event->event == IB_EVENT_PORT_ERR ||
event->event == IB_EVENT_PORT_ACTIVE || event->event == IB_EVENT_PORT_ACTIVE ||
event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_LID_CHANGE ||
event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_PKEY_CHANGE ||
event->event == IB_EVENT_CLIENT_REREGISTER || event->event == IB_EVENT_CLIENT_REREGISTER ||
event->event == IB_EVENT_GID_CHANGE) { event->event == IB_EVENT_GID_CHANGE);
work = kmalloc(sizeof *work, GFP_ATOMIC); }
if (work) {
INIT_WORK(&work->work, ib_cache_task); /**
work->device = event->device; * ib_dispatch_event - Dispatch an asynchronous event
work->port_num = event->element.port_num; * @event:Event to dispatch
*
* Low-level drivers must call ib_dispatch_event() to dispatch the
* event to all registered event handlers when an asynchronous event
* occurs.
*/
void ib_dispatch_event(const struct ib_event *event)
{
struct ib_update_work *work;
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return;
if (is_cache_update_event(event))
INIT_WORK(&work->work, ib_cache_event_task);
else
INIT_WORK(&work->work, ib_generic_event_task);
work->event = *event;
if (event->event == IB_EVENT_PKEY_CHANGE || if (event->event == IB_EVENT_PKEY_CHANGE ||
event->event == IB_EVENT_GID_CHANGE) event->event == IB_EVENT_GID_CHANGE)
work->enforce_security = true; work->enforce_security = true;
else
work->enforce_security = false;
queue_work(ib_wq, &work->work); queue_work(ib_wq, &work->work);
}
}
} }
EXPORT_SYMBOL(ib_dispatch_event);
int ib_cache_setup_one(struct ib_device *device) int ib_cache_setup_one(struct ib_device *device)
{ {
...@@ -1505,9 +1539,6 @@ int ib_cache_setup_one(struct ib_device *device) ...@@ -1505,9 +1539,6 @@ int ib_cache_setup_one(struct ib_device *device)
rdma_for_each_port (device, p) rdma_for_each_port (device, p)
ib_cache_update(device, p, true); ib_cache_update(device, p, true);
INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
device, ib_cache_event);
ib_register_event_handler(&device->cache.event_handler);
return 0; return 0;
} }
...@@ -1529,14 +1560,12 @@ void ib_cache_release_one(struct ib_device *device) ...@@ -1529,14 +1560,12 @@ void ib_cache_release_one(struct ib_device *device)
void ib_cache_cleanup_one(struct ib_device *device) void ib_cache_cleanup_one(struct ib_device *device)
{ {
/* The cleanup function unregisters the event handler, /* The cleanup function waits for all in-progress workqueue
* waits for all in-progress workqueue elements and cleans * elements and cleans up the GID cache. This function should be
* up the GID cache. This function should be called after * called after the device was removed from the devices list and
* the device was removed from the devices list and all * all clients were removed, so the cache exists but is
* clients were removed, so the cache exists but is
* non-functional and shouldn't be updated anymore. * non-functional and shouldn't be updated anymore.
*/ */
ib_unregister_event_handler(&device->cache.event_handler);
flush_workqueue(ib_wq); flush_workqueue(ib_wq);
gid_table_cleanup_one(device); gid_table_cleanup_one(device);
......
...@@ -149,6 +149,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); ...@@ -149,6 +149,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
int ib_cache_setup_one(struct ib_device *device); int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device);
void ib_dispatch_event_clients(struct ib_event *event);
#ifdef CONFIG_CGROUP_RDMA #ifdef CONFIG_CGROUP_RDMA
void ib_device_register_rdmacg(struct ib_device *device); void ib_device_register_rdmacg(struct ib_device *device);
......
...@@ -588,6 +588,7 @@ struct ib_device *_ib_alloc_device(size_t size) ...@@ -588,6 +588,7 @@ struct ib_device *_ib_alloc_device(size_t size)
INIT_LIST_HEAD(&device->event_handler_list); INIT_LIST_HEAD(&device->event_handler_list);
spin_lock_init(&device->event_handler_lock); spin_lock_init(&device->event_handler_lock);
init_rwsem(&device->event_handler_rwsem);
mutex_init(&device->unregistration_lock); mutex_init(&device->unregistration_lock);
/* /*
* client_data needs to be alloc because we don't want our mark to be * client_data needs to be alloc because we don't want our mark to be
...@@ -1932,16 +1933,14 @@ EXPORT_SYMBOL(ib_set_client_data); ...@@ -1932,16 +1933,14 @@ EXPORT_SYMBOL(ib_set_client_data);
* ib_register_event_handler() registers an event handler that will be * ib_register_event_handler() registers an event handler that will be
* called back when asynchronous IB events occur (as defined in * called back when asynchronous IB events occur (as defined in
* chapter 11 of the InfiniBand Architecture Specification). This * chapter 11 of the InfiniBand Architecture Specification). This
* callback may occur in interrupt context. * callback occurs in workqueue context.
*/ */
void ib_register_event_handler(struct ib_event_handler *event_handler) void ib_register_event_handler(struct ib_event_handler *event_handler)
{ {
unsigned long flags; down_write(&event_handler->device->event_handler_rwsem);
spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
list_add_tail(&event_handler->list, list_add_tail(&event_handler->list,
&event_handler->device->event_handler_list); &event_handler->device->event_handler_list);
spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); up_write(&event_handler->device->event_handler_rwsem);
} }
EXPORT_SYMBOL(ib_register_event_handler); EXPORT_SYMBOL(ib_register_event_handler);
...@@ -1954,35 +1953,23 @@ EXPORT_SYMBOL(ib_register_event_handler); ...@@ -1954,35 +1953,23 @@ EXPORT_SYMBOL(ib_register_event_handler);
*/ */
void ib_unregister_event_handler(struct ib_event_handler *event_handler) void ib_unregister_event_handler(struct ib_event_handler *event_handler)
{ {
unsigned long flags; down_write(&event_handler->device->event_handler_rwsem);
spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
list_del(&event_handler->list); list_del(&event_handler->list);
spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); up_write(&event_handler->device->event_handler_rwsem);
} }
EXPORT_SYMBOL(ib_unregister_event_handler); EXPORT_SYMBOL(ib_unregister_event_handler);
/** void ib_dispatch_event_clients(struct ib_event *event)
* ib_dispatch_event - Dispatch an asynchronous event
* @event:Event to dispatch
*
* Low-level drivers must call ib_dispatch_event() to dispatch the
* event to all registered event handlers when an asynchronous event
* occurs.
*/
void ib_dispatch_event(struct ib_event *event)
{ {
unsigned long flags;
struct ib_event_handler *handler; struct ib_event_handler *handler;
spin_lock_irqsave(&event->device->event_handler_lock, flags); down_read(&event->device->event_handler_rwsem);
list_for_each_entry(handler, &event->device->event_handler_list, list) list_for_each_entry(handler, &event->device->event_handler_list, list)
handler->handler(handler, event); handler->handler(handler, event);
spin_unlock_irqrestore(&event->device->event_handler_lock, flags); up_read(&event->device->event_handler_rwsem);
} }
EXPORT_SYMBOL(ib_dispatch_event);
static int iw_query_port(struct ib_device *device, static int iw_query_port(struct ib_device *device,
u8 port_num, u8 port_num,
......
...@@ -2154,7 +2154,6 @@ struct ib_port_cache { ...@@ -2154,7 +2154,6 @@ struct ib_port_cache {
struct ib_cache { struct ib_cache {
rwlock_t lock; rwlock_t lock;
struct ib_event_handler event_handler;
}; };
struct ib_port_immutable { struct ib_port_immutable {
...@@ -2632,6 +2631,10 @@ struct ib_device { ...@@ -2632,6 +2631,10 @@ struct ib_device {
struct rcu_head rcu_head; struct rcu_head rcu_head;
struct list_head event_handler_list; struct list_head event_handler_list;
/* Protects event_handler_list */
struct rw_semaphore event_handler_rwsem;
/* Protects QP's event_handler calls and open_qp list */
spinlock_t event_handler_lock; spinlock_t event_handler_lock;
struct rw_semaphore client_data_rwsem; struct rw_semaphore client_data_rwsem;
...@@ -2947,7 +2950,7 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, ...@@ -2947,7 +2950,7 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
void ib_register_event_handler(struct ib_event_handler *event_handler); void ib_register_event_handler(struct ib_event_handler *event_handler);
void ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_unregister_event_handler(struct ib_event_handler *event_handler);
void ib_dispatch_event(struct ib_event *event); void ib_dispatch_event(const struct ib_event *event);
int ib_query_port(struct ib_device *device, int ib_query_port(struct ib_device *device,
u8 port_num, struct ib_port_attr *port_attr); u8 port_num, struct ib_port_attr *port_attr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment