Commit bb7e22a8 authored by Huy Nguyen, committed by Jason Gunthorpe

IB/mlx5: Fix long EEH recover time with NVMe offloads

On an NVMe offload connection with many IO queues, EEH takes a long time
to recover. The culprit is the synchronize_srcu() call in destroy_mkey().
The solution is to call synchronize_srcu() only for ODP mkeys.

Fixes: b4cfe447 ("IB/mlx5: Implement on demand paging by adding support for MMU notifiers")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 842a9c83
...@@ -73,7 +73,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) ...@@ -73,7 +73,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/* Wait until all page fault handlers using the mr complete. */ /* Wait until all page fault handlers using the mr complete. */
synchronize_srcu(&dev->mr_srcu); if (mr->umem && mr->umem->is_odp)
synchronize_srcu(&dev->mr_srcu);
#endif #endif
return err; return err;
...@@ -237,6 +238,9 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) ...@@ -237,6 +238,9 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{ {
struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_cache_ent *ent = &cache->ent[c];
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
bool odp_mkey_exist = false;
#endif
struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr; struct mlx5_ib_mr *mr;
LIST_HEAD(del_list); LIST_HEAD(del_list);
...@@ -249,6 +253,10 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) ...@@ -249,6 +253,10 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
break; break;
} }
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (mr->umem && mr->umem->is_odp)
odp_mkey_exist = true;
#endif
list_move(&mr->list, &del_list); list_move(&mr->list, &del_list);
ent->cur--; ent->cur--;
ent->size--; ent->size--;
...@@ -257,7 +265,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) ...@@ -257,7 +265,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
} }
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
synchronize_srcu(&dev->mr_srcu); if (odp_mkey_exist)
synchronize_srcu(&dev->mr_srcu);
#endif #endif
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
...@@ -572,6 +581,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) ...@@ -572,6 +581,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
{ {
struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_cache_ent *ent = &cache->ent[c];
bool odp_mkey_exist = false;
struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr; struct mlx5_ib_mr *mr;
LIST_HEAD(del_list); LIST_HEAD(del_list);
...@@ -584,6 +594,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) ...@@ -584,6 +594,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
break; break;
} }
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
if (mr->umem && mr->umem->is_odp)
odp_mkey_exist = true;
list_move(&mr->list, &del_list); list_move(&mr->list, &del_list);
ent->cur--; ent->cur--;
ent->size--; ent->size--;
...@@ -592,7 +604,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) ...@@ -592,7 +604,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
} }
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
synchronize_srcu(&dev->mr_srcu); if (odp_mkey_exist)
synchronize_srcu(&dev->mr_srcu);
#endif #endif
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment