Commit 4197344b authored by Dennis Dalessandro, committed by Doug Ledford

IB/hfi1: Add global structure for affinity assignments

When HFI units get initialized, they each use their own mask copy for
affinity assignments. On a multi-HFI system, affinity assignments
overbook CPU cores as each HFI doesn't have knowledge of affinity
assignments for other HFI units. Therefore, some CPU cores are never
used for interrupt handlers in systems with a high number of CPU cores
per NUMA node.

For multi-HFI systems, SDMA engine interrupt assignments start all over
from the first CPU in the local NUMA node after the first HFI
initialization. This change allows assignments to continue where the
last HFI unit left off.

Add a global structure for affinity assignments so that multiple HFIs
share one affinity mask.

Reviewed-by: Jianxin Xiong <jianxin.xiong@intel.com>
Reviewed-by: Jubin John <jubin.john@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 2b719046
This diff is collapsed.
...@@ -82,11 +82,9 @@ struct hfi1_affinity { ...@@ -82,11 +82,9 @@ struct hfi1_affinity {
struct hfi1_msix_entry; struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */ /* Initialize non-HT cpu cores mask */
int init_real_cpu_mask(struct hfi1_devdata *); void init_real_cpu_mask(void);
/* Initialize driver affinity data */ /* Initialize driver affinity data */
void hfi1_dev_affinity_init(struct hfi1_devdata *); int hfi1_dev_affinity_init(struct hfi1_devdata *);
/* Free driver affinity data */
void hfi1_dev_affinity_free(struct hfi1_devdata *);
/* /*
* Set IRQ affinity to a CPU. The function will determine the * Set IRQ affinity to a CPU. The function will determine the
* CPU and set the affinity to it. * CPU and set the affinity to it.
...@@ -105,4 +103,23 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *, int); ...@@ -105,4 +103,23 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *, int);
/* Release a CPU used by a user process. */ /* Release a CPU used by a user process. */
void hfi1_put_proc_affinity(struct hfi1_devdata *, int); void hfi1_put_proc_affinity(struct hfi1_devdata *, int);
/*
 * One affinity bookkeeping entry, chained through its list member.
 * NOTE(review): presumably one entry exists per NUMA node and is shared by
 * every HFI unit on that node -- confirm against affinity.c.
 */
struct hfi1_affinity_node {
/* NOTE(review): presumably the NUMA node id this entry describes -- confirm */
int node;
/* NOTE(review): looks like the CPU set for default/general interrupts -- confirm */
struct cpu_mask_set def_intr;
/* NOTE(review): looks like the CPU set for receive interrupts -- confirm */
struct cpu_mask_set rcv_intr;
/* list linkage; presumably hooks this entry onto hfi1_affinity_node_list.list */
struct list_head list;
};
/*
 * Type of the driver-global node_affinity object, which holds the affinity
 * state that multiple HFI units share.
 */
struct hfi1_affinity_node_list {
/* list head; presumably of struct hfi1_affinity_node entries -- confirm */
struct list_head list;
/*
 * Its weight is the default number of user contexts when the
 * num_user_contexts setting is negative.
 * NOTE(review): presumably the non-HT core mask built by
 * init_real_cpu_mask() -- confirm.
 */
struct cpumask real_cpu_mask;
/* NOTE(review): looks like the CPU set handed out to user processes -- confirm */
struct cpu_mask_set proc;
/* protect affinity node list */
spinlock_t lock;
};
/*
 * Called once from hfi1_mod_init(); presumably sets up the global
 * node_affinity object before any device is probed -- confirm.
 */
void node_affinity_init(void);
/*
 * Called from hfi1_mod_cleanup() after the PCI driver is unregistered;
 * presumably releases what node_affinity_init() and later device setup
 * added to node_affinity -- confirm.
 */
void node_affinity_destroy(void);
/* Global affinity state shared by the HFI units; defined outside this header */
extern struct hfi1_affinity_node_list node_affinity;
#endif /* _HFI1_AFFINITY_H */ #endif /* _HFI1_AFFINITY_H */
...@@ -63,6 +63,7 @@ ...@@ -63,6 +63,7 @@
#include "efivar.h" #include "efivar.h"
#include "platform.h" #include "platform.h"
#include "aspm.h" #include "aspm.h"
#include "affinity.h"
#define NUM_IB_PORTS 1 #define NUM_IB_PORTS 1
...@@ -12838,7 +12839,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd) ...@@ -12838,7 +12839,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
*/ */
if (num_user_contexts < 0) if (num_user_contexts < 0)
num_user_contexts = num_user_contexts =
cpumask_weight(&dd->affinity->real_cpu_mask); cpumask_weight(&node_affinity.real_cpu_mask);
total_contexts = num_kernel_contexts + num_user_contexts; total_contexts = num_kernel_contexts + num_user_contexts;
...@@ -14468,19 +14469,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, ...@@ -14468,19 +14469,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
(dd->revision >> CCE_REVISION_SW_SHIFT) (dd->revision >> CCE_REVISION_SW_SHIFT)
& CCE_REVISION_SW_MASK); & CCE_REVISION_SW_MASK);
/*
* The real cpu mask is part of the affinity struct but has to be
* initialized earlier than the rest of the affinity struct because it
* is needed to calculate the number of user contexts in
* set_up_context_variables(). However, hfi1_dev_affinity_init(),
* which initializes the rest of the affinity struct members,
* depends on set_up_context_variables() for the number of kernel
* contexts, so it cannot be called before set_up_context_variables().
*/
ret = init_real_cpu_mask(dd);
if (ret)
goto bail_cleanup;
ret = set_up_context_variables(dd); ret = set_up_context_variables(dd);
if (ret) if (ret)
goto bail_cleanup; goto bail_cleanup;
...@@ -14494,7 +14482,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, ...@@ -14494,7 +14482,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* set up KDETH QP prefix in both RX and TX CSRs */ /* set up KDETH QP prefix in both RX and TX CSRs */
init_kdeth_qp(dd); init_kdeth_qp(dd);
hfi1_dev_affinity_init(dd); ret = hfi1_dev_affinity_init(dd);
if (ret)
goto bail_cleanup;
/* send contexts must be set up before receive contexts */ /* send contexts must be set up before receive contexts */
ret = init_send_contexts(dd); ret = init_send_contexts(dd);
......
...@@ -64,6 +64,7 @@ ...@@ -64,6 +64,7 @@
#include "debugfs.h" #include "debugfs.h"
#include "verbs.h" #include "verbs.h"
#include "aspm.h" #include "aspm.h"
#include "affinity.h"
#undef pr_fmt #undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt
...@@ -1004,7 +1005,6 @@ static void __hfi1_free_devdata(struct kobject *kobj) ...@@ -1004,7 +1005,6 @@ static void __hfi1_free_devdata(struct kobject *kobj)
rcu_barrier(); /* wait for rcu callbacks to complete */ rcu_barrier(); /* wait for rcu callbacks to complete */
free_percpu(dd->int_counter); free_percpu(dd->int_counter);
free_percpu(dd->rcv_limit); free_percpu(dd->rcv_limit);
hfi1_dev_affinity_free(dd);
free_percpu(dd->send_schedule); free_percpu(dd->send_schedule);
rvt_dealloc_device(&dd->verbs_dev.rdi); rvt_dealloc_device(&dd->verbs_dev.rdi);
} }
...@@ -1198,6 +1198,8 @@ static int __init hfi1_mod_init(void) ...@@ -1198,6 +1198,8 @@ static int __init hfi1_mod_init(void)
if (ret) if (ret)
goto bail; goto bail;
node_affinity_init();
/* validate max MTU before any devices start */ /* validate max MTU before any devices start */
if (!valid_opa_max_mtu(hfi1_max_mtu)) { if (!valid_opa_max_mtu(hfi1_max_mtu)) {
pr_err("Invalid max_mtu 0x%x, using 0x%x instead\n", pr_err("Invalid max_mtu 0x%x, using 0x%x instead\n",
...@@ -1278,6 +1280,7 @@ module_init(hfi1_mod_init); ...@@ -1278,6 +1280,7 @@ module_init(hfi1_mod_init);
static void __exit hfi1_mod_cleanup(void) static void __exit hfi1_mod_cleanup(void)
{ {
pci_unregister_driver(&hfi1_pci_driver); pci_unregister_driver(&hfi1_pci_driver);
node_affinity_destroy();
hfi1_wss_exit(); hfi1_wss_exit();
hfi1_dbg_exit(); hfi1_dbg_exit();
hfi1_cpulist_count = 0; hfi1_cpulist_count = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment