Commit e9ef81e1 authored by Parth Shah, committed by Michael Ellerman

powerpc/smp: Use existing L2 cache_map cpumask to find L3 cache siblings

On POWER10 systems, the "ibm,thread-groups" property value "2" indicates that the CPUs
in the thread group share both the L2 and the L3 cache. Hence, use cache_property = 2
itself to find both the L2 and L3 cache siblings.

Create a new thread_group_l3_cache_map to keep the list of L3 siblings, but fill the
mask using the same property "2" array.
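
For illustration, a minimal sketch (not part of this patch; l3_group_id() is a
hypothetical helper) of how the new flag and mask are meant to be consumed,
mirroring the existing thread_group_l2_cache_map path in get_group_id():

	/*
	 * Hypothetical consumer: return the first CPU of this CPU's L3
	 * sibling group, or -1 when the firmware does not report
	 * "ibm,thread-groups" property "2" (thread_group_shares_l3
	 * remains false in that case). Relies on the declarations this
	 * patch adds to arch/powerpc/include/asm/smp.h.
	 */
	static int l3_group_id(unsigned int cpu)
	{
		if (thread_group_shares_l3)
			return cpumask_first(per_cpu(thread_group_l3_cache_map, cpu));

		return -1;
	}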
Signed-off-by: Parth Shah <parth@linux.ibm.com>
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210728175607.591679-4-parth@linux.ibm.com
parent 69aa8e07
arch/powerpc/include/asm/smp.h

@@ -35,6 +35,7 @@ extern int *chip_id_lookup_table;
 
 DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
 DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
 
 #ifdef CONFIG_SMP
@@ -144,6 +145,7 @@ extern int cpu_to_core_id(int cpu);
 
 extern bool has_big_cores;
 extern bool thread_group_shares_l2;
+extern bool thread_group_shares_l3;
 
 #define cpu_smt_mask cpu_smt_mask
 #ifdef CONFIG_SCHED_SMT
@@ -198,6 +200,7 @@ extern void __cpu_die(unsigned int cpu);
 #define hard_smp_processor_id()		get_hard_smp_processor_id(0)
 #define smp_setup_cpu_maps()
 #define thread_group_shares_l2 0
+#define thread_group_shares_l3 0
 static inline void inhibit_secondary_onlining(void) {}
 static inline void uninhibit_secondary_onlining(void) {}
 static inline const struct cpumask *cpu_sibling_mask(int cpu)
arch/powerpc/kernel/cacheinfo.c

@@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level)
 	else if (thread_group_shares_l2 && level == 2)
 		return cpumask_first(per_cpu(thread_group_l2_cache_map,
 					     cpu_id));
+	else if (thread_group_shares_l3 && level == 3)
+		return cpumask_first(per_cpu(thread_group_l3_cache_map,
+					     cpu_id));
 	return -1;
 }
arch/powerpc/kernel/smp.c

@@ -78,6 +78,7 @@ struct task_struct *secondary_current;
 bool has_big_cores;
 bool coregroup_enabled;
 bool thread_group_shares_l2;
+bool thread_group_shares_l3;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -101,7 +102,7 @@ enum {
 
 #define MAX_THREAD_LIST_SIZE	8
 #define THREAD_GROUP_SHARE_L1   1
-#define THREAD_GROUP_SHARE_L2   2
+#define THREAD_GROUP_SHARE_L2_L3 2
 struct thread_groups {
 	unsigned int property;
 	unsigned int nr_groups;
@@ -131,6 +132,12 @@ DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
  */
 DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
 
+/*
+ * On P10, thread_group_l3_cache_map for each CPU is equal to the
+ * thread_group_l2_cache_map
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
+
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;
@@ -889,19 +896,41 @@ static struct thread_groups *__init get_thread_groups(int cpu,
 	return tg;
 }
 
+static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start)
+{
+	int first_thread = cpu_first_thread_sibling(cpu);
+	int i;
+
+	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
+
+	for (i = first_thread; i < first_thread + threads_per_core; i++) {
+		int i_group_start = get_cpu_thread_group_start(i, tg);
+
+		if (unlikely(i_group_start == -1)) {
+			WARN_ON_ONCE(1);
+			return -ENODATA;
+		}
+
+		if (i_group_start == cpu_group_start)
+			cpumask_set_cpu(i, *mask);
+	}
+
+	return 0;
+}
+
 static int __init init_thread_group_cache_map(int cpu, int cache_property)
 {
-	int first_thread = cpu_first_thread_sibling(cpu);
-	int i, cpu_group_start = -1, err = 0;
+	int cpu_group_start = -1, err = 0;
 	struct thread_groups *tg = NULL;
 	cpumask_var_t *mask = NULL;
 
 	if (cache_property != THREAD_GROUP_SHARE_L1 &&
-	    cache_property != THREAD_GROUP_SHARE_L2)
+	    cache_property != THREAD_GROUP_SHARE_L2_L3)
 		return -EINVAL;
 
 	tg = get_thread_groups(cpu, cache_property, &err);
 	if (!tg)
 		return err;
@@ -912,25 +941,18 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
 		return -ENODATA;
 	}
 
-	if (cache_property == THREAD_GROUP_SHARE_L1)
+	if (cache_property == THREAD_GROUP_SHARE_L1) {
 		mask = &per_cpu(thread_group_l1_cache_map, cpu);
-	else if (cache_property == THREAD_GROUP_SHARE_L2)
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+	}
+	else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
 		mask = &per_cpu(thread_group_l2_cache_map, cpu);
-
-	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
-
-	for (i = first_thread; i < first_thread + threads_per_core; i++) {
-		int i_group_start = get_cpu_thread_group_start(i, tg);
-
-		if (unlikely(i_group_start == -1)) {
-			WARN_ON_ONCE(1);
-			return -ENODATA;
-		}
-
-		if (i_group_start == cpu_group_start)
-			cpumask_set_cpu(i, *mask);
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+		mask = &per_cpu(thread_group_l3_cache_map, cpu);
+		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
 	}
 
 	return 0;
 }
@@ -1020,14 +1042,16 @@ static int __init init_big_cores(void)
 	has_big_cores = true;
 
 	for_each_possible_cpu(cpu) {
-		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2);
+		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
 
 		if (err)
 			return err;
 	}
 
 	thread_group_shares_l2 = true;
-	pr_debug("L2 cache only shared by the threads in the small core\n");
+	thread_group_shares_l3 = true;
+	pr_debug("L2/L3 cache only shared by the threads in the small core\n");
 	return 0;
 }