Commit aa8e3291 authored by Linus Torvalds

Merge branch 'for-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu

Pull percpu updates from Dennis Zhou:
 "Percpu had a cleanup come in that makes use of the cpu bitmask helpers
  instead of the current iterative approach.

  This cleanup then had an adverse interaction when clang's inlining
  sensitivity was changed such that not all call sites were inlined,
  resulting in modpost being upset with a section mismatch, because the
  percpu setup code is marked __init.

  That was fixed by introducing __flatten to compiler_attributes.h"

* 'for-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu:
  percpu: fix clang modpost section mismatch
  percpu: reduce the number of cpu distance comparisons
parents 5cf0fd59 258e0815
...@@ -210,6 +210,12 @@ ...@@ -210,6 +210,12 @@
# define fallthrough do {} while (0) /* fallthrough */ # define fallthrough do {} while (0) /* fallthrough */
#endif #endif
/*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes
* clang: https://clang.llvm.org/docs/AttributeReference.html#flatten
*/
# define __flatten __attribute__((flatten))
/* /*
* Note the missing underscores. * Note the missing underscores.
* *
......
...@@ -69,6 +69,7 @@ ...@@ -69,6 +69,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/bitmap.h> #include <linux/bitmap.h>
#include <linux/cpumask.h>
#include <linux/memblock.h> #include <linux/memblock.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/lcm.h> #include <linux/lcm.h>
...@@ -2662,13 +2663,14 @@ early_param("percpu_alloc", percpu_alloc_setup); ...@@ -2662,13 +2663,14 @@ early_param("percpu_alloc", percpu_alloc_setup);
* On success, pointer to the new allocation_info is returned. On * On success, pointer to the new allocation_info is returned. On
* failure, ERR_PTR value is returned. * failure, ERR_PTR value is returned.
*/ */
static struct pcpu_alloc_info * __init pcpu_build_alloc_info( static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
size_t reserved_size, size_t dyn_size, size_t reserved_size, size_t dyn_size,
size_t atom_size, size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn) pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
{ {
static int group_map[NR_CPUS] __initdata; static int group_map[NR_CPUS] __initdata;
static int group_cnt[NR_CPUS] __initdata; static int group_cnt[NR_CPUS] __initdata;
static struct cpumask mask __initdata;
const size_t static_size = __per_cpu_end - __per_cpu_start; const size_t static_size = __per_cpu_end - __per_cpu_start;
int nr_groups = 1, nr_units = 0; int nr_groups = 1, nr_units = 0;
size_t size_sum, min_unit_size, alloc_size; size_t size_sum, min_unit_size, alloc_size;
...@@ -2681,6 +2683,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( ...@@ -2681,6 +2683,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
/* this function may be called multiple times */ /* this function may be called multiple times */
memset(group_map, 0, sizeof(group_map)); memset(group_map, 0, sizeof(group_map));
memset(group_cnt, 0, sizeof(group_cnt)); memset(group_cnt, 0, sizeof(group_cnt));
cpumask_clear(&mask);
/* calculate size_sum and ensure dyn_size is enough for early alloc */ /* calculate size_sum and ensure dyn_size is enough for early alloc */
size_sum = PFN_ALIGN(static_size + reserved_size + size_sum = PFN_ALIGN(static_size + reserved_size +
...@@ -2702,24 +2705,27 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( ...@@ -2702,24 +2705,27 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
upa--; upa--;
max_upa = upa; max_upa = upa;
cpumask_copy(&mask, cpu_possible_mask);
/* group cpus according to their proximity */ /* group cpus according to their proximity */
for_each_possible_cpu(cpu) { for (group = 0; !cpumask_empty(&mask); group++) {
group = 0; /* pop the group's first cpu */
next_group: cpu = cpumask_first(&mask);
for_each_possible_cpu(tcpu) {
if (cpu == tcpu)
break;
if (group_map[tcpu] == group && cpu_distance_fn &&
(cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
group++;
nr_groups = max(nr_groups, group + 1);
goto next_group;
}
}
group_map[cpu] = group; group_map[cpu] = group;
group_cnt[group]++; group_cnt[group]++;
cpumask_clear_cpu(cpu, &mask);
for_each_cpu(tcpu, &mask) {
if (!cpu_distance_fn ||
(cpu_distance_fn(cpu, tcpu) == LOCAL_DISTANCE &&
cpu_distance_fn(tcpu, cpu) == LOCAL_DISTANCE)) {
group_map[tcpu] = group;
group_cnt[group]++;
cpumask_clear_cpu(tcpu, &mask);
}
}
} }
nr_groups = group;
/* /*
* Wasted space is caused by a ratio imbalance of upa to group_cnt. * Wasted space is caused by a ratio imbalance of upa to group_cnt.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment