Commit 222a21d2 authored by Linus Torvalds

Merge branch 'x86-topology-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 topology updates from Ingo Molnar:
 "Implement multi-die topology support on Intel CPUs and expose the die
  topology to user-space tooling, by Len Brown, Kan Liang and Zhang Rui.

  These changes should have no effect on the kernel's existing
  understanding of topologies, i.e. there should be no behavioral impact
  on cache, NUMA, scheduler, perf and other topologies and overall
  system performance"

* 'x86-topology-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel/rapl: Cosmetic rename internal variables in response to multi-die/pkg support
  perf/x86/intel/uncore: Cosmetic renames in response to multi-die/pkg support
  hwmon/coretemp: Cosmetic: Rename internal variables to zones from packages
  thermal/x86_pkg_temp_thermal: Cosmetic: Rename internal variables to zones from packages
  perf/x86/intel/cstate: Support multi-die/package
  perf/x86/intel/rapl: Support multi-die/package
  perf/x86/intel/uncore: Support multi-die/package
  topology: Create core_cpus and die_cpus sysfs attributes
  topology: Create package_cpus sysfs attribute
  hwmon/coretemp: Support multi-die/package
  powercap/intel_rapl: Update RAPL domain name and debug messages
  thermal/x86_pkg_temp_thermal: Support multi-die/package
  powercap/intel_rapl: Support multi-die/package
  powercap/intel_rapl: Simplify rapl_find_package()
  x86/topology: Define topology_logical_die_id()
  x86/topology: Define topology_die_id()
  cpu/topology: Export die_id
  x86/topology: Create topology_max_die_per_package()
  x86/topology: Add CPUID.1F multi-die/package support
parents 8faef712 eb876fbc
......@@ -12,6 +12,12 @@ physical_package_id:
socket number, but the actual value is architecture and platform
dependent.
die_id:
the CPU die ID of cpuX. Typically it is the hardware platform's
identifier (rather than the kernel's). The actual value is
architecture and platform dependent.
core_id:
the CPU core ID of cpuX. Typically it is the hardware platform's
......@@ -30,25 +36,33 @@ drawer_id:
identifier (rather than the kernel's). The actual value is
architecture and platform dependent.
thread_siblings:
core_cpus:
internal kernel map of cpuX's hardware threads within the same
core as cpuX.
internal kernel map of CPUs within the same core.
(deprecated name: "thread_siblings")
thread_siblings_list:
core_cpus_list:
human-readable list of cpuX's hardware threads within the same
core as cpuX.
human-readable list of CPUs within the same core.
(deprecated name: "thread_siblings_list")
core_siblings:
package_cpus:
internal kernel map of cpuX's hardware threads within the same
physical_package_id.
internal kernel map of the CPUs sharing the same physical_package_id.
(deprecated name: "core_siblings")
core_siblings_list:
package_cpus_list:
human-readable list of cpuX's hardware threads within the same
physical_package_id.
human-readable list of CPUs sharing the same physical_package_id.
(deprecated name: "core_siblings_list")
die_cpus:
internal kernel map of CPUs within the same die.
die_cpus_list:
human-readable list of CPUs within the same die.
book_siblings:
......@@ -81,11 +95,13 @@ For an architecture to support this feature, it must define some of
these macros in include/asm-XXX/topology.h::
#define topology_physical_package_id(cpu)
#define topology_die_id(cpu)
#define topology_core_id(cpu)
#define topology_book_id(cpu)
#define topology_drawer_id(cpu)
#define topology_sibling_cpumask(cpu)
#define topology_core_cpumask(cpu)
#define topology_die_cpumask(cpu)
#define topology_book_cpumask(cpu)
#define topology_drawer_cpumask(cpu)
......@@ -99,9 +115,11 @@ provides default definitions for any of the above macros that are
not defined by include/asm-XXX/topology.h:
1) topology_physical_package_id: -1
2) topology_core_id: 0
3) topology_sibling_cpumask: just the given CPU
4) topology_core_cpumask: just the given CPU
2) topology_die_id: -1
3) topology_core_id: 0
4) topology_sibling_cpumask: just the given CPU
5) topology_core_cpumask: just the given CPU
6) topology_die_cpumask: just the given CPU
For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
default definitions for topology_book_id() and topology_book_cpumask().
......
......@@ -49,6 +49,10 @@ Package-related topology information in the kernel:
The number of cores in a package. This information is retrieved via CPUID.
- cpuinfo_x86.x86_max_dies:
The number of dies in a package. This information is retrieved via CPUID.
- cpuinfo_x86.phys_proc_id:
The physical ID of the package. This information is retrieved via CPUID
......
......@@ -302,7 +302,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr;
cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
topology_core_cpumask(event->cpu));
topology_die_cpumask(event->cpu));
} else {
return -ENOENT;
}
......@@ -385,7 +385,7 @@ static int cstate_cpu_exit(unsigned int cpu)
if (has_cstate_pkg &&
cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate events if there is a valid target */
if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
......@@ -414,7 +414,7 @@ static int cstate_cpu_init(unsigned int cpu)
* in the package cpu mask as the designated reader.
*/
target = cpumask_any_and(&cstate_pkg_cpu_mask,
topology_core_cpumask(cpu));
topology_die_cpumask(cpu));
if (has_cstate_pkg && target >= nr_cpu_ids)
cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
......@@ -663,7 +663,13 @@ static int __init cstate_init(void)
}
if (has_cstate_pkg) {
err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
if (topology_max_die_per_package() > 1) {
err = perf_pmu_register(&cstate_pkg_pmu,
"cstate_die", -1);
} else {
err = perf_pmu_register(&cstate_pkg_pmu,
cstate_pkg_pmu.name, -1);
}
if (err) {
has_cstate_pkg = false;
pr_info("Failed to register cstate pkg pmu\n");
......
......@@ -149,7 +149,7 @@ struct rapl_pmu {
struct rapl_pmus {
struct pmu pmu;
unsigned int maxpkg;
unsigned int maxdie;
struct rapl_pmu *pmus[];
};
......@@ -162,13 +162,13 @@ static u64 rapl_timer_ms;
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
unsigned int pkgid = topology_logical_package_id(cpu);
unsigned int dieid = topology_logical_die_id(cpu);
/*
* The unsigned check also catches the '-1' return value for non
* existent mappings in the topology map.
*/
return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL;
return dieid < rapl_pmus->maxdie ? rapl_pmus->pmus[dieid] : NULL;
}
static inline u64 rapl_read_counter(struct perf_event *event)
......@@ -572,7 +572,7 @@ static int rapl_cpu_offline(unsigned int cpu)
pmu->cpu = -1;
/* Find a new cpu to collect rapl events */
target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate rapl events to the new target */
if (target < nr_cpu_ids) {
......@@ -599,14 +599,14 @@ static int rapl_cpu_online(unsigned int cpu)
pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
rapl_hrtimer_init(pmu);
rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
}
/*
* Check if there is an online cpu in the package which collects rapl
* events already.
*/
target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
if (target < nr_cpu_ids)
return 0;
......@@ -669,22 +669,22 @@ static void cleanup_rapl_pmus(void)
{
int i;
for (i = 0; i < rapl_pmus->maxpkg; i++)
for (i = 0; i < rapl_pmus->maxdie; i++)
kfree(rapl_pmus->pmus[i]);
kfree(rapl_pmus);
}
static int __init init_rapl_pmus(void)
{
int maxpkg = topology_max_packages();
int maxdie = topology_max_packages() * topology_max_die_per_package();
size_t size;
size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *);
rapl_pmus = kzalloc(size, GFP_KERNEL);
if (!rapl_pmus)
return -ENOMEM;
rapl_pmus->maxpkg = maxpkg;
rapl_pmus->maxdie = maxdie;
rapl_pmus->pmu.attr_groups = rapl_attr_groups;
rapl_pmus->pmu.task_ctx_nr = perf_invalid_context;
rapl_pmus->pmu.event_init = rapl_pmu_event_init;
......
......@@ -15,7 +15,7 @@ struct pci_driver *uncore_pci_driver;
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
struct pci_extra_dev *uncore_extra_pci_dev;
static int max_packages;
static int max_dies;
/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;
......@@ -101,13 +101,13 @@ ssize_t uncore_event_show(struct kobject *kobj,
struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
unsigned int pkgid = topology_logical_package_id(cpu);
unsigned int dieid = topology_logical_die_id(cpu);
/*
* The unsigned check also catches the '-1' return value for non
* existent mappings in the topology map.
*/
return pkgid < max_packages ? pmu->boxes[pkgid] : NULL;
return dieid < max_dies ? pmu->boxes[dieid] : NULL;
}
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
......@@ -312,7 +312,7 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
uncore_pmu_init_hrtimer(box);
box->cpu = -1;
box->pci_phys_id = -1;
box->pkgid = -1;
box->dieid = -1;
/* set default hrtimer timeout */
box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
......@@ -827,10 +827,10 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
{
int pkg;
int die;
for (pkg = 0; pkg < max_packages; pkg++)
kfree(pmu->boxes[pkg]);
for (die = 0; die < max_dies; die++)
kfree(pmu->boxes[die]);
kfree(pmu->boxes);
}
......@@ -867,7 +867,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
if (!pmus)
return -ENOMEM;
size = max_packages * sizeof(struct intel_uncore_box *);
size = max_dies * sizeof(struct intel_uncore_box *);
for (i = 0; i < type->num_boxes; i++) {
pmus[i].func_id = setid ? i : -1;
......@@ -937,20 +937,21 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_box *box;
int phys_id, pkg, ret;
int phys_id, die, ret;
phys_id = uncore_pcibus_to_physid(pdev->bus);
if (phys_id < 0)
return -ENODEV;
pkg = topology_phys_to_logical_pkg(phys_id);
if (pkg < 0)
die = (topology_max_die_per_package() > 1) ? phys_id :
topology_phys_to_logical_pkg(phys_id);
if (die < 0)
return -EINVAL;
if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
uncore_extra_pci_dev[pkg].dev[idx] = pdev;
uncore_extra_pci_dev[die].dev[idx] = pdev;
pci_set_drvdata(pdev, NULL);
return 0;
}
......@@ -989,7 +990,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
return -EINVAL;
box = uncore_alloc_box(type, NUMA_NO_NODE);
......@@ -1003,13 +1004,13 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
atomic_inc(&box->refcnt);
box->pci_phys_id = phys_id;
box->pkgid = pkg;
box->dieid = die;
box->pci_dev = pdev;
box->pmu = pmu;
uncore_box_init(box);
pci_set_drvdata(pdev, box);
pmu->boxes[pkg] = box;
pmu->boxes[die] = box;
if (atomic_inc_return(&pmu->activeboxes) > 1)
return 0;
......@@ -1017,7 +1018,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
ret = uncore_pmu_register(pmu);
if (ret) {
pci_set_drvdata(pdev, NULL);
pmu->boxes[pkg] = NULL;
pmu->boxes[die] = NULL;
uncore_box_exit(box);
kfree(box);
}
......@@ -1028,16 +1029,17 @@ static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu;
int i, phys_id, pkg;
int i, phys_id, die;
phys_id = uncore_pcibus_to_physid(pdev->bus);
box = pci_get_drvdata(pdev);
if (!box) {
pkg = topology_phys_to_logical_pkg(phys_id);
die = (topology_max_die_per_package() > 1) ? phys_id :
topology_phys_to_logical_pkg(phys_id);
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
uncore_extra_pci_dev[pkg].dev[i] = NULL;
if (uncore_extra_pci_dev[die].dev[i] == pdev) {
uncore_extra_pci_dev[die].dev[i] = NULL;
break;
}
}
......@@ -1050,7 +1052,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
return;
pci_set_drvdata(pdev, NULL);
pmu->boxes[box->pkgid] = NULL;
pmu->boxes[box->dieid] = NULL;
if (atomic_dec_return(&pmu->activeboxes) == 0)
uncore_pmu_unregister(pmu);
uncore_box_exit(box);
......@@ -1062,7 +1064,7 @@ static int __init uncore_pci_init(void)
size_t size;
int ret;
size = max_packages * sizeof(struct pci_extra_dev);
size = max_dies * sizeof(struct pci_extra_dev);
uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
if (!uncore_extra_pci_dev) {
ret = -ENOMEM;
......@@ -1109,11 +1111,11 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
{
struct intel_uncore_pmu *pmu = type->pmus;
struct intel_uncore_box *box;
int i, pkg;
int i, die;
pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[pkg];
box = pmu->boxes[die];
if (!box)
continue;
......@@ -1146,13 +1148,13 @@ static int uncore_event_cpu_offline(unsigned int cpu)
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
int i, pkg, target;
int i, die, target;
/* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
goto unref;
/* Find a new cpu to collect uncore events */
target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
/* Migrate uncore events to the new target */
if (target < nr_cpu_ids)
......@@ -1165,12 +1167,12 @@ static int uncore_event_cpu_offline(unsigned int cpu)
unref:
/* Clear the references */
pkg = topology_logical_package_id(cpu);
die = topology_logical_die_id(cpu);
for (; *types; types++) {
type = *types;
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[pkg];
box = pmu->boxes[die];
if (box && atomic_dec_return(&box->refcnt) == 0)
uncore_box_exit(box);
}
......@@ -1179,7 +1181,7 @@ static int uncore_event_cpu_offline(unsigned int cpu)
}
static int allocate_boxes(struct intel_uncore_type **types,
unsigned int pkg, unsigned int cpu)
unsigned int die, unsigned int cpu)
{
struct intel_uncore_box *box, *tmp;
struct intel_uncore_type *type;
......@@ -1192,20 +1194,20 @@ static int allocate_boxes(struct intel_uncore_type **types,
type = *types;
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
if (pmu->boxes[pkg])
if (pmu->boxes[die])
continue;
box = uncore_alloc_box(type, cpu_to_node(cpu));
if (!box)
goto cleanup;
box->pmu = pmu;
box->pkgid = pkg;
box->dieid = die;
list_add(&box->active_list, &allocated);
}
}
/* Install them in the pmus */
list_for_each_entry_safe(box, tmp, &allocated, active_list) {
list_del_init(&box->active_list);
box->pmu->boxes[pkg] = box;
box->pmu->boxes[die] = box;
}
return 0;
......@@ -1222,10 +1224,10 @@ static int uncore_event_cpu_online(unsigned int cpu)
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
int i, ret, pkg, target;
int i, ret, die, target;
pkg = topology_logical_package_id(cpu);
ret = allocate_boxes(types, pkg, cpu);
die = topology_logical_die_id(cpu);
ret = allocate_boxes(types, die, cpu);
if (ret)
return ret;
......@@ -1233,7 +1235,7 @@ static int uncore_event_cpu_online(unsigned int cpu)
type = *types;
pmu = type->pmus;
for (i = 0; i < type->num_boxes; i++, pmu++) {
box = pmu->boxes[pkg];
box = pmu->boxes[die];
if (box && atomic_inc_return(&box->refcnt) == 1)
uncore_box_init(box);
}
......@@ -1243,7 +1245,7 @@ static int uncore_event_cpu_online(unsigned int cpu)
* Check if there is an online cpu in the package
* which collects uncore events already.
*/
target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
if (target < nr_cpu_ids)
return 0;
......@@ -1419,7 +1421,7 @@ static int __init intel_uncore_init(void)
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return -ENODEV;
max_packages = topology_max_packages();
max_dies = topology_max_packages() * topology_max_die_per_package();
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
if (uncore_init->pci_init) {
......
......@@ -108,7 +108,7 @@ struct intel_uncore_extra_reg {
struct intel_uncore_box {
int pci_phys_id;
int pkgid; /* Logical package ID */
int dieid; /* Logical die ID */
int n_active; /* number of active events */
int n_events;
int cpu; /* cpu to collect events */
......@@ -467,7 +467,7 @@ static inline void uncore_box_exit(struct intel_uncore_box *box)
static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
{
return (box->pkgid < 0);
return (box->dieid < 0);
}
static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
......
......@@ -1058,8 +1058,8 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
if (reg1->idx != EXTRA_REG_NONE) {
int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
int pkg = box->pkgid;
struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
int die = box->dieid;
struct pci_dev *filter_pdev = uncore_extra_pci_dev[die].dev[idx];
if (filter_pdev) {
pci_write_config_dword(filter_pdev, reg1->reg,
......
......@@ -117,6 +117,8 @@ struct cpuinfo_x86 {
u16 logical_proc_id;
/* Core id: */
u16 cpu_core_id;
u16 cpu_die_id;
u16 logical_die_id;
/* Index into per_cpu list: */
u16 cpu_index;
u32 microcode;
......
......@@ -23,6 +23,7 @@ extern unsigned int num_processors;
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
/* cpus sharing the last level cache: */
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
......
......@@ -106,15 +106,25 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
#define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id)
#define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id)
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
#ifdef CONFIG_SMP
#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
extern unsigned int __max_logical_packages;
#define topology_max_packages() (__max_logical_packages)
/*
 * Number of dies per package on SMP builds. The backing variable is only
 * declared here; its definition lives outside this header.
 */
extern unsigned int __max_die_per_package;
/* Read accessor; note the unsigned value is returned as a plain int. */
static inline int topology_max_die_per_package(void)
{
return __max_die_per_package;
}
extern int __max_smt_threads;
static inline int topology_max_smt_threads(void)
......@@ -123,14 +133,21 @@ static inline int topology_max_smt_threads(void)
}
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
int topology_update_die_map(unsigned int dieid, unsigned int cpu);
int topology_phys_to_logical_pkg(unsigned int pkg);
int topology_phys_to_logical_die(unsigned int die, unsigned int cpu);
bool topology_is_primary_thread(unsigned int cpu);
bool topology_smt_supported(void);
#else
#define topology_max_packages() (1)
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
static inline int
topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
static inline int topology_phys_to_logical_die(unsigned int die,
unsigned int cpu) { return 0; }
static inline int topology_max_die_per_package(void) { return 1; }
static inline int topology_max_smt_threads(void) { return 1; }
static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
static inline bool topology_smt_supported(void) { return false; }
......
......@@ -1322,6 +1322,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
cpu, apicid, c->initial_apicid);
}
BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
#else
c->logical_proc_id = 0;
#endif
......
......@@ -15,33 +15,66 @@
/* leaf 0xb SMT level */
#define SMT_LEVEL 0
/* leaf 0xb sub-leaf types */
/* extended topology sub-leaf types */
#define INVALID_TYPE 0
#define SMT_TYPE 1
#define CORE_TYPE 2
#define DIE_TYPE 5
#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
int detect_extended_topology_early(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int __max_die_per_package __read_mostly = 1;
EXPORT_SYMBOL(__max_die_per_package);
/*
* Check if given CPUID extended topology "leaf" is implemented
*/
static int check_extended_topology_leaf(int leaf)
{
unsigned int eax, ebx, ecx, edx;
if (c->cpuid_level < 0xb)
return -1;
cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
return -1;
/*
* check if the cpuid leaf 0xb is actually implemented.
return 0;
}
/*
* Return best CPUID Extended Topology Leaf supported
*/
if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
static int detect_extended_topology_leaf(struct cpuinfo_x86 *c)
{
	/*
	 * Leaf 0x1f is tried before leaf 0xb. A leaf is only probed when
	 * the CPU's maximum basic CPUID level reaches it, and it is only
	 * selected when the probe reports it as implemented (returns 0).
	 */
	if (c->cpuid_level >= 0x1f && check_extended_topology_leaf(0x1f) == 0)
		return 0x1f;

	if (c->cpuid_level >= 0xb && check_extended_topology_leaf(0xb) == 0)
		return 0xb;

	/* Neither extended topology leaf is usable */
	return -1;
}
#endif
int detect_extended_topology_early(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx;
int leaf;
leaf = detect_extended_topology_leaf(c);
if (leaf < 0)
return -1;
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
/*
* initial apic id, which also represents 32-bit extended x2apic id.
*/
......@@ -52,7 +85,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
}
/*
* Check for extended topology enumeration cpuid leaf 0xb and if it
* Check for extended topology enumeration cpuid leaf, and if it
* exists, use it for populating initial_apicid and cpu topology
* detection.
*/
......@@ -60,22 +93,28 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
unsigned int ht_mask_width, core_plus_mask_width;
unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
unsigned int core_select_mask, core_level_siblings;
unsigned int die_select_mask, die_level_siblings;
int leaf;
if (detect_extended_topology_early(c) < 0)
leaf = detect_extended_topology_leaf(c);
if (leaf < 0)
return -1;
/*
* Populate HT related information from sub-leaf level 0.
*/
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
c->initial_apicid = edx;
core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
sub_index = 1;
do {
cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx);
/*
* Check for the Core type in the implemented sub leaves.
......@@ -83,23 +122,34 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
break;
die_level_siblings = core_level_siblings;
die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
}
if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) {
die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
}
sub_index++;
} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
& core_select_mask;
c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
die_select_mask = (~(-1 << die_plus_mask_width)) >>
core_plus_mask_width;
c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid,
ht_mask_width) & core_select_mask;
c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid,
core_plus_mask_width) & die_select_mask;
c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid,
die_plus_mask_width);
/*
* Reinit the apicid, now that we have extended initial_apicid.
*/
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
c->x86_max_cores = (core_level_siblings / smp_num_siblings);
__max_die_per_package = (die_level_siblings / core_level_siblings);
#endif
return 0;
}
......@@ -89,6 +89,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
/* representing HT, core, and die siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
EXPORT_PER_CPU_SYMBOL(cpu_die_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
/* Per CPU bogomips and other parameters */
......@@ -99,6 +103,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
static unsigned int logical_die __read_mostly;
/* Maximum number of SMT threads on any online core */
int __read_mostly __max_smt_threads = 1;
......@@ -306,6 +311,26 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg)
return -1;
}
EXPORT_SYMBOL(topology_phys_to_logical_pkg);
/**
 * topology_phys_to_logical_die - Map a physical die id to logical
 * @die_id:	The physical die id to look up
 * @cur_cpu:	CPU whose physical package id scopes the lookup
 *
 * Only CPUs that are marked initialized and that share @cur_cpu's
 * phys_proc_id are considered, so equal die ids in different packages
 * are not mixed up.
 *
 * Returns logical die id or -1 if not found
 */
int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
{
	int pkg_id = cpu_data(cur_cpu).phys_proc_id;
	int i;

	for_each_possible_cpu(i) {
		struct cpuinfo_x86 *info = &cpu_data(i);

		/* Skip CPUs whose topology data has not been filled in yet */
		if (!info->initialized)
			continue;
		if (info->phys_proc_id != pkg_id)
			continue;
		if (info->cpu_die_id != die_id)
			continue;

		return info->logical_die_id;
	}

	return -1;
}
EXPORT_SYMBOL(topology_phys_to_logical_die);
/**
* topology_update_package_map - Update the physical to logical package map
......@@ -330,6 +355,29 @@ int topology_update_package_map(unsigned int pkg, unsigned int cpu)
cpu_data(cpu).logical_proc_id = new;
return 0;
}
/**
* topology_update_die_map - Update the physical to logical die map
* @die: The die id as retrieved via CPUID
* @cpu: The cpu for which this is updated
*/
int topology_update_die_map(unsigned int die, unsigned int cpu)
{
int new;
/* Already available somewhere? */
new = topology_phys_to_logical_die(die, cpu);
if (new >= 0)
goto found;
new = logical_die++;
if (new != die) {
pr_info("CPU %u Converting physical %u to logical die %u\n",
cpu, die, new);
}
found:
cpu_data(cpu).logical_die_id = new;
return 0;
}
void __init smp_store_boot_cpu_info(void)
{
......@@ -339,6 +387,7 @@ void __init smp_store_boot_cpu_info(void)
*c = boot_cpu_data;
c->cpu_index = id;
topology_update_package_map(c->phys_proc_id, id);
topology_update_die_map(c->cpu_die_id, id);
c->initialized = true;
}
......@@ -393,6 +442,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
if (c->phys_proc_id == o->phys_proc_id &&
c->cpu_die_id == o->cpu_die_id &&
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
if (c->cpu_core_id == o->cpu_core_id)
return topology_sane(c, o, "smt");
......@@ -404,6 +454,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
}
} else if (c->phys_proc_id == o->phys_proc_id &&
c->cpu_die_id == o->cpu_die_id &&
c->cpu_core_id == o->cpu_core_id) {
return topology_sane(c, o, "smt");
}
......@@ -466,6 +517,15 @@ static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
/*
 * Two CPUs sit on the same die when both their physical package id and
 * their die id are equal.
 */
static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	return c->phys_proc_id == o->phys_proc_id &&
	       c->cpu_die_id == o->cpu_die_id;
}
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
static inline int x86_sched_itmt_flags(void)
{
......@@ -528,6 +588,7 @@ void set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
c->booted_cores = 1;
return;
}
......@@ -576,6 +637,9 @@ void set_cpu_sibling_map(int cpu)
}
if (match_pkg(c, o) && !topology_same_node(c, o))
x86_has_numa_in_package = true;
if ((i == cpu) || (has_mp && match_die(c, o)))
link_mask(topology_die_cpumask, cpu, i);
}
threads = cpumask_weight(topology_sibling_cpumask(cpu));
......@@ -1180,6 +1244,7 @@ static __init void disable_smp(void)
physid_set_mask_of_physid(0, &phys_cpu_present_map);
cpumask_set_cpu(0, topology_sibling_cpumask(0));
cpumask_set_cpu(0, topology_core_cpumask(0));
cpumask_set_cpu(0, topology_die_cpumask(0));
}
/*
......@@ -1275,6 +1340,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
......@@ -1495,6 +1561,8 @@ static void remove_siblinginfo(int cpu)
cpu_data(sibling).booted_cores--;
}
for_each_cpu(sibling, topology_die_cpumask(cpu))
cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));
for_each_cpu(sibling, topology_sibling_cpumask(cpu))
cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
......@@ -1502,6 +1570,7 @@ static void remove_siblinginfo(int cpu)
cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
cpumask_clear(topology_die_cpumask(cpu));
c->cpu_core_id = 0;
c->booted_cores = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
......
......@@ -251,6 +251,7 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
......
......@@ -43,6 +43,9 @@ static ssize_t name##_list_show(struct device *dev, \
define_id_show_func(physical_package_id);
static DEVICE_ATTR_RO(physical_package_id);
define_id_show_func(die_id);
static DEVICE_ATTR_RO(die_id);
define_id_show_func(core_id);
static DEVICE_ATTR_RO(core_id);
......@@ -50,10 +53,22 @@ define_siblings_show_func(thread_siblings, sibling_cpumask);
static DEVICE_ATTR_RO(thread_siblings);
static DEVICE_ATTR_RO(thread_siblings_list);
/*
 * Each define_siblings_show_func(name, mask) use is followed by two
 * DEVICE_ATTR_RO() declarations: one for name and one for name_list.
 *
 * core_cpus is bound to sibling_cpumask.
 */
define_siblings_show_func(core_cpus, sibling_cpumask);
static DEVICE_ATTR_RO(core_cpus);
static DEVICE_ATTR_RO(core_cpus_list);
/* core_siblings is bound to core_cpumask. */
define_siblings_show_func(core_siblings, core_cpumask);
static DEVICE_ATTR_RO(core_siblings);
static DEVICE_ATTR_RO(core_siblings_list);
/* die_cpus is bound to die_cpumask. */
define_siblings_show_func(die_cpus, die_cpumask);
static DEVICE_ATTR_RO(die_cpus);
static DEVICE_ATTR_RO(die_cpus_list);
/* package_cpus is bound to core_cpumask, the same mask as core_siblings. */
define_siblings_show_func(package_cpus, core_cpumask);
static DEVICE_ATTR_RO(package_cpus);
static DEVICE_ATTR_RO(package_cpus_list);
#ifdef CONFIG_SCHED_BOOK
define_id_show_func(book_id);
static DEVICE_ATTR_RO(book_id);
......@@ -72,11 +87,18 @@ static DEVICE_ATTR_RO(drawer_siblings_list);
static struct attribute *default_attrs[] = {
&dev_attr_physical_package_id.attr,
&dev_attr_die_id.attr,
&dev_attr_core_id.attr,
&dev_attr_thread_siblings.attr,
&dev_attr_thread_siblings_list.attr,
&dev_attr_core_cpus.attr,
&dev_attr_core_cpus_list.attr,
&dev_attr_core_siblings.attr,
&dev_attr_core_siblings_list.attr,
&dev_attr_die_cpus.attr,
&dev_attr_die_cpus_list.attr,
&dev_attr_package_cpus.attr,
&dev_attr_package_cpus_list.attr,
#ifdef CONFIG_SCHED_BOOK
&dev_attr_book_id.attr,
&dev_attr_book_siblings.attr,
......
......@@ -96,10 +96,10 @@ struct platform_data {
struct device_attribute name_attr;
};
/* Keep track of how many package pointers we allocated in init() */
static int max_packages __read_mostly;
/* Array of package pointers. Serialized by cpu hotplug lock */
static struct platform_device **pkg_devices;
/* Keep track of how many zone pointers we allocated in init() */
static int max_zones __read_mostly;
/* Array of zone pointers. Serialized by cpu hotplug lock */
static struct platform_device **zone_devices;
static ssize_t show_label(struct device *dev,
struct device_attribute *devattr, char *buf)
......@@ -422,10 +422,10 @@ static int chk_ucode_version(unsigned int cpu)
static struct platform_device *coretemp_get_pdev(unsigned int cpu)
{
int pkgid = topology_logical_package_id(cpu);
int id = topology_logical_die_id(cpu);
if (pkgid >= 0 && pkgid < max_packages)
return pkg_devices[pkgid];
if (id >= 0 && id < max_zones)
return zone_devices[id];
return NULL;
}
......@@ -531,7 +531,7 @@ static int coretemp_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct platform_data *pdata;
/* Initialize the per-package data structures */
/* Initialize the per-zone data structures */
pdata = devm_kzalloc(dev, sizeof(struct platform_data), GFP_KERNEL);
if (!pdata)
return -ENOMEM;
......@@ -566,13 +566,13 @@ static struct platform_driver coretemp_driver = {
static struct platform_device *coretemp_device_add(unsigned int cpu)
{
int err, pkgid = topology_logical_package_id(cpu);
int err, zoneid = topology_logical_die_id(cpu);
struct platform_device *pdev;
if (pkgid < 0)
if (zoneid < 0)
return ERR_PTR(-ENOMEM);
pdev = platform_device_alloc(DRVNAME, pkgid);
pdev = platform_device_alloc(DRVNAME, zoneid);
if (!pdev)
return ERR_PTR(-ENOMEM);
......@@ -582,7 +582,7 @@ static struct platform_device *coretemp_device_add(unsigned int cpu)
return ERR_PTR(err);
}
pkg_devices[pkgid] = pdev;
zone_devices[zoneid] = pdev;
return pdev;
}
......@@ -690,7 +690,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
* the rest.
*/
if (cpumask_empty(&pd->cpumask)) {
pkg_devices[topology_logical_package_id(cpu)] = NULL;
zone_devices[topology_logical_die_id(cpu)] = NULL;
platform_device_unregister(pdev);
return 0;
}
......@@ -728,10 +728,10 @@ static int __init coretemp_init(void)
if (!x86_match_cpu(coretemp_ids))
return -ENODEV;
max_packages = topology_max_packages();
pkg_devices = kcalloc(max_packages, sizeof(struct platform_device *),
max_zones = topology_max_packages() * topology_max_die_per_package();
zone_devices = kcalloc(max_zones, sizeof(struct platform_device *),
GFP_KERNEL);
if (!pkg_devices)
if (!zone_devices)
return -ENOMEM;
err = platform_driver_register(&coretemp_driver);
......@@ -747,7 +747,7 @@ static int __init coretemp_init(void)
outdrv:
platform_driver_unregister(&coretemp_driver);
kfree(pkg_devices);
kfree(zone_devices);
return err;
}
module_init(coretemp_init)
......@@ -756,7 +756,7 @@ static void __exit coretemp_exit(void)
{
cpuhp_remove_state(coretemp_hp_online);
platform_driver_unregister(&coretemp_driver);
kfree(pkg_devices);
kfree(zone_devices);
}
module_exit(coretemp_exit)
......
......@@ -166,12 +166,15 @@ struct rapl_domain {
#define power_zone_to_rapl_domain(_zone) \
container_of(_zone, struct rapl_domain, power_zone)
/* maximum rapl package domain name: package-%d-die-%d */
#define PACKAGE_DOMAIN_NAME_LENGTH 30
/* Each physical package contains multiple domains, these are the common
/* Each rapl package contains multiple domains, these are the common
* data across RAPL domains within a package.
*/
struct rapl_package {
unsigned int id; /* physical package/socket id */
unsigned int id; /* logical die id, equals physical 1-die systems */
unsigned int nr_domains;
unsigned long domain_map; /* bit map of active domains */
unsigned int power_unit;
......@@ -186,6 +189,7 @@ struct rapl_package {
int lead_cpu; /* one active cpu per package for access */
/* Track active cpus */
struct cpumask cpumask;
char name[PACKAGE_DOMAIN_NAME_LENGTH];
};
struct rapl_defaults {
......@@ -252,8 +256,9 @@ static struct powercap_control_type *control_type; /* PowerCap Controller */
static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */
/* caller to ensure CPU hotplug lock is held */
static struct rapl_package *find_package_by_id(int id)
static struct rapl_package *rapl_find_package_domain(int cpu)
{
int id = topology_logical_die_id(cpu);
struct rapl_package *rp;
list_for_each_entry(rp, &rapl_packages, plist) {
......@@ -913,8 +918,8 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
rp->time_unit = 1000000 / (1 << value);
pr_debug("Core CPU package %d energy=%dpJ, time=%dus, power=%duW\n",
rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n",
rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
return 0;
}
......@@ -938,8 +943,8 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
rp->time_unit = 1000000 / (1 << value);
pr_debug("Atom package %d energy=%dpJ, time=%dus, power=%duW\n",
rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n",
rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
return 0;
}
......@@ -1168,7 +1173,7 @@ static void rapl_update_domain_data(struct rapl_package *rp)
u64 val;
for (dmn = 0; dmn < rp->nr_domains; dmn++) {
pr_debug("update package %d domain %s data\n", rp->id,
pr_debug("update %s domain %s data\n", rp->name,
rp->domains[dmn].name);
/* exclude non-raw primitives */
for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) {
......@@ -1193,7 +1198,6 @@ static void rapl_unregister_powercap(void)
static int rapl_package_register_powercap(struct rapl_package *rp)
{
struct rapl_domain *rd;
char dev_name[17]; /* max domain name = 7 + 1 + 8 for int + 1 for null*/
struct powercap_zone *power_zone = NULL;
int nr_pl, ret;
......@@ -1204,20 +1208,16 @@ static int rapl_package_register_powercap(struct rapl_package *rp)
for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
if (rd->id == RAPL_DOMAIN_PACKAGE) {
nr_pl = find_nr_power_limit(rd);
pr_debug("register socket %d package domain %s\n",
rp->id, rd->name);
memset(dev_name, 0, sizeof(dev_name));
snprintf(dev_name, sizeof(dev_name), "%s-%d",
rd->name, rp->id);
pr_debug("register package domain %s\n", rp->name);
power_zone = powercap_register_zone(&rd->power_zone,
control_type,
dev_name, NULL,
rp->name, NULL,
&zone_ops[rd->id],
nr_pl,
&constraint_ops);
if (IS_ERR(power_zone)) {
pr_debug("failed to register package, %d\n",
rp->id);
pr_debug("failed to register power zone %s\n",
rp->name);
return PTR_ERR(power_zone);
}
/* track parent zone in per package/socket data */
......@@ -1243,8 +1243,8 @@ static int rapl_package_register_powercap(struct rapl_package *rp)
&constraint_ops);
if (IS_ERR(power_zone)) {
pr_debug("failed to register power_zone, %d:%s:%s\n",
rp->id, rd->name, dev_name);
pr_debug("failed to register power_zone, %s:%s\n",
rp->name, rd->name);
ret = PTR_ERR(power_zone);
goto err_cleanup;
}
......@@ -1257,7 +1257,7 @@ static int rapl_package_register_powercap(struct rapl_package *rp)
* failed after the first domain setup.
*/
while (--rd >= rp->domains) {
pr_debug("unregister package %d domain %s\n", rp->id, rd->name);
pr_debug("unregister %s domain %s\n", rp->name, rd->name);
powercap_unregister_zone(control_type, &rd->power_zone);
}
......@@ -1288,7 +1288,7 @@ static int __init rapl_register_psys(void)
rd->rpl[0].name = pl1_name;
rd->rpl[1].prim_id = PL2_ENABLE;
rd->rpl[1].name = pl2_name;
rd->rp = find_package_by_id(0);
rd->rp = rapl_find_package_domain(0);
power_zone = powercap_register_zone(&rd->power_zone, control_type,
"psys", NULL,
......@@ -1367,8 +1367,8 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
/* check if the domain is locked by BIOS, ignore if MSR doesn't exist */
if (!rapl_read_data_raw(rd, FW_LOCK, false, &val64)) {
if (val64) {
pr_info("RAPL package %d domain %s locked by BIOS\n",
rd->rp->id, rd->name);
pr_info("RAPL %s domain %s locked by BIOS\n",
rd->rp->name, rd->name);
rd->state |= DOMAIN_STATE_BIOS_LOCKED;
}
}
......@@ -1397,10 +1397,10 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu)
}
rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX);
if (!rp->nr_domains) {
pr_debug("no valid rapl domains found in package %d\n", rp->id);
pr_debug("no valid rapl domains found in %s\n", rp->name);
return -ENODEV;
}
pr_debug("found %d domains on package %d\n", rp->nr_domains, rp->id);
pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name);
rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain),
GFP_KERNEL);
......@@ -1433,8 +1433,8 @@ static void rapl_remove_package(struct rapl_package *rp)
rd_package = rd;
continue;
}
pr_debug("remove package, undo power limit on %d: %s\n",
rp->id, rd->name);
pr_debug("remove package, undo power limit on %s: %s\n",
rp->name, rd->name);
powercap_unregister_zone(control_type, &rd->power_zone);
}
/* do parent zone last */
......@@ -1444,9 +1444,11 @@ static void rapl_remove_package(struct rapl_package *rp)
}
/* called from CPU hotplug notifier, hotplug lock held */
static struct rapl_package *rapl_add_package(int cpu, int pkgid)
static struct rapl_package *rapl_add_package(int cpu)
{
int id = topology_logical_die_id(cpu);
struct rapl_package *rp;
struct cpuinfo_x86 *c = &cpu_data(cpu);
int ret;
rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL);
......@@ -1454,9 +1456,16 @@ static struct rapl_package *rapl_add_package(int cpu, int pkgid)
return ERR_PTR(-ENOMEM);
/* add the new package to the list */
rp->id = pkgid;
rp->id = id;
rp->lead_cpu = cpu;
if (topology_max_die_per_package() > 1)
snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH,
"package-%d-die-%d", c->phys_proc_id, c->cpu_die_id);
else
snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
c->phys_proc_id);
/* check if the package contains valid domains */
if (rapl_detect_domains(rp, cpu) ||
rapl_defaults->check_unit(rp, cpu)) {
......@@ -1485,12 +1494,11 @@ static struct rapl_package *rapl_add_package(int cpu, int pkgid)
*/
static int rapl_cpu_online(unsigned int cpu)
{
int pkgid = topology_physical_package_id(cpu);
struct rapl_package *rp;
rp = find_package_by_id(pkgid);
rp = rapl_find_package_domain(cpu);
if (!rp) {
rp = rapl_add_package(cpu, pkgid);
rp = rapl_add_package(cpu);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
......@@ -1500,11 +1508,10 @@ static int rapl_cpu_online(unsigned int cpu)
static int rapl_cpu_down_prep(unsigned int cpu)
{
int pkgid = topology_physical_package_id(cpu);
struct rapl_package *rp;
int lead_cpu;
rp = find_package_by_id(pkgid);
rp = rapl_find_package_domain(cpu);
if (!rp)
return 0;
......
......@@ -43,7 +43,7 @@ MODULE_PARM_DESC(notify_delay_ms,
*/
#define MAX_NUMBER_OF_TRIPS 2
struct pkg_device {
struct zone_device {
int cpu;
bool work_scheduled;
u32 tj_max;
......@@ -58,10 +58,10 @@ static struct thermal_zone_params pkg_temp_tz_params = {
.no_hwmon = true,
};
/* Keep track of how many package pointers we allocated in init() */
static int max_packages __read_mostly;
/* Array of package pointers */
static struct pkg_device **packages;
/* Keep track of how many zone pointers we allocated in init() */
static int max_id __read_mostly;
/* Array of zone pointers */
static struct zone_device **zones;
/* Serializes interrupt notification, work and hotplug */
static DEFINE_SPINLOCK(pkg_temp_lock);
/* Protects zone operation in the work function against hotplug removal */
......@@ -108,12 +108,12 @@ static int pkg_temp_debugfs_init(void)
*
* - Other callsites: Must hold pkg_temp_lock
*/
static struct pkg_device *pkg_temp_thermal_get_dev(unsigned int cpu)
static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
{
int pkgid = topology_logical_package_id(cpu);
int id = topology_logical_die_id(cpu);
if (pkgid >= 0 && pkgid < max_packages)
return packages[pkgid];
if (id >= 0 && id < max_id)
return zones[id];
return NULL;
}
......@@ -138,12 +138,13 @@ static int get_tj_max(int cpu, u32 *tj_max)
static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
{
struct pkg_device *pkgdev = tzd->devdata;
struct zone_device *zonedev = tzd->devdata;
u32 eax, edx;
rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_STATUS, &eax, &edx);
rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_STATUS,
&eax, &edx);
if (eax & 0x80000000) {
*temp = pkgdev->tj_max - ((eax >> 16) & 0x7f) * 1000;
*temp = zonedev->tj_max - ((eax >> 16) & 0x7f) * 1000;
pr_debug("sys_get_curr_temp %d\n", *temp);
return 0;
}
......@@ -153,7 +154,7 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
static int sys_get_trip_temp(struct thermal_zone_device *tzd,
int trip, int *temp)
{
struct pkg_device *pkgdev = tzd->devdata;
struct zone_device *zonedev = tzd->devdata;
unsigned long thres_reg_value;
u32 mask, shift, eax, edx;
int ret;
......@@ -169,14 +170,14 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd,
shift = THERM_SHIFT_THRESHOLD0;
}
ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
&eax, &edx);
if (ret < 0)
return ret;
thres_reg_value = (eax & mask) >> shift;
if (thres_reg_value)
*temp = pkgdev->tj_max - thres_reg_value * 1000;
*temp = zonedev->tj_max - thres_reg_value * 1000;
else
*temp = 0;
pr_debug("sys_get_trip_temp %d\n", *temp);
......@@ -187,14 +188,14 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd,
static int
sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
{
struct pkg_device *pkgdev = tzd->devdata;
struct zone_device *zonedev = tzd->devdata;
u32 l, h, mask, shift, intr;
int ret;
if (trip >= MAX_NUMBER_OF_TRIPS || temp >= pkgdev->tj_max)
if (trip >= MAX_NUMBER_OF_TRIPS || temp >= zonedev->tj_max)
return -EINVAL;
ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
&l, &h);
if (ret < 0)
return ret;
......@@ -216,11 +217,12 @@ sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
if (!temp) {
l &= ~intr;
} else {
l |= (pkgdev->tj_max - temp)/1000 << shift;
l |= (zonedev->tj_max - temp)/1000 << shift;
l |= intr;
}
return wrmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
return wrmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
l, h);
}
static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip,
......@@ -275,26 +277,26 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
{
struct thermal_zone_device *tzone = NULL;
int cpu = smp_processor_id();
struct pkg_device *pkgdev;
struct zone_device *zonedev;
u64 msr_val, wr_val;
mutex_lock(&thermal_zone_mutex);
spin_lock_irq(&pkg_temp_lock);
++pkg_work_cnt;
pkgdev = pkg_temp_thermal_get_dev(cpu);
if (!pkgdev) {
zonedev = pkg_temp_thermal_get_dev(cpu);
if (!zonedev) {
spin_unlock_irq(&pkg_temp_lock);
mutex_unlock(&thermal_zone_mutex);
return;
}
pkgdev->work_scheduled = false;
zonedev->work_scheduled = false;
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
if (wr_val != msr_val) {
wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
tzone = pkgdev->tzone;
tzone = zonedev->tzone;
}
enable_pkg_thres_interrupt();
......@@ -320,7 +322,7 @@ static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
static int pkg_thermal_notify(u64 msr_val)
{
int cpu = smp_processor_id();
struct pkg_device *pkgdev;
struct zone_device *zonedev;
unsigned long flags;
spin_lock_irqsave(&pkg_temp_lock, flags);
......@@ -329,10 +331,10 @@ static int pkg_thermal_notify(u64 msr_val)
disable_pkg_thres_interrupt();
/* Work is per package, so scheduling it once is enough. */
pkgdev = pkg_temp_thermal_get_dev(cpu);
if (pkgdev && !pkgdev->work_scheduled) {
pkgdev->work_scheduled = true;
pkg_thermal_schedule_work(pkgdev->cpu, &pkgdev->work);
zonedev = pkg_temp_thermal_get_dev(cpu);
if (zonedev && !zonedev->work_scheduled) {
zonedev->work_scheduled = true;
pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
}
spin_unlock_irqrestore(&pkg_temp_lock, flags);
......@@ -341,12 +343,12 @@ static int pkg_thermal_notify(u64 msr_val)
static int pkg_temp_thermal_device_add(unsigned int cpu)
{
int pkgid = topology_logical_package_id(cpu);
int id = topology_logical_die_id(cpu);
u32 tj_max, eax, ebx, ecx, edx;
struct pkg_device *pkgdev;
struct zone_device *zonedev;
int thres_count, err;
if (pkgid >= max_packages)
if (id >= max_id)
return -ENOMEM;
cpuid(6, &eax, &ebx, &ecx, &edx);
......@@ -360,51 +362,51 @@ static int pkg_temp_thermal_device_add(unsigned int cpu)
if (err)
return err;
pkgdev = kzalloc(sizeof(*pkgdev), GFP_KERNEL);
if (!pkgdev)
zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
if (!zonedev)
return -ENOMEM;
INIT_DELAYED_WORK(&pkgdev->work, pkg_temp_thermal_threshold_work_fn);
pkgdev->cpu = cpu;
pkgdev->tj_max = tj_max;
pkgdev->tzone = thermal_zone_device_register("x86_pkg_temp",
INIT_DELAYED_WORK(&zonedev->work, pkg_temp_thermal_threshold_work_fn);
zonedev->cpu = cpu;
zonedev->tj_max = tj_max;
zonedev->tzone = thermal_zone_device_register("x86_pkg_temp",
thres_count,
(thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
pkgdev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
if (IS_ERR(pkgdev->tzone)) {
err = PTR_ERR(pkgdev->tzone);
kfree(pkgdev);
zonedev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
if (IS_ERR(zonedev->tzone)) {
err = PTR_ERR(zonedev->tzone);
kfree(zonedev);
return err;
}
/* Store MSR value for package thermal interrupt, to restore at exit */
rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, pkgdev->msr_pkg_therm_low,
pkgdev->msr_pkg_therm_high);
rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, zonedev->msr_pkg_therm_low,
zonedev->msr_pkg_therm_high);
cpumask_set_cpu(cpu, &pkgdev->cpumask);
cpumask_set_cpu(cpu, &zonedev->cpumask);
spin_lock_irq(&pkg_temp_lock);
packages[pkgid] = pkgdev;
zones[id] = zonedev;
spin_unlock_irq(&pkg_temp_lock);
return 0;
}
static int pkg_thermal_cpu_offline(unsigned int cpu)
{
struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
bool lastcpu, was_target;
int target;
if (!pkgdev)
if (!zonedev)
return 0;
target = cpumask_any_but(&pkgdev->cpumask, cpu);
cpumask_clear_cpu(cpu, &pkgdev->cpumask);
target = cpumask_any_but(&zonedev->cpumask, cpu);
cpumask_clear_cpu(cpu, &zonedev->cpumask);
lastcpu = target >= nr_cpu_ids;
/*
* Remove the sysfs files, if this is the last cpu in the package
* before doing further cleanups.
*/
if (lastcpu) {
struct thermal_zone_device *tzone = pkgdev->tzone;
struct thermal_zone_device *tzone = zonedev->tzone;
/*
* We must protect against a work function calling
......@@ -413,7 +415,7 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
* won't try to call.
*/
mutex_lock(&thermal_zone_mutex);
pkgdev->tzone = NULL;
zonedev->tzone = NULL;
mutex_unlock(&thermal_zone_mutex);
thermal_zone_device_unregister(tzone);
......@@ -427,8 +429,8 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
* one. When we drop the lock, then the interrupt notify function
* will see the new target.
*/
was_target = pkgdev->cpu == cpu;
pkgdev->cpu = target;
was_target = zonedev->cpu == cpu;
zonedev->cpu = target;
/*
* If this is the last CPU in the package remove the package
......@@ -437,23 +439,23 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
* worker will see the package anymore.
*/
if (lastcpu) {
packages[topology_logical_package_id(cpu)] = NULL;
zones[topology_logical_die_id(cpu)] = NULL;
/* After this point nothing touches the MSR anymore. */
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
pkgdev->msr_pkg_therm_low, pkgdev->msr_pkg_therm_high);
zonedev->msr_pkg_therm_low, zonedev->msr_pkg_therm_high);
}
/*
* Check whether there is work scheduled and whether the work is
* targeted at the outgoing CPU.
*/
if (pkgdev->work_scheduled && was_target) {
if (zonedev->work_scheduled && was_target) {
/*
* To cancel the work we need to drop the lock, otherwise
* we might deadlock if the work needs to be flushed.
*/
spin_unlock_irq(&pkg_temp_lock);
cancel_delayed_work_sync(&pkgdev->work);
cancel_delayed_work_sync(&zonedev->work);
spin_lock_irq(&pkg_temp_lock);
/*
* If this is not the last cpu in the package and the work
......@@ -461,21 +463,21 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
* need to reschedule the work, otherwise the interrupt
* stays disabled forever.
*/
if (!lastcpu && pkgdev->work_scheduled)
pkg_thermal_schedule_work(target, &pkgdev->work);
if (!lastcpu && zonedev->work_scheduled)
pkg_thermal_schedule_work(target, &zonedev->work);
}
spin_unlock_irq(&pkg_temp_lock);
/* Final cleanup if this is the last cpu */
if (lastcpu)
kfree(pkgdev);
kfree(zonedev);
return 0;
}
static int pkg_thermal_cpu_online(unsigned int cpu)
{
struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu);
struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
struct cpuinfo_x86 *c = &cpu_data(cpu);
/* Paranoia check */
......@@ -483,8 +485,8 @@ static int pkg_thermal_cpu_online(unsigned int cpu)
return -ENODEV;
/* If the package exists, nothing to do */
if (pkgdev) {
cpumask_set_cpu(cpu, &pkgdev->cpumask);
if (zonedev) {
cpumask_set_cpu(cpu, &zonedev->cpumask);
return 0;
}
return pkg_temp_thermal_device_add(cpu);
......@@ -503,10 +505,10 @@ static int __init pkg_temp_thermal_init(void)
if (!x86_match_cpu(pkg_temp_thermal_ids))
return -ENODEV;
max_packages = topology_max_packages();
packages = kcalloc(max_packages, sizeof(struct pkg_device *),
max_id = topology_max_packages() * topology_max_die_per_package();
zones = kcalloc(max_id, sizeof(struct zone_device *),
GFP_KERNEL);
if (!packages)
if (!zones)
return -ENOMEM;
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
......@@ -525,7 +527,7 @@ static int __init pkg_temp_thermal_init(void)
return 0;
err:
kfree(packages);
kfree(zones);
return ret;
}
module_init(pkg_temp_thermal_init)
......@@ -537,7 +539,7 @@ static void __exit pkg_temp_thermal_exit(void)
cpuhp_remove_state(pkg_thermal_hp_state);
debugfs_remove_recursive(debugfs);
kfree(packages);
kfree(zones);
}
module_exit(pkg_temp_thermal_exit)
......
......@@ -184,6 +184,9 @@ static inline int cpu_to_mem(int cpu)
/*
 * Fallback topology id accessors. Each one is defined only if no macro of
 * the same name already exists (#ifndef guard). Every fallback evaluates
 * and discards its cpu argument with (void)(cpu), then yields a constant.
 */
#ifndef topology_physical_package_id
/* Fallback package id: always -1. */
#define topology_physical_package_id(cpu) ((void)(cpu), -1)
#endif
#ifndef topology_die_id
/* Fallback die id: always -1, like the package id fallback. */
#define topology_die_id(cpu) ((void)(cpu), -1)
#endif
#ifndef topology_core_id
/* Fallback core id: always 0 (not -1 as for package and die). */
#define topology_core_id(cpu) ((void)(cpu), 0)
#endif
......@@ -193,6 +196,9 @@ static inline int cpu_to_mem(int cpu)
/*
 * Fallback topology cpumask accessors, each defined only if no macro of
 * the same name already exists. Both the core mask and the die mask of a
 * CPU expand to cpumask_of(cpu) (presumably the mask holding just that
 * CPU; confirm against the cpumask_of() definition).
 */
#ifndef topology_core_cpumask
#define topology_core_cpumask(cpu) cpumask_of(cpu)
#endif
#ifndef topology_die_cpumask
#define topology_die_cpumask(cpu) cpumask_of(cpu)
#endif
#ifdef CONFIG_SCHED_SMT
static inline const struct cpumask *cpu_smt_mask(int cpu)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment