Commit f5709176 authored by Linus Torvalds

Merge branch 'x86-cache-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cache quality monitoring update from Thomas Gleixner:
 "This update provides a complete rewrite of the Cache Quality
  Monitoring (CQM) facility.

  The existing CQM support was duct taped into perf with a lot of issues
  and the attempts to fix those turned out to be incomplete and
  horrible.

  After lengthy discussions it was decided to integrate the CQM support
  into the Resource Director Technology (RDT) facility, which is the
  obvious choice as in hardware CQM is part of RDT. This made it
  possible to add Memory Bandwidth Monitoring support on top.

  As a result the mechanisms for allocating cache/memory bandwidth and
  the corresponding monitoring mechanisms are integrated into a single
  management facility with a consistent user interface"

* 'x86-cache-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (37 commits)
  x86/intel_rdt: Turn off most RDT features on Skylake
  x86/intel_rdt: Add command line options for resource director technology
  x86/intel_rdt: Move special case code for Haswell to a quirk function
  x86/intel_rdt: Remove redundant ternary operator on return
  x86/intel_rdt/cqm: Improve limbo list processing
  x86/intel_rdt/mbm: Fix MBM overflow handler during CPU hotplug
  x86/intel_rdt: Modify the intel_pqr_state for better performance
  x86/intel_rdt/cqm: Clear the default RMID during hotcpu
  x86/intel_rdt: Show bitmask of shareable resource with other executing units
  x86/intel_rdt/mbm: Handle counter overflow
  x86/intel_rdt/mbm: Add mbm counter initialization
  x86/intel_rdt/mbm: Basic counting of MBM events (total and local)
  x86/intel_rdt/cqm: Add CPU hotplug support
  x86/intel_rdt/cqm: Add sched_in support
  x86/intel_rdt: Introduce rdt_enable_key for scheduling
  x86/intel_rdt/cqm: Add mount,umount support
  x86/intel_rdt/cqm: Add rmdir support
  x86/intel_rdt: Separate the ctrl bits from rmdir
  x86/intel_rdt/cqm: Add mon_data
  x86/intel_rdt: Prepare for RDT monitor data support
  ...
parents d725c7ac d56593eb
@@ -138,6 +138,7 @@ parameter is applicable::
 	PPT	Parallel port support is enabled.
 	PS2	Appropriate PS/2 support is enabled.
 	RAM	RAM disk support is enabled.
+	RDT	Intel Resource Director Technology.
 	S390	S390 architecture is enabled.
 	SCSI	Appropriate SCSI support is enabled.
 			A lot of drivers have their options described inside
......
@@ -3612,6 +3612,12 @@
 			Run specified binary instead of /init from the ramdisk,
 			used for early userspace startup. See initrd.

+	rdt=		[HW,X86,RDT]
+			Turn on/off individual RDT features. List is:
+			cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, mba.
+			E.g. to turn on cmt and turn off mba use:
+				rdt=cmt,!mba
+
 	reboot=		[KNL]
 			Format (x86 or x86_64):
 				[w[arm] | c[old] | h[ard] | s[oft] | g[pio]] \
......
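The rdt= option documented in the hunk above takes a comma-separated feature list in which a leading '!' forces a feature off. As a rough illustration of how such an option is typically consumed, here is a hedged sketch of an early_param() handler; the real parser lives in arch/x86/kernel/cpu/intel_rdt.c (collapsed in this view), and the flag variables and handler name below are hypothetical.

#include <linux/init.h>
#include <linux/string.h>
#include <linux/types.h>

/*
 * Illustrative sketch only: parse "rdt=cmt,!mba"-style strings into
 * per-feature force-on/force-off flags. Only "cmt" is shown; the other
 * features listed in the documentation would be handled the same way.
 */
static bool cmt_forced_on, cmt_forced_off;	/* hypothetical flags */

static int __init set_rdt_options(char *str)
{
	char *tok;
	bool force_off;

	if (!str)
		return -EINVAL;

	while ((tok = strsep(&str, ",")) != NULL) {
		/* A leading '!' means "turn this feature off". */
		force_off = *tok == '!';
		if (force_off)
			tok++;
		if (!strcmp(tok, "cmt")) {
			cmt_forced_on = !force_off;
			cmt_forced_off = force_off;
		}
		/* ...mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, mba alike... */
	}
	return 0;
}
early_param("rdt", set_rdt_options);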
This diff is collapsed.
@@ -11121,7 +11121,7 @@ M:	Fenghua Yu <fenghua.yu@intel.com>
 L:	linux-kernel@vger.kernel.org
 S:	Supported
 F:	arch/x86/kernel/cpu/intel_rdt*
-F:	arch/x86/include/asm/intel_rdt*
+F:	arch/x86/include/asm/intel_rdt_sched.h
 F:	Documentation/x86/intel_rdt*

 READ-COPY UPDATE (RCU)
......
@@ -429,16 +429,16 @@ config GOLDFISH
 	def_bool y
 	depends on X86_GOLDFISH

-config INTEL_RDT_A
-	bool "Intel Resource Director Technology Allocation support"
+config INTEL_RDT
+	bool "Intel Resource Director Technology support"
 	default n
 	depends on X86 && CPU_SUP_INTEL
 	select KERNFS
 	help
-	  Select to enable resource allocation which is a sub-feature of
-	  Intel Resource Director Technology(RDT). More information about
-	  RDT can be found in the Intel x86 Architecture Software
-	  Developer Manual.
+	  Select to enable resource allocation and monitoring which are
+	  sub-features of Intel Resource Director Technology(RDT). More
+	  information about RDT can be found in the Intel x86
+	  Architecture Software Developer Manual.

 	  Say N if unsure.
......
-obj-$(CONFIG_CPU_SUP_INTEL)		+= core.o bts.o cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= core.o bts.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= ds.o knc.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= lbr.o p4.o p6.o pt.o
 obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)	+= intel-rapl-perf.o
......
This diff is collapsed.
#ifndef _ASM_X86_INTEL_RDT_COMMON_H
#define _ASM_X86_INTEL_RDT_COMMON_H
#define MSR_IA32_PQR_ASSOC 0x0c8f
/**
* struct intel_pqr_state - State cache for the PQR MSR
* @rmid: The cached Resource Monitoring ID
* @closid: The cached Class Of Service ID
* @rmid_usecnt: The usage counter for rmid
*
* The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
* lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
* contains both parts, so we need to cache them.
*
* The cache also helps to avoid pointless updates if the value does
* not change.
*/
struct intel_pqr_state {
u32 rmid;
u32 closid;
int rmid_usecnt;
};
DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
#endif /* _ASM_X86_INTEL_RDT_COMMON_H */
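For clarity, the PQR layout described in the kerneldoc above can be spelled out. The helper below is purely illustrative (no such function exists in the tree); it just shows how the cached closid/rmid pair maps onto the 64-bit MSR value, which the kernel writes as two 32-bit halves via wrmsr().

/*
 * Illustrative only: the low half of MSR_IA32_PQR_ASSOC carries the
 * RMID (bits 0-9 are architecturally defined), the high half carries
 * the CLOSID. This is why the scheduling path further down does
 * wrmsr(IA32_PQR_ASSOC, rmid, closid).
 */
static inline u64 pqr_assoc_val(u32 rmid, u32 closid)
{
	return ((u64)closid << 32) | rmid;
}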
#ifndef _ASM_X86_INTEL_RDT_SCHED_H
#define _ASM_X86_INTEL_RDT_SCHED_H
#ifdef CONFIG_INTEL_RDT
#include <linux/sched.h>
#include <linux/jump_label.h>
#define IA32_PQR_ASSOC 0x0c8f
/**
* struct intel_pqr_state - State cache for the PQR MSR
* @cur_rmid: The cached Resource Monitoring ID
* @cur_closid: The cached Class Of Service ID
* @default_rmid: The user assigned Resource Monitoring ID
* @default_closid: The user assigned cached Class Of Service ID
*
* The upper 32 bits of IA32_PQR_ASSOC contain closid and the
* lower 10 bits rmid. The update to IA32_PQR_ASSOC always
* contains both parts, so we need to cache them. This also
* stores the user configured per cpu CLOSID and RMID.
*
* The cache also helps to avoid pointless updates if the value does
* not change.
*/
struct intel_pqr_state {
u32 cur_rmid;
u32 cur_closid;
u32 default_rmid;
u32 default_closid;
};
DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
/*
* __intel_rdt_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
*
* Following considerations are made so that this has minimal impact
* on scheduler hot path:
* - This will stay a no-op unless we are running on an Intel SKU
*   which supports resource control or monitoring, and it is enabled
*   by mounting the resctrl file system.
* - Caches the per cpu CLOSid/RMID values and does the MSR write only
* when a task with a different CLOSid/RMID is scheduled in.
* - We allocate RMIDs/CLOSids globally in order to keep this as
* simple as possible.
* Must be called with preemption disabled.
*/
static void __intel_rdt_sched_in(void)
{
struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
u32 closid = state->default_closid;
u32 rmid = state->default_rmid;
/*
* If this task has a closid/rmid assigned, use it.
* Else use the closid/rmid assigned to this cpu.
*/
if (static_branch_likely(&rdt_alloc_enable_key)) {
if (current->closid)
closid = current->closid;
}
if (static_branch_likely(&rdt_mon_enable_key)) {
if (current->rmid)
rmid = current->rmid;
}
if (closid != state->cur_closid || rmid != state->cur_rmid) {
state->cur_closid = closid;
state->cur_rmid = rmid;
wrmsr(IA32_PQR_ASSOC, rmid, closid);
}
}
static inline void intel_rdt_sched_in(void)
{
if (static_branch_likely(&rdt_enable_key))
__intel_rdt_sched_in();
}
#else
static inline void intel_rdt_sched_in(void) {}
#endif /* CONFIG_INTEL_RDT */
#endif /* _ASM_X86_INTEL_RDT_SCHED_H */
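The call site for intel_rdt_sched_in() is not visible in this view; the process_32.c/process_64.c hunks below only switch the include to <asm/intel_rdt_sched.h>. As a hedged sketch, the hook is meant to run at the tail of __switch_to(), where preemption is already disabled as the comment above requires; the wrapper name here is hypothetical.

#include <asm/intel_rdt_sched.h>

/*
 * Illustrative call site only: the real one is in the collapsed
 * arch/x86/kernel/process_{32,64}.c diffs. By this point on the
 * context-switch path 'current' already refers to the incoming task,
 * so intel_rdt_sched_in() picks up that task's closid/rmid (or the
 * per-CPU defaults) and stays a jump-label no-op until resctrl is
 * mounted.
 */
static void arch_switch_rdt(void)
{
	intel_rdt_sched_in();
}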
@@ -33,7 +33,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o

-obj-$(CONFIG_INTEL_RDT_A)	+= intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_schemata.o
+obj-$(CONFIG_INTEL_RDT)		+= intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o intel_rdt_ctrlmondata.o
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
......
This diff is collapsed.
@@ -26,7 +26,7 @@
 #include <linux/kernfs.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <asm/intel_rdt.h>
+#include "intel_rdt.h"

 /*
  * Check whether MBA bandwidth percentage value is correct. The value is
@@ -192,7 +192,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok, int closid)
 {
 	struct rdt_resource *r;

-	for_each_enabled_rdt_resource(r) {
+	for_each_alloc_enabled_rdt_resource(r) {
 		if (!strcmp(resname, r->name) && closid < r->num_closid)
 			return parse_line(tok, r);
 	}
@@ -221,7 +221,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 	closid = rdtgrp->closid;

-	for_each_enabled_rdt_resource(r) {
+	for_each_alloc_enabled_rdt_resource(r) {
 		list_for_each_entry(dom, &r->domains, list)
 			dom->have_new_ctrl = false;
 	}
@@ -237,7 +237,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 		goto out;
 	}

-	for_each_enabled_rdt_resource(r) {
+	for_each_alloc_enabled_rdt_resource(r) {
 		ret = update_domains(r, closid);
 		if (ret)
 			goto out;
@@ -269,12 +269,13 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
 {
 	struct rdtgroup *rdtgrp;
 	struct rdt_resource *r;
-	int closid, ret = 0;
+	int ret = 0;
+	u32 closid;

 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 	if (rdtgrp) {
 		closid = rdtgrp->closid;
-		for_each_enabled_rdt_resource(r) {
+		for_each_alloc_enabled_rdt_resource(r) {
 			if (closid < r->num_closid)
 				show_doms(s, r, closid);
 		}
@@ -284,3 +285,57 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
 	rdtgroup_kn_unlock(of->kn);
 	return ret;
 }
void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
struct rdtgroup *rdtgrp, int evtid, int first)
{
/*
* setup the parameters to send to the IPI to read the data.
*/
rr->rgrp = rdtgrp;
rr->evtid = evtid;
rr->d = d;
rr->val = 0;
rr->first = first;
smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
}
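mon_event_read() packages its arguments into a struct rmid_read and hands it to mon_event_count() via an IPI to a CPU in the target domain. The struct itself is defined in intel_rdt.h, whose diff is collapsed above; the sketch below is only an assumption reconstructed from the fields used here, not the authoritative definition.

/*
 * Assumed shape of struct rmid_read, inferred from the assignments in
 * mon_event_read() above; see the collapsed intel_rdt.h diff for the
 * real definition.
 */
struct rmid_read {
	struct rdtgroup		*rgrp;	/* group whose RMID is read */
	struct rdt_domain	*d;	/* cache/memory domain to read */
	int			evtid;	/* monitoring event id */
	bool			first;	/* first read: prime MBM state */
	u64			val;	/* result, in hardware chunks */
};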
int rdtgroup_mondata_show(struct seq_file *m, void *arg)
{
struct kernfs_open_file *of = m->private;
u32 resid, evtid, domid;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
union mon_data_bits md;
struct rdt_domain *d;
struct rmid_read rr;
int ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
md.priv = of->kn->priv;
resid = md.u.rid;
domid = md.u.domid;
evtid = md.u.evtid;
r = &rdt_resources_all[resid];
d = rdt_find_domain(r, domid, NULL);
if (!d) {
ret = -ENOENT;
goto out;
}
mon_event_read(&rr, d, rdtgrp, evtid, false);
if (rr.val & RMID_VAL_ERROR)
seq_puts(m, "Error\n");
else if (rr.val & RMID_VAL_UNAVAIL)
seq_puts(m, "Unavailable\n");
else
seq_printf(m, "%llu\n", rr.val * r->mon_scale);
out:
rdtgroup_kn_unlock(of->kn);
return ret;
}
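rdtgroup_mondata_show() unpacks the resource, domain and event ids from the kernfs private pointer through union mon_data_bits, which is likewise defined in the collapsed intel_rdt.h. A hedged sketch of its shape, with illustrative bit-field widths, would be:

/*
 * Assumed layout: the kernfs priv pointer is overlaid with packed ids,
 * which is why md.priv = of->kn->priv above can be decoded into
 * rid/domid/evtid. Field widths are illustrative only.
 */
union mon_data_bits {
	void *priv;
	struct {
		unsigned int rid	: 10;	/* resource id */
		unsigned int evtid	: 8;	/* monitor event id */
		unsigned int domid	: 14;	/* domain id */
	} u;
};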
This diff is collapsed.
This diff is collapsed.
@@ -56,7 +56,7 @@
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>
 #include <asm/vm86.h>
-#include <asm/intel_rdt.h>
+#include <asm/intel_rdt_sched.h>
 #include <asm/proto.h>

 void __show_regs(struct pt_regs *regs, int all)
......
@@ -52,7 +52,7 @@
 #include <asm/switch_to.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/vdso.h>
-#include <asm/intel_rdt.h>
+#include <asm/intel_rdt_sched.h>
 #include <asm/unistd.h>

 #ifdef CONFIG_IA32_EMULATION
 /* Not included via unistd.h */
......
@@ -139,14 +139,6 @@ struct hw_perf_event {
 			/* for tp_event->class */
 			struct list_head	tp_list;
 		};
-		struct { /* intel_cqm */
-			int			cqm_state;
-			u32			cqm_rmid;
-			int			is_group_event;
-			struct list_head	cqm_events_entry;
-			struct list_head	cqm_groups_entry;
-			struct list_head	cqm_group_entry;
-		};
 		struct { /* amd_power */
 			u64	pwr_acc;
 			u64	ptsc;
@@ -413,11 +405,6 @@ struct pmu {
 	size_t				task_ctx_size;

-	/*
-	 * Return the count value for a counter.
-	 */
-	u64 (*count)			(struct perf_event *event); /*optional*/
-
 	/*
 	 * Set up pmu-private data structures for an AUX area
 	 */
@@ -1112,11 +1099,6 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
 		__perf_event_task_sched_out(prev, next);
 }

-static inline u64 __perf_event_count(struct perf_event *event)
-{
-	return local64_read(&event->count) + atomic64_read(&event->child_count);
-}
-
 extern void perf_event_mmap(struct vm_area_struct *vma);
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
......
@@ -909,8 +909,9 @@ struct task_struct {
 	/* cg_list protected by css_set_lock and tsk->alloc_lock: */
 	struct list_head		cg_list;
 #endif
-#ifdef CONFIG_INTEL_RDT_A
-	int				closid;
+#ifdef CONFIG_INTEL_RDT
+	u32				closid;
+	u32				rmid;
 #endif
 #ifdef CONFIG_FUTEX
 	struct robust_list_head __user	*robust_list;
......
@@ -3673,10 +3673,7 @@ static void __perf_event_read(void *info)

 static inline u64 perf_event_count(struct perf_event *event)
 {
-	if (event->pmu->count)
-		return event->pmu->count(event);
-
-	return __perf_event_count(event);
+	return local64_read(&event->count) + atomic64_read(&event->child_count);
 }

 /*
@@ -3707,15 +3704,6 @@ int perf_event_read_local(struct perf_event *event, u64 *value)
 		goto out;
 	}

-	/*
-	 * It must not have a pmu::count method, those are not
-	 * NMI safe.
-	 */
-	if (event->pmu->count) {
-		ret = -EOPNOTSUPP;
-		goto out;
-	}
-
 	/* If this is a per-task event, it must be for current */
 	if ((event->attach_state & PERF_ATTACH_TASK) &&
 	    event->hw.target != current) {
......