Commit 30de24c7 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-cache-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cache QoS (RDT/CAR) updates from Thomas Gleixner:
 "Add support for pseudo-locked cache regions.

  Cache Allocation Technology (CAT) allows on certain CPUs to isolate a
  region of cache and 'lock' it. Cache pseudo-locking builds on the fact
  that a CPU can still read and write data pre-allocated outside its
  current allocated area on cache hit. With cache pseudo-locking data
  can be preloaded into a reserved portion of cache that no application
  can fill, and from that point on will only serve cache hits. The cache
  pseudo-locked memory is made accessible to user space where an
  application can map it into its virtual address space and thus have a
  region of memory with reduced average read latency.

  The locking is not perfect and gets totally screwed by WBINDV and
  similar mechanisms, but it provides a reasonable enhancement for
  certain types of latency sensitive applications.

  The implementation extends the current CAT mechanism and provides a
  generally useful exclusive CAT mode on which it builds the extra
  pseude-locked regions"

* 'x86-cache-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
  x86/intel_rdt: Disable PMU access
  x86/intel_rdt: Fix possible circular lock dependency
  x86/intel_rdt: Make CPU information accessible for pseudo-locked regions
  x86/intel_rdt: Support restoration of subset of permissions
  x86/intel_rdt: Fix cleanup of plr structure on error
  x86/intel_rdt: Move pseudo_lock_region_clear()
  x86/intel_rdt: Limit C-states dynamically when pseudo-locking active
  x86/intel_rdt: Support L3 cache performance event of Broadwell
  x86/intel_rdt: More precise L2 hit/miss measurements
  x86/intel_rdt: Create character device exposing pseudo-locked region
  x86/intel_rdt: Create debugfs files for pseudo-locking testing
  x86/intel_rdt: Create resctrl debug area
  x86/intel_rdt: Ensure RDT cleanup on exit
  x86/intel_rdt: Resctrl files reflect pseudo-locked information
  x86/intel_rdt: Support creation/removal of pseudo-locked region
  x86/intel_rdt: Pseudo-lock region creation/removal core
  x86/intel_rdt: Discover supported platforms via prefetch disable bits
  x86/intel_rdt: Add utilities to test pseudo-locked region possibility
  x86/intel_rdt: Split resource group removal in two
  x86/intel_rdt: Enable entering of pseudo-locksetup mode
  ...
parents f4990264 4a7a54a5
This diff is collapsed.
......@@ -35,7 +35,9 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_INTEL_RDT) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o intel_rdt_ctrlmondata.o
obj-$(CONFIG_INTEL_RDT) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o
obj-$(CONFIG_INTEL_RDT) += intel_rdt_ctrlmondata.o intel_rdt_pseudo_lock.o
CFLAGS_intel_rdt_pseudo_lock.o = -I$(src)
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
......
......@@ -859,6 +859,8 @@ static __init bool get_rdt_resources(void)
return (rdt_mon_capable || rdt_alloc_capable);
}
static enum cpuhp_state rdt_online;
static int __init intel_rdt_late_init(void)
{
struct rdt_resource *r;
......@@ -880,6 +882,7 @@ static int __init intel_rdt_late_init(void)
cpuhp_remove_state(state);
return ret;
}
rdt_online = state;
for_each_alloc_capable_rdt_resource(r)
pr_info("Intel RDT %s allocation detected\n", r->name);
......@@ -891,3 +894,11 @@ static int __init intel_rdt_late_init(void)
}
late_initcall(intel_rdt_late_init);
static void __exit intel_rdt_exit(void)
{
cpuhp_remove_state(rdt_online);
rdtgroup_exit();
}
__exitcall(intel_rdt_exit);
......@@ -80,6 +80,34 @@ enum rdt_group_type {
RDT_NUM_GROUP,
};
/**
* enum rdtgrp_mode - Mode of a RDT resource group
* @RDT_MODE_SHAREABLE: This resource group allows sharing of its allocations
* @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed
* @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking
* @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations
* allowed AND the allocations are Cache Pseudo-Locked
*
* The mode of a resource group enables control over the allowed overlap
* between allocations associated with different resource groups (classes
* of service). User is able to modify the mode of a resource group by
* writing to the "mode" resctrl file associated with the resource group.
*
* The "shareable", "exclusive", and "pseudo-locksetup" modes are set by
* writing the appropriate text to the "mode" file. A resource group enters
* "pseudo-locked" mode after the schemata is written while the resource
* group is in "pseudo-locksetup" mode.
*/
enum rdtgrp_mode {
RDT_MODE_SHAREABLE = 0,
RDT_MODE_EXCLUSIVE,
RDT_MODE_PSEUDO_LOCKSETUP,
RDT_MODE_PSEUDO_LOCKED,
/* Must be last */
RDT_NUM_MODES,
};
/**
* struct mongroup - store mon group's data in resctrl fs.
* @mon_data_kn kernlfs node for the mon_data directory
......@@ -94,6 +122,43 @@ struct mongroup {
u32 rmid;
};
/**
* struct pseudo_lock_region - pseudo-lock region information
* @r: RDT resource to which this pseudo-locked region
* belongs
* @d: RDT domain to which this pseudo-locked region
* belongs
* @cbm: bitmask of the pseudo-locked region
* @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread
* completion
* @thread_done: variable used by waitqueue to test if pseudo-locking
* thread completed
* @cpu: core associated with the cache on which the setup code
* will be run
* @line_size: size of the cache lines
* @size: size of pseudo-locked region in bytes
* @kmem: the kernel memory associated with pseudo-locked region
* @minor: minor number of character device associated with this
* region
* @debugfs_dir: pointer to this region's directory in the debugfs
* filesystem
* @pm_reqs: Power management QoS requests related to this region
*/
struct pseudo_lock_region {
struct rdt_resource *r;
struct rdt_domain *d;
u32 cbm;
wait_queue_head_t lock_thread_wq;
int thread_done;
int cpu;
unsigned int line_size;
unsigned int size;
void *kmem;
unsigned int minor;
struct dentry *debugfs_dir;
struct list_head pm_reqs;
};
/**
* struct rdtgroup - store rdtgroup's data in resctrl file system.
* @kn: kernfs node
......@@ -106,16 +171,20 @@ struct mongroup {
* @type: indicates type of this rdtgroup - either
* monitor only or ctrl_mon group
* @mon: mongroup related data
* @mode: mode of resource group
* @plr: pseudo-locked region
*/
struct rdtgroup {
struct kernfs_node *kn;
struct list_head rdtgroup_list;
u32 closid;
struct cpumask cpu_mask;
int flags;
atomic_t waitcount;
enum rdt_group_type type;
struct mongroup mon;
struct kernfs_node *kn;
struct list_head rdtgroup_list;
u32 closid;
struct cpumask cpu_mask;
int flags;
atomic_t waitcount;
enum rdt_group_type type;
struct mongroup mon;
enum rdtgrp_mode mode;
struct pseudo_lock_region *plr;
};
/* rdtgroup.flags */
......@@ -148,6 +217,7 @@ extern struct list_head rdt_all_groups;
extern int max_name_width, max_data_width;
int __init rdtgroup_init(void);
void __exit rdtgroup_exit(void);
/**
* struct rftype - describe each file in the resctrl file system
......@@ -216,22 +286,24 @@ struct mbm_state {
* @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps
* @new_ctrl: new ctrl value to be loaded
* @have_new_ctrl: did user provide new_ctrl for this domain
* @plr: pseudo-locked region (if any) associated with domain
*/
struct rdt_domain {
struct list_head list;
int id;
struct cpumask cpu_mask;
unsigned long *rmid_busy_llc;
struct mbm_state *mbm_total;
struct mbm_state *mbm_local;
struct delayed_work mbm_over;
struct delayed_work cqm_limbo;
int mbm_work_cpu;
int cqm_work_cpu;
u32 *ctrl_val;
u32 *mbps_val;
u32 new_ctrl;
bool have_new_ctrl;
struct list_head list;
int id;
struct cpumask cpu_mask;
unsigned long *rmid_busy_llc;
struct mbm_state *mbm_total;
struct mbm_state *mbm_local;
struct delayed_work mbm_over;
struct delayed_work cqm_limbo;
int mbm_work_cpu;
int cqm_work_cpu;
u32 *ctrl_val;
u32 *mbps_val;
u32 new_ctrl;
bool have_new_ctrl;
struct pseudo_lock_region *plr;
};
/**
......@@ -351,7 +423,7 @@ struct rdt_resource {
struct rdt_cache cache;
struct rdt_membw membw;
const char *format_str;
int (*parse_ctrlval) (char *buf, struct rdt_resource *r,
int (*parse_ctrlval) (void *data, struct rdt_resource *r,
struct rdt_domain *d);
struct list_head evt_list;
int num_rmid;
......@@ -359,8 +431,8 @@ struct rdt_resource {
unsigned long fflags;
};
int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d);
int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d);
int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
......@@ -368,7 +440,7 @@ extern struct rdt_resource rdt_resources_all[];
extern struct rdtgroup rdtgroup_default;
DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
int __init rdtgroup_init(void);
extern struct dentry *debugfs_resctrl;
enum {
RDT_RESOURCE_L3,
......@@ -439,13 +511,32 @@ void rdt_last_cmd_printf(const char *fmt, ...);
void rdt_ctrl_update(void *arg);
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
void rdtgroup_kn_unlock(struct kernfs_node *kn);
int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
umode_t mask);
struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
struct list_head **pos);
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
u32 _cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
u32 cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm);
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
int update_domains(struct rdt_resource *r, int closid);
void closid_free(int closid);
int alloc_rmid(void);
void free_rmid(u32 rmid);
int rdt_get_mon_l3_config(struct rdt_resource *r);
......
......@@ -64,9 +64,10 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return true;
}
int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d)
int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d)
{
unsigned long data;
char *buf = _buf;
if (d->have_new_ctrl) {
rdt_last_cmd_printf("duplicate domain %d\n", d->id);
......@@ -87,7 +88,7 @@ int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d)
* are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
* Additionally Haswell requires at least two bits set.
*/
static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r)
static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
{
unsigned long first_bit, zero_bit, val;
unsigned int cbm_len = r->cache.cbm_len;
......@@ -122,22 +123,64 @@ static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return true;
}
struct rdt_cbm_parse_data {
struct rdtgroup *rdtgrp;
char *buf;
};
/*
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
{
unsigned long data;
struct rdt_cbm_parse_data *data = _data;
struct rdtgroup *rdtgrp = data->rdtgrp;
u32 cbm_val;
if (d->have_new_ctrl) {
rdt_last_cmd_printf("duplicate domain %d\n", d->id);
return -EINVAL;
}
if(!cbm_validate(buf, &data, r))
/*
* Cannot set up more than one pseudo-locked region in a cache
* hierarchy.
*/
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
rdtgroup_pseudo_locked_in_hierarchy(d)) {
rdt_last_cmd_printf("pseudo-locked region in hierarchy\n");
return -EINVAL;
d->new_ctrl = data;
}
if (!cbm_validate(data->buf, &cbm_val, r))
return -EINVAL;
if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
rdtgrp->mode == RDT_MODE_SHAREABLE) &&
rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) {
rdt_last_cmd_printf("CBM overlaps with pseudo-locked region\n");
return -EINVAL;
}
/*
* The CBM may not overlap with the CBM of another closid if
* either is exclusive.
*/
if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, true)) {
rdt_last_cmd_printf("overlaps with exclusive group\n");
return -EINVAL;
}
if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, false)) {
if (rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
rdt_last_cmd_printf("overlaps with other group\n");
return -EINVAL;
}
}
d->new_ctrl = cbm_val;
d->have_new_ctrl = true;
return 0;
......@@ -149,8 +192,10 @@ int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
* separated by ";". The "id" is in decimal, and must match one of
* the "id"s for this resource.
*/
static int parse_line(char *line, struct rdt_resource *r)
static int parse_line(char *line, struct rdt_resource *r,
struct rdtgroup *rdtgrp)
{
struct rdt_cbm_parse_data data;
char *dom = NULL, *id;
struct rdt_domain *d;
unsigned long dom_id;
......@@ -167,15 +212,32 @@ static int parse_line(char *line, struct rdt_resource *r)
dom = strim(dom);
list_for_each_entry(d, &r->domains, list) {
if (d->id == dom_id) {
if (r->parse_ctrlval(dom, r, d))
data.buf = dom;
data.rdtgrp = rdtgrp;
if (r->parse_ctrlval(&data, r, d))
return -EINVAL;
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
/*
* In pseudo-locking setup mode and just
* parsed a valid CBM that should be
* pseudo-locked. Only one locked region per
* resource group and domain so just do
* the required initialization for single
* region and return.
*/
rdtgrp->plr->r = r;
rdtgrp->plr->d = d;
rdtgrp->plr->cbm = d->new_ctrl;
d->plr = rdtgrp->plr;
return 0;
}
goto next;
}
}
return -EINVAL;
}
static int update_domains(struct rdt_resource *r, int closid)
int update_domains(struct rdt_resource *r, int closid)
{
struct msr_param msr_param;
cpumask_var_t cpu_mask;
......@@ -220,13 +282,14 @@ static int update_domains(struct rdt_resource *r, int closid)
return 0;
}
static int rdtgroup_parse_resource(char *resname, char *tok, int closid)
static int rdtgroup_parse_resource(char *resname, char *tok,
struct rdtgroup *rdtgrp)
{
struct rdt_resource *r;
for_each_alloc_enabled_rdt_resource(r) {
if (!strcmp(resname, r->name) && closid < r->num_closid)
return parse_line(tok, r);
if (!strcmp(resname, r->name) && rdtgrp->closid < r->num_closid)
return parse_line(tok, r, rdtgrp);
}
rdt_last_cmd_printf("unknown/unsupported resource name '%s'\n", resname);
return -EINVAL;
......@@ -239,7 +302,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
struct rdt_domain *dom;
struct rdt_resource *r;
char *tok, *resname;
int closid, ret = 0;
int ret = 0;
/* Valid input requires a trailing newline */
if (nbytes == 0 || buf[nbytes - 1] != '\n')
......@@ -253,7 +316,15 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
}
rdt_last_cmd_clear();
closid = rdtgrp->closid;
/*
* No changes to pseudo-locked region allowed. It has to be removed
* and re-created instead.
*/
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
ret = -EINVAL;
rdt_last_cmd_puts("resource group is pseudo-locked\n");
goto out;
}
for_each_alloc_enabled_rdt_resource(r) {
list_for_each_entry(dom, &r->domains, list)
......@@ -272,17 +343,27 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
ret = -EINVAL;
goto out;
}
ret = rdtgroup_parse_resource(resname, tok, closid);
ret = rdtgroup_parse_resource(resname, tok, rdtgrp);
if (ret)
goto out;
}
for_each_alloc_enabled_rdt_resource(r) {
ret = update_domains(r, closid);
ret = update_domains(r, rdtgrp->closid);
if (ret)
goto out;
}
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
/*
* If pseudo-locking fails we keep the resource group in
* mode RDT_MODE_PSEUDO_LOCKSETUP with its class of service
* active and updated for just the domain the pseudo-locked
* region was requested for.
*/
ret = rdtgroup_pseudo_lock_create(rdtgrp);
}
out:
rdtgroup_kn_unlock(of->kn);
return ret ?: nbytes;
......@@ -318,10 +399,18 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (rdtgrp) {
closid = rdtgrp->closid;
for_each_alloc_enabled_rdt_resource(r) {
if (closid < r->num_closid)
show_doms(s, r, closid);
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
for_each_alloc_enabled_rdt_resource(r)
seq_printf(s, "%s:uninitialized\n", r->name);
} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
seq_printf(s, "%s:%d=%x\n", rdtgrp->plr->r->name,
rdtgrp->plr->d->id, rdtgrp->plr->cbm);
} else {
closid = rdtgrp->closid;
for_each_alloc_enabled_rdt_resource(r) {
if (closid < r->num_closid)
show_doms(s, r, closid);
}
}
} else {
ret = -ENOENT;
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM resctrl
#if !defined(_TRACE_PSEUDO_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PSEUDO_LOCK_H
#include <linux/tracepoint.h>
TRACE_EVENT(pseudo_lock_mem_latency,
TP_PROTO(u32 latency),
TP_ARGS(latency),
TP_STRUCT__entry(__field(u32, latency)),
TP_fast_assign(__entry->latency = latency),
TP_printk("latency=%u", __entry->latency)
);
TRACE_EVENT(pseudo_lock_l2,
TP_PROTO(u64 l2_hits, u64 l2_miss),
TP_ARGS(l2_hits, l2_miss),
TP_STRUCT__entry(__field(u64, l2_hits)
__field(u64, l2_miss)),
TP_fast_assign(__entry->l2_hits = l2_hits;
__entry->l2_miss = l2_miss;),
TP_printk("hits=%llu miss=%llu",
__entry->l2_hits, __entry->l2_miss));
TRACE_EVENT(pseudo_lock_l3,
TP_PROTO(u64 l3_hits, u64 l3_miss),
TP_ARGS(l3_hits, l3_miss),
TP_STRUCT__entry(__field(u64, l3_hits)
__field(u64, l3_miss)),
TP_fast_assign(__entry->l3_hits = l3_hits;
__entry->l3_miss = l3_miss;),
TP_printk("hits=%llu miss=%llu",
__entry->l3_hits, __entry->l3_miss));
#endif /* _TRACE_PSEUDO_LOCK_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE intel_rdt_pseudo_lock_event
#include <trace/define_trace.h>
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment