Commit aee7f913 authored by Ingo Molnar

x86/mm/pat: Update the comments in pat.c and pat_interval.c and refresh the code a bit

Tidy up the code:

 - add comments explaining the PAT code, the role of the functions and the logic

 - fix various typos and grammar while at it

 - simplify the file-scope memtype_interval_*() namespace to interval_*()

 - simplify stylistic complications such as unnecessary linebreaks
   or convoluted control flow

 - use the simpler '#ifdef CONFIG_*' pattern instead of the '#if defined(CONFIG_*)' pattern

 - remove the non-idiomatic newline between late_initcall() and its function definition
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 360db4ac
pat.c:

 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Handle caching attributes in page tables (PAT)
+ * Page Attribute Table (PAT) support: handle memory caching attributes in page tables.
  *
  * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
  *          Suresh B Siddha <suresh.b.siddha@intel.com>
  *
  * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
+ *
+ * Basic principles:
+ *
+ * PAT is a CPU feature supported by all modern x86 CPUs, to allow the firmware and
+ * the kernel to set one of a handful of 'caching type' attributes for physical
+ * memory ranges: uncached, write-combining, write-through, write-protected,
+ * and the most commonly used and default attribute: write-back caching.
+ *
+ * PAT support supercedes and augments MTRR support in a compatible fashion: MTRR is
+ * a hardware interface to enumerate a limited number of physical memory ranges
+ * and set their caching attributes explicitly, programmed into the CPU via MSRs.
+ * Even modern CPUs have MTRRs enabled - but these are typically not touched
+ * by the kernel or by user-space (such as the X server), we rely on PAT for any
+ * additional cache attribute logic.
+ *
+ * PAT doesn't work via explicit memory ranges, but uses page table entries to add
+ * cache attribute information to the mapped memory range: there's 3 bits used,
+ * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT), with the 8 possible values mapped by the
+ * CPU to actual cache attributes via an MSR loaded into the CPU (MSR_IA32_CR_PAT).
+ *
+ * ( There's a metric ton of finer details, such as compatibility with CPU quirks
+ *   that only support 4 types of PAT entries, and interaction with MTRRs, see
+ *   below for details. )
  */
 #include <linux/seq_file.h>
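
To make the new header comment a bit more concrete, here is a minimal user-space sketch (an editorial illustration, not part of the commit): it shows how the three PTE bits listed above select one of the eight PAT entries. The bit positions mirror the kernel's _PAGE_BIT_PWT/_PAGE_BIT_PCD/_PAGE_BIT_PAT definitions for 4K pages, and the MSR value used is the architectural power-on default, not the layout Linux programs at boot; the file name pat_index_demo.c is made up for the example.

/* pat_index_demo.c - illustrative only, not kernel code */
#include <stdint.h>
#include <stdio.h>

#define PTE_PWT (1ULL << 3)   /* mirrors _PAGE_BIT_PWT */
#define PTE_PCD (1ULL << 4)   /* mirrors _PAGE_BIT_PCD */
#define PTE_PAT (1ULL << 7)   /* mirrors _PAGE_BIT_PAT (4K PTEs) */

/* The PAT index is PAT:PCD:PWT, selecting one of 8 entries in MSR_IA32_CR_PAT. */
static unsigned int pat_index(uint64_t pte)
{
        return (!!(pte & PTE_PAT) << 2) | (!!(pte & PTE_PCD) << 1) | !!(pte & PTE_PWT);
}

int main(void)
{
        /* Architectural power-on default of MSR_IA32_CR_PAT: WB, WT, UC-, UC, repeated. */
        uint64_t pat_msr = 0x0007040600070406ULL;
        uint64_t pte     = PTE_PCD | PTE_PWT;   /* selects index 3 */
        unsigned int idx = pat_index(pte);

        /* Each PAT entry occupies one byte; its low 3 bits encode the memory type. */
        printf("PAT index %u -> type encoding 0x%llx\n",
               idx, (unsigned long long)((pat_msr >> (idx * 8)) & 0x7));
        return 0;
}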
@@ -839,7 +862,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 }
 
 /*
- * Change the memory type for the physial address range in kernel identity
+ * Change the memory type for the physical address range in kernel identity
  * mapping space if that range is a part of identity map.
  */
 int kernel_map_sync_memtype(u64 base, unsigned long size,
@@ -851,15 +874,14 @@ int kernel_map_sync_memtype(u64 base, unsigned long size,
                return 0;
 
        /*
-        * some areas in the middle of the kernel identity range
-        * are not mapped, like the PCI space.
+        * Some areas in the middle of the kernel identity range
+        * are not mapped, for example the PCI space.
         */
        if (!page_is_ram(base >> PAGE_SHIFT))
                return 0;
 
        id_sz = (__pa(high_memory-1) <= base + size) ?
-                       __pa(high_memory) - base :
-                       size;
+               __pa(high_memory) - base : size;
 
        if (ioremap_change_attr((unsigned long)__va(base), id_sz, pcm) < 0) {
                pr_info("x86/PAT: %s:%d ioremap_change_attr failed %s for [mem %#010Lx-%#010Lx]\n",
@@ -1099,6 +1121,10 @@ EXPORT_SYMBOL_GPL(pgprot_writethrough);
 
 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
 
+/*
+ * We are allocating a temporary printout-entry to be passed
+ * between seq_start()/next() and seq_show():
+ */
 static struct memtype *memtype_get_idx(loff_t pos)
 {
        struct memtype *print_entry;
@@ -1112,12 +1138,13 @@ static struct memtype *memtype_get_idx(loff_t pos)
        ret = memtype_copy_nth_element(print_entry, pos);
        spin_unlock(&memtype_lock);
 
-       if (!ret) {
-               return print_entry;
-       } else {
+       /* Free it on error: */
+       if (ret) {
                kfree(print_entry);
                return NULL;
        }
+
+       return print_entry;
 }
 
 static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
@@ -1144,8 +1171,11 @@ static int memtype_seq_show(struct seq_file *seq, void *v)
 {
        struct memtype *print_entry = (struct memtype *)v;
 
-       seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
-                       print_entry->start, print_entry->end);
+       seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n",
+                  cattr_name(print_entry->type),
+                  print_entry->start,
+                  print_entry->end);
+
        kfree(print_entry);
 
        return 0;
@@ -1178,7 +1208,6 @@ static int __init pat_memtype_list_init(void)
        }
        return 0;
 }
-
 late_initcall(pat_memtype_list_init);
 
 #endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */
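
As a usage note (editorial, not part of the commit): with CONFIG_DEBUG_FS and CONFIG_X86_PAT enabled, the seq_file code above is exposed through debugfs, so the tracked ranges can be dumped from user space. Below is a minimal reader sketch; it assumes debugfs is mounted at /sys/kernel/debug and that pat_memtype_list_init() creates the file under the x86 directory, and it only relies on the "<type> @ 0x<start>-0x<end>" format visible in memtype_seq_show() above.

/* read_pat_memtype_list.c - illustrative user-space reader, assumptions noted above */
#include <stdio.h>

int main(void)
{
        char line[256];
        FILE *f = fopen("/sys/kernel/debug/x86/pat_memtype_list", "r");

        if (!f) {
                perror("pat_memtype_list");
                return 1;
        }

        /* Each tracked range prints as "<type> @ 0x<start>-0x<end>": */
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);

        fclose(f);
        return 0;
}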
pat_interval.c:

@@ -25,25 +25,27 @@
  * physical memory areas. Without proper tracking, conflicting memory
  * types in different mappings can cause CPU cache corruption.
  *
- * The tree is an interval tree (augmented rbtree) with tree ordered
- * on starting address. Tree can contain multiple entries for
+ * The tree is an interval tree (augmented rbtree) which tree is ordered
+ * by the starting address. The tree can contain multiple entries for
  * different regions which overlap. All the aliases have the same
- * cache attributes of course.
+ * cache attributes of course, as enforced by the PAT logic.
  *
  * memtype_lock protects the rbtree.
  */
-static inline u64 memtype_interval_start(struct memtype *memtype)
+
+static inline u64 interval_start(struct memtype *memtype)
 {
        return memtype->start;
 }
 
-static inline u64 memtype_interval_end(struct memtype *memtype)
+static inline u64 interval_end(struct memtype *memtype)
 {
        return memtype->end - 1;
 }
 
 INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
-                    memtype_interval_start, memtype_interval_end,
-                    static, memtype_interval)
+                    interval_start, interval_end,
+                    static, interval)
 
 static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
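
For readers who don't have interval_tree_generic.h in their head (editorial note, not part of the commit): INTERVAL_TREE_DEFINE() from include/linux/interval_tree_generic.h expands into a family of helpers whose names are derived from its last argument, which is why shortening the prefix from memtype_interval to interval renames every call site in the hunks below. Roughly, the generated functions have these shapes:

/* Sketch of what INTERVAL_TREE_DEFINE(..., static, interval) generates: */
static void interval_insert(struct memtype *node, struct rb_root_cached *root);
static void interval_remove(struct memtype *node, struct rb_root_cached *root);
static struct memtype *interval_iter_first(struct rb_root_cached *root,
                                           u64 start, u64 last);
static struct memtype *interval_iter_next(struct memtype *node,
                                          u64 start, u64 last);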
@@ -56,7 +58,7 @@ static struct memtype *memtype_match(u64 start, u64 end, int match_type)
 {
        struct memtype *match;
 
-       match = memtype_interval_iter_first(&memtype_rbroot, start, end-1);
+       match = interval_iter_first(&memtype_rbroot, start, end-1);
        while (match != NULL && match->start < end) {
                if ((match_type == MEMTYPE_EXACT_MATCH) &&
                    (match->start == start) && (match->end == end))
@@ -66,7 +68,7 @@ static struct memtype *memtype_match(u64 start, u64 end, int match_type)
                    (match->start < start) && (match->end == end))
                        return match;
 
-               match = memtype_interval_iter_next(match, start, end-1);
+               match = interval_iter_next(match, start, end-1);
        }
 
        return NULL; /* Returns NULL if there is no match */
@@ -79,7 +81,7 @@ static int memtype_check_conflict(u64 start, u64 end,
        struct memtype *match;
        enum page_cache_mode found_type = reqtype;
 
-       match = memtype_interval_iter_first(&memtype_rbroot, start, end-1);
+       match = interval_iter_first(&memtype_rbroot, start, end-1);
        if (match == NULL)
                goto success;
@@ -89,12 +91,12 @@ static int memtype_check_conflict(u64 start, u64 end,
        dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
        found_type = match->type;
 
-       match = memtype_interval_iter_next(match, start, end-1);
+       match = interval_iter_next(match, start, end-1);
        while (match) {
                if (match->type != found_type)
                        goto failure;
 
-               match = memtype_interval_iter_next(match, start, end-1);
+               match = interval_iter_next(match, start, end-1);
        }
 
 success:
        if (newtype)
@@ -106,11 +108,11 @@ static int memtype_check_conflict(u64 start, u64 end,
        pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
                current->comm, current->pid, start, end,
                cattr_name(found_type), cattr_name(match->type));
+
        return -EBUSY;
 }
 
-int memtype_check_insert(struct memtype *new,
-                        enum page_cache_mode *ret_type)
+int memtype_check_insert(struct memtype *new, enum page_cache_mode *ret_type)
 {
        int err = 0;
@@ -121,7 +123,7 @@ int memtype_check_insert(struct memtype *new,
        if (ret_type)
                new->type = *ret_type;
 
-       memtype_interval_insert(new, &memtype_rbroot);
+       interval_insert(new, &memtype_rbroot);
 
        return 0;
 }
@@ -145,12 +147,13 @@ struct memtype *memtype_erase(u64 start, u64 end)
        if (data->start == start) {
                /* munmap: erase this node */
-               memtype_interval_remove(data, &memtype_rbroot);
+               interval_remove(data, &memtype_rbroot);
        } else {
                /* mremap: update the end value of this node */
-               memtype_interval_remove(data, &memtype_rbroot);
+               interval_remove(data, &memtype_rbroot);
                data->end = start;
-               memtype_interval_insert(data, &memtype_rbroot);
+               interval_insert(data, &memtype_rbroot);
+
                return NULL;
        }
@@ -159,19 +162,24 @@ struct memtype *memtype_erase(u64 start, u64 end)
 
 struct memtype *memtype_lookup(u64 addr)
 {
-       return memtype_interval_iter_first(&memtype_rbroot, addr,
-                                          addr + PAGE_SIZE-1);
+       return interval_iter_first(&memtype_rbroot, addr, addr + PAGE_SIZE-1);
 }
 
-#if defined(CONFIG_DEBUG_FS)
+/*
+ * Debugging helper, copy the Nth entry of the tree into a
+ * a copy for printout. This allows us to print out the tree
+ * via debugfs, without holding the memtype_lock too long:
+ */
+#ifdef CONFIG_DEBUG_FS
 int memtype_copy_nth_element(struct memtype *out, loff_t pos)
 {
        struct memtype *match;
        int i = 1;
 
-       match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
+       match = interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
        while (match && pos != i) {
-               match = memtype_interval_iter_next(match, 0, ULONG_MAX);
+               match = interval_iter_next(match, 0, ULONG_MAX);
                i++;
        }
 
...