Commit 22742390 authored by Linus Torvalds

Merge branch 'x86-pat-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-pat-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, pat: Fix cacheflush address in change_page_attr_set_clr()
  mm: remove !NUMA condition from PAGEFLAGS_EXTENDED condition set
  x86: Fix earlyprintk=dbgp for machines without NX
  x86, pat: Sanity check remap_pfn_range for RAM region
  x86, pat: Lookup the protection from memtype list on vm_insert_pfn()
  x86, pat: Add lookup_memtype to get the current memtype of a paddr
  x86, pat: Use page flags to track memtypes of RAM pages
  x86, pat: Generalize the use of page flag PG_uncached
  x86, pat: Add rbtree to do quick lookup in memtype tracking
  x86, pat: Add PAT reserve free to io_mapping* APIs
  x86, pat: New i/f for driver to request memtype for IO regions
  x86, pat: ioremap to follow same PAT restrictions as other PAT users
  x86, pat: Keep identity maps consistent with mmaps even when pat_disabled
  x86, mtrr: make mtrr_aps_delayed_init static bool
  x86, pat/mtrr: Rendezvous all the cpus for MTRR/PAT init
  generic-ipi: Allow cpus not yet online to call smp_call_function with irqs disabled
  x86: Fix an incorrect argument of reserve_bootmem()
  x86: Fix system crash when loading with "reservetop" parameter
parents 1aaf2e59 fa526d0d
......@@ -112,6 +112,10 @@ config IA64_UNCACHED_ALLOCATOR
bool
select GENERIC_ALLOCATOR
config ARCH_USES_PG_UNCACHED
def_bool y
depends on IA64_UNCACHED_ALLOCATOR
config AUDIT_ARCH
bool
default y
......
......@@ -1417,6 +1417,10 @@ config X86_PAT
If unsure, say Y.
config ARCH_USES_PG_UNCACHED
def_bool y
depends on X86_PAT
config EFI
bool "EFI runtime service support"
depends on ACPI
......
......@@ -43,8 +43,58 @@ static inline void copy_from_user_page(struct vm_area_struct *vma,
memcpy(dst, src, len);
}
#define PG_non_WB PG_arch_1
PAGEFLAG(NonWB, non_WB)
#define PG_WC PG_arch_1
PAGEFLAG(WC, WC)
#ifdef CONFIG_X86_PAT
/*
* X86 PAT uses page flags WC and Uncached together to keep track of
 * the memory type of pages that have a backing page struct. X86 PAT supports 3
 * different memory types, _PAGE_CACHE_WB, _PAGE_CACHE_WC and
 * _PAGE_CACHE_UC_MINUS, and a fourth state where the page's memory type has not
 * been changed from its default (a value of -1 denotes this).
* Note we do not support _PAGE_CACHE_UC here.
*
* Caller must hold memtype_lock for atomicity.
*/
static inline unsigned long get_page_memtype(struct page *pg)
{
if (!PageUncached(pg) && !PageWC(pg))
return -1;
else if (!PageUncached(pg) && PageWC(pg))
return _PAGE_CACHE_WC;
else if (PageUncached(pg) && !PageWC(pg))
return _PAGE_CACHE_UC_MINUS;
else
return _PAGE_CACHE_WB;
}
static inline void set_page_memtype(struct page *pg, unsigned long memtype)
{
switch (memtype) {
case _PAGE_CACHE_WC:
ClearPageUncached(pg);
SetPageWC(pg);
break;
case _PAGE_CACHE_UC_MINUS:
SetPageUncached(pg);
ClearPageWC(pg);
break;
case _PAGE_CACHE_WB:
SetPageUncached(pg);
SetPageWC(pg);
break;
default:
case -1:
ClearPageUncached(pg);
ClearPageWC(pg);
break;
}
}
#else
static inline unsigned long get_page_memtype(struct page *pg) { return -1; }
static inline void set_page_memtype(struct page *pg, unsigned long memtype) { }
#endif
/*
* The set_memory_* API can be used to change various attributes of a virtual
......
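The helpers above are what the "x86, pat: Use page flags to track memtypes of RAM pages" patch in this series builds on: reserving a memtype for a RAM range walks the pfns twice, first checking for a conflicting type and then recording the new one. A condensed sketch modeled on the series' reserve_ram_pages_type(), with diagnostics dropped and the caller assumed to hold memtype_lock:

static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
                                  unsigned long *new_type)
{
        u64 pfn;

        /* Strong UC is not tracked in the page flags; degrade it to UC-. */
        if (req_type == _PAGE_CACHE_UC)
                req_type = _PAGE_CACHE_UC_MINUS;

        /* First pass: refuse if any page already has a memtype recorded. */
        for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
                unsigned long type = get_page_memtype(pfn_to_page(pfn));

                if (type != -1) {
                        if (new_type)
                                *new_type = type;
                        return -EBUSY;
                }
        }

        if (new_type)
                *new_type = req_type;

        /* Second pass: record the new memtype in the page flags. */
        for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn)
                set_page_memtype(pfn_to_page(pfn), req_type);

        return 0;
}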
......@@ -26,13 +26,16 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
int
is_io_mapping_possible(resource_size_t base, unsigned long size);
void *
iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
void
iounmap_atomic(void *kvaddr, enum km_type type);
int
iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot);
void
iomap_free(resource_size_t base, unsigned long size);
#endif /* _ASM_X86_IOMAP_H */
......@@ -121,6 +121,9 @@ extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
extern void mtrr_ap_init(void);
extern void mtrr_bp_init(void);
extern void set_mtrr_aps_delayed_init(void);
extern void mtrr_aps_init(void);
extern void mtrr_bp_restore(void);
extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
extern int amd_special_default_mtrr(void);
# else
......@@ -161,6 +164,9 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
#define mtrr_ap_init() do {} while (0)
#define mtrr_bp_init() do {} while (0)
#define set_mtrr_aps_delayed_init() do {} while (0)
#define mtrr_aps_init() do {} while (0)
#define mtrr_bp_restore() do {} while (0)
# endif
#ifdef CONFIG_COMPAT
......
......@@ -19,4 +19,9 @@ extern int free_memtype(u64 start, u64 end);
extern int kernel_map_sync_memtype(u64 base, unsigned long size,
unsigned long flag);
int io_reserve_memtype(resource_size_t start, resource_size_t end,
unsigned long *type);
void io_free_memtype(resource_size_t start, resource_size_t end);
#endif /* _ASM_X86_PAT_H */
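io_reserve_memtype()/io_free_memtype() declared above are the new driver-facing pair from "x86, pat: New i/f for driver to request memtype for IO regions". A minimal usage sketch, mirroring what iomap_create_wc() does later in this diff; the example_* names are hypothetical and error handling is abbreviated:

static int example_map_aperture(resource_size_t base, unsigned long size,
                                pgprot_t *prot)
{
        unsigned long flag = _PAGE_CACHE_WC;
        int ret;

        /* PAT may degrade the request (e.g. to UC-); use what it returns. */
        ret = io_reserve_memtype(base, base + size, &flag);
        if (ret)
                return ret;

        *prot = __pgprot(__PAGE_KERNEL | flag);
        return 0;
}

static void example_unmap_aperture(resource_size_t base, unsigned long size)
{
        io_free_memtype(base, base + size);
}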
......@@ -58,6 +58,7 @@ unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
u64 size_or_mask, size_and_mask;
static bool mtrr_aps_delayed_init;
static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
......@@ -163,7 +164,10 @@ static void ipi_handler(void *info)
if (data->smp_reg != ~0U) {
mtrr_if->set(data->smp_reg, data->smp_base,
data->smp_size, data->smp_type);
} else {
} else if (mtrr_aps_delayed_init) {
/*
* Initialize the MTRRs in addition to the synchronisation.
*/
mtrr_if->set_all();
}
......@@ -265,6 +269,8 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
*/
if (reg != ~0U)
mtrr_if->set(reg, base, size, type);
else if (!mtrr_aps_delayed_init)
mtrr_if->set_all();
/* Wait for the others */
while (atomic_read(&data.count))
......@@ -721,9 +727,7 @@ void __init mtrr_bp_init(void)
void mtrr_ap_init(void)
{
unsigned long flags;
if (!mtrr_if || !use_intel())
if (!use_intel() || mtrr_aps_delayed_init)
return;
/*
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
......@@ -738,11 +742,7 @@ void mtrr_ap_init(void)
* 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
* lock to prevent mtrr entry changes
*/
local_irq_save(flags);
mtrr_if->set_all();
local_irq_restore(flags);
set_mtrr(~0U, 0, 0, 0);
}
/**
......@@ -753,6 +753,34 @@ void mtrr_save_state(void)
smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
}
void set_mtrr_aps_delayed_init(void)
{
if (!use_intel())
return;
mtrr_aps_delayed_init = true;
}
/*
* MTRR initialization for all AP's
*/
void mtrr_aps_init(void)
{
if (!use_intel())
return;
set_mtrr(~0U, 0, 0, 0);
mtrr_aps_delayed_init = false;
}
void mtrr_bp_restore(void)
{
if (!use_intel())
return;
mtrr_if->set_all();
}
static int __init mtrr_init_finialize(void)
{
if (!mtrr_if)
......
......@@ -712,6 +712,21 @@ void __init setup_arch(char **cmdline_p)
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
#ifdef CONFIG_X86_64
/*
* Must call this twice: Once just to detect whether hardware doesn't
* support NX (so that the early EHCI debug console setup can safely
* call set_fixmap(), and then again after parsing early parameters to
* honor the respective command line option.
*/
check_efer();
#endif
parse_early_param();
/* VMI may relocate the fixmap; do this before touching ioremap area */
vmi_init();
......@@ -794,11 +809,6 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
parse_early_param();
#ifdef CONFIG_X86_64
check_efer();
#endif
......
......@@ -1118,9 +1118,22 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
if (is_uv_system())
uv_system_init();
set_mtrr_aps_delayed_init();
out:
preempt_enable();
}
void arch_enable_nonboot_cpus_begin(void)
{
set_mtrr_aps_delayed_init();
}
void arch_enable_nonboot_cpus_end(void)
{
mtrr_aps_init();
}
/*
* Early setup to make printk work.
*/
......@@ -1142,6 +1155,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
setup_ioapic_dest();
#endif
check_nmi_watchdog();
mtrr_aps_init();
}
static int __initdata setup_possible_cpus = -1;
......
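Taken together with the mtrr/main.c and cpu-hotplug hunks, the intended sequencing of the new MTRR/PAT rendezvous hooks can be summarized as follows (a sketch of this series' call flow, not literal kernel code; the suspend/resume hunk appears further down in this diff):

/*
 * Boot:          native_smp_prepare_cpus() -> set_mtrr_aps_delayed_init()
 *                ... APs boot; mtrr_ap_init() returns early ...
 *                native_smp_cpus_done()    -> mtrr_aps_init()
 *                                             (single set_mtrr(~0U, 0, 0, 0)
 *                                              rendezvous for MTRR and PAT)
 *
 * Re-enabling non-boot cpus after suspend:
 *                arch_enable_nonboot_cpus_begin() -> set_mtrr_aps_delayed_init()
 *                arch_enable_nonboot_cpus_end()   -> mtrr_aps_init()
 *
 * Boot cpu resume: __restore_processor_state()    -> mtrr_bp_restore()
 */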
......@@ -21,7 +21,7 @@
#include <linux/module.h>
#include <linux/highmem.h>
int is_io_mapping_possible(resource_size_t base, unsigned long size)
static int is_io_mapping_possible(resource_size_t base, unsigned long size)
{
#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
/* There is no way to map greater than 1 << 32 address without PAE */
......@@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size)
#endif
return 1;
}
EXPORT_SYMBOL_GPL(is_io_mapping_possible);
int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
{
unsigned long flag = _PAGE_CACHE_WC;
int ret;
if (!is_io_mapping_possible(base, size))
return -EINVAL;
ret = io_reserve_memtype(base, base + size, &flag);
if (ret)
return ret;
*prot = __pgprot(__PAGE_KERNEL | flag);
return 0;
}
EXPORT_SYMBOL_GPL(iomap_create_wc);
void
iomap_free(resource_size_t base, unsigned long size)
{
io_free_memtype(base, base + size);
}
EXPORT_SYMBOL_GPL(iomap_free);
void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
{
......
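A short sketch of how the exported pair above combines with the atomic mapping helpers declared in iomap.h earlier in this diff; the example_* function is hypothetical, base is assumed page-aligned, and KM_USER0 is picked purely for illustration:

static int example_touch_aperture(resource_size_t base, unsigned long size)
{
        pgprot_t prot;
        void *vaddr;
        int ret;

        ret = iomap_create_wc(base, size, &prot);       /* reserves WC memtype */
        if (ret)
                return ret;

        vaddr = iomap_atomic_prot_pfn(base >> PAGE_SHIFT, KM_USER0, prot);
        *(volatile u32 *)vaddr = 0;                     /* example WC store */
        iounmap_atomic(vaddr, KM_USER0);

        iomap_free(base, size);                         /* drops the reservation */
        return 0;
}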
......@@ -158,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
prot_val, &new_prot_val);
if (retval) {
pr_debug("Warning: reserve_memtype returned %d\n", retval);
printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
return NULL;
}
if (prot_val != new_prot_val) {
/*
* Do not fallback to certain memory types with certain
* requested type:
* - request is uc-, return cannot be write-back
* - request is uc-, return cannot be write-combine
* - request is write-combine, return cannot be write-back
*/
if ((prot_val == _PAGE_CACHE_UC_MINUS &&
(new_prot_val == _PAGE_CACHE_WB ||
new_prot_val == _PAGE_CACHE_WC)) ||
(prot_val == _PAGE_CACHE_WC &&
new_prot_val == _PAGE_CACHE_WB)) {
pr_debug(
if (!is_new_memtype_allowed(phys_addr, size,
prot_val, new_prot_val)) {
printk(KERN_ERR
"ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
(unsigned long long)phys_addr,
(unsigned long long)(phys_addr + size),
......
......@@ -822,6 +822,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
{
struct cpa_data cpa;
int ret, cache, checkalias;
unsigned long baddr = 0;
/*
* Check, if we are requested to change a not supported
......@@ -853,6 +854,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
*/
WARN_ON_ONCE(1);
}
/*
* Save address for cache flush. *addr is modified in the call
* to __change_page_attr_set_clr() below.
*/
baddr = *addr;
}
/* Must avoid aliasing mappings in the highmem code */
......@@ -900,7 +906,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
cpa_flush_array(addr, numpages, cache,
cpa.flags, pages);
} else
cpa_flush_range(*addr, numpages, cache);
cpa_flush_range(baddr, numpages, cache);
} else
cpa_flush_all(cache);
......
......@@ -242,7 +242,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
fix_processor_context();
do_fpu_end();
mtrr_ap_init();
mtrr_bp_restore();
#ifdef CONFIG_X86_OLD_MCE
mcheck_init(&boot_cpu_data);
......
......@@ -49,23 +49,30 @@ static inline struct io_mapping *
io_mapping_create_wc(resource_size_t base, unsigned long size)
{
struct io_mapping *iomap;
if (!is_io_mapping_possible(base, size))
return NULL;
pgprot_t prot;
iomap = kmalloc(sizeof(*iomap), GFP_KERNEL);
if (!iomap)
return NULL;
goto out_err;
if (iomap_create_wc(base, size, &prot))
goto out_free;
iomap->base = base;
iomap->size = size;
iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL));
iomap->prot = prot;
return iomap;
out_free:
kfree(iomap);
out_err:
return NULL;
}
static inline void
io_mapping_free(struct io_mapping *mapping)
{
iomap_free(mapping->base, mapping->size);
kfree(mapping);
}
......
......@@ -99,7 +99,7 @@ enum pageflags {
#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
PG_mlocked, /* Page is vma mlocked */
#endif
#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
PG_uncached, /* Page has been mapped as uncached */
#endif
__NR_PAGEFLAGS,
......@@ -257,7 +257,7 @@ PAGEFLAG_FALSE(Mlocked)
SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked)
#endif
#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
PAGEFLAG(Uncached, uncached)
#else
PAGEFLAG_FALSE(Uncached)
......
......@@ -414,6 +414,14 @@ int disable_nonboot_cpus(void)
return error;
}
void __weak arch_enable_nonboot_cpus_begin(void)
{
}
void __weak arch_enable_nonboot_cpus_end(void)
{
}
void __ref enable_nonboot_cpus(void)
{
int cpu, error;
......@@ -425,6 +433,9 @@ void __ref enable_nonboot_cpus(void)
goto out;
printk("Enabling non-boot CPUs ...\n");
arch_enable_nonboot_cpus_begin();
for_each_cpu(cpu, frozen_cpus) {
error = _cpu_up(cpu, 1);
if (!error) {
......@@ -433,6 +444,9 @@ void __ref enable_nonboot_cpus(void)
}
printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
}
arch_enable_nonboot_cpus_end();
cpumask_clear(frozen_cpus);
out:
cpu_maps_update_done();
......
......@@ -176,6 +176,11 @@ void generic_smp_call_function_interrupt(void)
struct call_function_data *data;
int cpu = get_cpu();
/*
* Shouldn't receive this interrupt on a cpu that is not yet online.
*/
WARN_ON_ONCE(!cpu_online(cpu));
/*
* Ensure entry is visible on call_function_queue after we have
* entered the IPI. See comment in smp_call_function_many.
......@@ -230,6 +235,11 @@ void generic_smp_call_function_single_interrupt(void)
unsigned int data_flags;
LIST_HEAD(list);
/*
* Shouldn't receive this interrupt on a cpu that is not yet online.
*/
WARN_ON_ONCE(!cpu_online(smp_processor_id()));
spin_lock(&q->lock);
list_replace_init(&q->list, &list);
spin_unlock(&q->lock);
......@@ -285,8 +295,14 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
*/
this_cpu = get_cpu();
/* Can deadlock when called with interrupts disabled */
WARN_ON_ONCE(irqs_disabled() && !oops_in_progress);
/*
* Can deadlock when called with interrupts disabled.
* We allow cpu's that are not yet online though, as no one else can
* send smp call function interrupt to this cpu and as such deadlocks
* can't happen.
*/
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress);
if (cpu == this_cpu) {
local_irq_save(flags);
......@@ -329,8 +345,14 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
{
csd_lock(data);
/* Can deadlock when called with interrupts disabled */
WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress);
/*
* Can deadlock when called with interrupts disabled.
* We allow cpu's that are not yet online though, as no one else can
* send smp call function interrupt to this cpu and as such deadlocks
* can't happen.
*/
WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled()
&& !oops_in_progress);
generic_exec_single(cpu, data, wait);
}
......@@ -365,8 +387,14 @@ void smp_call_function_many(const struct cpumask *mask,
unsigned long flags;
int cpu, next_cpu, this_cpu = smp_processor_id();
/* Can deadlock when called with interrupts disabled */
WARN_ON_ONCE(irqs_disabled() && !oops_in_progress);
/*
* Can deadlock when called with interrupts disabled.
* We allow cpu's that are not yet online though, as no one else can
* send smp call function interrupt to this cpu and as such deadlocks
* can't happen.
*/
WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
&& !oops_in_progress);
/* So, what's a CPU they want? Ignoring this one. */
cpu = cpumask_first_and(mask, cpu_online_mask);
......
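For context, the reason the WARN_ON_ONCE checks above gain a cpu_online() condition (a summary of this series' rationale, not code from the diff):

/*
 * During AP bringup (e.g. cpu hotadd, when mtrr_aps_delayed_init is not set),
 * mtrr_ap_init() now ends up in set_mtrr(~0U, 0, 0, 0), which rendezvouses
 * the other cpus through smp_call_function() while the calling cpu still has
 * interrupts disabled and is not yet in cpu_online_mask.  Nobody can send a
 * function-call IPI back to a cpu that is not yet online, so the usual
 * deadlock-with-irqs-disabled scenario cannot arise; the warnings are
 * therefore restricted to online callers.
 */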
......@@ -153,7 +153,7 @@ config MEMORY_HOTREMOVE
#
config PAGEFLAGS_EXTENDED
def_bool y
depends on 64BIT || SPARSEMEM_VMEMMAP || !NUMA || !SPARSEMEM
depends on 64BIT || SPARSEMEM_VMEMMAP || !SPARSEMEM
# Heavily threaded applications may benefit from splitting the mm-wide
# page_table_lock, so that faults on different parts of the user address
......