Commit 2b3a81b3 authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman Committed by Kleber Sacilotto de Souza

Revert "x86/efi: Build our own page table structures"

BugLink: http://bugs.launchpad.net/bugs/1745047

This reverts commit 36e0f05a which is
commit 67a9108e upstream.

Turns there was too many other issues with this patch to make it viable
for the stable tree.
Reported-by: default avatarBen Hutchings <ben.hutchings@codethink.co.uk>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: linux-efi@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>
Cc: "Ghannam, Yazen" <Yazen.Ghannam@amd.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: default avatarKhalid Elmously <khalid.elmously@canonical.com>
Signed-off-by: default avatarStefan Bader <stefan.bader@canonical.com>
parent a7e35257
......@@ -136,7 +136,6 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
extern void efi_sync_low_kernel_mappings(void);
extern int __init efi_alloc_page_tables(void);
extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
extern void __init old_map_region(efi_memory_desc_t *md);
......
......@@ -869,7 +869,7 @@ static void __init kexec_enter_virtual_mode(void)
* This function will switch the EFI runtime services to virtual mode.
* Essentially, we look through the EFI memmap and map every region that
* has the runtime attribute bit set in its memory descriptor into the
* efi_pgd page table.
* ->trampoline_pgd page table using a top-down VA allocation scheme.
*
* The old method which used to update that memory descriptor with the
* virtual address obtained from ioremap() is still supported when the
......@@ -879,8 +879,8 @@ static void __init kexec_enter_virtual_mode(void)
*
* The new method does a pagetable switch in a preemption-safe manner
* so that we're in a different address space when calling a runtime
* function. For function arguments passing we do copy the PUDs of the
* kernel page table into efi_pgd prior to each call.
* function. For function arguments passing we do copy the PGDs of the
* kernel page table into ->trampoline_pgd prior to each call.
*
* Specially for kexec boot, efi runtime maps in previous kernel should
* be passed in via setup_data. In that case runtime ranges will be mapped
......@@ -895,12 +895,6 @@ static void __init __efi_enter_virtual_mode(void)
efi.systab = NULL;
if (efi_alloc_page_tables()) {
pr_err("Failed to allocate EFI page tables\n");
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
}
efi_merge_regions();
new_memmap = efi_map_regions(&count, &pg_shift);
if (!new_memmap) {
......@@ -960,11 +954,28 @@ static void __init __efi_enter_virtual_mode(void)
efi_runtime_mkexec();
/*
* We mapped the descriptor array into the EFI pagetable above
* but we're not unmapping it here because if we're running in
* EFI mixed mode we need all of memory to be accessible when
* we pass parameters to the EFI runtime services in the
* thunking code.
* We mapped the descriptor array into the EFI pagetable above but we're
* not unmapping it here. Here's why:
*
* We're copying select PGDs from the kernel page table to the EFI page
* table and when we do so and make changes to those PGDs like unmapping
* stuff from them, those changes appear in the kernel page table and we
* go boom.
*
* From setup_real_mode():
*
* ...
* trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
*
* In this particular case, our allocation is in PGD 0 of the EFI page
* table but we've copied that PGD from PGD[272] of the EFI page table:
*
* pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
*
* where the direct memory mapping in kernel space is.
*
* new_memmap's VA comes from that direct mapping and thus clearing it,
* it would get cleared in the kernel page table too.
*
* efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
*/
......
......@@ -38,11 +38,6 @@
* say 0 - 3G.
*/
int __init efi_alloc_page_tables(void)
{
return 0;
}
void efi_sync_low_kernel_mappings(void) {}
void __init efi_dump_pagetable(void) {}
int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
......
......@@ -40,7 +40,6 @@
#include <asm/fixmap.h>
#include <asm/realmode.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
/*
* We allocate runtime services regions bottom-up, starting from -4G, i.e.
......@@ -122,92 +121,22 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
early_code_mapping_set_exec(0);
}
static pgd_t *efi_pgd;
/*
* We need our own copy of the higher levels of the page tables
* because we want to avoid inserting EFI region mappings (EFI_VA_END
* to EFI_VA_START) into the standard kernel page tables. Everything
* else can be shared, see efi_sync_low_kernel_mappings().
*/
int __init efi_alloc_page_tables(void)
{
pgd_t *pgd;
pud_t *pud;
gfp_t gfp_mask;
if (efi_enabled(EFI_OLD_MEMMAP))
return 0;
gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO;
efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
if (!efi_pgd)
return -ENOMEM;
pgd = efi_pgd + pgd_index(EFI_VA_END);
pud = pud_alloc_one(NULL, 0);
if (!pud) {
free_page((unsigned long)efi_pgd);
return -ENOMEM;
}
pgd_populate(NULL, pgd, pud);
return 0;
}
/*
* Add low kernel mappings for passing arguments to EFI functions.
*/
void efi_sync_low_kernel_mappings(void)
{
unsigned num_entries;
pgd_t *pgd_k, *pgd_efi;
pud_t *pud_k, *pud_efi;
unsigned num_pgds;
pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
if (efi_enabled(EFI_OLD_MEMMAP))
return;
/*
* We can share all PGD entries apart from the one entry that
* covers the EFI runtime mapping space.
*
* Make sure the EFI runtime region mappings are guaranteed to
* only span a single PGD entry and that the entry also maps
* other important kernel regions.
*/
BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
(EFI_VA_END & PGDIR_MASK));
pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
pgd_k = pgd_offset_k(PAGE_OFFSET);
num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET);
/*
* We share all the PUD entries apart from those that map the
* EFI regions. Copy around them.
*/
BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
pud_efi = pud_offset(pgd_efi, 0);
pgd_k = pgd_offset_k(EFI_VA_END);
pud_k = pud_offset(pgd_k, 0);
num_entries = pud_index(EFI_VA_END);
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
pud_efi = pud_offset(pgd_efi, EFI_VA_START);
pud_k = pud_offset(pgd_k, EFI_VA_START);
num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
memcpy(pgd + pgd_index(PAGE_OFFSET),
init_mm.pgd + pgd_index(PAGE_OFFSET),
sizeof(pgd_t) * num_pgds);
}
int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
......@@ -220,8 +149,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
if (efi_enabled(EFI_OLD_MEMMAP))
return 0;
efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
pgd = efi_pgd;
efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
pgd = __va(efi_scratch.efi_pgt);
/*
* It can happen that the physical address of new_memmap lands in memory
......@@ -267,14 +196,16 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
{
kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages);
pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages);
}
static void __init __map_region(efi_memory_desc_t *md, u64 va)
{
pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
unsigned long flags = 0;
unsigned long pfn;
pgd_t *pgd = efi_pgd;
if (!(md->attribute & EFI_MEMORY_WB))
flags |= _PAGE_PCD;
......@@ -383,7 +314,9 @@ void __init efi_runtime_mkexec(void)
void __init efi_dump_pagetable(void)
{
#ifdef CONFIG_EFI_PGT_DUMP
ptdump_walk_pgd_level(NULL, efi_pgd);
pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
ptdump_walk_pgd_level(NULL, pgd);
#endif
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment