Commit 1d966eb4 authored by Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "Misc fixes:

   - A rather involved set of memory hardware encryption fixes to
     support the early loading of microcode files via the initrd. These
     are larger than what we normally take at such a late -rc stage, but
     there are two mitigating factors: 1) much of the changes are
     limited to the SME code itself 2) being able to early load
     microcode has increased importance in the post-Meltdown/Spectre
     era.

   - An IRQ vector allocator fix

   - An Intel RDT driver use-after-free fix

   - An APIC driver bug fix/revert to make certain older systems boot
     again

   - A pkeys ABI fix

   - TSC calibration fixes

   - A kdump fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apic/vector: Fix off by one in error path
  x86/intel_rdt/cqm: Prevent use after free
  x86/mm: Encrypt the initrd earlier for BSP microcode update
  x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption
  x86/mm: Centralize PMD flags in sme_encrypt_kernel()
  x86/mm: Use a struct to reduce parameters for SME PGD mapping
  x86/mm: Clean up register saving in the __enc_copy() assembly code
  x86/idt: Mark IDT tables __initconst
  Revert "x86/apic: Remove init_bsp_APIC()"
  x86/mm/pkeys: Fix fill_sig_info_pkey
  x86/tsc: Print tsc_khz, when it differs from cpu_khz
  x86/tsc: Fix erroneous TSC rate on Skylake Xeon
  x86/tsc: Future-proof native_calibrate_tsc()
  kdump: Write the correct address of mem_section into vmcoreinfo
parents 9a4ba2ab 45d55e7b
...@@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup); ...@@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void); extern void disable_local_APIC(void);
extern void lapic_shutdown(void); extern void lapic_shutdown(void);
extern void sync_Arb_IDs(void); extern void sync_Arb_IDs(void);
extern void init_bsp_APIC(void);
extern void apic_intr_mode_init(void); extern void apic_intr_mode_init(void);
extern void setup_local_APIC(void); extern void setup_local_APIC(void);
extern void init_apic_mappings(void); extern void init_apic_mappings(void);
......
...@@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data); ...@@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data);
void __init sme_early_init(void); void __init sme_early_init(void);
void __init sme_encrypt_kernel(void); void __init sme_encrypt_kernel(struct boot_params *bp);
void __init sme_enable(struct boot_params *bp); void __init sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
...@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } ...@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
static inline void __init sme_early_init(void) { } static inline void __init sme_early_init(void) { }
static inline void __init sme_encrypt_kernel(void) { } static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
static inline void __init sme_enable(struct boot_params *bp) { } static inline void __init sme_enable(struct boot_params *bp) { }
static inline bool sme_active(void) { return false; } static inline bool sme_active(void) { return false; }
......
...@@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void) ...@@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void)
return APIC_SYMMETRIC_IO; return APIC_SYMMETRIC_IO;
} }
/*
 * An initial setup of the virtual wire mode.
 *
 * Returns without touching the APIC when smp_found_config is set or the
 * boot CPU lacks X86_FEATURE_APIC. Otherwise it clears the local APIC,
 * enables it through the APIC_SPIV register (with SPURIOUS_APIC_VECTOR as
 * the vector), then programs APIC_LVT0 with APIC_DM_EXTINT and APIC_LVT1
 * with APIC_DM_NMI.
 */
void __init init_bsp_APIC(void)
{
unsigned int value;
/*
* Don't do the setup now if we have a SMP BIOS as the
* through-I/O-APIC virtual wire mode might be active.
*/
if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
return;
/*
* Do not trust the local APIC being empty at bootup.
*/
clear_local_APIC();
/*
* Enable APIC.
*/
value = apic_read(APIC_SPIV);
/* Drop the current vector bits; SPURIOUS_APIC_VECTOR is OR-ed in below. */
value &= ~APIC_VECTOR_MASK;
value |= APIC_SPIV_APIC_ENABLED;
/*
 * NOTE: the 'else' inside the #ifdef binds to the statement that follows
 * the #endif. With CONFIG_X86_32 the FOCUS_DISABLED bit is either cleared
 * or set; without it the bit is always set.
 */
#ifdef CONFIG_X86_32
/* This bit is reserved on P4/Xeon and should be cleared */
if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
(boot_cpu_data.x86 == 15))
value &= ~APIC_SPIV_FOCUS_DISABLED;
else
#endif
value |= APIC_SPIV_FOCUS_DISABLED;
value |= SPURIOUS_APIC_VECTOR;
apic_write(APIC_SPIV, value);
/*
* Set up the virtual wire mode.
*/
apic_write(APIC_LVT0, APIC_DM_EXTINT);
/* LVT1 delivers NMI; the value is adjusted below before being written. */
value = APIC_DM_NMI;
if (!lapic_is_integrated()) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
/* Keep the LVT1 entry masked when apic_extnmi is APIC_EXTNMI_NONE. */
if (apic_extnmi == APIC_EXTNMI_NONE)
value |= APIC_LVT_MASKED;
apic_write(APIC_LVT1, value);
}
/* Init the interrupt delivery mode for the BSP */ /* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void) void __init apic_intr_mode_init(void)
{ {
......
...@@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, ...@@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
err = assign_irq_vector_policy(irqd, info); err = assign_irq_vector_policy(irqd, info);
trace_vector_setup(virq + i, false, err); trace_vector_setup(virq + i, false, err);
if (err) if (err) {
irqd->chip_data = NULL;
free_apic_chip_data(apicd);
goto error; goto error;
} }
}
return 0; return 0;
error: error:
x86_vector_free_irqs(domain, virq, i + 1); x86_vector_free_irqs(domain, virq, i);
return err; return err;
} }
......
...@@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) ...@@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
*/ */
if (static_branch_unlikely(&rdt_mon_enable_key)) if (static_branch_unlikely(&rdt_mon_enable_key))
rmdir_mondata_subdir_allrdtgrp(r, d->id); rmdir_mondata_subdir_allrdtgrp(r, d->id);
kfree(d->ctrl_val);
kfree(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
list_del(&d->list); list_del(&d->list);
if (is_mbm_enabled()) if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over); cancel_delayed_work(&d->mbm_over);
...@@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) ...@@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cancel_delayed_work(&d->cqm_limbo); cancel_delayed_work(&d->cqm_limbo);
} }
kfree(d->ctrl_val);
kfree(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
kfree(d); kfree(d);
return; return;
} }
......
...@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr, ...@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
p = fixup_pointer(&phys_base, physaddr); p = fixup_pointer(&phys_base, physaddr);
*p += load_delta - sme_get_me_mask(); *p += load_delta - sme_get_me_mask();
/* Encrypt the kernel (if SME is active) */ /* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(); sme_encrypt_kernel(bp);
/* /*
* Return the SME encryption mask (if SME is active) to be used as a * Return the SME encryption mask (if SME is active) to be used as a
......
...@@ -56,7 +56,7 @@ struct idt_data { ...@@ -56,7 +56,7 @@ struct idt_data {
* Early traps running on the DEFAULT_STACK because the other interrupt * Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init(). * stacks work only after cpu_init().
*/ */
static const __initdata struct idt_data early_idts[] = { static const __initconst struct idt_data early_idts[] = {
INTG(X86_TRAP_DB, debug), INTG(X86_TRAP_DB, debug),
SYSG(X86_TRAP_BP, int3), SYSG(X86_TRAP_BP, int3),
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
...@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = { ...@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
* the traps which use them are reinitialized with IST after cpu_init() has * the traps which use them are reinitialized with IST after cpu_init() has
* set up TSS. * set up TSS.
*/ */
static const __initdata struct idt_data def_idts[] = { static const __initconst struct idt_data def_idts[] = {
INTG(X86_TRAP_DE, divide_error), INTG(X86_TRAP_DE, divide_error),
INTG(X86_TRAP_NMI, nmi), INTG(X86_TRAP_NMI, nmi),
INTG(X86_TRAP_BR, bounds), INTG(X86_TRAP_BR, bounds),
...@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = { ...@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
/* /*
* The APIC and SMP idt entries * The APIC and SMP idt entries
*/ */
static const __initdata struct idt_data apic_idts[] = { static const __initconst struct idt_data apic_idts[] = {
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
INTG(RESCHEDULE_VECTOR, reschedule_interrupt), INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
INTG(CALL_FUNCTION_VECTOR, call_function_interrupt), INTG(CALL_FUNCTION_VECTOR, call_function_interrupt),
...@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = { ...@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
* Early traps running on the DEFAULT_STACK because the other interrupt * Early traps running on the DEFAULT_STACK because the other interrupt
* stacks work only after cpu_init(). * stacks work only after cpu_init().
*/ */
static const __initdata struct idt_data early_pf_idts[] = { static const __initconst struct idt_data early_pf_idts[] = {
INTG(X86_TRAP_PF, page_fault), INTG(X86_TRAP_PF, page_fault),
}; };
...@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = { ...@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
* Override for the debug_idt. Same as the default, but with interrupt * Override for the debug_idt. Same as the default, but with interrupt
* stack set to DEFAULT_STACK (0). Required for NMI trap handling. * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
*/ */
static const __initdata struct idt_data dbg_idts[] = { static const __initconst struct idt_data dbg_idts[] = {
INTG(X86_TRAP_DB, debug), INTG(X86_TRAP_DB, debug),
INTG(X86_TRAP_BP, int3), INTG(X86_TRAP_BP, int3),
}; };
...@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; ...@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
* The exceptions which use Interrupt stacks. They are setup after * The exceptions which use Interrupt stacks. They are setup after
* cpu_init() when the TSS has been initialized. * cpu_init() when the TSS has been initialized.
*/ */
static const __initdata struct idt_data ist_idts[] = { static const __initconst struct idt_data ist_idts[] = {
ISTG(X86_TRAP_DB, debug, DEBUG_STACK), ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
ISTG(X86_TRAP_NMI, nmi, NMI_STACK), ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
SISTG(X86_TRAP_BP, int3, DEBUG_STACK), SISTG(X86_TRAP_BP, int3, DEBUG_STACK),
......
...@@ -61,6 +61,9 @@ void __init init_ISA_irqs(void) ...@@ -61,6 +61,9 @@ void __init init_ISA_irqs(void)
struct irq_chip *chip = legacy_pic->chip; struct irq_chip *chip = legacy_pic->chip;
int i; int i;
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
init_bsp_APIC();
#endif
legacy_pic->init(0); legacy_pic->init(0);
for (i = 0; i < nr_legacy_irqs(); i++) for (i = 0; i < nr_legacy_irqs(); i++)
......
...@@ -364,16 +364,6 @@ static void __init reserve_initrd(void) ...@@ -364,16 +364,6 @@ static void __init reserve_initrd(void)
!ramdisk_image || !ramdisk_size) !ramdisk_image || !ramdisk_size)
return; /* No initrd provided by bootloader */ return; /* No initrd provided by bootloader */
/*
* If SME is active, this memory will be marked encrypted by the
* kernel when it is accessed (including relocation). However, the
* ramdisk image was loaded decrypted by the bootloader, so make
* sure that it is encrypted before accessing it. For SEV the
* ramdisk will already be encrypted, so only do this for SME.
*/
if (sme_active())
sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
initrd_start = 0; initrd_start = 0;
mapped_size = memblock_mem_size(max_pfn_mapped); mapped_size = memblock_mem_size(max_pfn_mapped);
......
...@@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void) ...@@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void)
case INTEL_FAM6_KABYLAKE_DESKTOP: case INTEL_FAM6_KABYLAKE_DESKTOP:
crystal_khz = 24000; /* 24.0 MHz */ crystal_khz = 24000; /* 24.0 MHz */
break; break;
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ATOM_DENVERTON: case INTEL_FAM6_ATOM_DENVERTON:
crystal_khz = 25000; /* 25.0 MHz */ crystal_khz = 25000; /* 25.0 MHz */
break; break;
...@@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void) ...@@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void)
} }
} }
if (crystal_khz == 0)
return 0;
/* /*
* TSC frequency determined by CPUID is a "hardware reported" * TSC frequency determined by CPUID is a "hardware reported"
* frequency and is the most accurate one so far we have. This * frequency and is the most accurate one so far we have. This
...@@ -1315,6 +1316,12 @@ void __init tsc_init(void) ...@@ -1315,6 +1316,12 @@ void __init tsc_init(void)
(unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000); (unsigned long)cpu_khz % 1000);
if (cpu_khz != tsc_khz) {
pr_info("Detected %lu.%03lu MHz TSC",
(unsigned long)tsc_khz / 1000,
(unsigned long)tsc_khz % 1000);
}
/* Sanitize TSC ADJUST before cyc2ns gets initialized */ /* Sanitize TSC ADJUST before cyc2ns gets initialized */
tsc_store_and_check_tsc_adjust(true); tsc_store_and_check_tsc_adjust(true);
......
...@@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) ...@@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4. * faulted on a pte with its pkey=4.
*/ */
static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info,
u32 *pkey)
{ {
/* This is effectively an #ifdef */ /* This is effectively an #ifdef */
if (!boot_cpu_has(X86_FEATURE_OSPKE)) if (!boot_cpu_has(X86_FEATURE_OSPKE))
return; return;
/* Fault not from Protection Keys: nothing to do */ /* Fault not from Protection Keys: nothing to do */
if (si_code != SEGV_PKUERR) if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV))
return; return;
/* /*
* force_sig_info_fault() is called from a number of * force_sig_info_fault() is called from a number of
...@@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, ...@@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
lsb = PAGE_SHIFT; lsb = PAGE_SHIFT;
info.si_addr_lsb = lsb; info.si_addr_lsb = lsb;
fill_sig_info_pkey(si_code, &info, pkey); fill_sig_info_pkey(si_signo, si_code, &info, pkey);
force_sig_info(si_signo, &info, tsk); force_sig_info(si_signo, &info, tsk);
} }
......
...@@ -464,19 +464,29 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) ...@@ -464,19 +464,29 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
} }
static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start, struct sme_populate_pgd_data {
unsigned long end) void *pgtable_area;
pgd_t *pgd;
pmdval_t pmd_flags;
pteval_t pte_flags;
unsigned long paddr;
unsigned long vaddr;
unsigned long vaddr_end;
};
static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{ {
unsigned long pgd_start, pgd_end, pgd_size; unsigned long pgd_start, pgd_end, pgd_size;
pgd_t *pgd_p; pgd_t *pgd_p;
pgd_start = start & PGDIR_MASK; pgd_start = ppd->vaddr & PGDIR_MASK;
pgd_end = end & PGDIR_MASK; pgd_end = ppd->vaddr_end & PGDIR_MASK;
pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1); pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
pgd_size *= sizeof(pgd_t);
pgd_p = pgd_base + pgd_index(start); pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
memset(pgd_p, 0, pgd_size); memset(pgd_p, 0, pgd_size);
} }
...@@ -484,17 +494,32 @@ static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start, ...@@ -484,17 +494,32 @@ static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
#define PGD_FLAGS _KERNPG_TABLE_NOENC #define PGD_FLAGS _KERNPG_TABLE_NOENC
#define P4D_FLAGS _KERNPG_TABLE_NOENC #define P4D_FLAGS _KERNPG_TABLE_NOENC
#define PUD_FLAGS _KERNPG_TABLE_NOENC #define PUD_FLAGS _KERNPG_TABLE_NOENC
#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) #define PMD_FLAGS _KERNPG_TABLE_NOENC
#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
(_PAGE_PAT | _PAGE_PWT))
#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, #define PTE_FLAGS_DEC PTE_FLAGS
unsigned long vaddr, pmdval_t pmd_val) #define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
(_PAGE_PAT | _PAGE_PWT))
#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
{ {
pgd_t *pgd_p; pgd_t *pgd_p;
p4d_t *p4d_p; p4d_t *p4d_p;
pud_t *pud_p; pud_t *pud_p;
pmd_t *pmd_p; pmd_t *pmd_p;
pgd_p = pgd_base + pgd_index(vaddr); pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
if (native_pgd_val(*pgd_p)) { if (native_pgd_val(*pgd_p)) {
if (IS_ENABLED(CONFIG_X86_5LEVEL)) if (IS_ENABLED(CONFIG_X86_5LEVEL))
p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
...@@ -504,15 +529,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, ...@@ -504,15 +529,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
pgd_t pgd; pgd_t pgd;
if (IS_ENABLED(CONFIG_X86_5LEVEL)) { if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
p4d_p = pgtable_area; p4d_p = ppd->pgtable_area;
memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
} else { } else {
pud_p = pgtable_area; pud_p = ppd->pgtable_area;
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
} }
...@@ -520,58 +545,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, ...@@ -520,58 +545,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
} }
if (IS_ENABLED(CONFIG_X86_5LEVEL)) { if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
p4d_p += p4d_index(vaddr); p4d_p += p4d_index(ppd->vaddr);
if (native_p4d_val(*p4d_p)) { if (native_p4d_val(*p4d_p)) {
pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
} else { } else {
p4d_t p4d; p4d_t p4d;
pud_p = pgtable_area; pud_p = ppd->pgtable_area;
memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS); p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
native_set_p4d(p4d_p, p4d); native_set_p4d(p4d_p, p4d);
} }
} }
pud_p += pud_index(vaddr); pud_p += pud_index(ppd->vaddr);
if (native_pud_val(*pud_p)) { if (native_pud_val(*pud_p)) {
if (native_pud_val(*pud_p) & _PAGE_PSE) if (native_pud_val(*pud_p) & _PAGE_PSE)
goto out; return NULL;
pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
} else { } else {
pud_t pud; pud_t pud;
pmd_p = pgtable_area; pmd_p = ppd->pgtable_area;
memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS); pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
native_set_pud(pud_p, pud); native_set_pud(pud_p, pud);
} }
pmd_p += pmd_index(vaddr); return pmd_p;
}
static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
{
pmd_t *pmd_p;
pmd_p = sme_prepare_pgd(ppd);
if (!pmd_p)
return;
pmd_p += pmd_index(ppd->vaddr);
if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
native_set_pmd(pmd_p, native_make_pmd(pmd_val)); native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
}
out: static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
return pgtable_area; {
pmd_t *pmd_p;
pte_t *pte_p;
pmd_p = sme_prepare_pgd(ppd);
if (!pmd_p)
return;
pmd_p += pmd_index(ppd->vaddr);
if (native_pmd_val(*pmd_p)) {
if (native_pmd_val(*pmd_p) & _PAGE_PSE)
return;
pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
} else {
pmd_t pmd;
pte_p = ppd->pgtable_area;
memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
native_set_pmd(pmd_p, pmd);
}
pte_p += pte_index(ppd->vaddr);
if (!native_pte_val(*pte_p))
native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
}
/*
 * Map [ppd->vaddr, ppd->vaddr_end) in PMD_PAGE_SIZE steps by calling
 * sme_populate_pgd_large() once per step. ppd->vaddr and ppd->paddr are
 * advanced in lockstep and are left at their final values on return.
 */
static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
{
	for (; ppd->vaddr < ppd->vaddr_end;
	     ppd->vaddr += PMD_PAGE_SIZE, ppd->paddr += PMD_PAGE_SIZE)
		sme_populate_pgd_large(ppd);
}
/*
 * Map [ppd->vaddr, ppd->vaddr_end) in PAGE_SIZE steps by calling
 * sme_populate_pgd() once per step. ppd->vaddr and ppd->paddr are
 * advanced in lockstep and are left at their final values on return.
 */
static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
{
	for (; ppd->vaddr < ppd->vaddr_end;
	     ppd->vaddr += PAGE_SIZE, ppd->paddr += PAGE_SIZE)
		sme_populate_pgd(ppd);
}
/*
 * Map the range described by @ppd using @pmd_flags for the PMD-level
 * entries and @pte_flags for the PTE-level entries.
 *
 * The range is walked in three phases, each of which temporarily rewrites
 * ppd->vaddr_end and lets the helpers advance ppd->vaddr / ppd->paddr:
 *   1. PTE entries from the start up to the next PMD_PAGE_SIZE boundary,
 *   2. PMD entries up to the last PMD_PAGE_SIZE boundary at or below the end,
 *   3. PTE entries for whatever remains up to the caller's end.
 * On return ppd->vaddr_end holds the caller's original end value again.
 */
static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
				   pmdval_t pmd_flags, pteval_t pte_flags)
{
	/* The struct field is overwritten per phase, so keep the real end. */
	unsigned long range_end = ppd->vaddr_end;

	ppd->pmd_flags = pmd_flags;
	ppd->pte_flags = pte_flags;

	/* Phase 1: unaligned head, page by page */
	ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
	__sme_map_range_pte(ppd);

	/* Phase 2: aligned middle, one PMD entry per step */
	ppd->vaddr_end = range_end & PMD_PAGE_MASK;
	__sme_map_range_pmd(ppd);

	/* Phase 3: unaligned tail, page by page */
	ppd->vaddr_end = range_end;
	__sme_map_range_pte(ppd);
}
/* Map the range in @ppd with the PMD_FLAGS_ENC / PTE_FLAGS_ENC attributes. */
static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
{
	const pmdval_t large_flags = PMD_FLAGS_ENC;
	const pteval_t small_flags = PTE_FLAGS_ENC;

	__sme_map_range(ppd, large_flags, small_flags);
}
/* Map the range in @ppd with the PMD_FLAGS_DEC / PTE_FLAGS_DEC attributes. */
static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
{
	const pmdval_t large_flags = PMD_FLAGS_DEC;
	const pteval_t small_flags = PTE_FLAGS_DEC;

	__sme_map_range(ppd, large_flags, small_flags);
}
static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
} }
static unsigned long __init sme_pgtable_calc(unsigned long len) static unsigned long __init sme_pgtable_calc(unsigned long len)
{ {
unsigned long p4d_size, pud_size, pmd_size; unsigned long p4d_size, pud_size, pmd_size, pte_size;
unsigned long total; unsigned long total;
/* /*
* Perform a relatively simplistic calculation of the pagetable * Perform a relatively simplistic calculation of the pagetable
* entries that are needed. That mappings will be covered by 2MB * entries that are needed. Those mappings will be covered mostly
* PMD entries so we can conservatively calculate the required * by 2MB PMD entries so we can conservatively calculate the required
* number of P4D, PUD and PMD structures needed to perform the * number of P4D, PUD and PMD structures needed to perform the
* mappings. Incrementing the count for each covers the case where * mappings. For mappings that are not 2MB aligned, PTE mappings
* the addresses cross entries. * would be needed for the start and end portion of the address range
* that fall outside of the 2MB alignment. This results in, at most,
* two extra pages to hold PTE entries for each range that is mapped.
* Incrementing the count for each covers the case where the addresses
* cross entries.
*/ */
if (IS_ENABLED(CONFIG_X86_5LEVEL)) { if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
...@@ -585,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) ...@@ -585,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
} }
pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1; pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
total = p4d_size + pud_size + pmd_size; total = p4d_size + pud_size + pmd_size + pte_size;
/* /*
* Now calculate the added pagetable structures needed to populate * Now calculate the added pagetable structures needed to populate
...@@ -610,29 +738,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) ...@@ -610,29 +738,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
return total; return total;
} }
void __init sme_encrypt_kernel(void) void __init sme_encrypt_kernel(struct boot_params *bp)
{ {
unsigned long workarea_start, workarea_end, workarea_len; unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len; unsigned long execute_start, execute_end, execute_len;
unsigned long kernel_start, kernel_end, kernel_len; unsigned long kernel_start, kernel_end, kernel_len;
unsigned long initrd_start, initrd_end, initrd_len;
struct sme_populate_pgd_data ppd;
unsigned long pgtable_area_len; unsigned long pgtable_area_len;
unsigned long paddr, pmd_flags;
unsigned long decrypted_base; unsigned long decrypted_base;
void *pgtable_area;
pgd_t *pgd;
if (!sme_active()) if (!sme_active())
return; return;
/* /*
* Prepare for encrypting the kernel by building new pagetables with * Prepare for encrypting the kernel and initrd by building new
* the necessary attributes needed to encrypt the kernel in place. * pagetables with the necessary attributes needed to encrypt the
* kernel in place.
* *
* One range of virtual addresses will map the memory occupied * One range of virtual addresses will map the memory occupied
* by the kernel as encrypted. * by the kernel and initrd as encrypted.
* *
* Another range of virtual addresses will map the memory occupied * Another range of virtual addresses will map the memory occupied
* by the kernel as decrypted and write-protected. * by the kernel and initrd as decrypted and write-protected.
* *
* The use of write-protect attribute will prevent any of the * The use of write-protect attribute will prevent any of the
* memory from being cached. * memory from being cached.
...@@ -643,6 +771,20 @@ void __init sme_encrypt_kernel(void) ...@@ -643,6 +771,20 @@ void __init sme_encrypt_kernel(void)
kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
kernel_len = kernel_end - kernel_start; kernel_len = kernel_end - kernel_start;
initrd_start = 0;
initrd_end = 0;
initrd_len = 0;
#ifdef CONFIG_BLK_DEV_INITRD
initrd_len = (unsigned long)bp->hdr.ramdisk_size |
((unsigned long)bp->ext_ramdisk_size << 32);
if (initrd_len) {
initrd_start = (unsigned long)bp->hdr.ramdisk_image |
((unsigned long)bp->ext_ramdisk_image << 32);
initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
initrd_len = initrd_end - initrd_start;
}
#endif
/* Set the encryption workarea to be immediately after the kernel */ /* Set the encryption workarea to be immediately after the kernel */
workarea_start = kernel_end; workarea_start = kernel_end;
...@@ -665,16 +807,21 @@ void __init sme_encrypt_kernel(void) ...@@ -665,16 +807,21 @@ void __init sme_encrypt_kernel(void)
*/ */
pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
if (initrd_len)
pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
/* PUDs and PMDs needed in the current pagetables for the workarea */ /* PUDs and PMDs needed in the current pagetables for the workarea */
pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
/* /*
* The total workarea includes the executable encryption area and * The total workarea includes the executable encryption area and
* the pagetable area. * the pagetable area. The start of the workarea is already 2MB
* aligned, align the end of the workarea on a 2MB boundary so that
* we don't try to create/allocate PTE entries from the workarea
* before it is mapped.
*/ */
workarea_len = execute_len + pgtable_area_len; workarea_len = execute_len + pgtable_area_len;
workarea_end = workarea_start + workarea_len; workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
/* /*
* Set the address to the start of where newly created pagetable * Set the address to the start of where newly created pagetable
...@@ -683,45 +830,30 @@ void __init sme_encrypt_kernel(void) ...@@ -683,45 +830,30 @@ void __init sme_encrypt_kernel(void)
* pagetables and when the new encrypted and decrypted kernel * pagetables and when the new encrypted and decrypted kernel
* mappings are populated. * mappings are populated.
*/ */
pgtable_area = (void *)execute_end; ppd.pgtable_area = (void *)execute_end;
/* /*
* Make sure the current pagetable structure has entries for * Make sure the current pagetable structure has entries for
* addressing the workarea. * addressing the workarea.
*/ */
pgd = (pgd_t *)native_read_cr3_pa(); ppd.pgd = (pgd_t *)native_read_cr3_pa();
paddr = workarea_start; ppd.paddr = workarea_start;
while (paddr < workarea_end) { ppd.vaddr = workarea_start;
pgtable_area = sme_populate_pgd(pgd, pgtable_area, ppd.vaddr_end = workarea_end;
paddr, sme_map_range_decrypted(&ppd);
paddr + PMD_FLAGS);
paddr += PMD_PAGE_SIZE;
}
/* Flush the TLB - no globals so cr3 is enough */ /* Flush the TLB - no globals so cr3 is enough */
native_write_cr3(__native_read_cr3()); native_write_cr3(__native_read_cr3());
/* /*
* A new pagetable structure is being built to allow for the kernel * A new pagetable structure is being built to allow for the kernel
* to be encrypted. It starts with an empty PGD that will then be * and initrd to be encrypted. It starts with an empty PGD that will
* populated with new PUDs and PMDs as the encrypted and decrypted * then be populated with new PUDs and PMDs as the encrypted and
* kernel mappings are created. * decrypted kernel mappings are created.
*/ */
pgd = pgtable_area; ppd.pgd = ppd.pgtable_area;
memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD); memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
pgtable_area += sizeof(*pgd) * PTRS_PER_PGD; ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
/* Add encrypted kernel (identity) mappings */
pmd_flags = PMD_FLAGS | _PAGE_ENC;
paddr = kernel_start;
while (paddr < kernel_end) {
pgtable_area = sme_populate_pgd(pgd, pgtable_area,
paddr,
paddr + pmd_flags);
paddr += PMD_PAGE_SIZE;
}
/* /*
* A different PGD index/entry must be used to get different * A different PGD index/entry must be used to get different
...@@ -730,47 +862,79 @@ void __init sme_encrypt_kernel(void) ...@@ -730,47 +862,79 @@ void __init sme_encrypt_kernel(void)
* the base of the mapping. * the base of the mapping.
*/ */
decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
if (initrd_len) {
unsigned long check_base;
check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
decrypted_base = max(decrypted_base, check_base);
}
decrypted_base <<= PGDIR_SHIFT; decrypted_base <<= PGDIR_SHIFT;
/* Add encrypted kernel (identity) mappings */
ppd.paddr = kernel_start;
ppd.vaddr = kernel_start;
ppd.vaddr_end = kernel_end;
sme_map_range_encrypted(&ppd);
/* Add decrypted, write-protected kernel (non-identity) mappings */ /* Add decrypted, write-protected kernel (non-identity) mappings */
pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT); ppd.paddr = kernel_start;
paddr = kernel_start; ppd.vaddr = kernel_start + decrypted_base;
while (paddr < kernel_end) { ppd.vaddr_end = kernel_end + decrypted_base;
pgtable_area = sme_populate_pgd(pgd, pgtable_area, sme_map_range_decrypted_wp(&ppd);
paddr + decrypted_base,
paddr + pmd_flags); if (initrd_len) {
/* Add encrypted initrd (identity) mappings */
paddr += PMD_PAGE_SIZE; ppd.paddr = initrd_start;
ppd.vaddr = initrd_start;
ppd.vaddr_end = initrd_end;
sme_map_range_encrypted(&ppd);
/*
* Add decrypted, write-protected initrd (non-identity) mappings
*/
ppd.paddr = initrd_start;
ppd.vaddr = initrd_start + decrypted_base;
ppd.vaddr_end = initrd_end + decrypted_base;
sme_map_range_decrypted_wp(&ppd);
} }
/* Add decrypted workarea mappings to both kernel mappings */ /* Add decrypted workarea mappings to both kernel mappings */
paddr = workarea_start; ppd.paddr = workarea_start;
while (paddr < workarea_end) { ppd.vaddr = workarea_start;
pgtable_area = sme_populate_pgd(pgd, pgtable_area, ppd.vaddr_end = workarea_end;
paddr, sme_map_range_decrypted(&ppd);
paddr + PMD_FLAGS);
pgtable_area = sme_populate_pgd(pgd, pgtable_area,
paddr + decrypted_base,
paddr + PMD_FLAGS);
paddr += PMD_PAGE_SIZE; ppd.paddr = workarea_start;
} ppd.vaddr = workarea_start + decrypted_base;
ppd.vaddr_end = workarea_end + decrypted_base;
sme_map_range_decrypted(&ppd);
/* Perform the encryption */ /* Perform the encryption */
sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
kernel_len, workarea_start, (unsigned long)pgd); kernel_len, workarea_start, (unsigned long)ppd.pgd);
if (initrd_len)
sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
initrd_len, workarea_start,
(unsigned long)ppd.pgd);
/* /*
* At this point we are running encrypted. Remove the mappings for * At this point we are running encrypted. Remove the mappings for
* the decrypted areas - all that is needed for this is to remove * the decrypted areas - all that is needed for this is to remove
* the PGD entry/entries. * the PGD entry/entries.
*/ */
sme_clear_pgd(pgd, kernel_start + decrypted_base, ppd.vaddr = kernel_start + decrypted_base;
kernel_end + decrypted_base); ppd.vaddr_end = kernel_end + decrypted_base;
sme_clear_pgd(&ppd);
if (initrd_len) {
ppd.vaddr = initrd_start + decrypted_base;
ppd.vaddr_end = initrd_end + decrypted_base;
sme_clear_pgd(&ppd);
}
sme_clear_pgd(pgd, workarea_start + decrypted_base, ppd.vaddr = workarea_start + decrypted_base;
workarea_end + decrypted_base); ppd.vaddr_end = workarea_end + decrypted_base;
sme_clear_pgd(&ppd);
/* Flush the TLB - no globals so cr3 is enough */ /* Flush the TLB - no globals so cr3 is enough */
native_write_cr3(__native_read_cr3()); native_write_cr3(__native_read_cr3());
......
...@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute) ...@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute)
/* /*
* Entry parameters: * Entry parameters:
* RDI - virtual address for the encrypted kernel mapping * RDI - virtual address for the encrypted mapping
* RSI - virtual address for the decrypted kernel mapping * RSI - virtual address for the decrypted mapping
* RDX - length of kernel * RDX - length to encrypt
* RCX - virtual address of the encryption workarea, including: * RCX - virtual address of the encryption workarea, including:
* - stack page (PAGE_SIZE) * - stack page (PAGE_SIZE)
* - encryption routine page (PAGE_SIZE) * - encryption routine page (PAGE_SIZE)
...@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute) ...@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute)
addq $PAGE_SIZE, %rax /* Workarea encryption routine */ addq $PAGE_SIZE, %rax /* Workarea encryption routine */
push %r12 push %r12
movq %rdi, %r10 /* Encrypted kernel */ movq %rdi, %r10 /* Encrypted area */
movq %rsi, %r11 /* Decrypted kernel */ movq %rsi, %r11 /* Decrypted area */
movq %rdx, %r12 /* Kernel length */ movq %rdx, %r12 /* Area length */
/* Copy encryption routine into the workarea */ /* Copy encryption routine into the workarea */
movq %rax, %rdi /* Workarea encryption routine */ movq %rax, %rdi /* Workarea encryption routine */
...@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute) ...@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute)
rep movsb rep movsb
/* Setup registers for call */ /* Setup registers for call */
movq %r10, %rdi /* Encrypted kernel */ movq %r10, %rdi /* Encrypted area */
movq %r11, %rsi /* Decrypted kernel */ movq %r11, %rsi /* Decrypted area */
movq %r8, %rdx /* Pagetables used for encryption */ movq %r8, %rdx /* Pagetables used for encryption */
movq %r12, %rcx /* Kernel length */ movq %r12, %rcx /* Area length */
movq %rax, %r8 /* Workarea encryption routine */ movq %rax, %r8 /* Workarea encryption routine */
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
...@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute) ...@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute)
ENTRY(__enc_copy) ENTRY(__enc_copy)
/* /*
* Routine used to encrypt kernel. * Routine used to encrypt memory in place.
* This routine must be run outside of the kernel proper since * This routine must be run outside of the kernel proper since
* the kernel will be encrypted during the process. So this * the kernel will be encrypted during the process. So this
* routine is defined here and then copied to an area outside * routine is defined here and then copied to an area outside
...@@ -79,19 +79,19 @@ ENTRY(__enc_copy) ...@@ -79,19 +79,19 @@ ENTRY(__enc_copy)
* during execution. * during execution.
* *
* On entry the registers must be: * On entry the registers must be:
* RDI - virtual address for the encrypted kernel mapping * RDI - virtual address for the encrypted mapping
* RSI - virtual address for the decrypted kernel mapping * RSI - virtual address for the decrypted mapping
* RDX - address of the pagetables to use for encryption * RDX - address of the pagetables to use for encryption
* RCX - length of kernel * RCX - length of area
* R8 - intermediate copy buffer * R8 - intermediate copy buffer
* *
* RAX - points to this routine * RAX - points to this routine
* *
* The kernel will be encrypted by copying from the non-encrypted * The area will be encrypted by copying from the non-encrypted
* kernel space to an intermediate buffer and then copying from the * memory space to an intermediate buffer and then copying from the
* intermediate buffer back to the encrypted kernel space. The physical * intermediate buffer back to the encrypted memory space. The physical
* addresses of the two kernel space mappings are the same which * addresses of the two mappings are the same which results in the area
* results in the kernel being encrypted "in place". * being encrypted "in place".
*/ */
/* Enable the new page tables */ /* Enable the new page tables */
mov %rdx, %cr3 mov %rdx, %cr3
...@@ -103,47 +103,55 @@ ENTRY(__enc_copy) ...@@ -103,47 +103,55 @@ ENTRY(__enc_copy)
orq $X86_CR4_PGE, %rdx orq $X86_CR4_PGE, %rdx
mov %rdx, %cr4 mov %rdx, %cr4
push %r15
push %r12
movq %rcx, %r9 /* Save area length */
movq %rdi, %r10 /* Save encrypted area address */
movq %rsi, %r11 /* Save decrypted area address */
/* Set the PAT register PA5 entry to write-protect */ /* Set the PAT register PA5 entry to write-protect */
push %rcx
movl $MSR_IA32_CR_PAT, %ecx movl $MSR_IA32_CR_PAT, %ecx
rdmsr rdmsr
push %rdx /* Save original PAT value */ mov %rdx, %r15 /* Save original PAT value */
andl $0xffff00ff, %edx /* Clear PA5 */ andl $0xffff00ff, %edx /* Clear PA5 */
orl $0x00000500, %edx /* Set PA5 to WP */ orl $0x00000500, %edx /* Set PA5 to WP */
wrmsr wrmsr
pop %rdx /* RDX contains original PAT value */
pop %rcx
movq %rcx, %r9 /* Save kernel length */
movq %rdi, %r10 /* Save encrypted kernel address */
movq %rsi, %r11 /* Save decrypted kernel address */
wbinvd /* Invalidate any cache entries */ wbinvd /* Invalidate any cache entries */
/* Copy/encrypt 2MB at a time */ /* Copy/encrypt up to 2MB at a time */
movq $PMD_PAGE_SIZE, %r12
1: 1:
movq %r11, %rsi /* Source - decrypted kernel */ cmpq %r12, %r9
jnb 2f
movq %r9, %r12
2:
movq %r11, %rsi /* Source - decrypted area */
movq %r8, %rdi /* Dest - intermediate copy buffer */ movq %r8, %rdi /* Dest - intermediate copy buffer */
movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ movq %r12, %rcx
rep movsb rep movsb
movq %r8, %rsi /* Source - intermediate copy buffer */ movq %r8, %rsi /* Source - intermediate copy buffer */
movq %r10, %rdi /* Dest - encrypted kernel */ movq %r10, %rdi /* Dest - encrypted area */
movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ movq %r12, %rcx
rep movsb rep movsb
addq $PMD_PAGE_SIZE, %r11 addq %r12, %r11
addq $PMD_PAGE_SIZE, %r10 addq %r12, %r10
subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */ subq %r12, %r9 /* Kernel length decrement */
jnz 1b /* Kernel length not zero? */ jnz 1b /* Kernel length not zero? */
/* Restore PAT register */ /* Restore PAT register */
push %rdx /* Save original PAT value */
movl $MSR_IA32_CR_PAT, %ecx movl $MSR_IA32_CR_PAT, %ecx
rdmsr rdmsr
pop %rdx /* Restore original PAT value */ mov %r15, %rdx /* Restore original PAT value */
wrmsr wrmsr
pop %r12
pop %r15
ret ret
.L__enc_copy_end: .L__enc_copy_end:
ENDPROC(__enc_copy) ENDPROC(__enc_copy)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment