Commit e42b4a50 authored by Paolo Bonzini

Merge tag 'kvmarm-for-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm updates for 4.20

- Improved guest IPA space support (32 to 52 bits)
- RAS event delivery for 32bit
- PMU fixes
- Guest entry hardening
- Various cleanups
parents 1e58e5e5 e4e11cc0
......@@ -123,6 +123,37 @@ memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
flag KVM_VM_MIPS_VZ.
On arm64, the physical address size for a VM (IPA Size limit) is limited
to 40bits by default. The limit can be configured if the host supports the
extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use
KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type
identifier, where IPA_Bits is the maximum width of any physical
address used by the VM. The IPA_Bits is encoded in bits[7-0] of the
machine type identifier.
e.g., to configure a guest to use a 48bit physical address size:
vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48));
The requested size (IPA_Bits) must be:
0 - Implies default size, 40bits (for backward compatibility)
or
N - Implies N bits, where N is a positive integer such that,
32 <= N <= Host_IPA_Limit
Host_IPA_Limit is the maximum possible value for IPA_Bits on the host and
is dependent on the CPU capability and the kernel configuration. The limit can
be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
ioctl() at run-time.
Please note that configuring the IPA size does not affect the capability
exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
the size of the address translated by the stage2 level (guest physical to
host physical address translations).
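For instance, a minimal userspace sketch (not part of this patch; error
handling elided) that checks the capability before requesting a larger
IPA space could look like:

  #include <fcntl.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  int create_vm(void)
  {
          int dev_fd = open("/dev/kvm", O_RDWR);
          /* Returns Host_IPA_Limit, or 0 if the extension is absent */
          int ipa = ioctl(dev_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
          unsigned long type = 0;                 /* 0 => default 40bit IPA */

          if (ipa >= 48)
                  type = KVM_VM_TYPE_ARM_IPA_SIZE(48);

          return ioctl(dev_fd, KVM_CREATE_VM, type);
  }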
4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
......
......@@ -12260,6 +12260,7 @@ F: Documentation/networking/rds.txt
RDT - RESOURCE ALLOCATION
M: Fenghua Yu <fenghua.yu@intel.com>
M: Reinette Chatre <reinette.chatre@intel.com>
L: linux-kernel@vger.kernel.org
S: Supported
F: arch/x86/kernel/cpu/intel_rdt*
......@@ -15924,6 +15925,7 @@ F: net/x25/
X86 ARCHITECTURE (32-BIT AND 64-BIT)
M: Thomas Gleixner <tglx@linutronix.de>
M: Ingo Molnar <mingo@redhat.com>
M: Borislav Petkov <bp@alien8.de>
R: "H. Peter Anvin" <hpa@zytor.com>
M: x86@kernel.org
L: linux-kernel@vger.kernel.org
......@@ -15952,6 +15954,15 @@ M: Borislav Petkov <bp@alien8.de>
S: Maintained
F: arch/x86/kernel/cpu/microcode/*
X86 MM
M: Dave Hansen <dave.hansen@linux.intel.com>
M: Andy Lutomirski <luto@kernel.org>
M: Peter Zijlstra <peterz@infradead.org>
L: linux-kernel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm
S: Maintained
F: arch/x86/mm/
X86 PLATFORM DRIVERS
M: Darren Hart <dvhart@infradead.org>
M: Andy Shevchenko <andy@infradead.org>
......
......@@ -2,7 +2,7 @@
VERSION = 4
PATCHLEVEL = 19
SUBLEVEL = 0
EXTRAVERSION = -rc4
EXTRAVERSION = -rc5
NAME = Merciless Moray
# *DOCUMENTATION*
......
......@@ -133,8 +133,7 @@
* space.
*/
#define KVM_PHYS_SHIFT (40)
#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL))
#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30))
/* Virtualization Translation Control Register (VTCR) bits */
......
......@@ -273,7 +273,7 @@ static inline void __cpu_init_stage2(void)
kvm_call_hyp(__init_stage2_translation);
}
static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
return 0;
}
......@@ -354,4 +354,15 @@ static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
struct kvm *kvm_arch_alloc_vm(void);
void kvm_arch_free_vm(struct kvm *kvm);
static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
{
/*
* On 32bit ARM, VMs get a static 40bit IPA stage2 setup,
* so any non-zero value used as type is illegal.
*/
if (type)
return -EINVAL;
return 0;
}
#endif /* __ARM_KVM_HOST_H__ */
......@@ -35,16 +35,12 @@
addr; \
})
/*
* KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
*/
#define KVM_MMU_CACHE_MIN_PAGES 2
#ifndef __ASSEMBLY__
#include <linux/highmem.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_hyp.h>
#include <asm/pgalloc.h>
#include <asm/stage2_pgtable.h>
......@@ -52,6 +48,13 @@
/* Ensure compatibility with arm64 */
#define VA_BITS 32
#define kvm_phys_shift(kvm) KVM_PHYS_SHIFT
#define kvm_phys_size(kvm) (1ULL << kvm_phys_shift(kvm))
#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - 1ULL)
#define kvm_vttbr_baddr_mask(kvm) VTTBR_BADDR_MASK
#define stage2_pgd_size(kvm) (PTRS_PER_S2_PGD * sizeof(pgd_t))
int create_hyp_mappings(void *from, void *to, pgprot_t prot);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **kaddr,
......@@ -355,6 +358,8 @@ static inline int hyp_map_aux_data(void)
#define kvm_phys_to_vttbr(addr) (addr)
static inline void kvm_set_ipa_limit(void) {}
#endif /* !__ASSEMBLY__ */
#endif /* __ARM_KVM_MMU_H__ */
......@@ -19,43 +19,53 @@
#ifndef __ARM_S2_PGTABLE_H_
#define __ARM_S2_PGTABLE_H_
#define stage2_pgd_none(pgd) pgd_none(pgd)
#define stage2_pgd_clear(pgd) pgd_clear(pgd)
#define stage2_pgd_present(pgd) pgd_present(pgd)
#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud)
#define stage2_pud_offset(pgd, address) pud_offset(pgd, address)
#define stage2_pud_free(pud) pud_free(NULL, pud)
#define stage2_pud_none(pud) pud_none(pud)
#define stage2_pud_clear(pud) pud_clear(pud)
#define stage2_pud_present(pud) pud_present(pud)
#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd)
#define stage2_pmd_offset(pud, address) pmd_offset(pud, address)
#define stage2_pmd_free(pmd) pmd_free(NULL, pmd)
#define stage2_pud_huge(pud) pud_huge(pud)
/*
* kvm_mmu_cache_min_pages() is the number of pages required
* to install a stage-2 translation. We pre-allocate the entry
* level table at VM creation. Since we have a 3 level page-table,
* we need only two pages to add a new mapping.
*/
#define kvm_mmu_cache_min_pages(kvm) 2
#define stage2_pgd_none(kvm, pgd) pgd_none(pgd)
#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd)
#define stage2_pgd_present(kvm, pgd) pgd_present(pgd)
#define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud)
#define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address)
#define stage2_pud_free(kvm, pud) pud_free(NULL, pud)
#define stage2_pud_none(kvm, pud) pud_none(pud)
#define stage2_pud_clear(kvm, pud) pud_clear(pud)
#define stage2_pud_present(kvm, pud) pud_present(pud)
#define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd)
#define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address)
#define stage2_pmd_free(kvm, pmd) pmd_free(NULL, pmd)
#define stage2_pud_huge(kvm, pud) pud_huge(pud)
/* Open coded p*d_addr_end that can deal with 64bit addresses */
static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
static inline phys_addr_t
stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
return (boundary - 1 < end - 1) ? boundary : end;
}
#define stage2_pud_addr_end(addr, end) (end)
#define stage2_pud_addr_end(kvm, addr, end) (end)
static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
static inline phys_addr_t
stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;
return (boundary - 1 < end - 1) ? boundary : end;
}
#define stage2_pgd_index(addr) pgd_index(addr)
#define stage2_pgd_index(kvm, addr) pgd_index(addr)
#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep)
#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
#define stage2_pud_table_empty(pudp) false
#define stage2_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
#define stage2_pud_table_empty(kvm, pudp) false
#endif /* __ARM_S2_PGTABLE_H_ */
......@@ -530,6 +530,26 @@ void arm64_set_ssbd_mitigation(bool state);
static inline void arm64_set_ssbd_mitigation(bool state) {}
#endif
static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
{
switch (parange) {
case 0: return 32;
case 1: return 36;
case 2: return 40;
case 3: return 42;
case 4: return 44;
case 5: return 48;
case 6: return 52;
/*
* A future PE could use a value unknown to the kernel.
* However, by the "D10.1.4 Principles of the ID scheme
* for fields in ID registers", ARM DDI 0487C.a, any new
* value is guaranteed to be higher than what we know already.
* As a safe limit, we return the limit supported by the kernel.
*/
default: return CONFIG_ARM64_PA_BITS;
}
}
#endif /* __ASSEMBLY__ */
#endif
......@@ -107,6 +107,7 @@
#define VTCR_EL2_RES1 (1 << 31)
#define VTCR_EL2_HD (1 << 22)
#define VTCR_EL2_HA (1 << 21)
#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK
#define VTCR_EL2_TG0_MASK TCR_TG0_MASK
#define VTCR_EL2_TG0_4K TCR_TG0_4K
......@@ -120,62 +121,149 @@
#define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA
#define VTCR_EL2_SL0_SHIFT 6
#define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT)
#define VTCR_EL2_SL0_LVL1 (1 << VTCR_EL2_SL0_SHIFT)
#define VTCR_EL2_T0SZ_MASK 0x3f
#define VTCR_EL2_T0SZ_40B 24
#define VTCR_EL2_VS_SHIFT 19
#define VTCR_EL2_VS_8BIT (0 << VTCR_EL2_VS_SHIFT)
#define VTCR_EL2_VS_16BIT (1 << VTCR_EL2_VS_SHIFT)
#define VTCR_EL2_T0SZ(x) TCR_T0SZ(x)
/*
* We configure the Stage-2 page tables to always restrict the IPA space to be
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
* VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
* (see hyp-init.S).
* The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
*
* Note that when using 4K pages, we concatenate two first level page tables
* together. With 16K pages, we concatenate 16 first level page tables.
*
* The magic numbers used for VTTBR_X in this patch can be found in Tables
* D4-23 and D4-25 in ARM DDI 0487A.b.
*/
#define VTCR_EL2_T0SZ_IPA VTCR_EL2_T0SZ_40B
#define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
* 64kB pages (TG0 = 1)
* 2 level page tables (SL = 1)
* VTCR_EL2:SL0 indicates the entry level for Stage2 translation.
* Interestingly, it depends on the page size.
* See D.10.2.121, VTCR_EL2, in ARM DDI 0487C.a
*
* -----------------------------------------
* | Entry level | 4K | 16K/64K |
* ------------------------------------------
* | Level: 0 | 2 | - |
* ------------------------------------------
* | Level: 1 | 1 | 2 |
* ------------------------------------------
* | Level: 2 | 0 | 1 |
* ------------------------------------------
* | Level: 3 | - | 0 |
* ------------------------------------------
*
* The table roughly translates to :
*
* SL0(PAGE_SIZE, Entry_level) = TGRAN_SL0_BASE - Entry_Level
*
* Where TGRAN_SL0_BASE is a magic number depending on the page size:
* TGRAN_SL0_BASE(4K) = 2
* TGRAN_SL0_BASE(16K) = 3
* TGRAN_SL0_BASE(64K) = 3
* provided we take care of ruling out the unsupported cases and
* Entry_Level = 4 - Number_of_levels.
*
*/
#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1)
#define VTTBR_X_TGRAN_MAGIC 38
#ifdef CONFIG_ARM64_64K_PAGES
#define VTCR_EL2_TGRAN VTCR_EL2_TG0_64K
#define VTCR_EL2_TGRAN_SL0_BASE 3UL
#elif defined(CONFIG_ARM64_16K_PAGES)
/*
* Stage2 translation configuration:
* 16kB pages (TG0 = 2)
* 2 level page tables (SL = 1)
*/
#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1)
#define VTTBR_X_TGRAN_MAGIC 42
#define VTCR_EL2_TGRAN VTCR_EL2_TG0_16K
#define VTCR_EL2_TGRAN_SL0_BASE 3UL
#else /* 4K */
/*
* Stage2 translation configuration:
* 4kB pages (TG0 = 0)
* 3 level page tables (SL = 1)
*/
#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1)
#define VTTBR_X_TGRAN_MAGIC 37
#define VTCR_EL2_TGRAN VTCR_EL2_TG0_4K
#define VTCR_EL2_TGRAN_SL0_BASE 2UL
#endif
#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
#define VTCR_EL2_LVLS_TO_SL0(levels) \
((VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels))) << VTCR_EL2_SL0_SHIFT)
#define VTCR_EL2_SL0_TO_LVLS(sl0) \
((sl0) + 4 - VTCR_EL2_TGRAN_SL0_BASE)
#define VTCR_EL2_LVLS(vtcr) \
VTCR_EL2_SL0_TO_LVLS(((vtcr) & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT)
#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN)
#define VTCR_EL2_IPA(vtcr) (64 - ((vtcr) & VTCR_EL2_T0SZ_MASK))
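As an illustration (a standalone sketch, not from this patch), the SL0 helpers
above round-trip for the default 40bit IPA with 4K pages, which needs 3 levels
and therefore entry level 1:

  #include <stdio.h>

  /* Mirrors VTCR_EL2_LVLS_TO_SL0()/VTCR_EL2_SL0_TO_LVLS() for 4K pages,
   * where TGRAN_SL0_BASE = 2 and SL0 sits at bits [7:6] of VTCR_EL2. */
  int main(void)
  {
          unsigned int levels = 3;                        /* 40bit IPA, 4K pages */
          unsigned long sl0 = (2UL - (4 - levels)) << 6;  /* 0x40, i.e. SL0 = 1 */

          printf("SL0 field = %#lx, levels back = %lu\n",
                 sl0, (sl0 >> 6) + 4 - 2);                /* prints 0x40, 3 */
          return 0;
  }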
/*
* ARM VMSAv8-64 defines an algorithm for finding the translation table
* descriptors in section D4.2.8 in ARM DDI 0487C.a.
*
* The algorithm defines the expectations on the translation table
* addresses for each level, based on PAGE_SIZE, entry level
* and the translation table size (T0SZ). The variable "x" in the
* algorithm determines the alignment of a table base address at a given
* level and thus determines the alignment of VTTBR:BADDR for stage2
* page table entry level.
* Since the number of bits resolved at the entry level could vary
* depending on the T0SZ, the value of "x" is defined based on a
* Magic constant for a given PAGE_SIZE and Entry Level. The
* intermediate levels must always be aligned to the PAGE_SIZE (i.e.,
* x = PAGE_SHIFT).
*
* The value of "x" for entry level is calculated as :
* x = Magic_N - T0SZ
*
* where Magic_N is an integer depending on the page size and the entry
* level of the page table as below:
*
* --------------------------------------------
* | Entry level | 4K 16K 64K |
* --------------------------------------------
* | Level: 0 (4 levels) | 28 | - | - |
* --------------------------------------------
* | Level: 1 (3 levels) | 37 | 31 | 25 |
* --------------------------------------------
* | Level: 2 (2 levels) | 46 | 42 | 38 |
* --------------------------------------------
* | Level: 3 (1 level) | - | 53 | 51 |
* --------------------------------------------
*
* We have a magic formula for the Magic_N below:
*
* Magic_N(PAGE_SIZE, Level) = 64 - ((PAGE_SHIFT - 3) * Number_of_levels)
*
* where Number_of_levels = (4 - Level). We are only interested in the
* value for Entry_Level for the stage2 page table.
*
* So, given that T0SZ = (64 - IPA_SHIFT), we can compute 'x' as follows:
*
* x = (64 - ((PAGE_SHIFT - 3) * Number_of_levels)) - (64 - IPA_SHIFT)
* = IPA_SHIFT - ((PAGE_SHIFT - 3) * Number of levels)
*
* Here is one way to explain the Magic Formula:
*
* x = log2(Size_of_Entry_Level_Table)
*
* Since, we can resolve (PAGE_SHIFT - 3) bits at each level, and another
* PAGE_SHIFT bits in the PTE, we have :
*
* Bits_Entry_level = IPA_SHIFT - ((PAGE_SHIFT - 3) * (n - 1) + PAGE_SHIFT)
* = IPA_SHIFT - (PAGE_SHIFT - 3) * n - 3
* where n = number of levels, and since each pointer is 8bytes, we have:
*
* x = Bits_Entry_Level + 3
* = IPA_SHIFT - (PAGE_SHIFT - 3) * n
*
* The only constraint here is that, we have to find the number of page table
* levels for a given IPA size (which we do, see stage2_pgtable_levels())
*/
#define ARM64_VTTBR_X(ipa, levels) ((ipa) - ((levels) * (PAGE_SHIFT - 3)))
#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (UL(48))
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
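As a quick cross-check (illustrative only, not from this patch), the per-VM
formula reproduces the old fixed VTTBR_X for the default 40bit IPA with 4K
pages and 3 levels, i.e. an 8KB-aligned BADDR for the two concatenated
entry-level tables:

  #include <stdio.h>

  /* Mirrors ARM64_VTTBR_X(ipa, levels) = ipa - levels * (PAGE_SHIFT - 3) */
  static unsigned int vttbr_x(unsigned int ipa, unsigned int levels,
                              unsigned int page_shift)
  {
          return ipa - levels * (page_shift - 3);
  }

  int main(void)
  {
          /* 40bit IPA, 4K pages, 3 levels: 40 - 3 * 9 = 13 */
          printf("x = %u\n", vttbr_x(40, 3, 12));   /* same as old 37 - T0SZ(24) */
          return 0;
  }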
......@@ -223,6 +311,13 @@
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
/*
* We have
* PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12]
* HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12]
*/
#define PAR_TO_HPFAR(par) \
(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
#define kvm_arm_exception_type \
{0, "IRQ" }, \
......
......@@ -30,6 +30,7 @@
#define ARM_EXCEPTION_IRQ 0
#define ARM_EXCEPTION_EL1_SERROR 1
#define ARM_EXCEPTION_TRAP 2
#define ARM_EXCEPTION_IL 3
/* The hyp-stub will return this for any kvm_call_hyp() call */
#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
......@@ -72,8 +73,6 @@ extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void);
extern u32 __init_stage2_translation(void);
/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
#define __hyp_this_cpu_ptr(sym) \
({ \
......
......@@ -53,7 +53,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
struct kvm_arch {
......@@ -61,11 +61,13 @@ struct kvm_arch {
u64 vmid_gen;
u32 vmid;
/* 1-level 2nd stage table, protected by kvm->mmu_lock */
/* stage2 entry level table */
pgd_t *pgd;
/* VTTBR value associated with above pgd and vmid */
u64 vttbr;
/* VTCR_EL2 value for this VM */
u64 vtcr;
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
......@@ -440,13 +442,7 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
static inline void __cpu_init_stage2(void)
{
u32 parange = kvm_call_hyp(__init_stage2_translation);
WARN_ONCE(parange < 40,
"PARange is %d bits, unsupported configuration!", parange);
}
static inline void __cpu_init_stage2(void) {}
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
......@@ -509,8 +505,12 @@ static inline int kvm_arm_have_ssbd(void)
void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
void kvm_set_ipa_limit(void);
#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
void kvm_arch_free_vm(struct kvm *kvm);
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
#endif /* __ARM64_KVM_HOST_H__ */
......@@ -155,5 +155,15 @@ void deactivate_traps_vhe_put(void);
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
void __noreturn __hyp_do_panic(unsigned long, ...);
/*
* Must be called from hyp code running at EL2 with an updated VTTBR
* and interrupts disabled.
*/
static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
{
write_sysreg(kvm->arch.vtcr, vtcr_el2);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
}
#endif /* __ARM64_KVM_HYP_H__ */
......@@ -141,8 +141,16 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
* We currently only support a 40bit IPA.
*/
#define KVM_PHYS_SHIFT (40)
#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr)
#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm))
#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL))
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
return page_count(ptr_page) == 1;
}
#include <asm/stage2_pgtable.h>
......@@ -238,12 +246,6 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
}
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
return page_count(ptr_page) == 1;
}
#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
#ifdef __PAGETABLE_PMD_FOLDED
......@@ -517,5 +519,29 @@ static inline int hyp_map_aux_data(void)
#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
/*
* Get the magic number 'x' for VTTBR:BADDR of this KVM instance.
* With v8.2 LVA extensions, 'x' should be a minimum of 6 with
* 52bit IPS.
*/
static inline int arm64_vttbr_x(u32 ipa_shift, u32 levels)
{
int x = ARM64_VTTBR_X(ipa_shift, levels);
return (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && x < 6) ? 6 : x;
}
static inline u64 vttbr_baddr_mask(u32 ipa_shift, u32 levels)
{
unsigned int x = arm64_vttbr_x(ipa_shift, levels);
return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x);
}
static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
{
return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
}
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
......@@ -25,6 +25,9 @@
#define CurrentEL_EL1 (1 << 2)
#define CurrentEL_EL2 (2 << 2)
/* Additional SPSR bits not exposed in the UABI */
#define PSR_IL_BIT (1 << 20)
/* AArch32-specific ptrace requests */
#define COMPAT_PTRACE_GETREGS 12
#define COMPAT_PTRACE_SETREGS 13
......
/*
* Copyright (C) 2016 - ARM Ltd
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
#define __ARM64_S2_PGTABLE_NOPMD_H_
#include <asm/stage2_pgtable-nopud.h>
#define __S2_PGTABLE_PMD_FOLDED
#define S2_PMD_SHIFT S2_PUD_SHIFT
#define S2_PTRS_PER_PMD 1
#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
#define S2_PMD_MASK (~(S2_PMD_SIZE-1))
#define stage2_pud_none(pud) (0)
#define stage2_pud_present(pud) (1)
#define stage2_pud_clear(pud) do { } while (0)
#define stage2_pud_populate(pud, pmd) do { } while (0)
#define stage2_pmd_offset(pud, address) ((pmd_t *)(pud))
#define stage2_pmd_free(pmd) do { } while (0)
#define stage2_pmd_addr_end(addr, end) (end)
#define stage2_pud_huge(pud) (0)
#define stage2_pmd_table_empty(pmdp) (0)
#endif
/*
* Copyright (C) 2016 - ARM Ltd
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
#define __ARM64_S2_PGTABLE_NOPUD_H_
#define __S2_PGTABLE_PUD_FOLDED
#define S2_PUD_SHIFT S2_PGDIR_SHIFT
#define S2_PTRS_PER_PUD 1
#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
#define S2_PUD_MASK (~(S2_PUD_SIZE-1))
#define stage2_pgd_none(pgd) (0)
#define stage2_pgd_present(pgd) (1)
#define stage2_pgd_clear(pgd) do { } while (0)
#define stage2_pgd_populate(pgd, pud) do { } while (0)
#define stage2_pud_offset(pgd, address) ((pud_t *)(pgd))
#define stage2_pud_free(x) do { } while (0)
#define stage2_pud_addr_end(addr, end) (end)
#define stage2_pud_table_empty(pmdp) (0)
#endif
......@@ -19,8 +19,16 @@
#ifndef __ARM64_S2_PGTABLE_H_
#define __ARM64_S2_PGTABLE_H_
#include <linux/hugetlb.h>
#include <asm/pgtable.h>
/*
* PGDIR_SHIFT determines the size a top-level page table entry can map
* and depends on the number of levels in the page table. Compute the
* PGDIR_SHIFT for a given number of levels.
*/
#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
/*
* The hardware supports concatenation of up to 16 tables at stage2 entry level
* and we use the feature whenever possible.
......@@ -29,112 +37,208 @@
* On arm64, the smallest PAGE_SIZE supported is 4k, which means
* (PAGE_SHIFT - 3) > 4 holds for all page sizes.
* This implies, the total number of page table levels at stage2 expected
* by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4)
* by the hardware is actually the number of levels required for (IPA_SHIFT - 4)
* in normal translations (e.g., stage1), since we cannot have another level in
* the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4).
* the range (IPA_SHIFT, IPA_SHIFT - 4).
*/
#define STAGE2_PGTABLE_LEVELS ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4)
#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr)
/*
* With all the supported VA_BITs and 40bit guest IPA, the following condition
* is always true:
*
* STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
*
* We base our stage-2 page table walker helpers on this assumption and
* fall back to using the host version of the helper wherever possible.
* i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back
* to using the host version, since it is guaranteed it is not folded at host.
*
* If the condition breaks in the future, we can rearrange the host level
* definitions and reuse them for stage2. Till then...
*/
#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
#error "Unsupported combination of guest IPA and host VA_BITS."
#endif
/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */
#define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
#define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT)
#define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1))
/* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */
#define stage2_pgdir_shift(kvm) pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
#define stage2_pgdir_size(kvm) (1ULL << stage2_pgdir_shift(kvm))
#define stage2_pgdir_mask(kvm) ~(stage2_pgdir_size(kvm) - 1)
/*
* The number of PTRS across all concatenated stage2 tables given by the
* number of bits resolved at the initial level.
* If we force more levels than necessary, we may have (stage2_pgdir_shift > IPA),
* in which case, stage2_pgd_ptrs will have one entry.
*/
#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT))
#define pgd_ptrs_shift(ipa, pgdir_shift) \
((ipa) > (pgdir_shift) ? ((ipa) - (pgdir_shift)) : 0)
#define __s2_pgd_ptrs(ipa, lvls) \
(1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls))))
#define __s2_pgd_size(ipa, lvls) (__s2_pgd_ptrs((ipa), (lvls)) * sizeof(pgd_t))
#define stage2_pgd_ptrs(kvm) __s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
#define stage2_pgd_size(kvm) __s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
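For example (an illustrative sketch, not from the patch), the default 40bit
IPA with 4K pages and 3 levels gives pgdir_shift = 30, i.e. a 1024-entry,
8KB entry-level table, consistent with the 2^13 VTTBR:BADDR alignment derived
in kvm_arm.h:

  #include <stdio.h>

  int main(void)
  {
          /* Hypothetical VM: 40bit IPA, 4K pages (PAGE_SHIFT = 12), 3 levels */
          unsigned int ipa = 40, levels = 3, page_shift = 12;
          unsigned int pgdir_shift = (page_shift - 3) * levels + 3;      /* 30 */
          unsigned int ptrs = 1U << (ipa > pgdir_shift ? ipa - pgdir_shift : 0);

          printf("%u PGD entries, %u bytes\n", ptrs, ptrs * 8);  /* 1024, 8192 */
          return 0;
  }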
/*
* KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
* levels in addition to the PGD.
* kvm_mmu_cache_min_pages() is the number of pages required to install
* a stage-2 translation. We pre-allocate the entry level page table at
* the VM creation.
*/
#define KVM_MMU_CACHE_MIN_PAGES (STAGE2_PGTABLE_LEVELS - 1)
#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1)
#if STAGE2_PGTABLE_LEVELS > 3
/* Stage2 PUD definitions when the level is present */
static inline bool kvm_stage2_has_pud(struct kvm *kvm)
{
return (CONFIG_PGTABLE_LEVELS > 3) && (kvm_stage2_levels(kvm) > 3);
}
#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
#define S2_PUD_SIZE (1UL << S2_PUD_SHIFT)
#define S2_PUD_MASK (~(S2_PUD_SIZE - 1))
#define stage2_pgd_none(pgd) pgd_none(pgd)
#define stage2_pgd_clear(pgd) pgd_clear(pgd)
#define stage2_pgd_present(pgd) pgd_present(pgd)
#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud)
#define stage2_pud_offset(pgd, address) pud_offset(pgd, address)
#define stage2_pud_free(pud) pud_free(NULL, pud)
static inline bool stage2_pgd_none(struct kvm *kvm, pgd_t pgd)
{
if (kvm_stage2_has_pud(kvm))
return pgd_none(pgd);
else
return 0;
}
#define stage2_pud_table_empty(pudp) kvm_page_empty(pudp)
static inline void stage2_pgd_clear(struct kvm *kvm, pgd_t *pgdp)
{
if (kvm_stage2_has_pud(kvm))
pgd_clear(pgdp);
}
static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end)
static inline bool stage2_pgd_present(struct kvm *kvm, pgd_t pgd)
{
phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
if (kvm_stage2_has_pud(kvm))
return pgd_present(pgd);
else
return 1;
}
return (boundary - 1 < end - 1) ? boundary : end;
static inline void stage2_pgd_populate(struct kvm *kvm, pgd_t *pgd, pud_t *pud)
{
if (kvm_stage2_has_pud(kvm))
pgd_populate(NULL, pgd, pud);
}
static inline pud_t *stage2_pud_offset(struct kvm *kvm,
pgd_t *pgd, unsigned long address)
{
if (kvm_stage2_has_pud(kvm))
return pud_offset(pgd, address);
else
return (pud_t *)pgd;
}
#endif /* STAGE2_PGTABLE_LEVELS > 3 */
static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud)
{
if (kvm_stage2_has_pud(kvm))
pud_free(NULL, pud);
}
static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp)
{
if (kvm_stage2_has_pud(kvm))
return kvm_page_empty(pudp);
else
return false;
}
#if STAGE2_PGTABLE_LEVELS > 2
static inline phys_addr_t
stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
if (kvm_stage2_has_pud(kvm)) {
phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
return (boundary - 1 < end - 1) ? boundary : end;
} else {
return end;
}
}
/* Stage2 PMD definitions when the level is present */
static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
{
return (CONFIG_PGTABLE_LEVELS > 2) && (kvm_stage2_levels(kvm) > 2);
}
#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT)
#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
#define S2_PMD_MASK (~(S2_PMD_SIZE - 1))
#define stage2_pud_none(pud) pud_none(pud)
#define stage2_pud_clear(pud) pud_clear(pud)
#define stage2_pud_present(pud) pud_present(pud)
#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd)
#define stage2_pmd_offset(pud, address) pmd_offset(pud, address)
#define stage2_pmd_free(pmd) pmd_free(NULL, pmd)
static inline bool stage2_pud_none(struct kvm *kvm, pud_t pud)
{
if (kvm_stage2_has_pmd(kvm))
return pud_none(pud);
else
return 0;
}
static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pud)
{
if (kvm_stage2_has_pmd(kvm))
pud_clear(pud);
}
#define stage2_pud_huge(pud) pud_huge(pud)
#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
static inline bool stage2_pud_present(struct kvm *kvm, pud_t pud)
{
if (kvm_stage2_has_pmd(kvm))
return pud_present(pud);
else
return 1;
}
static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pud, pmd_t *pmd)
{
phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
if (kvm_stage2_has_pmd(kvm))
pud_populate(NULL, pud, pmd);
}
return (boundary - 1 < end - 1) ? boundary : end;
static inline pmd_t *stage2_pmd_offset(struct kvm *kvm,
pud_t *pud, unsigned long address)
{
if (kvm_stage2_has_pmd(kvm))
return pmd_offset(pud, address);
else
return (pmd_t *)pud;
}
#endif /* STAGE2_PGTABLE_LEVELS > 2 */
static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd)
{
if (kvm_stage2_has_pmd(kvm))
pmd_free(NULL, pmd);
}
static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud)
{
if (kvm_stage2_has_pmd(kvm))
return pud_huge(pud);
else
return 0;
}
static inline bool stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp)
{
if (kvm_stage2_has_pmd(kvm))
return kvm_page_empty(pmdp);
else
return 0;
}
#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep)
static inline phys_addr_t
stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
if (kvm_stage2_has_pmd(kvm)) {
phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
#if STAGE2_PGTABLE_LEVELS == 2
#include <asm/stage2_pgtable-nopmd.h>
#elif STAGE2_PGTABLE_LEVELS == 3
#include <asm/stage2_pgtable-nopud.h>
#endif
return (boundary - 1 < end - 1) ? boundary : end;
} else {
return end;
}
}
static inline bool stage2_pte_table_empty(struct kvm *kvm, pte_t *ptep)
{
return kvm_page_empty(ptep);
}
#define stage2_pgd_index(addr) (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr)
{
return (((addr) >> stage2_pgdir_shift(kvm)) & (stage2_pgd_ptrs(kvm) - 1));
}
static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
static inline phys_addr_t
stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK;
phys_addr_t boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm);
return (boundary - 1 < end - 1) ? boundary : end;
}
......
......@@ -338,15 +338,15 @@ int __attribute_const__ kvm_target_cpu(void)
return KVM_ARM_TARGET_CORTEX_A53;
case ARM_CPU_PART_CORTEX_A57:
return KVM_ARM_TARGET_CORTEX_A57;
};
}
break;
case ARM_CPU_IMP_APM:
switch (part_number) {
case APM_CPU_PART_POTENZA:
return KVM_ARM_TARGET_XGENE_POTENZA;
};
}
break;
};
}
/* Return a default generic target */
return KVM_ARM_TARGET_GENERIC_V8;
......
......@@ -284,6 +284,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
*/
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
return 0;
case ARM_EXCEPTION_IL:
/*
* We attempted an illegal exception return. Guest state must
* have been corrupted somehow. Give up.
*/
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
return -EINVAL;
default:
kvm_pr_unimpl("Unsupported exception type: %d",
exception_index);
......
......@@ -19,7 +19,6 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o
obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
# KVM code is run at a different exception code with a different map, so
# compiler instrumentation that inserts callbacks or checks into the code may
......
......@@ -162,6 +162,20 @@ el1_error:
mov x0, #ARM_EXCEPTION_EL1_SERROR
b __guest_exit
el2_sync:
/* Check for illegal exception return, otherwise panic */
mrs x0, spsr_el2
/* if this was something else, then panic! */
tst x0, #PSR_IL_BIT
b.eq __hyp_panic
/* Let's attempt a recovery from the illegal exception return */
get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_IL
b __guest_exit
el2_error:
ldp x0, x1, [sp], #16
......@@ -240,7 +254,7 @@ ENTRY(__kvm_hyp_vector)
invalid_vect el2t_fiq_invalid // FIQ EL2t
invalid_vect el2t_error_invalid // Error EL2t
invalid_vect el2h_sync_invalid // Synchronous EL2h
valid_vect el2_sync // Synchronous EL2h
invalid_vect el2h_irq_invalid // IRQ EL2h
invalid_vect el2h_fiq_invalid // FIQ EL2h
valid_vect el2_error // Error EL2h
......
/*
* Copyright (C) 2016 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/types.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
u32 __hyp_text __init_stage2_translation(void)
{
u64 val = VTCR_EL2_FLAGS;
u64 parange;
u64 tmp;
/*
* Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS
* bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
* PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
*/
parange = read_sysreg(id_aa64mmfr0_el1) & 7;
if (parange > ID_AA64MMFR0_PARANGE_MAX)
parange = ID_AA64MMFR0_PARANGE_MAX;
val |= parange << 16;
/* Compute the actual PARange... */
switch (parange) {
case 0:
parange = 32;
break;
case 1:
parange = 36;
break;
case 2:
parange = 40;
break;
case 3:
parange = 42;
break;
case 4:
parange = 44;
break;
case 5:
default:
parange = 48;
break;
}
/*
* ... and clamp it to 40 bits, unless we have some braindead
* HW that implements less than that. In all cases, we'll
* return that value for the rest of the kernel to decide what
* to do.
*/
val |= 64 - (parange > 40 ? 40 : parange);
/*
* Check the availability of Hardware Access Flag / Dirty Bit
* Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
*/
tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
if (tmp)
val |= VTCR_EL2_HA;
/*
* Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
* bit in VTCR_EL2.
*/
tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_VMIDBITS_SHIFT) & 0xf;
val |= (tmp == ID_AA64MMFR1_VMIDBITS_16) ?
VTCR_EL2_VS_16BIT :
VTCR_EL2_VS_8BIT;
write_sysreg(val, vtcr_el2);
return parange;
}
......@@ -198,7 +198,7 @@ void deactivate_traps_vhe_put(void)
static void __hyp_text __activate_vm(struct kvm *kvm)
{
write_sysreg(kvm->arch.vttbr, vttbr_el2);
__load_guest_stage2(kvm);
}
static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
......@@ -263,7 +263,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
return false; /* Translation failed, back to guest */
/* Convert PAR to HPFAR format */
*hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
*hpfar = PAR_TO_HPFAR(tmp);
return true;
}
......
......@@ -152,8 +152,25 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
static void __hyp_text
__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
{
u64 pstate = ctxt->gp_regs.regs.pstate;
u64 mode = pstate & PSR_AA32_MODE_MASK;
/*
* Safety check to ensure we're setting the CPU up to enter the guest
* in a less privileged mode.
*
* If we are attempting a return to EL2 or higher in AArch64 state,
* program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
* we'll take an illegal exception state exception immediately after
* the ERET to the guest. Attempts to return to AArch32 Hyp will
* result in an illegal exception return because EL2's execution state
* is determined by SCR_EL3.RW.
*/
if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
pstate = PSR_MODE_EL2h | PSR_IL_BIT;
write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
write_sysreg_el2(pstate, spsr);
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
......
......@@ -30,7 +30,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
* bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
* let's flip TGE before executing the TLB operation.
*/
write_sysreg(kvm->arch.vttbr, vttbr_el2);
__load_guest_stage2(kvm);
val = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
write_sysreg(val, hcr_el2);
......@@ -39,7 +39,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
{
write_sysreg(kvm->arch.vttbr, vttbr_el2);
__load_guest_stage2(kvm);
isb();
}
......
......@@ -26,6 +26,7 @@
#include <kvm/arm_arch_timer.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/ptrace.h>
#include <asm/kvm_arm.h>
......@@ -33,6 +34,9 @@
#include <asm/kvm_coproc.h>
#include <asm/kvm_mmu.h>
/* Maximum phys_shift supported for any VM on this host */
static u32 kvm_ipa_limit;
/*
* ARMv8 Reset Values
*/
......@@ -55,12 +59,12 @@ static bool cpu_has_32bit_el1(void)
}
/**
* kvm_arch_dev_ioctl_check_extension
* kvm_arch_vm_ioctl_check_extension
*
* We currently assume that the number of HW registers is uniform
* across all CPUs (see cpuinfo_sanity_check).
*/
int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
......@@ -82,9 +86,11 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_VCPU_EVENTS:
r = 1;
break;
case KVM_CAP_ARM_VM_IPA_SIZE:
r = kvm_ipa_limit;
break;
default:
r = 0;
}
......@@ -133,3 +139,99 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset timer */
return kvm_timer_vcpu_reset(vcpu);
}
void kvm_set_ipa_limit(void)
{
unsigned int ipa_max, pa_max, va_max, parange;
parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7;
pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
/* Clamp the IPA limit to the PA size supported by the kernel */
ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max;
/*
* Since our stage2 table is dependent on the stage1 page table code,
* we must always honor the following condition:
*
* Number of levels in Stage1 >= Number of levels in Stage2.
*
* So clamp the ipa limit further down to limit the number of levels.
* Since we can concatenate up to 16 tables at entry level, we could
* go up to 4 bits above the maximum VA addressable with the current
* number of levels.
*/
va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
va_max += 4;
if (va_max < ipa_max)
ipa_max = va_max;
/*
* If the final limit is lower than the real physical address
* limit of the CPUs, report the reason.
*/
if (ipa_max < pa_max)
pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n",
(va_max < pa_max) ? "Virtual" : "Physical");
WARN(ipa_max < KVM_PHYS_SHIFT,
"KVM IPA limit (%d bit) is smaller than default size\n", ipa_max);
kvm_ipa_limit = ipa_max;
kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit);
}
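As a worked example (hypothetical host configuration, not from the patch), a
4K-page kernel built with 39bit VAs (PGDIR_SHIFT = 30) clamps the limit to
43 bits even when the CPU reports a 48bit PARange, triggering the "Virtual"
message above:

  #include <stdio.h>

  int main(void)
  {
          unsigned int pgdir_shift = 30, page_shift = 12, pa_max = 48;
          unsigned int va_max = pgdir_shift + page_shift - 3 + 4;        /* 43 */
          unsigned int ipa_max = va_max < pa_max ? va_max : pa_max;

          printf("IPA Size Limit: %ubits\n", ipa_max);    /* 43, not 48 */
          return 0;
  }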
/*
* Configure the VTCR_EL2 for this VM. The VTCR value is common
* across all the physical CPUs on the system. We use system wide
* sanitised values to fill in different fields, except for Hardware
* Management of Access Flags. HA Flag is set unconditionally on
* all CPUs, as it is safe to run with or without the feature and
* the bit is RES0 on CPUs that don't support it.
*/
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
{
u64 vtcr = VTCR_EL2_FLAGS;
u32 parange, phys_shift;
u8 lvls;
if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
return -EINVAL;
phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
if (phys_shift) {
if (phys_shift > kvm_ipa_limit ||
phys_shift < 32)
return -EINVAL;
} else {
phys_shift = KVM_PHYS_SHIFT;
}
parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
if (parange > ID_AA64MMFR0_PARANGE_MAX)
parange = ID_AA64MMFR0_PARANGE_MAX;
vtcr |= parange << VTCR_EL2_PS_SHIFT;
vtcr |= VTCR_EL2_T0SZ(phys_shift);
/*
* Use a minimum 2 level page table to prevent splitting
* host PMD huge pages at stage2.
*/
lvls = stage2_pgtable_levels(phys_shift);
if (lvls < 2)
lvls = 2;
vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
/*
* Enable the Hardware Access Flag management, unconditionally
* on all CPUs. The feature is RES0 on CPUs without the support
* and must be ignored by the CPUs.
*/
vtcr |= VTCR_EL2_HA;
/* Set the vmid bits */
vtcr |= (kvm_get_vmid_bits() == 16) ?
VTCR_EL2_VS_16BIT :
VTCR_EL2_VS_8BIT;
kvm->arch.vtcr = vtcr;
return 0;
}
......@@ -14,6 +14,16 @@
#ifndef _ASM_X86_FIXMAP_H
#define _ASM_X86_FIXMAP_H
/*
* Exposed to assembly code for setting up initial page tables. Cannot be
* calculated in assembly code (fixmap entries are an enum), but is sanity
* checked in the actual fixmap C code to make sure that the fixmap is
* covered fully.
*/
#define FIXMAP_PMD_NUM 2
/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */
#define FIXMAP_PMD_TOP 507
#ifndef __ASSEMBLY__
#include <linux/kernel.h>
#include <asm/acpi.h>
......
......@@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
void __init mem_encrypt_free_decrypted_mem(void);
bool sme_active(void);
bool sev_active(void);
#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))
#else /* !CONFIG_AMD_MEM_ENCRYPT */
#define sme_me_mask 0ULL
......@@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
#define __bss_decrypted
#endif /* CONFIG_AMD_MEM_ENCRYPT */
/*
......@@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
#define __sme_pa(x) (__pa(x) | sme_me_mask)
#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask)
extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
#endif /* __ASSEMBLY__ */
#endif /* __X86_MEM_ENCRYPT_H__ */
......@@ -14,6 +14,7 @@
#include <asm/processor.h>
#include <linux/bitops.h>
#include <linux/threads.h>
#include <asm/fixmap.h>
extern p4d_t level4_kernel_pgt[512];
extern p4d_t level4_ident_pgt[512];
......@@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pmd_t level2_fixmap_pgt[512];
extern pmd_t level2_ident_pgt[512];
extern pte_t level1_fixmap_pgt[512];
extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM];
extern pgd_t init_top_pgt[];
#define swapper_pg_dir init_top_pgt
......
......@@ -382,6 +382,11 @@ static inline bool is_mbm_event(int e)
e <= QOS_L3_MBM_LOCAL_EVENT_ID);
}
struct rdt_parse_data {
struct rdtgroup *rdtgrp;
char *buf;
};
/**
* struct rdt_resource - attributes of an RDT resource
* @rid: The index of the resource
......@@ -423,16 +428,19 @@ struct rdt_resource {
struct rdt_cache cache;
struct rdt_membw membw;
const char *format_str;
int (*parse_ctrlval) (void *data, struct rdt_resource *r,
struct rdt_domain *d);
int (*parse_ctrlval)(struct rdt_parse_data *data,
struct rdt_resource *r,
struct rdt_domain *d);
struct list_head evt_list;
int num_rmid;
unsigned int mon_scale;
unsigned long fflags;
};
int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d);
int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d);
int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
struct rdt_domain *d);
int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
......@@ -536,6 +544,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
int update_domains(struct rdt_resource *r, int closid);
int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(void);
void free_rmid(u32 rmid);
......
......@@ -64,19 +64,19 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return true;
}
int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d)
int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
struct rdt_domain *d)
{
unsigned long data;
char *buf = _buf;
unsigned long bw_val;
if (d->have_new_ctrl) {
rdt_last_cmd_printf("duplicate domain %d\n", d->id);
return -EINVAL;
}
if (!bw_validate(buf, &data, r))
if (!bw_validate(data->buf, &bw_val, r))
return -EINVAL;
d->new_ctrl = data;
d->new_ctrl = bw_val;
d->have_new_ctrl = true;
return 0;
......@@ -123,18 +123,13 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
return true;
}
struct rdt_cbm_parse_data {
struct rdtgroup *rdtgrp;
char *buf;
};
/*
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
struct rdt_domain *d)
{
struct rdt_cbm_parse_data *data = _data;
struct rdtgroup *rdtgrp = data->rdtgrp;
u32 cbm_val;
......@@ -195,11 +190,17 @@ int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
static int parse_line(char *line, struct rdt_resource *r,
struct rdtgroup *rdtgrp)
{
struct rdt_cbm_parse_data data;
struct rdt_parse_data data;
char *dom = NULL, *id;
struct rdt_domain *d;
unsigned long dom_id;
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
r->rid == RDT_RESOURCE_MBA) {
rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
return -EINVAL;
}
next:
if (!line || line[0] == '\0')
return 0;
......
......@@ -97,6 +97,12 @@ void rdt_last_cmd_printf(const char *fmt, ...)
* limited as the number of resources grows.
*/
static int closid_free_map;
static int closid_free_map_len;
int closids_supported(void)
{
return closid_free_map_len;
}
static void closid_init(void)
{
......@@ -111,6 +117,7 @@ static void closid_init(void)
/* CLOSID 0 is always reserved for the default group */
closid_free_map &= ~1;
closid_free_map_len = rdt_min_closid;
}
static int closid_alloc(void)
......@@ -802,7 +809,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
sw_shareable = 0;
exclusive = 0;
seq_printf(seq, "%d=", dom->id);
for (i = 0; i < r->num_closid; i++, ctrl++) {
for (i = 0; i < closids_supported(); i++, ctrl++) {
if (!closid_allocated(i))
continue;
mode = rdtgroup_mode_by_closid(i);
......@@ -989,7 +996,7 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
/* Check for overlap with other resource groups */
ctrl = d->ctrl_val;
for (i = 0; i < r->num_closid; i++, ctrl++) {
for (i = 0; i < closids_supported(); i++, ctrl++) {
ctrl_b = (unsigned long *)ctrl;
mode = rdtgroup_mode_by_closid(i);
if (closid_allocated(i) && i != closid &&
......@@ -1024,16 +1031,27 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
{
int closid = rdtgrp->closid;
struct rdt_resource *r;
bool has_cache = false;
struct rdt_domain *d;
for_each_alloc_enabled_rdt_resource(r) {
if (r->rid == RDT_RESOURCE_MBA)
continue;
has_cache = true;
list_for_each_entry(d, &r->domains, list) {
if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
rdtgrp->closid, false))
rdtgrp->closid, false)) {
rdt_last_cmd_puts("schemata overlaps\n");
return false;
}
}
}
if (!has_cache) {
rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n");
return false;
}
return true;
}
......@@ -1085,7 +1103,6 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
rdtgrp->mode = RDT_MODE_SHAREABLE;
} else if (!strcmp(buf, "exclusive")) {
if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
rdt_last_cmd_printf("schemata overlaps\n");
ret = -EINVAL;
goto out;
}
......@@ -1155,8 +1172,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
struct rdt_resource *r;
struct rdt_domain *d;
unsigned int size;
bool sep = false;
u32 cbm;
bool sep;
u32 ctrl;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
......@@ -1174,6 +1191,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
}
for_each_alloc_enabled_rdt_resource(r) {
sep = false;
seq_printf(s, "%*s:", max_name_width, r->name);
list_for_each_entry(d, &r->domains, list) {
if (sep)
......@@ -1181,8 +1199,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
size = 0;
} else {
cbm = d->ctrl_val[rdtgrp->closid];
size = rdtgroup_cbm_to_size(r, d, cbm);
ctrl = (!is_mba_sc(r) ?
d->ctrl_val[rdtgrp->closid] :
d->mbps_val[rdtgrp->closid]);
if (r->rid == RDT_RESOURCE_MBA)
size = ctrl;
else
size = rdtgroup_cbm_to_size(r, d, ctrl);
}
seq_printf(s, "%d=%u", d->id, size);
sep = true;
......@@ -2336,12 +2359,18 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
u32 *ctrl;
for_each_alloc_enabled_rdt_resource(r) {
/*
* Only initialize default allocations for CBM cache
* resources
*/
if (r->rid == RDT_RESOURCE_MBA)
continue;
list_for_each_entry(d, &r->domains, list) {
d->have_new_ctrl = false;
d->new_ctrl = r->cache.shareable_bits;
used_b = r->cache.shareable_bits;
ctrl = d->ctrl_val;
for (i = 0; i < r->num_closid; i++, ctrl++) {
for (i = 0; i < closids_supported(); i++, ctrl++) {
if (closid_allocated(i) && i != closid) {
mode = rdtgroup_mode_by_closid(i);
if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
......@@ -2373,6 +2402,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
}
for_each_alloc_enabled_rdt_resource(r) {
/*
* Only initialize default allocations for CBM cache
* resources
*/
if (r->rid == RDT_RESOURCE_MBA)
continue;
ret = update_domains(r, rdtgrp->closid);
if (ret < 0) {
rdt_last_cmd_puts("failed to initialize allocations\n");
......
......@@ -35,6 +35,7 @@
#include <asm/bootparam_utils.h>
#include <asm/microcode.h>
#include <asm/kasan.h>
#include <asm/fixmap.h>
/*
* Manage page tables very early on.
......@@ -112,6 +113,7 @@ static bool __head check_la57_support(unsigned long physaddr)
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
unsigned long vaddr, vaddr_end;
unsigned long load_delta, *p;
unsigned long pgtable_flags;
pgdval_t *pgd;
......@@ -165,7 +167,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
pud[511] += load_delta;
pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
pmd[506] += load_delta;
for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
pmd[i] += load_delta;
/*
* Set up the identity mapping for the switchover. These
......@@ -234,6 +237,21 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp);
/*
* Clear the memory encryption mask from the .bss..decrypted section.
* The bss section will be memset to zero later in the initialization so
* there is no need to zero it after changing the memory encryption
* attribute.
*/
if (mem_encrypt_active()) {
vaddr = (unsigned long)__start_bss_decrypted;
vaddr_end = (unsigned long)__end_bss_decrypted;
for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
i = pmd_index(vaddr);
pmd[i] -= sme_get_me_mask();
}
}
/*
* Return the SME encryption mask (if SME is active) to be used as a
* modifier for the initial pgdir entry programmed into CR3.
......
......@@ -24,6 +24,7 @@
#include "../entry/calling.h"
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/fixmap.h>
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
......@@ -445,13 +446,20 @@ NEXT_PAGE(level2_kernel_pgt)
KERNEL_IMAGE_SIZE/PMD_SIZE)
NEXT_PAGE(level2_fixmap_pgt)
.fill 506,8,0
.quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
/* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
.fill 5,8,0
.fill (512 - 4 - FIXMAP_PMD_NUM),8,0
pgtno = 0
.rept (FIXMAP_PMD_NUM)
.quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \
+ _PAGE_TABLE_NOENC;
pgtno = pgtno + 1
.endr
/* 6 MB reserved space + a 2MB hole */
.fill 4,8,0
NEXT_PAGE(level1_fixmap_pgt)
.rept (FIXMAP_PMD_NUM)
.fill 512,8,0
.endr
#undef PMDS
......
......@@ -28,6 +28,7 @@
#include <linux/sched/clock.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/set_memory.h>
#include <asm/hypervisor.h>
#include <asm/mem_encrypt.h>
......@@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
(PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
static struct pvclock_vsyscall_time_info
hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE);
static struct pvclock_wall_clock wall_clock;
hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
static struct pvclock_wall_clock wall_clock __bss_decrypted;
static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
static struct pvclock_vsyscall_time_info *hvclock_mem;
static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
{
......@@ -236,6 +238,45 @@ static void kvm_shutdown(void)
native_machine_shutdown();
}
static void __init kvmclock_init_mem(void)
{
unsigned long ncpus;
unsigned int order;
struct page *p;
int r;
if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus())
return;
ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
order = get_order(ncpus * sizeof(*hvclock_mem));
p = alloc_pages(GFP_KERNEL, order);
if (!p) {
pr_warn("%s: failed to alloc %d pages", __func__, (1U << order));
return;
}
hvclock_mem = page_address(p);
/*
* hvclock is shared between the guest and the hypervisor, must
* be mapped decrypted.
*/
if (sev_active()) {
r = set_memory_decrypted((unsigned long) hvclock_mem,
1UL << order);
if (r) {
__free_pages(p, order);
hvclock_mem = NULL;
pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n");
return;
}
}
memset(hvclock_mem, 0, PAGE_SIZE << order);
}
static int __init kvm_setup_vsyscall_timeinfo(void)
{
#ifdef CONFIG_X86_64
......@@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void)
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
#endif
kvmclock_init_mem();
return 0;
}
early_initcall(kvm_setup_vsyscall_timeinfo);
......@@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu)
/* Use the static page for the first CPUs, allocate otherwise */
if (cpu < HVC_BOOT_ARRAY_SIZE)
p = &hv_clock_boot[cpu];
else if (hvclock_mem)
p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE;
else
p = kzalloc(sizeof(*p), GFP_KERNEL);
return -ENOMEM;
per_cpu(hv_clock_per_cpu, cpu) = p;
return p ? 0 : -ENOMEM;
......
......@@ -91,7 +91,7 @@ unsigned paravirt_patch_call(void *insnbuf,
if (len < 5) {
#ifdef CONFIG_RETPOLINE
WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr);
WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
#endif
return len; /* call too long for patch site */
}
......@@ -111,7 +111,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
if (len < 5) {
#ifdef CONFIG_RETPOLINE
WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr);
WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
#endif
return len; /* call too long for patch site */
}
......
......@@ -65,6 +65,23 @@ jiffies_64 = jiffies;
#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
/*
* This section contains data which will be mapped as decrypted. Memory
* encryption operates on a page basis. Make this section PMD-aligned
* to avoid splitting the pages while mapping the section early.
*
* Note: We use a separate section so that only this section gets
* decrypted to avoid exposing more than we wish.
*/
#define BSS_DECRYPTED \
. = ALIGN(PMD_SIZE); \
__start_bss_decrypted = .; \
*(.bss..decrypted); \
. = ALIGN(PAGE_SIZE); \
__start_bss_decrypted_unused = .; \
. = ALIGN(PMD_SIZE); \
__end_bss_decrypted = .; \
#else
#define X86_ALIGN_RODATA_BEGIN
......@@ -74,6 +91,7 @@ jiffies_64 = jiffies;
#define ALIGN_ENTRY_TEXT_BEGIN
#define ALIGN_ENTRY_TEXT_END
#define BSS_DECRYPTED
#endif
......@@ -355,6 +373,7 @@ SECTIONS
__bss_start = .;
*(.bss..page_aligned)
*(.bss)
BSS_DECRYPTED
. = ALIGN(PAGE_SIZE);
__bss_stop = .;
}
......
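The BSS_DECRYPTED output section added above collects anything placed in the .bss..decrypted input section, PMD-aligned so early boot code can flip the page-level encryption attribute without splitting large pages. A minimal sketch of how a variable lands there; the __bss_decrypted spelling mirrors the annotation used in the kvmclock hunk earlier, but its exact definition is an assumption, not quoted from this diff:

/* Sketch: a marker that drops a variable into the decrypted BSS section. */
#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))

/* Shared with the hypervisor, so it must stay decrypted under SME/SEV. */
static unsigned long shared_scratch[64] __bss_decrypted;

int main(void)
{
	shared_scratch[0] = 1;		/* placed in .bss..decrypted, not plain .bss */
	return (int)shared_scratch[0];
}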
......@@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end)
set_memory_np_noalias(begin_ul, len_pages);
}
void __weak mem_encrypt_free_decrypted_mem(void) { }
void __ref free_initmem(void)
{
e820__reallocate_tables();
mem_encrypt_free_decrypted_mem();
free_kernel_image_pages(&__init_begin, &__init_end);
}
......
......@@ -348,6 +348,30 @@ bool sev_active(void)
EXPORT_SYMBOL(sev_active);
/* Architecture __weak replacement functions */
void __init mem_encrypt_free_decrypted_mem(void)
{
unsigned long vaddr, vaddr_end, npages;
int r;
vaddr = (unsigned long)__start_bss_decrypted_unused;
vaddr_end = (unsigned long)__end_bss_decrypted;
npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
/*
* The unused memory range was mapped decrypted, change the encryption
* attribute from decrypted to encrypted before freeing it.
*/
if (mem_encrypt_active()) {
r = set_memory_encrypted(vaddr, npages);
if (r) {
pr_warn("failed to free unused decrypted pages\n");
return;
}
}
free_init_pages("unused decrypted", vaddr, vaddr_end);
}
void __init mem_encrypt_init(void)
{
if (!sme_me_mask)
......
......@@ -637,6 +637,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
{
unsigned long address = __fix_to_virt(idx);
#ifdef CONFIG_X86_64
/*
* Ensure that the static initial page tables are covering the
* fixmap completely.
*/
BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
(FIXMAP_PMD_NUM * PTRS_PER_PTE));
#endif
if (idx >= __end_of_fixed_addresses) {
BUG();
return;
......
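The BUILD_BUG_ON() added above encodes a simple capacity check: each statically reserved level1 table holds PTRS_PER_PTE (512) fixmap slots, so FIXMAP_PMD_NUM tables must cover all __end_of_permanent_fixed_addresses slots. A compile-time sketch of the same invariant; FIXMAP_PMD_NUM and the slot count below are placeholders, not values from this diff:

/* Illustrative only: the kernel's BUILD_BUG_ON expressed with C11 static_assert. */
#include <assert.h>

#define PTRS_PER_PTE    512	/* PTEs per 4 KiB page table       */
#define FIXMAP_PMD_NUM  2	/* assumed number of level1 tables */
#define END_OF_PERMANENT_FIXED_ADDRESSES 640	/* hypothetical slot count */

/* Fails to compile if the fixmap outgrows the static page tables. */
static_assert(END_OF_PERMANENT_FIXED_ADDRESSES <= FIXMAP_PMD_NUM * PTRS_PER_PTE,
	      "fixmap does not fit in the static level1 tables");

int main(void) { return 0; }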
......@@ -1907,7 +1907,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* L3_k[511] -> level2_fixmap_pgt */
convert_pfn_mfn(level3_kernel_pgt);
/* L3_k[511][506] -> level1_fixmap_pgt */
/* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */
convert_pfn_mfn(level2_fixmap_pgt);
/* We get [511][511] and have Xen's version of level2_kernel_pgt */
......@@ -1952,7 +1952,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
for (i = 0; i < FIXMAP_PMD_NUM; i++) {
set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE,
PAGE_KERNEL_RO);
}
/* Pin down new L4 */
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
......
......@@ -478,7 +478,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
int err, ret = IRQ_NONE;
struct pt_regs regs;
struct pt_regs regs = {0};
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
uint8_t xenpmu_flags = get_xenpmu_flags();
......
......@@ -1684,7 +1684,7 @@ void generic_end_io_acct(struct request_queue *q, int req_op,
const int sgrp = op_stat_group(req_op);
int cpu = part_stat_lock();
part_stat_add(cpu, part, ticks[sgrp], duration);
part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
part_round_stats(q, cpu, part);
part_dec_in_flight(q, part, op_is_write(req_op));
......
......@@ -2733,17 +2733,15 @@ void blk_account_io_done(struct request *req, u64 now)
* containing request is enough.
*/
if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
unsigned long duration;
const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
int cpu;
duration = nsecs_to_jiffies(now - req->start_time_ns);
cpu = part_stat_lock();
part = req->part;
part_stat_inc(cpu, part, ios[sgrp]);
part_stat_add(cpu, part, ticks[sgrp], duration);
part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns);
part_round_stats(req->q, cpu, part);
part_dec_in_flight(req->q, part, rq_data_dir(req));
......
......@@ -1343,18 +1343,18 @@ static int diskstats_show(struct seq_file *seqf, void *v)
part_stat_read(hd, ios[STAT_READ]),
part_stat_read(hd, merges[STAT_READ]),
part_stat_read(hd, sectors[STAT_READ]),
jiffies_to_msecs(part_stat_read(hd, ticks[STAT_READ])),
(unsigned int)part_stat_read_msecs(hd, STAT_READ),
part_stat_read(hd, ios[STAT_WRITE]),
part_stat_read(hd, merges[STAT_WRITE]),
part_stat_read(hd, sectors[STAT_WRITE]),
jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])),
(unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
inflight[0],
jiffies_to_msecs(part_stat_read(hd, io_ticks)),
jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
part_stat_read(hd, ios[STAT_DISCARD]),
part_stat_read(hd, merges[STAT_DISCARD]),
part_stat_read(hd, sectors[STAT_DISCARD]),
jiffies_to_msecs(part_stat_read(hd, ticks[STAT_DISCARD]))
(unsigned int)part_stat_read_msecs(hd, STAT_DISCARD)
);
}
disk_part_iter_exit(&piter);
......
......@@ -136,18 +136,18 @@ ssize_t part_stat_show(struct device *dev,
part_stat_read(p, ios[STAT_READ]),
part_stat_read(p, merges[STAT_READ]),
(unsigned long long)part_stat_read(p, sectors[STAT_READ]),
jiffies_to_msecs(part_stat_read(p, ticks[STAT_READ])),
(unsigned int)part_stat_read_msecs(p, STAT_READ),
part_stat_read(p, ios[STAT_WRITE]),
part_stat_read(p, merges[STAT_WRITE]),
(unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])),
(unsigned int)part_stat_read_msecs(p, STAT_WRITE),
inflight[0],
jiffies_to_msecs(part_stat_read(p, io_ticks)),
jiffies_to_msecs(part_stat_read(p, time_in_queue)),
part_stat_read(p, ios[STAT_DISCARD]),
part_stat_read(p, merges[STAT_DISCARD]),
(unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
jiffies_to_msecs(part_stat_read(p, ticks[STAT_DISCARD])));
(unsigned int)part_stat_read_msecs(p, STAT_DISCARD));
}
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
......
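The accounting hunks above switch the per-partition I/O statistics from jiffies-based ticks to nanoseconds and only convert to milliseconds when the numbers are reported, via the part_stat_read_msecs() helper defined in the genhd.h hunk further down (a div_u64() by NSEC_PER_MSEC). A minimal sketch of that reporting-time conversion, with an arbitrary sample value:

/* Illustrative only: nanosecond accounting reported in milliseconds. */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_MSEC 1000000ULL

/* stand-in for div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC) */
static uint64_t nsecs_to_msecs(uint64_t nsecs)
{
	return nsecs / NSEC_PER_MSEC;
}

int main(void)
{
	uint64_t read_nsecs = 1234567890ULL;	/* accumulated request duration in ns */

	/* 1234567890 ns -> 1234 ms, no jiffies granularity involved */
	printf("%u ms\n", (unsigned int)nsecs_to_msecs(read_nsecs));
	return 0;
}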
......@@ -90,14 +90,17 @@ config EFI_ARMSTUB
config EFI_ARMSTUB_DTB_LOADER
bool "Enable the DTB loader"
depends on EFI_ARMSTUB
default y
help
Select this config option to add support for the dtb= command
line parameter, allowing a device tree blob to be loaded into
memory from the EFI System Partition by the stub.
The device tree is typically provided by the platform or by
the bootloader, so this option is mostly for development
purposes only.
If the device tree is provided by the platform or by
the bootloader, this option may not be needed.
However, it remains necessary for development purposes and to
preserve existing functionality for bootloaders that do not
have such support.
config EFI_BOOTLOADER_CONTROL
tristate "EFI Bootloader Control"
......
......@@ -528,8 +528,8 @@ static int usbhs_omap_get_dt_pdata(struct device *dev,
}
static const struct of_device_id usbhs_child_match_table[] = {
{ .compatible = "ti,omap-ehci", },
{ .compatible = "ti,omap-ohci", },
{ .compatible = "ti,ehci-omap", },
{ .compatible = "ti,ohci-omap3", },
{ }
};
......@@ -855,6 +855,7 @@ static struct platform_driver usbhs_omap_driver = {
.pm = &usbhsomap_dev_pm_ops,
.of_match_table = usbhs_omap_dt_ids,
},
.probe = usbhs_omap_probe,
.remove = usbhs_omap_remove,
};
......@@ -864,9 +865,9 @@ MODULE_ALIAS("platform:" USBHS_DRIVER_NAME);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI");
static int __init omap_usbhs_drvinit(void)
static int omap_usbhs_drvinit(void)
{
return platform_driver_probe(&usbhs_omap_driver, usbhs_omap_probe);
return platform_driver_register(&usbhs_omap_driver);
}
/*
......@@ -878,7 +879,7 @@ static int __init omap_usbhs_drvinit(void)
*/
fs_initcall_sync(omap_usbhs_drvinit);
static void __exit omap_usbhs_drvexit(void)
static void omap_usbhs_drvexit(void)
{
platform_driver_unregister(&usbhs_omap_driver);
}
......
......@@ -379,7 +379,7 @@ static const struct intel_padgroup cnlh_community1_gpps[] = {
static const struct intel_padgroup cnlh_community3_gpps[] = {
CNL_GPP(0, 155, 178, 192), /* GPP_K */
CNL_GPP(1, 179, 202, 224), /* GPP_H */
CNL_GPP(2, 203, 215, 258), /* GPP_E */
CNL_GPP(2, 203, 215, 256), /* GPP_E */
CNL_GPP(3, 216, 239, 288), /* GPP_F */
CNL_GPP(4, 240, 248, CNL_NO_GPIO), /* SPI */
};
......
......@@ -747,13 +747,63 @@ static const struct pinctrl_desc intel_pinctrl_desc = {
.owner = THIS_MODULE,
};
/**
* intel_gpio_to_pin() - Translate from GPIO offset to pin number
* @pctrl: Pinctrl structure
* @offset: GPIO offset from gpiolib
* @community: Community is filled here if not %NULL
* @padgrp: Pad group is filled here if not %NULL
*
* When coming through gpiolib irqchip, the GPIO offset is not
* automatically translated to pinctrl pin number. This function can be
* used to find out the corresponding pinctrl pin.
*/
static int intel_gpio_to_pin(struct intel_pinctrl *pctrl, unsigned offset,
const struct intel_community **community,
const struct intel_padgroup **padgrp)
{
int i;
for (i = 0; i < pctrl->ncommunities; i++) {
const struct intel_community *comm = &pctrl->communities[i];
int j;
for (j = 0; j < comm->ngpps; j++) {
const struct intel_padgroup *pgrp = &comm->gpps[j];
if (pgrp->gpio_base < 0)
continue;
if (offset >= pgrp->gpio_base &&
offset < pgrp->gpio_base + pgrp->size) {
int pin;
pin = pgrp->base + offset - pgrp->gpio_base;
if (community)
*community = comm;
if (padgrp)
*padgrp = pgrp;
return pin;
}
}
}
return -EINVAL;
}
static int intel_gpio_get(struct gpio_chip *chip, unsigned offset)
{
struct intel_pinctrl *pctrl = gpiochip_get_data(chip);
void __iomem *reg;
u32 padcfg0;
int pin;
pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL);
if (pin < 0)
return -EINVAL;
reg = intel_get_padcfg(pctrl, offset, PADCFG0);
reg = intel_get_padcfg(pctrl, pin, PADCFG0);
if (!reg)
return -EINVAL;
......@@ -770,8 +820,13 @@ static void intel_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
unsigned long flags;
void __iomem *reg;
u32 padcfg0;
int pin;
pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL);
if (pin < 0)
return;
reg = intel_get_padcfg(pctrl, offset, PADCFG0);
reg = intel_get_padcfg(pctrl, pin, PADCFG0);
if (!reg)
return;
......@@ -790,8 +845,13 @@ static int intel_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
struct intel_pinctrl *pctrl = gpiochip_get_data(chip);
void __iomem *reg;
u32 padcfg0;
int pin;
reg = intel_get_padcfg(pctrl, offset, PADCFG0);
pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL);
if (pin < 0)
return -EINVAL;
reg = intel_get_padcfg(pctrl, pin, PADCFG0);
if (!reg)
return -EINVAL;
......@@ -827,51 +887,6 @@ static const struct gpio_chip intel_gpio_chip = {
.set_config = gpiochip_generic_config,
};
/**
* intel_gpio_to_pin() - Translate from GPIO offset to pin number
* @pctrl: Pinctrl structure
* @offset: GPIO offset from gpiolib
* @community: Community is filled here if not %NULL
* @padgrp: Pad group is filled here if not %NULL
*
* When coming through gpiolib irqchip, the GPIO offset is not
* automatically translated to pinctrl pin number. This function can be
* used to find out the corresponding pinctrl pin.
*/
static int intel_gpio_to_pin(struct intel_pinctrl *pctrl, unsigned offset,
const struct intel_community **community,
const struct intel_padgroup **padgrp)
{
int i;
for (i = 0; i < pctrl->ncommunities; i++) {
const struct intel_community *comm = &pctrl->communities[i];
int j;
for (j = 0; j < comm->ngpps; j++) {
const struct intel_padgroup *pgrp = &comm->gpps[j];
if (pgrp->gpio_base < 0)
continue;
if (offset >= pgrp->gpio_base &&
offset < pgrp->gpio_base + pgrp->size) {
int pin;
pin = pgrp->base + offset - pgrp->gpio_base;
if (community)
*community = comm;
if (padgrp)
*padgrp = pgrp;
return pin;
}
}
}
return -EINVAL;
}
static int intel_gpio_irq_reqres(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
......
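intel_gpio_to_pin(), now moved above its first user, translates a gpiolib offset by locating the pad group whose [gpio_base, gpio_base + size) window contains it and applying pin = base + offset - gpio_base. A stand-alone sketch of the same lookup; the pad-group numbers below are invented for illustration and do not describe any real community:

/* Illustrative translation mirroring intel_gpio_to_pin()'s arithmetic. */
#include <stdio.h>

struct padgroup {
	int base;	/* first pinctrl pin number of the group    */
	int gpio_base;	/* first gpiolib offset mapped to the group */
	int size;	/* number of pads in the group              */
};

static int gpio_to_pin(const struct padgroup *grps, int ngrps, int offset)
{
	for (int i = 0; i < ngrps; i++) {
		const struct padgroup *g = &grps[i];

		if (g->gpio_base < 0)
			continue;	/* group has no GPIO mapping */
		if (offset >= g->gpio_base && offset < g->gpio_base + g->size)
			return g->base + offset - g->gpio_base;
	}
	return -1;	/* no group claims this offset */
}

int main(void)
{
	/* hypothetical groups: pins 0..23 at offsets 0..23, pins 100..123 at offsets 32..55 */
	struct padgroup grps[] = {
		{ .base = 0,   .gpio_base = 0,  .size = 24 },
		{ .base = 100, .gpio_base = 32, .size = 24 },
	};

	printf("offset 40 -> pin %d\n", gpio_to_pin(grps, 2, 40));	/* prints 108 */
	return 0;
}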
......@@ -1040,18 +1040,33 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
return ret;
for (i = 0; i < count; i++) {
/* Retry eagain maps */
if (map_ops[i].status == GNTST_eagain)
gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
&map_ops[i].status, __func__);
if (map_ops[i].status == GNTST_okay) {
switch (map_ops[i].status) {
case GNTST_okay:
{
struct xen_page_foreign *foreign;
SetPageForeign(pages[i]);
foreign = xen_page_foreign(pages[i]);
foreign->domid = map_ops[i].dom;
foreign->gref = map_ops[i].ref;
break;
}
case GNTST_no_device_space:
pr_warn_ratelimited("maptrack limit reached, can't map all guest pages\n");
break;
case GNTST_eagain:
/* Retry eagain maps */
gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref,
map_ops + i,
&map_ops[i].status, __func__);
/* Test status in next loop iteration. */
i--;
break;
default:
break;
}
}
......
......@@ -83,10 +83,10 @@ struct partition {
} __attribute__((packed));
struct disk_stats {
u64 nsecs[NR_STAT_GROUPS];
unsigned long sectors[NR_STAT_GROUPS];
unsigned long ios[NR_STAT_GROUPS];
unsigned long merges[NR_STAT_GROUPS];
unsigned long ticks[NR_STAT_GROUPS];
unsigned long io_ticks;
unsigned long time_in_queue;
};
......@@ -354,6 +354,9 @@ static inline void free_part_stats(struct hd_struct *part)
#endif /* CONFIG_SMP */
#define part_stat_read_msecs(part, which) \
div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC)
#define part_stat_read_accum(part, field) \
(part_stat_read(part, field[STAT_READ]) + \
part_stat_read(part, field[STAT_WRITE]) + \
......
......@@ -357,6 +357,8 @@
#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12))
#define GITS_BASER_NR_REGS 8
#define GITS_BASER_VALID (1ULL << 63)
......@@ -388,6 +390,9 @@
#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48)
#define GITS_BASER_PHYS_52_to_48(phys) \
(((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
#define GITS_BASER_ADDR_48_to_52(baser) \
(((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
#define GITS_BASER_SHAREABILITY_SHIFT (10)
#define GITS_BASER_InnerShareable \
GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
......
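GITS_BASER_PHYS_52_to_48() and the new GITS_BASER_ADDR_48_to_52() above pack and unpack a 52-bit physical address in a 64K-page GITS_BASER value: address bits [47:16] stay in place while bits [51:48] are carried in register bits [15:12]. A small round-trip sketch using the same two macros; the sample address and the local GENMASK_ULL() stand-in are only for illustration:

/* Illustrative round-trip through the GITS_BASER 52-bit address encoding. */
#include <stdio.h>
#include <stdint.h>

#define GENMASK_ULL(h, l)  (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define GITS_BASER_PHYS_52_to_48(phys) \
	(((phys) & GENMASK_ULL(47, 16)) | ((((phys) >> 48) & 0xf) << 12))
#define GITS_BASER_ADDR_48_to_52(baser) \
	(((baser) & GENMASK_ULL(47, 16)) | ((((baser) >> 12) & 0xf) << 48))

int main(void)
{
	uint64_t phys  = 0x000f123456780000ULL;	/* 52-bit, 64K-aligned address */
	uint64_t baser = GITS_BASER_PHYS_52_to_48(phys);

	/* bits [51:48] (0xf) end up in baser bits [15:12] and come back intact */
	printf("baser=0x%016llx addr=0x%016llx\n",
	       (unsigned long long)baser,
	       (unsigned long long)GITS_BASER_ADDR_48_to_52(baser));
	return 0;
}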
......@@ -21,7 +21,7 @@
/*
* Regulator configuration
*/
/* DA9063 regulator IDs */
/* DA9063 and DA9063L regulator IDs */
enum {
/* BUCKs */
DA9063_ID_BCORE1,
......@@ -37,18 +37,20 @@ enum {
DA9063_ID_BMEM_BIO_MERGED,
/* When two BUCKs are merged, they cannot be reused separately */
/* LDOs */
/* LDOs on both DA9063 and DA9063L */
DA9063_ID_LDO3,
DA9063_ID_LDO7,
DA9063_ID_LDO8,
DA9063_ID_LDO9,
DA9063_ID_LDO11,
/* DA9063-only LDOs */
DA9063_ID_LDO1,
DA9063_ID_LDO2,
DA9063_ID_LDO3,
DA9063_ID_LDO4,
DA9063_ID_LDO5,
DA9063_ID_LDO6,
DA9063_ID_LDO7,
DA9063_ID_LDO8,
DA9063_ID_LDO9,
DA9063_ID_LDO10,
DA9063_ID_LDO11,
};
/* Regulators platform data */
......
......@@ -757,6 +757,15 @@ struct kvm_ppc_resize_hpt {
#define KVM_S390_SIE_PAGE_OFFSET 1
/*
* On arm64, machine type can be used to request the physical
* address size for the VM. Bits[7-0] are reserved for the guest
* PA size shift (i.e., log2(PA_Size)). For backward compatibility,
* value 0 implies the default IPA size, 40bits.
*/
#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL
#define KVM_VM_TYPE_ARM_IPA_SIZE(x) \
((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
/*
* ioctls for /dev/kvm fds:
*/
......@@ -965,6 +974,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_COALESCED_PIO 162
#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
#define KVM_CAP_EXCEPTION_PAYLOAD 164
#define KVM_CAP_ARM_VM_IPA_SIZE 165
#ifdef KVM_CAP_IRQ_ROUTING
......
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o
......@@ -50,6 +50,7 @@
#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#ifndef EM_BPF
#define EM_BPF 247
......@@ -469,7 +470,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
obj->efile.fd = open(obj->path, O_RDONLY);
if (obj->efile.fd < 0) {
char errmsg[STRERR_BUFSIZE];
char *cp = strerror_r(errno, errmsg, sizeof(errmsg));
char *cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to open %s: %s\n", obj->path, cp);
return -errno;
......@@ -810,8 +811,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
data->d_size, name, idx);
if (err) {
char errmsg[STRERR_BUFSIZE];
char *cp = strerror_r(-err, errmsg,
sizeof(errmsg));
char *cp = str_error(-err, errmsg, sizeof(errmsg));
pr_warning("failed to alloc program %s (%s): %s",
name, obj->path, cp);
......@@ -1140,7 +1140,7 @@ bpf_object__create_maps(struct bpf_object *obj)
*pfd = bpf_create_map_xattr(&create_attr);
if (*pfd < 0 && create_attr.btf_key_type_id) {
cp = strerror_r(errno, errmsg, sizeof(errmsg));
cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, cp, errno);
create_attr.btf_fd = 0;
......@@ -1155,7 +1155,7 @@ bpf_object__create_maps(struct bpf_object *obj)
size_t j;
err = *pfd;
cp = strerror_r(errno, errmsg, sizeof(errmsg));
cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to create map (name: '%s'): %s\n",
map->name, cp);
for (j = 0; j < i; j++)
......@@ -1339,7 +1339,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
}
ret = -LIBBPF_ERRNO__LOAD;
cp = strerror_r(errno, errmsg, sizeof(errmsg));
cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("load bpf program failed: %s\n", cp);
if (log_buf && log_buf[0] != '\0') {
......@@ -1654,7 +1654,7 @@ static int check_path(const char *path)
dir = dirname(dname);
if (statfs(dir, &st_fs)) {
cp = strerror_r(errno, errmsg, sizeof(errmsg));
cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to statfs %s: %s\n", dir, cp);
err = -errno;
}
......@@ -1690,7 +1690,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
}
if (bpf_obj_pin(prog->instances.fds[instance], path)) {
cp = strerror_r(errno, errmsg, sizeof(errmsg));
cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to pin program: %s\n", cp);
return -errno;
}
......@@ -1708,7 +1708,7 @@ static int make_dir(const char *path)
err = -errno;
if (err) {
cp = strerror_r(-err, errmsg, sizeof(errmsg));
cp = str_error(-err, errmsg, sizeof(errmsg));
pr_warning("failed to mkdir %s: %s\n", path, cp);
}
return err;
......@@ -1770,7 +1770,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
}
if (bpf_obj_pin(map->fd, path)) {
cp = strerror_r(errno, errmsg, sizeof(errmsg));
cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to pin map: %s\n", cp);
return -errno;
}
......
// SPDX-License-Identifier: LGPL-2.1
#undef _GNU_SOURCE
#include <string.h>
#include <stdio.h>
#include "str_error.h"
/*
* Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl
* libc, while checking strerror_r() return to avoid having to check this in
* all places calling it.
*/
char *str_error(int err, char *dst, int len)
{
int ret = strerror_r(err, dst, len);
if (ret)
snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
return dst;
}
// SPDX-License-Identifier: LGPL-2.1
#ifndef BPF_STR_ERROR
#define BPF_STR_ERROR
char *str_error(int err, char *dst, int len);
#endif // BPF_STR_ERROR
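str_error() exists because strerror_r() comes in two incompatible flavours: the XSI/POSIX variant (the only one musl provides, and the one selected here by undefining _GNU_SOURCE) returns an int and fills the caller's buffer, while the GNU variant returns a char * that may ignore the buffer entirely. The wrapper normalizes both cases into a printable string. A hedged usage sketch, assuming only the declaration above; STRERR_BUFSIZE is defined locally for illustration:

/* Illustrative caller: always gets a printable string, even on musl. */
#include <errno.h>
#include <stdio.h>
#include "str_error.h"

#define STRERR_BUFSIZE 128	/* assumed buffer size, as used by libbpf callers */

int main(void)
{
	char errmsg[STRERR_BUFSIZE];

	errno = ENOENT;
	/* no need to check strerror_r()'s return at every call site */
	fprintf(stderr, "failed to open map: %s\n",
		str_error(errno, errmsg, sizeof(errmsg)));
	return 0;
}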
......@@ -280,7 +280,7 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt
mv $@+ $@
ifdef USE_ASCIIDOCTOR
$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.txt
$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b manpage -d manpage \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
......
......@@ -120,8 +120,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int ret, cpu;
if (type)
return -EINVAL;
ret = kvm_arm_setup_stage2(kvm, type);
if (ret)
return ret;
kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
if (!kvm->arch.last_vcpu_ran)
......@@ -212,6 +213,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_READONLY_MEM:
case KVM_CAP_MP_STATE:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_VCPU_EVENTS:
r = 1;
break;
case KVM_CAP_ARM_SET_DEVICE_ADDR:
......@@ -240,7 +242,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
default:
r = kvm_arch_dev_ioctl_check_extension(kvm, ext);
r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
break;
}
return r;
......@@ -544,7 +546,7 @@ static void update_vttbr(struct kvm *kvm)
/* update vttbr to be used with the new vmid */
pgd_phys = virt_to_phys(kvm->arch.pgd);
BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm));
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid;
......@@ -1295,8 +1297,6 @@ static void cpu_init_hyp_mode(void *dummy)
__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
__cpu_init_stage2();
kvm_arm_init_debug();
}
static void cpu_hyp_reset(void)
......@@ -1309,16 +1309,12 @@ static void cpu_hyp_reinit(void)
{
cpu_hyp_reset();
if (is_kernel_in_hyp_mode()) {
/*
* __cpu_init_stage2() is safe to call even if the PM
* event was cancelled before the CPU was reset.
*/
__cpu_init_stage2();
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
} else {
else
cpu_init_hyp_mode(NULL);
}
kvm_arm_init_debug();
if (vgic_present)
kvm_vgic_init_cpu_hardware();
......@@ -1412,6 +1408,8 @@ static int init_common_resources(void)
kvm_vmid_bits = kvm_get_vmid_bits();
kvm_info("%d-bit VMID\n", kvm_vmid_bits);
kvm_set_ipa_limit();
return 0;
}
......
......@@ -241,13 +241,6 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
list_for_each_entry(dev, &(its)->device_list, dev_list) \
list_for_each_entry(ite, &(dev)->itt_head, ite_list)
/*
* We only implement 48 bits of PA at the moment, although the ITS
* supports more. Let's be restrictive here.
*/
#define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16))
#define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12))
#define GIC_LPI_OFFSET 8192
#define VITS_TYPER_IDBITS 16
......@@ -759,6 +752,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
{
int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser);
int esz = GITS_BASER_ENTRY_SIZE(baser);
int index;
gfn_t gfn;
......@@ -783,7 +777,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
if (id >= (l1_tbl_size / esz))
return false;
addr = BASER_ADDRESS(baser) + id * esz;
addr = base + id * esz;
gfn = addr >> PAGE_SHIFT;
if (eaddr)
......@@ -798,7 +792,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
/* Each 1st level entry is represented by a 64-bit value. */
if (kvm_read_guest_lock(its->dev->kvm,
BASER_ADDRESS(baser) + index * sizeof(indirect_ptr),
base + index * sizeof(indirect_ptr),
&indirect_ptr, sizeof(indirect_ptr)))
return false;
......@@ -808,11 +802,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
if (!(indirect_ptr & BIT_ULL(63)))
return false;
/*
* Mask the guest physical address and calculate the frame number.
* Any address beyond our supported 48 bits of PA will be caught
* by the actual check in the final step.
*/
/* Mask the guest physical address and calculate the frame number. */
indirect_ptr &= GENMASK_ULL(51, 16);
/* Find the address of the actual entry */
......@@ -1304,9 +1294,6 @@ static u64 vgic_sanitise_its_baser(u64 reg)
GITS_BASER_OUTER_CACHEABILITY_SHIFT,
vgic_sanitise_outer_cacheability);
/* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */
reg &= ~GENMASK_ULL(15, 12);
/* We support only one (ITS) page size: 64K */
reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K;
......@@ -1325,11 +1312,8 @@ static u64 vgic_sanitise_its_cbaser(u64 reg)
GITS_CBASER_OUTER_CACHEABILITY_SHIFT,
vgic_sanitise_outer_cacheability);
/*
* Sanitise the physical address to be 64k aligned.
* Also limit the physical addresses to 48 bits.
*/
reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12));
/* Sanitise the physical address to be 64k aligned. */
reg &= ~GENMASK_ULL(15, 12);
return reg;
}
......@@ -1375,7 +1359,7 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
if (!its->enabled)
return;
cbaser = CBASER_ADDRESS(its->cbaser);
cbaser = GITS_CBASER_ADDRESS(its->cbaser);
while (its->cwriter != its->creadr) {
int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr,
......@@ -2233,7 +2217,7 @@ static int vgic_its_restore_device_tables(struct vgic_its *its)
if (!(baser & GITS_BASER_VALID))
return 0;
l1_gpa = BASER_ADDRESS(baser);
l1_gpa = GITS_BASER_ADDR_48_to_52(baser);
if (baser & GITS_BASER_INDIRECT) {
l1_esz = GITS_LVL1_ENTRY_SIZE;
......@@ -2305,7 +2289,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its)
{
const struct vgic_its_abi *abi = vgic_its_get_abi(its);
u64 baser = its->baser_coll_table;
gpa_t gpa = BASER_ADDRESS(baser);
gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser);
struct its_collection *collection;
u64 val;
size_t max_size, filled = 0;
......@@ -2354,7 +2338,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its)
if (!(baser & GITS_BASER_VALID))
return 0;
gpa = BASER_ADDRESS(baser);
gpa = GITS_BASER_ADDR_48_to_52(baser);
max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
......
......@@ -25,7 +25,7 @@
int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
phys_addr_t addr, phys_addr_t alignment)
{
if (addr & ~KVM_PHYS_MASK)
if (addr & ~kvm_phys_mask(kvm))
return -E2BIG;
if (!IS_ALIGNED(addr, alignment))
......
......@@ -364,7 +364,6 @@ static u64 vgic_sanitise_pendbaser(u64 reg)
vgic_sanitise_outer_cacheability);
reg &= ~PENDBASER_RES0_MASK;
reg &= ~GENMASK_ULL(51, 48);
return reg;
}
......@@ -382,7 +381,6 @@ static u64 vgic_sanitise_propbaser(u64 reg)
vgic_sanitise_outer_cacheability);
reg &= ~PROPBASER_RES0_MASK;
reg &= ~GENMASK_ULL(51, 48);
return reg;
}
......