Commit 167c76e0 authored by Paul Mackerras

Merge remote-tracking branch 'remotes/powerpc/topic/ppc-kvm' into kvm-ppc-next

This merges in the POWER9 radix MMU host and guest support, which
was put into a topic branch because it touches both powerpc and
KVM code.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
parents fcd4f3c6 8cf4ecc0
...@@ -3201,6 +3201,71 @@ struct kvm_reinject_control { ...@@ -3201,6 +3201,71 @@ struct kvm_reinject_control {
pit_reinject = 0 (!reinject mode) is recommended, unless running an old pit_reinject = 0 (!reinject mode) is recommended, unless running an old
operating system that uses the PIT for timing (e.g. Linux 2.4.x). operating system that uses the PIT for timing (e.g. Linux 2.4.x).
4.99 KVM_PPC_CONFIGURE_V3_MMU
Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3
Architectures: ppc
Type: vm ioctl
Parameters: struct kvm_ppc_mmuv3_cfg (in)
Returns: 0 on success,
-EFAULT if struct kvm_ppc_mmuv3_cfg cannot be read,
-EINVAL if the configuration is invalid
This ioctl controls whether the guest will use radix or HPT (hashed
page table) translation, and sets the pointer to the process table for
the guest.
struct kvm_ppc_mmuv3_cfg {
__u64 flags;
__u64 process_table;
};
There are two bits that can be set in flags: KVM_PPC_MMUV3_RADIX and
KVM_PPC_MMUV3_GTSE. KVM_PPC_MMUV3_RADIX, if set, configures the guest
to use radix tree translation, and if clear, to use HPT translation.
KVM_PPC_MMUV3_GTSE, if set and if KVM permits it, configures the guest
to be able to use the global TLB and SLB invalidation instructions;
if clear, the guest may not use these instructions.
The process_table field specifies the address and size of the guest
process table, which is in the guest's space. This field is formatted
as the second doubleword of the partition table entry, as defined in
the Power ISA V3.00, Book III section 5.7.6.1.
4.100 KVM_PPC_GET_RMMU_INFO
Capability: KVM_CAP_PPC_RADIX_MMU
Architectures: ppc
Type: vm ioctl
Parameters: struct kvm_ppc_rmmu_info (out)
Returns: 0 on success,
-EFAULT if struct kvm_ppc_rmmu_info cannot be written,
-EINVAL if no useful information can be returned
This ioctl returns a structure containing two things: (a) a list
containing supported radix tree geometries, and (b) a list that maps
page sizes to the encodings to put in the "AP" (actual page size) field
for the tlbie (TLB invalidate entry) instruction.
struct kvm_ppc_rmmu_info {
struct kvm_ppc_radix_geom {
__u8 page_shift;
__u8 level_bits[4];
__u8 pad[3];
} geometries[8];
__u32 ap_encodings[8];
};
The geometries[] field gives up to 8 supported geometries for the
radix page table, in terms of the log base 2 of the smallest page
size, and the number of bits indexed at each level of the tree, from
the PTE level up to the PGD level in that order. Any unused entries
will have 0 in the page_shift field.
The ap_encodings[] field gives the supported page sizes and their AP
field encodings; each entry holds the AP value in the top 3 bits and
the log base 2 of the page size in the bottom 6 bits.
5. The kvm_run structure 5. The kvm_run structure
------------------------ ------------------------
...@@ -3942,3 +4007,21 @@ In order to use SynIC, it has to be activated by setting this ...@@ -3942,3 +4007,21 @@ In order to use SynIC, it has to be activated by setting this
capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
will disable the use of APIC hardware virtualization even if supported will disable the use of APIC hardware virtualization even if supported
by the CPU, as it's incompatible with SynIC auto-EOI behavior. by the CPU, as it's incompatible with SynIC auto-EOI behavior.
8.3 KVM_CAP_PPC_RADIX_MMU
Architectures: ppc
This capability, if KVM_CHECK_EXTENSION indicates that it is
available, means that the kernel can support guests using the
radix MMU defined in Power ISA V3.00 (as implemented in the POWER9
processor).
8.4 KVM_CAP_PPC_HASH_MMU_V3
Architectures: ppc
This capability, if KVM_CHECK_EXTENSION indicates that it is
available, means that the kernel can support guests using the
hashed page table MMU defined in Power ISA V3.00 (as implemented in
the POWER9 processor), including in-memory segment tables.
...@@ -44,10 +44,20 @@ struct patb_entry { ...@@ -44,10 +44,20 @@ struct patb_entry {
}; };
extern struct patb_entry *partition_tb; extern struct patb_entry *partition_tb;
/* Bits in patb0 field */
#define PATB_HR (1UL << 63) #define PATB_HR (1UL << 63)
#define PATB_GR (1UL << 63)
#define RPDB_MASK 0x0ffffffffffff00fUL #define RPDB_MASK 0x0ffffffffffff00fUL
#define RPDB_SHIFT (1UL << 8) #define RPDB_SHIFT (1UL << 8)
#define RTS1_SHIFT 61 /* top 2 bits of radix tree size */
#define RTS1_MASK (3UL << RTS1_SHIFT)
#define RTS2_SHIFT 5 /* bottom 3 bits of radix tree size */
#define RTS2_MASK (7UL << RTS2_SHIFT)
#define RPDS_MASK 0x1f /* root page dir. size field */
/* Bits in patb1 field */
#define PATB_GR (1UL << 63) /* guest uses radix; must match HR */
#define PRTS_MASK 0x1f /* process table size field */
/* /*
* Limit process table to PAGE_SIZE table. This * Limit process table to PAGE_SIZE table. This
* also limit the max pid we can support. * also limit the max pid we can support.
...@@ -138,5 +148,11 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, ...@@ -138,5 +148,11 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
extern int (*register_process_table)(unsigned long base, unsigned long page_size, extern int (*register_process_table)(unsigned long base, unsigned long page_size,
unsigned long tbl_size); unsigned long tbl_size);
/*
 * radix_init_pseries() is implemented elsewhere when CONFIG_PPC_PSERIES
 * is set.  Otherwise an empty inline stub is provided so that callers do
 * not need their own #ifdef.
 *
 * Note: no ';' after the stub's closing brace -- a trailing semicolon
 * there is an empty file-scope declaration, which pedantic compilers
 * reject.
 */
#ifdef CONFIG_PPC_PSERIES
extern void radix_init_pseries(void);
#else
static inline void radix_init_pseries(void) { }
#endif
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
...@@ -97,6 +97,15 @@ ...@@ -97,6 +97,15 @@
ld reg,PACAKBASE(r13); \ ld reg,PACAKBASE(r13); \
ori reg,reg,(ABS_ADDR(label))@l; ori reg,reg,(ABS_ADDR(label))@l;
/*
* Branches from unrelocated code (e.g., interrupts) to labels outside
* head-y require >64K offsets.
*
* __LOAD_FAR_HANDLER(reg, label) builds the target address in 'reg':
*   1. load the doubleword stored at PACAKBASE(r13) into reg,
*   2. OR in the low 16 bits (@l) of ABS_ADDR(label),
*   3. add the high 16 bits (@h) of ABS_ADDR(label), shifted left by 16.
* The extra addis step is what lets the offset exceed 64K; a plain
* ld/ori sequence can only supply the low 16 bits.
*
* reg:   register that receives the computed address (overwritten).
* label: symbol whose ABS_ADDR() offset is applied.
*/
#define __LOAD_FAR_HANDLER(reg, label) \
ld reg,PACAKBASE(r13); \
ori reg,reg,(ABS_ADDR(label))@l; \
addis reg,reg,(ABS_ADDR(label))@h;
/* Exception register prefixes */ /* Exception register prefixes */
#define EXC_HV H #define EXC_HV H
#define EXC_STD #define EXC_STD
...@@ -227,13 +236,41 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) ...@@ -227,13 +236,41 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr reg; \ mtctr reg; \
bctr bctr
/*
* KVM requires __LOAD_FAR_HANDLER.
*
* __BRANCH_TO_KVM_EXIT branches are also a special case because they
* explicitly use r9 then reload it from PACA before branching. Hence
* the double-underscore.
*/
#define __BRANCH_TO_KVM_EXIT(area, label) \
mfctr r9; \
std r9,HSTATE_SCRATCH1(r13); \
__LOAD_FAR_HANDLER(r9, label); \
mtctr r9; \
ld r9,area+EX_R9(r13); \
bctr
#define BRANCH_TO_KVM(reg, label) \
__LOAD_FAR_HANDLER(reg, label); \
mtctr reg; \
bctr
#else #else
#define BRANCH_TO_COMMON(reg, label) \ #define BRANCH_TO_COMMON(reg, label) \
b label b label
#define BRANCH_TO_KVM(reg, label) \
b label
#define __BRANCH_TO_KVM_EXIT(area, label) \
ld r9,area+EX_R9(r13); \
b label
#endif #endif
#define __KVM_HANDLER_PROLOG(area, n) \
#define __KVM_HANDLER(area, h, n) \
BEGIN_FTR_SECTION_NESTED(947) \ BEGIN_FTR_SECTION_NESTED(947) \
ld r10,area+EX_CFAR(r13); \ ld r10,area+EX_CFAR(r13); \
std r10,HSTATE_CFAR(r13); \ std r10,HSTATE_CFAR(r13); \
...@@ -243,30 +280,28 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) ...@@ -243,30 +280,28 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
std r10,HSTATE_PPR(r13); \ std r10,HSTATE_PPR(r13); \
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
ld r10,area+EX_R10(r13); \ ld r10,area+EX_R10(r13); \
stw r9,HSTATE_SCRATCH1(r13); \
ld r9,area+EX_R9(r13); \
std r12,HSTATE_SCRATCH0(r13); \ std r12,HSTATE_SCRATCH0(r13); \
sldi r12,r9,32; \
#define __KVM_HANDLER(area, h, n) \ ori r12,r12,(n); \
__KVM_HANDLER_PROLOG(area, n) \ /* This reloads r9 before branching to kvmppc_interrupt */ \
li r12,n; \ __BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt)
b kvmppc_interrupt
#define __KVM_HANDLER_SKIP(area, h, n) \ #define __KVM_HANDLER_SKIP(area, h, n) \
cmpwi r10,KVM_GUEST_MODE_SKIP; \ cmpwi r10,KVM_GUEST_MODE_SKIP; \
ld r10,area+EX_R10(r13); \
beq 89f; \ beq 89f; \
stw r9,HSTATE_SCRATCH1(r13); \
BEGIN_FTR_SECTION_NESTED(948) \ BEGIN_FTR_SECTION_NESTED(948) \
ld r9,area+EX_PPR(r13); \ ld r10,area+EX_PPR(r13); \
std r9,HSTATE_PPR(r13); \ std r10,HSTATE_PPR(r13); \
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
ld r9,area+EX_R9(r13); \ ld r10,area+EX_R10(r13); \
std r12,HSTATE_SCRATCH0(r13); \ std r12,HSTATE_SCRATCH0(r13); \
li r12,n; \ sldi r12,r9,32; \
b kvmppc_interrupt; \ ori r12,r12,(n); \
/* This reloads r9 before branching to kvmppc_interrupt */ \
__BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt); \
89: mtocrf 0x80,r9; \ 89: mtocrf 0x80,r9; \
ld r9,area+EX_R9(r13); \ ld r9,area+EX_R9(r13); \
ld r10,area+EX_R10(r13); \
b kvmppc_skip_##h##interrupt b kvmppc_skip_##h##interrupt
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
...@@ -393,12 +428,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) ...@@ -393,12 +428,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD) EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD)
#define STD_RELON_EXCEPTION_HV(loc, vec, label) \ #define STD_RELON_EXCEPTION_HV(loc, vec, label) \
/* No guest interrupts come through here */ \
SET_SCRATCH0(r13); /* save r13 */ \ SET_SCRATCH0(r13); /* save r13 */ \
EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_HV, NOTEST, vec); EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, \
EXC_HV, KVMTEST_HV, vec);
#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ #define STD_RELON_EXCEPTION_HV_OOL(vec, label) \
EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/* This associate vector numbers with bits in paca->irq_happened */ /* This associate vector numbers with bits in paca->irq_happened */
...@@ -475,10 +510,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) ...@@ -475,10 +510,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \ #define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \
_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
EXC_HV, SOFTEN_NOTEST_HV) EXC_HV, SOFTEN_TEST_HV)
#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \ #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \
EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec); \ EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
/* /*
......
...@@ -218,7 +218,7 @@ end_##sname: ...@@ -218,7 +218,7 @@ end_##sname:
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#define TRAMP_KVM_BEGIN(name) \ #define TRAMP_KVM_BEGIN(name) \
TRAMP_REAL_BEGIN(name) TRAMP_VIRT_BEGIN(name)
#else #else
#define TRAMP_KVM_BEGIN(name) #define TRAMP_KVM_BEGIN(name)
#endif #endif
......
...@@ -276,6 +276,7 @@ ...@@ -276,6 +276,7 @@
#define H_GET_MPP_X 0x314 #define H_GET_MPP_X 0x314
#define H_SET_MODE 0x31C #define H_SET_MODE 0x31C
#define H_CLEAR_HPT 0x358 #define H_CLEAR_HPT 0x358
#define H_REGISTER_PROC_TBL 0x37C
#define H_SIGNAL_SYS_RESET 0x380 #define H_SIGNAL_SYS_RESET 0x380
#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET #define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET
...@@ -313,6 +314,16 @@ ...@@ -313,6 +314,16 @@
#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 #define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
/* >= 0 values are CPU number */ /* >= 0 values are CPU number */
/* Flag values used in H_REGISTER_PROC_TBL hcall */
#define PROC_TABLE_OP_MASK 0x18
#define PROC_TABLE_DEREG 0x10
#define PROC_TABLE_NEW 0x18
#define PROC_TABLE_TYPE_MASK 0x06
#define PROC_TABLE_HPT_SLB 0x00
#define PROC_TABLE_HPT_PT 0x02
#define PROC_TABLE_RADIX 0x04
#define PROC_TABLE_GTSE 0x01
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
/** /**
......
...@@ -170,6 +170,8 @@ extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run, ...@@ -170,6 +170,8 @@ extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
unsigned long status); unsigned long status);
extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
unsigned long slb_v, unsigned long valid); unsigned long slb_v, unsigned long valid);
extern int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned long gpa, gva_t ea, int is_store);
extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte); extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu); extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
...@@ -182,6 +184,25 @@ extern void kvmppc_mmu_hpte_sysexit(void); ...@@ -182,6 +184,25 @@ extern void kvmppc_mmu_hpte_sysexit(void);
extern int kvmppc_mmu_hv_init(void); extern int kvmppc_mmu_hv_init(void);
extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr);
extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, bool data, bool iswrite);
extern int kvmppc_init_vm_radix(struct kvm *kvm);
extern void kvmppc_free_radix(struct kvm *kvm);
extern int kvmppc_radix_init(void);
extern void kvmppc_radix_exit(void);
extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
struct kvm_memory_slot *memslot, unsigned long *map);
extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
/* XXX remove this export when load_last_inst() is generic */ /* XXX remove this export when load_last_inst() is generic */
extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
...@@ -211,8 +232,11 @@ extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, ...@@ -211,8 +232,11 @@ extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
unsigned long pte_index, unsigned long avpn, unsigned long pte_index, unsigned long avpn,
unsigned long *hpret); unsigned long *hpret);
extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
struct kvm_memory_slot *memslot, unsigned long *map); struct kvm_memory_slot *memslot, unsigned long *map);
extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
struct kvm_memory_slot *memslot,
unsigned long *map);
extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
unsigned long mask); unsigned long mask);
extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
......
...@@ -36,6 +36,12 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) ...@@ -36,6 +36,12 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
#endif #endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * kvm_is_radix() - test the per-VM radix flag.
 * @kvm: VM to query.
 *
 * Returns true when kvm->arch.radix is non-zero, false otherwise.
 * NOTE(review): presumably the flag is set when the guest uses radix-tree
 * translation -- confirm against the code that writes arch.radix.
 */
static inline bool kvm_is_radix(struct kvm *kvm)
{
	return kvm->arch.radix != 0;
}
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif #endif
......
...@@ -263,7 +263,11 @@ struct kvm_arch { ...@@ -263,7 +263,11 @@ struct kvm_arch {
unsigned long hpt_mask; unsigned long hpt_mask;
atomic_t hpte_mod_interest; atomic_t hpte_mod_interest;
cpumask_t need_tlb_flush; cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
int hpt_cma_alloc; int hpt_cma_alloc;
u8 radix;
pgd_t *pgtable;
u64 process_table;
struct dentry *debugfs_dir; struct dentry *debugfs_dir;
struct dentry *htab_dentry; struct dentry *htab_dentry;
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
...@@ -603,6 +607,7 @@ struct kvm_vcpu_arch { ...@@ -603,6 +607,7 @@ struct kvm_vcpu_arch {
ulong fault_dar; ulong fault_dar;
u32 fault_dsisr; u32 fault_dsisr;
unsigned long intr_msr; unsigned long intr_msr;
ulong fault_gpa; /* guest real address of page fault (POWER9) */
#endif #endif
#ifdef CONFIG_BOOKE #ifdef CONFIG_BOOKE
...@@ -657,6 +662,7 @@ struct kvm_vcpu_arch { ...@@ -657,6 +662,7 @@ struct kvm_vcpu_arch {
int state; int state;
int ptid; int ptid;
int thread_cpu; int thread_cpu;
int prev_cpu;
bool timer_running; bool timer_running;
wait_queue_head_t cpu_run; wait_queue_head_t cpu_run;
......
...@@ -291,6 +291,8 @@ struct kvmppc_ops { ...@@ -291,6 +291,8 @@ struct kvmppc_ops {
struct irq_bypass_producer *); struct irq_bypass_producer *);
void (*irq_bypass_del_producer)(struct irq_bypass_consumer *, void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
struct irq_bypass_producer *); struct irq_bypass_producer *);
int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
}; };
extern struct kvmppc_ops *kvmppc_hv_ops; extern struct kvmppc_ops *kvmppc_hv_ops;
......
...@@ -121,6 +121,8 @@ struct of_drconf_cell { ...@@ -121,6 +121,8 @@ struct of_drconf_cell {
#define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */ #define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */
#define OV1_PPC_2_07 0x01 /* set if we support PowerPC 2.07 */ #define OV1_PPC_2_07 0x01 /* set if we support PowerPC 2.07 */
#define OV1_PPC_3_00 0x80 /* set if we support PowerPC 3.00 */
/* Option vector 2: Open Firmware options supported */ /* Option vector 2: Open Firmware options supported */
#define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */ #define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */
...@@ -151,10 +153,17 @@ struct of_drconf_cell { ...@@ -151,10 +153,17 @@ struct of_drconf_cell {
#define OV5_XCMO 0x0440 /* Page Coalescing */ #define OV5_XCMO 0x0440 /* Page Coalescing */
#define OV5_TYPE1_AFFINITY 0x0580 /* Type 1 NUMA affinity */ #define OV5_TYPE1_AFFINITY 0x0580 /* Type 1 NUMA affinity */
#define OV5_PRRN 0x0540 /* Platform Resource Reassignment */ #define OV5_PRRN 0x0540 /* Platform Resource Reassignment */
#define OV5_PFO_HW_RNG 0x0E80 /* PFO Random Number Generator */ #define OV5_PFO_HW_RNG 0x1180 /* PFO Random Number Generator */
#define OV5_PFO_HW_842 0x0E40 /* PFO Compression Accelerator */ #define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */
#define OV5_PFO_HW_ENCR 0x0E20 /* PFO Encryption Accelerator */ #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */
#define OV5_SUB_PROCESSORS 0x0F01 /* 1,2,or 4 Sub-Processors supported */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */
#define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */
#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */
#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */
#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */
#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */
#define OV5_MMU_SLB 0x1800 /* always use SLB */
#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */
/* Option Vector 6: IBM PAPR hints */ /* Option Vector 6: IBM PAPR hints */
#define OV6_LINUX 0x02 /* Linux is our OS */ #define OV6_LINUX 0x02 /* Linux is our OS */
......
...@@ -274,10 +274,14 @@ ...@@ -274,10 +274,14 @@
#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ #define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */
#define DSISR_NOHPTE 0x40000000 /* no translation found */ #define DSISR_NOHPTE 0x40000000 /* no translation found */
#define DSISR_PROTFAULT 0x08000000 /* protection fault */ #define DSISR_PROTFAULT 0x08000000 /* protection fault */
#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */
#define DSISR_ISSTORE 0x02000000 /* access was a store */ #define DSISR_ISSTORE 0x02000000 /* access was a store */
#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ #define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
#define DSISR_NOSEGMENT 0x00200000 /* SLB miss */ #define DSISR_NOSEGMENT 0x00200000 /* SLB miss */
#define DSISR_KEYFAULT 0x00200000 /* Key fault */ #define DSISR_KEYFAULT 0x00200000 /* Key fault */
#define DSISR_UNSUPP_MMU 0x00080000 /* Unsupported MMU config */
#define DSISR_SET_RC 0x00040000 /* Failed setting of R/C bits */
#define DSISR_PGDIRFAULT 0x00020000 /* Fault on page directory */
#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ #define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */
#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */
#define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/0) */ #define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/0) */
......
...@@ -413,6 +413,26 @@ struct kvm_get_htab_header { ...@@ -413,6 +413,26 @@ struct kvm_get_htab_header {
__u16 n_invalid; __u16 n_invalid;
}; };
/*
 * Argument for the KVM_PPC_CONFIGURE_V3_MMU ioctl.
 *
 * flags:         bitwise OR of the KVM_PPC_MMUV3_* flag values.
 * process_table: address and size of the guest process table, laid out
 *                like the second doubleword of a partition table entry.
 */
struct kvm_ppc_mmuv3_cfg {
__u64 flags;
__u64 process_table; /* second doubleword of partition table entry */
};
/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */
#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */
/*
 * Result structure for the KVM_PPC_GET_RMMU_INFO ioctl.
 *
 * geometries[]:   up to 8 supported radix page table geometries; an entry
 *                 with page_shift == 0 is unused.
 * ap_encodings[]: supported page sizes and their AP field encodings, with
 *                 the AP value in the top 3 bits and log2(page size) in
 *                 the bottom 6 bits.
 */
struct kvm_ppc_rmmu_info {
struct kvm_ppc_radix_geom {
/* log base 2 of the smallest page size for this geometry */
__u8 page_shift;
/* bits indexed at each tree level, from the PTE level up to the PGD level */
__u8 level_bits[4];
/* brings each geometry entry to 8 bytes */
__u8 pad[3];
} geometries[8];
__u32 ap_encodings[8];
};
/* Per-vcpu XICS interrupt controller state */ /* Per-vcpu XICS interrupt controller state */
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
......
...@@ -498,6 +498,7 @@ int main(void) ...@@ -498,6 +498,7 @@ int main(void)
DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
DEFINE(KVM_RADIX, offsetof(struct kvm, arch.radix));
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
...@@ -537,6 +538,7 @@ int main(void) ...@@ -537,6 +538,7 @@ int main(void)
DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
DEFINE(VCPU_FAULT_GPA, offsetof(struct kvm_vcpu, arch.fault_gpa));
DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr)); DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
......
...@@ -142,7 +142,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) ...@@ -142,7 +142,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
lbz r0,HSTATE_HWTHREAD_REQ(r13) lbz r0,HSTATE_HWTHREAD_REQ(r13)
cmpwi r0,0 cmpwi r0,0
beq 1f beq 1f
b kvm_start_guest BRANCH_TO_KVM(r10, kvm_start_guest)
1: 1:
#endif #endif
...@@ -717,13 +717,9 @@ hardware_interrupt_hv: ...@@ -717,13 +717,9 @@ hardware_interrupt_hv:
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
EXC_HV, SOFTEN_TEST_HV) EXC_HV, SOFTEN_TEST_HV)
do_kvm_H0x500:
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
FTR_SECTION_ELSE FTR_SECTION_ELSE
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
EXC_STD, SOFTEN_TEST_PR) EXC_STD, SOFTEN_TEST_PR)
do_kvm_0x500:
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
EXC_REAL_END(hardware_interrupt, 0x500, 0x600) EXC_REAL_END(hardware_interrupt, 0x500, 0x600)
...@@ -737,6 +733,8 @@ hardware_interrupt_relon_hv: ...@@ -737,6 +733,8 @@ hardware_interrupt_relon_hv:
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600)
TRAMP_KVM(PACA_EXGEN, 0x500)
TRAMP_KVM_HV(PACA_EXGEN, 0x500)
EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
...@@ -832,6 +830,31 @@ EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00) ...@@ -832,6 +830,31 @@ EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00)
TRAMP_KVM(PACA_EXGEN, 0xb00) TRAMP_KVM(PACA_EXGEN, 0xb00)
EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
* If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
* that support it) before changing to HMT_MEDIUM. That allows the KVM
* code to save that value into the guest state (it is the guest's PPR
* value). Otherwise just change to HMT_MEDIUM as userspace has
* already saved the PPR.
*/
#define SYSCALL_KVMTEST \
SET_SCRATCH0(r13); \
GET_PACA(r13); \
std r9,PACA_EXGEN+EX_R9(r13); \
OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \
HMT_MEDIUM; \
std r10,PACA_EXGEN+EX_R10(r13); \
OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); \
mfcr r9; \
KVMTEST_PR(0xc00); \
GET_SCRATCH0(r13)
#else
#define SYSCALL_KVMTEST \
HMT_MEDIUM
#endif
#define LOAD_SYSCALL_HANDLER(reg) \ #define LOAD_SYSCALL_HANDLER(reg) \
__LOAD_HANDLER(reg, system_call_common) __LOAD_HANDLER(reg, system_call_common)
...@@ -885,34 +908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ ...@@ -885,34 +908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
#endif #endif
EXC_REAL_BEGIN(system_call, 0xc00, 0xd00) EXC_REAL_BEGIN(system_call, 0xc00, 0xd00)
/* SYSCALL_KVMTEST
* If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
* that support it) before changing to HMT_MEDIUM. That allows the KVM
* code to save that value into the guest state (it is the guest's PPR
* value). Otherwise just change to HMT_MEDIUM as userspace has
* already saved the PPR.
*/
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
SET_SCRATCH0(r13)
GET_PACA(r13)
std r9,PACA_EXGEN+EX_R9(r13)
OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);
HMT_MEDIUM;
std r10,PACA_EXGEN+EX_R10(r13)
OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR);
mfcr r9
KVMTEST_PR(0xc00)
GET_SCRATCH0(r13)
#else
HMT_MEDIUM;
#endif
SYSCALL_PSERIES_1 SYSCALL_PSERIES_1
SYSCALL_PSERIES_2_RFID SYSCALL_PSERIES_2_RFID
SYSCALL_PSERIES_3 SYSCALL_PSERIES_3
EXC_REAL_END(system_call, 0xc00, 0xd00) EXC_REAL_END(system_call, 0xc00, 0xd00)
EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00) EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00)
HMT_MEDIUM SYSCALL_KVMTEST
SYSCALL_PSERIES_1 SYSCALL_PSERIES_1
SYSCALL_PSERIES_2_DIRECT SYSCALL_PSERIES_2_DIRECT
SYSCALL_PSERIES_3 SYSCALL_PSERIES_3
...@@ -927,7 +930,7 @@ TRAMP_KVM(PACA_EXGEN, 0xd00) ...@@ -927,7 +930,7 @@ TRAMP_KVM(PACA_EXGEN, 0xd00)
EXC_COMMON(single_step_common, 0xd00, single_step_exception) EXC_COMMON(single_step_common, 0xd00, single_step_exception)
EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20) EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20)
EXC_VIRT_NONE(0x4e00, 0x4e20) EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x4e20, 0xe00)
TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00) TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00)
EXC_COMMON_BEGIN(h_data_storage_common) EXC_COMMON_BEGIN(h_data_storage_common)
mfspr r10,SPRN_HDAR mfspr r10,SPRN_HDAR
...@@ -943,7 +946,7 @@ EXC_COMMON_BEGIN(h_data_storage_common) ...@@ -943,7 +946,7 @@ EXC_COMMON_BEGIN(h_data_storage_common)
EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40) EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40)
EXC_VIRT_NONE(0x4e20, 0x4e40) EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x4e40, 0xe20)
TRAMP_KVM_HV(PACA_EXGEN, 0xe20) TRAMP_KVM_HV(PACA_EXGEN, 0xe20)
EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception) EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
......
...@@ -649,6 +649,7 @@ static void __init early_cmdline_parse(void) ...@@ -649,6 +649,7 @@ static void __init early_cmdline_parse(void)
struct option_vector1 { struct option_vector1 {
u8 byte1; u8 byte1;
u8 arch_versions; u8 arch_versions;
u8 arch_versions3;
} __packed; } __packed;
struct option_vector2 { struct option_vector2 {
...@@ -691,6 +692,9 @@ struct option_vector5 { ...@@ -691,6 +692,9 @@ struct option_vector5 {
u8 reserved2; u8 reserved2;
__be16 reserved3; __be16 reserved3;
u8 subprocessors; u8 subprocessors;
u8 byte22;
u8 intarch;
u8 mmu;
} __packed; } __packed;
struct option_vector6 { struct option_vector6 {
...@@ -700,7 +704,7 @@ struct option_vector6 { ...@@ -700,7 +704,7 @@ struct option_vector6 {
} __packed; } __packed;
struct ibm_arch_vec { struct ibm_arch_vec {
struct { u32 mask, val; } pvrs[10]; struct { u32 mask, val; } pvrs[12];
u8 num_vectors; u8 num_vectors;
...@@ -749,6 +753,14 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { ...@@ -749,6 +753,14 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.mask = cpu_to_be32(0xffff0000), /* POWER8 */ .mask = cpu_to_be32(0xffff0000), /* POWER8 */
.val = cpu_to_be32(0x004d0000), .val = cpu_to_be32(0x004d0000),
}, },
{
.mask = cpu_to_be32(0xffff0000), /* POWER9 */
.val = cpu_to_be32(0x004e0000),
},
{
.mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */
.val = cpu_to_be32(0x0f000005),
},
{ {
.mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */ .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */
.val = cpu_to_be32(0x0f000004), .val = cpu_to_be32(0x0f000004),
...@@ -774,6 +786,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { ...@@ -774,6 +786,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.byte1 = 0, .byte1 = 0,
.arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
.arch_versions3 = OV1_PPC_3_00,
}, },
.vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)), .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)),
...@@ -836,6 +849,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { ...@@ -836,6 +849,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.reserved2 = 0, .reserved2 = 0,
.reserved3 = 0, .reserved3 = 0,
.subprocessors = 1, .subprocessors = 1,
.intarch = 0,
.mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) |
OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE),
}, },
/* option vector 6: IBM PAPR hints */ /* option vector 6: IBM PAPR hints */
......
...@@ -70,7 +70,8 @@ endif ...@@ -70,7 +70,8 @@ endif
kvm-hv-y += \ kvm-hv-y += \
book3s_hv.o \ book3s_hv.o \
book3s_hv_interrupts.o \ book3s_hv_interrupts.o \
book3s_64_mmu_hv.o book3s_64_mmu_hv.o \
book3s_64_mmu_radix.o
kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
book3s_hv_rm_xics.o book3s_hv_rm_xics.o
......
...@@ -239,6 +239,7 @@ void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, ...@@ -239,6 +239,7 @@ void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
kvmppc_set_dsisr(vcpu, flags); kvmppc_set_dsisr(vcpu, flags);
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
} }
EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage); /* used by kvm_hv */
void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags) void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
{ {
......
...@@ -119,6 +119,9 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) ...@@ -119,6 +119,9 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
long err = -EBUSY; long err = -EBUSY;
long order; long order;
if (kvm_is_radix(kvm))
return -EINVAL;
mutex_lock(&kvm->lock); mutex_lock(&kvm->lock);
if (kvm->arch.hpte_setup_done) { if (kvm->arch.hpte_setup_done) {
kvm->arch.hpte_setup_done = 0; kvm->arch.hpte_setup_done = 0;
...@@ -152,12 +155,11 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) ...@@ -152,12 +155,11 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
void kvmppc_free_hpt(struct kvm *kvm) void kvmppc_free_hpt(struct kvm *kvm)
{ {
kvmppc_free_lpid(kvm->arch.lpid);
vfree(kvm->arch.revmap); vfree(kvm->arch.revmap);
if (kvm->arch.hpt_cma_alloc) if (kvm->arch.hpt_cma_alloc)
kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
1 << (kvm->arch.hpt_order - PAGE_SHIFT)); 1 << (kvm->arch.hpt_order - PAGE_SHIFT));
else else if (kvm->arch.hpt_virt)
free_pages(kvm->arch.hpt_virt, free_pages(kvm->arch.hpt_virt,
kvm->arch.hpt_order - PAGE_SHIFT); kvm->arch.hpt_order - PAGE_SHIFT);
} }
...@@ -392,8 +394,8 @@ static int instruction_is_store(unsigned int instr) ...@@ -392,8 +394,8 @@ static int instruction_is_store(unsigned int instr)
return (instr & mask) != 0; return (instr & mask) != 0;
} }
static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned long gpa, gva_t ea, int is_store) unsigned long gpa, gva_t ea, int is_store)
{ {
u32 last_inst; u32 last_inst;
...@@ -458,6 +460,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -458,6 +460,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned long rcbits; unsigned long rcbits;
long mmio_update; long mmio_update;
if (kvm_is_radix(kvm))
return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);
/* /*
* Real-mode code has already searched the HPT and found the * Real-mode code has already searched the HPT and found the
* entry we're interested in. Lock the entry and check that * entry we're interested in. Lock the entry and check that
...@@ -695,12 +700,13 @@ static void kvmppc_rmap_reset(struct kvm *kvm) ...@@ -695,12 +700,13 @@ static void kvmppc_rmap_reset(struct kvm *kvm)
srcu_read_unlock(&kvm->srcu, srcu_idx); srcu_read_unlock(&kvm->srcu, srcu_idx);
} }
typedef int (*hva_handler_fn)(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
static int kvm_handle_hva_range(struct kvm *kvm, static int kvm_handle_hva_range(struct kvm *kvm,
unsigned long start, unsigned long start,
unsigned long end, unsigned long end,
int (*handler)(struct kvm *kvm, hva_handler_fn handler)
unsigned long *rmapp,
unsigned long gfn))
{ {
int ret; int ret;
int retval = 0; int retval = 0;
...@@ -725,9 +731,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, ...@@ -725,9 +731,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
for (; gfn < gfn_end; ++gfn) { for (; gfn < gfn_end; ++gfn) {
gfn_t gfn_offset = gfn - memslot->base_gfn; ret = handler(kvm, memslot, gfn);
ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
retval |= ret; retval |= ret;
} }
} }
...@@ -736,20 +740,21 @@ static int kvm_handle_hva_range(struct kvm *kvm, ...@@ -736,20 +740,21 @@ static int kvm_handle_hva_range(struct kvm *kvm,
} }
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
int (*handler)(struct kvm *kvm, unsigned long *rmapp, hva_handler_fn handler)
unsigned long gfn))
{ {
return kvm_handle_hva_range(kvm, hva, hva + 1, handler); return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
} }
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn) unsigned long gfn)
{ {
struct revmap_entry *rev = kvm->arch.revmap; struct revmap_entry *rev = kvm->arch.revmap;
unsigned long h, i, j; unsigned long h, i, j;
__be64 *hptep; __be64 *hptep;
unsigned long ptel, psize, rcbits; unsigned long ptel, psize, rcbits;
unsigned long *rmapp;
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
for (;;) { for (;;) {
lock_rmap(rmapp); lock_rmap(rmapp);
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
...@@ -810,26 +815,36 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, ...@@ -810,26 +815,36 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
{ {
kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); hva_handler_fn handler;
handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
kvm_handle_hva(kvm, hva, handler);
return 0; return 0;
} }
int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
{ {
kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); hva_handler_fn handler;
handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
kvm_handle_hva_range(kvm, start, end, handler);
return 0; return 0;
} }
void kvmppc_core_flush_memslot_hv(struct kvm *kvm, void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
struct kvm_memory_slot *memslot) struct kvm_memory_slot *memslot)
{ {
unsigned long *rmapp;
unsigned long gfn; unsigned long gfn;
unsigned long n; unsigned long n;
unsigned long *rmapp;
rmapp = memslot->arch.rmap;
gfn = memslot->base_gfn; gfn = memslot->base_gfn;
for (n = memslot->npages; n; --n) { rmapp = memslot->arch.rmap;
for (n = memslot->npages; n; --n, ++gfn) {
if (kvm_is_radix(kvm)) {
kvm_unmap_radix(kvm, memslot, gfn);
continue;
}
/* /*
* Testing the present bit without locking is OK because * Testing the present bit without locking is OK because
* the memslot has been marked invalid already, and hence * the memslot has been marked invalid already, and hence
...@@ -837,20 +852,21 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm, ...@@ -837,20 +852,21 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
* thus the present bit can't go from 0 to 1. * thus the present bit can't go from 0 to 1.
*/ */
if (*rmapp & KVMPPC_RMAP_PRESENT) if (*rmapp & KVMPPC_RMAP_PRESENT)
kvm_unmap_rmapp(kvm, rmapp, gfn); kvm_unmap_rmapp(kvm, memslot, gfn);
++rmapp; ++rmapp;
++gfn;
} }
} }
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn) unsigned long gfn)
{ {
struct revmap_entry *rev = kvm->arch.revmap; struct revmap_entry *rev = kvm->arch.revmap;
unsigned long head, i, j; unsigned long head, i, j;
__be64 *hptep; __be64 *hptep;
int ret = 0; int ret = 0;
unsigned long *rmapp;
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
retry: retry:
lock_rmap(rmapp); lock_rmap(rmapp);
if (*rmapp & KVMPPC_RMAP_REFERENCED) { if (*rmapp & KVMPPC_RMAP_REFERENCED) {
...@@ -898,17 +914,22 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, ...@@ -898,17 +914,22 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
{ {
return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); hva_handler_fn handler;
handler = kvm_is_radix(kvm) ? kvm_age_radix : kvm_age_rmapp;
return kvm_handle_hva_range(kvm, start, end, handler);
} }
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn) unsigned long gfn)
{ {
struct revmap_entry *rev = kvm->arch.revmap; struct revmap_entry *rev = kvm->arch.revmap;
unsigned long head, i, j; unsigned long head, i, j;
unsigned long *hp; unsigned long *hp;
int ret = 1; int ret = 1;
unsigned long *rmapp;
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
if (*rmapp & KVMPPC_RMAP_REFERENCED) if (*rmapp & KVMPPC_RMAP_REFERENCED)
return 1; return 1;
...@@ -934,12 +955,18 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, ...@@ -934,12 +955,18 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
{ {
return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); hva_handler_fn handler;
handler = kvm_is_radix(kvm) ? kvm_test_age_radix : kvm_test_age_rmapp;
return kvm_handle_hva(kvm, hva, handler);
} }
void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
{ {
kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); hva_handler_fn handler;
handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
kvm_handle_hva(kvm, hva, handler);
} }
static int vcpus_running(struct kvm *kvm) static int vcpus_running(struct kvm *kvm)
...@@ -1040,7 +1067,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) ...@@ -1040,7 +1067,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
return npages_dirty; return npages_dirty;
} }
static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
struct kvm_memory_slot *memslot, struct kvm_memory_slot *memslot,
unsigned long *map) unsigned long *map)
{ {
...@@ -1058,12 +1085,11 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, ...@@ -1058,12 +1085,11 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
__set_bit_le(gfn - memslot->base_gfn, map); __set_bit_le(gfn - memslot->base_gfn, map);
} }
long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
unsigned long *map) struct kvm_memory_slot *memslot, unsigned long *map)
{ {
unsigned long i, j; unsigned long i, j;
unsigned long *rmapp; unsigned long *rmapp;
struct kvm_vcpu *vcpu;
preempt_disable(); preempt_disable();
rmapp = memslot->arch.rmap; rmapp = memslot->arch.rmap;
...@@ -1079,15 +1105,6 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, ...@@ -1079,15 +1105,6 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
__set_bit_le(j, map); __set_bit_le(j, map);
++rmapp; ++rmapp;
} }
/* Harvest dirty bits from VPA and DTL updates */
/* Note: we never modify the SLB shadow buffer areas */
kvm_for_each_vcpu(i, vcpu, kvm) {
spin_lock(&vcpu->arch.vpa_update_lock);
harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
spin_unlock(&vcpu->arch.vpa_update_lock);
}
preempt_enable(); preempt_enable();
return 0; return 0;
} }
...@@ -1142,10 +1159,14 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, ...@@ -1142,10 +1159,14 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
srcu_idx = srcu_read_lock(&kvm->srcu); srcu_idx = srcu_read_lock(&kvm->srcu);
memslot = gfn_to_memslot(kvm, gfn); memslot = gfn_to_memslot(kvm, gfn);
if (memslot) { if (memslot) {
rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; if (!kvm_is_radix(kvm)) {
lock_rmap(rmap); rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
*rmap |= KVMPPC_RMAP_CHANGED; lock_rmap(rmap);
unlock_rmap(rmap); *rmap |= KVMPPC_RMAP_CHANGED;
unlock_rmap(rmap);
} else if (memslot->dirty_bitmap) {
mark_page_dirty(kvm, gfn);
}
} }
srcu_read_unlock(&kvm->srcu, srcu_idx); srcu_read_unlock(&kvm->srcu, srcu_idx);
} }
...@@ -1675,7 +1696,10 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) ...@@ -1675,7 +1696,10 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; if (kvm_is_radix(vcpu->kvm))
mmu->xlate = kvmppc_mmu_radix_xlate;
else
mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
......
This diff is collapsed.
This diff is collapsed.
...@@ -29,6 +29,11 @@ ...@@ -29,6 +29,11 @@
#include <asm/opal.h> #include <asm/opal.h>
#include <asm/smp.h> #include <asm/smp.h>
static bool in_realmode(void)
{
return !(mfmsr() & MSR_IR);
}
#define KVM_CMA_CHUNK_ORDER 18 #define KVM_CMA_CHUNK_ORDER 18
/* /*
...@@ -200,7 +205,6 @@ static inline void rm_writeb(unsigned long paddr, u8 val) ...@@ -200,7 +205,6 @@ static inline void rm_writeb(unsigned long paddr, u8 val)
/* /*
* Send an interrupt or message to another CPU. * Send an interrupt or message to another CPU.
* This can only be called in real mode.
* The caller needs to include any barrier needed to order writes * The caller needs to include any barrier needed to order writes
* to memory vs. the IPI/message. * to memory vs. the IPI/message.
*/ */
...@@ -226,7 +230,9 @@ void kvmhv_rm_send_ipi(int cpu) ...@@ -226,7 +230,9 @@ void kvmhv_rm_send_ipi(int cpu)
/* Else poke the target with an IPI */ /* Else poke the target with an IPI */
xics_phys = paca[cpu].kvm_hstate.xics_phys; xics_phys = paca[cpu].kvm_hstate.xics_phys;
if (xics_phys) if (!in_realmode())
opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
else if (xics_phys)
rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
else else
opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu), opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu),
...@@ -412,14 +418,15 @@ static long kvmppc_read_one_intr(bool *again) ...@@ -412,14 +418,15 @@ static long kvmppc_read_one_intr(bool *again)
/* Now read the interrupt from the ICP */ /* Now read the interrupt from the ICP */
xics_phys = local_paca->kvm_hstate.xics_phys; xics_phys = local_paca->kvm_hstate.xics_phys;
if (!xics_phys) { rc = 0;
/* Use OPAL to read the XIRR */ if (!in_realmode())
rc = opal_int_get_xirr(&xirr, false);
else if (!xics_phys)
rc = opal_rm_int_get_xirr(&xirr, false); rc = opal_rm_int_get_xirr(&xirr, false);
if (rc < 0) else
return 1;
} else {
xirr = _lwzcix(xics_phys + XICS_XIRR); xirr = _lwzcix(xics_phys + XICS_XIRR);
} if (rc < 0)
return 1;
/* /*
* Save XIRR for later. Since we get control in reverse endian * Save XIRR for later. Since we get control in reverse endian
...@@ -445,15 +452,19 @@ static long kvmppc_read_one_intr(bool *again) ...@@ -445,15 +452,19 @@ static long kvmppc_read_one_intr(bool *again)
* If it is an IPI, clear the MFRR and EOI it. * If it is an IPI, clear the MFRR and EOI it.
*/ */
if (xisr == XICS_IPI) { if (xisr == XICS_IPI) {
if (xics_phys) { rc = 0;
if (!in_realmode()) {
opal_int_set_mfrr(hard_smp_processor_id(), 0xff);
rc = opal_int_eoi(h_xirr);
} else if (xics_phys) {
_stbcix(xics_phys + XICS_MFRR, 0xff); _stbcix(xics_phys + XICS_MFRR, 0xff);
_stwcix(xics_phys + XICS_XIRR, xirr); _stwcix(xics_phys + XICS_XIRR, xirr);
} else { } else {
opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff); opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff);
rc = opal_rm_int_eoi(h_xirr); rc = opal_rm_int_eoi(h_xirr);
/* If rc > 0, there is another interrupt pending */
*again = rc > 0;
} }
/* If rc > 0, there is another interrupt pending */
*again = rc > 0;
/* /*
* Need to ensure side effects of above stores * Need to ensure side effects of above stores
...@@ -471,7 +482,10 @@ static long kvmppc_read_one_intr(bool *again) ...@@ -471,7 +482,10 @@ static long kvmppc_read_one_intr(bool *again)
/* We raced with the host, /* We raced with the host,
* we need to resend that IPI, bummer * we need to resend that IPI, bummer
*/ */
if (xics_phys) if (!in_realmode())
opal_int_set_mfrr(hard_smp_processor_id(),
IPI_PRIORITY);
else if (xics_phys)
_stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
else else
opal_rm_int_set_mfrr(hard_smp_processor_id(), opal_rm_int_set_mfrr(hard_smp_processor_id(),
......
...@@ -43,6 +43,7 @@ static void *real_vmalloc_addr(void *x) ...@@ -43,6 +43,7 @@ static void *real_vmalloc_addr(void *x)
static int global_invalidates(struct kvm *kvm, unsigned long flags) static int global_invalidates(struct kvm *kvm, unsigned long flags)
{ {
int global; int global;
int cpu;
/* /*
* If there is only one vcore, and it's currently running, * If there is only one vcore, and it's currently running,
...@@ -60,8 +61,14 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags) ...@@ -60,8 +61,14 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
/* any other core might now have stale TLB entries... */ /* any other core might now have stale TLB entries... */
smp_wmb(); smp_wmb();
cpumask_setall(&kvm->arch.need_tlb_flush); cpumask_setall(&kvm->arch.need_tlb_flush);
cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu, cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
&kvm->arch.need_tlb_flush); /*
* On POWER9, threads are independent but the TLB is shared,
* so use the bit for the first thread to represent the core.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
cpu = cpu_first_thread_sibling(cpu);
cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
} }
return global; return global;
...@@ -182,6 +189,8 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, ...@@ -182,6 +189,8 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
unsigned long mmu_seq; unsigned long mmu_seq;
unsigned long rcbits, irq_flags = 0; unsigned long rcbits, irq_flags = 0;
if (kvm_is_radix(kvm))
return H_FUNCTION;
psize = hpte_page_size(pteh, ptel); psize = hpte_page_size(pteh, ptel);
if (!psize) if (!psize)
return H_PARAMETER; return H_PARAMETER;
...@@ -458,6 +467,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, ...@@ -458,6 +467,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
struct revmap_entry *rev; struct revmap_entry *rev;
u64 pte, orig_pte, pte_r; u64 pte, orig_pte, pte_r;
if (kvm_is_radix(kvm))
return H_FUNCTION;
if (pte_index >= kvm->arch.hpt_npte) if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER; return H_PARAMETER;
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
...@@ -529,6 +540,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) ...@@ -529,6 +540,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
struct revmap_entry *rev, *revs[4]; struct revmap_entry *rev, *revs[4];
u64 hp0, hp1; u64 hp0, hp1;
if (kvm_is_radix(kvm))
return H_FUNCTION;
global = global_invalidates(kvm, 0); global = global_invalidates(kvm, 0);
for (i = 0; i < 4 && ret == H_SUCCESS; ) { for (i = 0; i < 4 && ret == H_SUCCESS; ) {
n = 0; n = 0;
...@@ -642,6 +655,8 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, ...@@ -642,6 +655,8 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long v, r, rb, mask, bits; unsigned long v, r, rb, mask, bits;
u64 pte_v, pte_r; u64 pte_v, pte_r;
if (kvm_is_radix(kvm))
return H_FUNCTION;
if (pte_index >= kvm->arch.hpt_npte) if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER; return H_PARAMETER;
...@@ -711,6 +726,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, ...@@ -711,6 +726,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
int i, n = 1; int i, n = 1;
struct revmap_entry *rev = NULL; struct revmap_entry *rev = NULL;
if (kvm_is_radix(kvm))
return H_FUNCTION;
if (pte_index >= kvm->arch.hpt_npte) if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER; return H_PARAMETER;
if (flags & H_READ_4) { if (flags & H_READ_4) {
...@@ -750,6 +767,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, ...@@ -750,6 +767,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long *rmap; unsigned long *rmap;
long ret = H_NOT_FOUND; long ret = H_NOT_FOUND;
if (kvm_is_radix(kvm))
return H_FUNCTION;
if (pte_index >= kvm->arch.hpt_npte) if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER; return H_PARAMETER;
...@@ -796,6 +815,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, ...@@ -796,6 +815,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long *rmap; unsigned long *rmap;
long ret = H_NOT_FOUND; long ret = H_NOT_FOUND;
if (kvm_is_radix(kvm))
return H_FUNCTION;
if (pte_index >= kvm->arch.hpt_npte) if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER; return H_PARAMETER;
......
...@@ -62,11 +62,9 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) ...@@ -62,11 +62,9 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
hcpu = hcore << threads_shift; hcpu = hcore << threads_shift;
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
if (paca[hcpu].kvm_hstate.xics_phys) kvmppc_set_host_ipi(hcpu, 1);
icp_native_cause_ipi_rm(hcpu); smp_mb();
else kvmhv_rm_send_ipi(hcpu);
opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu),
IPI_PRIORITY);
} }
#else #else
static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
......
...@@ -148,6 +148,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -148,6 +148,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
addi r1, r1, 112 addi r1, r1, 112
ld r7, HSTATE_HOST_MSR(r13) ld r7, HSTATE_HOST_MSR(r13)
/*
* If we came back from the guest via a relocation-on interrupt,
* we will be in virtual mode at this point, which makes it a
* little easier to get back to the caller.
*/
mfmsr r0
andi. r0, r0, MSR_IR /* in real mode? */
bne .Lvirt_return
cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq 11f beq 11f
...@@ -181,6 +190,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -181,6 +190,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr SPRN_HSRR1, r7 mtspr SPRN_HSRR1, r7
ba 0xe80 ba 0xe80
/* Virtual-mode return - can't get here for HMI or machine check */
.Lvirt_return:
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq 16f
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
beq 17f
andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */
beq 18f
mtmsrd r7, 1 /* if so then re-enable them */
18: mtlr r8
blr
16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */
mtspr SPRN_HSRR1, r7
b exc_virt_0x4500_hardware_interrupt
17: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
b exc_virt_0x4e80_h_doorbell
kvmppc_primary_no_guest: kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */ /* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */ /* put the HDEC into the DEC, since HDEC interrupts don't wake us */
...@@ -518,6 +547,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -518,6 +547,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Stack frame offsets */ /* Stack frame offsets */
#define STACK_SLOT_TID (112-16) #define STACK_SLOT_TID (112-16)
#define STACK_SLOT_PSSCR (112-24) #define STACK_SLOT_PSSCR (112-24)
#define STACK_SLOT_PID (112-32)
.global kvmppc_hv_entry .global kvmppc_hv_entry
kvmppc_hv_entry: kvmppc_hv_entry:
...@@ -530,6 +560,7 @@ kvmppc_hv_entry: ...@@ -530,6 +560,7 @@ kvmppc_hv_entry:
* R1 = host R1 * R1 = host R1
* R2 = TOC * R2 = TOC
* all other volatile GPRS = free * all other volatile GPRS = free
* Does not preserve non-volatile GPRs or CR fields
*/ */
mflr r0 mflr r0
std r0, PPC_LR_STKOFF(r1) std r0, PPC_LR_STKOFF(r1)
...@@ -549,32 +580,38 @@ kvmppc_hv_entry: ...@@ -549,32 +580,38 @@ kvmppc_hv_entry:
bl kvmhv_start_timing bl kvmhv_start_timing
1: 1:
#endif #endif
/* Clear out SLB */
/* Use cr7 as an indication of radix mode */
ld r5, HSTATE_KVM_VCORE(r13)
ld r9, VCORE_KVM(r5) /* pointer to struct kvm */
lbz r0, KVM_RADIX(r9)
cmpwi cr7, r0, 0
/* Clear out SLB if hash */
bne cr7, 2f
li r6,0 li r6,0
slbmte r6,r6 slbmte r6,r6
slbia slbia
ptesync ptesync
2:
/* /*
* POWER7/POWER8 host -> guest partition switch code. * POWER7/POWER8 host -> guest partition switch code.
* We don't have to lock against concurrent tlbies, * We don't have to lock against concurrent tlbies,
* but we do have to coordinate across hardware threads. * but we do have to coordinate across hardware threads.
*/ */
/* Set bit in entry map iff exit map is zero. */ /* Set bit in entry map iff exit map is zero. */
ld r5, HSTATE_KVM_VCORE(r13)
li r7, 1 li r7, 1
lbz r6, HSTATE_PTID(r13) lbz r6, HSTATE_PTID(r13)
sld r7, r7, r6 sld r7, r7, r6
addi r9, r5, VCORE_ENTRY_EXIT addi r8, r5, VCORE_ENTRY_EXIT
21: lwarx r3, 0, r9 21: lwarx r3, 0, r8
cmpwi r3, 0x100 /* any threads starting to exit? */ cmpwi r3, 0x100 /* any threads starting to exit? */
bge secondary_too_late /* if so we're too late to the party */ bge secondary_too_late /* if so we're too late to the party */
or r3, r3, r7 or r3, r3, r7
stwcx. r3, 0, r9 stwcx. r3, 0, r8
bne 21b bne 21b
/* Primary thread switches to guest partition. */ /* Primary thread switches to guest partition. */
ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
cmpwi r6,0 cmpwi r6,0
bne 10f bne 10f
lwz r7,KVM_LPID(r9) lwz r7,KVM_LPID(r9)
...@@ -590,30 +627,44 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) ...@@ -590,30 +627,44 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* See if we need to flush the TLB */ /* See if we need to flush the TLB */
lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
BEGIN_FTR_SECTION
/*
* On POWER9, individual threads can come in here, but the
* TLB is shared between the 4 threads in a core, hence
* invalidating on one thread invalidates for all.
* Thus we make all 4 threads use the same bit here.
*/
clrrdi r6,r6,2
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
clrldi r7,r6,64-6 /* extract bit number (6 bits) */ clrldi r7,r6,64-6 /* extract bit number (6 bits) */
srdi r6,r6,6 /* doubleword number */ srdi r6,r6,6 /* doubleword number */
sldi r6,r6,3 /* address offset */ sldi r6,r6,3 /* address offset */
add r6,r6,r9 add r6,r6,r9
addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */ addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
li r0,1 li r8,1
sld r0,r0,r7 sld r8,r8,r7
ld r7,0(r6) ld r7,0(r6)
and. r7,r7,r0 and. r7,r7,r8
beq 22f beq 22f
23: ldarx r7,0,r6 /* if set, clear the bit */
andc r7,r7,r0
stdcx. r7,0,r6
bne 23b
/* Flush the TLB of any entries for this LPID */ /* Flush the TLB of any entries for this LPID */
lwz r6,KVM_TLB_SETS(r9) lwz r0,KVM_TLB_SETS(r9)
li r0,0 /* RS for P9 version of tlbiel */ mtctr r0
mtctr r6
li r7,0x800 /* IS field = 0b10 */ li r7,0x800 /* IS field = 0b10 */
ptesync ptesync
28: tlbiel r7 li r0,0 /* RS for P9 version of tlbiel */
bne cr7, 29f
28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */
addi r7,r7,0x1000 addi r7,r7,0x1000
bdnz 28b bdnz 28b
ptesync b 30f
29: PPC_TLBIEL(7,0,2,1,1) /* for radix, RIC=2, PRS=1, R=1 */
addi r7,r7,0x1000
bdnz 29b
30: ptesync
23: ldarx r7,0,r6 /* clear the bit after TLB flushed */
andc r7,r7,r8
stdcx. r7,0,r6
bne 23b
/* Add timebase offset onto timebase */ /* Add timebase offset onto timebase */
22: ld r8,VCORE_TB_OFFSET(r5) 22: ld r8,VCORE_TB_OFFSET(r5)
...@@ -658,7 +709,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -658,7 +709,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
beq kvmppc_primary_no_guest beq kvmppc_primary_no_guest
kvmppc_got_guest: kvmppc_got_guest:
/* Load up guest SLB entries */ /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
lwz r5,VCPU_SLB_MAX(r4) lwz r5,VCPU_SLB_MAX(r4)
cmpwi r5,0 cmpwi r5,0
beq 9f beq 9f
...@@ -696,8 +747,10 @@ kvmppc_got_guest: ...@@ -696,8 +747,10 @@ kvmppc_got_guest:
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
mfspr r5, SPRN_TIDR mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR mfspr r6, SPRN_PSSCR
mfspr r7, SPRN_PID
std r5, STACK_SLOT_TID(r1) std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1) std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
...@@ -823,6 +876,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ...@@ -823,6 +876,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_BESCR, r6 mtspr SPRN_BESCR, r6
mtspr SPRN_PID, r7 mtspr SPRN_PID, r7
mtspr SPRN_WORT, r8 mtspr SPRN_WORT, r8
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
/* POWER8-only registers */ /* POWER8-only registers */
ld r5, VCPU_TCSCR(r4) ld r5, VCPU_TCSCR(r4)
...@@ -1057,13 +1113,13 @@ hdec_soon: ...@@ -1057,13 +1113,13 @@ hdec_soon:
kvmppc_interrupt_hv: kvmppc_interrupt_hv:
/* /*
* Register contents: * Register contents:
* R12 = interrupt vector * R12 = (guest CR << 32) | interrupt vector
* R13 = PACA * R13 = PACA
* guest CR, R12 saved in shadow VCPU SCRATCH1/0 * guest R12 saved in shadow VCPU SCRATCH0
* guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE
* guest R13 saved in SPRN_SCRATCH0 * guest R13 saved in SPRN_SCRATCH0
*/ */
std r9, HSTATE_SCRATCH2(r13) std r9, HSTATE_SCRATCH2(r13)
lbz r9, HSTATE_IN_GUEST(r13) lbz r9, HSTATE_IN_GUEST(r13)
cmpwi r9, KVM_GUEST_MODE_HOST_HV cmpwi r9, KVM_GUEST_MODE_HOST_HV
beq kvmppc_bad_host_intr beq kvmppc_bad_host_intr
...@@ -1094,8 +1150,9 @@ kvmppc_interrupt_hv: ...@@ -1094,8 +1150,9 @@ kvmppc_interrupt_hv:
std r10, VCPU_GPR(R10)(r9) std r10, VCPU_GPR(R10)(r9)
std r11, VCPU_GPR(R11)(r9) std r11, VCPU_GPR(R11)(r9)
ld r3, HSTATE_SCRATCH0(r13) ld r3, HSTATE_SCRATCH0(r13)
lwz r4, HSTATE_SCRATCH1(r13)
std r3, VCPU_GPR(R12)(r9) std r3, VCPU_GPR(R12)(r9)
/* CR is in the high half of r12 */
srdi r4, r12, 32
stw r4, VCPU_CR(r9) stw r4, VCPU_CR(r9)
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
ld r3, HSTATE_CFAR(r13) ld r3, HSTATE_CFAR(r13)
...@@ -1114,6 +1171,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ...@@ -1114,6 +1171,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mfspr r11, SPRN_SRR1 mfspr r11, SPRN_SRR1
std r10, VCPU_SRR0(r9) std r10, VCPU_SRR0(r9)
std r11, VCPU_SRR1(r9) std r11, VCPU_SRR1(r9)
/* trap is in the low half of r12, clear CR from the high half */
clrldi r12, r12, 32
andi. r0, r12, 2 /* need to read HSRR0/1? */ andi. r0, r12, 2 /* need to read HSRR0/1? */
beq 1f beq 1f
mfspr r10, SPRN_HSRR0 mfspr r10, SPRN_HSRR0
...@@ -1149,7 +1208,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ...@@ -1149,7 +1208,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
11: stw r3,VCPU_HEIR(r9) 11: stw r3,VCPU_HEIR(r9)
/* these are volatile across C function calls */ /* these are volatile across C function calls */
#ifdef CONFIG_RELOCATABLE
ld r3, HSTATE_SCRATCH1(r13)
mtctr r3
#else
mfctr r3 mfctr r3
#endif
mfxer r4 mfxer r4
std r3, VCPU_CTR(r9) std r3, VCPU_CTR(r9)
std r4, VCPU_XER(r9) std r4, VCPU_XER(r9)
...@@ -1285,11 +1349,15 @@ mc_cont: ...@@ -1285,11 +1349,15 @@ mc_cont:
mtspr SPRN_CTRLT,r6 mtspr SPRN_CTRLT,r6
4: 4:
/* Read the guest SLB and save it away */ /* Read the guest SLB and save it away */
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
cmpwi r0, 0
li r5, 0
bne 3f /* for radix, save 0 entries */
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
mtctr r0 mtctr r0
li r6,0 li r6,0
addi r7,r9,VCPU_SLB addi r7,r9,VCPU_SLB
li r5,0
1: slbmfee r8,r6 1: slbmfee r8,r6
andis. r0,r8,SLB_ESID_V@h andis. r0,r8,SLB_ESID_V@h
beq 2f beq 2f
...@@ -1301,7 +1369,7 @@ mc_cont: ...@@ -1301,7 +1369,7 @@ mc_cont:
addi r5,r5,1 addi r5,r5,1
2: addi r6,r6,1 2: addi r6,r6,1
bdnz 1b bdnz 1b
stw r5,VCPU_SLB_MAX(r9) 3: stw r5,VCPU_SLB_MAX(r9)
/* /*
* Save the guest PURR/SPURR * Save the guest PURR/SPURR
...@@ -1550,9 +1618,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -1550,9 +1618,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1) ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1) ld r6, STACK_SLOT_PSSCR(r1)
ld r7, STACK_SLOT_PID(r1)
mtspr SPRN_TIDR, r5 mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6 mtspr SPRN_PSSCR, r6
mtspr SPRN_PID, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
/* /*
* POWER7/POWER8 guest -> host partition switch code. * POWER7/POWER8 guest -> host partition switch code.
...@@ -1663,6 +1736,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -1663,6 +1736,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
isync isync
/* load host SLB entries */ /* load host SLB entries */
BEGIN_MMU_FTR_SECTION
b 0f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
ld r8,PACA_SLBSHADOWPTR(r13) ld r8,PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED .rept SLB_NUM_BOLTED
...@@ -1675,7 +1751,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -1675,7 +1751,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
slbmte r6,r5 slbmte r6,r5
1: addi r8,r8,16 1: addi r8,r8,16
.endr .endr
0:
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
/* Finish timing, if we have a vcpu */ /* Finish timing, if we have a vcpu */
ld r4, HSTATE_KVM_VCPU(r13) ld r4, HSTATE_KVM_VCPU(r13)
...@@ -1702,11 +1778,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -1702,11 +1778,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* reflect the HDSI to the guest as a DSI. * reflect the HDSI to the guest as a DSI.
*/ */
kvmppc_hdsi: kvmppc_hdsi:
ld r3, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r3)
cmpwi r0, 0
mfspr r4, SPRN_HDAR mfspr r4, SPRN_HDAR
mfspr r6, SPRN_HDSISR mfspr r6, SPRN_HDSISR
bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */
/* HPTE not found fault or protection fault? */ /* HPTE not found fault or protection fault? */
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
beq 1f /* if not, send it to the guest */ beq 1f /* if not, send it to the guest */
BEGIN_FTR_SECTION
mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
b 4f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
andi. r0, r11, MSR_DR /* data relocation enabled? */ andi. r0, r11, MSR_DR /* data relocation enabled? */
beq 3f beq 3f
clrrdi r0, r4, 28 clrrdi r0, r4, 28
...@@ -1776,13 +1860,29 @@ fast_interrupt_c_return: ...@@ -1776,13 +1860,29 @@ fast_interrupt_c_return:
stb r0, HSTATE_IN_GUEST(r13) stb r0, HSTATE_IN_GUEST(r13)
b guest_exit_cont b guest_exit_cont
.Lradix_hdsi:
std r4, VCPU_FAULT_DAR(r9)
stw r6, VCPU_FAULT_DSISR(r9)
.Lradix_hisi:
mfspr r5, SPRN_ASDR
std r5, VCPU_FAULT_GPA(r9)
b guest_exit_cont
/* /*
* Similarly for an HISI, reflect it to the guest as an ISI unless * Similarly for an HISI, reflect it to the guest as an ISI unless
* it is an HPTE not found fault for a page that we have paged out. * it is an HPTE not found fault for a page that we have paged out.
*/ */
kvmppc_hisi: kvmppc_hisi:
ld r3, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r3)
cmpwi r0, 0
bne .Lradix_hisi /* for radix, just save ASDR */
andis. r0, r11, SRR1_ISI_NOPT@h andis. r0, r11, SRR1_ISI_NOPT@h
beq 1f beq 1f
BEGIN_FTR_SECTION
mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
b 4f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
andi. r0, r11, MSR_IR /* instruction relocation enabled? */ andi. r0, r11, MSR_IR /* instruction relocation enabled? */
beq 3f beq 3f
clrrdi r0, r10, 28 clrrdi r0, r10, 28
......
...@@ -167,20 +167,38 @@ kvmppc_handler_trampoline_enter_end: ...@@ -167,20 +167,38 @@ kvmppc_handler_trampoline_enter_end:
* * * *
*****************************************************************************/ *****************************************************************************/
.global kvmppc_handler_trampoline_exit
kvmppc_handler_trampoline_exit:
.global kvmppc_interrupt_pr .global kvmppc_interrupt_pr
kvmppc_interrupt_pr: kvmppc_interrupt_pr:
/* 64-bit entry. Register usage at this point:
*
* SPRG_SCRATCH0 = guest R13
* R12 = (guest CR << 32) | exit handler id
* R13 = PACA
* HSTATE.SCRATCH0 = guest R12
* HSTATE.SCRATCH1 = guest CTR if RELOCATABLE
*/
#ifdef CONFIG_PPC64
/* Match 32-bit entry */
#ifdef CONFIG_RELOCATABLE
std r9, HSTATE_SCRATCH2(r13)
ld r9, HSTATE_SCRATCH1(r13)
mtctr r9
ld r9, HSTATE_SCRATCH2(r13)
#endif
rotldi r12, r12, 32 /* Flip R12 halves for stw */
stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
srdi r12, r12, 32 /* shift trap into low half */
#endif
.global kvmppc_handler_trampoline_exit
kvmppc_handler_trampoline_exit:
/* Register usage at this point: /* Register usage at this point:
* *
* SPRG_SCRATCH0 = guest R13 * SPRG_SCRATCH0 = guest R13
* R12 = exit handler id * R12 = exit handler id
* R13 = shadow vcpu (32-bit) or PACA (64-bit) * R13 = shadow vcpu (32-bit) or PACA (64-bit)
* HSTATE.SCRATCH0 = guest R12 * HSTATE.SCRATCH0 = guest R12
* HSTATE.SCRATCH1 = guest CR * HSTATE.SCRATCH1 = guest CR
*
*/ */
/* Save registers */ /* Save registers */
......
...@@ -565,6 +565,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -565,6 +565,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_HWRNG: case KVM_CAP_PPC_HWRNG:
r = kvmppc_hwrng_present(); r = kvmppc_hwrng_present();
break; break;
case KVM_CAP_PPC_MMU_RADIX:
r = !!(hv_enabled && radix_enabled());
break;
case KVM_CAP_PPC_MMU_HASH_V3:
r = !!(hv_enabled && !radix_enabled() &&
cpu_has_feature(CPU_FTR_ARCH_300));
break;
#endif #endif
case KVM_CAP_SYNC_MMU: case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
...@@ -1468,6 +1475,31 @@ long kvm_arch_vm_ioctl(struct file *filp, ...@@ -1468,6 +1475,31 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_rtas_define_token(kvm, argp); r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
break; break;
} }
case KVM_PPC_CONFIGURE_V3_MMU: {
struct kvm *kvm = filp->private_data;
struct kvm_ppc_mmuv3_cfg cfg;
r = -EINVAL;
if (!kvm->arch.kvm_ops->configure_mmu)
goto out;
r = -EFAULT;
if (copy_from_user(&cfg, argp, sizeof(cfg)))
goto out;
r = kvm->arch.kvm_ops->configure_mmu(kvm, &cfg);
break;
}
case KVM_PPC_GET_RMMU_INFO: {
struct kvm *kvm = filp->private_data;
struct kvm_ppc_rmmu_info info;
r = -EINVAL;
if (!kvm->arch.kvm_ops->get_rmmu_info)
goto out;
r = kvm->arch.kvm_ops->get_rmmu_info(kvm, &info);
if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
r = -EFAULT;
break;
}
default: { default: {
struct kvm *kvm = filp->private_data; struct kvm *kvm = filp->private_data;
r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
......
...@@ -41,6 +41,7 @@ static void pmd_ctor(void *addr) ...@@ -41,6 +41,7 @@ static void pmd_ctor(void *addr)
} }
struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */
/* /*
* Create a kmem_cache() for pagetables. This is not used for PTE * Create a kmem_cache() for pagetables. This is not used for PTE
...@@ -82,7 +83,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) ...@@ -82,7 +83,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
pgtable_cache[shift - 1] = new; pgtable_cache[shift - 1] = new;
pr_debug("Allocated pgtable cache for order %d\n", shift); pr_debug("Allocated pgtable cache for order %d\n", shift);
} }
EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */
void pgtable_cache_init(void) void pgtable_cache_init(void)
{ {
......
...@@ -42,6 +42,8 @@ ...@@ -42,6 +42,8 @@
#include <linux/memblock.h> #include <linux/memblock.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/page.h> #include <asm/page.h>
...@@ -344,12 +346,45 @@ static int __init parse_disable_radix(char *p) ...@@ -344,12 +346,45 @@ static int __init parse_disable_radix(char *p)
} }
early_param("disable_radix", parse_disable_radix); early_param("disable_radix", parse_disable_radix);
/*
* If we're running under a hypervisor, we need to check the contents of
* /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do
* radix. If not, we clear the radix feature bit so we fall back to hash.
*/
static void early_check_vec5(void)
{
unsigned long root, chosen;
int size;
const u8 *vec5;
root = of_get_flat_dt_root();
chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
if (chosen == -FDT_ERR_NOTFOUND)
return;
vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
if (!vec5)
return;
if (size <= OV5_INDX(OV5_MMU_RADIX_300) ||
!(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300)))
/* Hypervisor doesn't support radix */
cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
}
void __init mmu_early_init_devtree(void) void __init mmu_early_init_devtree(void)
{ {
/* Disable radix mode based on kernel command line. */ /* Disable radix mode based on kernel command line. */
if (disable_radix) if (disable_radix)
cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
/*
* Check /chosen/ibm,architecture-vec-5 if running as a guest.
* When running bare-metal, we can use radix if we like
* even though the ibm,architecture-vec-5 property created by
* skiboot doesn't have the necessary bits set.
*/
if (early_radix_enabled() && !(mfmsr() & MSR_HV))
early_check_vec5();
if (early_radix_enabled()) if (early_radix_enabled())
radix__early_init_devtree(); radix__early_init_devtree();
else else
......
...@@ -401,6 +401,8 @@ void __init radix__early_init_mmu(void) ...@@ -401,6 +401,8 @@ void __init radix__early_init_mmu(void)
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table(); radix_init_partition_table();
radix_init_amor(); radix_init_amor();
} else {
radix_init_pseries();
} }
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
......
...@@ -454,13 +454,23 @@ void __init mmu_partition_table_init(void) ...@@ -454,13 +454,23 @@ void __init mmu_partition_table_init(void)
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
unsigned long dw1) unsigned long dw1)
{ {
unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
partition_tb[lpid].patb0 = cpu_to_be64(dw0); partition_tb[lpid].patb0 = cpu_to_be64(dw0);
partition_tb[lpid].patb1 = cpu_to_be64(dw1); partition_tb[lpid].patb1 = cpu_to_be64(dw1);
/* Global flush of TLBs and partition table caches for this lpid */ /*
* Global flush of TLBs and partition table caches for this lpid.
* The type of flush (hash or radix) depends on what the previous
* use of this partition ID was, not the new use.
*/
asm volatile("ptesync" : : : "memory"); asm volatile("ptesync" : : : "memory");
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : if (old & PATB_HR)
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
else
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
asm volatile("eieio; tlbsync; ptesync" : : : "memory"); asm volatile("eieio; tlbsync; ptesync" : : : "memory");
} }
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
......
...@@ -126,7 +126,7 @@ static void __init fw_vec5_feature_init(const char *vec5, unsigned long len) ...@@ -126,7 +126,7 @@ static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
index = OV5_INDX(vec5_fw_features_table[i].feature); index = OV5_INDX(vec5_fw_features_table[i].feature);
feat = OV5_FEAT(vec5_fw_features_table[i].feature); feat = OV5_FEAT(vec5_fw_features_table[i].feature);
if (vec5[index] & feat) if (index < len && (vec5[index] & feat))
powerpc_firmware_features |= powerpc_firmware_features |=
vec5_fw_features_table[i].val; vec5_fw_features_table[i].val;
} }
......
...@@ -609,6 +609,29 @@ static int __init disable_bulk_remove(char *str) ...@@ -609,6 +609,29 @@ static int __init disable_bulk_remove(char *str)
__setup("bulk_remove=", disable_bulk_remove); __setup("bulk_remove=", disable_bulk_remove);
/*
 * Register a process table with the hypervisor through the
 * H_REGISTER_PROC_TBL hcall.  Actually only used for radix, so far.
 *
 * base, page_size and table_size are handed to the hcall unchanged.  The
 * hcall is reissued for as long as it answers with a long-busy status,
 * delaying between attempts by the interval derived from that status.
 * Any final status other than H_SUCCESS is logged and then hits BUG();
 * the value returned is the final hcall status.
 */
static int pseries_lpar_register_process_table(unsigned long base,
			unsigned long page_size, unsigned long table_size)
{
	unsigned long tbl_flags;
	long status;

	tbl_flags = PROC_TABLE_NEW;
	if (radix_enabled())
		tbl_flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;

	status = plpar_hcall_norets(H_REGISTER_PROC_TBL, tbl_flags, base,
				    page_size, table_size);
	/* Retry only while the hypervisor reports a long-busy condition. */
	while (H_IS_LONG_BUSY(status)) {
		mdelay(get_longbusy_msecs(status));
		status = plpar_hcall_norets(H_REGISTER_PROC_TBL, tbl_flags,
					    base, page_size, table_size);
	}

	if (status == H_SUCCESS)
		return status;

	pr_err("Failed to register process table (rc=%ld)\n", status);
	BUG();
	return status;
}
void __init hpte_init_pseries(void) void __init hpte_init_pseries(void)
{ {
mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
...@@ -622,6 +645,12 @@ void __init hpte_init_pseries(void) ...@@ -622,6 +645,12 @@ void __init hpte_init_pseries(void)
mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
} }
/*
 * Radix MMU setup for the hypervisor-hosted case: install the hcall-based
 * pseries routine as the register_process_table hook and log that radix
 * is in use.
 */
void radix_init_pseries(void)
{
	register_process_table = pseries_lpar_register_process_table;
	pr_info("Using radix MMU under hypervisor\n");
}
#ifdef CONFIG_PPC_SMLPAR #ifdef CONFIG_PPC_SMLPAR
#define CMO_FREE_HINT_DEFAULT 1 #define CMO_FREE_HINT_DEFAULT 1
static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT; static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT;
......
...@@ -871,6 +871,8 @@ struct kvm_ppc_smmu_info { ...@@ -871,6 +871,8 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_S390_USER_INSTR0 130 #define KVM_CAP_S390_USER_INSTR0 130
#define KVM_CAP_MSI_DEVID 131 #define KVM_CAP_MSI_DEVID 131
#define KVM_CAP_PPC_HTM 132 #define KVM_CAP_PPC_HTM 132
#define KVM_CAP_PPC_MMU_RADIX 134
#define KVM_CAP_PPC_MMU_HASH_V3 135
#ifdef KVM_CAP_IRQ_ROUTING #ifdef KVM_CAP_IRQ_ROUTING
...@@ -1187,6 +1189,10 @@ struct kvm_s390_ucas_mapping { ...@@ -1187,6 +1189,10 @@ struct kvm_s390_ucas_mapping {
#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr)
/* Available with KVM_CAP_PPC_RTAS */ /* Available with KVM_CAP_PPC_RTAS */
#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args)
/* Available with KVM_CAP_PPC_MMU_RADIX or KVM_CAP_PPC_MMU_HASH_V3 */
#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
/* Available with KVM_CAP_PPC_MMU_RADIX */
#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
/* ioctl for vm fd */ /* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment