Commit 7dd2157c authored by Paolo Bonzini's avatar Paolo Bonzini

Merge tag 'kvm-ppc-next-4.20-1' of...

Merge tag 'kvm-ppc-next-4.20-1' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

PPC KVM update for 4.20.

The major new feature here is nested HV KVM support.  This allows the
HV KVM module to load inside a radix guest on POWER9 and run radix
guests underneath it.  These nested guests can run in supervisor mode
and don't require any additional instructions to be emulated, unlike
with PR KVM, and so performance is much better than with PR KVM, and
is very close to the performance of a non-nested guest.  A nested
hypervisor (a guest with nested guests) can be migrated to another
host and will bring all its nested guests along with it.  A nested
guest can also itself run guests, and so on down to any desired depth
of nesting.

Apart from that there are a series of updates for IOMMU handling from
Alexey Kardashevskiy, a "one VM per core" mode for HV KVM for
security-paranoid applications, and a small fix for PR KVM.
parents dd5bd0a6 901f8c3f
...@@ -1922,6 +1922,7 @@ registers, find a list below: ...@@ -1922,6 +1922,7 @@ registers, find a list below:
PPC | KVM_REG_PPC_TIDR | 64 PPC | KVM_REG_PPC_TIDR | 64
PPC | KVM_REG_PPC_PSSCR | 64 PPC | KVM_REG_PPC_PSSCR | 64
PPC | KVM_REG_PPC_DEC_EXPIRY | 64 PPC | KVM_REG_PPC_DEC_EXPIRY | 64
PPC | KVM_REG_PPC_PTCR | 64
PPC | KVM_REG_PPC_TM_GPR0 | 64 PPC | KVM_REG_PPC_TM_GPR0 | 64
... ...
PPC | KVM_REG_PPC_TM_GPR31 | 64 PPC | KVM_REG_PPC_TM_GPR31 | 64
...@@ -2269,6 +2270,10 @@ The supported flags are: ...@@ -2269,6 +2270,10 @@ The supported flags are:
The emulated MMU supports 1T segments in addition to the The emulated MMU supports 1T segments in addition to the
standard 256M ones. standard 256M ones.
- KVM_PPC_NO_HASH
This flag indicates that HPT guests are not supported by KVM,
thus all guests must use radix MMU mode.
The "slb_size" field indicates how many SLB entries are supported The "slb_size" field indicates how many SLB entries are supported
The "sps" array contains 8 entries indicating the supported base The "sps" array contains 8 entries indicating the supported base
...@@ -4531,6 +4536,20 @@ With this capability, a guest may read the MSR_PLATFORM_INFO MSR. Otherwise, ...@@ -4531,6 +4536,20 @@ With this capability, a guest may read the MSR_PLATFORM_INFO MSR. Otherwise,
a #GP would be raised when the guest tries to access. Currently, this a #GP would be raised when the guest tries to access. Currently, this
capability does not enable write permissions of this MSR for the guest. capability does not enable write permissions of this MSR for the guest.
7.16 KVM_CAP_PPC_NESTED_HV
Architectures: ppc
Parameters: none
Returns: 0 on success, -EINVAL when the implementation doesn't support
nested-HV virtualization.
HV-KVM on POWER9 and later systems allows for "nested-HV"
virtualization, which provides a way for a guest VM to run guests that
can run using the CPU's supervisor mode (privileged non-hypervisor
state). Enabling this capability on a VM depends on the CPU having
the necessary functionality and on the facility being enabled with a
kvm-hv module parameter.
8. Other capabilities. 8. Other capabilities.
---------------------- ----------------------
......
...@@ -150,4 +150,25 @@ extern s32 patch__memset_nocache, patch__memcpy_nocache; ...@@ -150,4 +150,25 @@ extern s32 patch__memset_nocache, patch__memcpy_nocache;
extern long flush_count_cache; extern long flush_count_cache;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
#else
static inline void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
bool preserve_nv) { }
static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
bool preserve_nv) { }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
void kvmhv_save_host_pmu(void);
void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
unsigned long dabrx);
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
...@@ -203,6 +203,18 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) ...@@ -203,6 +203,18 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
BUG(); BUG();
} }
static inline unsigned int ap_to_shift(unsigned long ap)
{
int psize;
for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
if (mmu_psize_defs[psize].ap == ap)
return mmu_psize_defs[psize].shift;
}
return -1;
}
static inline unsigned long get_sllp_encoding(int psize) static inline unsigned long get_sllp_encoding(int psize)
{ {
unsigned long sllp; unsigned long sllp;
......
...@@ -53,6 +53,7 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid, ...@@ -53,6 +53,7 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
unsigned long addr, unsigned long addr,
unsigned long page_size); unsigned long page_size);
extern void radix__flush_pwc_lpid(unsigned int lpid); extern void radix__flush_pwc_lpid(unsigned int lpid);
extern void radix__flush_tlb_lpid(unsigned int lpid);
extern void radix__local_flush_tlb_lpid(unsigned int lpid); extern void radix__local_flush_tlb_lpid(unsigned int lpid);
extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid); extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
......
...@@ -322,6 +322,11 @@ ...@@ -322,6 +322,11 @@
#define H_GET_24X7_DATA 0xF07C #define H_GET_24X7_DATA 0xF07C
#define H_GET_PERF_COUNTER_INFO 0xF080 #define H_GET_PERF_COUNTER_INFO 0xF080
/* Platform-specific hcalls used for nested HV KVM */
#define H_SET_PARTITION_TABLE 0xF800
#define H_ENTER_NESTED 0xF804
#define H_TLB_INVALIDATE 0xF808
/* Values for 2nd argument to H_SET_MODE */ /* Values for 2nd argument to H_SET_MODE */
#define H_SET_MODE_RESOURCE_SET_CIABR 1 #define H_SET_MODE_RESOURCE_SET_CIABR 1
#define H_SET_MODE_RESOURCE_SET_DAWR 2 #define H_SET_MODE_RESOURCE_SET_DAWR 2
...@@ -461,6 +466,42 @@ struct h_cpu_char_result { ...@@ -461,6 +466,42 @@ struct h_cpu_char_result {
u64 behaviour; u64 behaviour;
}; };
/* Register state for entering a nested guest with H_ENTER_NESTED */
struct hv_guest_state {
u64 version; /* version of this structure layout */
u32 lpid;
u32 vcpu_token;
/* These registers are hypervisor privileged (at least for writing) */
u64 lpcr;
u64 pcr;
u64 amor;
u64 dpdes;
u64 hfscr;
s64 tb_offset;
u64 dawr0;
u64 dawrx0;
u64 ciabr;
u64 hdec_expiry;
u64 purr;
u64 spurr;
u64 ic;
u64 vtb;
u64 hdar;
u64 hdsisr;
u64 heir;
u64 asdr;
/* These are OS privileged but need to be set late in guest entry */
u64 srr0;
u64 srr1;
u64 sprg[4];
u64 pidr;
u64 cfar;
u64 ppr;
};
/* Latest version of hv_guest_state structure */
#define HV_GUEST_STATE_VERSION 1
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */ #endif /* _ASM_POWERPC_HVCALL_H */
...@@ -84,7 +84,6 @@ ...@@ -84,7 +84,6 @@
#define BOOK3S_INTERRUPT_INST_STORAGE 0x400 #define BOOK3S_INTERRUPT_INST_STORAGE 0x400
#define BOOK3S_INTERRUPT_INST_SEGMENT 0x480 #define BOOK3S_INTERRUPT_INST_SEGMENT 0x480
#define BOOK3S_INTERRUPT_EXTERNAL 0x500 #define BOOK3S_INTERRUPT_EXTERNAL 0x500
#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501
#define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502 #define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502
#define BOOK3S_INTERRUPT_ALIGNMENT 0x600 #define BOOK3S_INTERRUPT_ALIGNMENT 0x600
#define BOOK3S_INTERRUPT_PROGRAM 0x700 #define BOOK3S_INTERRUPT_PROGRAM 0x700
...@@ -134,8 +133,7 @@ ...@@ -134,8 +133,7 @@
#define BOOK3S_IRQPRIO_EXTERNAL 14 #define BOOK3S_IRQPRIO_EXTERNAL 14
#define BOOK3S_IRQPRIO_DECREMENTER 15 #define BOOK3S_IRQPRIO_DECREMENTER 15
#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16 #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16
#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 17 #define BOOK3S_IRQPRIO_MAX 17
#define BOOK3S_IRQPRIO_MAX 18
#define BOOK3S_HFLAG_DCBZ32 0x1 #define BOOK3S_HFLAG_DCBZ32 0x1
#define BOOK3S_HFLAG_SLB 0x2 #define BOOK3S_HFLAG_SLB 0x2
......
...@@ -188,14 +188,37 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); ...@@ -188,14 +188,37 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run, extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
struct kvm_vcpu *vcpu, struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr); unsigned long ea, unsigned long dsisr);
extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, u64 root,
u64 *pte_ret_p);
extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, u64 table,
int table_index, u64 *pte_ret_p);
extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, bool data, bool iswrite); struct kvmppc_pte *gpte, bool data, bool iswrite);
extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
unsigned int shift, struct kvm_memory_slot *memslot,
unsigned int lpid);
extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
bool writing, unsigned long gpa,
unsigned int lpid);
extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
unsigned long gpa,
struct kvm_memory_slot *memslot,
bool writing, bool kvm_ro,
pte_t *inserted_pte, unsigned int *levelp);
extern int kvmppc_init_vm_radix(struct kvm *kvm); extern int kvmppc_init_vm_radix(struct kvm *kvm);
extern void kvmppc_free_radix(struct kvm *kvm); extern void kvmppc_free_radix(struct kvm *kvm);
extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
unsigned int lpid);
extern int kvmppc_radix_init(void); extern int kvmppc_radix_init(void);
extern void kvmppc_radix_exit(void); extern void kvmppc_radix_exit(void);
extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn); unsigned long gfn);
extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
unsigned long gpa, unsigned int shift,
struct kvm_memory_slot *memslot,
unsigned int lpid);
extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn); unsigned long gfn);
extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
...@@ -271,6 +294,21 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {} ...@@ -271,6 +294,21 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {}
static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {} static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {}
#endif #endif
long kvmhv_nested_init(void);
void kvmhv_nested_exit(void);
void kvmhv_vm_nested_init(struct kvm *kvm);
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
void kvmhv_release_all_nested(struct kvm *kvm);
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu,
u64 time_limit, unsigned long lpcr);
void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
struct hv_guest_state *hr);
long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu);
void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
extern int kvm_irq_bypass; extern int kvm_irq_bypass;
...@@ -301,12 +339,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) ...@@ -301,12 +339,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{ {
vcpu->arch.cr = val; vcpu->arch.regs.ccr = val;
} }
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{ {
return vcpu->arch.cr; return vcpu->arch.regs.ccr;
} }
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
...@@ -384,9 +422,6 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu); ...@@ -384,9 +422,6 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
/* TO = 31 for unconditional trap */ /* TO = 31 for unconditional trap */
#define INS_TW 0x7fe00008 #define INS_TW 0x7fe00008
/* LPIDs we support with this build -- runtime limit may be lower */
#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
#define SPLIT_HACK_MASK 0xff000000 #define SPLIT_HACK_MASK 0xff000000
#define SPLIT_HACK_OFFS 0xfb000000 #define SPLIT_HACK_OFFS 0xfb000000
......
...@@ -23,6 +23,108 @@ ...@@ -23,6 +23,108 @@
#include <linux/string.h> #include <linux/string.h>
#include <asm/bitops.h> #include <asm/bitops.h>
#include <asm/book3s/64/mmu-hash.h> #include <asm/book3s/64/mmu-hash.h>
#include <asm/cpu_has_feature.h>
#include <asm/ppc-opcode.h>
#ifdef CONFIG_PPC_PSERIES
static inline bool kvmhv_on_pseries(void)
{
return !cpu_has_feature(CPU_FTR_HVMODE);
}
#else
static inline bool kvmhv_on_pseries(void)
{
return false;
}
#endif
/*
* Structure for a nested guest, that is, for a guest that is managed by
* one of our guests.
*/
struct kvm_nested_guest {
struct kvm *l1_host; /* L1 VM that owns this nested guest */
int l1_lpid; /* lpid L1 guest thinks this guest is */
int shadow_lpid; /* real lpid of this nested guest */
pgd_t *shadow_pgtable; /* our page table for this guest */
u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */
u64 process_table; /* process table entry for this guest */
long refcnt; /* number of pointers to this struct */
struct mutex tlb_lock; /* serialize page faults and tlbies */
struct kvm_nested_guest *next;
cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
short prev_cpu[NR_CPUS];
};
/*
* We define a nested rmap entry as a single 64-bit quantity
* 0xFFF0000000000000 12-bit lpid field
* 0x000FFFFFFFFFF000 40-bit guest 4k page frame number
* 0x0000000000000001 1-bit single entry flag
*/
#define RMAP_NESTED_LPID_MASK 0xFFF0000000000000UL
#define RMAP_NESTED_LPID_SHIFT (52)
#define RMAP_NESTED_GPA_MASK 0x000FFFFFFFFFF000UL
#define RMAP_NESTED_IS_SINGLE_ENTRY 0x0000000000000001UL
/* Structure for a nested guest rmap entry */
struct rmap_nested {
struct llist_node list;
u64 rmap;
};
/*
* for_each_nest_rmap_safe - iterate over the list of nested rmap entries
* safe against removal of the list entry or NULL list
* @pos: a (struct rmap_nested *) to use as a loop cursor
* @node: pointer to the first entry
* NOTE: this can be NULL
* @rmapp: an (unsigned long *) in which to return the rmap entries on each
* iteration
* NOTE: this must point to already allocated memory
*
* The nested_rmap is a llist of (struct rmap_nested) entries pointed to by the
* rmap entry in the memslot. The list is always terminated by a "single entry"
* stored in the list element of the final entry of the llist. If there is ONLY
* a single entry then this is itself in the rmap entry of the memslot, not a
* llist head pointer.
*
* Note that the iterator below assumes that a nested rmap entry is always
* non-zero. This is true for our usage because the LPID field is always
* non-zero (zero is reserved for the host).
*
* This should be used to iterate over the list of rmap_nested entries with
* processing done on the u64 rmap value given by each iteration. This is safe
* against removal of list entries and it is always safe to call free on (pos).
*
* e.g.
* struct rmap_nested *cursor;
* struct llist_node *first;
* unsigned long rmap;
* for_each_nest_rmap_safe(cursor, first, &rmap) {
* do_something(rmap);
* free(cursor);
* }
*/
#define for_each_nest_rmap_safe(pos, node, rmapp) \
for ((pos) = llist_entry((node), typeof(*(pos)), list); \
(node) && \
(*(rmapp) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
((u64) (node)) : ((pos)->rmap))) && \
(((node) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
((struct llist_node *) ((pos) = NULL)) : \
(pos)->list.next)), true); \
(pos) = llist_entry((node), typeof(*(pos)), list))
struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
bool create);
void kvmhv_put_nested(struct kvm_nested_guest *gp);
int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid);
/* Encoding of first parameter for H_TLB_INVALIDATE */
#define H_TLBIE_P1_ENC(ric, prs, r) (___PPC_RIC(ric) | ___PPC_PRS(prs) | \
___PPC_R(r))
/* Power architecture requires HPT is at least 256kiB, at most 64TiB */ /* Power architecture requires HPT is at least 256kiB, at most 64TiB */
#define PPC_MIN_HPT_ORDER 18 #define PPC_MIN_HPT_ORDER 18
...@@ -435,6 +537,7 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm) ...@@ -435,6 +537,7 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
} }
extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
extern void kvmhv_radix_debugfs_init(struct kvm *kvm);
extern void kvmhv_rm_send_ipi(int cpu); extern void kvmhv_rm_send_ipi(int cpu);
...@@ -482,7 +585,7 @@ static inline u64 sanitize_msr(u64 msr) ...@@ -482,7 +585,7 @@ static inline u64 sanitize_msr(u64 msr)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
{ {
vcpu->arch.cr = vcpu->arch.cr_tm; vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
vcpu->arch.regs.xer = vcpu->arch.xer_tm; vcpu->arch.regs.xer = vcpu->arch.xer_tm;
vcpu->arch.regs.link = vcpu->arch.lr_tm; vcpu->arch.regs.link = vcpu->arch.lr_tm;
vcpu->arch.regs.ctr = vcpu->arch.ctr_tm; vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
...@@ -499,7 +602,7 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) ...@@ -499,7 +602,7 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
{ {
vcpu->arch.cr_tm = vcpu->arch.cr; vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
vcpu->arch.xer_tm = vcpu->arch.regs.xer; vcpu->arch.xer_tm = vcpu->arch.regs.xer;
vcpu->arch.lr_tm = vcpu->arch.regs.link; vcpu->arch.lr_tm = vcpu->arch.regs.link;
vcpu->arch.ctr_tm = vcpu->arch.regs.ctr; vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
...@@ -515,6 +618,17 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) ...@@ -515,6 +618,17 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
} }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
unsigned long gpa, unsigned int level,
unsigned long mmu_seq, unsigned int lpid,
unsigned long *rmapp, struct rmap_nested **n_rmap);
extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
struct rmap_nested **n_rmap);
extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
struct kvm_memory_slot *memslot,
unsigned long gpa, unsigned long hpa,
unsigned long nbytes);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#endif /* __ASM_KVM_BOOK3S_64_H__ */ #endif /* __ASM_KVM_BOOK3S_64_H__ */
...@@ -25,6 +25,9 @@ ...@@ -25,6 +25,9 @@
#define XICS_MFRR 0xc #define XICS_MFRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */ #define XICS_IPI 2 /* interrupt source # for IPIs */
/* LPIDs we support with this build -- runtime limit may be lower */
#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
/* Maximum number of threads per physical core */ /* Maximum number of threads per physical core */
#define MAX_SMT_THREADS 8 #define MAX_SMT_THREADS 8
......
...@@ -46,12 +46,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) ...@@ -46,12 +46,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{ {
vcpu->arch.cr = val; vcpu->arch.regs.ccr = val;
} }
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{ {
return vcpu->arch.cr; return vcpu->arch.regs.ccr;
} }
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */ #include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES) #define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
#define KVM_MAX_NESTED_GUESTS KVMPPC_NR_LPIDS
#else #else
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
...@@ -94,6 +95,7 @@ struct dtl_entry; ...@@ -94,6 +95,7 @@ struct dtl_entry;
struct kvmppc_vcpu_book3s; struct kvmppc_vcpu_book3s;
struct kvmppc_book3s_shadow_vcpu; struct kvmppc_book3s_shadow_vcpu;
struct kvm_nested_guest;
struct kvm_vm_stat { struct kvm_vm_stat {
ulong remote_tlb_flush; ulong remote_tlb_flush;
...@@ -287,10 +289,12 @@ struct kvm_arch { ...@@ -287,10 +289,12 @@ struct kvm_arch {
u8 radix; u8 radix;
u8 fwnmi_enabled; u8 fwnmi_enabled;
bool threads_indep; bool threads_indep;
bool nested_enable;
pgd_t *pgtable; pgd_t *pgtable;
u64 process_table; u64 process_table;
struct dentry *debugfs_dir; struct dentry *debugfs_dir;
struct dentry *htab_dentry; struct dentry *htab_dentry;
struct dentry *radix_dentry;
struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */ struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
...@@ -311,6 +315,9 @@ struct kvm_arch { ...@@ -311,6 +315,9 @@ struct kvm_arch {
#endif #endif
struct kvmppc_ops *kvm_ops; struct kvmppc_ops *kvm_ops;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
u64 l1_ptcr;
int max_nested_lpid;
struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS];
/* This array can grow quite large, keep it at the end */ /* This array can grow quite large, keep it at the end */
struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
#endif #endif
...@@ -360,7 +367,9 @@ struct kvmppc_pte { ...@@ -360,7 +367,9 @@ struct kvmppc_pte {
bool may_write : 1; bool may_write : 1;
bool may_execute : 1; bool may_execute : 1;
unsigned long wimg; unsigned long wimg;
unsigned long rc;
u8 page_size; /* MMU_PAGE_xxx */ u8 page_size; /* MMU_PAGE_xxx */
u8 page_shift;
}; };
struct kvmppc_mmu { struct kvmppc_mmu {
...@@ -537,8 +546,6 @@ struct kvm_vcpu_arch { ...@@ -537,8 +546,6 @@ struct kvm_vcpu_arch {
ulong tar; ulong tar;
#endif #endif
u32 cr;
#ifdef CONFIG_PPC_BOOK3S #ifdef CONFIG_PPC_BOOK3S
ulong hflags; ulong hflags;
ulong guest_owned_ext; ulong guest_owned_ext;
...@@ -707,6 +714,7 @@ struct kvm_vcpu_arch { ...@@ -707,6 +714,7 @@ struct kvm_vcpu_arch {
u8 hcall_needed; u8 hcall_needed;
u8 epr_flags; /* KVMPPC_EPR_xxx */ u8 epr_flags; /* KVMPPC_EPR_xxx */
u8 epr_needed; u8 epr_needed;
u8 external_oneshot; /* clear external irq after delivery */
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
...@@ -781,6 +789,10 @@ struct kvm_vcpu_arch { ...@@ -781,6 +789,10 @@ struct kvm_vcpu_arch {
u32 emul_inst; u32 emul_inst;
u32 online; u32 online;
/* For support of nested guests */
struct kvm_nested_guest *nested;
u32 nested_vcpu_id;
#endif #endif
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
......
...@@ -194,9 +194,7 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table( ...@@ -194,9 +194,7 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \ (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
(stt)->size, (ioba), (npages)) ? \ (stt)->size, (ioba), (npages)) ? \
H_PARAMETER : H_SUCCESS) H_PARAMETER : H_SUCCESS)
extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt, extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
unsigned long tce);
extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
unsigned long *ua, unsigned long **prmap); unsigned long *ua, unsigned long **prmap);
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt, extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
unsigned long idx, unsigned long tce); unsigned long idx, unsigned long tce);
...@@ -327,6 +325,7 @@ struct kvmppc_ops { ...@@ -327,6 +325,7 @@ struct kvmppc_ops {
int (*set_smt_mode)(struct kvm *kvm, unsigned long mode, int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
unsigned long flags); unsigned long flags);
void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr); void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr);
int (*enable_nested)(struct kvm *kvm);
}; };
extern struct kvmppc_ops *kvmppc_hv_ops; extern struct kvmppc_ops *kvmppc_hv_ops;
...@@ -585,6 +584,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval); ...@@ -585,6 +584,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status); int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
#else #else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority) { return -1; } u32 priority) { return -1; }
...@@ -607,6 +607,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur ...@@ -607,6 +607,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; } int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
#endif /* CONFIG_KVM_XIVE */ #endif /* CONFIG_KVM_XIVE */
/* /*
...@@ -652,6 +653,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, ...@@ -652,6 +653,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr); unsigned long mfrr);
int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr); int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
/* /*
* Host-side operations we want to set up while running in real * Host-side operations we want to set up while running in real
......
...@@ -104,6 +104,7 @@ ...@@ -104,6 +104,7 @@
#define OP_31_XOP_LHZUX 311 #define OP_31_XOP_LHZUX 311
#define OP_31_XOP_MSGSNDP 142 #define OP_31_XOP_MSGSNDP 142
#define OP_31_XOP_MSGCLRP 174 #define OP_31_XOP_MSGCLRP 174
#define OP_31_XOP_TLBIE 306
#define OP_31_XOP_MFSPR 339 #define OP_31_XOP_MFSPR 339
#define OP_31_XOP_LWAX 341 #define OP_31_XOP_LWAX 341
#define OP_31_XOP_LHAX 343 #define OP_31_XOP_LHAX 343
......
...@@ -415,6 +415,7 @@ ...@@ -415,6 +415,7 @@
#define HFSCR_DSCR __MASK(FSCR_DSCR_LG) #define HFSCR_DSCR __MASK(FSCR_DSCR_LG)
#define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG) #define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG)
#define HFSCR_FP __MASK(FSCR_FP_LG) #define HFSCR_FP __MASK(FSCR_FP_LG)
#define HFSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */
#define SPRN_TAR 0x32f /* Target Address Register */ #define SPRN_TAR 0x32f /* Target Address Register */
#define SPRN_LPCR 0x13E /* LPAR Control Register */ #define SPRN_LPCR 0x13E /* LPAR Control Register */
#define LPCR_VPM0 ASM_CONST(0x8000000000000000) #define LPCR_VPM0 ASM_CONST(0x8000000000000000)
...@@ -766,6 +767,7 @@ ...@@ -766,6 +767,7 @@
#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */
#define HSRR1_DENORM 0x00100000 /* Denorm exception */ #define HSRR1_DENORM 0x00100000 /* Denorm exception */
#define HSRR1_HISI_WRITE 0x00010000 /* HISI bcs couldn't update mem */
#define SPRN_TBCTL 0x35f /* PA6T Timebase control register */ #define SPRN_TBCTL 0x35f /* PA6T Timebase control register */
#define TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */ #define TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */
......
...@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char { ...@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
/* Transactional Memory checkpointed state: /* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs * This is all GPRs, all VSX regs and a subset of SPRs
......
...@@ -438,7 +438,7 @@ int main(void) ...@@ -438,7 +438,7 @@ int main(void)
#ifdef CONFIG_PPC_BOOK3S #ifdef CONFIG_PPC_BOOK3S
OFFSET(VCPU_TAR, kvm_vcpu, arch.tar); OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
#endif #endif
OFFSET(VCPU_CR, kvm_vcpu, arch.cr); OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip); OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr); OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
...@@ -503,6 +503,7 @@ int main(void) ...@@ -503,6 +503,7 @@ int main(void)
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty); OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst); OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested);
OFFSET(VCPU_CPU, kvm_vcpu, cpu); OFFSET(VCPU_CPU, kvm_vcpu, cpu);
OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu); OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
#endif #endif
...@@ -695,7 +696,7 @@ int main(void) ...@@ -695,7 +696,7 @@ int main(void)
#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_BOOK3S_64 */
#else /* CONFIG_PPC_BOOK3S */ #else /* CONFIG_PPC_BOOK3S */
OFFSET(VCPU_CR, kvm_vcpu, arch.cr); OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer); OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link); OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr); OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
......
...@@ -147,8 +147,8 @@ __init_hvmode_206: ...@@ -147,8 +147,8 @@ __init_hvmode_206:
rldicl. r0,r3,4,63 rldicl. r0,r3,4,63
bnelr bnelr
ld r5,CPU_SPEC_FEATURES(r4) ld r5,CPU_SPEC_FEATURES(r4)
LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST)
xor r5,r5,r6 andc r5,r5,r6
std r5,CPU_SPEC_FEATURES(r4) std r5,CPU_SPEC_FEATURES(r4)
blr blr
......
...@@ -75,7 +75,8 @@ kvm-hv-y += \ ...@@ -75,7 +75,8 @@ kvm-hv-y += \
book3s_hv.o \ book3s_hv.o \
book3s_hv_interrupts.o \ book3s_hv_interrupts.o \
book3s_64_mmu_hv.o \ book3s_64_mmu_hv.o \
book3s_64_mmu_radix.o book3s_64_mmu_radix.o \
book3s_hv_nested.o
kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
book3s_hv_tm.o book3s_hv_tm.o
......
...@@ -78,8 +78,11 @@ void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) ...@@ -78,8 +78,11 @@ void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
{ {
if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
ulong pc = kvmppc_get_pc(vcpu); ulong pc = kvmppc_get_pc(vcpu);
ulong lr = kvmppc_get_lr(vcpu);
if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
} }
} }
...@@ -150,7 +153,6 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) ...@@ -150,7 +153,6 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break; case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break; case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break; case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break;
case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break; case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break;
case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break; case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break;
case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break; case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break;
...@@ -236,18 +238,35 @@ EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec); ...@@ -236,18 +238,35 @@ EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq) struct kvm_interrupt *irq)
{ {
unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL; /*
* This case (KVM_INTERRUPT_SET) should never actually arise for
if (irq->irq == KVM_INTERRUPT_SET_LEVEL) * a pseries guest (because pseries guests expect their interrupt
vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL; * controllers to continue asserting an external interrupt request
* until it is acknowledged at the interrupt controller), but is
* included to avoid ABI breakage and potentially for other
* sorts of guest.
*
* There is a subtlety here: HV KVM does not test the
* external_oneshot flag in the code that synthesizes
* external interrupts for the guest just before entering
* the guest. That is OK even if userspace did do a
* KVM_INTERRUPT_SET on a pseries guest vcpu, because the
* caller (kvm_vcpu_ioctl_interrupt) does a kvm_vcpu_kick()
* which ends up doing a smp_send_reschedule(), which will
* pull the guest all the way out to the host, meaning that
* we will call kvmppc_core_prepare_to_enter() before entering
* the guest again, and that will handle the external_oneshot
* flag correctly.
*/
if (irq->irq == KVM_INTERRUPT_SET)
vcpu->arch.external_oneshot = 1;
kvmppc_book3s_queue_irqprio(vcpu, vec); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
} }
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{ {
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
} }
void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
...@@ -278,7 +297,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, ...@@ -278,7 +297,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
vec = BOOK3S_INTERRUPT_DECREMENTER; vec = BOOK3S_INTERRUPT_DECREMENTER;
break; break;
case BOOK3S_IRQPRIO_EXTERNAL: case BOOK3S_IRQPRIO_EXTERNAL:
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit; deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_EXTERNAL; vec = BOOK3S_INTERRUPT_EXTERNAL;
break; break;
...@@ -352,8 +370,16 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) ...@@ -352,8 +370,16 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
case BOOK3S_IRQPRIO_DECREMENTER: case BOOK3S_IRQPRIO_DECREMENTER:
/* DEC interrupts get cleared by mtdec */ /* DEC interrupts get cleared by mtdec */
return false; return false;
case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: case BOOK3S_IRQPRIO_EXTERNAL:
/* External interrupts get cleared by userspace */ /*
* External interrupts get cleared by userspace
* except when set by the KVM_INTERRUPT ioctl with
* KVM_INTERRUPT_SET (not KVM_INTERRUPT_SET_LEVEL).
*/
if (vcpu->arch.external_oneshot) {
vcpu->arch.external_oneshot = 0;
return true;
}
return false; return false;
} }
......
...@@ -268,13 +268,12 @@ int kvmppc_mmu_hv_init(void) ...@@ -268,13 +268,12 @@ int kvmppc_mmu_hv_init(void)
{ {
unsigned long host_lpid, rsvd_lpid; unsigned long host_lpid, rsvd_lpid;
if (!cpu_has_feature(CPU_FTR_HVMODE))
return -EINVAL;
if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE)) if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
return -EINVAL; return -EINVAL;
/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */ /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
host_lpid = 0;
if (cpu_has_feature(CPU_FTR_HVMODE))
host_lpid = mfspr(SPRN_LPID); host_lpid = mfspr(SPRN_LPID);
rsvd_lpid = LPID_RSVD; rsvd_lpid = LPID_RSVD;
......
This diff is collapsed.
...@@ -363,6 +363,40 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -363,6 +363,40 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
return ret; return ret;
} }
static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long tce)
{
unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
enum dma_data_direction dir = iommu_tce_direction(tce);
struct kvmppc_spapr_tce_iommu_table *stit;
unsigned long ua = 0;
/* Allow userspace to poison TCE table */
if (dir == DMA_NONE)
return H_SUCCESS;
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_TOO_HARD;
if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
return H_TOO_HARD;
list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
unsigned long hpa = 0;
struct mm_iommu_table_group_mem_t *mem;
long shift = stit->tbl->it_page_shift;
mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
if (!mem)
return H_TOO_HARD;
if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa))
return H_TOO_HARD;
}
return H_SUCCESS;
}
static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry) static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
{ {
unsigned long hpa = 0; unsigned long hpa = 0;
...@@ -401,7 +435,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm, ...@@ -401,7 +435,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
long ret; long ret;
if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
return H_HARDWARE; return H_TOO_HARD;
if (dir == DMA_NONE) if (dir == DMA_NONE)
return H_SUCCESS; return H_SUCCESS;
...@@ -449,15 +483,15 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, ...@@ -449,15 +483,15 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
return H_TOO_HARD; return H_TOO_HARD;
if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa))) if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
return H_HARDWARE; return H_TOO_HARD;
if (mm_iommu_mapped_inc(mem)) if (mm_iommu_mapped_inc(mem))
return H_CLOSED; return H_TOO_HARD;
ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
if (WARN_ON_ONCE(ret)) { if (WARN_ON_ONCE(ret)) {
mm_iommu_mapped_dec(mem); mm_iommu_mapped_dec(mem);
return H_HARDWARE; return H_TOO_HARD;
} }
if (dir != DMA_NONE) if (dir != DMA_NONE)
...@@ -517,8 +551,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -517,8 +551,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
idx = srcu_read_lock(&vcpu->kvm->srcu); idx = srcu_read_lock(&vcpu->kvm->srcu);
if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) {
ret = H_PARAMETER; ret = H_PARAMETER;
goto unlock_exit; goto unlock_exit;
} }
...@@ -533,14 +566,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -533,14 +566,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
entry, ua, dir); entry, ua, dir);
if (ret == H_SUCCESS) if (ret != H_SUCCESS) {
continue;
if (ret == H_TOO_HARD)
goto unlock_exit;
WARN_ON_ONCE(1);
kvmppc_clear_tce(stit->tbl, entry); kvmppc_clear_tce(stit->tbl, entry);
goto unlock_exit;
}
} }
kvmppc_tce_put(stt, entry, tce); kvmppc_tce_put(stt, entry, tce);
...@@ -583,7 +612,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -583,7 +612,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
return ret; return ret;
idx = srcu_read_lock(&vcpu->kvm->srcu); idx = srcu_read_lock(&vcpu->kvm->srcu);
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
ret = H_TOO_HARD; ret = H_TOO_HARD;
goto unlock_exit; goto unlock_exit;
} }
...@@ -599,10 +628,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -599,10 +628,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
ret = kvmppc_tce_validate(stt, tce); ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS) if (ret != H_SUCCESS)
goto unlock_exit; goto unlock_exit;
}
for (i = 0; i < npages; ++i) {
/*
* This looks unsafe, because we validate, then regrab
* the TCE from userspace which could have been changed by
* another thread.
*
* But it actually is safe, because the relevant checks will be
* re-executed in the following code. If userspace tries to
* change this dodgily it will result in a messier failure mode
* but won't threaten the host.
*/
if (get_user(tce, tces + i)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
tce = be64_to_cpu(tce);
if (kvmppc_gpa_to_ua(vcpu->kvm, if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
&ua, NULL))
return H_PARAMETER; return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
...@@ -610,14 +655,10 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -610,14 +655,10 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
stit->tbl, entry + i, ua, stit->tbl, entry + i, ua,
iommu_tce_direction(tce)); iommu_tce_direction(tce));
if (ret == H_SUCCESS) if (ret != H_SUCCESS) {
continue;
if (ret == H_TOO_HARD)
goto unlock_exit;
WARN_ON_ONCE(1);
kvmppc_clear_tce(stit->tbl, entry); kvmppc_clear_tce(stit->tbl, entry);
goto unlock_exit;
}
} }
kvmppc_tce_put(stt, entry + i, tce); kvmppc_tce_put(stt, entry + i, tce);
......
...@@ -87,6 +87,7 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm, ...@@ -87,6 +87,7 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
} }
EXPORT_SYMBOL_GPL(kvmppc_find_table); EXPORT_SYMBOL_GPL(kvmppc_find_table);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/* /*
* Validates TCE address. * Validates TCE address.
* At the moment flags and page mask are validated. * At the moment flags and page mask are validated.
...@@ -94,14 +95,14 @@ EXPORT_SYMBOL_GPL(kvmppc_find_table); ...@@ -94,14 +95,14 @@ EXPORT_SYMBOL_GPL(kvmppc_find_table);
* to the table and user space is supposed to process them), we can skip * to the table and user space is supposed to process them), we can skip
* checking other things (such as TCE is a guest RAM address or the page * checking other things (such as TCE is a guest RAM address or the page
* was actually allocated). * was actually allocated).
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/ */
long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long tce)
{ {
unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE); unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
enum dma_data_direction dir = iommu_tce_direction(tce); enum dma_data_direction dir = iommu_tce_direction(tce);
struct kvmppc_spapr_tce_iommu_table *stit;
unsigned long ua = 0;
/* Allow userspace to poison TCE table */ /* Allow userspace to poison TCE table */
if (dir == DMA_NONE) if (dir == DMA_NONE)
...@@ -110,9 +111,25 @@ long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) ...@@ -110,9 +111,25 @@ long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
if (iommu_tce_check_gpa(stt->page_shift, gpa)) if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_PARAMETER; return H_PARAMETER;
if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
return H_TOO_HARD;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
unsigned long hpa = 0;
struct mm_iommu_table_group_mem_t *mem;
long shift = stit->tbl->it_page_shift;
mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift);
if (!mem)
return H_TOO_HARD;
if (mm_iommu_ua_to_hpa_rm(mem, ua, shift, &hpa))
return H_TOO_HARD;
}
return H_SUCCESS; return H_SUCCESS;
} }
EXPORT_SYMBOL_GPL(kvmppc_tce_validate); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
/* Note on the use of page_address() in real mode, /* Note on the use of page_address() in real mode,
* *
...@@ -164,10 +181,10 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, ...@@ -164,10 +181,10 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
} }
EXPORT_SYMBOL_GPL(kvmppc_tce_put); EXPORT_SYMBOL_GPL(kvmppc_tce_put);
long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
unsigned long *ua, unsigned long **prmap) unsigned long *ua, unsigned long **prmap)
{ {
unsigned long gfn = gpa >> PAGE_SHIFT; unsigned long gfn = tce >> PAGE_SHIFT;
struct kvm_memory_slot *memslot; struct kvm_memory_slot *memslot;
memslot = search_memslots(kvm_memslots(kvm), gfn); memslot = search_memslots(kvm_memslots(kvm), gfn);
...@@ -175,7 +192,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, ...@@ -175,7 +192,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
return -EINVAL; return -EINVAL;
*ua = __gfn_to_hva_memslot(memslot, gfn) | *ua = __gfn_to_hva_memslot(memslot, gfn) |
(gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (prmap) if (prmap)
...@@ -184,7 +201,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, ...@@ -184,7 +201,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
...@@ -300,10 +317,10 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, ...@@ -300,10 +317,10 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift, if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
&hpa))) &hpa)))
return H_HARDWARE; return H_TOO_HARD;
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
return H_CLOSED; return H_TOO_HARD;
ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
if (ret) { if (ret) {
...@@ -368,13 +385,12 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -368,13 +385,12 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS) if (ret != H_SUCCESS)
return ret; return ret;
ret = kvmppc_tce_validate(stt, tce); ret = kvmppc_rm_tce_validate(stt, tce);
if (ret != H_SUCCESS) if (ret != H_SUCCESS)
return ret; return ret;
dir = iommu_tce_direction(tce); dir = iommu_tce_direction(tce);
if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
return H_PARAMETER; return H_PARAMETER;
entry = ioba >> stt->page_shift; entry = ioba >> stt->page_shift;
...@@ -387,14 +403,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ...@@ -387,14 +403,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
stit->tbl, entry, ua, dir); stit->tbl, entry, ua, dir);
if (ret == H_SUCCESS) if (ret != H_SUCCESS) {
continue;
if (ret == H_TOO_HARD)
return ret;
WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
return ret;
}
} }
kvmppc_tce_put(stt, entry, tce); kvmppc_tce_put(stt, entry, tce);
...@@ -480,7 +492,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -480,7 +492,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
*/ */
struct mm_iommu_table_group_mem_t *mem; struct mm_iommu_table_group_mem_t *mem;
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
return H_TOO_HARD; return H_TOO_HARD;
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
...@@ -496,12 +508,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -496,12 +508,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
* We do not require memory to be preregistered in this case * We do not require memory to be preregistered in this case
* so lock rmap and do __find_linux_pte_or_hugepte(). * so lock rmap and do __find_linux_pte_or_hugepte().
*/ */
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
return H_TOO_HARD; return H_TOO_HARD;
rmap = (void *) vmalloc_to_phys(rmap); rmap = (void *) vmalloc_to_phys(rmap);
if (WARN_ON_ONCE_RM(!rmap)) if (WARN_ON_ONCE_RM(!rmap))
return H_HARDWARE; return H_TOO_HARD;
/* /*
* Synchronize with the MMU notifier callbacks in * Synchronize with the MMU notifier callbacks in
...@@ -521,14 +533,16 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -521,14 +533,16 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
for (i = 0; i < npages; ++i) { for (i = 0; i < npages; ++i) {
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ret = kvmppc_tce_validate(stt, tce); ret = kvmppc_rm_tce_validate(stt, tce);
if (ret != H_SUCCESS) if (ret != H_SUCCESS)
goto unlock_exit; goto unlock_exit;
}
for (i = 0; i < npages; ++i) {
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0; ua = 0;
if (kvmppc_gpa_to_ua(vcpu->kvm, if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
&ua, NULL))
return H_PARAMETER; return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
...@@ -536,14 +550,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ...@@ -536,14 +550,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
stit->tbl, entry + i, ua, stit->tbl, entry + i, ua,
iommu_tce_direction(tce)); iommu_tce_direction(tce));
if (ret == H_SUCCESS) if (ret != H_SUCCESS) {
continue; kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
entry);
if (ret == H_TOO_HARD)
goto unlock_exit; goto unlock_exit;
}
WARN_ON_ONCE_RM(1);
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
} }
kvmppc_tce_put(stt, entry + i, tce); kvmppc_tce_put(stt, entry + i, tce);
......
...@@ -36,7 +36,6 @@ ...@@ -36,7 +36,6 @@
#define OP_31_XOP_MTSR 210 #define OP_31_XOP_MTSR 210
#define OP_31_XOP_MTSRIN 242 #define OP_31_XOP_MTSRIN 242
#define OP_31_XOP_TLBIEL 274 #define OP_31_XOP_TLBIEL 274
#define OP_31_XOP_TLBIE 306
/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */ /* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
#define OP_31_XOP_FAKE_SC1 308 #define OP_31_XOP_FAKE_SC1 308
#define OP_31_XOP_SLBMTE 402 #define OP_31_XOP_SLBMTE 402
...@@ -110,7 +109,7 @@ static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu) ...@@ -110,7 +109,7 @@ static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
vcpu->arch.ctr_tm = vcpu->arch.regs.ctr; vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
vcpu->arch.tar_tm = vcpu->arch.tar; vcpu->arch.tar_tm = vcpu->arch.tar;
vcpu->arch.lr_tm = vcpu->arch.regs.link; vcpu->arch.lr_tm = vcpu->arch.regs.link;
vcpu->arch.cr_tm = vcpu->arch.cr; vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
vcpu->arch.xer_tm = vcpu->arch.regs.xer; vcpu->arch.xer_tm = vcpu->arch.regs.xer;
vcpu->arch.vrsave_tm = vcpu->arch.vrsave; vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
} }
...@@ -129,7 +128,7 @@ static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu) ...@@ -129,7 +128,7 @@ static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
vcpu->arch.regs.ctr = vcpu->arch.ctr_tm; vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
vcpu->arch.tar = vcpu->arch.tar_tm; vcpu->arch.tar = vcpu->arch.tar_tm;
vcpu->arch.regs.link = vcpu->arch.lr_tm; vcpu->arch.regs.link = vcpu->arch.lr_tm;
vcpu->arch.cr = vcpu->arch.cr_tm; vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
vcpu->arch.regs.xer = vcpu->arch.xer_tm; vcpu->arch.regs.xer = vcpu->arch.xer_tm;
vcpu->arch.vrsave = vcpu->arch.vrsave_tm; vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
} }
...@@ -141,7 +140,7 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val) ...@@ -141,7 +140,7 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
uint64_t texasr; uint64_t texasr;
/* CR0 = 0 | MSR[TS] | 0 */ /* CR0 = 0 | MSR[TS] | 0 */
vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) | vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1)) (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
<< CR0_SHIFT); << CR0_SHIFT);
...@@ -220,7 +219,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) ...@@ -220,7 +219,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
tm_abort(ra_val); tm_abort(ra_val);
/* CR0 = 0 | MSR[TS] | 0 */ /* CR0 = 0 | MSR[TS] | 0 */
vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) | vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1)) (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
<< CR0_SHIFT); << CR0_SHIFT);
...@@ -494,8 +493,8 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -494,8 +493,8 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (!(kvmppc_get_msr(vcpu) & MSR_PR)) { if (!(kvmppc_get_msr(vcpu) & MSR_PR)) {
preempt_disable(); preempt_disable();
vcpu->arch.cr = (CR0_TBEGIN_FAILURE | vcpu->arch.regs.ccr = (CR0_TBEGIN_FAILURE |
(vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT))); (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)));
vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT | vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT |
(((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT)) (((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
......
This diff is collapsed.
...@@ -231,6 +231,15 @@ void kvmhv_rm_send_ipi(int cpu) ...@@ -231,6 +231,15 @@ void kvmhv_rm_send_ipi(int cpu)
void __iomem *xics_phys; void __iomem *xics_phys;
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
/* For a nested hypervisor, use the XICS via hcall */
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
IPI_PRIORITY);
return;
}
/* On POWER9 we can use msgsnd for any destination cpu. */ /* On POWER9 we can use msgsnd for any destination cpu. */
if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (cpu_has_feature(CPU_FTR_ARCH_300)) {
msg |= get_hard_smp_processor_id(cpu); msg |= get_hard_smp_processor_id(cpu);
...@@ -460,12 +469,19 @@ static long kvmppc_read_one_intr(bool *again) ...@@ -460,12 +469,19 @@ static long kvmppc_read_one_intr(bool *again)
return 1; return 1;
/* Now read the interrupt from the ICP */ /* Now read the interrupt from the ICP */
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
xirr = cpu_to_be32(retbuf[0]);
} else {
xics_phys = local_paca->kvm_hstate.xics_phys; xics_phys = local_paca->kvm_hstate.xics_phys;
rc = 0; rc = 0;
if (!xics_phys) if (!xics_phys)
rc = opal_int_get_xirr(&xirr, false); rc = opal_int_get_xirr(&xirr, false);
else else
xirr = __raw_rm_readl(xics_phys + XICS_XIRR); xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
}
if (rc < 0) if (rc < 0)
return 1; return 1;
...@@ -494,7 +510,13 @@ static long kvmppc_read_one_intr(bool *again) ...@@ -494,7 +510,13 @@ static long kvmppc_read_one_intr(bool *again)
*/ */
if (xisr == XICS_IPI) { if (xisr == XICS_IPI) {
rc = 0; rc = 0;
if (xics_phys) { if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf,
hard_smp_processor_id(), 0xff);
plpar_hcall_raw(H_EOI, retbuf, h_xirr);
} else if (xics_phys) {
__raw_rm_writeb(0xff, xics_phys + XICS_MFRR); __raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
__raw_rm_writel(xirr, xics_phys + XICS_XIRR); __raw_rm_writel(xirr, xics_phys + XICS_XIRR);
} else { } else {
...@@ -520,7 +542,13 @@ static long kvmppc_read_one_intr(bool *again) ...@@ -520,7 +542,13 @@ static long kvmppc_read_one_intr(bool *again)
/* We raced with the host, /* We raced with the host,
* we need to resend that IPI, bummer * we need to resend that IPI, bummer
*/ */
if (xics_phys) if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf,
hard_smp_processor_id(),
IPI_PRIORITY);
} else if (xics_phys)
__raw_rm_writeb(IPI_PRIORITY, __raw_rm_writeb(IPI_PRIORITY,
xics_phys + XICS_MFRR); xics_phys + XICS_MFRR);
else else
...@@ -729,3 +757,51 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) ...@@ -729,3 +757,51 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
smp_mb(); smp_mb();
local_paca->kvm_hstate.kvm_split_mode = NULL; local_paca->kvm_hstate.kvm_split_mode = NULL;
} }
/*
* Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
* Can we inject a Decrementer or a External interrupt?
*/
void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
{
int ext;
unsigned long vec = 0;
unsigned long lpcr;
/* Insert EXTERNAL bit into LPCR at the MER bit position */
ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
lpcr = mfspr(SPRN_LPCR);
lpcr |= ext << LPCR_MER_SH;
mtspr(SPRN_LPCR, lpcr);
isync();
if (vcpu->arch.shregs.msr & MSR_EE) {
if (ext) {
vec = BOOK3S_INTERRUPT_EXTERNAL;
} else {
long int dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD))
dec = (int) dec;
if (dec < 0)
vec = BOOK3S_INTERRUPT_DECREMENTER;
}
}
if (vec) {
unsigned long msr, old_msr = vcpu->arch.shregs.msr;
kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
kvmppc_set_srr1(vcpu, old_msr);
kvmppc_set_pc(vcpu, vec);
msr = vcpu->arch.intr_msr;
if (MSR_TM_ACTIVE(old_msr))
msr |= MSR_TS_S;
vcpu->arch.shregs.msr = msr;
}
if (vcpu->arch.doorbell_request) {
mtspr(SPRN_DPDES, 1);
vcpu->arch.vcore->dpdes = 1;
smp_wmb();
vcpu->arch.doorbell_request = 0;
}
}
...@@ -64,52 +64,7 @@ BEGIN_FTR_SECTION ...@@ -64,52 +64,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Save host PMU registers */ /* Save host PMU registers */
BEGIN_FTR_SECTION bl kvmhv_save_host_pmu
/* Work around P8 PMAE bug */
li r3, -1
clrrdi r3, r3, 10
mfspr r8, SPRN_MMCR2
mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
isync
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3, 1
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
mfspr r6, SPRN_MMCRA
/* Clear MMCRA in order to disable SDAR updates */
li r5, 0
mtspr SPRN_MMCRA, r5
isync
lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r5, 0
beq 31f /* skip if not */
mfspr r5, SPRN_MMCR1
mfspr r9, SPRN_SIAR
mfspr r10, SPRN_SDAR
std r7, HSTATE_MMCR0(r13)
std r5, HSTATE_MMCR1(r13)
std r6, HSTATE_MMCRA(r13)
std r9, HSTATE_SIAR(r13)
std r10, HSTATE_SDAR(r13)
BEGIN_FTR_SECTION
mfspr r9, SPRN_SIER
std r8, HSTATE_MMCR2(r13)
std r9, HSTATE_SIER(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r3, SPRN_PMC1
mfspr r5, SPRN_PMC2
mfspr r6, SPRN_PMC3
mfspr r7, SPRN_PMC4
mfspr r8, SPRN_PMC5
mfspr r9, SPRN_PMC6
stw r3, HSTATE_PMC1(r13)
stw r5, HSTATE_PMC2(r13)
stw r6, HSTATE_PMC3(r13)
stw r7, HSTATE_PMC4(r13)
stw r8, HSTATE_PMC5(r13)
stw r9, HSTATE_PMC6(r13)
31:
/* /*
* Put whatever is in the decrementer into the * Put whatever is in the decrementer into the
...@@ -161,3 +116,51 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ...@@ -161,3 +116,51 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r0, PPC_LR_STKOFF(r1) ld r0, PPC_LR_STKOFF(r1)
mtlr r0 mtlr r0
blr blr
_GLOBAL(kvmhv_save_host_pmu)
BEGIN_FTR_SECTION
/* Work around P8 PMAE bug */
li r3, -1
clrrdi r3, r3, 10
mfspr r8, SPRN_MMCR2
mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
isync
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3, 1
sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
mfspr r6, SPRN_MMCRA
/* Clear MMCRA in order to disable SDAR updates */
li r5, 0
mtspr SPRN_MMCRA, r5
isync
lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
cmpwi r5, 0
beq 31f /* skip if not */
mfspr r5, SPRN_MMCR1
mfspr r9, SPRN_SIAR
mfspr r10, SPRN_SDAR
std r7, HSTATE_MMCR0(r13)
std r5, HSTATE_MMCR1(r13)
std r6, HSTATE_MMCRA(r13)
std r9, HSTATE_SIAR(r13)
std r10, HSTATE_SDAR(r13)
BEGIN_FTR_SECTION
mfspr r9, SPRN_SIER
std r8, HSTATE_MMCR2(r13)
std r9, HSTATE_SIER(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r3, SPRN_PMC1
mfspr r5, SPRN_PMC2
mfspr r6, SPRN_PMC3
mfspr r7, SPRN_PMC4
mfspr r8, SPRN_PMC5
mfspr r9, SPRN_PMC6
stw r3, HSTATE_PMC1(r13)
stw r5, HSTATE_PMC2(r13)
stw r6, HSTATE_PMC3(r13)
stw r7, HSTATE_PMC4(r13)
stw r8, HSTATE_PMC5(r13)
stw r9, HSTATE_PMC6(r13)
31: blr
This diff is collapsed.
...@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void) ...@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 1; local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
} }
EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);
void kvmppc_subcore_exit_guest(void) void kvmppc_subcore_exit_guest(void)
{ {
...@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void) ...@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 0; local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
} }
EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);
static bool kvmppc_tb_resync_required(void) static bool kvmppc_tb_resync_required(void)
{ {
...@@ -331,5 +333,13 @@ long kvmppc_realmode_hmi_handler(void) ...@@ -331,5 +333,13 @@ long kvmppc_realmode_hmi_handler(void)
} else { } else {
wait_for_tb_resync(); wait_for_tb_resync();
} }
/*
* Reset tb_offset_applied so the guest exit code won't try
* to subtract the previous timebase offset from the timebase.
*/
if (local_paca->kvm_hstate.kvm_vcore)
local_paca->kvm_hstate.kvm_vcore->tb_offset_applied = 0;
return 0; return 0;
} }
...@@ -136,7 +136,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, ...@@ -136,7 +136,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
/* Mark the target VCPU as having an interrupt pending */ /* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++; vcpu->stat.queue_intr++;
set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); set_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
/* Kick self ? Just set MER and return */ /* Kick self ? Just set MER and return */
if (vcpu == this_vcpu) { if (vcpu == this_vcpu) {
...@@ -170,8 +170,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, ...@@ -170,8 +170,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu) static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
{ {
/* Note: Only called on self ! */ /* Note: Only called on self ! */
clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, clear_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
&vcpu->arch.pending_exceptions);
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER); mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
} }
...@@ -768,6 +767,14 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) ...@@ -768,6 +767,14 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
void __iomem *xics_phys; void __iomem *xics_phys;
int64_t rc; int64_t rc;
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
iosync();
plpar_hcall_raw(H_EOI, retbuf, hwirq);
return;
}
rc = pnv_opal_pci_msi_eoi(c, hwirq); rc = pnv_opal_pci_msi_eoi(c, hwirq);
if (rc) if (rc)
......
This diff is collapsed.
...@@ -130,7 +130,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) ...@@ -130,7 +130,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
return RESUME_GUEST; return RESUME_GUEST;
} }
/* Set CR0 to indicate previous transactional state */ /* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
/* L=1 => tresume, L=0 => tsuspend */ /* L=1 => tresume, L=0 => tsuspend */
if (instr & (1 << 21)) { if (instr & (1 << 21)) {
...@@ -174,7 +174,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) ...@@ -174,7 +174,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
copy_from_checkpoint(vcpu); copy_from_checkpoint(vcpu);
/* Set CR0 to indicate previous transactional state */ /* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
vcpu->arch.shregs.msr &= ~MSR_TS_MASK; vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
return RESUME_GUEST; return RESUME_GUEST;
...@@ -204,7 +204,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) ...@@ -204,7 +204,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
copy_to_checkpoint(vcpu); copy_to_checkpoint(vcpu);
/* Set CR0 to indicate previous transactional state */ /* Set CR0 to indicate previous transactional state */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
vcpu->arch.shregs.msr = msr | MSR_TS_S; vcpu->arch.shregs.msr = msr | MSR_TS_S;
return RESUME_GUEST; return RESUME_GUEST;
......
...@@ -89,7 +89,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) ...@@ -89,7 +89,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
if (instr & (1 << 21)) if (instr & (1 << 21))
vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
/* Set CR0 to 0b0010 */ /* Set CR0 to 0b0010 */
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000; vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
0x20000000;
return 1; return 1;
} }
...@@ -105,5 +106,5 @@ void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu) ...@@ -105,5 +106,5 @@ void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */ vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */
vcpu->arch.regs.nip = vcpu->arch.tfhar; vcpu->arch.regs.nip = vcpu->arch.tfhar;
copy_from_checkpoint(vcpu); copy_from_checkpoint(vcpu);
vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000; vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | 0xa0000000;
} }
...@@ -167,7 +167,7 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu) ...@@ -167,7 +167,7 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
svcpu->gpr[11] = vcpu->arch.regs.gpr[11]; svcpu->gpr[11] = vcpu->arch.regs.gpr[11];
svcpu->gpr[12] = vcpu->arch.regs.gpr[12]; svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
svcpu->gpr[13] = vcpu->arch.regs.gpr[13]; svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
svcpu->cr = vcpu->arch.cr; svcpu->cr = vcpu->arch.regs.ccr;
svcpu->xer = vcpu->arch.regs.xer; svcpu->xer = vcpu->arch.regs.xer;
svcpu->ctr = vcpu->arch.regs.ctr; svcpu->ctr = vcpu->arch.regs.ctr;
svcpu->lr = vcpu->arch.regs.link; svcpu->lr = vcpu->arch.regs.link;
...@@ -249,7 +249,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu) ...@@ -249,7 +249,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
vcpu->arch.regs.gpr[11] = svcpu->gpr[11]; vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
vcpu->arch.regs.gpr[12] = svcpu->gpr[12]; vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
vcpu->arch.regs.gpr[13] = svcpu->gpr[13]; vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
vcpu->arch.cr = svcpu->cr; vcpu->arch.regs.ccr = svcpu->cr;
vcpu->arch.regs.xer = svcpu->xer; vcpu->arch.regs.xer = svcpu->xer;
vcpu->arch.regs.ctr = svcpu->ctr; vcpu->arch.regs.ctr = svcpu->ctr;
vcpu->arch.regs.link = svcpu->lr; vcpu->arch.regs.link = svcpu->lr;
...@@ -1246,7 +1246,6 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -1246,7 +1246,6 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST; r = RESUME_GUEST;
break; break;
case BOOK3S_INTERRUPT_EXTERNAL: case BOOK3S_INTERRUPT_EXTERNAL:
case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
case BOOK3S_INTERRUPT_EXTERNAL_HV: case BOOK3S_INTERRUPT_EXTERNAL_HV:
case BOOK3S_INTERRUPT_H_VIRT: case BOOK3S_INTERRUPT_H_VIRT:
vcpu->stat.ext_intr_exits++; vcpu->stat.ext_intr_exits++;
......
...@@ -310,7 +310,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp, ...@@ -310,7 +310,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
*/ */
if (new.out_ee) { if (new.out_ee) {
kvmppc_book3s_queue_irqprio(icp->vcpu, kvmppc_book3s_queue_irqprio(icp->vcpu,
BOOK3S_INTERRUPT_EXTERNAL_LEVEL); BOOK3S_INTERRUPT_EXTERNAL);
if (!change_self) if (!change_self)
kvmppc_fast_vcpu_kick(icp->vcpu); kvmppc_fast_vcpu_kick(icp->vcpu);
} }
...@@ -593,8 +593,7 @@ static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu) ...@@ -593,8 +593,7 @@ static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
u32 xirr; u32 xirr;
/* First, remove EE from the processor */ /* First, remove EE from the processor */
kvmppc_book3s_dequeue_irqprio(icp->vcpu, kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
/* /*
* ICP State: Accept_Interrupt * ICP State: Accept_Interrupt
...@@ -754,8 +753,7 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) ...@@ -754,8 +753,7 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
* We can remove EE from the current processor, the update * We can remove EE from the current processor, the update
* transaction will set it again if needed * transaction will set it again if needed
*/ */
kvmppc_book3s_dequeue_irqprio(icp->vcpu, kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
do { do {
old_state = new_state = READ_ONCE(icp->state); old_state = new_state = READ_ONCE(icp->state);
...@@ -1167,8 +1165,7 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval) ...@@ -1167,8 +1165,7 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
* Deassert the CPU interrupt request. * Deassert the CPU interrupt request.
* icp_try_update will reassert it if necessary. * icp_try_update will reassert it if necessary.
*/ */
kvmppc_book3s_dequeue_irqprio(icp->vcpu, kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
/* /*
* Note that if we displace an interrupt from old_state.xisr, * Note that if we displace an interrupt from old_state.xisr,
...@@ -1393,7 +1390,8 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) ...@@ -1393,7 +1390,8 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
} }
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (cpu_has_feature(CPU_FTR_ARCH_206)) { if (cpu_has_feature(CPU_FTR_ARCH_206) &&
cpu_has_feature(CPU_FTR_HVMODE)) {
/* Enable real mode support */ /* Enable real mode support */
xics->real_mode = ENABLE_REALMODE; xics->real_mode = ENABLE_REALMODE;
xics->real_mode_dbg = DEBUG_REALMODE; xics->real_mode_dbg = DEBUG_REALMODE;
......
...@@ -61,6 +61,69 @@ ...@@ -61,6 +61,69 @@
*/ */
#define XIVE_Q_GAP 2 #define XIVE_Q_GAP 2
/*
* Push a vcpu's context to the XIVE on guest entry.
* This assumes we are in virtual mode (MMU on)
*/
void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
{
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
if (!tima)
return;
eieio();
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
vcpu->arch.xive_pushed = 1;
eieio();
/*
* We clear the irq_pending flag. There is a small chance of a
* race vs. the escalation interrupt happening on another
* processor setting it again, but the only consequence is to
* cause a spurious wakeup on the next H_CEDE, which is not an
* issue.
*/
vcpu->arch.irq_pending = 0;
/*
* In single escalation mode, if the escalation interrupt is
* on, we mask it.
*/
if (vcpu->arch.xive_esc_on) {
pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
XIVE_ESB_SET_PQ_01));
mb();
/*
* We have a possible subtle race here: The escalation
* interrupt might have fired and be on its way to the
* host queue while we mask it, and if we unmask it
* early enough (re-cede right away), there is a
* theorical possibility that it fires again, thus
* landing in the target queue more than once which is
* a big no-no.
*
* Fortunately, solving this is rather easy. If the
* above load setting PQ to 01 returns a previous
* value where P is set, then we know the escalation
* interrupt is somewhere on its way to the host. In
* that case we simply don't clear the xive_esc_on
* flag below. It will be eventually cleared by the
* handler for the escalation interrupt.
*
* Then, when doing a cede, we check that flag again
* before re-enabling the escalation interrupt, and if
* set, we abort the cede.
*/
if (!(pq & XIVE_ESB_VAL_P))
/* Now P is 0, we can clear the flag */
vcpu->arch.xive_esc_on = 0;
}
}
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
/* /*
* This is a simple trigger for a generic XIVE IRQ. This must * This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page * only be called for interrupts that support a trigger page
......
...@@ -280,14 +280,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu) ...@@ -280,14 +280,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
/* First collect pending bits from HW */ /* First collect pending bits from HW */
GLUE(X_PFX,ack_pending)(xc); GLUE(X_PFX,ack_pending)(xc);
/*
* Cleanup the old-style bits if needed (they may have been
* set by pull or an escalation interrupts).
*/
if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
&vcpu->arch.pending_exceptions);
pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n", pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
xc->pending, xc->hw_cppr, xc->cppr); xc->pending, xc->hw_cppr, xc->cppr);
......
...@@ -182,7 +182,7 @@ ...@@ -182,7 +182,7 @@
*/ */
PPC_LL r4, PACACURRENT(r13) PPC_LL r4, PACACURRENT(r13)
PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4) PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
stw r10, VCPU_CR(r4) PPC_STL r10, VCPU_CR(r4)
PPC_STL r11, VCPU_GPR(R4)(r4) PPC_STL r11, VCPU_GPR(R4)(r4)
PPC_STL r5, VCPU_GPR(R5)(r4) PPC_STL r5, VCPU_GPR(R5)(r4)
PPC_STL r6, VCPU_GPR(R6)(r4) PPC_STL r6, VCPU_GPR(R6)(r4)
...@@ -292,7 +292,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1) ...@@ -292,7 +292,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
PPC_STL r4, VCPU_GPR(R4)(r11) PPC_STL r4, VCPU_GPR(R4)(r11)
PPC_LL r4, THREAD_NORMSAVE(0)(r10) PPC_LL r4, THREAD_NORMSAVE(0)(r10)
PPC_STL r5, VCPU_GPR(R5)(r11) PPC_STL r5, VCPU_GPR(R5)(r11)
stw r13, VCPU_CR(r11) PPC_STL r13, VCPU_CR(r11)
mfspr r5, \srr0 mfspr r5, \srr0
PPC_STL r3, VCPU_GPR(R10)(r11) PPC_STL r3, VCPU_GPR(R10)(r11)
PPC_LL r3, THREAD_NORMSAVE(2)(r10) PPC_LL r3, THREAD_NORMSAVE(2)(r10)
...@@ -319,7 +319,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1) ...@@ -319,7 +319,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
PPC_STL r4, VCPU_GPR(R4)(r11) PPC_STL r4, VCPU_GPR(R4)(r11)
PPC_LL r4, GPR9(r8) PPC_LL r4, GPR9(r8)
PPC_STL r5, VCPU_GPR(R5)(r11) PPC_STL r5, VCPU_GPR(R5)(r11)
stw r9, VCPU_CR(r11) PPC_STL r9, VCPU_CR(r11)
mfspr r5, \srr0 mfspr r5, \srr0
PPC_STL r3, VCPU_GPR(R8)(r11) PPC_STL r3, VCPU_GPR(R8)(r11)
PPC_LL r3, GPR10(r8) PPC_LL r3, GPR10(r8)
...@@ -643,7 +643,7 @@ lightweight_exit: ...@@ -643,7 +643,7 @@ lightweight_exit:
PPC_LL r3, VCPU_LR(r4) PPC_LL r3, VCPU_LR(r4)
PPC_LL r5, VCPU_XER(r4) PPC_LL r5, VCPU_XER(r4)
PPC_LL r6, VCPU_CTR(r4) PPC_LL r6, VCPU_CTR(r4)
lwz r7, VCPU_CR(r4) PPC_LL r7, VCPU_CR(r4)
PPC_LL r8, VCPU_PC(r4) PPC_LL r8, VCPU_PC(r4)
PPC_LD(r9, VCPU_SHARED_MSR, r11) PPC_LD(r9, VCPU_SHARED_MSR, r11)
PPC_LL r0, VCPU_GPR(R0)(r4) PPC_LL r0, VCPU_GPR(R0)(r4)
......
...@@ -117,7 +117,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) ...@@ -117,7 +117,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
emulated = EMULATE_FAIL; emulated = EMULATE_FAIL;
vcpu->arch.regs.msr = vcpu->arch.shared->msr; vcpu->arch.regs.msr = vcpu->arch.shared->msr;
vcpu->arch.regs.ccr = vcpu->arch.cr;
if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) {
int type = op.type & INSTR_TYPE_MASK; int type = op.type & INSTR_TYPE_MASK;
int size = GETSIZE(op.type); int size = GETSIZE(op.type);
......
...@@ -594,7 +594,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -594,7 +594,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!(hv_enabled && radix_enabled()); r = !!(hv_enabled && radix_enabled());
break; break;
case KVM_CAP_PPC_MMU_HASH_V3: case KVM_CAP_PPC_MMU_HASH_V3:
r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300)); r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300) &&
cpu_has_feature(CPU_FTR_HVMODE));
break;
case KVM_CAP_PPC_NESTED_HV:
r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
!kvmppc_hv_ops->enable_nested(NULL));
break; break;
#endif #endif
case KVM_CAP_SYNC_MMU: case KVM_CAP_SYNC_MMU:
...@@ -2114,6 +2119,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, ...@@ -2114,6 +2119,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags); r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
break; break;
} }
case KVM_CAP_PPC_NESTED_HV:
r = -EINVAL;
if (!is_kvmppc_hv_enabled(kvm) ||
!kvm->arch.kvm_ops->enable_nested)
break;
r = kvm->arch.kvm_ops->enable_nested(kvm);
break;
#endif #endif
default: default:
r = -EINVAL; r = -EINVAL;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment