Commit 61ec84f1 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge branch 'kvm-ppc-next' of...

Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc into HEAD

The highlights are:

* Enable VFIO device on PowerPC, from David Gibson
* Optimizations to speed up IPIs between vcpus in HV KVM,
  from Suresh Warrier (who is also Suresh E. Warrier)
* In-kernel handling of IOMMU hypercalls, and support for dynamic DMA
  windows (DDW), from Alexey Kardashevskiy.
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parents 433da860 58ded420
...@@ -3035,6 +3035,63 @@ Returns: 0 on success, -1 on error ...@@ -3035,6 +3035,63 @@ Returns: 0 on success, -1 on error
Queues an SMI on the thread's vcpu. Queues an SMI on the thread's vcpu.
4.97 KVM_CAP_PPC_MULTITCE
Capability: KVM_CAP_PPC_MULTITCE
Architectures: ppc
Type: vm
This capability means the kernel is capable of handling hypercalls
H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
space. This significantly accelerates DMA operations for PPC KVM guests.
User space should expect that its handlers for these hypercalls
are not going to be called if user space previously registered LIOBN
in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
user space might have to advertise it for the guest. For example,
IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
present in the "ibm,hypertas-functions" device-tree property.
The hypercalls mentioned above may or may not be processed successfully
in the kernel based fast path. If they can not be handled by the kernel,
they will get passed on to user space. So user space still has to have
an implementation for these despite the in kernel acceleration.
This capability is always enabled.
4.98 KVM_CREATE_SPAPR_TCE_64
Capability: KVM_CAP_SPAPR_TCE_64
Architectures: powerpc
Type: vm ioctl
Parameters: struct kvm_create_spapr_tce_64 (in)
Returns: file descriptor for manipulating the created TCE table
This is an extension for KVM_CAP_SPAPR_TCE which only supports 32bit
windows, described in 4.62 KVM_CREATE_SPAPR_TCE
This capability uses extended struct in ioctl interface:
/* for KVM_CAP_SPAPR_TCE_64 */
struct kvm_create_spapr_tce_64 {
__u64 liobn;
__u32 page_shift;
__u32 flags;
__u64 offset; /* in pages */
__u64 size; /* in pages */
};
The aim of extension is to support an additional bigger DMA window with
a variable page size.
KVM_CREATE_SPAPR_TCE_64 receives a 64bit window size, an IOMMU page shift and
a bus offset of the corresponding DMA window, @size and @offset are numbers
of IOMMU pages.
@flags are not used at the moment.
The rest of functionality is identical to KVM_CREATE_SPAPR_TCE.
5. The kvm_run structure 5. The kvm_run structure
------------------------ ------------------------
......
...@@ -33,8 +33,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) ...@@ -33,8 +33,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
} }
#endif #endif
#define SPAPR_TCE_SHIFT 12
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif #endif
......
...@@ -182,7 +182,10 @@ struct kvmppc_spapr_tce_table { ...@@ -182,7 +182,10 @@ struct kvmppc_spapr_tce_table {
struct list_head list; struct list_head list;
struct kvm *kvm; struct kvm *kvm;
u64 liobn; u64 liobn;
u32 window_size; struct rcu_head rcu;
u32 page_shift;
u64 offset; /* in pages */
u64 size; /* window size in pages */
struct page *pages[0]; struct page *pages[0];
}; };
......
...@@ -165,9 +165,25 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, ...@@ -165,9 +165,25 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args); struct kvm_create_spapr_tce_64 *args);
extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
struct kvm_vcpu *vcpu, unsigned long liobn);
extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long ioba, unsigned long npages);
extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
unsigned long tce);
extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
unsigned long *ua, unsigned long **prmap);
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce); unsigned long ioba, unsigned long tce);
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages);
extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_value, unsigned long npages);
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba); unsigned long ioba);
extern struct page *kvm_alloc_hpt(unsigned long nr_pages); extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
...@@ -437,6 +453,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) ...@@ -437,6 +453,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ {
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
} }
extern void kvmppc_alloc_host_rm_ops(void);
extern void kvmppc_free_host_rm_ops(void);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
...@@ -445,7 +463,11 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); ...@@ -445,7 +463,11 @@ extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu); struct kvm_vcpu *vcpu, u32 cpu);
extern void kvmppc_xics_ipi_action(void);
extern int h_ipi_redirect;
#else #else
static inline void kvmppc_alloc_host_rm_ops(void) {};
static inline void kvmppc_free_host_rm_ops(void) {};
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; } { return 0; }
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
...@@ -459,6 +481,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) ...@@ -459,6 +481,33 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; } { return 0; }
#endif #endif
/*
* Host-side operations we want to set up while running in real
* mode in the guest operating on the xics.
* Currently only VCPU wakeup is supported.
*/
union kvmppc_rm_state {
unsigned long raw;
struct {
u32 in_host;
u32 rm_action;
};
};
struct kvmppc_host_rm_core {
union kvmppc_rm_state rm_state;
void *rm_data;
char pad[112];
};
struct kvmppc_host_rm_ops {
struct kvmppc_host_rm_core *rm_core;
void (*vcpu_kick)(struct kvm_vcpu *vcpu);
};
extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu) static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
{ {
#ifdef CONFIG_KVM_BOOKE_HV #ifdef CONFIG_KVM_BOOKE_HV
......
...@@ -78,6 +78,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, ...@@ -78,6 +78,9 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
} }
return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift); return __find_linux_pte_or_hugepte(pgdir, ea, is_thp, shift);
} }
unsigned long vmalloc_to_phys(void *vmalloc_addr);
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_H */ #endif /* _ASM_POWERPC_PGTABLE_H */
...@@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu); ...@@ -114,6 +114,9 @@ extern int cpu_to_core_id(int cpu);
#define PPC_MSG_TICK_BROADCAST 2 #define PPC_MSG_TICK_BROADCAST 2
#define PPC_MSG_DEBUGGER_BREAK 3 #define PPC_MSG_DEBUGGER_BREAK 3
/* This is only used by the powernv kernel */
#define PPC_MSG_RM_HOST_ACTION 4
/* for irq controllers that have dedicated ipis per message (4) */ /* for irq controllers that have dedicated ipis per message (4) */
extern int smp_request_message_ipi(int virq, int message); extern int smp_request_message_ipi(int virq, int message);
extern const char *smp_ipi_name[]; extern const char *smp_ipi_name[];
...@@ -121,6 +124,7 @@ extern const char *smp_ipi_name[]; ...@@ -121,6 +124,7 @@ extern const char *smp_ipi_name[];
/* for irq controllers with only a single ipi */ /* for irq controllers with only a single ipi */
extern void smp_muxed_ipi_set_data(int cpu, unsigned long data); extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
extern void smp_muxed_ipi_message_pass(int cpu, int msg); extern void smp_muxed_ipi_message_pass(int cpu, int msg);
extern void smp_muxed_ipi_set_message(int cpu, int msg);
extern irqreturn_t smp_ipi_demux(void); extern irqreturn_t smp_ipi_demux(void);
void smp_init_pSeries(void); void smp_init_pSeries(void);
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#ifdef CONFIG_PPC_ICP_NATIVE #ifdef CONFIG_PPC_ICP_NATIVE
extern int icp_native_init(void); extern int icp_native_init(void);
extern void icp_native_flush_interrupt(void); extern void icp_native_flush_interrupt(void);
extern void icp_native_cause_ipi_rm(int cpu);
#else #else
static inline int icp_native_init(void) { return -ENODEV; } static inline int icp_native_init(void) { return -ENODEV; }
#endif #endif
......
...@@ -333,6 +333,15 @@ struct kvm_create_spapr_tce { ...@@ -333,6 +333,15 @@ struct kvm_create_spapr_tce {
__u32 window_size; __u32 window_size;
}; };
/* for KVM_CAP_SPAPR_TCE_64 */
struct kvm_create_spapr_tce_64 {
__u64 liobn;
__u32 page_shift;
__u32 flags;
__u64 offset; /* in pages */
__u64 size; /* in pages */
};
/* for KVM_ALLOCATE_RMA */ /* for KVM_ALLOCATE_RMA */
struct kvm_allocate_rma { struct kvm_allocate_rma {
__u64 rma_size; __u64 rma_size;
......
...@@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg) ...@@ -206,7 +206,7 @@ int smp_request_message_ipi(int virq, int msg)
#ifdef CONFIG_PPC_SMP_MUXED_IPI #ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages { struct cpu_messages {
int messages; /* current messages */ long messages; /* current messages */
unsigned long data; /* data for cause ipi */ unsigned long data; /* data for cause ipi */
}; };
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
...@@ -218,7 +218,7 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data) ...@@ -218,7 +218,7 @@ void smp_muxed_ipi_set_data(int cpu, unsigned long data)
info->data = data; info->data = data;
} }
void smp_muxed_ipi_message_pass(int cpu, int msg) void smp_muxed_ipi_set_message(int cpu, int msg)
{ {
struct cpu_messages *info = &per_cpu(ipi_message, cpu); struct cpu_messages *info = &per_cpu(ipi_message, cpu);
char *message = (char *)&info->messages; char *message = (char *)&info->messages;
...@@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) ...@@ -228,6 +228,13 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
*/ */
smp_mb(); smp_mb();
message[msg] = 1; message[msg] = 1;
}
void smp_muxed_ipi_message_pass(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
smp_muxed_ipi_set_message(cpu, msg);
/* /*
* cause_ipi functions are required to include a full barrier * cause_ipi functions are required to include a full barrier
* before doing whatever causes the IPI. * before doing whatever causes the IPI.
...@@ -236,20 +243,31 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) ...@@ -236,20 +243,31 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
} }
#ifdef __BIG_ENDIAN__ #ifdef __BIG_ENDIAN__
#define IPI_MESSAGE(A) (1 << (24 - 8 * (A))) #define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else #else
#define IPI_MESSAGE(A) (1 << (8 * (A))) #define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif #endif
irqreturn_t smp_ipi_demux(void) irqreturn_t smp_ipi_demux(void)
{ {
struct cpu_messages *info = this_cpu_ptr(&ipi_message); struct cpu_messages *info = this_cpu_ptr(&ipi_message);
unsigned int all; unsigned long all;
mb(); /* order any irq clear */ mb(); /* order any irq clear */
do { do {
all = xchg(&info->messages, 0); all = xchg(&info->messages, 0);
#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
/*
* Must check for PPC_MSG_RM_HOST_ACTION messages
* before PPC_MSG_CALL_FUNCTION messages because when
* a VM is destroyed, we call kick_all_cpus_sync()
* to ensure that any pending PPC_MSG_RM_HOST_ACTION
* messages have completed before we free any VCPUs.
*/
if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
kvmppc_xics_ipi_action();
#endif
if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION)) if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
generic_smp_call_function_interrupt(); generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
......
...@@ -8,7 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm ...@@ -8,7 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm KVM := ../../../virt/kvm
common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o $(KVM)/eventfd.o $(KVM)/vfio.o
CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu.o := -I.
CFLAGS_e500_mmu_host.o := -I. CFLAGS_e500_mmu_host.o := -I.
......
...@@ -807,7 +807,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) ...@@ -807,7 +807,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
{ {
#ifdef CONFIG_PPC64 #ifdef CONFIG_PPC64
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD(&kvm->arch.rtas_tokens); INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
#endif #endif
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
* *
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
* Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/ */
#include <linux/types.h> #include <linux/types.h>
...@@ -36,28 +37,69 @@ ...@@ -36,28 +37,69 @@
#include <asm/ppc-opcode.h> #include <asm/ppc-opcode.h>
#include <asm/kvm_host.h> #include <asm/kvm_host.h>
#include <asm/udbg.h> #include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
{
return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}
static long kvmppc_stt_npages(unsigned long window_size) static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
{ {
return ALIGN((window_size >> SPAPR_TCE_SHIFT) unsigned long stt_bytes = sizeof(struct kvmppc_spapr_tce_table) +
* sizeof(u64), PAGE_SIZE) / PAGE_SIZE; (tce_pages * sizeof(struct page *));
return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE;
} }
static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
{ {
struct kvm *kvm = stt->kvm; long ret = 0;
int i;
mutex_lock(&kvm->lock); if (!current || !current->mm)
list_del(&stt->list); return ret; /* process exited */
for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
down_write(&current->mm->mmap_sem);
if (inc) {
unsigned long locked, lock_limit;
locked = current->mm->locked_vm + stt_pages;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
ret = -ENOMEM;
else
current->mm->locked_vm += stt_pages;
} else {
if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
stt_pages = current->mm->locked_vm;
current->mm->locked_vm -= stt_pages;
}
pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
inc ? '+' : '-',
stt_pages << PAGE_SHIFT,
current->mm->locked_vm << PAGE_SHIFT,
rlimit(RLIMIT_MEMLOCK),
ret ? " - exceeded" : "");
up_write(&current->mm->mmap_sem);
return ret;
}
static void release_spapr_tce_table(struct rcu_head *head)
{
struct kvmppc_spapr_tce_table *stt = container_of(head,
struct kvmppc_spapr_tce_table, rcu);
unsigned long i, npages = kvmppc_tce_pages(stt->size);
for (i = 0; i < npages; i++)
__free_page(stt->pages[i]); __free_page(stt->pages[i]);
kfree(stt);
mutex_unlock(&kvm->lock);
kvm_put_kvm(kvm); kfree(stt);
} }
static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
...@@ -65,7 +107,7 @@ static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -65,7 +107,7 @@ static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
struct page *page; struct page *page;
if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size)) if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
page = stt->pages[vmf->pgoff]; page = stt->pages[vmf->pgoff];
...@@ -88,7 +130,14 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) ...@@ -88,7 +130,14 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{ {
struct kvmppc_spapr_tce_table *stt = filp->private_data; struct kvmppc_spapr_tce_table *stt = filp->private_data;
release_spapr_tce_table(stt); list_del_rcu(&stt->list);
kvm_put_kvm(stt->kvm);
kvmppc_account_memlimit(
kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
call_rcu(&stt->rcu, release_spapr_tce_table);
return 0; return 0;
} }
...@@ -98,20 +147,29 @@ static const struct file_operations kvm_spapr_tce_fops = { ...@@ -98,20 +147,29 @@ static const struct file_operations kvm_spapr_tce_fops = {
}; };
long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args) struct kvm_create_spapr_tce_64 *args)
{ {
struct kvmppc_spapr_tce_table *stt = NULL; struct kvmppc_spapr_tce_table *stt = NULL;
long npages; unsigned long npages, size;
int ret = -ENOMEM; int ret = -ENOMEM;
int i; int i;
if (!args->size)
return -EINVAL;
/* Check this LIOBN hasn't been previously allocated */ /* Check this LIOBN hasn't been previously allocated */
list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
if (stt->liobn == args->liobn) if (stt->liobn == args->liobn)
return -EBUSY; return -EBUSY;
} }
npages = kvmppc_stt_npages(args->window_size); size = args->size;
npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
if (ret) {
stt = NULL;
goto fail;
}
stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
GFP_KERNEL); GFP_KERNEL);
...@@ -119,7 +177,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -119,7 +177,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
goto fail; goto fail;
stt->liobn = args->liobn; stt->liobn = args->liobn;
stt->window_size = args->window_size; stt->page_shift = args->page_shift;
stt->offset = args->offset;
stt->size = size;
stt->kvm = kvm; stt->kvm = kvm;
for (i = 0; i < npages; i++) { for (i = 0; i < npages; i++) {
...@@ -131,7 +191,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -131,7 +191,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
kvm_get_kvm(kvm); kvm_get_kvm(kvm);
mutex_lock(&kvm->lock); mutex_lock(&kvm->lock);
list_add(&stt->list, &kvm->arch.spapr_tce_tables); list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
mutex_unlock(&kvm->lock); mutex_unlock(&kvm->lock);
...@@ -148,3 +208,59 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, ...@@ -148,3 +208,59 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
} }
return ret; return ret;
} }
long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages)
{
struct kvmppc_spapr_tce_table *stt;
long i, ret = H_SUCCESS, idx;
unsigned long entry, ua = 0;
u64 __user *tces, tce;
stt = kvmppc_find_table(vcpu, liobn);
if (!stt)
return H_TOO_HARD;
entry = ioba >> stt->page_shift;
/*
* SPAPR spec says that the maximum size of the list is 512 TCEs
* so the whole table fits in 4K page
*/
if (npages > 512)
return H_PARAMETER;
if (tce_list & (SZ_4K - 1))
return H_PARAMETER;
ret = kvmppc_ioba_validate(stt, ioba, npages);
if (ret != H_SUCCESS)
return ret;
idx = srcu_read_lock(&vcpu->kvm->srcu);
if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
tces = (u64 __user *) ua;
for (i = 0; i < npages; ++i) {
if (get_user(tce, tces + i)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
tce = be64_to_cpu(tce);
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
goto unlock_exit;
kvmppc_tce_put(stt, entry + i, tce);
}
unlock_exit:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
* *
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com> * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
* Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/ */
#include <linux/types.h> #include <linux/types.h>
...@@ -30,76 +31,321 @@ ...@@ -30,76 +31,321 @@
#include <asm/kvm_ppc.h> #include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h> #include <asm/kvm_book3s.h>
#include <asm/mmu-hash64.h> #include <asm/mmu-hash64.h>
#include <asm/mmu_context.h>
#include <asm/hvcall.h> #include <asm/hvcall.h>
#include <asm/synch.h> #include <asm/synch.h>
#include <asm/ppc-opcode.h> #include <asm/ppc-opcode.h>
#include <asm/kvm_host.h> #include <asm/kvm_host.h>
#include <asm/udbg.h> #include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/iommu.h>
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
/* WARNING: This will be called in real-mode on HV KVM and virtual /*
* Finds a TCE table descriptor by LIOBN.
*
* WARNING: This will be called in real or virtual mode on HV KVM and virtual
* mode on PR KVM * mode on PR KVM
*/ */
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
unsigned long ioba, unsigned long tce) unsigned long liobn)
{ {
struct kvm *kvm = vcpu->kvm; struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt; struct kvmppc_spapr_tce_table *stt;
list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
if (stt->liobn == liobn)
return stt;
return NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_find_table);
/*
* Validates IO address.
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/
long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long ioba, unsigned long npages)
{
unsigned long mask = (1ULL << stt->page_shift) - 1;
unsigned long idx = ioba >> stt->page_shift;
if ((ioba & mask) || (idx < stt->offset) ||
(idx - stt->offset + npages > stt->size) ||
(idx + npages < idx))
return H_PARAMETER;
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
/*
* Validates TCE address.
* At the moment flags and page mask are validated.
* As the host kernel does not access those addresses (just puts them
* to the table and user space is supposed to process them), we can skip
* checking other things (such as TCE is a guest RAM address or the page
* was actually allocated).
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/
long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
{
unsigned long page_mask = ~((1ULL << stt->page_shift) - 1);
unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ);
if (tce & mask)
return H_PARAMETER;
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_tce_validate);
/* Note on the use of page_address() in real mode,
*
* It is safe to use page_address() in real mode on ppc64 because
* page_address() is always defined as lowmem_page_address()
* which returns __va(PFN_PHYS(page_to_pfn(page))) which is arithmetic
* operation and does not access page struct.
*
* Theoretically page_address() could be defined different
* but either WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL
* would have to be enabled.
* WANT_PAGE_VIRTUAL is never enabled on ppc32/ppc64,
* HASHED_PAGE_VIRTUAL could be enabled for ppc32 only and only
* if CONFIG_HIGHMEM is defined. As CONFIG_SPARSEMEM_VMEMMAP
* is not expected to be enabled on ppc32, page_address()
* is safe for ppc32 as well.
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/
static u64 *kvmppc_page_address(struct page *page)
{
#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL)
#error TODO: fix to avoid page_address() here
#endif
return (u64 *) page_address(page);
}
/*
* Handles TCE requests for emulated devices.
* Puts guest TCE values to the table and expects user space to convert them.
* Called in both real and virtual modes.
* Cannot fail so kvmppc_tce_validate must be called before it.
*
* WARNING: This will be called in real-mode on HV KVM and virtual
* mode on PR KVM
*/
void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
unsigned long idx, unsigned long tce)
{
struct page *page;
u64 *tbl;
idx -= stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
tbl = kvmppc_page_address(page);
tbl[idx % TCES_PER_PAGE] = tce;
}
EXPORT_SYMBOL_GPL(kvmppc_tce_put);
long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
unsigned long *ua, unsigned long **prmap)
{
unsigned long gfn = gpa >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
memslot = search_memslots(kvm_memslots(kvm), gfn);
if (!memslot)
return -EINVAL;
*ua = __gfn_to_hva_memslot(memslot, gfn) |
(gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (prmap)
*prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
#endif
return 0;
}
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce)
{
struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
long ret;
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */ /* liobn, ioba, tce); */
list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { if (!stt)
if (stt->liobn == liobn) { return H_TOO_HARD;
unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
struct page *page; ret = kvmppc_ioba_validate(stt, ioba, 1);
u64 *tbl; if (ret != H_SUCCESS)
return ret;
/* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
/* liobn, stt, stt->window_size); */
if (ioba >= stt->window_size)
return H_PARAMETER;
page = stt->pages[idx / TCES_PER_PAGE];
tbl = (u64 *)page_address(page);
/* FIXME: Need to validate the TCE itself */
/* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
tbl[idx % TCES_PER_PAGE] = tce;
return H_SUCCESS;
}
}
/* Didn't find the liobn, punt it to userspace */ ret = kvmppc_tce_validate(stt, tce);
return H_TOO_HARD; if (ret != H_SUCCESS)
return ret;
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
return H_SUCCESS;
} }
EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu,
unsigned long ioba) unsigned long ua, unsigned long *phpa)
{
pte_t *ptep, pte;
unsigned shift = 0;
ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift);
if (!ptep || !pte_present(*ptep))
return -ENXIO;
pte = *ptep;
if (!shift)
shift = PAGE_SHIFT;
/* Avoid handling anything potentially complicated in realmode */
if (shift > PAGE_SHIFT)
return -EAGAIN;
if (!pte_young(pte))
return -EAGAIN;
*phpa = (pte_pfn(pte) << PAGE_SHIFT) | (ua & ((1ULL << shift) - 1)) |
(ua & ~PAGE_MASK);
return 0;
}
long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_list, unsigned long npages)
{ {
struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt; struct kvmppc_spapr_tce_table *stt;
long i, ret = H_SUCCESS;
unsigned long tces, entry, ua = 0;
unsigned long *rmap = NULL;
stt = kvmppc_find_table(vcpu, liobn);
if (!stt)
return H_TOO_HARD;
entry = ioba >> stt->page_shift;
/*
* The spec says that the maximum size of the list is 512 TCEs
* so the whole table addressed resides in 4K page
*/
if (npages > 512)
return H_PARAMETER;
if (tce_list & (SZ_4K - 1))
return H_PARAMETER;
ret = kvmppc_ioba_validate(stt, ioba, npages);
if (ret != H_SUCCESS)
return ret;
list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
if (stt->liobn == liobn) { return H_TOO_HARD;
unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
struct page *page;
u64 *tbl;
if (ioba >= stt->window_size) rmap = (void *) vmalloc_to_phys(rmap);
return H_PARAMETER;
page = stt->pages[idx / TCES_PER_PAGE]; /*
tbl = (u64 *)page_address(page); * Synchronize with the MMU notifier callbacks in
* book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
* While we have the rmap lock, code running on other CPUs
* cannot finish unmapping the host real page that backs
* this guest real page, so we are OK to access the host
* real page.
*/
lock_rmap(rmap);
if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
for (i = 0; i < npages; ++i) {
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
goto unlock_exit;
vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE]; kvmppc_tce_put(stt, entry + i, tce);
return H_SUCCESS;
}
} }
/* Didn't find the liobn, punt it to userspace */ unlock_exit:
return H_TOO_HARD; unlock_rmap(rmap);
return ret;
}
long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long liobn, unsigned long ioba,
unsigned long tce_value, unsigned long npages)
{
struct kvmppc_spapr_tce_table *stt;
long i, ret;
stt = kvmppc_find_table(vcpu, liobn);
if (!stt)
return H_TOO_HARD;
ret = kvmppc_ioba_validate(stt, ioba, npages);
if (ret != H_SUCCESS)
return ret;
/* Check permission bits only to allow userspace poison TCE for debug */
if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
return H_PARAMETER;
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba)
{
struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
long ret;
unsigned long idx;
struct page *page;
u64 *tbl;
if (!stt)
return H_TOO_HARD;
ret = kvmppc_ioba_validate(stt, ioba, 1);
if (ret != H_SUCCESS)
return ret;
idx = (ioba >> stt->page_shift) - stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
tbl = (u64 *)page_address(page);
vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
return H_SUCCESS;
} }
EXPORT_SYMBOL_GPL(kvmppc_h_get_tce); EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
#endif /* KVM_BOOK3S_HV_POSSIBLE */
...@@ -81,6 +81,17 @@ static int target_smt_mode; ...@@ -81,6 +81,17 @@ static int target_smt_mode;
module_param(target_smt_mode, int, S_IRUGO | S_IWUSR); module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)"); MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
#ifdef CONFIG_KVM_XICS
static struct kernel_param_ops module_param_ops = {
.set = param_set_int,
.get = param_get_int,
};
module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
...@@ -768,7 +779,31 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ...@@ -768,7 +779,31 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (kvmppc_xics_enabled(vcpu)) { if (kvmppc_xics_enabled(vcpu)) {
ret = kvmppc_xics_hcall(vcpu, req); ret = kvmppc_xics_hcall(vcpu, req);
break; break;
} /* fallthrough */ }
return RESUME_HOST;
case H_PUT_TCE:
ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_PUT_TCE_INDIRECT:
ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6),
kvmppc_get_gpr(vcpu, 7));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_STUFF_TCE:
ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6),
kvmppc_get_gpr(vcpu, 7));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
default: default:
return RESUME_HOST; return RESUME_HOST;
} }
...@@ -2278,6 +2313,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) ...@@ -2278,6 +2313,46 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
spin_unlock(&vc->lock); spin_unlock(&vc->lock);
} }
/*
* Clear core from the list of active host cores as we are about to
* enter the guest. Only do this if it is the primary thread of the
* core (not if a subcore) that is entering the guest.
*/
static inline void kvmppc_clear_host_core(int cpu)
{
int core;
if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
return;
/*
* Memory barrier can be omitted here as we will do a smp_wmb()
* later in kvmppc_start_thread and we need ensure that state is
* visible to other CPUs only after we enter guest.
*/
core = cpu >> threads_shift;
kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
}
/*
* Advertise this core as an active host core since we exited the guest
* Only need to do this if it is the primary thread of the core that is
* exiting.
*/
static inline void kvmppc_set_host_core(int cpu)
{
int core;
if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
return;
/*
* Memory barrier can be omitted here because we do a spin_unlock
* immediately after this which provides the memory barrier.
*/
core = cpu >> threads_shift;
kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
}
/* /*
* Run a set of guest threads on a physical core. * Run a set of guest threads on a physical core.
* Called with vc->lock held. * Called with vc->lock held.
...@@ -2390,6 +2465,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) ...@@ -2390,6 +2465,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
} }
} }
kvmppc_clear_host_core(pcpu);
/* Start all the threads */ /* Start all the threads */
active = 0; active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) { for (sub = 0; sub < core_info.n_subcores; ++sub) {
...@@ -2486,6 +2563,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) ...@@ -2486,6 +2563,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_ipi_thread(pcpu + i); kvmppc_ipi_thread(pcpu + i);
} }
kvmppc_set_host_core(pcpu);
spin_unlock(&vc->lock); spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */ /* make sure updates to secondary vcpu structs are visible now */
...@@ -2984,6 +3063,114 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) ...@@ -2984,6 +3063,114 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto out_srcu; goto out_srcu;
} }
#ifdef CONFIG_KVM_XICS
static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
void *hcpu)
{
unsigned long cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
kvmppc_set_host_core(cpu);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
case CPU_DEAD_FROZEN:
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
kvmppc_clear_host_core(cpu);
break;
#endif
default:
break;
}
return NOTIFY_OK;
}
static struct notifier_block kvmppc_cpu_notifier = {
.notifier_call = kvmppc_cpu_notify,
};
/*
* Allocate a per-core structure for managing state about which cores are
* running in the host versus the guest and for exchanging data between
* real mode KVM and CPU running in the host.
* This is only done for the first VM.
* The allocated structure stays even if all VMs have stopped.
* It is only freed when the kvm-hv module is unloaded.
* It's OK for this routine to fail, we just don't support host
* core operations like redirecting H_IPI wakeups.
*/
void kvmppc_alloc_host_rm_ops(void)
{
struct kvmppc_host_rm_ops *ops;
unsigned long l_ops;
int cpu, core;
int size;
/* Not the first time here ? */
if (kvmppc_host_rm_ops_hv != NULL)
return;
ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
if (!ops)
return;
size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
ops->rm_core = kzalloc(size, GFP_KERNEL);
if (!ops->rm_core) {
kfree(ops);
return;
}
get_online_cpus();
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
continue;
core = cpu >> threads_shift;
ops->rm_core[core].rm_state.in_host = 1;
}
ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
/*
* Make the contents of the kvmppc_host_rm_ops structure visible
* to other CPUs before we assign it to the global variable.
* Do an atomic assignment (no locks used here), but if someone
* beats us to it, just free our copy and return.
*/
smp_wmb();
l_ops = (unsigned long) ops;
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
put_online_cpus();
kfree(ops->rm_core);
kfree(ops);
return;
}
register_cpu_notifier(&kvmppc_cpu_notifier);
put_online_cpus();
}
void kvmppc_free_host_rm_ops(void)
{
if (kvmppc_host_rm_ops_hv) {
unregister_cpu_notifier(&kvmppc_cpu_notifier);
kfree(kvmppc_host_rm_ops_hv->rm_core);
kfree(kvmppc_host_rm_ops_hv);
kvmppc_host_rm_ops_hv = NULL;
}
}
#endif
static int kvmppc_core_init_vm_hv(struct kvm *kvm) static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{ {
unsigned long lpcr, lpid; unsigned long lpcr, lpid;
...@@ -2996,6 +3183,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) ...@@ -2996,6 +3183,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
return -ENOMEM; return -ENOMEM;
kvm->arch.lpid = lpid; kvm->arch.lpid = lpid;
kvmppc_alloc_host_rm_ops();
/* /*
* Since we don't flush the TLB when tearing down a VM, * Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used, * and this lpid might have previously been used,
...@@ -3229,6 +3418,7 @@ static int kvmppc_book3s_init_hv(void) ...@@ -3229,6 +3418,7 @@ static int kvmppc_book3s_init_hv(void)
static void kvmppc_book3s_exit_hv(void) static void kvmppc_book3s_exit_hv(void)
{ {
kvmppc_free_host_rm_ops();
kvmppc_hv_ops = NULL; kvmppc_hv_ops = NULL;
} }
......
...@@ -283,3 +283,6 @@ void kvmhv_commence_exit(int trap) ...@@ -283,3 +283,6 @@ void kvmhv_commence_exit(int trap)
kvmhv_interrupt_vcore(vc, ee); kvmhv_interrupt_vcore(vc, ee);
} }
} }
struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
...@@ -17,12 +17,16 @@ ...@@ -17,12 +17,16 @@
#include <asm/xics.h> #include <asm/xics.h>
#include <asm/debug.h> #include <asm/debug.h>
#include <asm/synch.h> #include <asm/synch.h>
#include <asm/cputhreads.h>
#include <asm/ppc-opcode.h> #include <asm/ppc-opcode.h>
#include "book3s_xics.h" #include "book3s_xics.h"
#define DEBUG_PASSUP #define DEBUG_PASSUP
int h_ipi_redirect = 1;
EXPORT_SYMBOL(h_ipi_redirect);
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq); u32 new_irq);
...@@ -50,11 +54,84 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics, ...@@ -50,11 +54,84 @@ static void ics_rm_check_resend(struct kvmppc_xics *xics,
/* -- ICP routines -- */ /* -- ICP routines -- */
#ifdef CONFIG_SMP
static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
{
int hcpu;
hcpu = hcore << threads_shift;
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
icp_native_cause_ipi_rm(hcpu);
}
#else
static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
#endif
/*
* We start the search from our current CPU Id in the core map
* and go in a circle until we get back to our ID looking for a
* core that is running in host context and that hasn't already
* been targeted for another rm_host_ops.
*
* In the future, could consider using a fairer algorithm (one
* that distributes the IPIs better)
*
* Returns -1, if no CPU could be found in the host
* Else, returns a CPU Id which has been reserved for use
*/
static inline int grab_next_hostcore(int start,
struct kvmppc_host_rm_core *rm_core, int max, int action)
{
bool success;
int core;
union kvmppc_rm_state old, new;
for (core = start + 1; core < max; core++) {
old = new = READ_ONCE(rm_core[core].rm_state);
if (!old.in_host || old.rm_action)
continue;
/* Try to grab this host core if not taken already. */
new.rm_action = action;
success = cmpxchg64(&rm_core[core].rm_state.raw,
old.raw, new.raw) == old.raw;
if (success) {
/*
* Make sure that the store to the rm_action is made
* visible before we return to caller (and the
* subsequent store to rm_data) to synchronize with
* the IPI handler.
*/
smp_wmb();
return core;
}
}
return -1;
}
static inline int find_available_hostcore(int action)
{
int core;
int my_core = smp_processor_id() >> threads_shift;
struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
if (core == -1)
core = grab_next_hostcore(core, rm_core, my_core, action);
return core;
}
static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu) struct kvm_vcpu *this_vcpu)
{ {
struct kvmppc_icp *this_icp = this_vcpu->arch.icp; struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
int cpu; int cpu;
int hcore;
/* Mark the target VCPU as having an interrupt pending */ /* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++; vcpu->stat.queue_intr++;
...@@ -66,11 +143,22 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, ...@@ -66,11 +143,22 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return; return;
} }
/* Check if the core is loaded, if not, too hard */ /*
* Check if the core is loaded,
* if not, find an available host core to post to wake the VCPU,
* if we can't find one, set up state to eventually return too hard.
*/
cpu = vcpu->arch.thread_cpu; cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) { if (cpu < 0 || cpu >= nr_cpu_ids) {
this_icp->rm_action |= XICS_RM_KICK_VCPU; hcore = -1;
this_icp->rm_kick_target = vcpu; if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
if (hcore != -1) {
icp_send_hcore_msg(hcore, vcpu);
} else {
this_icp->rm_action |= XICS_RM_KICK_VCPU;
this_icp->rm_kick_target = vcpu;
}
return; return;
} }
...@@ -623,3 +711,40 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) ...@@ -623,3 +711,40 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
bail: bail:
return check_too_hard(xics, icp); return check_too_hard(xics, icp);
} }
/* --- Non-real mode XICS-related built-in routines --- */
/**
* Host Operations poked by RM KVM
*/
static void rm_host_ipi_action(int action, void *data)
{
switch (action) {
case XICS_RM_KICK_VCPU:
kvmppc_host_rm_ops_hv->vcpu_kick(data);
break;
default:
WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
break;
}
}
void kvmppc_xics_ipi_action(void)
{
int core;
unsigned int cpu = smp_processor_id();
struct kvmppc_host_rm_core *rm_corep;
core = cpu >> threads_shift;
rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
if (rm_corep->rm_data) {
rm_host_ipi_action(rm_corep->rm_state.rm_action,
rm_corep->rm_data);
/* Order these stores against the real mode KVM */
rm_corep->rm_data = NULL;
smp_wmb();
rm_corep->rm_state.rm_action = 0;
}
}
...@@ -2006,8 +2006,8 @@ hcall_real_table: ...@@ -2006,8 +2006,8 @@ hcall_real_table:
.long 0 /* 0x12c */ .long 0 /* 0x12c */
.long 0 /* 0x130 */ .long 0 /* 0x130 */
.long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
.long 0 /* 0x138 */ .long DOTSYM(kvmppc_h_stuff_tce) - hcall_real_table
.long 0 /* 0x13c */ .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
.long 0 /* 0x140 */ .long 0 /* 0x140 */
.long 0 /* 0x144 */ .long 0 /* 0x144 */
.long 0 /* 0x148 */ .long 0 /* 0x148 */
......
...@@ -280,6 +280,37 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu) ...@@ -280,6 +280,37 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
return EMULATE_DONE; return EMULATE_DONE;
} }
static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
{
unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
unsigned long tce = kvmppc_get_gpr(vcpu, 6);
unsigned long npages = kvmppc_get_gpr(vcpu, 7);
long rc;
rc = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba,
tce, npages);
if (rc == H_TOO_HARD)
return EMULATE_FAIL;
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
{
unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
unsigned long tce_value = kvmppc_get_gpr(vcpu, 6);
unsigned long npages = kvmppc_get_gpr(vcpu, 7);
long rc;
rc = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages);
if (rc == H_TOO_HARD)
return EMULATE_FAIL;
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ {
long rc = kvmppc_xics_hcall(vcpu, cmd); long rc = kvmppc_xics_hcall(vcpu, cmd);
...@@ -306,6 +337,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) ...@@ -306,6 +337,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
return kvmppc_h_pr_bulk_remove(vcpu); return kvmppc_h_pr_bulk_remove(vcpu);
case H_PUT_TCE: case H_PUT_TCE:
return kvmppc_h_pr_put_tce(vcpu); return kvmppc_h_pr_put_tce(vcpu);
case H_PUT_TCE_INDIRECT:
return kvmppc_h_pr_put_tce_indirect(vcpu);
case H_STUFF_TCE:
return kvmppc_h_pr_stuff_tce(vcpu);
case H_CEDE: case H_CEDE:
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE); kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_block(vcpu); kvm_vcpu_block(vcpu);
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/cputhreads.h> #include <asm/cputhreads.h>
#include <asm/irqflags.h> #include <asm/irqflags.h>
#include <asm/iommu.h>
#include "timing.h" #include "timing.h"
#include "irq.h" #include "irq.h"
#include "../mm/mmu_decl.h" #include "../mm/mmu_decl.h"
...@@ -437,6 +438,16 @@ void kvm_arch_destroy_vm(struct kvm *kvm) ...@@ -437,6 +438,16 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
unsigned int i; unsigned int i;
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
#ifdef CONFIG_KVM_XICS
/*
* We call kick_all_cpus_sync() to ensure that all
* CPUs have executed any pending IPIs before we
* continue and free VCPUs structures below.
*/
if (is_kvmppc_hv_enabled(kvm))
kick_all_cpus_sync();
#endif
kvm_for_each_vcpu(i, vcpu, kvm) kvm_for_each_vcpu(i, vcpu, kvm)
kvm_arch_vcpu_free(vcpu); kvm_arch_vcpu_free(vcpu);
...@@ -509,6 +520,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -509,6 +520,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE: case KVM_CAP_SPAPR_TCE:
case KVM_CAP_SPAPR_TCE_64:
case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_FIXUP_HCALL:
...@@ -569,6 +581,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -569,6 +581,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_GET_SMMU_INFO: case KVM_CAP_PPC_GET_SMMU_INFO:
r = 1; r = 1;
break; break;
case KVM_CAP_SPAPR_MULTITCE:
r = 1;
break;
#endif #endif
default: default:
r = 0; r = 0;
...@@ -1331,13 +1346,34 @@ long kvm_arch_vm_ioctl(struct file *filp, ...@@ -1331,13 +1346,34 @@ long kvm_arch_vm_ioctl(struct file *filp,
break; break;
} }
#ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_BOOK3S_64
case KVM_CREATE_SPAPR_TCE_64: {
struct kvm_create_spapr_tce_64 create_tce_64;
r = -EFAULT;
if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64)))
goto out;
if (create_tce_64.flags) {
r = -EINVAL;
goto out;
}
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
goto out;
}
case KVM_CREATE_SPAPR_TCE: { case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce; struct kvm_create_spapr_tce create_tce;
struct kvm_create_spapr_tce_64 create_tce_64;
r = -EFAULT; r = -EFAULT;
if (copy_from_user(&create_tce, argp, sizeof(create_tce))) if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
goto out; goto out;
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
create_tce_64.liobn = create_tce.liobn;
create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K;
create_tce_64.offset = 0;
create_tce_64.size = create_tce.window_size >>
IOMMU_PAGE_SHIFT_4K;
create_tce_64.flags = 0;
r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
goto out; goto out;
} }
case KVM_PPC_GET_SMMU_INFO: { case KVM_PPC_GET_SMMU_INFO: {
......
...@@ -243,3 +243,11 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) ...@@ -243,3 +243,11 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
} }
#endif /* CONFIG_DEBUG_VM */ #endif /* CONFIG_DEBUG_VM */
unsigned long vmalloc_to_phys(void *va)
{
unsigned long pfn = vmalloc_to_pfn(va);
BUG_ON(!pfn);
return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
}
EXPORT_SYMBOL_GPL(vmalloc_to_phys);
...@@ -493,14 +493,6 @@ static size_t event_to_attr_ct(struct hv_24x7_event_data *event) ...@@ -493,14 +493,6 @@ static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
} }
} }
static unsigned long vmalloc_to_phys(void *v)
{
struct page *p = vmalloc_to_page(v);
BUG_ON(!p);
return page_to_phys(p) + offset_in_page(v);
}
/* */ /* */
struct event_uniq { struct event_uniq {
struct rb_node node; struct rb_node node;
......
...@@ -159,6 +159,27 @@ static void icp_native_cause_ipi(int cpu, unsigned long data) ...@@ -159,6 +159,27 @@ static void icp_native_cause_ipi(int cpu, unsigned long data)
icp_native_set_qirr(cpu, IPI_PRIORITY); icp_native_set_qirr(cpu, IPI_PRIORITY);
} }
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void icp_native_cause_ipi_rm(int cpu)
{
/*
* Currently not used to send IPIs to another CPU
* on the same core. Only caller is KVM real mode.
* Need the physical address of the XICS to be
* previously saved in kvm_hstate in the paca.
*/
unsigned long xics_phys;
/*
* Just like the cause_ipi functions, it is required to
* include a full barrier (out8 includes a sync) before
* causing the IPI.
*/
xics_phys = paca[cpu].kvm_hstate.xics_phys;
out_rm8((u8 *)(xics_phys + XICS_MFRR), IPI_PRIORITY);
}
#endif
/* /*
* Called when an interrupt is received on an off-line CPU to * Called when an interrupt is received on an off-line CPU to
* clear the interrupt, so that the CPU can go back to nap mode. * clear the interrupt, so that the CPU can go back to nap mode.
......
...@@ -862,6 +862,7 @@ struct kvm_ppc_smmu_info { ...@@ -862,6 +862,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 #define KVM_CAP_IOEVENTFD_ANY_LENGTH 122
#define KVM_CAP_HYPERV_SYNIC 123 #define KVM_CAP_HYPERV_SYNIC 123
#define KVM_CAP_S390_RI 124 #define KVM_CAP_S390_RI 124
#define KVM_CAP_SPAPR_TCE_64 125
#ifdef KVM_CAP_IRQ_ROUTING #ifdef KVM_CAP_IRQ_ROUTING
...@@ -1154,6 +1155,8 @@ struct kvm_s390_ucas_mapping { ...@@ -1154,6 +1155,8 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_PPC_ALLOC_HTAB */ /* Available with KVM_CAP_PPC_ALLOC_HTAB */
#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32) #define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32)
#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce)
#define KVM_CREATE_SPAPR_TCE_64 _IOW(KVMIO, 0xa8, \
struct kvm_create_spapr_tce_64)
/* Available with KVM_CAP_RMA */ /* Available with KVM_CAP_RMA */
#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
/* Available with KVM_CAP_PPC_HTAB_FD */ /* Available with KVM_CAP_PPC_HTAB_FD */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment