Commit 9c6eb531 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge tag 'kvm-s390-next-5.16-1' of...

Merge tag 'kvm-s390-next-5.16-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD

KVM: s390: Fixes and Features for 5.16

- SIGP Fixes
- initial preparations for lazy destroy of secure VMs
- storage key improvements/fixes
- Log the guest CPNC
parents 7c8de080 3fd8417f
......@@ -1074,8 +1074,9 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
pte_t res;
res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
/* At this point the reference through the mapping is still present */
if (mm_is_protected(mm) && pte_present(res))
uv_convert_from_secure(pte_val(res) & PAGE_MASK);
uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
return res;
}
......@@ -1091,8 +1092,9 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
pte_t res;
res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
/* At this point the reference through the mapping is still present */
if (mm_is_protected(vma->vm_mm) && pte_present(res))
uv_convert_from_secure(pte_val(res) & PAGE_MASK);
uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
return res;
}
......@@ -1116,8 +1118,9 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
} else {
res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
/* At this point the reference through the mapping is still present */
if (mm_is_protected(mm) && pte_present(res))
uv_convert_from_secure(pte_val(res) & PAGE_MASK);
uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
return res;
}
......
......@@ -18,6 +18,11 @@
#include <asm/page.h>
#include <asm/gmap.h>
#define UVC_CC_OK 0
#define UVC_CC_ERROR 1
#define UVC_CC_BUSY 2
#define UVC_CC_PARTIAL 3
#define UVC_RC_EXECUTED 0x0001
#define UVC_RC_INV_CMD 0x0002
#define UVC_RC_INV_STATE 0x0003
......@@ -351,8 +356,9 @@ static inline int is_prot_virt_host(void)
}
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
int uv_destroy_page(unsigned long paddr);
int uv_destroy_owned_page(unsigned long paddr);
int uv_convert_from_secure(unsigned long paddr);
int uv_convert_owned_from_secure(unsigned long paddr);
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
void setup_uv(void);
......@@ -360,7 +366,7 @@ void setup_uv(void);
#define is_prot_virt_host() 0
static inline void setup_uv(void) {}
static inline int uv_destroy_page(unsigned long paddr)
static inline int uv_destroy_owned_page(unsigned long paddr)
{
return 0;
}
......@@ -369,6 +375,11 @@ static inline int uv_convert_from_secure(unsigned long paddr)
{
return 0;
}
static inline int uv_convert_owned_from_secure(unsigned long paddr)
{
return 0;
}
#endif
#endif /* _ASM_S390_UV_H */
......@@ -100,7 +100,7 @@ static int uv_pin_shared(unsigned long paddr)
*
* @paddr: Absolute host address of page to be destroyed
*/
int uv_destroy_page(unsigned long paddr)
static int uv_destroy_page(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_DESTR_SEC_STOR,
......@@ -120,6 +120,22 @@ int uv_destroy_page(unsigned long paddr)
return 0;
}
/*
* The caller must already hold a reference to the page
*/
int uv_destroy_owned_page(unsigned long paddr)
{
struct page *page = phys_to_page(paddr);
int rc;
get_page(page);
rc = uv_destroy_page(paddr);
if (!rc)
clear_bit(PG_arch_1, &page->flags);
put_page(page);
return rc;
}
/*
* Requests the Ultravisor to encrypt a guest page and make it
* accessible to the host for paging (export).
......@@ -139,6 +155,22 @@ int uv_convert_from_secure(unsigned long paddr)
return 0;
}
/*
* The caller must already hold a reference to the page
*/
int uv_convert_owned_from_secure(unsigned long paddr)
{
struct page *page = phys_to_page(paddr);
int rc;
get_page(page);
rc = uv_convert_from_secure(paddr);
if (!rc)
clear_bit(PG_arch_1, &page->flags);
put_page(page);
return rc;
}
/*
* Calculate the expected ref_count for a page that would otherwise have no
* further pins. This was cribbed from similar functions in other places in
......@@ -165,7 +197,7 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
{
pte_t entry = READ_ONCE(*ptep);
struct page *page;
int expected, rc = 0;
int expected, cc = 0;
if (!pte_present(entry))
return -ENXIO;
......@@ -181,12 +213,25 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
if (!page_ref_freeze(page, expected))
return -EBUSY;
set_bit(PG_arch_1, &page->flags);
rc = uv_call(0, (u64)uvcb);
/*
* If the UVC does not succeed or fail immediately, we don't want to
* loop for long, or we might get stall notifications.
* On the other hand, this is a complex scenario and we are holding a lot of
* locks, so we can't easily sleep and reschedule. We try only once,
* and if the UVC returned busy or partial completion, we return
* -EAGAIN and we let the callers deal with it.
*/
cc = __uv_call(0, (u64)uvcb);
page_ref_unfreeze(page, expected);
/* Return -ENXIO if the page was not mapped, -EINVAL otherwise */
if (rc)
rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
return rc;
/*
* Return -ENXIO if the page was not mapped, -EINVAL for other errors.
* If busy or partially completed, return -EAGAIN.
*/
if (cc == UVC_CC_OK)
return 0;
else if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL)
return -EAGAIN;
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
/*
......@@ -212,7 +257,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
uaddr = __gmap_translate(gmap, gaddr);
if (IS_ERR_VALUE(uaddr))
goto out;
vma = find_vma(gmap->mm, uaddr);
vma = vma_lookup(gmap->mm, uaddr);
if (!vma)
goto out;
/*
......@@ -239,6 +284,10 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
mmap_read_unlock(gmap->mm);
if (rc == -EAGAIN) {
/*
* If we are here because the UVC returned busy or partial
* completion, this is just a useless check, but it is safe.
*/
wait_on_page_writeback(page);
} else if (rc == -EBUSY) {
/*
......
......@@ -518,6 +518,11 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
*/
if (rc == -EINVAL)
return 0;
/*
* If we got -EAGAIN here, we simply return it. It will eventually
* get propagated all the way to userspace, which should then try
* again.
*/
return rc;
}
......
......@@ -3053,13 +3053,14 @@ static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask)
int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
struct kvm_vcpu *vcpu;
u8 vcpu_isc_mask;
for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
vcpu = kvm_get_vcpu(kvm, vcpu_idx);
if (psw_ioint_disabled(vcpu))
continue;
deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
if (deliverable_mask) {
vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
if (deliverable_mask & vcpu_isc_mask) {
/* lately kicked but not yet running */
if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
return;
......
......@@ -2487,8 +2487,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
case KVM_S390_PV_COMMAND: {
struct kvm_pv_cmd args;
/* protvirt means user sigp */
kvm->arch.user_cpu_state_ctrl = 1;
/* protvirt means user cpu state */
kvm_s390_set_user_cpu_state_ctrl(kvm);
r = 0;
if (!is_prot_virt_host()) {
r = -EINVAL;
......@@ -3363,6 +3363,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
return kvm_s390_vcpu_has_irq(vcpu, 0);
}
......@@ -3801,7 +3802,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
vcpu_load(vcpu);
/* user space knows about this interface - let it control the state */
vcpu->kvm->arch.user_cpu_state_ctrl = 1;
kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
switch (mp_state->mp_state) {
case KVM_MP_STATE_STOPPED:
......@@ -4254,6 +4255,7 @@ static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
}
/*
* If userspace sets the riccb (e.g. after migration) to a valid state,
......
......@@ -208,6 +208,15 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
return kvm->arch.user_cpu_state_ctrl != 0;
}
static inline void kvm_s390_set_user_cpu_state_ctrl(struct kvm *kvm)
{
if (kvm->arch.user_cpu_state_ctrl)
return;
VM_EVENT(kvm, 3, "%s", "ENABLE: Userspace CPU state control");
kvm->arch.user_cpu_state_ctrl = 1;
}
/* implemented in pv.c */
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
......
......@@ -397,6 +397,8 @@ static int handle_sske(struct kvm_vcpu *vcpu)
mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc == -EAGAIN)
continue;
if (rc < 0)
return rc;
start += PAGE_SIZE;
......
......@@ -16,18 +16,17 @@
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
int cc = 0;
int cc;
if (!kvm_s390_pv_cpu_get_handle(vcpu))
return 0;
if (kvm_s390_pv_cpu_get_handle(vcpu)) {
cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
KVM_UV_EVENT(vcpu->kvm, 3,
"PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
vcpu->vcpu_id, *rc, *rrc);
WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x",
*rc, *rrc);
}
WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);
/* Intended memory leak for something that should never happen. */
if (!cc)
free_pages(vcpu->arch.pv.stor_base,
......@@ -196,7 +195,7 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
cc = uv_call(0, (u64)&uvcb);
cc = uv_call_sched(0, (u64)&uvcb);
*rc = uvcb.header.rc;
*rrc = uvcb.header.rrc;
KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
......
......@@ -151,22 +151,10 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter,
u64 *status_reg)
{
unsigned int i;
struct kvm_vcpu *v;
bool all_stopped = true;
kvm_for_each_vcpu(i, v, vcpu->kvm) {
if (v == vcpu)
continue;
if (!is_vcpu_stopped(v))
all_stopped = false;
}
*status_reg &= 0xffffffff00000000UL;
/* Reject set arch order, with czam we're always in z/Arch mode. */
*status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER :
SIGP_STATUS_INCORRECT_STATE);
*status_reg |= SIGP_STATUS_INVALID_PARAMETER;
return SIGP_CC_STATUS_STORED;
}
......
......@@ -672,6 +672,7 @@ EXPORT_SYMBOL_GPL(gmap_fault);
*/
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
struct vm_area_struct *vma;
unsigned long vmaddr;
spinlock_t *ptl;
pte_t *ptep;
......@@ -681,12 +682,18 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
gaddr >> PMD_SHIFT);
if (vmaddr) {
vmaddr |= gaddr & ~PMD_MASK;
vma = vma_lookup(gmap->mm, vmaddr);
if (!vma || is_vm_hugetlb_page(vma))
return;
/* Get pointer to the page table entry */
ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
if (likely(ptep))
if (likely(ptep)) {
ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
pte_unmap_unlock(ptep, ptl);
}
}
}
EXPORT_SYMBOL_GPL(__gmap_zap);
......@@ -2677,8 +2684,10 @@ static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
{
pte_t pte = READ_ONCE(*ptep);
/* There is a reference through the mapping */
if (pte_present(pte))
WARN_ON_ONCE(uv_destroy_page(pte_val(pte) & PAGE_MASK));
WARN_ON_ONCE(uv_destroy_owned_page(pte_val(pte) & PAGE_MASK));
return 0;
}
......
......@@ -429,22 +429,36 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
}
#ifdef CONFIG_PGSTE
static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
{
struct vm_area_struct *vma;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
/* We need a valid VMA, otherwise this is clearly a fault. */
vma = vma_lookup(mm, addr);
if (!vma)
return -EFAULT;
pgd = pgd_offset(mm, addr);
p4d = p4d_alloc(mm, pgd, addr);
if (!p4d)
return NULL;
pud = pud_alloc(mm, p4d, addr);
if (!pud)
return NULL;
pmd = pmd_alloc(mm, pud, addr);
return pmd;
if (!pgd_present(*pgd))
return -ENOENT;
p4d = p4d_offset(pgd, addr);
if (!p4d_present(*p4d))
return -ENOENT;
pud = pud_offset(p4d, addr);
if (!pud_present(*pud))
return -ENOENT;
/* Large PUDs are not supported yet. */
if (pud_large(*pud))
return -EFAULT;
*pmdp = pmd_offset(pud, addr);
return 0;
}
#endif
......@@ -778,14 +792,23 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp;
pte_t *ptep;
pmdp = pmd_alloc_map(mm, addr);
if (unlikely(!pmdp))
/*
* If we don't have a PTE table and if there is no huge page mapped,
* we can ignore attempts to set the key to 0, because it already is 0.
*/
switch (pmd_lookup(mm, addr, &pmdp)) {
case -ENOENT:
return key ? -EFAULT : 0;
case 0:
break;
default:
return -EFAULT;
}
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
spin_unlock(ptl);
return -EFAULT;
return key ? -EFAULT : 0;
}
if (pmd_large(*pmdp)) {
......@@ -801,10 +824,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
}
spin_unlock(ptl);
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
new = old = pgste_get_lock(ptep);
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
PGSTE_ACC_BITS | PGSTE_FP_BIT);
......@@ -881,14 +901,23 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
pte_t *ptep;
int cc = 0;
pmdp = pmd_alloc_map(mm, addr);
if (unlikely(!pmdp))
/*
* If we don't have a PTE table and if there is no huge page mapped,
* the storage key is 0 and there is nothing for us to do.
*/
switch (pmd_lookup(mm, addr, &pmdp)) {
case -ENOENT:
return 0;
case 0:
break;
default:
return -EFAULT;
}
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
spin_unlock(ptl);
return -EFAULT;
return 0;
}
if (pmd_large(*pmdp)) {
......@@ -900,10 +929,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
}
spin_unlock(ptl);
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
new = old = pgste_get_lock(ptep);
/* Reset guest reference bit only */
pgste_val(new) &= ~PGSTE_GR_BIT;
......@@ -935,15 +961,24 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp;
pte_t *ptep;
pmdp = pmd_alloc_map(mm, addr);
if (unlikely(!pmdp))
/*
* If we don't have a PTE table and if there is no huge page mapped,
* the storage key is 0.
*/
*key = 0;
switch (pmd_lookup(mm, addr, &pmdp)) {
case -ENOENT:
return 0;
case 0:
break;
default:
return -EFAULT;
}
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
/* Not yet mapped memory has a zero key */
spin_unlock(ptl);
*key = 0;
return 0;
}
......@@ -956,10 +991,7 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
}
spin_unlock(ptl);
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
pgste = pgste_get_lock(ptep);
*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
paddr = pte_val(*ptep) & PAGE_MASK;
......@@ -988,6 +1020,7 @@ EXPORT_SYMBOL(get_guest_storage_key);
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
unsigned long *oldpte, unsigned long *oldpgste)
{
struct vm_area_struct *vma;
unsigned long pgstev;
spinlock_t *ptl;
pgste_t pgste;
......@@ -997,6 +1030,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
WARN_ON_ONCE(orc > ESSA_MAX);
if (unlikely(orc > ESSA_MAX))
return -EINVAL;
vma = vma_lookup(mm, hva);
if (!vma || is_vm_hugetlb_page(vma))
return -EFAULT;
ptep = get_locked_pte(mm, hva, &ptl);
if (unlikely(!ptep))
return -EFAULT;
......@@ -1089,10 +1126,14 @@ EXPORT_SYMBOL(pgste_perform_essa);
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
unsigned long bits, unsigned long value)
{
struct vm_area_struct *vma;
spinlock_t *ptl;
pgste_t new;
pte_t *ptep;
vma = vma_lookup(mm, hva);
if (!vma || is_vm_hugetlb_page(vma))
return -EFAULT;
ptep = get_locked_pte(mm, hva, &ptl);
if (unlikely(!ptep))
return -EFAULT;
......@@ -1117,9 +1158,13 @@ EXPORT_SYMBOL(set_pgste_bits);
*/
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
struct vm_area_struct *vma;
spinlock_t *ptl;
pte_t *ptep;
vma = vma_lookup(mm, hva);
if (!vma || is_vm_hugetlb_page(vma))
return -EFAULT;
ptep = get_locked_pte(mm, hva, &ptl);
if (unlikely(!ptep))
return -EFAULT;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment