Commit 23758461 authored by Janosch Frank's avatar Janosch Frank

Merge tag 'hlp_stage1' of...

Merge tag 'hlp_stage1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into kvms390/next

KVM: s390: initial host large page support

- must be enabled via module parameter hpage=1
- cannot be used together with nested
- does support migration
- does support hugetlbfs
- no THP yet
parents 57cb198c a4499382
......@@ -4391,6 +4391,22 @@ all such vmexits.
Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
7.14 KVM_CAP_S390_HPAGE_1M
Architectures: s390
Parameters: none
Returns: 0 on success, -EINVAL if hpage module parameter was not set
or cmma is enabled
With this capability the KVM support for memory backing with 1m pages
through hugetlbfs can be enabled for a VM. After the capability is
enabled, cmma can't be enabled anymore and pfmfi and the storage key
interpretation are disabled. If cmma has already been enabled or the
hpage module parameter is not set to 1, -EINVAL is returned.
While it is generally possible to create a huge page backed VM without
this capability, the VM will not be able to run.
8. Other capabilities.
----------------------
......
......@@ -9,6 +9,14 @@
#ifndef _ASM_S390_GMAP_H
#define _ASM_S390_GMAP_H
/* Generic bits for GMAP notification on DAT table entry changes. */
#define GMAP_NOTIFY_SHADOW 0x2
#define GMAP_NOTIFY_MPROT 0x1
/* Status bits only for huge segment entries */
#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */
#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */
/**
* struct gmap_struct - guest address space
* @list: list head for the mm->context gmap list
......@@ -132,4 +140,6 @@ void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *,
int gmap_mprotect_notify(struct gmap *, unsigned long start,
unsigned long len, int prot);
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
#endif /* _ASM_S390_GMAP_H */
......@@ -37,7 +37,10 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
}
#define arch_clear_hugepage_flags(page) do { } while (0)
static inline void arch_clear_hugepage_flags(struct page *page)
{
clear_bit(PG_arch_1, &page->flags);
}
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz)
......
......@@ -24,6 +24,8 @@ typedef struct {
unsigned int uses_skeys:1;
/* The mmu context uses CMM. */
unsigned int uses_cmm:1;
/* The gmaps associated with this context are allowed to use huge pages. */
unsigned int allow_gmap_hpage_1m:1;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
......
......@@ -32,6 +32,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.has_pgste = 0;
mm->context.uses_skeys = 0;
mm->context.uses_cmm = 0;
mm->context.allow_gmap_hpage_1m = 0;
#endif
switch (mm->context.asce_limit) {
case _REGION2_SIZE:
......
......@@ -268,8 +268,10 @@ static inline int is_module_addr(void *addr)
#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
/* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL
#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */
#define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */
......@@ -1101,7 +1103,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
pte_t *sptep, pte_t *tptep, pte_t pte);
void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep);
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address,
pte_t *ptep);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned char key, bool nq);
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
......@@ -1116,6 +1119,10 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long addr,
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep);
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
unsigned long *oldpte, unsigned long *oldpgste);
void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr);
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr);
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
/*
* Certain architectures need to do special things when PTEs
......
......@@ -172,6 +172,10 @@ static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");
/*
* For now we handle at most 16 double words as this is what the s390 base
......@@ -475,6 +479,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_AIS_MIGRATION:
r = 1;
break;
case KVM_CAP_S390_HPAGE_1M:
r = 0;
if (hpage)
r = 1;
break;
case KVM_CAP_S390_MEM_OP:
r = MEM_OP_MAX_SIZE;
break;
......@@ -511,19 +520,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot)
struct kvm_memory_slot *memslot)
{
int i;
gfn_t cur_gfn, last_gfn;
unsigned long address;
unsigned long gaddr, vmaddr;
struct gmap *gmap = kvm->arch.gmap;
DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
/* Loop over all guest pages */
/* Loop over all guest segments */
cur_gfn = memslot->base_gfn;
last_gfn = memslot->base_gfn + memslot->npages;
for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
address = gfn_to_hva_memslot(memslot, cur_gfn);
for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
gaddr = gfn_to_gpa(cur_gfn);
vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
if (kvm_is_error_hva(vmaddr))
continue;
bitmap_zero(bitmap, _PAGE_ENTRIES);
gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
for (i = 0; i < _PAGE_ENTRIES; i++) {
if (test_bit(i, bitmap))
mark_page_dirty(kvm, cur_gfn + i);
}
if (test_and_clear_guest_dirty(gmap->mm, address))
mark_page_dirty(kvm, cur_gfn);
if (fatal_signal_pending(current))
return;
cond_resched();
......@@ -667,6 +687,27 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
r ? "(not available)" : "(success)");
break;
case KVM_CAP_S390_HPAGE_1M:
mutex_lock(&kvm->lock);
if (kvm->created_vcpus)
r = -EBUSY;
else if (!hpage || kvm->arch.use_cmma)
r = -EINVAL;
else {
r = 0;
kvm->mm->context.allow_gmap_hpage_1m = 1;
/*
* We might have to create fake 4k page
* tables. To avoid that the hardware works on
* stale PGSTEs, we emulate these instructions.
*/
kvm->arch.use_skf = 0;
kvm->arch.use_pfmfi = 0;
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
r ? "(not available)" : "(success)");
break;
case KVM_CAP_S390_USER_STSI:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
kvm->arch.user_stsi = 1;
......@@ -714,10 +755,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
if (!sclp.has_cmma)
break;
ret = -EBUSY;
VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
mutex_lock(&kvm->lock);
if (!kvm->created_vcpus) {
if (kvm->created_vcpus)
ret = -EBUSY;
else if (kvm->mm->context.allow_gmap_hpage_1m)
ret = -EINVAL;
else {
kvm->arch.use_cmma = 1;
/* Not compatible with cmma. */
kvm->arch.use_pfmfi = 0;
......@@ -1514,6 +1558,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
uint8_t *keys;
uint64_t hva;
int srcu_idx, i, r = 0;
bool unlocked;
if (args->flags != 0)
return -EINVAL;
......@@ -1538,9 +1583,11 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (r)
goto out;
i = 0;
down_read(&current->mm->mmap_sem);
srcu_idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < args->count; i++) {
while (i < args->count) {
unlocked = false;
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {
r = -EFAULT;
......@@ -1554,8 +1601,14 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
}
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
if (r)
break;
if (r) {
r = fixup_user_fault(current, current->mm, hva,
FAULT_FLAG_WRITE, &unlocked);
if (r)
break;
}
if (!r)
i++;
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
up_read(&current->mm->mmap_sem);
......@@ -4141,6 +4194,11 @@ static int __init kvm_s390_init(void)
return -ENODEV;
}
if (nested && hpage) {
pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
return -EINVAL;
}
for (i = 0; i < 16; i++)
kvm_s390_fac_base[i] |=
S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
......
......@@ -244,9 +244,10 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
static int handle_iske(struct kvm_vcpu *vcpu)
{
unsigned long addr;
unsigned long gaddr, vmaddr;
unsigned char key;
int reg1, reg2;
bool unlocked;
int rc;
vcpu->stat.instruction_iske++;
......@@ -260,18 +261,28 @@ static int handle_iske(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
addr = kvm_s390_logical_to_effective(vcpu, addr);
addr = kvm_s390_real_to_abs(vcpu, addr);
addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
if (kvm_is_error_hva(addr))
gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
retry:
unlocked = false;
down_read(&current->mm->mmap_sem);
rc = get_guest_storage_key(current->mm, addr, &key);
up_read(&current->mm->mmap_sem);
rc = get_guest_storage_key(current->mm, vmaddr, &key);
if (rc) {
rc = fixup_user_fault(current, current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
up_read(&current->mm->mmap_sem);
goto retry;
}
}
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
up_read(&current->mm->mmap_sem);
vcpu->run->s.regs.gprs[reg1] &= ~0xff;
vcpu->run->s.regs.gprs[reg1] |= key;
return 0;
......@@ -279,8 +290,9 @@ static int handle_iske(struct kvm_vcpu *vcpu)
static int handle_rrbe(struct kvm_vcpu *vcpu)
{
unsigned long addr;
unsigned long vmaddr, gaddr;
int reg1, reg2;
bool unlocked;
int rc;
vcpu->stat.instruction_rrbe++;
......@@ -294,19 +306,27 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
addr = kvm_s390_logical_to_effective(vcpu, addr);
addr = kvm_s390_real_to_abs(vcpu, addr);
addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
if (kvm_is_error_hva(addr))
gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
retry:
unlocked = false;
down_read(&current->mm->mmap_sem);
rc = reset_guest_reference_bit(current->mm, addr);
up_read(&current->mm->mmap_sem);
rc = reset_guest_reference_bit(current->mm, vmaddr);
if (rc < 0) {
rc = fixup_user_fault(current, current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
up_read(&current->mm->mmap_sem);
goto retry;
}
}
if (rc < 0)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
up_read(&current->mm->mmap_sem);
kvm_s390_set_psw_cc(vcpu, rc);
return 0;
}
......@@ -321,6 +341,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
unsigned long start, end;
unsigned char key, oldkey;
int reg1, reg2;
bool unlocked;
int rc;
vcpu->stat.instruction_sske++;
......@@ -353,19 +374,28 @@ static int handle_sske(struct kvm_vcpu *vcpu)
}
while (start != end) {
unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
unlocked = false;
if (kvm_is_error_hva(addr))
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
down_read(&current->mm->mmap_sem);
rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey,
rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
m3 & SSKE_NQ, m3 & SSKE_MR,
m3 & SSKE_MC);
up_read(&current->mm->mmap_sem);
if (rc < 0)
if (rc < 0) {
rc = fixup_user_fault(current, current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
start += PAGE_SIZE;
up_read(&current->mm->mmap_sem);
if (rc >= 0)
start += PAGE_SIZE;
}
if (m3 & (SSKE_MC | SSKE_MR)) {
......@@ -946,11 +976,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
}
while (start != end) {
unsigned long useraddr;
unsigned long vmaddr;
bool unlocked = false;
/* Translate guest address to host address */
useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
if (kvm_is_error_hva(useraddr))
vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
......@@ -964,14 +995,20 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (rc)
return rc;
down_read(&current->mm->mmap_sem);
rc = cond_set_guest_storage_key(current->mm, useraddr,
rc = cond_set_guest_storage_key(current->mm, vmaddr,
key, NULL, nq, mr, mc);
up_read(&current->mm->mmap_sem);
if (rc < 0)
if (rc < 0) {
rc = fixup_user_fault(current, current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
start += PAGE_SIZE;
up_read(&current->mm->mmap_sem);
if (rc >= 0)
start += PAGE_SIZE;
}
}
if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
......
This diff is collapsed.
......@@ -123,6 +123,29 @@ static inline pte_t __rste_to_pte(unsigned long rste)
return pte;
}
static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
{
struct page *page;
unsigned long size, paddr;
if (!mm_uses_skeys(mm) ||
rste & _SEGMENT_ENTRY_INVALID)
return;
if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
page = pud_page(__pud(rste));
size = PUD_SIZE;
paddr = rste & PUD_MASK;
} else {
page = pmd_page(__pmd(rste));
size = PMD_SIZE;
paddr = rste & PMD_MASK;
}
if (!test_and_set_bit(PG_arch_1, &page->flags))
__storage_key_init_range(paddr, paddr + size - 1);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
......@@ -137,6 +160,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
else
rste |= _SEGMENT_ENTRY_LARGE;
clear_huge_pte_skeys(mm, rste);
pte_val(*ptep) = rste;
}
......
......@@ -14,7 +14,7 @@
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
{
asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0"
asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
: [addr] "+a" (addr) : [skey] "d" (skey));
return addr;
}
......@@ -23,8 +23,6 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
{
unsigned long boundary, size;
if (!PAGE_DEFAULT_KEY)
return;
while (start < end) {
if (MACHINE_HAS_EDAT1) {
/* set storage keys for a 1MB frame */
......@@ -37,7 +35,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
continue;
}
}
page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
start += PAGE_SIZE;
}
}
......
......@@ -347,18 +347,27 @@ static inline void pmdp_idte_local(struct mm_struct *mm,
mm->context.asce, IDTE_LOCAL);
else
__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
gmap_pmdp_idte_local(mm, addr);
}
static inline void pmdp_idte_global(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
if (MACHINE_HAS_TLB_GUEST)
if (MACHINE_HAS_TLB_GUEST) {
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
mm->context.asce, IDTE_GLOBAL);
else if (MACHINE_HAS_IDTE)
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
gmap_pmdp_idte_global(mm, addr);
} else if (MACHINE_HAS_IDTE) {
__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
else
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
gmap_pmdp_idte_global(mm, addr);
} else {
__pmdp_csp(pmdp);
if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
gmap_pmdp_csp(mm, addr);
}
}
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
......@@ -392,6 +401,8 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
cpumask_of(smp_processor_id()))) {
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
mm->context.flush_mm = 1;
if (mm_has_pgste(mm))
gmap_pmdp_invalidate(mm, addr);
} else {
pmdp_idte_global(mm, addr, pmdp);
}
......@@ -399,6 +410,24 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
return old;
}
static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
p4d = p4d_alloc(mm, pgd, addr);
if (!p4d)
return NULL;
pud = pud_alloc(mm, p4d, addr);
if (!pud)
return NULL;
pmd = pmd_alloc(mm, pud, addr);
return pmd;
}
pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t new)
{
......@@ -693,40 +722,14 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
/*
* Test and reset if a guest page is dirty
*/
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
spinlock_t *ptl;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgste_t pgste;
pte_t *ptep;
pte_t pte;
bool dirty;
int nodat;
pgd = pgd_offset(mm, addr);
p4d = p4d_alloc(mm, pgd, addr);
if (!p4d)
return false;
pud = pud_alloc(mm, p4d, addr);
if (!pud)
return false;
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
return false;
/* We can't run guests backed by huge pages, but userspace can
* still set them up and then try to migrate them without any
* migration support.
*/
if (pmd_large(*pmd))
return true;
ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (unlikely(!ptep))
return false;
pgste = pgste_get_lock(ptep);
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
pgste_val(pgste) &= ~PGSTE_UC_BIT;
......@@ -742,21 +745,43 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
*ptep = pte;
}
pgste_set_unlock(ptep, pgste);
spin_unlock(ptl);
return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned char key, bool nq)
{
unsigned long keyul;
unsigned long keyul, paddr;
spinlock_t *ptl;
pgste_t old, new;
pmd_t *pmdp;
pte_t *ptep;
ptep = get_locked_pte(mm, addr, &ptl);
pmdp = pmd_alloc_map(mm, addr);
if (unlikely(!pmdp))
return -EFAULT;
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
spin_unlock(ptl);
return -EFAULT;
}
if (pmd_large(*pmdp)) {
paddr = pmd_val(*pmdp) & HPAGE_MASK;
paddr |= addr & ~HPAGE_MASK;
/*
* Huge pmds need quiescing operations, they are
* always mapped.
*/
page_set_storage_key(paddr, key, 1);
spin_unlock(ptl);
return 0;
}
spin_unlock(ptl);
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
......@@ -767,14 +792,14 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
unsigned long address, bits, skey;
unsigned long bits, skey;
address = pte_val(*ptep) & PAGE_MASK;
skey = (unsigned long) page_get_storage_key(address);
paddr = pte_val(*ptep) & PAGE_MASK;
skey = (unsigned long) page_get_storage_key(paddr);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
/* Set storage key ACC and FP */
page_set_storage_key(address, skey, !nq);
page_set_storage_key(paddr, skey, !nq);
/* Merge host changed & referenced into pgste */
pgste_val(new) |= bits << 52;
}
......@@ -830,11 +855,32 @@ EXPORT_SYMBOL(cond_set_guest_storage_key);
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
spinlock_t *ptl;
unsigned long paddr;
pgste_t old, new;
pmd_t *pmdp;
pte_t *ptep;
int cc = 0;
ptep = get_locked_pte(mm, addr, &ptl);
pmdp = pmd_alloc_map(mm, addr);
if (unlikely(!pmdp))
return -EFAULT;
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
spin_unlock(ptl);
return -EFAULT;
}
if (pmd_large(*pmdp)) {
paddr = pmd_val(*pmdp) & HPAGE_MASK;
paddr |= addr & ~HPAGE_MASK;
cc = page_reset_referenced(paddr);
spin_unlock(ptl);
return cc;
}
spin_unlock(ptl);
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
......@@ -843,7 +889,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
pgste_val(new) &= ~PGSTE_GR_BIT;
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
paddr = pte_val(*ptep) & PAGE_MASK;
cc = page_reset_referenced(paddr);
/* Merge real referenced bit into host-set */
pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
}
......@@ -862,18 +909,42 @@ EXPORT_SYMBOL(reset_guest_reference_bit);
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned char *key)
{
unsigned long paddr;
spinlock_t *ptl;
pgste_t pgste;
pmd_t *pmdp;
pte_t *ptep;
ptep = get_locked_pte(mm, addr, &ptl);
pmdp = pmd_alloc_map(mm, addr);
if (unlikely(!pmdp))
return -EFAULT;
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
/* Not yet mapped memory has a zero key */
spin_unlock(ptl);
*key = 0;
return 0;
}
if (pmd_large(*pmdp)) {
paddr = pmd_val(*pmdp) & HPAGE_MASK;
paddr |= addr & ~HPAGE_MASK;
*key = page_get_storage_key(paddr);
spin_unlock(ptl);
return 0;
}
spin_unlock(ptl);
ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
pgste = pgste_get_lock(ptep);
*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
paddr = pte_val(*ptep) & PAGE_MASK;
if (!(pte_val(*ptep) & _PAGE_INVALID))
*key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
*key = page_get_storage_key(paddr);
/* Reflect guest's logical view, not physical */
*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
pgste_set_unlock(ptep, pgste);
......
......@@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_GET_MSR_FEATURES 153
#define KVM_CAP_HYPERV_EVENTFD 154
#define KVM_CAP_HYPERV_TLBFLUSH 155
#define KVM_CAP_S390_HPAGE_1M 156
#ifdef KVM_CAP_IRQ_ROUTING
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment