Commit 8f7b79b8 authored by Paul Mackerras, committed by Michael Ellerman

KVM: PPC: Book3S HV: Implement dirty page logging for radix guests

This adds code to keep track of dirty pages when requested (that is,
when memslot->dirty_bitmap is non-NULL) for radix guests.  We use the
dirty bits in the PTEs in the second-level (partition-scoped) page
tables, together with a bitmap of pages that were dirty when their
PTE was invalidated (e.g., when the page was paged out).  This bitmap
is stored in the first half of the memslot->dirty_bitmap area, and
kvm_vm_ioctl_get_dirty_log_hv() now uses the second half for the
bitmap that gets returned to userspace.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 01756099
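
The split-bitmap scheme described in the commit message is easy to lose in the diff below, so here is a minimal standalone sketch of the flow: the memslot's dirty_bitmap allocation is treated as two halves, the first half accumulating dirty bits as radix PTEs are invalidated and the second half serving as the snapshot that is copied to userspace. The names toy_memslot, accumulate_dirty and drain_dirty_log are invented, and the GCC __atomic_exchange_n builtin stands in for the kernel's xchg(); treat this as an illustration of the scheme, not kernel code.

#include <stdio.h>
#include <string.h>

#define NPAGES 128                      /* pages in the toy memslot */
#define NWORDS (NPAGES / 64)            /* 64-bit words per bitmap half */

/* Toy stand-in for a memslot: one allocation holding both bitmap halves. */
struct toy_memslot {
        /* words [0, NWORDS) accumulate; [NWORDS, 2*NWORDS) hold the snapshot */
        unsigned long dirty_bitmap[2 * NWORDS];
};

/* Called when a dirty PTE is invalidated: record the page in the first half. */
static void accumulate_dirty(struct toy_memslot *slot, unsigned long gfn)
{
        slot->dirty_bitmap[gfn / 64] |= 1ul << (gfn % 64);
}

/* The GET_DIRTY_LOG path: drain the first half into the second atomically. */
static unsigned long *drain_dirty_log(struct toy_memslot *slot)
{
        unsigned long *buf = slot->dirty_bitmap + NWORDS;
        unsigned long i;

        memset(buf, 0, NWORDS * sizeof(long));
        for (i = 0; i < NWORDS; ++i)
                /* the kernel uses xchg() so no bits are lost to racing faults */
                buf[i] |= __atomic_exchange_n(&slot->dirty_bitmap[i], 0,
                                              __ATOMIC_SEQ_CST);
        return buf;             /* this half is what reaches userspace */
}

int main(void)
{
        struct toy_memslot slot = { { 0 } };
        unsigned long *log;

        accumulate_dirty(&slot, 3);
        accumulate_dirty(&slot, 70);

        log = drain_dirty_log(&slot);
        printf("log: word0=%#lx word1=%#lx\n", log[0], log[1]); /* 0x8, 0x40 */
        printf("accumulator now: %#lx\n", slot.dirty_bitmap[0]); /* 0 */
        return 0;
}
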
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -198,6 +198,8 @@ extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
                        unsigned long gfn);
 extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
                        unsigned long gfn);
+extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
+                       struct kvm_memory_slot *memslot, unsigned long *map);
 
 /* XXX remove this export when load_last_inst() is generic */
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
@@ -228,8 +230,11 @@ extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
                        unsigned long pte_index, unsigned long avpn,
                        unsigned long *hpret);
-extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
+extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
                        struct kvm_memory_slot *memslot, unsigned long *map);
+extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+                       struct kvm_memory_slot *memslot,
+                       unsigned long *map);
 extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
                        unsigned long mask);
 extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1068,7 +1068,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
        return npages_dirty;
 }
 
-static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
                        struct kvm_memory_slot *memslot,
                        unsigned long *map)
 {
@@ -1086,12 +1086,11 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
        __set_bit_le(gfn - memslot->base_gfn, map);
 }
 
-long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
-                       unsigned long *map)
+long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
+                       struct kvm_memory_slot *memslot, unsigned long *map)
 {
        unsigned long i, j;
        unsigned long *rmapp;
-       struct kvm_vcpu *vcpu;
 
        preempt_disable();
        rmapp = memslot->arch.rmap;
@@ -1107,15 +1106,6 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
                        __set_bit_le(j, map);
                ++rmapp;
        }
 
-       /* Harvest dirty bits from VPA and DTL updates */
-       /* Note: we never modify the SLB shadow buffer areas */
-       kvm_for_each_vcpu(i, vcpu, kvm) {
-               spin_lock(&vcpu->arch.vpa_update_lock);
-               harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
-               harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
-               spin_unlock(&vcpu->arch.vpa_update_lock);
-       }
-
        preempt_enable();
        return 0;
 }
@@ -1170,10 +1160,14 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
        srcu_idx = srcu_read_lock(&kvm->srcu);
        memslot = gfn_to_memslot(kvm, gfn);
        if (memslot) {
+               if (!kvm_is_radix(kvm)) {
                        rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
                        lock_rmap(rmap);
                        *rmap |= KVMPPC_RMAP_CHANGED;
                        unlock_rmap(rmap);
+               } else if (memslot->dirty_bitmap) {
+                       mark_page_dirty(kvm, gfn);
+               }
        }
        srcu_read_unlock(&kvm->srcu, srcu_idx);
 }
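The radix MMU changes below all share one pattern: kvmppc_radix_update_pte() becomes a read-modify-write that returns the old PTE bits, so each invalidation site can catch a set dirty bit that would otherwise vanish with the mapping. A miniature of that contract follows; toy_pte_update is an invented stand-in for __radix_pte_update(), and the TOY_PAGE_* values are illustrative rather than the real PTE layout.

#include <stdatomic.h>
#include <stdio.h>

#define TOY_PAGE_PRESENT 0x1ul
#define TOY_PAGE_DIRTY   0x80ul   /* illustrative values, not the real layout */

/* Atomically clear/set bits in a PTE word and return the *old* value,
 * mirroring the new kvmppc_radix_update_pte() contract. */
static unsigned long toy_pte_update(_Atomic unsigned long *ptep,
                                    unsigned long clr, unsigned long set)
{
        unsigned long old = atomic_load(ptep), new;

        do {
                new = (old & ~clr) | set;
        } while (!atomic_compare_exchange_weak(ptep, &old, new));
        return old;
}

int main(void)
{
        _Atomic unsigned long pte = TOY_PAGE_PRESENT | TOY_PAGE_DIRTY;
        unsigned long old;

        /* invalidate the mapping, as kvm_unmap_radix() does below */
        old = toy_pte_update(&pte, TOY_PAGE_PRESENT, 0);
        if (old & TOY_PAGE_DIRTY)
                printf("mapping was dirty: transfer the bit to the dirty log\n");
        return 0;
}
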
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -158,18 +158,21 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
        asm volatile("ptesync": : :"memory");
 }
 
-void kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, unsigned long clr,
-                            unsigned long set, unsigned long addr,
-                            unsigned int shift)
+unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
+                                     unsigned long clr, unsigned long set,
+                                     unsigned long addr, unsigned int shift)
 {
+       unsigned long old = 0;
+
        if (!(clr & _PAGE_PRESENT) && cpu_has_feature(CPU_FTR_POWER9_DD1) &&
            pte_present(*ptep)) {
                /* have to invalidate it first */
-               __radix_pte_update(ptep, _PAGE_PRESENT, 0);
+               old = __radix_pte_update(ptep, _PAGE_PRESENT, 0);
                kvmppc_radix_tlbie_page(kvm, addr, shift);
                set |= _PAGE_PRESENT;
+               old &= _PAGE_PRESENT;
        }
-       __radix_pte_update(ptep, clr, set);
+       return __radix_pte_update(ptep, clr, set) | old;
 }
 
 void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
@@ -197,6 +200,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
        pud_t *pud, *new_pud = NULL;
        pmd_t *pmd, *new_pmd = NULL;
        pte_t *ptep, *new_ptep = NULL;
+       unsigned long old;
        int ret;
 
        /* Traverse the guest's 2nd-level tree, allocate new levels needed */
@@ -262,9 +266,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
                ptep = pte_offset_kernel(pmd, gpa);
                if (pte_present(*ptep)) {
                        /* PTE was previously valid, so invalidate it */
-                       kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
-                                               0, gpa, 0);
+                       old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
+                                                     0, gpa, 0);
                        kvmppc_radix_tlbie_page(kvm, gpa, 0);
+                       if (old & _PAGE_DIRTY)
+                               mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
                }
                kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
        } else {
@@ -463,6 +469,26 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return ret;
 }
 
+static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot,
+                            unsigned long gfn, unsigned int order)
+{
+       unsigned long i, limit;
+       unsigned long *dp;
+
+       if (!memslot->dirty_bitmap)
+               return;
+       limit = 1ul << order;
+       if (limit < BITS_PER_LONG) {
+               for (i = 0; i < limit; ++i)
+                       mark_page_dirty(kvm, gfn + i);
+               return;
+       }
+       dp = memslot->dirty_bitmap + (gfn - memslot->base_gfn);
+       limit /= BITS_PER_LONG;
+       for (i = 0; i < limit; ++i)
+               *dp++ = ~0ul;
+}
+
 /* Called with kvm->lock held */
 int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
                    unsigned long gfn)
@@ -470,13 +496,21 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
        pte_t *ptep;
        unsigned long gpa = gfn << PAGE_SHIFT;
        unsigned int shift;
+       unsigned long old;
 
        ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
                                           NULL, &shift);
        if (ptep && pte_present(*ptep)) {
-               kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
-                                       gpa, shift);
+               old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
+                                             gpa, shift);
                kvmppc_radix_tlbie_page(kvm, gpa, shift);
+               if (old & _PAGE_DIRTY) {
+                       if (!shift)
+                               mark_page_dirty(kvm, gfn);
+                       else
+                               mark_pages_dirty(kvm, memslot,
+                                                gfn, shift - PAGE_SHIFT);
+               }
        }
        return 0;
 }
@@ -517,6 +551,65 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
        return ref;
 }
 
+/* Returns the number of PAGE_SIZE pages that are dirty */
+static int kvm_radix_test_clear_dirty(struct kvm *kvm,
+                               struct kvm_memory_slot *memslot, int pagenum)
+{
+       unsigned long gfn = memslot->base_gfn + pagenum;
+       unsigned long gpa = gfn << PAGE_SHIFT;
+       pte_t *ptep;
+       unsigned int shift;
+       int ret = 0;
+
+       ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
+                                          NULL, &shift);
+       if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
+               ret = 1;
+               if (shift)
+                       ret = 1 << (shift - PAGE_SHIFT);
+               kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
+                                       gpa, shift);
+               kvmppc_radix_tlbie_page(kvm, gpa, shift);
+       }
+       return ret;
+}
+
+long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
+                       struct kvm_memory_slot *memslot, unsigned long *map)
+{
+       unsigned long i, j;
+       unsigned long n, *p;
+       int npages;
+
+       /*
+        * Radix accumulates dirty bits in the first half of the
+        * memslot's dirty_bitmap area, for when pages are paged
+        * out or modified by the host directly.  Pick up these
+        * bits and add them to the map.
+        */
+       n = kvm_dirty_bitmap_bytes(memslot) / sizeof(long);
+       p = memslot->dirty_bitmap;
+       for (i = 0; i < n; ++i)
+               map[i] |= xchg(&p[i], 0);
+
+       for (i = 0; i < memslot->npages; i = j) {
+               npages = kvm_radix_test_clear_dirty(kvm, memslot, i);
+
+               /*
+                * Note that if npages > 0 then i must be a multiple of npages,
+                * since huge pages are only used to back the guest at guest
+                * real addresses that are a multiple of their size.
+                * Since we have at most one PTE covering any given guest
+                * real address, if npages > 1 we can skip to i + npages.
+                */
+               j = i + 1;
+               if (npages)
+                       for (j = i; npages; ++j, --npages)
+                               __set_bit_le(j, map);
+       }
+       return 0;
+}
+
 void kvmppc_free_radix(struct kvm *kvm)
 {
        unsigned long ig, iu, im;
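The comment in kvmppc_hv_get_dirty_log_radix() above carries the key invariant: a dirty huge PTE reports 2^(shift - PAGE_SHIFT) pages at once, and because guest huge pages are naturally aligned, the scan can set that many consecutive bits and jump straight past them. A standalone sketch of the scan loop, with an invented fake_test_clear_dirty() in place of the real PTE walk:

#include <stdio.h>

#define NPAGES 32

/* Fake page-table state: page 4 is dirty via a 4-page huge mapping,
 * page 20 via an ordinary small-page mapping.  Clears on read, like
 * kvm_radix_test_clear_dirty(). */
static int fake_test_clear_dirty(int pagenum)
{
        static int huge_dirty = 1, small_dirty = 1;

        if (pagenum == 4 && huge_dirty) {
                huge_dirty = 0;
                return 4;       /* one huge PTE covers pages 4..7 */
        }
        if (pagenum == 20 && small_dirty) {
                small_dirty = 0;
                return 1;
        }
        return 0;
}

int main(void)
{
        unsigned long map = 0;  /* 32 pages fit in one word here */
        unsigned long i, j;
        int npages;

        /* same shape as the scan in kvmppc_hv_get_dirty_log_radix() */
        for (i = 0; i < NPAGES; i = j) {
                npages = fake_test_clear_dirty(i);
                j = i + 1;
                if (npages)
                        for (j = i; npages; ++j, --npages)
                                map |= 1ul << j;
        }
        printf("dirty map = %#lx\n", map);      /* 0x1000f0: bits 4..7 and 20 */
        return 0;
}
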
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2961,8 +2961,10 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
                                         struct kvm_dirty_log *log)
 {
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
-       int r;
+       int i, r;
        unsigned long n;
+       unsigned long *buf;
+       struct kvm_vcpu *vcpu;
 
        mutex_lock(&kvm->slots_lock);
@@ -2976,15 +2978,32 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
        if (!memslot->dirty_bitmap)
                goto out;
 
+       /*
+        * Use second half of bitmap area because radix accumulates
+        * bits in the first half.
+        */
        n = kvm_dirty_bitmap_bytes(memslot);
-       memset(memslot->dirty_bitmap, 0, n);
+       buf = memslot->dirty_bitmap + n / sizeof(long);
+       memset(buf, 0, n);
 
-       r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
+       if (kvm_is_radix(kvm))
+               r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf);
+       else
+               r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf);
        if (r)
                goto out;
 
+       /* Harvest dirty bits from VPA and DTL updates */
+       /* Note: we never modify the SLB shadow buffer areas */
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               spin_lock(&vcpu->arch.vpa_update_lock);
+               kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf);
+               kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf);
+               spin_unlock(&vcpu->arch.vpa_update_lock);
+       }
+
        r = -EFAULT;
-       if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
+       if (copy_to_user(log->dirty_bitmap, buf, n))
                goto out;
 
        r = 0;
@@ -3037,7 +3056,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
        if (npages)
                atomic64_inc(&kvm->arch.mmio_update);
 
-       if (npages && old->npages) {
+       if (npages && old->npages && !kvm_is_radix(kvm)) {
                /*
                 * If modifying a memslot, reset all the rmap dirty bits.
                 * If this is a new memslot, we don't need to do anything
@@ -3046,7 +3065,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
                 */
                slots = kvm_memslots(kvm);
                memslot = id_to_memslot(slots, mem->slot);
-               kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
+               kvmppc_hv_get_dirty_log_hpt(kvm, memslot, NULL);
        }
 }
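From userspace the split bitmap is invisible, since kvm_vm_ioctl_get_dirty_log_hv() copies only the second half out through the standard KVM_GET_DIRTY_LOG ioctl. A minimal consumer might look like the sketch below; vm_fd, slot and npages are assumed to come from VM setup code not shown, error handling is trimmed, and the plain word indexing assumes a little-endian host, matching the __set_bit_le() layout used above.

#include <linux/kvm.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>

/* Fetch and print the dirty log for one memslot.  Assumes vm_fd is an
 * open KVM VM descriptor and the slot spans npages pages. */
static void dump_dirty_log(int vm_fd, __u32 slot, unsigned long npages)
{
        struct kvm_dirty_log log = { .slot = slot };
        unsigned long nbits = (npages + 63) & ~63ul;    /* round up to words */
        unsigned long *bitmap = calloc(nbits / 8, 1);
        unsigned long i;

        if (!bitmap)
                return;
        log.dirty_bitmap = bitmap;
        if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
                perror("KVM_GET_DIRTY_LOG");
                free(bitmap);
                return;
        }
        for (i = 0; i < npages; ++i)
                if (bitmap[i / 64] & (1ul << (i % 64)))
                        printf("page %lu dirty\n", i);
        free(bitmap);
}
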