Commit bad3b507 authored by Paul Mackerras, committed by Avi Kivity

KVM: PPC: Book3s HV: Maintain separate guest and host views of R and C bits

This allows both the guest and the host to use the referenced (R) and
changed (C) bits in the guest hashed page table.  The guest has a view
of R and C that is maintained in the guest_rpte field of the revmap
entry for the HPTE, and the host has a view that is maintained in the
rmap entry for the associated gfn.

Both views are updated from the guest HPT.  If a bit (R or C) is zero
in either view, it will initially be set to zero in the HPTE (or HPTEs),
until set to 1 by hardware.  When an HPTE is removed for any reason,
the R and C bits from the HPTE are ORed into both views.  We have to
be careful to read the R and C bits from the HPTE after invalidating
it, but before unlocking it, in case of any late updates by the hardware.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent a92bce95
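
The bookkeeping is easiest to see in isolation. Below is a minimal sketch of the two invariants the patch maintains; it is illustrative C, not the kernel code, and the HPTE_R_* values are assumed to be the architected ones from mmu-hash64.h:

/* Two-view R/C bookkeeping (illustrative sketch, not kernel code). */
#define HPTE_R_R   0x100ul                      /* referenced */
#define HPTE_R_C   0x080ul                      /* changed */
#define RC_MASK    (HPTE_R_R | HPTE_R_C)

/* On insertion: an R/C bit starts out set in the hardware HPTE only if
 * it is already set in BOTH views; otherwise it starts at 0, so that
 * hardware will record the next access/store by setting it again. */
unsigned long initial_rc(unsigned long host_view, unsigned long guest_view)
{
        return host_view & guest_view & RC_MASK;
}

/* On removal: R/C bits accumulated by hardware are ORed into both views,
 * so neither the guest's nor the host's information is lost. */
void harvest_rc(unsigned long hpte_r, unsigned long *host_view,
                unsigned long *guest_view)
{
        unsigned long rc = hpte_r & RC_MASK;

        *host_view |= rc;
        *guest_view |= rc;
}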
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -200,8 +200,9 @@ struct revmap_entry {
  * index in the guest HPT of a HPTE that points to the page.
  */
 #define KVMPPC_RMAP_LOCK_BIT	63
-#define KVMPPC_RMAP_REF_BIT	33
-#define KVMPPC_RMAP_REFERENCED	(1ul << KVMPPC_RMAP_REF_BIT)
+#define KVMPPC_RMAP_RC_SHIFT	32
+#define KVMPPC_RMAP_REFERENCED	(HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
+#define KVMPPC_RMAP_CHANGED	(HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
 #define KVMPPC_RMAP_PRESENT	0x100000000ul
 #define KVMPPC_RMAP_INDEX	0xfffffffful
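
The host's view now lives in the same 64-bit rmap word as the chain index and lock bit, shifted up by KVMPPC_RMAP_RC_SHIFT so the two bits sit above the 32-bit HPT index. Assuming the architected values HPTE_R_R = 0x100 and HPTE_R_C = 0x80, the word is laid out as follows (sketch):

/* Host rmap word after this change (sketch, given HPTE_R_R = 0x100,
 * HPTE_R_C = 0x80):
 *   bit 63       KVMPPC_RMAP_LOCK_BIT
 *   bit 40       KVMPPC_RMAP_REFERENCED  (HPTE_R_R << 32)
 *   bit 39       KVMPPC_RMAP_CHANGED     (HPTE_R_C << 32)
 *   bit 32       KVMPPC_RMAP_PRESENT
 *   bits 0-31    KVMPPC_RMAP_INDEX (HPT index of the chain head)
 */

The old single REF bit at position 33 is replaced by separate R and C bits at positions 40 and 39.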
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -505,6 +505,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	unsigned long is_io;
 	unsigned int writing, write_ok;
 	struct vm_area_struct *vma;
+	unsigned long rcbits;
 
 	/*
 	 * Real-mode code has already searched the HPT and found the
@@ -640,11 +641,17 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		goto out_unlock;
 	}
 
+	/* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
+	rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
+	r &= rcbits | ~(HPTE_R_R | HPTE_R_C);
+
 	if (hptep[0] & HPTE_V_VALID) {
 		/* HPTE was previously valid, so we need to invalidate it */
 		unlock_rmap(rmap);
 		hptep[0] |= HPTE_V_ABSENT;
 		kvmppc_invalidate_hpte(kvm, hptep, index);
+		/* don't lose previous R and C bits */
+		r |= hptep[1] & (HPTE_R_R | HPTE_R_C);
 	} else {
 		kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
 	}
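
The masking idiom r &= rcbits | ~(HPTE_R_R | HPTE_R_C) implements the AND of the two views: r carries the guest view (it is derived from guest_rpte), rcbits carries the host view, and an R/C bit survives only if it is set in both; every non-R/C bit of r passes through untouched. The same idiom appears in kvmppc_h_enter below. A small standalone demonstration (hypothetical values, not kernel code):

#include <stdio.h>

#define HPTE_R_R 0x100ul        /* referenced */
#define HPTE_R_C 0x080ul        /* changed */

int main(void)
{
        /* guest view: R and C both set; other PTE bits arbitrary */
        unsigned long r = 0xdead0000ul | HPTE_R_R | HPTE_R_C;
        /* host view: only R set */
        unsigned long rcbits = HPTE_R_R;

        /* keep an R/C bit only if both views have it */
        r &= rcbits | ~(HPTE_R_R | HPTE_R_C);

        /* prints "R=1 C=0 rest=0xdead0000": C is cleared because the
         * host view lacks it; everything else is intact */
        printf("R=%d C=%d rest=%#lx\n",
               !!(r & HPTE_R_R), !!(r & HPTE_R_C),
               r & ~(HPTE_R_R | HPTE_R_C));
        return 0;
}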
@@ -701,50 +708,55 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	struct revmap_entry *rev = kvm->arch.revmap;
 	unsigned long h, i, j;
 	unsigned long *hptep;
-	unsigned long ptel, psize;
+	unsigned long ptel, psize, rcbits;
 
 	for (;;) {
-		while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
-			cpu_relax();
+		lock_rmap(rmapp);
 		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
-			__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
+			unlock_rmap(rmapp);
 			break;
 		}
 
 		/*
 		 * To avoid an ABBA deadlock with the HPTE lock bit,
-		 * we have to unlock the rmap chain before locking the HPTE.
-		 * Thus we remove the first entry, unlock the rmap chain,
-		 * lock the HPTE and then check that it is for the
-		 * page we're unmapping before changing it to non-present.
+		 * we can't spin on the HPTE lock while holding the
+		 * rmap chain lock.
 		 */
 		i = *rmapp & KVMPPC_RMAP_INDEX;
+		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
+		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
+			/* unlock rmap before spinning on the HPTE lock */
+			unlock_rmap(rmapp);
+			while (hptep[0] & HPTE_V_HVLOCK)
+				cpu_relax();
+			continue;
+		}
 		j = rev[i].forw;
 		if (j == i) {
 			/* chain is now empty */
-			j = 0;
+			*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
 		} else {
 			/* remove i from chain */
 			h = rev[i].back;
 			rev[h].forw = j;
 			rev[j].back = h;
 			rev[i].forw = rev[i].back = i;
-			j |= KVMPPC_RMAP_PRESENT;
+			*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
 		}
-		smp_wmb();
-		*rmapp = j | (1ul << KVMPPC_RMAP_REF_BIT);
 
-		/* Now lock, check and modify the HPTE */
-		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
-		while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
-			cpu_relax();
+		/* Now check and modify the HPTE */
 		ptel = rev[i].guest_rpte;
 		psize = hpte_page_size(hptep[0], ptel);
 		if ((hptep[0] & HPTE_V_VALID) &&
 		    hpte_rpn(ptel, psize) == gfn) {
-			kvmppc_invalidate_hpte(kvm, hptep, i);
 			hptep[0] |= HPTE_V_ABSENT;
+			kvmppc_invalidate_hpte(kvm, hptep, i);
+			/* Harvest R and C */
+			rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
+			*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+			rev[i].guest_rpte = ptel | rcbits;
 		}
+		unlock_rmap(rmapp);
 		hptep[0] &= ~HPTE_V_HVLOCK;
 	}
 	return 0;
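
Two details of the harvest above are worth noting. The HPTE is made absent and the TLB entry invalidated before hptep[1] is read, and the HPTE lock (HPTE_V_HVLOCK) is still held across the read, so any late R/C update the hardware performs up to the tlbie is observed and nothing can change the entry underneath us; this is the "read after invalidating, but before unlocking" rule from the commit message. In outline:

/* Harvest pattern used above (outline of the code, not a new API):
 *   1. hold HPTE_V_HVLOCK                         (entry can't change)
 *   2. hptep[0] |= HPTE_V_ABSENT;
 *      kvmppc_invalidate_hpte(kvm, hptep, i);     (tlbie; HW updates stop)
 *   3. rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); (read final R/C)
 *   4. *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;  (host view)
 *      rev[i].guest_rpte = ptel | rcbits;         (guest view)
 *   5. unlock_rmap(rmapp); then drop HVLOCK
 */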
...@@ -767,7 +779,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, ...@@ -767,7 +779,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
kvm_unmap_rmapp(kvm, rmapp, gfn); kvm_unmap_rmapp(kvm, rmapp, gfn);
while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp)) while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmapp))
cpu_relax(); cpu_relax();
__clear_bit(KVMPPC_RMAP_REF_BIT, rmapp); *rmapp &= ~KVMPPC_RMAP_REFERENCED;
__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp); __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmapp);
return 1; return 1;
} }
......
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -87,15 +87,17 @@ EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
-				unsigned long hpte_v)
+				struct revmap_entry *rev,
+				unsigned long hpte_v, unsigned long hpte_r)
 {
-	struct revmap_entry *rev, *next, *prev;
+	struct revmap_entry *next, *prev;
 	unsigned long gfn, ptel, head;
 	struct kvm_memory_slot *memslot;
 	unsigned long *rmap;
+	unsigned long rcbits;
 
-	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
-	ptel = rev->guest_rpte;
+	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
+	ptel = rev->guest_rpte |= rcbits;
 	gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
 	memslot = builtin_gfn_to_memslot(kvm, gfn);
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
@@ -116,6 +118,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 		else
 			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
 	}
+	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
 	unlock_rmap(rmap);
 }
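
remove_revmap_chain now takes the post-invalidation PTE low word (hpte_r) and folds its R/C bits into both views while the rmap lock is held. The compound assignment is a little dense; it updates the guest view and uses the updated value in one step. Equivalently:

	/* ptel = rev->guest_rpte |= rcbits;  is shorthand for: */
	rev->guest_rpte |= rcbits;  /* fold harvested R/C into the guest view */
	ptel = rev->guest_rpte;     /* then use the updated guest PTE value */

The matching host-view update is the *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT just before unlock_rmap().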
...@@ -162,6 +165,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, ...@@ -162,6 +165,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
pte_t pte; pte_t pte;
unsigned int writing; unsigned int writing;
unsigned long mmu_seq; unsigned long mmu_seq;
unsigned long rcbits;
bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
psize = hpte_page_size(pteh, ptel); psize = hpte_page_size(pteh, ptel);
...@@ -320,6 +324,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, ...@@ -320,6 +324,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
} else { } else {
kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
realmode); realmode);
/* Only set R/C in real HPTE if already set in *rmap */
rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
} }
} }
...@@ -394,7 +401,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, ...@@ -394,7 +401,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
asm volatile("tlbiel %0" : : "r" (rb)); asm volatile("tlbiel %0" : : "r" (rb));
asm volatile("ptesync" : : : "memory"); asm volatile("ptesync" : : : "memory");
} }
remove_revmap_chain(kvm, pte_index, v); /* Read PTE low word after tlbie to get final R/C values */
remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
} }
r = rev->guest_rpte; r = rev->guest_rpte;
unlock_hpte(hpte, 0); unlock_hpte(hpte, 0);
@@ -469,12 +477,13 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 
 			args[j] = ((0x80 | flags) << 56) + pte_index;
 			rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
-			/* insert R and C bits from guest PTE */
-			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
-			args[j] |= rcbits << (56 - 5);
 
-			if (!(hp[0] & HPTE_V_VALID))
+			if (!(hp[0] & HPTE_V_VALID)) {
+				/* insert R and C bits from PTE */
+				rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+				args[j] |= rcbits << (56 - 5);
 				continue;
+			}
 
 			hp[0] &= ~HPTE_V_VALID;		/* leave it locked */
 			tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
@@ -505,13 +514,16 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 			asm volatile("ptesync" : : : "memory");
 		}
 
+		/* Read PTE low words after tlbie to get final R/C values */
 		for (k = 0; k < n; ++k) {
 			j = indexes[k];
 			pte_index = args[j] & ((1ul << 56) - 1);
 			hp = hptes[k];
 			rev = revs[k];
-			remove_revmap_chain(kvm, pte_index, hp[0]);
-			unlock_hpte(hp, 0);
+			remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
+			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+			args[j] |= rcbits << (56 - 5);
+			hp[0] = 0;
 		}
 	}
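
The rcbits << (56 - 5) expression packs the harvested R and C into the return-code byte of each H_BULK_REMOVE request word. Assuming the architected values HPTE_R_C = 0x80 (bit 7) and HPTE_R_R = 0x100 (bit 8), shifting left by 51 puts C at bit 58 and R at bit 59, which slots between the two flag bits and the 0x80 success marker of the (0x80 | flags) << 56 return code:

/* Return word layout for one H_BULK_REMOVE request (sketch, assuming
 * the architected HPTE_R_* values above):
 *   bit 63        success marker (the 0x80 of the return code)
 *   bits 59, 58   R and C harvested from the removed HPTE
 *   bits 57, 56   request flags
 *   bits 0-55     pte_index
 */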
@@ -595,8 +607,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 		pte_index &= ~3;
 		n = 4;
 	}
-	if (flags & H_R_XLATE)
-		rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 	for (i = 0; i < n; ++i, ++pte_index) {
 		hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
 		v = hpte[0] & ~HPTE_V_HVLOCK;
@@ -605,12 +616,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 			v &= ~HPTE_V_ABSENT;
 			v |= HPTE_V_VALID;
 		}
-		if (v & HPTE_V_VALID) {
-			if (rev)
-				r = rev[i].guest_rpte;
-			else
-				r = hpte[1] | HPTE_R_RPN;
-		}
+		if (v & HPTE_V_VALID)
+			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
 		vcpu->arch.gpr[4 + i * 2] = v;
 		vcpu->arch.gpr[5 + i * 2] = r;
 	}
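
Two things change in kvmppc_h_read. The revmap entry is now looked up unconditionally rather than only for H_R_XLATE, because guest_rpte is now the authoritative guest view of the PTE contents. And the value returned to the guest is that guest view with the R/C bits currently live in the real HPTE ORed in, so the guest always sees up-to-date referenced/changed information:

	/* r at this point holds the real HPTE's low word (hpte[1]) */
	r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));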