Commit 1d86b5cc authored by Avi Kivity

Merge branch 'queue' into next

* queue:
  KVM: MMU: Eliminate pointless temporary 'ac'
  KVM: MMU: Avoid access/dirty update loop if all is well
  KVM: MMU: Eliminate eperm temporary
  KVM: MMU: Optimize is_last_gpte()
  KVM: MMU: Simplify walk_addr_generic() loop
  KVM: MMU: Optimize pte permission checks
  KVM: MMU: Update accessed and dirty bits after guest pagetable walk
  KVM: MMU: Move gpte_access() out of paging_tmpl.h
  KVM: MMU: Optimize gpte_access() slightly
  KVM: MMU: Push clean gpte write protection out of gpte_access()
  KVM: clarify kvmclock documentation
  KVM: make processes waiting on vcpu mutex killable
  KVM: SVM: Make use of asm.h
  KVM: VMX: Make use of asm.h
  KVM: VMX: Make lto-friendly
Signed-off-by: Avi Kivity <avi@redhat.com>
parents ecba9a52 c5421519
@@ -34,9 +34,12 @@ MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
 	time information and check that they are both equal and even.
 	An odd version indicates an in-progress update.

-	sec: number of seconds for wallclock.
+	sec: number of seconds for wallclock at time of boot.

-	nsec: number of nanoseconds for wallclock.
+	nsec: number of nanoseconds for wallclock at time of boot.
+
+	In order to get the current wallclock time, the system_time from
+	MSR_KVM_SYSTEM_TIME_NEW needs to be added.

 	Note that although MSRs are per-CPU entities, the effect of this
 	particular MSR is global.
@@ -82,20 +85,25 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
 	time at the time this structure was last updated. Unit is
 	nanoseconds.

-	tsc_to_system_mul: a function of the tsc frequency. One has
-	to multiply any tsc-related quantity by this value to get
-	a value in nanoseconds, besides dividing by 2^tsc_shift
+	tsc_to_system_mul: multiplier to be used when converting
+	tsc-related quantity to nanoseconds

-	tsc_shift: cycle to nanosecond divider, as a power of two, to
-	allow for shift rights. One has to shift right any tsc-related
-	quantity by this value to get a value in nanoseconds, besides
-	multiplying by tsc_to_system_mul.
+	tsc_shift: shift to be used when converting tsc-related
+	quantity to nanoseconds. This shift will ensure that
+	multiplication with tsc_to_system_mul does not overflow.
+	A positive value denotes a left shift, a negative value
+	a right shift.

-	With this information, guests can derive per-CPU time by
-	doing:
+	The conversion from tsc to nanoseconds involves an additional
+	right shift by 32 bits. With this information, guests can
+	derive per-CPU time by doing:

 		time = (current_tsc - tsc_timestamp)
-		time = (time * tsc_to_system_mul) >> tsc_shift
+		if (tsc_shift >= 0)
+			time <<= tsc_shift;
+		else
+			time >>= -tsc_shift;
+		time = (time * tsc_to_system_mul) >> 32
 		time = time + system_time

 	flags: bits in this field indicate extended capabilities
...
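As a reading aid for the clarified documentation above, here is a minimal user-space sketch of the per-CPU time derivation it describes. The struct and function names below are illustrative only (the real guest-visible structure is the pvclock time info exposed via MSR_KVM_SYSTEM_TIME_NEW), and the use of unsigned __int128 for the 64x32-bit multiply is an assumption made here to keep the example short, not something the document prescribes.

	/*
	 * Illustrative sketch of the conversion documented above. Field names
	 * follow the text (tsc_timestamp, system_time, tsc_to_system_mul,
	 * tsc_shift); the struct itself is hypothetical.
	 */
	#include <stdint.h>

	struct pvclock_sample {
		uint64_t tsc_timestamp;     /* host TSC when the structure was updated */
		uint64_t system_time;       /* nanoseconds at tsc_timestamp */
		uint32_t tsc_to_system_mul; /* fixed-point multiplier */
		int8_t   tsc_shift;         /* pre-scale: left if >= 0, right if < 0 */
	};

	static uint64_t guest_time_ns(const struct pvclock_sample *s, uint64_t current_tsc)
	{
		uint64_t delta = current_tsc - s->tsc_timestamp;

		/* Pre-scale so the multiplication below cannot overflow. */
		if (s->tsc_shift >= 0)
			delta <<= s->tsc_shift;
		else
			delta >>= -s->tsc_shift;

		/* (delta * mul) >> 32, done in 128 bits for clarity (GCC/clang extension). */
		return (uint64_t)(((unsigned __int128)delta * s->tsc_to_system_mul) >> 32)
		       + s->system_time;
	}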
@@ -287,10 +287,24 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;
 	bool direct_map;

+	/*
+	 * Bitmap; bit set = permission fault
+	 * Byte index: page fault error code [4:1]
+	 * Bit index: pte permissions in ACC_* format
+	 */
+	u8 permissions[16];
+
 	u64 *pae_root;
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];

+	/*
+	 * Bitmap: bit set = last pte in walk
+	 * index[0:1]: level (zero-based)
+	 * index[2]: pte.ps
+	 */
+	u8 last_pte_bitmap;
+
 	bool nx;

 	u64 pdptrs[4]; /* pae */
...
@@ -3408,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
 	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
 }

+static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
+{
+	unsigned mask;
+
+	BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
+
+	mask = (unsigned)~ACC_WRITE_MASK;
+	/* Allow write access to dirty gptes */
+	mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
+	*access &= mask;
+}
+
 static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
 			   int *nr_present)
 {
@@ -3425,6 +3437,25 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
 	return false;
 }

+static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte)
+{
+	unsigned access;
+
+	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
+	access &= ~(gpte >> PT64_NX_SHIFT);
+
+	return access;
+}
+
+static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte)
+{
+	unsigned index;
+
+	index = level - 1;
+	index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2);
+	return mmu->last_pte_bitmap & (1 << index);
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -3494,6 +3525,56 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	}
 }

+static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	unsigned bit, byte, pfec;
+	u8 map;
+	bool fault, x, w, u, wf, uf, ff, smep;
+
+	smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
+		pfec = byte << 1;
+		map = 0;
+		wf = pfec & PFERR_WRITE_MASK;
+		uf = pfec & PFERR_USER_MASK;
+		ff = pfec & PFERR_FETCH_MASK;
+		for (bit = 0; bit < 8; ++bit) {
+			x = bit & ACC_EXEC_MASK;
+			w = bit & ACC_WRITE_MASK;
+			u = bit & ACC_USER_MASK;
+
+			/* Not really needed: !nx will cause pte.nx to fault */
+			x |= !mmu->nx;
+			/* Allow supervisor writes if !cr0.wp */
+			w |= !is_write_protection(vcpu) && !uf;
+			/* Disallow supervisor fetches of user code if cr4.smep */
+			x &= !(smep && u && !uf);
+
+			fault = (ff && !x) || (uf && !u) || (wf && !w);
+			map |= fault << bit;
+		}
+		mmu->permissions[byte] = map;
+	}
+}
+
+static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	u8 map;
+	unsigned level, root_level = mmu->root_level;
+	const unsigned ps_set_index = 1 << 2;  /* bit 2 of index: ps */
+
+	if (root_level == PT32E_ROOT_LEVEL)
+		--root_level;
+	/* PT_PAGE_TABLE_LEVEL always terminates */
+	map = 1 | (1 << ps_set_index);
+	for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) {
+		if (level <= PT_PDPE_LEVEL
+		    && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu)))
+			map |= 1 << (ps_set_index | (level - 1));
+	}
+	mmu->last_pte_bitmap = map;
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 					struct kvm_mmu *context,
 					int level)
@@ -3502,6 +3583,8 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->root_level = level;

 	reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);

 	ASSERT(is_pae(vcpu));
 	context->new_cr3 = paging_new_cr3;
@@ -3530,6 +3613,8 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
 	context->root_level = PT32_ROOT_LEVEL;

 	reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);

 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
@@ -3590,6 +3675,9 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = paging32_gva_to_gpa;
 	}

+	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);
+
 	return 0;
 }
@@ -3665,6 +3753,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
 	}

+	update_permission_bitmask(vcpu, g_context);
+	update_last_pte_bitmap(vcpu, g_context);
+
 	return 0;
 }
...
@@ -18,8 +18,10 @@
 #define PT_PCD_MASK (1ULL << 4)
 #define PT_ACCESSED_SHIFT 5
 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
-#define PT_DIRTY_MASK (1ULL << 6)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
+#define PT_DIRTY_SHIFT 6
+#define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT)
+#define PT_PAGE_SIZE_SHIFT 7
+#define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT)
 #define PT_PAT_MASK (1ULL << 7)
 #define PT_GLOBAL_MASK (1ULL << 8)
 #define PT64_NX_SHIFT 63
@@ -88,17 +90,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
 	return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }

-static inline bool check_write_user_access(struct kvm_vcpu *vcpu,
-					   bool write_fault, bool user_fault,
-					   unsigned long pte)
+/*
+ * Will a fault with a given page-fault error code (pfec) cause a permission
+ * fault with the given access (in ACC_* format)?
+ */
+static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
+				    unsigned pfec)
 {
-	if (unlikely(write_fault && !is_writable_pte(pte)
-	      && (user_fault || is_write_protection(vcpu))))
-		return false;
-
-	if (unlikely(user_fault && !(pte & PT_USER_MASK)))
-		return false;
-
-	return true;
+	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
 }

 #endif
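To make the new permission_fault() lookup above easier to follow, here is a standalone sketch (hypothetical user-space code, not part of the patch). It builds a tiny permissions table with a deliberately simplified fault rule -- write faults on non-writable PTEs and user faults on supervisor PTEs only; fetch, nx, smep and cr0.wp handling are omitted -- and then performs the same byte/bit lookup that the kernel helper does.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define PFERR_WRITE_MASK (1u << 1)
	#define PFERR_USER_MASK  (1u << 2)

	#define ACC_EXEC_MASK  1u
	#define ACC_WRITE_MASK 2u
	#define ACC_USER_MASK  4u

	static uint8_t permissions[16];

	/* Simplified stand-in for update_permission_bitmask(). */
	static void build_table(void)
	{
		for (unsigned byte = 0; byte < 16; ++byte) {
			unsigned pfec = byte << 1;     /* byte index holds pfec bits [4:1] */
			for (unsigned bit = 0; bit < 8; ++bit) {
				bool w = bit & ACC_WRITE_MASK;
				bool u = bit & ACC_USER_MASK;
				bool fault = ((pfec & PFERR_WRITE_MASK) && !w) ||
					     ((pfec & PFERR_USER_MASK) && !u);
				permissions[byte] |= fault << bit;
			}
		}
	}

	/* Same lookup as the kernel's permission_fault(). */
	static bool check_permission_fault(unsigned pte_access, unsigned pfec)
	{
		return (permissions[pfec >> 1] >> pte_access) & 1;
	}

	int main(void)
	{
		build_table();
		/* user-mode write to a user-readable but read-only mapping -> fault (prints 1) */
		printf("%d\n", check_permission_fault(ACC_USER_MASK | ACC_EXEC_MASK,
						      PFERR_USER_MASK | PFERR_WRITE_MASK));
		return 0;
	}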
@@ -63,10 +63,12 @@
  */
 struct guest_walker {
 	int level;
+	unsigned max_level;
 	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
 	pt_element_t ptes[PT_MAX_FULL_LEVELS];
 	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
+	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
 	unsigned pt_access;
 	unsigned pte_access;
 	gfn_t gfn;
@@ -101,38 +103,41 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return (ret != orig_pte);
 }

-static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte,
-				   bool last)
-{
-	unsigned access;
-
-	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
-	if (last && !is_dirty_gpte(gpte))
-		access &= ~ACC_WRITE_MASK;
-
-#if PTTYPE == 64
-	if (vcpu->arch.mmu.nx)
-		access &= ~(gpte >> PT64_NX_SHIFT);
-#endif
-	return access;
-}
-
-static bool FNAME(is_last_gpte)(struct guest_walker *walker,
-				struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-				pt_element_t gpte)
-{
-	if (walker->level == PT_PAGE_TABLE_LEVEL)
-		return true;
-
-	if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) &&
-	    (PTTYPE == 64 || is_pse(vcpu)))
-		return true;
-
-	if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) &&
-	    (mmu->root_level == PT64_ROOT_LEVEL))
-		return true;
-
-	return false;
+static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
+					     struct kvm_mmu *mmu,
+					     struct guest_walker *walker,
+					     int write_fault)
+{
+	unsigned level, index;
+	pt_element_t pte, orig_pte;
+	pt_element_t __user *ptep_user;
+	gfn_t table_gfn;
+	int ret;
+
+	for (level = walker->max_level; level >= walker->level; --level) {
+		pte = orig_pte = walker->ptes[level - 1];
+		table_gfn = walker->table_gfn[level - 1];
+		ptep_user = walker->ptep_user[level - 1];
+		index = offset_in_page(ptep_user) / sizeof(pt_element_t);
+		if (!(pte & PT_ACCESSED_MASK)) {
+			trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_ACCESSED_MASK;
+		}
+		if (level == walker->level && write_fault && !is_dirty_gpte(pte)) {
+			trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_DIRTY_MASK;
+		}
+		if (pte == orig_pte)
+			continue;
+
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
+		if (ret)
+			return ret;
+
+		mark_page_dirty(vcpu->kvm, table_gfn);
+		walker->ptes[level] = pte;
+	}
+	return 0;
 }

 /*
@@ -142,21 +147,22 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				    gva_t addr, u32 access)
 {
+	int ret;
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, uninitialized_var(pte_access);
+	unsigned index, pt_access, pte_access, accessed_dirty, shift;
 	gpa_t pte_gpa;
-	bool eperm, last_gpte;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault = access & PFERR_USER_MASK;
 	const int fetch_fault = access & PFERR_FETCH_MASK;
 	u16 errcode = 0;
+	gpa_t real_gpa;
+	gfn_t gfn;

 	trace_kvm_mmu_pagetable_walk(addr, access);
 retry_walk:
-	eperm = false;
 	walker->level = mmu->root_level;
 	pte = mmu->get_cr3(vcpu);
@@ -169,15 +175,21 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 		--walker->level;
 	}
 #endif
+	walker->max_level = walker->level;
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);

-	pt_access = ACC_ALL;
+	accessed_dirty = PT_ACCESSED_MASK;
+	pt_access = pte_access = ACC_ALL;
+	++walker->level;

-	for (;;) {
+	do {
 		gfn_t real_gfn;
 		unsigned long host_addr;

+		pt_access &= pte_access;
+		--walker->level;
+
 		index = PT_INDEX(addr, walker->level);

 		table_gfn = gpte_to_gfn(pte);
@@ -199,6 +211,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
 		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
 			goto error;
+		walker->ptep_user[walker->level - 1] = ptep_user;

 		trace_kvm_mmu_paging_element(pte, walker->level);
@@ -211,92 +224,48 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 			goto error;
 		}

-		if (!check_write_user_access(vcpu, write_fault, user_fault,
-					     pte))
-			eperm = true;
-
-#if PTTYPE == 64
-		if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
-			eperm = true;
-#endif
-
-		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
-		if (last_gpte) {
-			pte_access = pt_access &
-				     FNAME(gpte_access)(vcpu, pte, true);
-			/* check if the kernel is fetching from user page */
-			if (unlikely(pte_access & PT_USER_MASK) &&
-			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
-				if (fetch_fault && !user_fault)
-					eperm = true;
-		}
-
-		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
-			int ret;
-			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
-						       sizeof(pte));
-			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-						  pte, pte|PT_ACCESSED_MASK);
-			if (unlikely(ret < 0))
-				goto error;
-			else if (ret)
-				goto retry_walk;
-
-			mark_page_dirty(vcpu->kvm, table_gfn);
-			pte |= PT_ACCESSED_MASK;
-		}
-
-		walker->ptes[walker->level - 1] = pte;
-
-		if (last_gpte) {
-			int lvl = walker->level;
-			gpa_t real_gpa;
-			gfn_t gfn;
-			u32 ac;
-
-			gfn = gpte_to_gfn_lvl(pte, lvl);
-			gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
-
-			if (PTTYPE == 32 &&
-			    walker->level == PT_DIRECTORY_LEVEL &&
-			    is_cpuid_PSE36())
-				gfn += pse36_gfn_delta(pte);
-
-			ac = write_fault | fetch_fault | user_fault;
-
-			real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
-						      ac);
-			if (real_gpa == UNMAPPED_GVA)
-				return 0;
-
-			walker->gfn = real_gpa >> PAGE_SHIFT;
-
-			break;
-		}
-
-		pt_access &= FNAME(gpte_access)(vcpu, pte, false);
-		--walker->level;
-	}
+		accessed_dirty &= pte;
+		pte_access = pt_access & gpte_access(vcpu, pte);

-	if (unlikely(eperm)) {
-		errcode |= PFERR_PRESENT_MASK;
-		goto error;
-	}
+		walker->ptes[walker->level - 1] = pte;
+	} while (!is_last_gpte(mmu, walker->level, pte));

-	if (write_fault && unlikely(!is_dirty_gpte(pte))) {
-		int ret;
+	if (unlikely(permission_fault(mmu, pte_access, access))) {
+		errcode |= PFERR_PRESENT_MASK;
+		goto error;
+	}

-		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
-		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-					  pte, pte|PT_DIRTY_MASK);
-		if (unlikely(ret < 0))
-			goto error;
-		else if (ret)
-			goto retry_walk;
+	gfn = gpte_to_gfn_lvl(pte, walker->level);
+	gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;

-		mark_page_dirty(vcpu->kvm, table_gfn);
-		pte |= PT_DIRTY_MASK;
-		walker->ptes[walker->level - 1] = pte;
-	}
+	if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
+		gfn += pse36_gfn_delta(pte);
+
+	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
+	if (real_gpa == UNMAPPED_GVA)
+		return 0;
+
+	walker->gfn = real_gpa >> PAGE_SHIFT;
+
+	if (!write_fault)
+		protect_clean_gpte(&pte_access, pte);
+
+	/*
+	 * On a write fault, fold the dirty bit into accessed_dirty by shifting it one
+	 * place right.
+	 *
+	 * On a read fault, do nothing.
+	 */
+	shift = write_fault >> ilog2(PFERR_WRITE_MASK);
+	shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
+	accessed_dirty &= pte >> shift;
+
+	if (unlikely(!accessed_dirty)) {
+		ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
+		if (unlikely(ret < 0))
+			goto error;
+		else if (ret)
+			goto retry_walk;
+	}

 	walker->pt_access = pt_access;
@@ -368,7 +337,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		return;

 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true);
+	pte_access = sp->role.access & gpte_access(vcpu, gpte);
+	protect_clean_gpte(&pte_access, gpte);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
 	if (mmu_invalid_pfn(pfn))
 		return;
@@ -441,8 +411,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 			continue;

-		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte,
-								  true);
+		pte_access = sp->role.access & gpte_access(vcpu, gpte);
+		protect_clean_gpte(&pte_access, gpte);
 		gfn = gpte_to_gfn(gpte);
 		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 					      pte_access & ACC_WRITE_MASK);
@@ -794,7 +764,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		gfn = gpte_to_gfn(gpte);
 		pte_access = sp->role.access;
-		pte_access &= FNAME(gpte_access)(vcpu, gpte, true);
+		pte_access &= gpte_access(vcpu, gpte);
+		protect_clean_gpte(&pte_access, gpte);

 		if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
 			continue;
...
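The accessed/dirty folding near the end of the new walk_addr_generic() above is terse, so here is a standalone sketch (ordinary user-space C with the relevant PT_*/PFERR_* constants copied from the diff; the helper name, the one-level walk and the printf output are hypothetical) that spells out the same arithmetic on a concrete pte.

	/*
	 * On a write fault, write_fault >> ilog2(PFERR_WRITE_MASK) is 1, so the
	 * shift becomes PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT (= 1) and the pte's
	 * dirty bit lands in the accessed-bit position before the AND; on a read
	 * fault the shift is 0 and only the accessed bit is tested.
	 */
	#include <stdio.h>
	#include <stdint.h>

	#define PFERR_WRITE_MASK  (1u << 1)
	#define PT_ACCESSED_SHIFT 5
	#define PT_ACCESSED_MASK  (1ull << PT_ACCESSED_SHIFT)
	#define PT_DIRTY_SHIFT    6

	static int needs_ad_update(uint64_t pte, unsigned write_fault)
	{
		uint64_t accessed_dirty = PT_ACCESSED_MASK & pte; /* accumulated over a one-level walk */
		unsigned shift;

		shift = !!write_fault;                       /* write_fault >> ilog2(PFERR_WRITE_MASK) */
		shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT; /* 1 on a write fault, 0 on a read fault */
		accessed_dirty &= pte >> shift;              /* dirty bit folded into the accessed slot */

		return !accessed_dirty;                      /* true -> slow path must set A/D bits */
	}

	int main(void)
	{
		uint64_t accessed_only = PT_ACCESSED_MASK;   /* A=1, D=0 */

		printf("read fault:  update needed = %d\n", needs_ad_update(accessed_only, 0));
		printf("write fault: update needed = %d\n", needs_ad_update(accessed_only, PFERR_WRITE_MASK));
		return 0;
	}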
@@ -3782,12 +3782,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
 	svm_complete_interrupts(svm);
 }

-#ifdef CONFIG_X86_64
-#define R "r"
-#else
-#define R "e"
-#endif
-
 static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3814,13 +3808,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	local_irq_enable();

 	asm volatile (
-		"push %%"R"bp; \n\t"
-		"mov %c[rbx](%[svm]), %%"R"bx \n\t"
-		"mov %c[rcx](%[svm]), %%"R"cx \n\t"
-		"mov %c[rdx](%[svm]), %%"R"dx \n\t"
-		"mov %c[rsi](%[svm]), %%"R"si \n\t"
-		"mov %c[rdi](%[svm]), %%"R"di \n\t"
-		"mov %c[rbp](%[svm]), %%"R"bp \n\t"
+		"push %%" _ASM_BP "; \n\t"
+		"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
+		"mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
+		"mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
+		"mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
+		"mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
+		"mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
 #ifdef CONFIG_X86_64
 		"mov %c[r8](%[svm]), %%r8 \n\t"
 		"mov %c[r9](%[svm]), %%r9 \n\t"
@@ -3833,20 +3827,20 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 #endif

 		/* Enter guest mode */
-		"push %%"R"ax \n\t"
-		"mov %c[vmcb](%[svm]), %%"R"ax \n\t"
+		"push %%" _ASM_AX " \n\t"
+		"mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
 		__ex(SVM_VMLOAD) "\n\t"
 		__ex(SVM_VMRUN) "\n\t"
 		__ex(SVM_VMSAVE) "\n\t"
-		"pop %%"R"ax \n\t"
+		"pop %%" _ASM_AX " \n\t"

 		/* Save guest registers, load host registers */
-		"mov %%"R"bx, %c[rbx](%[svm]) \n\t"
-		"mov %%"R"cx, %c[rcx](%[svm]) \n\t"
-		"mov %%"R"dx, %c[rdx](%[svm]) \n\t"
-		"mov %%"R"si, %c[rsi](%[svm]) \n\t"
-		"mov %%"R"di, %c[rdi](%[svm]) \n\t"
-		"mov %%"R"bp, %c[rbp](%[svm]) \n\t"
+		"mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
+		"mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
+		"mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
+		"mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
+		"mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
+		"mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
 #ifdef CONFIG_X86_64
 		"mov %%r8, %c[r8](%[svm]) \n\t"
 		"mov %%r9, %c[r9](%[svm]) \n\t"
@@ -3857,7 +3851,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r14, %c[r14](%[svm]) \n\t"
 		"mov %%r15, %c[r15](%[svm]) \n\t"
 #endif
-		"pop %%"R"bp"
+		"pop %%" _ASM_BP
 		:
 		: [svm]"a"(svm),
 		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
@@ -3878,9 +3872,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		  [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
 #endif
 		: "cc", "memory"
-		, R"bx", R"cx", R"dx", R"si", R"di"
 #ifdef CONFIG_X86_64
+		, "rbx", "rcx", "rdx", "rsi", "rdi"
 		, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
+#else
+		, "ebx", "ecx", "edx", "esi", "edi"
 #endif
 		);
@@ -3940,8 +3936,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	mark_all_clean(svm->vmcb);
 }

-#undef R
-
 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
...
@@ -127,6 +127,8 @@ module_param(ple_gap, int, S_IRUGO);
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);

+extern const ulong vmx_return;
+
 #define NR_AUTOLOAD_MSRS 8
 #define VMCS02_POOL_SIZE 1
@@ -3724,8 +3726,7 @@ static void vmx_set_constant_host_state(void)
 	native_store_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */

-	asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl));
-	vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */
+	vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */

 	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
@@ -6183,14 +6184,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 					msrs[i].host);
 }

-#ifdef CONFIG_X86_64
-#define R "r"
-#define Q "q"
-#else
-#define R "e"
-#define Q "l"
-#endif
-
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6239,30 +6232,30 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
-		"push %%"R"dx; push %%"R"bp;"
-		"push %%"R"cx \n\t" /* placeholder for guest rcx */
-		"push %%"R"cx \n\t"
-		"cmp %%"R"sp, %c[host_rsp](%0) \n\t"
+		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
+		"push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
+		"push %%" _ASM_CX " \n\t"
+		"cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
 		"je 1f \n\t"
-		"mov %%"R"sp, %c[host_rsp](%0) \n\t"
+		"mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
 		__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
 		"1: \n\t"
 		/* Reload cr2 if changed */
-		"mov %c[cr2](%0), %%"R"ax \n\t"
-		"mov %%cr2, %%"R"dx \n\t"
-		"cmp %%"R"ax, %%"R"dx \n\t"
+		"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
+		"mov %%cr2, %%" _ASM_DX " \n\t"
+		"cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
 		"je 2f \n\t"
-		"mov %%"R"ax, %%cr2 \n\t"
+		"mov %%" _ASM_AX", %%cr2 \n\t"
 		"2: \n\t"
 		/* Check if vmlaunch of vmresume is needed */
 		"cmpl $0, %c[launched](%0) \n\t"
 		/* Load guest registers.  Don't clobber flags. */
-		"mov %c[rax](%0), %%"R"ax \n\t"
-		"mov %c[rbx](%0), %%"R"bx \n\t"
-		"mov %c[rdx](%0), %%"R"dx \n\t"
-		"mov %c[rsi](%0), %%"R"si \n\t"
-		"mov %c[rdi](%0), %%"R"di \n\t"
-		"mov %c[rbp](%0), %%"R"bp \n\t"
+		"mov %c[rax](%0), %%" _ASM_AX " \n\t"
+		"mov %c[rbx](%0), %%" _ASM_BX " \n\t"
+		"mov %c[rdx](%0), %%" _ASM_DX " \n\t"
+		"mov %c[rsi](%0), %%" _ASM_SI " \n\t"
+		"mov %c[rdi](%0), %%" _ASM_DI " \n\t"
+		"mov %c[rbp](%0), %%" _ASM_BP " \n\t"
 #ifdef CONFIG_X86_64
 		"mov %c[r8](%0),  %%r8  \n\t"
 		"mov %c[r9](%0),  %%r9  \n\t"
@@ -6273,24 +6266,24 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %c[r14](%0), %%r14 \n\t"
 		"mov %c[r15](%0), %%r15 \n\t"
 #endif
-		"mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */
+		"mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */

 		/* Enter guest mode */
-		"jne .Llaunched \n\t"
+		"jne 1f \n\t"
 		__ex(ASM_VMX_VMLAUNCH) "\n\t"
-		"jmp .Lkvm_vmx_return \n\t"
-		".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
-		".Lkvm_vmx_return: "
+		"jmp 2f \n\t"
+		"1: " __ex(ASM_VMX_VMRESUME) "\n\t"
+		"2: "
 		/* Save guest registers, load host registers, keep flags */
-		"mov %0, %c[wordsize](%%"R"sp) \n\t"
+		"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
 		"pop %0 \n\t"
-		"mov %%"R"ax, %c[rax](%0) \n\t"
-		"mov %%"R"bx, %c[rbx](%0) \n\t"
-		"pop"Q" %c[rcx](%0) \n\t"
-		"mov %%"R"dx, %c[rdx](%0) \n\t"
-		"mov %%"R"si, %c[rsi](%0) \n\t"
-		"mov %%"R"di, %c[rdi](%0) \n\t"
-		"mov %%"R"bp, %c[rbp](%0) \n\t"
+		"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
+		"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
+		__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
+		"mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
+		"mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
+		"mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
+		"mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
 #ifdef CONFIG_X86_64
 		"mov %%r8,  %c[r8](%0) \n\t"
 		"mov %%r9,  %c[r9](%0) \n\t"
@@ -6301,11 +6294,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r14, %c[r14](%0) \n\t"
 		"mov %%r15, %c[r15](%0) \n\t"
 #endif
-		"mov %%cr2, %%"R"ax   \n\t"
-		"mov %%"R"ax, %c[cr2](%0) \n\t"
+		"mov %%cr2, %%" _ASM_AX "   \n\t"
+		"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"

-		"pop  %%"R"bp; pop  %%"R"dx \n\t"
+		"pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
 		"setbe %c[fail](%0) \n\t"
+		".pushsection .rodata \n\t"
+		".global vmx_return \n\t"
+		"vmx_return: " _ASM_PTR " 2b \n\t"
+		".popsection"
 	      : : "c"(vmx), "d"((unsigned long)HOST_RSP),
 		[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
 		[fail]"i"(offsetof(struct vcpu_vmx, fail)),
@@ -6330,9 +6327,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
 		[wordsize]"i"(sizeof(ulong))
 	      : "cc", "memory"
-		, R"ax", R"bx", R"di", R"si"
 #ifdef CONFIG_X86_64
+		, "rax", "rbx", "rdi", "rsi"
 		, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+#else
+		, "eax", "ebx", "edi", "esi"
 #endif
 	      );
@@ -6384,9 +6383,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_complete_interrupts(vmx);
 }

-#undef R
-#undef Q
-
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
...
@@ -3672,20 +3672,17 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 			       gpa_t *gpa, struct x86_exception *exception,
 			       bool write)
 {
-	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+	u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
+		| (write ? PFERR_WRITE_MASK : 0);

-	if (vcpu_match_mmio_gva(vcpu, gva) &&
-	    check_write_user_access(vcpu, write, access,
-				    vcpu->arch.access)) {
+	if (vcpu_match_mmio_gva(vcpu, gva)
+	    && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
 		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
 			(gva & (PAGE_SIZE - 1));
 		trace_vcpu_match_mmio(gva, *gpa, write, false);
 		return 1;
 	}

-	if (write)
-		access |= PFERR_WRITE_MASK;
-
 	*gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);

 	if (*gpa == UNMAPPED_GVA)
@@ -6016,7 +6013,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	int r;

 	vcpu->arch.mtrr_state.have_fixed = 1;
-	vcpu_load(vcpu);
+	r = vcpu_load(vcpu);
+	if (r)
+		return r;
 	r = kvm_arch_vcpu_reset(vcpu);
 	if (r == 0)
 		r = kvm_mmu_setup(vcpu);
@@ -6027,9 +6026,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+	int r;
+
 	vcpu->arch.apf.msr_val = 0;

-	vcpu_load(vcpu);
+	r = vcpu_load(vcpu);
+	BUG_ON(r);
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
@@ -6275,7 +6276,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
-	vcpu_load(vcpu);
+	int r;
+
+	r = vcpu_load(vcpu);
+	BUG_ON(r);
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
 }
...
@@ -408,7 +408,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);

-void vcpu_load(struct kvm_vcpu *vcpu);
+int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);

 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
...
@@ -131,11 +131,12 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
 /*
  * Switches to specified vcpu, until a matching vcpu_put()
  */
-void vcpu_load(struct kvm_vcpu *vcpu)
+int vcpu_load(struct kvm_vcpu *vcpu)
 {
 	int cpu;

-	mutex_lock(&vcpu->mutex);
+	if (mutex_lock_killable(&vcpu->mutex))
+		return -EINTR;
 	if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
 		/* The thread running this VCPU changed. */
 		struct pid *oldpid = vcpu->pid;
@@ -148,6 +149,7 @@ void vcpu_load(struct kvm_vcpu *vcpu)
 	preempt_notifier_register(&vcpu->preempt_notifier);
 	kvm_arch_vcpu_load(vcpu, cpu);
 	put_cpu();
+	return 0;
 }

 void vcpu_put(struct kvm_vcpu *vcpu)
@@ -1891,7 +1893,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
 #endif

-	vcpu_load(vcpu);
+	r = vcpu_load(vcpu);
+	if (r)
+		return r;
 	switch (ioctl) {
 	case KVM_RUN:
 		r = -EINVAL;
...