Commit 6de410c2 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (66 commits)
  KVM: Remove unused 'instruction_length'
  KVM: Don't require explicit indication of completion of mmio or pio
  KVM: Remove extraneous guest entry on mmio read
  KVM: SVM: Only save/restore MSRs when needed
  KVM: fix an if() condition
  KVM: VMX: Add lazy FPU support for VT
  KVM: VMX: Properly shadow the CR0 register in the vcpu struct
  KVM: Don't complain about cpu erratum AA15
  KVM: Lazy FPU support for SVM
  KVM: Allow passing 64-bit values to the emulated read/write API
  KVM: Per-vcpu statistics
  KVM: VMX: Avoid unnecessary vcpu_load()/vcpu_put() cycles
  KVM: MMU: Avoid heavy ASSERT at non debug mode.
  KVM: VMX: Only save/restore MSR_K6_STAR if necessary
  KVM: Fold drivers/kvm/kvm_vmx.h into drivers/kvm/vmx.c
  KVM: VMX: Don't switch 64-bit msrs for 32-bit guests
  KVM: VMX: Reduce unnecessary saving of host msrs
  KVM: Handle guest page faults when emulating mmio
  KVM: SVM: Report hardware exit reason to userspace instead of dmesg
  KVM: Retry sleeping allocation if atomic allocation fails
  ...
parents c6799ade 2ff81f70
......@@ -51,16 +51,19 @@
#define UNMAPPED_GVA (~(gpa_t)0)
#define KVM_MAX_VCPUS 1
#define KVM_ALIAS_SLOTS 4
#define KVM_MEMORY_SLOTS 4
#define KVM_NUM_MMU_PAGES 256
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 40
#define FX_IMAGE_SIZE 512
#define FX_IMAGE_ALIGN 16
#define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN)
#define DE_VECTOR 0
#define NM_VECTOR 7
#define DF_VECTOR 8
#define TS_VECTOR 10
#define NP_VECTOR 11
......@@ -73,6 +76,8 @@
#define IOPL_SHIFT 12
#define KVM_PIO_PAGE_OFFSET 1
/*
* Address types:
*
......@@ -106,6 +111,7 @@ struct kvm_pte_chain {
* bits 4:7 - page table level for this shadow (1-4)
* bits 8:9 - page table quadrant for 2-level guests
* bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
* bits 17:18 - "access" - the user and writable bits of a huge page pde
*/
union kvm_mmu_page_role {
unsigned word;
......@@ -115,6 +121,7 @@ union kvm_mmu_page_role {
unsigned quadrant : 2;
unsigned pad_for_nice_hex_output : 6;
unsigned metaphysical : 1;
unsigned hugepage_access : 2;
};
};
......@@ -133,7 +140,6 @@ struct kvm_mmu_page {
unsigned long slot_bitmap; /* One bit set per slot which has memory
* in this shadow page.
*/
int global; /* Set if all ptes in this page are global */
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
union {
......@@ -219,6 +225,34 @@ enum {
VCPU_SREG_LDTR,
};
struct kvm_pio_request {
unsigned long count;
int cur_count;
struct page *guest_pages[2];
unsigned guest_page_offset;
int in;
int size;
int string;
int down;
int rep;
};
struct kvm_stat {
u32 pf_fixed;
u32 pf_guest;
u32 tlb_flush;
u32 invlpg;
u32 exits;
u32 io_exits;
u32 mmio_exits;
u32 signal_exits;
u32 irq_window_exits;
u32 halt_exits;
u32 request_irq_exits;
u32 irq_exits;
};
struct kvm_vcpu {
struct kvm *kvm;
union {
......@@ -228,6 +262,8 @@ struct kvm_vcpu {
struct mutex mutex;
int cpu;
int launched;
u64 host_tsc;
struct kvm_run *run;
int interrupt_window_open;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
......@@ -266,6 +302,7 @@ struct kvm_vcpu {
char fx_buf[FX_BUF_SIZE];
char *host_fx_image;
char *guest_fx_image;
int fpu_active;
int mmio_needed;
int mmio_read_completed;
......@@ -273,6 +310,14 @@ struct kvm_vcpu {
int mmio_size;
unsigned char mmio_data[8];
gpa_t mmio_phys_addr;
gva_t mmio_fault_cr2;
struct kvm_pio_request pio;
void *pio_data;
int sigset_active;
sigset_t sigset;
struct kvm_stat stat;
struct {
int active;
......@@ -284,6 +329,15 @@ struct kvm_vcpu {
u32 ar;
} tr, es, ds, fs, gs;
} rmode;
int cpuid_nent;
struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
};
struct kvm_mem_alias {
gfn_t base_gfn;
unsigned long npages;
gfn_t target_gfn;
};
struct kvm_memory_slot {
......@@ -296,6 +350,8 @@ struct kvm_memory_slot {
struct kvm {
spinlock_t lock; /* protects everything except vcpus */
int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
int nmemslots;
struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
/*
......@@ -312,22 +368,6 @@ struct kvm {
struct file *filp;
};
struct kvm_stat {
u32 pf_fixed;
u32 pf_guest;
u32 tlb_flush;
u32 invlpg;
u32 exits;
u32 io_exits;
u32 mmio_exits;
u32 signal_exits;
u32 irq_window_exits;
u32 halt_exits;
u32 request_irq_exits;
u32 irq_exits;
};
struct descriptor_table {
u16 limit;
unsigned long base;
......@@ -358,10 +398,8 @@ struct kvm_arch_ops {
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu,
unsigned long cr0);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
......@@ -391,7 +429,6 @@ struct kvm_arch_ops {
unsigned char *hypercall_addr);
};
extern struct kvm_stat kvm_stat;
extern struct kvm_arch_ops *kvm_arch_ops;
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
......@@ -400,28 +437,29 @@ extern struct kvm_arch_ops *kvm_arch_ops;
int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module);
void kvm_exit_arch(void);
int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
int kvm_mmu_setup(struct kvm_vcpu *vcpu);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot);
void kvm_mmu_zap_all(struct kvm_vcpu *vcpu);
hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_emulator_want_group7_invlpg(void);
extern hpa_t bad_page_address;
static inline struct page *gfn_to_page(struct kvm_memory_slot *slot, gfn_t gfn)
{
return slot->phys_mem[gfn - slot->base_gfn];
}
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
......@@ -444,6 +482,10 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value,
struct x86_emulate_ctxt;
int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned long count, int string, int down,
gva_t address, int rep, unsigned port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
int emulate_clts(struct kvm_vcpu *vcpu);
int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr,
......@@ -493,12 +535,6 @@ static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
return vcpu->mmu.page_fault(vcpu, gva, error_code);
}
static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
return (slot) ? slot->phys_mem[gfn - slot->base_gfn] : NULL;
}
static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
......
This diff is collapsed.
......@@ -9,17 +9,15 @@
#include "svm.h"
#include "kvm.h"
static const u32 host_save_msrs[] = {
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
MSR_FS_BASE, MSR_GS_BASE,
MSR_FS_BASE,
#endif
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_IA32_DEBUGCTLMSR, /*MSR_IA32_LASTBRANCHFROMIP,
MSR_IA32_LASTBRANCHTOIP, MSR_IA32_LASTINTFROMIP,MSR_IA32_LASTINTTOIP,*/
};
#define NR_HOST_SAVE_MSRS ARRAY_SIZE(host_save_msrs)
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
#define NUM_DB_REGS 4
struct vcpu_svm {
......@@ -28,13 +26,12 @@ struct vcpu_svm {
struct svm_cpu_data *svm_data;
uint64_t asid_generation;
unsigned long cr0;
unsigned long cr4;
unsigned long db_regs[NUM_DB_REGS];
u64 next_rip;
u64 host_msrs[NR_HOST_SAVE_MSRS];
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
u64 host_gs_base;
unsigned long host_cr2;
unsigned long host_db_regs[NUM_DB_REGS];
unsigned long host_dr6;
......
#ifndef __KVM_VMX_H
#define __KVM_VMX_H
#ifdef CONFIG_X86_64
/*
* avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt
* mechanism (cpu bug AA24)
*/
#define NR_BAD_MSRS 2
#else
#define NR_BAD_MSRS 0
#endif
#endif
......@@ -52,11 +52,15 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
static int dbg = 1;
#endif
#ifndef MMU_DEBUG
#define ASSERT(x) do { } while (0)
#else
#define ASSERT(x) \
if (!(x)) { \
printk(KERN_WARNING "assertion failed %s:%d: %s\n", \
__FILE__, __LINE__, #x); \
}
#endif
#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
......@@ -159,6 +163,9 @@ struct kvm_rmap_desc {
struct kvm_rmap_desc *more;
};
static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
static int is_write_protection(struct kvm_vcpu *vcpu)
{
return vcpu->cr0 & CR0_WP_MASK;
......@@ -196,14 +203,15 @@ static int is_rmap_pte(u64 pte)
}
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
size_t objsize, int min)
struct kmem_cache *base_cache, int min,
gfp_t gfp_flags)
{
void *obj;
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
obj = kzalloc(objsize, GFP_NOWAIT);
obj = kmem_cache_zalloc(base_cache, gfp_flags);
if (!obj)
return -ENOMEM;
cache->objects[cache->nobjs++] = obj;
......@@ -217,20 +225,35 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
kfree(mc->objects[--mc->nobjs]);
}
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags)
{
int r;
r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
sizeof(struct kvm_pte_chain), 4);
pte_chain_cache, 4, gfp_flags);
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
sizeof(struct kvm_rmap_desc), 1);
rmap_desc_cache, 1, gfp_flags);
out:
return r;
}
static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
{
int r;
r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);
if (r < 0) {
spin_unlock(&vcpu->kvm->lock);
kvm_arch_ops->vcpu_put(vcpu);
r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
kvm_arch_ops->vcpu_load(vcpu);
spin_lock(&vcpu->kvm->lock);
}
return r;
}
static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
......@@ -390,13 +413,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
{
struct kvm *kvm = vcpu->kvm;
struct page *page;
struct kvm_memory_slot *slot;
struct kvm_rmap_desc *desc;
u64 *spte;
slot = gfn_to_memslot(kvm, gfn);
BUG_ON(!slot);
page = gfn_to_page(slot, gfn);
page = gfn_to_page(kvm, gfn);
BUG_ON(!page);
while (page_private(page)) {
if (!(page_private(page) & 1))
......@@ -417,6 +438,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
}
}
#ifdef MMU_DEBUG
static int is_empty_shadow_page(hpa_t page_hpa)
{
u64 *pos;
......@@ -431,15 +453,15 @@ static int is_empty_shadow_page(hpa_t page_hpa)
}
return 1;
}
#endif
static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
{
struct kvm_mmu_page *page_head = page_header(page_hpa);
ASSERT(is_empty_shadow_page(page_hpa));
list_del(&page_head->link);
page_head->page_hpa = page_hpa;
list_add(&page_head->link, &vcpu->free_pages);
list_move(&page_head->link, &vcpu->free_pages);
++vcpu->kvm->n_free_mmu_pages;
}
......@@ -457,11 +479,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
return NULL;
page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
list_del(&page->link);
list_add(&page->link, &vcpu->kvm->active_mmu_pages);
list_move(&page->link, &vcpu->kvm->active_mmu_pages);
ASSERT(is_empty_shadow_page(page->page_hpa));
page->slot_bitmap = 0;
page->global = 1;
page->multimapped = 0;
page->parent_pte = parent_pte;
--vcpu->kvm->n_free_mmu_pages;
......@@ -569,6 +589,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gva_t gaddr,
unsigned level,
int metaphysical,
unsigned hugepage_access,
u64 *parent_pte)
{
union kvm_mmu_page_role role;
......@@ -582,6 +603,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
role.glevels = vcpu->mmu.root_level;
role.level = level;
role.metaphysical = metaphysical;
role.hugepage_access = hugepage_access;
if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
......@@ -669,10 +691,8 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
if (!page->root_count) {
hlist_del(&page->hash_link);
kvm_mmu_free_page(vcpu, page->page_hpa);
} else {
list_del(&page->link);
list_add(&page->link, &vcpu->kvm->active_mmu_pages);
}
} else
list_move(&page->link, &vcpu->kvm->active_mmu_pages);
}
static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
......@@ -714,14 +734,12 @@ hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
struct kvm_memory_slot *slot;
struct page *page;
ASSERT((gpa & HPA_ERR_MASK) == 0);
slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
if (!slot)
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
if (!page)
return gpa | HPA_ERR_MASK;
page = gfn_to_page(slot, gpa >> PAGE_SHIFT);
return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
| (gpa & (PAGE_SIZE-1));
}
......@@ -735,6 +753,15 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
return gpa_to_hpa(vcpu, gpa);
}
struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
{
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
if (gpa == UNMAPPED_GVA)
return NULL;
return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT);
}
static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}
......@@ -772,7 +799,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
>> PAGE_SHIFT;
new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
v, level - 1,
1, &table[index]);
1, 0, &table[index]);
if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n");
return -ENOMEM;
......@@ -804,10 +831,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->mmu.pae_root[i];
ASSERT(VALID_PAGE(root));
root &= PT64_BASE_ADDR_MASK;
page = page_header(root);
--page->root_count;
if (root) {
ASSERT(VALID_PAGE(root));
root &= PT64_BASE_ADDR_MASK;
page = page_header(root);
--page->root_count;
}
vcpu->mmu.pae_root[i] = INVALID_PAGE;
}
vcpu->mmu.root_hpa = INVALID_PAGE;
......@@ -827,7 +856,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
page = kvm_mmu_get_page(vcpu, root_gfn, 0,
PT64_ROOT_LEVEL, 0, NULL);
PT64_ROOT_LEVEL, 0, 0, NULL);
root = page->page_hpa;
++page->root_count;
vcpu->mmu.root_hpa = root;
......@@ -838,13 +867,17 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
hpa_t root = vcpu->mmu.pae_root[i];
ASSERT(!VALID_PAGE(root));
if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL)
if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) {
if (!is_present_pte(vcpu->pdptrs[i])) {
vcpu->mmu.pae_root[i] = 0;
continue;
}
root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
else if (vcpu->mmu.root_level == 0)
} else if (vcpu->mmu.root_level == 0)
root_gfn = 0;
page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, !is_paging(vcpu),
NULL);
0, NULL);
root = page->page_hpa;
++page->root_count;
vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
......@@ -903,7 +936,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
++kvm_stat.tlb_flush;
++vcpu->stat.tlb_flush;
kvm_arch_ops->tlb_flush(vcpu);
}
......@@ -918,11 +951,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu)
kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
}
static void mark_pagetable_nonglobal(void *shadow_pte)
{
page_header(__pa(shadow_pte))->global = 0;
}
static inline void set_pte_common(struct kvm_vcpu *vcpu,
u64 *shadow_pte,
gpa_t gaddr,
......@@ -940,9 +968,6 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
*shadow_pte |= access_bits;
if (!(*shadow_pte & PT_GLOBAL_MASK))
mark_pagetable_nonglobal(shadow_pte);
if (is_error_hpa(paddr)) {
*shadow_pte |= gaddr;
*shadow_pte |= PT_SHADOW_IO_MARK;
......@@ -1316,6 +1341,51 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
}
}
void kvm_mmu_zap_all(struct kvm_vcpu *vcpu)
{
destroy_kvm_mmu(vcpu);
while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
struct kvm_mmu_page *page;
page = container_of(vcpu->kvm->active_mmu_pages.next,
struct kvm_mmu_page, link);
kvm_mmu_zap_page(vcpu, page);
}
mmu_free_memory_caches(vcpu);
kvm_arch_ops->tlb_flush(vcpu);
init_kvm_mmu(vcpu);
}
void kvm_mmu_module_exit(void)
{
if (pte_chain_cache)
kmem_cache_destroy(pte_chain_cache);
if (rmap_desc_cache)
kmem_cache_destroy(rmap_desc_cache);
}
int kvm_mmu_module_init(void)
{
pte_chain_cache = kmem_cache_create("kvm_pte_chain",
sizeof(struct kvm_pte_chain),
0, 0, NULL, NULL);
if (!pte_chain_cache)
goto nomem;
rmap_desc_cache = kmem_cache_create("kvm_rmap_desc",
sizeof(struct kvm_rmap_desc),
0, 0, NULL, NULL);
if (!rmap_desc_cache)
goto nomem;
return 0;
nomem:
kvm_mmu_module_exit();
return -ENOMEM;
}
#ifdef AUDIT
static const char *audit_msg;
......@@ -1338,7 +1408,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
u64 ent = pt[i];
if (!ent & PT_PRESENT_MASK)
if (!(ent & PT_PRESENT_MASK))
continue;
va = canonicalize(va);
......@@ -1360,7 +1430,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
static void audit_mappings(struct kvm_vcpu *vcpu)
{
int i;
unsigned i;
if (vcpu->mmu.root_level == 4)
audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
......
......@@ -148,8 +148,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
break;
}
if (walker->level != 3 || is_long_mode(vcpu))
walker->inherited_ar &= walker->table[index];
walker->inherited_ar &= walker->table[index];
table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK);
kunmap_atomic(walker->table, KM_USER0);
......@@ -248,6 +247,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
u64 shadow_pte;
int metaphysical;
gfn_t table_gfn;
unsigned hugepage_access = 0;
if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
if (level == PT_PAGE_TABLE_LEVEL)
......@@ -277,6 +277,9 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
if (level - 1 == PT_PAGE_TABLE_LEVEL
&& walker->level == PT_DIRECTORY_LEVEL) {
metaphysical = 1;
hugepage_access = *guest_ent;
hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
hugepage_access >>= PT_WRITABLE_SHIFT;
table_gfn = (*guest_ent & PT_BASE_ADDR_MASK)
>> PAGE_SHIFT;
} else {
......@@ -284,7 +287,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
table_gfn = walker->table_gfn[level - 2];
}
shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
metaphysical, shadow_ent);
metaphysical, hugepage_access,
shadow_ent);
shadow_addr = shadow_page->page_hpa;
shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
......@@ -444,7 +448,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
if (is_io_pte(*shadow_pte))
return 1;
++kvm_stat.pf_fixed;
++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, "post page fault (fixed)");
return write_pt;
......
This diff is collapsed.
......@@ -44,6 +44,9 @@ enum {
INTERCEPT_RDTSCP,
INTERCEPT_ICEBP,
INTERCEPT_WBINVD,
INTERCEPT_MONITOR,
INTERCEPT_MWAIT,
INTERCEPT_MWAIT_COND,
};
......@@ -298,6 +301,9 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_EXIT_RDTSCP 0x087
#define SVM_EXIT_ICEBP 0x088
#define SVM_EXIT_WBINVD 0x089
#define SVM_EXIT_MONITOR 0x08a
#define SVM_EXIT_MWAIT 0x08b
#define SVM_EXIT_MWAIT_COND 0x08c
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_ERR -1
......
This diff is collapsed.
......@@ -833,8 +833,9 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
dst.ptr = (unsigned long *)cr2;
dst.bytes = (d & ByteOp) ? 1 : op_bytes;
if (d & BitOp) {
dst.ptr += src.val / BITS_PER_LONG;
dst.bytes = sizeof(long);
unsigned long mask = ~(dst.bytes * 8 - 1);
dst.ptr = (void *)dst.ptr + (src.val & mask) / 8;
}
if (!(d & Mov) && /* optimisation - avoid slow emulated read */
((rc = ops->read_emulated((unsigned long)dst.ptr,
......@@ -1044,7 +1045,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
if ((rc = ops->write_std(
register_address(ctxt->ss_base,
_regs[VCPU_REGS_RSP]),
dst.val, dst.bytes, ctxt)) != 0)
&dst.val, dst.bytes, ctxt)) != 0)
goto done;
dst.val = dst.orig_val; /* skanky: disable writeback */
break;
......@@ -1077,12 +1078,12 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
case OP_MEM:
if (lock_prefix)
rc = ops->cmpxchg_emulated((unsigned long)dst.
ptr, dst.orig_val,
dst.val, dst.bytes,
ptr, &dst.orig_val,
&dst.val, dst.bytes,
ctxt);
else
rc = ops->write_emulated((unsigned long)dst.ptr,
dst.val, dst.bytes,
&dst.val, dst.bytes,
ctxt);
if (rc != 0)
goto done;
......@@ -1320,36 +1321,8 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
realmode_set_cr(ctxt->vcpu, modrm_reg, modrm_val, &_eflags);
break;
case 0xc7: /* Grp9 (cmpxchg8b) */
#if defined(__i386__)
{
unsigned long old_lo, old_hi;
if (((rc = ops->read_emulated(cr2 + 0, &old_lo, 4,
ctxt)) != 0)
|| ((rc = ops->read_emulated(cr2 + 4, &old_hi, 4,
ctxt)) != 0))
goto done;
if ((old_lo != _regs[VCPU_REGS_RAX])
|| (old_hi != _regs[VCPU_REGS_RDX])) {
_regs[VCPU_REGS_RAX] = old_lo;
_regs[VCPU_REGS_RDX] = old_hi;
_eflags &= ~EFLG_ZF;
} else if (ops->cmpxchg8b_emulated == NULL) {
rc = X86EMUL_UNHANDLEABLE;
goto done;
} else {
if ((rc = ops->cmpxchg8b_emulated(cr2, old_lo,
old_hi,
_regs[VCPU_REGS_RBX],
_regs[VCPU_REGS_RCX],
ctxt)) != 0)
goto done;
_eflags |= EFLG_ZF;
}
break;
}
#elif defined(CONFIG_X86_64)
{
unsigned long old, new;
u64 old, new;
if ((rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0)
goto done;
if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) ||
......@@ -1358,15 +1331,15 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
_regs[VCPU_REGS_RDX] = (u32) (old >> 32);
_eflags &= ~EFLG_ZF;
} else {
new = (_regs[VCPU_REGS_RCX] << 32) | (u32) _regs[VCPU_REGS_RBX];
if ((rc = ops->cmpxchg_emulated(cr2, old,
new, 8, ctxt)) != 0)
new = ((u64)_regs[VCPU_REGS_RCX] << 32)
| (u32) _regs[VCPU_REGS_RBX];
if ((rc = ops->cmpxchg_emulated(cr2, &old,
&new, 8, ctxt)) != 0)
goto done;
_eflags |= EFLG_ZF;
}
break;
}
#endif
}
goto writeback;
......
......@@ -59,8 +59,7 @@ struct x86_emulate_ops {
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_std)(unsigned long addr,
unsigned long *val,
int (*read_std)(unsigned long addr, void *val,
unsigned int bytes, struct x86_emulate_ctxt * ctxt);
/*
......@@ -71,8 +70,7 @@ struct x86_emulate_ops {
* required).
* @bytes: [IN ] Number of bytes to write to memory.
*/
int (*write_std)(unsigned long addr,
unsigned long val,
int (*write_std)(unsigned long addr, const void *val,
unsigned int bytes, struct x86_emulate_ctxt * ctxt);
/*
......@@ -82,7 +80,7 @@ struct x86_emulate_ops {
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_emulated) (unsigned long addr,
unsigned long *val,
void *val,
unsigned int bytes,
struct x86_emulate_ctxt * ctxt);
......@@ -94,7 +92,7 @@ struct x86_emulate_ops {
* @bytes: [IN ] Number of bytes to write to memory.
*/
int (*write_emulated) (unsigned long addr,
unsigned long val,
const void *val,
unsigned int bytes,
struct x86_emulate_ctxt * ctxt);
......@@ -107,29 +105,11 @@ struct x86_emulate_ops {
* @bytes: [IN ] Number of bytes to access using CMPXCHG.
*/
int (*cmpxchg_emulated) (unsigned long addr,
unsigned long old,
unsigned long new,
const void *old,
const void *new,
unsigned int bytes,
struct x86_emulate_ctxt * ctxt);
/*
* cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
* emulated/special memory area.
* @addr: [IN ] Linear address to access.
* @old: [IN ] Value expected to be current at @addr.
* @new: [IN ] Value to write to @addr.
* NOTES:
* 1. This function is only ever called when emulating a real CMPXCHG8B.
* 2. This function is *never* called on x86/64 systems.
* 2. Not defining this function (i.e., specifying NULL) is equivalent
* to defining a function that always returns X86EMUL_UNHANDLEABLE.
*/
int (*cmpxchg8b_emulated) (unsigned long addr,
unsigned long old_lo,
unsigned long old_hi,
unsigned long new_lo,
unsigned long new_hi,
struct x86_emulate_ctxt * ctxt);
};
struct cpu_user_regs;
......
......@@ -96,6 +96,7 @@ header-y += iso_fs.h
header-y += ixjuser.h
header-y += jffs2.h
header-y += keyctl.h
header-y += kvm.h
header-y += limits.h
header-y += lock_dlm_plock.h
header-y += magic.h
......
......@@ -11,7 +11,7 @@
#include <asm/types.h>
#include <linux/ioctl.h>
#define KVM_API_VERSION 4
#define KVM_API_VERSION 12
/*
* Architectural interrupt line count, and the size of the bitmap needed
......@@ -33,37 +33,39 @@ struct kvm_memory_region {
/* for kvm_memory_region::flags */
#define KVM_MEM_LOG_DIRTY_PAGES 1UL
#define KVM_EXIT_TYPE_FAIL_ENTRY 1
#define KVM_EXIT_TYPE_VM_EXIT 2
struct kvm_memory_alias {
__u32 slot; /* this has a different namespace than memory slots */
__u32 flags;
__u64 guest_phys_addr;
__u64 memory_size;
__u64 target_phys_addr;
};
enum kvm_exit_reason {
KVM_EXIT_UNKNOWN = 0,
KVM_EXIT_EXCEPTION = 1,
KVM_EXIT_IO = 2,
KVM_EXIT_CPUID = 3,
KVM_EXIT_HYPERCALL = 3,
KVM_EXIT_DEBUG = 4,
KVM_EXIT_HLT = 5,
KVM_EXIT_MMIO = 6,
KVM_EXIT_IRQ_WINDOW_OPEN = 7,
KVM_EXIT_SHUTDOWN = 8,
KVM_EXIT_FAIL_ENTRY = 9,
KVM_EXIT_INTR = 10,
};
/* for KVM_RUN */
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
__u32 emulated; /* skip current instruction */
__u32 mmio_completed; /* mmio request completed */
__u8 request_interrupt_window;
__u8 padding1[7];
/* out */
__u32 exit_type;
__u32 exit_reason;
__u32 instruction_length;
__u8 ready_for_interrupt_injection;
__u8 if_flag;
__u16 padding2;
__u8 padding2[2];
/* in (pre_kvm_run), out (post_kvm_run) */
__u64 cr8;
......@@ -72,29 +74,26 @@ struct kvm_run {
union {
/* KVM_EXIT_UNKNOWN */
struct {
__u32 hardware_exit_reason;
__u64 hardware_exit_reason;
} hw;
/* KVM_EXIT_FAIL_ENTRY */
struct {
__u64 hardware_entry_failure_reason;
} fail_entry;
/* KVM_EXIT_EXCEPTION */
struct {
__u32 exception;
__u32 error_code;
} ex;
/* KVM_EXIT_IO */
struct {
struct kvm_io {
#define KVM_EXIT_IO_IN 0
#define KVM_EXIT_IO_OUT 1
__u8 direction;
__u8 size; /* bytes */
__u8 string;
__u8 string_down;
__u8 rep;
__u8 pad;
__u16 port;
__u64 count;
union {
__u64 address;
__u32 value;
};
__u32 count;
__u64 data_offset; /* relative to kvm_run start */
} io;
struct {
} debug;
......@@ -105,6 +104,13 @@ struct kvm_run {
__u32 len;
__u8 is_write;
} mmio;
/* KVM_EXIT_HYPERCALL */
struct {
__u64 args[6];
__u64 ret;
__u32 longmode;
__u32 pad;
} hypercall;
};
};
......@@ -118,6 +124,21 @@ struct kvm_regs {
__u64 rip, rflags;
};
/* for KVM_GET_FPU and KVM_SET_FPU */
struct kvm_fpu {
__u8 fpr[8][16];
__u16 fcw;
__u16 fsw;
__u8 ftwx; /* in fxsave format */
__u8 pad1;
__u16 last_opcode;
__u64 last_ip;
__u64 last_dp;
__u8 xmm[16][16];
__u32 mxcsr;
__u32 pad2;
};
struct kvm_segment {
__u64 base;
__u32 limit;
......@@ -210,38 +231,74 @@ struct kvm_dirty_log {
};
};
struct kvm_cpuid_entry {
__u32 function;
__u32 eax;
__u32 ebx;
__u32 ecx;
__u32 edx;
__u32 padding;
};
/* for KVM_SET_CPUID */
struct kvm_cpuid {
__u32 nent;
__u32 padding;
struct kvm_cpuid_entry entries[0];
};
/* for KVM_SET_SIGNAL_MASK */
struct kvm_signal_mask {
__u32 len;
__u8 sigset[0];
};
#define KVMIO 0xAE
/*
* ioctls for /dev/kvm fds:
*/
#define KVM_GET_API_VERSION _IO(KVMIO, 1)
#define KVM_CREATE_VM _IO(KVMIO, 2) /* returns a VM fd */
#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 15, struct kvm_msr_list)
#define KVM_GET_API_VERSION _IO(KVMIO, 0x00)
#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */
#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list)
/*
* Check if a kvm extension is available. Argument is extension number,
* return is 1 (yes) or 0 (no, sorry).
*/
#define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03)
/*
* Get size for mmap(vcpu_fd)
*/
#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
/*
* ioctls for VM fds
*/
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region)
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
/*
* KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
* a vcpu fd.
*/
#define KVM_CREATE_VCPU _IOW(KVMIO, 11, int)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log)
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
/*
* ioctls for vcpu fds
*/
#define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run)
#define KVM_GET_REGS _IOR(KVMIO, 3, struct kvm_regs)
#define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs)
#define KVM_GET_SREGS _IOR(KVMIO, 5, struct kvm_sregs)
#define KVM_SET_SREGS _IOW(KVMIO, 6, struct kvm_sregs)
#define KVM_TRANSLATE _IOWR(KVMIO, 7, struct kvm_translation)
#define KVM_INTERRUPT _IOW(KVMIO, 8, struct kvm_interrupt)
#define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest)
#define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs)
#define KVM_SET_MSRS _IOW(KVMIO, 14, struct kvm_msrs)
#define KVM_RUN _IO(KVMIO, 0x80)
#define KVM_GET_REGS _IOR(KVMIO, 0x81, struct kvm_regs)
#define KVM_SET_REGS _IOW(KVMIO, 0x82, struct kvm_regs)
#define KVM_GET_SREGS _IOR(KVMIO, 0x83, struct kvm_sregs)
#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
#define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest)
#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
#define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask)
#define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu)
#define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu)
#endif
......@@ -29,6 +29,7 @@
#define TUN_MINOR 200
#define HPET_MINOR 228
#define KVM_MINOR 232
struct device;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment