Commit 3749c66c authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (106 commits)
  KVM: Replace enum by #define
  KVM: Skip pio instruction when it is emulated, not executed
  KVM: x86 emulator: popf
  KVM: x86 emulator: fix src, dst value initialization
  KVM: x86 emulator: jmp abs
  KVM: x86 emulator: lea
  KVM: X86 emulator: jump conditional short
  KVM: x86 emulator: implement jump conditional relative
  KVM: x86 emulator: sort opcodes into ascending order
  KVM: Improve emulation failure reporting
  KVM: x86 emulator: pushf
  KVM: x86 emulator: call near
  KVM: x86 emulator: push imm8
  KVM: VMX: Fix exit qualification width on i386
  KVM: Move main vcpu loop into subarch independent code
  KVM: VMX: Move vm entry failure handling to the exit handler
  KVM: MMU: Don't do GFP_NOWAIT allocations
  KVM: Rename kvm_arch_ops to kvm_x86_ops
  KVM: Simplify memory allocation
  KVM: Hoist SVM's get_cs_db_l_bits into core code.
  ...
parents 835c34a1 8a45450d
@@ -17,6 +17,7 @@ if VIRTUALIZATION
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on X86 && EXPERIMENTAL
+	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
...
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
 
 obj-$(CONFIG_KVM) += kvm.o
 
 kvm-intel-objs = vmx.o
 
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
...
/*
 *  Copyright (C) 2001  MandrakeSoft S.A.
 *
 *    MandrakeSoft S.A.
 *    43, rue d'Aboukir
 *    75002 Paris - France
 *    http://www.linux-mandrake.com/
 *    http://www.mandrakesoft.com/
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *  Yunhong Jiang <yunhong.jiang@intel.com>
 *  Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *  Based on Xen 3.1 code.
 */

#include "kvm.h"
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/io_apic.h>
#include "irq.h"

/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define ioapic_debug(fmt, arg...)

static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);

static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
                                          unsigned long addr,
                                          unsigned long length)
{
        unsigned long result = 0;

        switch (ioapic->ioregsel) {
        case IOAPIC_REG_VERSION:
                result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
                          | (IOAPIC_VERSION_ID & 0xff));
                break;

        case IOAPIC_REG_APIC_ID:
        case IOAPIC_REG_ARB_ID:
                result = ((ioapic->id & 0xf) << 24);
                break;

        default:
                {
                        u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
                        u64 redir_content;

                        ASSERT(redir_index < IOAPIC_NUM_PINS);

                        redir_content = ioapic->redirtbl[redir_index].bits;
                        result = (ioapic->ioregsel & 0x1) ?
                            (redir_content >> 32) & 0xffffffff :
                            redir_content & 0xffffffff;
                        break;
                }
        }

        return result;
}

static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
{
        union ioapic_redir_entry *pent;

        pent = &ioapic->redirtbl[idx];

        if (!pent->fields.mask) {
                ioapic_deliver(ioapic, idx);
                if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
                        pent->fields.remote_irr = 1;
        }
        if (!pent->fields.trig_mode)
                ioapic->irr &= ~(1 << idx);
}

static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
        unsigned index;

        switch (ioapic->ioregsel) {
        case IOAPIC_REG_VERSION:
                /* Writes are ignored. */
                break;

        case IOAPIC_REG_APIC_ID:
                ioapic->id = (val >> 24) & 0xf;
                break;

        case IOAPIC_REG_ARB_ID:
                break;

        default:
                index = (ioapic->ioregsel - 0x10) >> 1;

                ioapic_debug("change redir index %x val %x", index, val);
                if (index >= IOAPIC_NUM_PINS)
                        return;
                if (ioapic->ioregsel & 1) {
                        ioapic->redirtbl[index].bits &= 0xffffffff;
                        ioapic->redirtbl[index].bits |= (u64) val << 32;
                } else {
                        ioapic->redirtbl[index].bits &= ~0xffffffffULL;
                        ioapic->redirtbl[index].bits |= (u32) val;
                        ioapic->redirtbl[index].fields.remote_irr = 0;
                }
                if (ioapic->irr & (1 << index))
                        ioapic_service(ioapic, index);
                break;
        }
}

static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
                           struct kvm_lapic *target,
                           u8 vector, u8 trig_mode, u8 delivery_mode)
{
        ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
                     delivery_mode);

        ASSERT((delivery_mode == dest_Fixed) ||
               (delivery_mode == dest_LowestPrio));

        kvm_apic_set_irq(target, vector, trig_mode);
}

static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
                                       u8 dest_mode)
{
        u32 mask = 0;
        int i;
        struct kvm *kvm = ioapic->kvm;
        struct kvm_vcpu *vcpu;

        ioapic_debug("dest %d dest_mode %d", dest, dest_mode);

        if (dest_mode == 0) {   /* Physical mode. */
                if (dest == 0xFF) {     /* Broadcast. */
                        for (i = 0; i < KVM_MAX_VCPUS; ++i)
                                if (kvm->vcpus[i] && kvm->vcpus[i]->apic)
                                        mask |= 1 << i;
                        return mask;
                }
                for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                        vcpu = kvm->vcpus[i];
                        if (!vcpu)
                                continue;
                        if (kvm_apic_match_physical_addr(vcpu->apic, dest)) {
                                if (vcpu->apic)
                                        mask = 1 << i;
                                break;
                        }
                }
        } else if (dest != 0)   /* Logical mode, MDA non-zero. */
                for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                        vcpu = kvm->vcpus[i];
                        if (!vcpu)
                                continue;
                        if (vcpu->apic &&
                            kvm_apic_match_logical_addr(vcpu->apic, dest))
                                mask |= 1 << vcpu->vcpu_id;
                }
        ioapic_debug("mask %x", mask);
        return mask;
}

static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
{
        u8 dest = ioapic->redirtbl[irq].fields.dest_id;
        u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
        u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
        u8 vector = ioapic->redirtbl[irq].fields.vector;
        u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
        u32 deliver_bitmask;
        struct kvm_lapic *target;
        struct kvm_vcpu *vcpu;
        int vcpu_id;

        ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
                     "vector=%x trig_mode=%x",
                     dest, dest_mode, delivery_mode, vector, trig_mode);

        deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
        if (!deliver_bitmask) {
                ioapic_debug("no target on destination");
                return;
        }

        switch (delivery_mode) {
        case dest_LowestPrio:
                target =
                    kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask);
                if (target != NULL)
                        ioapic_inj_irq(ioapic, target, vector,
                                       trig_mode, delivery_mode);
                else
                        ioapic_debug("null round robin: "
                                     "mask=%x vector=%x delivery_mode=%x",
                                     deliver_bitmask, vector, dest_LowestPrio);
                break;
        case dest_Fixed:
                for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
                        if (!(deliver_bitmask & (1 << vcpu_id)))
                                continue;
                        deliver_bitmask &= ~(1 << vcpu_id);
                        vcpu = ioapic->kvm->vcpus[vcpu_id];
                        if (vcpu) {
                                target = vcpu->apic;
                                ioapic_inj_irq(ioapic, target, vector,
                                               trig_mode, delivery_mode);
                        }
                }
                break;

                /* TODO: NMI */
        default:
                printk(KERN_WARNING "Unsupported delivery mode %d\n",
                       delivery_mode);
                break;
        }
}

void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
{
        u32 old_irr = ioapic->irr;
        u32 mask = 1 << irq;
        union ioapic_redir_entry entry;

        if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
                entry = ioapic->redirtbl[irq];
                level ^= entry.fields.polarity;
                if (!level)
                        ioapic->irr &= ~mask;
                else {
                        ioapic->irr |= mask;
                        if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
                            || !entry.fields.remote_irr)
                                ioapic_service(ioapic, irq);
                }
        }
}

static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
{
        int i;

        for (i = 0; i < IOAPIC_NUM_PINS; i++)
                if (ioapic->redirtbl[i].fields.vector == vector)
                        return i;
        return -1;
}

void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
{
        struct kvm_ioapic *ioapic = kvm->vioapic;
        union ioapic_redir_entry *ent;
        int gsi;

        gsi = get_eoi_gsi(ioapic, vector);
        if (gsi == -1) {
                printk(KERN_WARNING "Can't find redir item for %d EOI\n",
                       vector);
                return;
        }

        ent = &ioapic->redirtbl[gsi];
        ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);

        ent->fields.remote_irr = 0;
        if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
                ioapic_deliver(ioapic, gsi);
}

static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
{
        struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;

        return ((addr >= ioapic->base_address &&
                 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
}

static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
                             void *val)
{
        struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
        u32 result;

        ioapic_debug("addr %lx", (unsigned long)addr);
        ASSERT(!(addr & 0xf));  /* check alignment */

        addr &= 0xff;
        switch (addr) {
        case IOAPIC_REG_SELECT:
                result = ioapic->ioregsel;
                break;

        case IOAPIC_REG_WINDOW:
                result = ioapic_read_indirect(ioapic, addr, len);
                break;

        default:
                result = 0;
                break;
        }
        switch (len) {
        case 8:
                *(u64 *) val = result;
                break;
        case 1:
        case 2:
        case 4:
                memcpy(val, (char *)&result, len);
                break;
        default:
                printk(KERN_WARNING "ioapic: wrong length %d\n", len);
        }
}

static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
                              const void *val)
{
        struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
        u32 data;

        ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
                     addr, len, val);
        ASSERT(!(addr & 0xf));  /* check alignment */
        if (len == 4 || len == 8)
                data = *(u32 *) val;
        else {
                printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
                return;
        }

        addr &= 0xff;
        switch (addr) {
        case IOAPIC_REG_SELECT:
                ioapic->ioregsel = data;
                break;

        case IOAPIC_REG_WINDOW:
                ioapic_write_indirect(ioapic, data);
                break;

        default:
                break;
        }
}

int kvm_ioapic_init(struct kvm *kvm)
{
        struct kvm_ioapic *ioapic;
        int i;

        ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
        if (!ioapic)
                return -ENOMEM;
        kvm->vioapic = ioapic;
        for (i = 0; i < IOAPIC_NUM_PINS; i++)
                ioapic->redirtbl[i].fields.mask = 1;
        ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
        ioapic->dev.read = ioapic_mmio_read;
        ioapic->dev.write = ioapic_mmio_write;
        ioapic->dev.in_range = ioapic_in_range;
        ioapic->dev.private = ioapic;
        ioapic->kvm = kvm;
        kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
        return 0;
}
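[Editor's note] The MMIO handlers above emulate the IOAPIC's indirect register scheme: software stores a register index in IOREGSEL (offset 0x00) and then accesses a 32-bit window at IOWIN (offset 0x10); the 64-bit redirection entry for pin N lives at indirect indices 0x10 + 2N (low dword) and 0x10 + 2N + 1 (high dword). A minimal guest-side sketch of that access pattern follows; the helper and the fixed MMIO pointer are illustrative assumptions, not code from this commit:

#include <stdint.h>

/* Sketch: program one redirection entry through the select/window pair
 * that ioapic_mmio_write() above emulates.  A real guest would ioremap()
 * the IOAPIC; the hard-coded physical address is for illustration only. */
#define IOAPIC_MMIO(ofs)  (*(volatile uint32_t *)(0xfec00000UL + (ofs)))

static void ioapic_write_redir(unsigned int pin, uint64_t entry)
{
        /* Low dword first: the emulation clears remote_irr on this write. */
        IOAPIC_MMIO(0x00) = 0x10 + pin * 2;          /* IOAPIC_REG_SELECT */
        IOAPIC_MMIO(0x10) = (uint32_t)entry;         /* IOAPIC_REG_WINDOW */
        /* High dword carries the destination id. */
        IOAPIC_MMIO(0x00) = 0x10 + pin * 2 + 1;
        IOAPIC_MMIO(0x10) = (uint32_t)(entry >> 32);
}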
/*
 * irq.c: API for in kernel interrupt controller
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 * Authors:
 *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
 *
 */

#include <linux/module.h>

#include "kvm.h"
#include "irq.h"

/*
 * check if there is pending interrupt without
 * intack.
 */
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
        struct kvm_pic *s;

        if (kvm_apic_has_interrupt(v) == -1) {  /* LAPIC */
                if (kvm_apic_accept_pic_intr(v)) {
                        s = pic_irqchip(v->kvm);        /* PIC */
                        return s->output;
                } else
                        return 0;
        }
        return 1;
}
EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);

/*
 * Read pending interrupt vector and intack.
 */
int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
{
        struct kvm_pic *s;
        int vector;

        vector = kvm_get_apic_interrupt(v);     /* APIC */
        if (vector == -1) {
                if (kvm_apic_accept_pic_intr(v)) {
                        s = pic_irqchip(v->kvm);
                        s->output = 0;          /* PIC */
                        vector = kvm_pic_read_irq(s);
                }
        }
        return vector;
}
EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);

static void vcpu_kick_intr(void *info)
{
#ifdef DEBUG
        struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
        printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
#endif
}

void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
{
        int ipi_pcpu = vcpu->cpu;

        if (waitqueue_active(&vcpu->wq)) {
                wake_up_interruptible(&vcpu->wq);
                ++vcpu->stat.halt_wakeup;
        }
        if (vcpu->guest_mode)
                smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
}

void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
        kvm_inject_apic_timer_irqs(vcpu);
        /* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);

void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
        kvm_apic_timer_intr_post(vcpu, vec);
        /* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
/*
 * irq.h: in kernel interrupt controller related definitions
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 * Authors:
 *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
 *
 */

#ifndef __IRQ_H
#define __IRQ_H

#include "kvm.h"

typedef void irq_request_func(void *opaque, int level);

struct kvm_kpic_state {
        u8 last_irr;    /* edge detection */
        u8 irr;         /* interrupt request register */
        u8 imr;         /* interrupt mask register */
        u8 isr;         /* interrupt service register */
        u8 priority_add;        /* highest irq priority */
        u8 irq_base;
        u8 read_reg_select;
        u8 poll;
        u8 special_mask;
        u8 init_state;
        u8 auto_eoi;
        u8 rotate_on_auto_eoi;
        u8 special_fully_nested_mode;
        u8 init4;       /* true if 4 byte init */
        u8 elcr;        /* PIIX edge/trigger selection */
        u8 elcr_mask;
        struct kvm_pic *pics_state;
};

struct kvm_pic {
        struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
        irq_request_func *irq_request;
        void *irq_request_opaque;
        int output;     /* intr from master PIC */
        struct kvm_io_device dev;
};

struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_pic_set_irq(void *opaque, int irq, int level);
int kvm_pic_read_irq(struct kvm_pic *s);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
void kvm_pic_update_irq(struct kvm_pic *s);

#define IOAPIC_NUM_PINS                 KVM_IOAPIC_NUM_PINS
#define IOAPIC_VERSION_ID               0x11    /* IOAPIC version */
#define IOAPIC_EDGE_TRIG                0
#define IOAPIC_LEVEL_TRIG               1
#define IOAPIC_DEFAULT_BASE_ADDRESS     0xfec00000
#define IOAPIC_MEM_LENGTH               0x100

/* Direct registers. */
#define IOAPIC_REG_SELECT               0x00
#define IOAPIC_REG_WINDOW               0x10
#define IOAPIC_REG_EOI                  0x40    /* IA64 IOSAPIC only */

/* Indirect registers. */
#define IOAPIC_REG_APIC_ID              0x00    /* x86 IOAPIC only */
#define IOAPIC_REG_VERSION              0x01
#define IOAPIC_REG_ARB_ID               0x02    /* x86 IOAPIC only */

struct kvm_ioapic {
        u64 base_address;
        u32 ioregsel;
        u32 id;
        u32 irr;
        u32 pad;
        union ioapic_redir_entry {
                u64 bits;
                struct {
                        u8 vector;
                        u8 delivery_mode:3;
                        u8 dest_mode:1;
                        u8 delivery_status:1;
                        u8 polarity:1;
                        u8 remote_irr:1;
                        u8 trig_mode:1;
                        u8 mask:1;
                        u8 reserve:7;
                        u8 reserved[4];
                        u8 dest_id;
                } fields;
        } redirtbl[IOAPIC_NUM_PINS];
        struct kvm_io_device dev;
        struct kvm *kvm;
};

struct kvm_lapic {
        unsigned long base_address;
        struct kvm_io_device dev;
        struct {
                atomic_t pending;
                s64 period;     /* unit: ns */
                u32 divide_count;
                ktime_t last_update;
                struct hrtimer dev;
        } timer;
        struct kvm_vcpu *vcpu;
        struct page *regs_page;
        void *regs;
};

#ifdef DEBUG
#define ASSERT(x)                                                       \
do {                                                                    \
        if (!(x)) {                                                     \
                printk(KERN_EMERG "assertion failed %s: %d: %s\n",      \
                       __FILE__, __LINE__, #x);                         \
                BUG();                                                  \
        }                                                               \
} while (0)
#else
#define ASSERT(x) do { } while (0)
#endif

void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_lapic_reset(struct kvm_vcpu *vcpu);
void kvm_free_apic(struct kvm_lapic *apic);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
                                       unsigned long bitmap);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);

#endif
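[Editor's note] For readers decoding redirtbl contents, a small illustrative helper shows how the bitfields above map onto one 64-bit entry; the chosen values (vector 0x31, physical destination, APIC id 1) are arbitrary examples, not from this header:

/* Illustrative only: assemble a fixed-delivery, level-triggered entry
 * using the union ioapic_redir_entry declared above. */
static inline union ioapic_redir_entry example_redir_entry(void)
{
        union ioapic_redir_entry e = { .bits = 0 };

        e.fields.vector = 0x31;                 /* vector raised in the guest IDT */
        e.fields.delivery_mode = 0;             /* dest_Fixed */
        e.fields.dest_mode = 0;                 /* physical destination */
        e.fields.trig_mode = IOAPIC_LEVEL_TRIG; /* EOI must clear remote_irr */
        e.fields.mask = 0;                      /* pin enabled */
        e.fields.dest_id = 1;                   /* target LAPIC id */
        return e;
}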
@@ -20,7 +20,10 @@ static const u32 host_save_user_msrs[] = {
 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
 
 #define NUM_DB_REGS 4
 
+struct kvm_vcpu;
+
 struct vcpu_svm {
+	struct kvm_vcpu vcpu;
 	struct vmcb *vmcb;
 	unsigned long vmcb_pa;
 	struct svm_cpu_data *svm_data;
...
@@ -158,7 +158,7 @@ static struct kmem_cache *mmu_page_header_cache;
 
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
-	return vcpu->cr0 & CR0_WP_MASK;
+	return vcpu->cr0 & X86_CR0_WP;
 }
 
 static int is_cpuid_PSE36(void)
@@ -202,15 +202,14 @@ static void set_shadow_pte(u64 *sptep, u64 spte)
 }
 
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
-				  struct kmem_cache *base_cache, int min,
-				  gfp_t gfp_flags)
+				  struct kmem_cache *base_cache, int min)
 {
 	void *obj;
 
 	if (cache->nobjs >= min)
 		return 0;
 	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-		obj = kmem_cache_zalloc(base_cache, gfp_flags);
+		obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
 		if (!obj)
 			return -ENOMEM;
 		cache->objects[cache->nobjs++] = obj;
@@ -225,14 +224,14 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
 }
 
 static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
-				       int min, gfp_t gfp_flags)
+				       int min)
 {
 	struct page *page;
 
 	if (cache->nobjs >= min)
 		return 0;
 	while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-		page = alloc_page(gfp_flags);
+		page = alloc_page(GFP_KERNEL);
 		if (!page)
 			return -ENOMEM;
 		set_page_private(page, 0);
@@ -247,44 +246,28 @@ static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
 	free_page((unsigned long)mc->objects[--mc->nobjs]);
 }
 
-static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags)
+static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 {
 	int r;
 
+	kvm_mmu_free_some_pages(vcpu);
 	r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
-				   pte_chain_cache, 4, gfp_flags);
+				   pte_chain_cache, 4);
 	if (r)
 		goto out;
 	r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
-				   rmap_desc_cache, 1, gfp_flags);
+				   rmap_desc_cache, 1);
 	if (r)
 		goto out;
-	r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4, gfp_flags);
+	r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4);
 	if (r)
 		goto out;
 	r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
-				   mmu_page_header_cache, 4, gfp_flags);
+				   mmu_page_header_cache, 4);
 out:
 	return r;
 }
 
-static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
-{
-	int r;
-
-	r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);
-	kvm_mmu_free_some_pages(vcpu);
-	if (r < 0) {
-		spin_unlock(&vcpu->kvm->lock);
-		kvm_arch_ops->vcpu_put(vcpu);
-		r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
-		kvm_arch_ops->vcpu_load(vcpu);
-		spin_lock(&vcpu->kvm->lock);
-		kvm_mmu_free_some_pages(vcpu);
-	}
-	return r;
-}
-
 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
 	mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
@@ -969,7 +952,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
 static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.tlb_flush;
-	kvm_arch_ops->tlb_flush(vcpu);
+	kvm_x86_ops->tlb_flush(vcpu);
 }
 
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
@@ -982,7 +965,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
 			      u64 addr,
 			      u32 err_code)
 {
-	kvm_arch_ops->inject_page_fault(vcpu, addr, err_code);
+	kvm_x86_ops->inject_page_fault(vcpu, addr, err_code);
 }
 
 static void paging_free(struct kvm_vcpu *vcpu)
@@ -1071,15 +1054,15 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 {
 	int r;
 
-	spin_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->lock);
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
 		goto out;
 	mmu_alloc_roots(vcpu);
-	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
+	kvm_x86_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
 	kvm_mmu_flush_tlb(vcpu);
 out:
-	spin_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_load);
@@ -1124,7 +1107,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *old, const u8 *new, int bytes)
+		       const u8 *new, int bytes)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	struct kvm_mmu_page *page;
...
@@ -58,7 +58,10 @@ struct guest_walker {
 	int level;
 	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
 	pt_element_t *table;
+	pt_element_t pte;
 	pt_element_t *ptep;
+	struct page *page;
+	int index;
 	pt_element_t inherited_ar;
 	gfn_t gfn;
 	u32 error_code;
@@ -80,11 +83,14 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 	pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
 	walker->level = vcpu->mmu.root_level;
 	walker->table = NULL;
+	walker->page = NULL;
+	walker->ptep = NULL;
 	root = vcpu->cr3;
 #if PTTYPE == 64
 	if (!is_long_mode(vcpu)) {
 		walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
 		root = *walker->ptep;
+		walker->pte = root;
 		if (!(root & PT_PRESENT_MASK))
 			goto not_present;
 		--walker->level;
@@ -96,10 +102,11 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 		 walker->level - 1, table_gfn);
 	slot = gfn_to_memslot(vcpu->kvm, table_gfn);
 	hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
-	walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0);
+	walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
+	walker->table = kmap_atomic(walker->page, KM_USER0);
 
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
-	       (vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0);
+	       (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
 
 	walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
@@ -108,6 +115,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 		hpa_t paddr;
 
 		ptep = &walker->table[index];
+		walker->index = index;
 		ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
 		       ((unsigned long)ptep & PAGE_MASK));
@@ -148,16 +156,20 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 		walker->inherited_ar &= walker->table[index];
 		table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
-		paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK);
 		kunmap_atomic(walker->table, KM_USER0);
-		walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT),
-					    KM_USER0);
+		paddr = safe_gpa_to_hpa(vcpu, table_gfn << PAGE_SHIFT);
+		walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
+		walker->table = kmap_atomic(walker->page, KM_USER0);
 		--walker->level;
 		walker->table_gfn[walker->level - 1 ] = table_gfn;
 		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
 			 walker->level - 1, table_gfn);
 	}
-	walker->ptep = ptep;
+	walker->pte = *ptep;
+	if (walker->page)
+		walker->ptep = NULL;
+	if (walker->table)
+		kunmap_atomic(walker->table, KM_USER0);
 	pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
 
 	return 1;
@@ -175,13 +187,9 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 		walker->error_code |= PFERR_USER_MASK;
 	if (fetch_fault)
 		walker->error_code |= PFERR_FETCH_MASK;
-	return 0;
-}
-
-static void FNAME(release_walker)(struct guest_walker *walker)
-{
 	if (walker->table)
 		kunmap_atomic(walker->table, KM_USER0);
+	return 0;
 }
 
 static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
@@ -193,7 +201,7 @@ static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
 static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 				  u64 *shadow_pte,
 				  gpa_t gaddr,
-				  pt_element_t *gpte,
+				  pt_element_t gpte,
 				  u64 access_bits,
 				  int user_fault,
 				  int write_fault,
@@ -202,23 +210,34 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 				  gfn_t gfn)
 {
 	hpa_t paddr;
-	int dirty = *gpte & PT_DIRTY_MASK;
+	int dirty = gpte & PT_DIRTY_MASK;
 	u64 spte = *shadow_pte;
 	int was_rmapped = is_rmap_pte(spte);
 
 	pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d"
 		 " user_fault %d gfn %lx\n",
-		 __FUNCTION__, spte, (u64)*gpte, access_bits,
+		 __FUNCTION__, spte, (u64)gpte, access_bits,
 		 write_fault, user_fault, gfn);
 
 	if (write_fault && !dirty) {
-		*gpte |= PT_DIRTY_MASK;
+		pt_element_t *guest_ent, *tmp = NULL;
+
+		if (walker->ptep)
+			guest_ent = walker->ptep;
+		else {
+			tmp = kmap_atomic(walker->page, KM_USER0);
+			guest_ent = &tmp[walker->index];
+		}
+
+		*guest_ent |= PT_DIRTY_MASK;
+		if (!walker->ptep)
+			kunmap_atomic(tmp, KM_USER0);
 		dirty = 1;
 		FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
 	}
 
 	spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
-	spte |= *gpte & PT64_NX_MASK;
+	spte |= gpte & PT64_NX_MASK;
 	if (!dirty)
 		access_bits &= ~PT_WRITABLE_MASK;
@@ -255,7 +274,7 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 		access_bits &= ~PT_WRITABLE_MASK;
 		if (is_writeble_pte(spte)) {
 			spte &= ~PT_WRITABLE_MASK;
-			kvm_arch_ops->tlb_flush(vcpu);
+			kvm_x86_ops->tlb_flush(vcpu);
 		}
 		if (write_fault)
 			*ptwrite = 1;
@@ -273,13 +292,13 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 	rmap_add(vcpu, shadow_pte);
 }
 
-static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte,
+static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte,
 			   u64 *shadow_pte, u64 access_bits,
 			   int user_fault, int write_fault, int *ptwrite,
 			   struct guest_walker *walker, gfn_t gfn)
 {
-	access_bits &= *gpte;
-	FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK,
+	access_bits &= gpte;
+	FNAME(set_pte_common)(vcpu, shadow_pte, gpte & PT_BASE_ADDR_MASK,
 			      gpte, access_bits, user_fault, write_fault,
 			      ptwrite, walker, gfn);
 }
@@ -295,22 +314,22 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK))
 		return;
 	pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
-	FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0,
+	FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0,
 		       0, NULL, NULL,
 		       (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT);
 }
 
-static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde,
+static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t gpde,
 			   u64 *shadow_pte, u64 access_bits,
 			   int user_fault, int write_fault, int *ptwrite,
 			   struct guest_walker *walker, gfn_t gfn)
 {
 	gpa_t gaddr;
 
-	access_bits &= *gpde;
+	access_bits &= gpde;
 	gaddr = (gpa_t)gfn << PAGE_SHIFT;
 	if (PTTYPE == 32 && is_cpuid_PSE36())
-		gaddr |= (*gpde & PT32_DIR_PSE36_MASK) <<
+		gaddr |= (gpde & PT32_DIR_PSE36_MASK) <<
 			(32 - PT32_DIR_PSE36_SHIFT);
 	FNAME(set_pte_common)(vcpu, shadow_pte, gaddr,
 			      gpde, access_bits, user_fault, write_fault,
@@ -328,9 +347,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	int level;
 	u64 *shadow_ent;
 	u64 *prev_shadow_ent = NULL;
-	pt_element_t *guest_ent = walker->ptep;
 
-	if (!is_present_pte(*guest_ent))
+	if (!is_present_pte(walker->pte))
 		return NULL;
 
 	shadow_addr = vcpu->mmu.root_hpa;
@@ -364,12 +382,12 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		if (level - 1 == PT_PAGE_TABLE_LEVEL
 		    && walker->level == PT_DIRECTORY_LEVEL) {
 			metaphysical = 1;
-			hugepage_access = *guest_ent;
+			hugepage_access = walker->pte;
 			hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
-			if (*guest_ent & PT64_NX_MASK)
+			if (walker->pte & PT64_NX_MASK)
 				hugepage_access |= (1 << 2);
 			hugepage_access >>= PT_WRITABLE_SHIFT;
-			table_gfn = (*guest_ent & PT_BASE_ADDR_MASK)
+			table_gfn = (walker->pte & PT_BASE_ADDR_MASK)
 				>> PAGE_SHIFT;
 		} else {
 			metaphysical = 0;
@@ -386,12 +404,12 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	}
 
 	if (walker->level == PT_DIRECTORY_LEVEL) {
-		FNAME(set_pde)(vcpu, guest_ent, shadow_ent,
+		FNAME(set_pde)(vcpu, walker->pte, shadow_ent,
 			       walker->inherited_ar, user_fault, write_fault,
 			       ptwrite, walker, walker->gfn);
 	} else {
 		ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
-		FNAME(set_pte)(vcpu, guest_ent, shadow_ent,
+		FNAME(set_pte)(vcpu, walker->pte, shadow_ent,
 			       walker->inherited_ar, user_fault, write_fault,
 			       ptwrite, walker, walker->gfn);
 	}
@@ -442,7 +460,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	if (!r) {
 		pgprintk("%s: guest page fault\n", __FUNCTION__);
 		inject_page_fault(vcpu, addr, walker.error_code);
-		FNAME(release_walker)(&walker);
 		vcpu->last_pt_write_count = 0; /* reset fork detector */
 		return 0;
 	}
@@ -452,8 +469,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
 		 shadow_pte, *shadow_pte, write_pt);
 
-	FNAME(release_walker)(&walker);
-
 	if (!write_pt)
 		vcpu->last_pt_write_count = 0; /* reset fork detector */
@@ -482,7 +497,6 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
 		gpa |= vaddr & ~PAGE_MASK;
 	}
 
-	FNAME(release_walker)(&walker);
 	return gpa;
 }
...
@@ -25,29 +25,36 @@
  *
  */
 
 #define CPU_BASED_VIRTUAL_INTR_PENDING          0x00000004
 #define CPU_BASED_USE_TSC_OFFSETING             0x00000008
 #define CPU_BASED_HLT_EXITING                   0x00000080
-#define CPU_BASED_INVDPG_EXITING                0x00000200
+#define CPU_BASED_INVLPG_EXITING                0x00000200
 #define CPU_BASED_MWAIT_EXITING                 0x00000400
 #define CPU_BASED_RDPMC_EXITING                 0x00000800
 #define CPU_BASED_RDTSC_EXITING                 0x00001000
 #define CPU_BASED_CR8_LOAD_EXITING              0x00080000
 #define CPU_BASED_CR8_STORE_EXITING             0x00100000
 #define CPU_BASED_TPR_SHADOW                    0x00200000
 #define CPU_BASED_MOV_DR_EXITING                0x00800000
 #define CPU_BASED_UNCOND_IO_EXITING             0x01000000
-#define CPU_BASED_ACTIVATE_IO_BITMAP            0x02000000
-#define CPU_BASED_MSR_BITMAPS                   0x10000000
+#define CPU_BASED_USE_IO_BITMAPS                0x02000000
+#define CPU_BASED_USE_MSR_BITMAPS               0x10000000
 #define CPU_BASED_MONITOR_EXITING               0x20000000
 #define CPU_BASED_PAUSE_EXITING                 0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS   0x80000000
 
-#define PIN_BASED_EXT_INTR_MASK 0x1
-#define PIN_BASED_NMI_EXITING   0x8
+#define PIN_BASED_EXT_INTR_MASK                 0x00000001
+#define PIN_BASED_NMI_EXITING                   0x00000008
+#define PIN_BASED_VIRTUAL_NMIS                  0x00000020
 
-#define VM_EXIT_ACK_INTR_ON_EXIT        0x00008000
-#define VM_EXIT_HOST_ADD_SPACE_SIZE     0x00000200
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE            0x00000200
+#define VM_EXIT_ACK_INTR_ON_EXIT                0x00008000
 
+#define VM_ENTRY_IA32E_MODE                     0x00000200
+#define VM_ENTRY_SMM                            0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR             0x00000800
+
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
 
 /* VMCS Encodings */
 enum vmcs_field {
@@ -206,6 +213,7 @@ enum vmcs_field {
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 
 /*
  * Interruption-information format
@@ -261,9 +269,6 @@ enum vmcs_field {
 /* segment AR */
 #define SEGMENT_AR_L_MASK (1 << 13)
 
-/* entry controls */
-#define VM_ENTRY_CONTROLS_IA32E_MASK (1 << 9)
-
 #define AR_TYPE_ACCESSES_MASK 1
 #define AR_TYPE_READABLE_MASK (1 << 1)
 #define AR_TYPE_WRITEABLE_MASK (1 << 2)
@@ -285,13 +290,21 @@ enum vmcs_field {
 #define AR_RESERVD_MASK 0xfffe0f00
 
-#define CR4_VMXE 0x2000
+#define MSR_IA32_VMX_BASIC                      0x480
+#define MSR_IA32_VMX_PINBASED_CTLS              0x481
+#define MSR_IA32_VMX_PROCBASED_CTLS             0x482
+#define MSR_IA32_VMX_EXIT_CTLS                  0x483
+#define MSR_IA32_VMX_ENTRY_CTLS                 0x484
+#define MSR_IA32_VMX_MISC                       0x485
+#define MSR_IA32_VMX_CR0_FIXED0                 0x486
+#define MSR_IA32_VMX_CR0_FIXED1                 0x487
+#define MSR_IA32_VMX_CR4_FIXED0                 0x488
+#define MSR_IA32_VMX_CR4_FIXED1                 0x489
+#define MSR_IA32_VMX_VMCS_ENUM                  0x48a
+#define MSR_IA32_VMX_PROCBASED_CTLS2            0x48b
 
-#define MSR_IA32_VMX_BASIC   0x480
-#define MSR_IA32_FEATURE_CONTROL 0x03a
-#define MSR_IA32_VMX_PINBASED_CTLS 0x481
-#define MSR_IA32_VMX_PROCBASED_CTLS 0x482
-#define MSR_IA32_VMX_EXIT_CTLS 0x483
-#define MSR_IA32_VMX_ENTRY_CTLS 0x484
+#define MSR_IA32_FEATURE_CONTROL                0x3a
+#define MSR_IA32_FEATURE_CONTROL_LOCKED         0x1
+#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED  0x4
 
 #endif
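[Editor's note] The MSR_IA32_VMX_* feature MSRs added above report which bits of each VMX control field the CPU supports: per the Intel SDM, the low 32 bits give settings that must be 1 ("allowed-0") and the high 32 bits give settings that may be 1 ("allowed-1"). A hedged kernel-style sketch of the usual clamping helper, not code from this diff:

#include <linux/types.h>
#include <asm/msr.h>

/* Sketch only: clamp desired VMX controls against what the CPU allows,
 * following the allowed-0/allowed-1 convention of MSR_IA32_VMX_*. */
static u32 adjust_vmx_controls(u32 ctl, u32 msr)
{
        u32 vmx_msr_low, vmx_msr_high;

        rdmsr(msr, vmx_msr_low, vmx_msr_high);
        ctl &= vmx_msr_high;    /* drop bits the CPU cannot set to 1 */
        ctl |= vmx_msr_low;     /* force bits the CPU requires to be 1 */
        return ctl;
}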
@@ -60,7 +60,7 @@ struct x86_emulate_ops {
 	 * @bytes: [IN ] Number of bytes to read from memory.
 	 */
 	int (*read_std)(unsigned long addr, void *val,
-			unsigned int bytes, struct x86_emulate_ctxt * ctxt);
+			unsigned int bytes, struct kvm_vcpu *vcpu);
 
 	/*
 	 * write_std: Write bytes of standard (non-emulated/special) memory.
@@ -71,7 +71,7 @@ struct x86_emulate_ops {
 	 * @bytes: [IN ] Number of bytes to write to memory.
 	 */
 	int (*write_std)(unsigned long addr, const void *val,
-			 unsigned int bytes, struct x86_emulate_ctxt * ctxt);
+			 unsigned int bytes, struct kvm_vcpu *vcpu);
 
 	/*
 	 * read_emulated: Read bytes from emulated/special memory area.
@@ -82,7 +82,7 @@ struct x86_emulate_ops {
 	int (*read_emulated) (unsigned long addr,
 			      void *val,
 			      unsigned int bytes,
-			      struct x86_emulate_ctxt * ctxt);
+			      struct kvm_vcpu *vcpu);
 
 	/*
 	 * write_emulated: Read bytes from emulated/special memory area.
@@ -94,7 +94,7 @@ struct x86_emulate_ops {
 	int (*write_emulated) (unsigned long addr,
 			       const void *val,
 			       unsigned int bytes,
-			       struct x86_emulate_ctxt * ctxt);
+			       struct kvm_vcpu *vcpu);
 
 	/*
 	 * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@@ -108,12 +108,10 @@ struct x86_emulate_ops {
 			    const void *old,
 			    const void *new,
 			    unsigned int bytes,
-			    struct x86_emulate_ctxt * ctxt);
+			    struct kvm_vcpu *vcpu);
 
 };
 
-struct cpu_user_regs;
-
 struct x86_emulate_ctxt {
 	/* Register state before/after emulation. */
 	struct kvm_vcpu *vcpu;
@@ -154,12 +152,4 @@ struct x86_emulate_ctxt {
 int x86_emulate_memop(struct x86_emulate_ctxt *ctxt,
 		      struct x86_emulate_ops *ops);
 
-/*
- * Given the 'reg' portion of a ModRM byte, and a register block, return a
- * pointer into the block that addresses the relevant register.
- * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
- */
-void *decode_register(u8 modrm_reg, unsigned long *regs,
-		      int highbyte_regs);
-
 #endif /* __X86_EMULATE_H__ */
@@ -11,8 +11,6 @@
  * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
  */
 
-#ifdef CONFIG_X86_IO_APIC
-
 /*
  * The structure of the IO-APIC:
  */
@@ -55,12 +53,6 @@ union IO_APIC_reg_03 {
 	} __attribute__ ((packed)) bits;
 };
 
-/*
- * # of IO-APICs and # of IRQ routing registers
- */
-extern int nr_ioapics;
-extern int nr_ioapic_registers[MAX_IO_APICS];
-
 enum ioapic_irq_destination_types {
 	dest_Fixed = 0,
 	dest_LowestPrio = 1,
@@ -100,6 +92,14 @@ struct IO_APIC_route_entry {
 } __attribute__ ((packed));
 
+#ifdef CONFIG_X86_IO_APIC
+
+/*
+ * # of IO-APICs and # of IRQ routing registers
+ */
+extern int nr_ioapics;
+extern int nr_ioapic_registers[MAX_IO_APICS];
+
 /*
  * MP-BIOS irq configuration table structures:
  */
...
@@ -63,7 +63,7 @@
 /*
  * x86-64 Task Priority Register, CR8
  */
-#define X86_CR8_TPR	0x00000007 /* task priority register */
+#define X86_CR8_TPR	0x0000000F /* task priority register */
 
 /*
  * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h>
...
@@ -4,8 +4,7 @@
 /*
  * Userspace interface for /dev/kvm - kernel based virtual machine
  *
- * Note: this interface is considered experimental and may change without
- * notice.
+ * Note: you must update KVM_API_VERSION if you change this interface.
  */
 
 #include <asm/types.h>
@@ -13,14 +12,8 @@
 #define KVM_API_VERSION 12
 
-/*
- * Architectural interrupt line count, and the size of the bitmap needed
- * to hold them.
- */
+/* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
-#define KVM_IRQ_BITMAP_SIZE_BYTES    ((KVM_NR_INTERRUPTS + 7) / 8)
-#define KVM_IRQ_BITMAP_SIZE(type)    (KVM_IRQ_BITMAP_SIZE_BYTES / sizeof(type))
 
 /* for KVM_CREATE_MEMORY_REGION */
 struct kvm_memory_region {
@@ -41,20 +34,89 @@ struct kvm_memory_alias {
 	__u64 target_phys_addr;
 };
 
-enum kvm_exit_reason {
-	KVM_EXIT_UNKNOWN          = 0,
-	KVM_EXIT_EXCEPTION        = 1,
-	KVM_EXIT_IO               = 2,
-	KVM_EXIT_HYPERCALL        = 3,
-	KVM_EXIT_DEBUG            = 4,
-	KVM_EXIT_HLT              = 5,
-	KVM_EXIT_MMIO             = 6,
-	KVM_EXIT_IRQ_WINDOW_OPEN  = 7,
-	KVM_EXIT_SHUTDOWN         = 8,
-	KVM_EXIT_FAIL_ENTRY       = 9,
-	KVM_EXIT_INTR             = 10,
-};
+/* for KVM_IRQ_LINE */
+struct kvm_irq_level {
+	/*
+	 * ACPI gsi notion of irq.
+	 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
+	 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
+	 */
+	__u32 irq;
+	__u32 level;
+};
+
+/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
+struct kvm_pic_state {
+	__u8 last_irr;	/* edge detection */
+	__u8 irr;		/* interrupt request register */
+	__u8 imr;		/* interrupt mask register */
+	__u8 isr;		/* interrupt service register */
+	__u8 priority_add;	/* highest irq priority */
+	__u8 irq_base;
+	__u8 read_reg_select;
+	__u8 poll;
+	__u8 special_mask;
+	__u8 init_state;
+	__u8 auto_eoi;
+	__u8 rotate_on_auto_eoi;
+	__u8 special_fully_nested_mode;
+	__u8 init4;		/* true if 4 byte init */
+	__u8 elcr;		/* PIIX edge/trigger selection */
+	__u8 elcr_mask;
+};
+
+#define KVM_IOAPIC_NUM_PINS  24
+struct kvm_ioapic_state {
+	__u64 base_address;
+	__u32 ioregsel;
+	__u32 id;
+	__u32 irr;
+	__u32 pad;
+	union {
+		__u64 bits;
+		struct {
+			__u8 vector;
+			__u8 delivery_mode:3;
+			__u8 dest_mode:1;
+			__u8 delivery_status:1;
+			__u8 polarity:1;
+			__u8 remote_irr:1;
+			__u8 trig_mode:1;
+			__u8 mask:1;
+			__u8 reserve:7;
+			__u8 reserved[4];
+			__u8 dest_id;
+		} fields;
+	} redirtbl[KVM_IOAPIC_NUM_PINS];
+};
+
+#define KVM_IRQCHIP_PIC_MASTER   0
+#define KVM_IRQCHIP_PIC_SLAVE    1
+#define KVM_IRQCHIP_IOAPIC       2
+
+struct kvm_irqchip {
+	__u32 chip_id;
+	__u32 pad;
+	union {
+		char dummy[512];  /* reserving space */
+		struct kvm_pic_state pic;
+		struct kvm_ioapic_state ioapic;
+	} chip;
+};
+
+#define KVM_EXIT_UNKNOWN          0
+#define KVM_EXIT_EXCEPTION        1
+#define KVM_EXIT_IO               2
+#define KVM_EXIT_HYPERCALL        3
+#define KVM_EXIT_DEBUG            4
+#define KVM_EXIT_HLT              5
+#define KVM_EXIT_MMIO             6
+#define KVM_EXIT_IRQ_WINDOW_OPEN  7
+#define KVM_EXIT_SHUTDOWN         8
+#define KVM_EXIT_FAIL_ENTRY       9
+#define KVM_EXIT_INTR             10
+#define KVM_EXIT_SET_TPR          11
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
 	/* in */
@@ -106,11 +168,14 @@ struct kvm_run {
 		} mmio;
 		/* KVM_EXIT_HYPERCALL */
 		struct {
+			__u64 nr;
 			__u64 args[6];
 			__u64 ret;
 			__u32 longmode;
 			__u32 pad;
 		} hypercall;
+		/* Fix the size of the union. */
+		char padding[256];
 	};
 };
@@ -139,6 +204,12 @@ struct kvm_fpu {
 	__u32 pad2;
 };
 
+/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
+#define KVM_APIC_REG_SIZE 0x400
+struct kvm_lapic_state {
+	char regs[KVM_APIC_REG_SIZE];
+};
+
 struct kvm_segment {
 	__u64 base;
 	__u32 limit;
@@ -164,7 +235,7 @@ struct kvm_sregs {
 	__u64 cr0, cr2, cr3, cr4, cr8;
 	__u64 efer;
 	__u64 apic_base;
-	__u64 interrupt_bitmap[KVM_IRQ_BITMAP_SIZE(__u64)];
+	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
 };
 
 struct kvm_msr_entry {
@@ -271,6 +342,12 @@ struct kvm_signal_mask {
  */
 #define KVM_GET_VCPU_MMAP_SIZE    _IO(KVMIO,   0x04) /* in bytes */
 
+/*
+ * Extension capability list.
+ */
+#define KVM_CAP_IRQCHIP	  0
+#define KVM_CAP_HLT	  1
+
 /*
  * ioctls for VM fds
  */
@@ -282,6 +359,11 @@ struct kvm_signal_mask {
 #define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
 #define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
+/* Device model IOC */
+#define KVM_CREATE_IRQCHIP	  _IO(KVMIO, 0x60)
+#define KVM_IRQ_LINE		  _IOW(KVMIO, 0x61, struct kvm_irq_level)
+#define KVM_GET_IRQCHIP		  _IOWR(KVMIO, 0x62, struct kvm_irqchip)
+#define KVM_SET_IRQCHIP		  _IOR(KVMIO, 0x63, struct kvm_irqchip)
 
 /*
  * ioctls for vcpu fds
@@ -300,5 +382,7 @@ struct kvm_signal_mask {
 #define KVM_SET_SIGNAL_MASK       _IOW(KVMIO, 0x8b, struct kvm_signal_mask)
 #define KVM_GET_FPU               _IOR(KVMIO, 0x8c, struct kvm_fpu)
 #define KVM_SET_FPU               _IOW(KVMIO, 0x8d, struct kvm_fpu)
+#define KVM_GET_LAPIC             _IOR(KVMIO, 0x8e, struct kvm_lapic_state)
+#define KVM_SET_LAPIC             _IOW(KVMIO, 0x8f, struct kvm_lapic_state)
 
 #endif
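[Editor's note] Userspace drives the irqchip ioctls added above roughly as follows. This is a hedged sketch, not code from this merge; KVM_CREATE_VM and KVM_CHECK_EXTENSION are pre-existing ioctls from the same header, and error handling is omitted for brevity:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
        int kvm = open("/dev/kvm", O_RDWR);
        int vm = ioctl(kvm, KVM_CREATE_VM, 0);
        struct kvm_irq_level irq = { .irq = 10, .level = 1 };

        /* KVM_CAP_IRQCHIP advertises the in-kernel PIC/IOAPIC support. */
        if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP) <= 0)
                return 1;

        ioctl(vm, KVM_CREATE_IRQCHIP, 0);   /* instantiate PIC + IOAPIC */
        ioctl(vm, KVM_IRQ_LINE, &irq);      /* assert GSI 10 */
        irq.level = 0;
        ioctl(vm, KVM_IRQ_LINE, &irq);      /* deassert it again */
        return 0;
}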