Commit f4b10bc6 authored by Linus Torvalds

Merge branch 'kvm-updates/2.6.40' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.40' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (131 commits)
  KVM: MMU: Use ptep_user for cmpxchg_gpte()
  KVM: Fix kvm mmu_notifier initialization order
  KVM: Add documentation for KVM_CAP_NR_VCPUS
  KVM: make guest mode entry to be rcu quiescent state
  KVM: x86 emulator: Make jmp far emulation into a separate function
  KVM: x86 emulator: Rename emulate_grpX() to em_grpX()
  KVM: x86 emulator: Remove unused arg from emulate_pop()
  KVM: x86 emulator: Remove unused arg from writeback()
  KVM: x86 emulator: Remove unused arg from read_descriptor()
  KVM: x86 emulator: Remove unused arg from seg_override()
  KVM: Validate userspace_addr of memslot when registered
  KVM: MMU: Clean up gpte reading with copy_from_user()
  KVM: PPC: booke: add sregs support
  KVM: PPC: booke: save/restore VRSAVE (a.k.a. USPRG0)
  KVM: PPC: use ticks, not usecs, for exit timing
  KVM: PPC: fix exit accounting for SPRs, tlbwe, tlbsx
  KVM: PPC: e500: emulate SVR
  KVM: VMX: Cache vmcs segment fields
  KVM: x86 emulator: consolidate segment accessors
  KVM: VMX: Avoid reading %rip unnecessarily when handling exceptions
  ...
parents 53ee7569 c8cfbb55
@@ -175,7 +175,10 @@ Parameters: vcpu id (apic id on x86)
Returns: vcpu fd on success, -1 on error
This API adds a vcpu to a virtual machine. The vcpu id is a small integer
in the range [0, max_vcpus).
in the range [0, max_vcpus). You can use the KVM_CHECK_EXTENSION ioctl()
with KVM_CAP_NR_VCPUS to determine the value of max_vcpus at run-time. If
KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is at
most 4.
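For example (a minimal sketch; kvm_fd is assumed to be an open /dev/kvm
file descriptor):

	int max_vcpus = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);

	if (max_vcpus <= 0)	/* capability absent or error: use the default */
		max_vcpus = 4;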
4.8 KVM_GET_DIRTY_LOG (vm ioctl)
@@ -261,7 +264,7 @@ See KVM_GET_REGS for the data structure.
4.13 KVM_GET_SREGS
Capability: basic
Architectures: x86
Architectures: x86, ppc
Type: vcpu ioctl
Parameters: struct kvm_sregs (out)
Returns: 0 on success, -1 on error
@@ -279,6 +282,8 @@ struct kvm_sregs {
__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
};
/* ppc -- see arch/powerpc/include/asm/kvm.h */
interrupt_bitmap is a bitmap of pending external interrupts. At most
one bit may be set. This interrupt has been acknowledged by the APIC
but not yet injected into the cpu core.
@@ -286,7 +291,7 @@ but not yet injected into the cpu core.
4.14 KVM_SET_SREGS
Capability: basic
Architectures: x86
Architectures: x86, ppc
Type: vcpu ioctl
Parameters: struct kvm_sregs (in)
Returns: 0 on success, -1 on error
@@ -1263,6 +1268,29 @@ struct kvm_assigned_msix_entry {
__u16 padding[3];
};
4.54 KVM_SET_TSC_KHZ
Capability: KVM_CAP_TSC_CONTROL
Architectures: x86
Type: vcpu ioctl
Parameters: virtual tsc_khz
Returns: 0 on success, -1 on error
Specifies the tsc frequency for the virtual machine. The unit of the
frequency is KHz.
4.55 KVM_GET_TSC_KHZ
Capability: KVM_CAP_GET_TSC_KHZ
Architectures: x86
Type: vcpu ioctl
Parameters: none
Returns: virtual tsc-khz on success, negative value on error
Returns the tsc frequency of the guest. The unit of the return value is
KHz. If the host has an unstable tsc, this ioctl returns -EIO as an
error.
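For example (a sketch only; vcpu_fd is assumed to be an open vcpu file
descriptor and both capabilities are assumed present):

	int tsc_khz;

	if (ioctl(vcpu_fd, KVM_SET_TSC_KHZ, 2000000) < 0)	/* 2 GHz */
		perror("KVM_SET_TSC_KHZ");

	tsc_khz = ioctl(vcpu_fd, KVM_GET_TSC_KHZ);
	if (tsc_khz < 0)
		perror("KVM_GET_TSC_KHZ");	/* -EIO: unstable host tsc */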
5. The kvm_run structure
Application code obtains a pointer to the kvm_run structure by
@@ -83,13 +83,13 @@
union vac {
unsigned long value;
struct {
int a_int:1;
int a_from_int_cr:1;
int a_to_int_cr:1;
int a_from_psr:1;
int a_from_cpuid:1;
int a_cover:1;
int a_bsw:1;
unsigned int a_int:1;
unsigned int a_from_int_cr:1;
unsigned int a_to_int_cr:1;
unsigned int a_from_psr:1;
unsigned int a_from_cpuid:1;
unsigned int a_cover:1;
unsigned int a_bsw:1;
long reserved:57;
};
};
@@ -97,12 +97,12 @@ union vac {
union vdc {
unsigned long value;
struct {
int d_vmsw:1;
int d_extint:1;
int d_ibr_dbr:1;
int d_pmc:1;
int d_to_pmd:1;
int d_itm:1;
unsigned int d_vmsw:1;
unsigned int d_extint:1;
unsigned int d_ibr_dbr:1;
unsigned int d_pmc:1;
unsigned int d_to_pmd:1;
unsigned int d_itm:1;
long reserved:58;
};
};
@@ -45,6 +45,114 @@ struct kvm_regs {
__u64 gpr[32];
};
#define KVM_SREGS_E_IMPL_NONE 0
#define KVM_SREGS_E_IMPL_FSL 1
#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */
/*
* Feature bits indicate which sections of the sregs struct are valid,
* both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
* corresponding to unset feature bits will not be modified. This allows
* restoring a checkpoint made without that feature, while keeping the
* default values of the new registers.
*
* KVM_SREGS_E_BASE contains:
* CSRR0/1 (refers to SRR2/3 on 40x)
* ESR
* DEAR
* MCSR
* TSR
* TCR
* DEC
* TB
* VRSAVE (USPRG0)
*/
#define KVM_SREGS_E_BASE (1 << 0)
/*
* KVM_SREGS_E_ARCH206 contains:
*
* PIR
* MCSRR0/1
* DECAR
* IVPR
*/
#define KVM_SREGS_E_ARCH206 (1 << 1)
/*
* Contains EPCR, plus the upper half of 64-bit registers
* that are 32-bit on 32-bit implementations.
*/
#define KVM_SREGS_E_64 (1 << 2)
#define KVM_SREGS_E_SPRG8 (1 << 3)
#define KVM_SREGS_E_MCIVPR (1 << 4)
/*
* IVORs are used -- contains IVOR0-15, plus additional IVORs
* in combination with an appropriate feature bit.
*/
#define KVM_SREGS_E_IVOR (1 << 5)
/*
* Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG.
* Also TLBnPS if MMUCFG[MAVN] = 1.
*/
#define KVM_SREGS_E_ARCH206_MMU (1 << 6)
/* DBSR, DBCR, IAC, DAC, DVC */
#define KVM_SREGS_E_DEBUG (1 << 7)
/* Enhanced debug -- DSRR0/1, SPRG9 */
#define KVM_SREGS_E_ED (1 << 8)
/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */
#define KVM_SREGS_E_SPE (1 << 9)
/* External Proxy (EXP) -- EPR */
#define KVM_SREGS_EXP (1 << 10)
/* External PID (E.PD) -- EPSC/EPLC */
#define KVM_SREGS_E_PD (1 << 11)
/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */
#define KVM_SREGS_E_PC (1 << 12)
/* Page table (E.PT) -- EPTCFG */
#define KVM_SREGS_E_PT (1 << 13)
/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */
#define KVM_SREGS_E_PM (1 << 14)
/*
* Special updates:
*
* Some registers may change even while a vcpu is not running.
* To avoid losing these changes, by default these registers are
* not updated by KVM_SET_SREGS. To force an update, set the bit
* in u.e.update_special corresponding to the register to be updated.
*
* The update_special field is zero on return from KVM_GET_SREGS.
*
* When restoring a checkpoint, the caller can set update_special
* to 0xffffffff to ensure that everything is restored, even new features
* that the caller doesn't know about.
*/
#define KVM_SREGS_E_UPDATE_MCSR (1 << 0)
#define KVM_SREGS_E_UPDATE_TSR (1 << 1)
#define KVM_SREGS_E_UPDATE_DEC (1 << 2)
#define KVM_SREGS_E_UPDATE_DBSR (1 << 3)
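/*
 * Illustrative sketch (not part of this header; vcpu_fd is assumed to be
 * an open vcpu file descriptor): a userspace checkpoint restore.  Sections
 * whose feature bits are clear in u.e.features are left unmodified, and
 * update_special = 0xffffffff forces even the special registers above
 * (MCSR, TSR, DEC, DBSR) to be written back.
 *
 *	struct kvm_sregs sregs;
 *
 *	ioctl(vcpu_fd, KVM_GET_SREGS, &sregs);
 *	... copy saved values into the sections this tool understands ...
 *	sregs.u.e.update_special = 0xffffffff;
 *	ioctl(vcpu_fd, KVM_SET_SREGS, &sregs);
 */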
/*
* In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
* previous KVM_GET_SREGS.
*
* Unless otherwise indicated, setting any register with KVM_SET_SREGS
* directly sets its value. It does not trigger any special semantics such
* as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct
* just received from KVM_GET_SREGS is always a no-op.
*/
struct kvm_sregs {
__u32 pvr;
union {
@@ -62,6 +170,82 @@ struct kvm_sregs {
__u64 dbat[8];
} ppc32;
} s;
struct {
union {
struct { /* KVM_SREGS_E_IMPL_FSL */
__u32 features; /* KVM_SREGS_E_FSL_ */
__u32 svr;
__u64 mcar;
__u32 hid0;
/* KVM_SREGS_E_FSL_PIDn */
__u32 pid1, pid2;
} fsl;
__u8 pad[256];
} impl;
__u32 features; /* KVM_SREGS_E_ */
__u32 impl_id; /* KVM_SREGS_E_IMPL_ */
__u32 update_special; /* KVM_SREGS_E_UPDATE_ */
__u32 pir; /* read-only */
__u64 sprg8;
__u64 sprg9; /* E.ED */
__u64 csrr0;
__u64 dsrr0; /* E.ED */
__u64 mcsrr0;
__u32 csrr1;
__u32 dsrr1; /* E.ED */
__u32 mcsrr1;
__u32 esr;
__u64 dear;
__u64 ivpr;
__u64 mcivpr;
__u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */
__u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */
__u32 tcr;
__u32 decar;
__u32 dec; /* KVM_SREGS_E_UPDATE_DEC */
/*
* Userspace can read TB directly, but the
* value reported here is consistent with "dec".
*
* Read-only.
*/
__u64 tb;
__u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */
__u32 dbcr[3];
__u32 iac[4];
__u32 dac[2];
__u32 dvc[2];
__u8 num_iac; /* read-only */
__u8 num_dac; /* read-only */
__u8 num_dvc; /* read-only */
__u8 pad;
__u32 epr; /* EXP */
__u32 vrsave; /* a.k.a. USPRG0 */
__u32 epcr; /* KVM_SREGS_E_64 */
__u32 mas0;
__u32 mas1;
__u64 mas2;
__u64 mas7_3;
__u32 mas4;
__u32 mas6;
__u32 ivor_low[16]; /* IVOR0-15 */
__u32 ivor_high[18]; /* IVOR32+, plus room to expand */
__u32 mmucfg; /* read-only */
__u32 eptcfg; /* E.PT, read-only */
__u32 tlbcfg[4];/* read-only */
__u32 tlbps[4]; /* read-only */
__u32 eplc, epsc; /* E.PD */
} e;
__u8 pad[1020];
} u;
};
@@ -61,7 +61,6 @@ static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
}
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
@@ -43,6 +43,7 @@ struct kvmppc_vcpu_e500 {
u32 host_pid[E500_PID_NUM];
u32 pid[E500_PID_NUM];
u32 svr;
u32 mas0;
u32 mas1;
@@ -58,6 +59,7 @@ struct kvmppc_vcpu_e500 {
u32 hid1;
u32 tlb0cfg;
u32 tlb1cfg;
u64 mcar;
struct kvm_vcpu vcpu;
};
@@ -223,6 +223,7 @@ struct kvm_vcpu_arch {
ulong hflags;
ulong guest_owned_ext;
#endif
u32 vrsave; /* also USPRG0 */
u32 mmucr;
ulong sprg4;
ulong sprg5;
@@ -232,6 +233,9 @@ struct kvm_vcpu_arch {
ulong csrr1;
ulong dsrr0;
ulong dsrr1;
ulong mcsrr0;
ulong mcsrr1;
ulong mcsr;
ulong esr;
u32 dec;
u32 decar;
@@ -255,6 +259,7 @@ struct kvm_vcpu_arch {
u32 dbsr;
#ifdef CONFIG_KVM_EXIT_TIMING
struct mutex exit_timing_lock;
struct kvmppc_exit_timing timing_exit;
struct kvmppc_exit_timing timing_last_enter;
u32 last_exit_type;
@@ -61,6 +61,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
/* Core-specific hooks */
@@ -142,4 +143,12 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
return r;
}
void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
#endif /* __POWERPC_KVM_PPC_H__ */
@@ -396,6 +396,7 @@ int main(void)
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
@@ -107,6 +107,16 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
return 0;
}
void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
kvmppc_get_sregs_ivor(vcpu, sregs);
}
int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_44x *vcpu_44x;
@@ -158,7 +158,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
}
kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
return emulated;
}
@@ -179,7 +178,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
}
kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
return emulated;
}
@@ -569,6 +569,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvmppc_set_msr(vcpu, regs->msr);
vcpu->arch.shared->srr0 = regs->srr0;
vcpu->arch.shared->srr1 = regs->srr1;
kvmppc_set_pid(vcpu, regs->pid);
vcpu->arch.shared->sprg0 = regs->sprg0;
vcpu->arch.shared->sprg1 = regs->sprg1;
vcpu->arch.shared->sprg2 = regs->sprg2;
@@ -584,16 +585,165 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
static void get_sregs_base(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
u64 tb = get_tb();
sregs->u.e.features |= KVM_SREGS_E_BASE;
sregs->u.e.csrr0 = vcpu->arch.csrr0;
sregs->u.e.csrr1 = vcpu->arch.csrr1;
sregs->u.e.mcsr = vcpu->arch.mcsr;
sregs->u.e.esr = vcpu->arch.esr;
sregs->u.e.dear = vcpu->arch.shared->dar;
sregs->u.e.tsr = vcpu->arch.tsr;
sregs->u.e.tcr = vcpu->arch.tcr;
sregs->u.e.dec = kvmppc_get_dec(vcpu, tb);
sregs->u.e.tb = tb;
sregs->u.e.vrsave = vcpu->arch.vrsave;
}
static int set_sregs_base(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
if (!(sregs->u.e.features & KVM_SREGS_E_BASE))
return 0;
vcpu->arch.csrr0 = sregs->u.e.csrr0;
vcpu->arch.csrr1 = sregs->u.e.csrr1;
vcpu->arch.mcsr = sregs->u.e.mcsr;
vcpu->arch.esr = sregs->u.e.esr;
vcpu->arch.shared->dar = sregs->u.e.dear;
vcpu->arch.vrsave = sregs->u.e.vrsave;
vcpu->arch.tcr = sregs->u.e.tcr;
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
vcpu->arch.dec = sregs->u.e.dec;
kvmppc_emulate_dec(vcpu);
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
/*
* FIXME: existing KVM timer handling is incomplete.
* TSR cannot be read by the guest, and its value in
* vcpu->arch is always zero. For now, just handle
* the case where the caller is trying to inject a
* decrementer interrupt.
*/
if ((sregs->u.e.tsr & TSR_DIS) &&
(vcpu->arch.tcr & TCR_DIE))
kvmppc_core_queue_dec(vcpu);
}
return 0;
}
static void get_sregs_arch206(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
sregs->u.e.features |= KVM_SREGS_E_ARCH206;
sregs->u.e.pir = 0;
sregs->u.e.mcsrr0 = vcpu->arch.mcsrr0;
sregs->u.e.mcsrr1 = vcpu->arch.mcsrr1;
sregs->u.e.decar = vcpu->arch.decar;
sregs->u.e.ivpr = vcpu->arch.ivpr;
}
static int set_sregs_arch206(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
if (!(sregs->u.e.features & KVM_SREGS_E_ARCH206))
return 0;
if (sregs->u.e.pir != 0)
return -EINVAL;
vcpu->arch.mcsrr0 = sregs->u.e.mcsrr0;
vcpu->arch.mcsrr1 = sregs->u.e.mcsrr1;
vcpu->arch.decar = sregs->u.e.decar;
vcpu->arch.ivpr = sregs->u.e.ivpr;
return 0;
}
void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
sregs->u.e.features |= KVM_SREGS_E_IVOR;
sregs->u.e.ivor_low[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
sregs->u.e.ivor_low[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
sregs->u.e.ivor_low[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
sregs->u.e.ivor_low[3] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
sregs->u.e.ivor_low[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
sregs->u.e.ivor_low[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
sregs->u.e.ivor_low[6] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
sregs->u.e.ivor_low[7] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
sregs->u.e.ivor_low[8] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
sregs->u.e.ivor_low[9] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
sregs->u.e.ivor_low[10] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
sregs->u.e.ivor_low[11] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
sregs->u.e.ivor_low[12] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
}
int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
return 0;
vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = sregs->u.e.ivor_low[0];
vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = sregs->u.e.ivor_low[1];
vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = sregs->u.e.ivor_low[2];
vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = sregs->u.e.ivor_low[3];
vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = sregs->u.e.ivor_low[4];
vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = sregs->u.e.ivor_low[5];
vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = sregs->u.e.ivor_low[6];
vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = sregs->u.e.ivor_low[7];
vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = sregs->u.e.ivor_low[8];
vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = sregs->u.e.ivor_low[9];
vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = sregs->u.e.ivor_low[10];
vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = sregs->u.e.ivor_low[11];
vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = sregs->u.e.ivor_low[12];
vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = sregs->u.e.ivor_low[13];
vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = sregs->u.e.ivor_low[14];
vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = sregs->u.e.ivor_low[15];
return 0;
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
return -ENOTSUPP;
sregs->pvr = vcpu->arch.pvr;
get_sregs_base(vcpu, sregs);
get_sregs_arch206(vcpu, sregs);
kvmppc_core_get_sregs(vcpu, sregs);
return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
return -ENOTSUPP;
int ret;
if (vcpu->arch.pvr != sregs->pvr)
return -EINVAL;
ret = set_sregs_base(vcpu, sregs);
if (ret < 0)
return ret;
ret = set_sregs_arch206(vcpu, sregs);
if (ret < 0)
return ret;
return kvmppc_core_set_sregs(vcpu, sregs);
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
@@ -380,7 +380,6 @@ lightweight_exit:
* because host interrupt handlers would get confused. */
lwz r1, VCPU_GPR(r1)(r4)
/* XXX handle USPRG0 */
/* Host interrupt handlers may have clobbered these guest-readable
* SPRGs, so we need to reload them here with the guest's values. */
lwz r3, VCPU_SPRG4(r4)
@@ -63,6 +63,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
/* Registers init */
vcpu->arch.pvr = mfspr(SPRN_PVR);
vcpu_e500->svr = mfspr(SPRN_SVR);
/* Since booke kvm only supports one core, update all vcpus' PIR to 0 */
vcpu->vcpu_id = 0;
@@ -96,6 +97,81 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
return 0;
}
void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_SPE |
KVM_SREGS_E_PM;
sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL;
sregs->u.e.impl.fsl.features = 0;
sregs->u.e.impl.fsl.svr = vcpu_e500->svr;
sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0;
sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar;
sregs->u.e.mas0 = vcpu_e500->mas0;
sregs->u.e.mas1 = vcpu_e500->mas1;
sregs->u.e.mas2 = vcpu_e500->mas2;
sregs->u.e.mas7_3 = ((u64)vcpu_e500->mas7 << 32) | vcpu_e500->mas3;
sregs->u.e.mas4 = vcpu_e500->mas4;
sregs->u.e.mas6 = vcpu_e500->mas6;
sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG);
sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg;
sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg;
sregs->u.e.tlbcfg[2] = 0;
sregs->u.e.tlbcfg[3] = 0;
sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
sregs->u.e.ivor_high[3] =
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
kvmppc_get_sregs_ivor(vcpu, sregs);
}
int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
vcpu_e500->svr = sregs->u.e.impl.fsl.svr;
vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0;
vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar;
}
if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) {
vcpu_e500->mas0 = sregs->u.e.mas0;
vcpu_e500->mas1 = sregs->u.e.mas1;
vcpu_e500->mas2 = sregs->u.e.mas2;
vcpu_e500->mas7 = sregs->u.e.mas7_3 >> 32;
vcpu_e500->mas3 = (u32)sregs->u.e.mas7_3;
vcpu_e500->mas4 = sregs->u.e.mas4;
vcpu_e500->mas6 = sregs->u.e.mas6;
}
if (!(sregs->u.e.features & KVM_SREGS_E_IVOR))
return 0;
if (sregs->u.e.features & KVM_SREGS_E_SPE) {
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] =
sregs->u.e.ivor_high[0];
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] =
sregs->u.e.ivor_high[1];
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] =
sregs->u.e.ivor_high[2];
}
if (sregs->u.e.features & KVM_SREGS_E_PM) {
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] =
sregs->u.e.ivor_high[3];
}
return kvmppc_set_sregs_ivor(vcpu, sregs);
}
struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
/*
* Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
* Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, <yu.liu@freescale.com>
*
@@ -78,8 +78,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
switch (sprn) {
case SPRN_PID:
vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
vcpu->arch.pid = spr_val;
kvmppc_set_pid(vcpu, spr_val);
break;
case SPRN_PID1:
vcpu_e500->pid[1] = spr_val; break;
@@ -175,6 +174,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
case SPRN_HID1:
kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
case SPRN_SVR:
kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break;
case SPRN_MMUCSR0:
kvmppc_set_gpr(vcpu, rt, 0); break;
/*
* Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
* Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Yu Liu, yu.liu@freescale.com
*
@@ -24,6 +24,7 @@
#include "../mm/mmu_decl.h"
#include "e500_tlb.h"
#include "trace.h"
#include "timing.h"
#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
@@ -506,6 +507,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
vcpu_e500->mas7 = 0;
}
kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
return EMULATE_DONE;
}
@@ -571,6 +573,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
write_host_tlbe(vcpu_e500, stlbsel, sesel);
}
kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
return EMULATE_DONE;
}
@@ -672,6 +675,14 @@ int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
return -1;
}
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
vcpu->arch.pid = pid;
}
void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
{
struct tlbe *tlbe;
@@ -114,6 +114,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
}
}
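/* Derive the guest's current DEC: the value the guest last loaded, minus
 * the timebase ticks that have elapsed since it was loaded. */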
u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
{
u64 jd = tb - vcpu->arch.dec_jiffies;
return vcpu->arch.dec - jd;
}
/* XXX to do:
* lhax
* lhaux
@@ -279,11 +285,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_DEC:
{
u64 jd = get_tb() - vcpu->arch.dec_jiffies;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
pr_debug("mfDEC: %x - %llx = %lx\n",
vcpu->arch.dec, jd,
kvmppc_get_gpr(vcpu, rt));
kvmppc_set_gpr(vcpu, rt,
kvmppc_get_dec(vcpu, get_tb()));
break;
}
default:
@@ -294,6 +297,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
}
kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
break;
case OP_31_XOP_STHX:
@@ -363,6 +367,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
printk("mtspr: unknown spr %x\n", sprn);
break;
}
kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
break;
case OP_31_XOP_DCBI:
@@ -175,7 +175,11 @@ int kvm_dev_ioctl_check_extension(long ext)
int r;
switch (ext) {
#ifdef CONFIG_BOOKE
case KVM_CAP_PPC_BOOKE_SREGS:
#else
case KVM_CAP_PPC_SEGSTATE:
#endif
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
@@ -284,6 +288,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
#ifdef CONFIG_KVM_EXIT_TIMING
mutex_init(&vcpu->arch.exit_timing_lock);
#endif
return 0;
}
@@ -294,12 +302,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_BOOKE
/*
* vrsave (formerly usprg0) isn't used by Linux, but may
* be used by the guest.
*
* On non-booke this is associated with Altivec and
* is handled by code in book3s.c.
*/
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
#endif
kvmppc_core_vcpu_load(vcpu, cpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvmppc_core_vcpu_put(vcpu);
#ifdef CONFIG_BOOKE
vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
#endif
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -34,8 +34,8 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
{
int i;
/* pause guest execution to avoid concurrent updates */
mutex_lock(&vcpu->mutex);
/* Take a lock to avoid concurrent updates */
mutex_lock(&vcpu->arch.exit_timing_lock);
vcpu->arch.last_exit_type = 0xDEAD;
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
@@ -49,7 +49,7 @@ void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
vcpu->arch.timing_exit.tv64 = 0;
vcpu->arch.timing_last_enter.tv64 = 0;
mutex_unlock(&vcpu->mutex);
mutex_unlock(&vcpu->arch.exit_timing_lock);
}
static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
@@ -65,6 +65,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
return;
}
mutex_lock(&vcpu->arch.exit_timing_lock);
vcpu->arch.timing_count_type[type]++;
/* sum */
@@ -93,6 +95,8 @@ static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
vcpu->arch.timing_min_duration[type] = duration;
if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
vcpu->arch.timing_max_duration[type] = duration;
mutex_unlock(&vcpu->arch.exit_timing_lock);
}
void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
@@ -147,17 +151,30 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
{
struct kvm_vcpu *vcpu = m->private;
int i;
u64 min, max, sum, sum_quad;
seq_printf(m, "%s", "type count min max sum sum_squared\n");
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
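/* Stats are accumulated in timebase ticks; convert to usecs for display. */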
min = vcpu->arch.timing_min_duration[i];
do_div(min, tb_ticks_per_usec);
max = vcpu->arch.timing_max_duration[i];
do_div(max, tb_ticks_per_usec);
sum = vcpu->arch.timing_sum_duration[i];
do_div(sum, tb_ticks_per_usec);
sum_quad = vcpu->arch.timing_sum_quad_duration[i];
do_div(sum_quad, tb_ticks_per_usec);
seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
kvm_exit_names[i],
vcpu->arch.timing_count_type[i],
vcpu->arch.timing_min_duration[i],
vcpu->arch.timing_max_duration[i],
vcpu->arch.timing_sum_duration[i],
vcpu->arch.timing_sum_quad_duration[i]);
min,
max,
sum,
sum_quad);
}
return 0;
}
@@ -30,14 +30,30 @@
#define KVM_MEMORY_SLOTS 32
/* memory slots that are not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_MMIO_SIZE 16
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
| X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
| X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
0xFFFFFF0000000000ULL)
#define CR4_RESERVED_BITS \
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXSAVE \
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
#define INVALID_PAGE (~(hpa_t)0)
#define VALID_PAGE(x) ((x) != INVALID_PAGE)
@@ -118,6 +134,9 @@ enum kvm_reg {
enum kvm_reg_ex {
VCPU_EXREG_PDPTR = NR_VCPU_REGS,
VCPU_EXREG_CR3,
VCPU_EXREG_RFLAGS,
VCPU_EXREG_CPL,
VCPU_EXREG_SEGMENTS,
};
enum {
@@ -256,7 +275,7 @@ struct kvm_mmu {
struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte, unsigned long mmu_seq);
u64 *spte, const void *pte);
hpa_t root_hpa;
int root_level;
int shadow_root_level;
@@ -340,7 +359,6 @@ struct kvm_vcpu_arch {
struct fpu guest_fpu;
u64 xcr0;
gva_t mmio_fault_cr2;
struct kvm_pio_request pio;
void *pio_data;
@@ -367,18 +385,22 @@ struct kvm_vcpu_arch {
/* emulate context */
struct x86_emulate_ctxt emulate_ctxt;
bool emulate_regs_need_sync_to_vcpu;
bool emulate_regs_need_sync_from_vcpu;
gpa_t time;
struct pvclock_vcpu_time_info hv_clock;
unsigned int hw_tsc_khz;
unsigned int time_offset;
struct page *time_page;
u64 last_host_tsc;
u64 last_guest_tsc;
u64 last_kernel_ns;
u64 last_tsc_nsec;
u64 last_tsc_write;
u32 virtual_tsc_khz;
bool tsc_catchup;
u32 tsc_catchup_mult;
s8 tsc_catchup_shift;
bool nmi_pending;
bool nmi_injected;
@@ -448,9 +470,6 @@ struct kvm_arch {
u64 last_tsc_nsec;
u64 last_tsc_offset;
u64 last_tsc_write;
u32 virtual_tsc_khz;
u32 virtual_tsc_mult;
s8 virtual_tsc_shift;
struct kvm_xen_hvm_config xen_hvm_config;
@@ -502,6 +521,8 @@ struct kvm_vcpu_stat {
u32 nmi_injections;
};
struct x86_instruction_info;
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
@@ -586,9 +607,17 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);
void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
const struct trace_print_flags *exit_reasons_str;
};
@@ -627,6 +656,13 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
extern bool tdp_enabled;
/* control of guest tsc rate supported? */
extern bool kvm_has_tsc_control;
/* minimum supported tsc_khz for guests */
extern u32 kvm_min_guest_tsc_khz;
/* maximum supported tsc_khz for guests */
extern u32 kvm_max_guest_tsc_khz;
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* kvm_run filled with mmio request */
@@ -645,9 +681,6 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
}
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
void kvm_enable_efer_bits(u64);
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
@@ -657,8 +690,6 @@ struct x86_emulate_ctxt;
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
int emulate_clts(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -721,8 +752,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
@@ -118,6 +118,7 @@
complete list. */
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
@@ -33,7 +33,6 @@ struct kvm_kpit_state {
};
struct kvm_pit {
unsigned long base_addresss;
struct kvm_io_device dev;
struct kvm_io_device speaker_dev;
struct kvm *kvm;
@@ -51,7 +50,6 @@ struct kvm_pit {
#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
#define KVM_PIT_CHANNEL_MASK 0x3
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_start);
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
void kvm_free_pit(struct kvm *kvm);
@@ -75,7 +75,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_destroy_pic(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
void kvm_pic_clear_isr_ack(struct kvm *kvm);
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
{
@@ -100,7 +99,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
int pit_has_pending_timer(struct kvm_vcpu *vcpu);
int apic_has_pending_timer(struct kvm_vcpu *vcpu);
#endif
......@@ -1206,7 +1206,7 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
static void nonpaging_update_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
const void *pte, unsigned long mmu_seq)
const void *pte)
{
WARN_ON(1);
}
@@ -3163,9 +3163,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
}
static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp,
u64 *spte,
const void *new, unsigned long mmu_seq)
struct kvm_mmu_page *sp, u64 *spte,
const void *new)
{
if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
++vcpu->kvm->stat.mmu_pde_zapped;
@@ -3173,7 +3172,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
}
++vcpu->kvm->stat.mmu_pte_updated;
vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq);
vcpu->arch.mmu.update_pte(vcpu, sp, spte, new);
}
static bool need_remote_flush(u64 old, u64 new)
@@ -3229,7 +3228,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
struct kvm_mmu_page *sp;
struct hlist_node *node;
LIST_HEAD(invalid_list);
unsigned long mmu_seq;
u64 entry, gentry, *spte;
unsigned pte_size, page_offset, misaligned, quadrant, offset;
int level, npte, invlpg_counter, r, flooded = 0;
@@ -3271,9 +3269,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
break;
}
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
spin_lock(&vcpu->kvm->mmu_lock);
if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
gentry = 0;
@@ -3345,8 +3340,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if (gentry &&
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
& mask.word))
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry,
mmu_seq);
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
if (!remote_flush && need_remote_flush(entry, *spte))
remote_flush = true;
++spte;
@@ -78,15 +78,19 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
gfn_t table_gfn, unsigned index,
static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
pt_element_t __user *ptep_user, unsigned index,
pt_element_t orig_pte, pt_element_t new_pte)
{
int npages;
pt_element_t ret;
pt_element_t *table;
struct page *page;
page = gfn_to_page(kvm, table_gfn);
npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page);
/* Check if the user is doing something meaningless. */
if (unlikely(npages != 1))
return -EFAULT;
table = kmap_atomic(page, KM_USER0);
ret = CMPXCHG(&table[index], orig_pte, new_pte);
@@ -117,6 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
gva_t addr, u32 access)
{
pt_element_t pte;
pt_element_t __user *ptep_user;
gfn_t table_gfn;
unsigned index, pt_access, uninitialized_var(pte_access);
gpa_t pte_gpa;
@@ -152,6 +157,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
pt_access = ACC_ALL;
for (;;) {
gfn_t real_gfn;
unsigned long host_addr;
index = PT_INDEX(addr, walker->level);
table_gfn = gpte_to_gfn(pte);
@@ -160,43 +168,64 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte,
offset, sizeof(pte),
PFERR_USER_MASK|PFERR_WRITE_MASK)) {
real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
PFERR_USER_MASK|PFERR_WRITE_MASK);
if (unlikely(real_gfn == UNMAPPED_GVA)) {
present = false;
break;
}
real_gfn = gpa_to_gfn(real_gfn);
host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
if (unlikely(kvm_is_error_hva(host_addr))) {
present = false;
break;
}
ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) {
present = false;
break;
}
trace_kvm_mmu_paging_element(pte, walker->level);
if (!is_present_gpte(pte)) {
if (unlikely(!is_present_gpte(pte))) {
present = false;
break;
}
if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) {
if (unlikely(is_rsvd_bits_set(&vcpu->arch.mmu, pte,
walker->level))) {
rsvd_fault = true;
break;
}
if (write_fault && !is_writable_pte(pte))
if (user_fault || is_write_protection(vcpu))
if (unlikely(write_fault && !is_writable_pte(pte)
&& (user_fault || is_write_protection(vcpu))))
eperm = true;
if (user_fault && !(pte & PT_USER_MASK))
if (unlikely(user_fault && !(pte & PT_USER_MASK)))
eperm = true;
#if PTTYPE == 64
if (fetch_fault && (pte & PT64_NX_MASK))
if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
eperm = true;
#endif
if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) {
if (!eperm && !rsvd_fault
&& unlikely(!(pte & PT_ACCESSED_MASK))) {
int ret;
trace_kvm_mmu_set_accessed_bit(table_gfn, index,
sizeof(pte));
if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
index, pte, pte|PT_ACCESSED_MASK))
ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
pte, pte|PT_ACCESSED_MASK);
if (unlikely(ret < 0)) {
present = false;
break;
} else if (ret)
goto walk;
mark_page_dirty(vcpu->kvm, table_gfn);
pte |= PT_ACCESSED_MASK;
}
@@ -241,17 +270,21 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
--walker->level;
}
if (!present || eperm || rsvd_fault)
if (unlikely(!present || eperm || rsvd_fault))
goto error;
if (write_fault && !is_dirty_gpte(pte)) {
bool ret;
if (write_fault && unlikely(!is_dirty_gpte(pte))) {
int ret;
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
pte|PT_DIRTY_MASK);
if (ret)
ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
pte, pte|PT_DIRTY_MASK);
if (unlikely(ret < 0)) {
present = false;
goto error;
} else if (ret)
goto walk;
mark_page_dirty(vcpu->kvm, table_gfn);
pte |= PT_DIRTY_MASK;
walker->ptes[walker->level - 1] = pte;
@@ -325,7 +358,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
}
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte, unsigned long mmu_seq)
u64 *spte, const void *pte)
{
pt_element_t gpte;
unsigned pte_access;
@@ -342,8 +375,6 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
kvm_release_pfn_clean(pfn);
return;
}
if (mmu_notifier_retry(vcpu, mmu_seq))
return;
/*
* we call mmu_set_spte() with host_writable = true because that
@@ -77,7 +77,7 @@ static inline u32 bit(int bitno)
void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);
......
@@ -541,6 +541,9 @@ struct kvm_ppc_pvinfo {
#define KVM_CAP_PPC_GET_PVINFO 57
#define KVM_CAP_PPC_IRQ_LEVEL 58
#define KVM_CAP_ASYNC_PF 59
#define KVM_CAP_TSC_CONTROL 60
#define KVM_CAP_GET_TSC_KHZ 61
#define KVM_CAP_PPC_BOOKE_SREGS 62
#ifdef KVM_CAP_IRQ_ROUTING
@@ -677,6 +680,9 @@ struct kvm_clock_data {
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
/* Available with KVM_CAP_PPC_GET_PVINFO */
#define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo)
/* Available with KVM_CAP_TSC_CONTROL */
#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2)
#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3)
/*
* ioctls for vcpu fds
@@ -27,6 +27,10 @@
#include <asm/kvm_host.h>
#ifndef KVM_MMIO_SIZE
#define KVM_MMIO_SIZE 8
#endif
/*
* vcpu->requests bit members
*/
@@ -43,7 +47,6 @@
#define KVM_REQ_DEACTIVATE_FPU 10
#define KVM_REQ_EVENT 11
#define KVM_REQ_APF_HALT 12
#define KVM_REQ_NMI 13
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
@@ -133,7 +136,8 @@ struct kvm_vcpu {
int mmio_read_completed;
int mmio_is_write;
int mmio_size;
unsigned char mmio_data[8];
int mmio_index;
unsigned char mmio_data[KVM_MMIO_SIZE];
gpa_t mmio_phys_addr;
#endif
@@ -292,9 +296,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
}
#define kvm_for_each_vcpu(idx, vcpup, kvm) \
for (idx = 0, vcpup = kvm_get_vcpu(kvm, idx); \
idx < atomic_read(&kvm->online_vcpus) && vcpup; \
vcpup = kvm_get_vcpu(kvm, ++idx))
for (idx = 0; \
idx < atomic_read(&kvm->online_vcpus) && \
(vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
idx++)
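/*
 * Illustrative use (a sketch, not part of this patch): kick every online
 * vcpu.  The rewritten iterator only yields non-NULL vcpu pointers.
 *
 *	struct kvm_vcpu *v;
 *	int i;
 *
 *	kvm_for_each_vcpu(i, v, kvm)
 *		kvm_vcpu_kick(v);
 */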
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
@@ -365,7 +370,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
int memslot_id(struct kvm *kvm, gfn_t gfn);
void kvm_release_pfn_dirty(pfn_t);
void kvm_release_pfn_clean(pfn_t pfn);
void kvm_set_pfn_dirty(pfn_t pfn);
@@ -587,8 +591,17 @@ static inline int kvm_deassign_device(struct kvm *kvm,
static inline void kvm_guest_enter(void)
{
BUG_ON(preemptible());
account_system_vtime(current);
current->flags |= PF_VCPU;
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
* is very similar to exiting to userspace from rcu point of view. In
* addition CPU may stay in a guest mode for quite a long time (up to
* one time slice). Let's treat guest mode as quiescent state, just like
* we do with user-mode execution.
*/
rcu_virt_note_context_switch(smp_processor_id());
}
static inline void kvm_guest_exit(void)
@@ -597,6 +610,11 @@ static inline void kvm_guest_exit(void)
current->flags &= ~PF_VCPU;
}
static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
{
return gfn_to_memslot(kvm, gfn)->id;
}
static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
gfn_t gfn)
{
@@ -167,7 +167,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
"vector=%x trig_mode=%x\n",
entry->fields.dest, entry->fields.dest_mode,
entry->fields.dest_id, entry->fields.dest_mode,
entry->fields.delivery_mode, entry->fields.vector,
entry->fields.trig_mode);