Commit 7223b915 authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus

* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus:
  lguest: stop using KVM hypercall mechanism
  lguest: workaround cmpxchg8b_emu by ignoring cli in the guest.
parents d471a4b9 091ebf07
...@@ -28,22 +28,39 @@ ...@@ -28,22 +28,39 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <asm/hw_irq.h> #include <asm/hw_irq.h>
#include <asm/kvm_para.h>
/*G:030 /*G:030
* But first, how does our Guest contact the Host to ask for privileged * But first, how does our Guest contact the Host to ask for privileged
* operations? There are two ways: the direct way is to make a "hypercall", * operations? There are two ways: the direct way is to make a "hypercall",
* to make requests of the Host Itself. * to make requests of the Host Itself.
* *
* We use the KVM hypercall mechanism, though completely different hypercall * Our hypercall mechanism uses the highest unused trap code (traps 32 and
* numbers. Seventeen hypercalls are available: the hypercall number is put in * above are used by real hardware interrupts). Seventeen hypercalls are
* the %eax register, and the arguments (when required) are placed in %ebx, * available: the hypercall number is put in the %eax register, and the
* %ecx, %edx and %esi. If a return value makes sense, it's returned in %eax. * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
* If a return value makes sense, it's returned in %eax.
* *
* Grossly invalid calls result in Sudden Death at the hands of the vengeful * Grossly invalid calls result in Sudden Death at the hands of the vengeful
* Host, rather than returning failure. This reflects Winston Churchill's * Host, rather than returning failure. This reflects Winston Churchill's
* definition of a gentleman: "someone who is only rude intentionally". * definition of a gentleman: "someone who is only rude intentionally".
:*/ */
/*
 * hcall - make a hypercall by raising the LGUEST_TRAP_ENTRY software trap.
 * @call: hypercall number, loaded into %eax before the trap.
 * @arg1: first argument, loaded into %ebx.
 * @arg2: second argument, loaded into %ecx.
 * @arg3: third argument, loaded into %edx.
 * @arg4: fourth argument, loaded into %esi.
 *
 * All five registers are always loaded; the callers in this file pass 0
 * for argument slots that a particular hypercall does not use.
 *
 * Returns whatever value is in %eax once the trap instruction completes.
 */
static inline unsigned long
hcall(unsigned long call,
unsigned long arg1, unsigned long arg2, unsigned long arg3,
unsigned long arg4)
{
/* "int" is the Intel instruction to trigger a trap. */
asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
/* The call in %eax (aka "a") might be overwritten, so %eax is declared
 * as an output too: its value after the trap is written back into
 * "call" and becomes our return value. */
: "=a"(call)
/* The arguments are in %eax, %ebx, %ecx, %edx & %esi */
: "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4)
/* "memory" means this might write somewhere in memory.
* This isn't true for all calls, but it's safe to tell
* gcc that it might happen so it doesn't get clever. */
: "memory");
return call;
}
/* Can't use our min() macro here: needs to be a constant */ /* Can't use our min() macro here: needs to be a constant */
#define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)
......
...@@ -115,7 +115,7 @@ static void async_hcall(unsigned long call, unsigned long arg1, ...@@ -115,7 +115,7 @@ static void async_hcall(unsigned long call, unsigned long arg1,
local_irq_save(flags); local_irq_save(flags);
if (lguest_data.hcall_status[next_call] != 0xFF) { if (lguest_data.hcall_status[next_call] != 0xFF) {
/* Table full, so do normal hcall which will flush table. */ /* Table full, so do normal hcall which will flush table. */
kvm_hypercall4(call, arg1, arg2, arg3, arg4); hcall(call, arg1, arg2, arg3, arg4);
} else { } else {
lguest_data.hcalls[next_call].arg0 = call; lguest_data.hcalls[next_call].arg0 = call;
lguest_data.hcalls[next_call].arg1 = arg1; lguest_data.hcalls[next_call].arg1 = arg1;
...@@ -145,46 +145,45 @@ static void async_hcall(unsigned long call, unsigned long arg1, ...@@ -145,46 +145,45 @@ static void async_hcall(unsigned long call, unsigned long arg1,
* So, when we're in lazy mode, we call async_hcall() to store the call for * So, when we're in lazy mode, we call async_hcall() to store the call for
* future processing: * future processing:
*/ */
static void lazy_hcall1(unsigned long call, static void lazy_hcall1(unsigned long call, unsigned long arg1)
unsigned long arg1)
{ {
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
kvm_hypercall1(call, arg1); hcall(call, arg1, 0, 0, 0);
else else
async_hcall(call, arg1, 0, 0, 0); async_hcall(call, arg1, 0, 0, 0);
} }
/* You can imagine what lazy_hcall2, 3 and 4 look like. :*/ /* You can imagine what lazy_hcall2, 3 and 4 look like. :*/
static void lazy_hcall2(unsigned long call, static void lazy_hcall2(unsigned long call,
unsigned long arg1, unsigned long arg1,
unsigned long arg2) unsigned long arg2)
{ {
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
kvm_hypercall2(call, arg1, arg2); hcall(call, arg1, arg2, 0, 0);
else else
async_hcall(call, arg1, arg2, 0, 0); async_hcall(call, arg1, arg2, 0, 0);
} }
static void lazy_hcall3(unsigned long call, static void lazy_hcall3(unsigned long call,
unsigned long arg1, unsigned long arg1,
unsigned long arg2, unsigned long arg2,
unsigned long arg3) unsigned long arg3)
{ {
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
kvm_hypercall3(call, arg1, arg2, arg3); hcall(call, arg1, arg2, arg3, 0);
else else
async_hcall(call, arg1, arg2, arg3, 0); async_hcall(call, arg1, arg2, arg3, 0);
} }
#ifdef CONFIG_X86_PAE #ifdef CONFIG_X86_PAE
static void lazy_hcall4(unsigned long call, static void lazy_hcall4(unsigned long call,
unsigned long arg1, unsigned long arg1,
unsigned long arg2, unsigned long arg2,
unsigned long arg3, unsigned long arg3,
unsigned long arg4) unsigned long arg4)
{ {
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
kvm_hypercall4(call, arg1, arg2, arg3, arg4); hcall(call, arg1, arg2, arg3, arg4);
else else
async_hcall(call, arg1, arg2, arg3, arg4); async_hcall(call, arg1, arg2, arg3, arg4);
} }
...@@ -196,13 +195,13 @@ static void lazy_hcall4(unsigned long call, ...@@ -196,13 +195,13 @@ static void lazy_hcall4(unsigned long call,
:*/ :*/
static void lguest_leave_lazy_mmu_mode(void) static void lguest_leave_lazy_mmu_mode(void)
{ {
kvm_hypercall0(LHCALL_FLUSH_ASYNC); hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
paravirt_leave_lazy_mmu(); paravirt_leave_lazy_mmu();
} }
static void lguest_end_context_switch(struct task_struct *next) static void lguest_end_context_switch(struct task_struct *next)
{ {
kvm_hypercall0(LHCALL_FLUSH_ASYNC); hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
paravirt_end_context_switch(next); paravirt_end_context_switch(next);
} }
...@@ -286,7 +285,7 @@ static void lguest_write_idt_entry(gate_desc *dt, ...@@ -286,7 +285,7 @@ static void lguest_write_idt_entry(gate_desc *dt,
/* Keep the local copy up to date. */ /* Keep the local copy up to date. */
native_write_idt_entry(dt, entrynum, g); native_write_idt_entry(dt, entrynum, g);
/* Tell Host about this new entry. */ /* Tell Host about this new entry. */
kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0);
} }
/* /*
...@@ -300,7 +299,7 @@ static void lguest_load_idt(const struct desc_ptr *desc) ...@@ -300,7 +299,7 @@ static void lguest_load_idt(const struct desc_ptr *desc)
struct desc_struct *idt = (void *)desc->address; struct desc_struct *idt = (void *)desc->address;
for (i = 0; i < (desc->size+1)/8; i++) for (i = 0; i < (desc->size+1)/8; i++)
kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0);
} }
/* /*
...@@ -321,7 +320,7 @@ static void lguest_load_gdt(const struct desc_ptr *desc) ...@@ -321,7 +320,7 @@ static void lguest_load_gdt(const struct desc_ptr *desc)
struct desc_struct *gdt = (void *)desc->address; struct desc_struct *gdt = (void *)desc->address;
for (i = 0; i < (desc->size+1)/8; i++) for (i = 0; i < (desc->size+1)/8; i++)
kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0);
} }
/* /*
...@@ -334,8 +333,8 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, ...@@ -334,8 +333,8 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
{ {
native_write_gdt_entry(dt, entrynum, desc, type); native_write_gdt_entry(dt, entrynum, desc, type);
/* Tell Host about this new entry. */ /* Tell Host about this new entry. */
kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum, hcall(LHCALL_LOAD_GDT_ENTRY, entrynum,
dt[entrynum].a, dt[entrynum].b); dt[entrynum].a, dt[entrynum].b, 0);
} }
/* /*
...@@ -931,7 +930,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta, ...@@ -931,7 +930,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta,
} }
/* Please wake us this far in the future. */ /* Please wake us this far in the future. */
kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta); hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0);
return 0; return 0;
} }
...@@ -942,7 +941,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode, ...@@ -942,7 +941,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode,
case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN: case CLOCK_EVT_MODE_SHUTDOWN:
/* A 0 argument shuts the clock down. */ /* A 0 argument shuts the clock down. */
kvm_hypercall0(LHCALL_SET_CLOCKEVENT); hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0);
break; break;
case CLOCK_EVT_MODE_ONESHOT: case CLOCK_EVT_MODE_ONESHOT:
/* This is what we expect. */ /* This is what we expect. */
...@@ -1100,7 +1099,7 @@ static void set_lguest_basic_apic_ops(void) ...@@ -1100,7 +1099,7 @@ static void set_lguest_basic_apic_ops(void)
/* STOP! Until an interrupt comes in. */ /* STOP! Until an interrupt comes in. */
static void lguest_safe_halt(void) static void lguest_safe_halt(void)
{ {
kvm_hypercall0(LHCALL_HALT); hcall(LHCALL_HALT, 0, 0, 0, 0);
} }
/* /*
...@@ -1112,8 +1111,8 @@ static void lguest_safe_halt(void) ...@@ -1112,8 +1111,8 @@ static void lguest_safe_halt(void)
*/ */
static void lguest_power_off(void) static void lguest_power_off(void)
{ {
kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), hcall(LHCALL_SHUTDOWN, __pa("Power down"),
LGUEST_SHUTDOWN_POWEROFF); LGUEST_SHUTDOWN_POWEROFF, 0, 0);
} }
/* /*
...@@ -1123,7 +1122,7 @@ static void lguest_power_off(void) ...@@ -1123,7 +1122,7 @@ static void lguest_power_off(void)
*/ */
static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
{ {
kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF); hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0);
/* The hcall won't return, but to keep gcc happy, we're "done". */ /* The hcall won't return, but to keep gcc happy, we're "done". */
return NOTIFY_DONE; return NOTIFY_DONE;
} }
...@@ -1162,7 +1161,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) ...@@ -1162,7 +1161,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
len = sizeof(scratch) - 1; len = sizeof(scratch) - 1;
scratch[len] = '\0'; scratch[len] = '\0';
memcpy(scratch, buf, len); memcpy(scratch, buf, len);
kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch)); hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0);
/* This routine returns the number of bytes actually written. */ /* This routine returns the number of bytes actually written. */
return len; return len;
...@@ -1174,7 +1173,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) ...@@ -1174,7 +1173,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
*/ */
static void lguest_restart(char *reason) static void lguest_restart(char *reason)
{ {
kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0);
} }
/*G:050 /*G:050
......
...@@ -32,7 +32,7 @@ ENTRY(lguest_entry) ...@@ -32,7 +32,7 @@ ENTRY(lguest_entry)
*/ */
movl $LHCALL_LGUEST_INIT, %eax movl $LHCALL_LGUEST_INIT, %eax
movl $lguest_data - __PAGE_OFFSET, %ebx movl $lguest_data - __PAGE_OFFSET, %ebx
.byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ int $LGUEST_TRAP_ENTRY
/* Set up the initial stack so we can run C code. */ /* Set up the initial stack so we can run C code. */
movl $(init_thread_union+THREAD_SIZE),%esp movl $(init_thread_union+THREAD_SIZE),%esp
......
...@@ -178,7 +178,7 @@ static void set_status(struct virtio_device *vdev, u8 status) ...@@ -178,7 +178,7 @@ static void set_status(struct virtio_device *vdev, u8 status)
/* We set the status. */ /* We set the status. */
to_lgdev(vdev)->desc->status = status; to_lgdev(vdev)->desc->status = status;
kvm_hypercall1(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset); hcall(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset, 0, 0, 0);
} }
static void lg_set_status(struct virtio_device *vdev, u8 status) static void lg_set_status(struct virtio_device *vdev, u8 status)
...@@ -229,7 +229,7 @@ static void lg_notify(struct virtqueue *vq) ...@@ -229,7 +229,7 @@ static void lg_notify(struct virtqueue *vq)
*/ */
struct lguest_vq_info *lvq = vq->priv; struct lguest_vq_info *lvq = vq->priv;
kvm_hypercall1(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT); hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0, 0);
} }
/* An extern declaration inside a C file is bad form. Don't do it. */ /* An extern declaration inside a C file is bad form. Don't do it. */
......
...@@ -287,6 +287,18 @@ static int emulate_insn(struct lg_cpu *cpu) ...@@ -287,6 +287,18 @@ static int emulate_insn(struct lg_cpu *cpu)
/* Decoding x86 instructions is icky. */ /* Decoding x86 instructions is icky. */
insn = lgread(cpu, physaddr, u8); insn = lgread(cpu, physaddr, u8);
/*
* Around 2.6.33, the kernel started using an emulation for the
* cmpxchg8b instruction in early boot on many configurations. This
* code isn't paravirtualized, and it tries to disable interrupts.
* Ignore it, which will Mostly Work.
*/
if (insn == 0xfa) {
/* "cli", or Clear Interrupt Enable instruction. Skip it. */
cpu->regs->eip++;
return 1;
}
/* /*
* 0x66 is an "operand prefix". It means it's using the upper 16 bits * 0x66 is an "operand prefix". It means it's using the upper 16 bits
* of the eax register. * of the eax register.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment