Commit 5ed92a8a authored by Brian Gerst, committed by Ingo Molnar

x86/vm86: Use the normal pt_regs area for vm86

Change to use the normal pt_regs area to enter and exit vm86
mode.  This is done by increasing the padding at the top of the
stack to make room for the extra vm86 segment slots in the IRET
frame.  Entering vm86 mode saves the 32-bit regs in the
off-stack vm86 data and copies the vm86 regs into the pt_regs
area; exiting back to 32-bit mode does the reverse.  This allows
removing the hacks that jumped directly into the exit asm code
because the stack pointer had to be switched.  Returning
normally from the vm86 syscall and the exception handlers lets
things like ptrace and auditing work properly.
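
For illustration only, a minimal user-space sketch of the idea: the
struct layouts and the enter_vm86()/leave_vm86() helpers below are
simplified stand-ins, not the kernel's real definitions; they only
mirror the memcpy()-based swap that do_sys_vm86() and save_v86_state()
now perform on the pt_regs area.

/* sketch.c - models the register swap; build with: gcc -Wall sketch.c */
#include <stdio.h>
#include <string.h>

struct pt_regs   { unsigned long ax, flags; };               /* simplified */
struct vm86_regs { struct pt_regs pt; unsigned long es, ds, fs, gs; };
struct vm86      { struct pt_regs regs32; };                 /* off-stack save area */

/* Entering vm86: stash the 32-bit regs off-stack, install the vm86 regs. */
static void enter_vm86(struct pt_regs *regs, struct vm86 *vm86,
                       const struct vm86_regs *vm86regs)
{
	memcpy(&vm86->regs32, regs, sizeof(*regs));          /* save old state */
	memcpy(regs, &vm86regs->pt, sizeof(*regs));          /* copy in vm86 regs */
	/* the real code also bumps sp0 by 16 and calls force_iret() here */
}

/* Leaving vm86: restore the 32-bit regs and set the syscall return value. */
static void leave_vm86(struct pt_regs *regs, const struct vm86 *vm86, int retval)
{
	memcpy(regs, &vm86->regs32, sizeof(*regs));
	regs->ax = (unsigned long)retval;                    /* e.g. VM86_SIGNAL */
}

int main(void)
{
	struct pt_regs regs = { .ax = 123, .flags = 0x202 };
	struct vm86 vm86 = { { 0, 0 } };
	struct vm86_regs vm86regs = { .pt = { .ax = 0, .flags = 0x23202 } };

	enter_vm86(&regs, &vm86, &vm86regs);    /* regs now holds the vm86 frame */
	leave_vm86(&regs, &vm86, 0);            /* regs restored, ax = retval    */
	printf("ax=%lu flags=%#lx\n", regs.ax, regs.flags);  /* ax=0 flags=0x202 */
	return 0;
}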
Signed-off-by: Brian Gerst <brgerst@gmail.com>
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1438148483-11932-5-git-send-email-brgerst@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 90c6085a
@@ -525,34 +525,12 @@ work_resched:
 
 work_notifysig:				# deal with pending signals and
 					# notify-resume requests
-#ifdef CONFIG_VM86
-	testl	$X86_EFLAGS_VM, PT_EFLAGS(%esp)
-	movl	%esp, %eax
-	jnz	work_notifysig_v86		# returning to kernel-space or
-						# vm86-space
-1:
-#else
-	movl	%esp, %eax
-#endif
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	movb	PT_CS(%esp), %bl
-	andb	$SEGMENT_RPL_MASK, %bl
-	cmpb	$USER_RPL, %bl
-	jb	resume_kernel
+	movl	%esp, %eax
 	xorl	%edx, %edx
 	call	do_notify_resume
 	jmp	resume_userspace
 
-#ifdef CONFIG_VM86
-	ALIGN
-work_notifysig_v86:
-	pushl	%ecx			# save ti_flags for do_notify_resume
-	call	save_v86_state		# %eax contains pt_regs pointer
-	popl	%ecx
-	movl	%eax, %esp
-	jmp	1b
-#endif
 END(work_pending)
 
 	# perform syscall exit tracing
...
@@ -27,14 +27,17 @@
  * Without this offset, that can result in a page fault.  (We are
  * careful that, in this case, the value we read doesn't matter.)
  *
- * In vm86 mode, the hardware frame is much longer still, but we neither
- * access the extra members from NMI context, nor do we write such a
- * frame at sp0 at all.
+ * In vm86 mode, the hardware frame is much longer still, so add 16
+ * bytes to make room for the real-mode segments.
  *
  * x86_64 has a fixed-length stack frame.
  */
 #ifdef CONFIG_X86_32
-# define TOP_OF_KERNEL_STACK_PADDING 8
+# ifdef CONFIG_VM86
+#  define TOP_OF_KERNEL_STACK_PADDING 16
+# else
+#  define TOP_OF_KERNEL_STACK_PADDING 8
+# endif
 #else
 # define TOP_OF_KERNEL_STACK_PADDING 0
 #endif
...
@@ -29,7 +29,7 @@ struct kernel_vm86_regs {
 
 struct vm86 {
 	struct vm86plus_struct __user *vm86_info;
-	struct pt_regs *regs32;
+	struct pt_regs regs32;
 	unsigned long v86flags;
 	unsigned long v86mask;
 	unsigned long saved_sp0;
@@ -46,7 +46,7 @@ struct vm86 {
 
 void handle_vm86_fault(struct kernel_vm86_regs *, long);
 int handle_vm86_trap(struct kernel_vm86_regs *, long, int);
-struct pt_regs *save_v86_state(struct kernel_vm86_regs *);
+void save_v86_state(struct kernel_vm86_regs *, int);
 
 struct task_struct;
 void release_vm86_irqs(struct task_struct *);
@@ -69,6 +69,8 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c)
 	return 0;
 }
 
+static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { }
+
 #define free_vm86(t) do { } while(0)
 
 #endif /* CONFIG_VM86 */
...
@@ -635,6 +635,9 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	bool stepping, failed;
 	struct fpu *fpu = &current->thread.fpu;
 
+	if (v8086_mode(regs))
+		save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL);
+
 	/* Are we from a system call? */
 	if (syscall_get_nr(current, regs) >= 0) {
 		/* If so, check system call restarting.. */
...
@@ -50,6 +50,7 @@
 #include <asm/io.h>
 #include <asm/tlbflush.h>
 #include <asm/irq.h>
+#include <asm/traps.h>
 
 /*
  * Known problems:
@@ -87,10 +88,9 @@
 #define SAFE_MASK	(0xDD5)
 #define RETURN_MASK	(0xDFF)
 
-struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
+void save_v86_state(struct kernel_vm86_regs *regs, int retval)
 {
 	struct tss_struct *tss;
-	struct pt_regs *ret;
 	struct task_struct *tsk = current;
 	struct vm86plus_struct __user *user;
 	struct vm86 *vm86 = current->thread.vm86;
@@ -149,11 +149,11 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
 	vm86->saved_sp0 = 0;
 	put_cpu();
 
-	ret = vm86->regs32;
+	memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
 
-	lazy_load_gs(ret->gs);
+	lazy_load_gs(vm86->regs32.gs);
 
-	return ret;
+	regs->pt.ax = retval;
 }
 
 static void mark_screen_rdonly(struct mm_struct *mm)
@@ -228,7 +228,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *v86, bool plus)
 	struct task_struct *tsk = current;
 	struct vm86 *vm86 = tsk->thread.vm86;
 	struct kernel_vm86_regs vm86regs;
-	struct pt_regs *regs32 = current_pt_regs();
+	struct pt_regs *regs = current_pt_regs();
 	unsigned long err = 0;
 
 	if (!vm86) {
@@ -287,7 +287,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *v86, bool plus)
 	} else
 		memset(&vm86->vm86plus, 0,
 		       sizeof(struct vm86plus_info_struct));
-	vm86->regs32 = regs32;
+
+	memcpy(&vm86->regs32, regs, sizeof(struct pt_regs));
 	vm86->vm86_info = v86;
 
 	/*
@@ -297,10 +298,10 @@ static long do_sys_vm86(struct vm86plus_struct __user *v86, bool plus)
 	 */
 	VEFLAGS = vm86regs.pt.flags;
 	vm86regs.pt.flags &= SAFE_MASK;
-	vm86regs.pt.flags |= regs32->flags & ~SAFE_MASK;
+	vm86regs.pt.flags |= regs->flags & ~SAFE_MASK;
 	vm86regs.pt.flags |= X86_VM_MASK;
 
-	vm86regs.pt.orig_ax = regs32->orig_ax;
+	vm86regs.pt.orig_ax = regs->orig_ax;
 
 	switch (vm86->cpu_type) {
 	case CPU_286:
@@ -318,15 +319,14 @@ static long do_sys_vm86(struct vm86plus_struct __user *v86, bool plus)
 	}
 
 	/*
-	 * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL)
+	 * Save old state
 	 */
-	regs32->ax = VM86_SIGNAL;
 	vm86->saved_sp0 = tsk->thread.sp0;
-	lazy_save_gs(regs32->gs);
+	lazy_save_gs(vm86->regs32.gs);
 
 	tss = &per_cpu(cpu_tss, get_cpu());
-	/* Set new sp0 right below 32-bit regs */
-	tsk->thread.sp0 = (unsigned long) regs32;
+	/* make room for real-mode segments */
+	tsk->thread.sp0 += 16;
 	if (cpu_has_sep)
 		tsk->thread.sysenter_cs = 0;
 	load_sp0(tss, &tsk->thread);
@@ -335,41 +335,14 @@ static long do_sys_vm86(struct vm86plus_struct __user *v86, bool plus)
 	if (vm86->flags & VM86_SCREEN_BITMAP)
 		mark_screen_rdonly(tsk->mm);
 
-	/*call __audit_syscall_exit since we do not exit via the normal paths */
-#ifdef CONFIG_AUDITSYSCALL
-	if (unlikely(current->audit_context))
-		__audit_syscall_exit(1, 0);
-#endif
-
-	__asm__ __volatile__(
-		"movl %0,%%esp\n\t"
-		"movl %1,%%ebp\n\t"
-#ifdef CONFIG_X86_32_LAZY_GS
-		"mov  %2, %%gs\n\t"
-#endif
-		"jmp resume_userspace"
-		: /* no outputs */
-		:"r" (&vm86regs), "r" (task_thread_info(tsk)), "r" (0));
-	unreachable();	/* we never return here */
-}
-
-static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval)
-{
-	struct pt_regs *regs32;
-
-	regs32 = save_v86_state(regs16);
-	regs32->ax = retval;
-	__asm__ __volatile__("movl %0,%%esp\n\t"
-		"movl %1,%%ebp\n\t"
-		"jmp resume_userspace"
-		: : "r" (regs32), "r" (current_thread_info()));
+	memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
+	force_iret();
+	return regs->ax;
 }
 
 static inline void set_IF(struct kernel_vm86_regs *regs)
 {
 	VEFLAGS |= X86_EFLAGS_VIF;
-	if (VEFLAGS & X86_EFLAGS_VIP)
-		return_to_32bit(regs, VM86_STI);
 }
 
 static inline void clear_IF(struct kernel_vm86_regs *regs)
@@ -549,7 +522,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
 		return;
 
 cannot_handle:
-	return_to_32bit(regs, VM86_INTx + (i << 8));
+	save_v86_state(regs, VM86_INTx + (i << 8));
 }
 
 int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
@@ -558,11 +531,7 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
 
 	if (vm86->vm86plus.is_vm86pus) {
 		if ((trapno == 3) || (trapno == 1)) {
-			vm86->regs32->ax = VM86_TRAP + (trapno << 8);
-			/* setting this flag forces the code in entry_32.S to
-			   the path where we call save_v86_state() and change
-			   the stack pointer to regs32 */
-			set_thread_flag(TIF_NOTIFY_RESUME);
+			save_v86_state(regs, VM86_TRAP + (trapno << 8));
 			return 0;
 		}
 		do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
@@ -588,12 +557,6 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
 #define CHECK_IF_IN_TRAP \
 	if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \
 		newflags |= X86_EFLAGS_TF
-#define VM86_FAULT_RETURN do { \
-	if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \
-		return_to_32bit(regs, VM86_PICRETURN); \
-	if (orig_flags & X86_EFLAGS_TF) \
-		handle_vm86_trap(regs, 0, 1); \
-	return; } while (0)
 
 	orig_flags = *(unsigned short *)&regs->pt.flags;
 
@@ -632,7 +595,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
 			SP(regs) -= 2;
 		}
 		IP(regs) = ip;
-		VM86_FAULT_RETURN;
+		goto vm86_fault_return;
 
 	/* popf */
 	case 0x9d:
@@ -652,7 +615,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
 		else
 			set_vflags_short(newflags, regs);
 
-		VM86_FAULT_RETURN;
+		goto check_vip;
 	}
 
 	/* int xx */
@@ -660,8 +623,10 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
 		int intno = popb(csp, ip, simulate_sigsegv);
 		IP(regs) = ip;
 		if (vmpi->vm86dbg_active) {
-			if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3])
-				return_to_32bit(regs, VM86_INTx + (intno << 8));
+			if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) {
+				save_v86_state(regs, VM86_INTx + (intno << 8));
+				return;
+			}
 		}
 		do_int(regs, intno, ssp, sp);
 		return;
@@ -692,14 +657,14 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
 		} else {
 			set_vflags_short(newflags, regs);
 		}
-		VM86_FAULT_RETURN;
+		goto check_vip;
 	}
 
 	/* cli */
 	case 0xfa:
 		IP(regs) = ip;
 		clear_IF(regs);
-		VM86_FAULT_RETURN;
+		goto vm86_fault_return;
 
 	/* sti */
 	/*
@@ -711,12 +676,27 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
 	case 0xfb:
 		IP(regs) = ip;
 		set_IF(regs);
-		VM86_FAULT_RETURN;
+		goto check_vip;
 
 	default:
-		return_to_32bit(regs, VM86_UNKNOWN);
+		save_v86_state(regs, VM86_UNKNOWN);
+	}
+
+	return;
+
+check_vip:
+	if (VEFLAGS & X86_EFLAGS_VIP) {
+		save_v86_state(regs, VM86_STI);
+		return;
 	}
 
+vm86_fault_return:
+	if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) {
+		save_v86_state(regs, VM86_PICRETURN);
+		return;
+	}
+	if (orig_flags & X86_EFLAGS_TF)
+		handle_vm86_trap(regs, 0, X86_TRAP_DB);
 	return;
 
 simulate_sigsegv:
@@ -730,7 +710,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
 	 * should be a mixture of the two, but how do we
 	 * get the information? [KD]
 	 */
-	return_to_32bit(regs, VM86_UNKNOWN);
+	save_v86_state(regs, VM86_UNKNOWN);
 }
 
 /* ---------------- vm86 special IRQ passing stuff ----------------- */
...