Commit 7abe2c67 authored by Andi Kleen, committed by Linus Torvalds

[PATCH] x86-64 merge for 2.6.4

The biggest new feature is working 32bit vsyscall (SYSCALL+SYSENTER)
support, mostly from Jakub Jelinek.  This improves 32bit syscall
performance greatly (latency halved, and sometimes better).  SYSENTER
support for Intel CPUs required some infrastructure changes, but it
seems to work now too.

The 64bit vsyscall vtime() just references xtime.tv_sec now.  This
should make it a lot faster too.
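As a rough C sketch (taken from the vsyscall.c hunk further down), the
fast path now reduces to a read of the globally mapped xtime:

    /* New vtime() fast path: no gettimeofday machinery, just xtime.
       May lag the precise time slightly, which is fine for time(2). */
    static time_t vtime(time_t *t)
    {
            if (unlikely(!__sysctl_vsyscall))
                    return time_syscall(t);  /* fall back to the real syscall */
            else if (t)
                    *t = __xtime.tv_sec;
            return __xtime.tv_sec;
    }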

Also included are a fix for some Intel IA32e systems and fixes for a few
long-standing bugs in NMI-like exception handlers.

And a lot of other bug fixes.

Full ChangeLog:
 - Clean up 32bit address space limit handling, fix 3GB personality
 - Move memcpy_{from,to}io export to ksyms.c file. This seems to work
   around a toolchain bug (Andreas Gruenbacher)
 - Update defconfig
 - ACPI merges from i386 (SBF should work now, acpi=strict)
 - Implement mmconfig support based on i386 code (untested)
 - Fix i386/x86-64 pci source file sharing
 - Implement ptrace access for 32bit vsyscall page
 - Always initialize all 32bit SYSENTER/SYSCALL MSRs.
 - Export run time cache line size to generic kernel
 - Remove explicit CPUID in ia32 syscall code
 - Fill in most of boot_cpu_data early
 - Remove unused PER_LINUX32 setup
 - Fix syscall trace in fast 32bit calls (Suresh B. Siddha)
 - Tighten first line of the oops again.
 - Set up ptrace registers correctly for debug,ss,double fault exceptions
 - Fix 64bit bug in sys_time64
 - Optimize time syscall/vsyscall to only read xtime
 - Fix csum_partial_copy_nocheck
 - Remove last traces of FPU emulation
 - Check properly for rescheduling in exceptions with own stack
 - Harden exception stack entries (#SS,#NMI,#MC,#DF,#DB) against bogus GS
 - Use an exception stack for machine checks
 - Handle TIF_SINGLESTEP properly in kernel exit
 - Add exception stack for debug handler
 - Disable X86_HT for Opteron optimized builds because it pulls in ACPI_BOOT
 - Fix CONFIG_ACPI_BOOT compilation without CONFIG_ACPI
 - Fix eflags handling in SYSENTER path (Jakub Jelinek)
 - Use atomic counter for enable/disable_hlt
 - Support 32bit SYSENTER vsyscall too (Jakub Jelinek)
 - Don't redefine Dprintk
 - Change some cpu/apic id arrays to char
 - Support arbitrary cpu<->apicid mappings in hard_smp_processor_id (Suresh B. Siddha)
 - Move K8 erratum #100 workaround into slow path of page fault handler.
 - Fix 32bit cdrom direct access ioctls (Jens Axboe)
 - Enable 32bit vsyscalls by default
 - Fix 32bit vsyscalls (Jakub Jelinek)
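
For reference, a 32bit process locates the vsyscall entry point through
the AT_SYSINFO auxiliary vector entry (see the ARCH_DLINFO hunk below);
normally the C library does this transparently. A hypothetical userspace
probe, built as a 32bit binary and using the getauxval() helper from
much later glibc (2.16+, an anachronism here), would look like:

    #include <stdio.h>
    #include <sys/auxv.h>   /* getauxval(): assumption, glibc 2.16+ only */

    int main(void)
    {
            /* AT_SYSINFO (32) -> __kernel_vsyscall in the vsyscall page,
               AT_SYSINFO_EHDR (33) -> the page's ELF header. Only set
               for 32bit processes on this kernel. */
            printf("__kernel_vsyscall at %#lx\n", getauxval(32));
            return 0;
    }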
parent 626942a4
@@ -160,9 +160,10 @@ config X86_CPUID
 	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
 	  /dev/cpu/31/cpuid.

+# disable it for opteron optimized builds because it pulls in ACPI_BOOT
 config X86_HT
 	bool
-	depends on SMP
+	depends on SMP && !MK8
 	default y

 config MATH_EMULATION
@@ -330,6 +331,11 @@ config PCI_DIRECT
 	depends on PCI
 	default y

+config PCI_MMCONFIG
+	bool "Support mmconfig PCI config space access"
+	depends on PCI
+	select ACPI_BOOT
+
 # the drivers/pci/msi.c code needs to be fixed first before enabling
 config PCI_USE_VECTOR
 	bool "Vector-based interrupt indexing"
......
@@ -63,7 +63,7 @@ head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kern
 libs-y				+= arch/x86_64/lib/
 core-y				+= arch/x86_64/kernel/ arch/x86_64/mm/
 core-$(CONFIG_IA32_EMULATION)	+= arch/x86_64/ia32/
-drivers-$(CONFIG_PCI)		+= arch/i386/pci/
+drivers-$(CONFIG_PCI)		+= arch/x86_64/pci/
 drivers-$(CONFIG_OPROFILE)	+= arch/x86_64/oprofile/

 boot := arch/x86_64/boot
......
@@ -11,18 +11,22 @@ obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
 obj-$(CONFIG_IA32_AOUT) += ia32_aout.o

-$(obj)/syscall32.o: $(src)/syscall32.c $(obj)/vsyscall.so
+$(obj)/syscall32.o: $(src)/syscall32.c \
+	$(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so)

 # Teach kbuild about targets
-targets := vsyscall.o vsyscall.so
+targets := $(foreach F,sysenter syscall,vsyscall-$F.o vsyscall-$F.so)

 # The DSO images are built using a special linker script
-quiet_cmd_vsyscall = SYSCALL $@
-cmd_vsyscall = $(CC) -m32 -nostdlib -shared -s \
+quiet_cmd_syscall = SYSCALL $@
+      cmd_syscall = $(CC) -m32 -nostdlib -shared -s \
			  -Wl,-soname=linux-gate.so.1 -o $@ \
			  -Wl,-T,$(filter-out FORCE,$^)

-$(obj)/vsyscall.so: $(src)/vsyscall.lds $(obj)/vsyscall.o FORCE
-	$(call if_changed,vsyscall)
+$(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \
+$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
+	$(call if_changed,syscall)

-AFLAGS_vsyscall.o = -m32
+AFLAGS_vsyscall-sysenter.o = -m32
+AFLAGS_vsyscall-syscall.o = -m32

 CFLAGS_ia32_ioctl.o += -Ifs/
@@ -32,7 +32,7 @@
 #define AT_SYSINFO 32
 #define AT_SYSINFO_EHDR 33

-int sysctl_vsyscall32;
+int sysctl_vsyscall32 = 1;

 #define ARCH_DLINFO do {  \
 	if (sysctl_vsyscall32) { \
@@ -46,7 +46,7 @@ struct elf_phdr;

 #define IA32_EMULATOR 1

-#define ELF_ET_DYN_BASE		(IA32_PAGE_OFFSET/3 + 0x1000000)
+#define ELF_ET_DYN_BASE		(TASK_UNMAPPED_32 + 0x1000000)

 #undef ELF_ARCH
 #define ELF_ARCH EM_386
@@ -261,7 +261,6 @@ do {						\
 		set_thread_flag(TIF_ABI_PENDING);	\
 	else						\
 		clear_thread_flag(TIF_ABI_PENDING);	\
-	set_personality((ibcs2)?PER_SVR4:current->personality);	\
 } while (0)

 /* Override some function names */
......
@@ -273,8 +273,6 @@ asmlinkage long sys32_sigreturn(struct pt_regs regs)
 	sigset_t set;
 	unsigned int eax;

-	set_thread_flag(TIF_IRET);
-
 	if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 	if (__get_user(set.sig[0], &frame->sc.oldmask)
@@ -305,8 +303,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs regs)
 	stack_t st;
 	unsigned int eax;

-	set_thread_flag(TIF_IRET);
-
 	if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
 		goto badframe;
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
......
@@ -12,6 +12,7 @@
 #include <asm/ia32_unistd.h>
 #include <asm/thread_info.h>
 #include <asm/segment.h>
+#include <asm/vsyscall32.h>
 #include <linux/linkage.h>

 	.macro IA32_ARG_FIXUP noebp=0
@@ -25,6 +26,99 @@
 	movl %edx,%edx		/* zero extension */
 	.endm

+	/* clobbers %eax */
+	.macro CLEAR_RREGS
+	xorl	%eax,%eax
+	movq	%rax,R11(%rsp)
+	movq	%rax,R10(%rsp)
+	movq	%rax,R9(%rsp)
+	movq	%rax,R8(%rsp)
+	.endm
+
+/*
+ * 32bit SYSENTER instruction entry.
+ *
+ * Arguments:
+ * %eax	System call number.
+ * %ebx Arg1
+ * %ecx Arg2
+ * %edx Arg3
+ * %esi Arg4
+ * %edi Arg5
+ * %ebp user stack
+ * 0(%ebp) Arg6
+ *
+ * Interrupts off.
+ *
+ * This is purely a fast path. For anything complicated we use the int 0x80
+ * path below. Set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
+ENTRY(ia32_sysenter_target)
+	CFI_STARTPROC
+	swapgs
+	movq	%gs:pda_kernelstack, %rsp
+	addq	$(PDA_STACKOFFSET),%rsp
+	sti
+	movl	%ebp,%ebp		/* zero extension */
+	pushq	$__USER32_DS
+	pushq	%rbp
+	pushfq
+	movl	$VSYSCALL32_SYSEXIT, %r10d
+	pushq	$__USER32_CS
+	movl	%eax, %eax
+	pushq	%r10
+	pushq	%rax
+	cld
+	SAVE_ARGS 0,0,1
+	/* no need to do an access_ok check here because rbp has been
+	   32bit zero extended */
+1:	movl	(%rbp),%r9d
+	.section __ex_table,"a"
+	.quad 1b,ia32_badarg
+	.previous
+	GET_THREAD_INFO(%r10)
+	bt	$TIF_SYSCALL_TRACE,threadinfo_flags(%r10)
+	jc	sysenter_tracesys
+sysenter_do_call:
+	cmpl	$(IA32_NR_syscalls),%eax
+	jae	ia32_badsys
+	IA32_ARG_FIXUP 1
+	call	*ia32_sys_call_table(,%rax,8)
+	movq	%rax,RAX-ARGOFFSET(%rsp)
+	GET_THREAD_INFO(%r10)
+	cli
+	testl	$_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+	jnz	int_ret_from_sys_call
+	/* clear IF, that popfq doesn't enable interrupts early */
+	andl	$~0x200,EFLAGS-R11(%rsp)
+	RESTORE_ARGS 1,24,1,1,1,1
+	popfq
+	popq	%rcx			/* User %esp */
+	movl	$VSYSCALL32_SYSEXIT,%edx /* User %eip */
+	swapgs
+	sti	/* sti only takes effect after the next instruction */
+	/* sysexit */
+	.byte	0xf, 0x35
+
+sysenter_tracesys:
+	SAVE_REST
+	CLEAR_RREGS
+	movq	$-ENOSYS,RAX(%rsp)	/* really needed? */
+	movq	%rsp,%rdi		/* &pt_regs -> arg1 */
+	call	syscall_trace
+	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
+	RESTORE_REST
+	movl	%ebp, %ebp
+	/* no need to do an access_ok check here because rbp has been
+	   32bit zero extended */
+1:	movl	(%rbp),%r9d
+	.section __ex_table,"a"
+	.quad 1b,ia32_badarg
+	.previous
+	jmp	sysenter_do_call
+	CFI_ENDPROC
+
 /*
  * 32bit SYSCALL instruction entry.
  *
@@ -51,7 +145,7 @@ ENTRY(ia32_cstar_target)
 	movl	%esp,%r8d
 	movq	%gs:pda_kernelstack,%rsp
 	sti
-	SAVE_ARGS 8,1
+	SAVE_ARGS 8,1,1
 	movl	%eax,%eax		/* zero extension */
 	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
 	movq	%rcx,RIP-ARGOFFSET(%rsp)
@@ -66,47 +160,48 @@ ENTRY(ia32_cstar_target)
 	/* hardware stack frame is complete now */
 1:	movl	(%r8),%r9d
 	.section __ex_table,"a"
-	.quad 1b,cstar_badarg
+	.quad 1b,ia32_badarg
 	.previous
 	GET_THREAD_INFO(%r10)
 	bt	$TIF_SYSCALL_TRACE,threadinfo_flags(%r10)
-	jc	ia32_tracesys
+	jc	cstar_tracesys
 cstar_do_call:
 	cmpl	$IA32_NR_syscalls,%eax
 	jae	ia32_badsys
 	IA32_ARG_FIXUP 1
 	call	*ia32_sys_call_table(,%rax,8)
-	.globl cstar_sysret
-	/* label must directly follow call */
-cstar_sysret:
 	movq	%rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	cli
 	testl	$_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
-	jnz	1f
-	RESTORE_ARGS 1,-ARG_SKIP,1,1
+	jnz	int_ret_from_sys_call
+	RESTORE_ARGS 1,-ARG_SKIP,1,1,1
 	movl	RIP-ARGOFFSET(%rsp),%ecx
 	movl	EFLAGS-ARGOFFSET(%rsp),%r11d
 	movl	RSP-ARGOFFSET(%rsp),%esp
 	swapgs
 	sysretl
-1:
-	btc	$TIF_IRET,threadinfo_flags(%r10)
-	jmp	int_ret_from_sys_call

 cstar_tracesys:
 	SAVE_REST
+	CLEAR_RREGS
 	movq	$-ENOSYS,RAX(%rsp)	/* really needed? */
 	movq	%rsp,%rdi		/* &pt_regs -> arg1 */
 	call	syscall_trace
 	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
 	RESTORE_REST
+	movl	RSP-ARGOFFSET(%rsp), %r8d
+	/* no need to do an access_ok check here because r8 has been
+	   32bit zero extended */
+1:	movl	(%r8),%r9d
+	.section __ex_table,"a"
+	.quad 1b,ia32_badarg
+	.previous
 	jmp	cstar_do_call

-cstar_badarg:
+ia32_badarg:
 	movq	$-EFAULT,%rax
-	jmp	cstar_sysret
+	jmp	ia32_sysret
 	CFI_ENDPROC

 /*
@@ -139,7 +234,7 @@ ENTRY(ia32_syscall)
 	cld
 	/* note the registers are not zero extended to the sf.
 	   this could be a problem. */
-	SAVE_ARGS
+	SAVE_ARGS 0,0,1
 	GET_THREAD_INFO(%r10)
 	bt	$TIF_SYSCALL_TRACE,threadinfo_flags(%r10)
 	jc	ia32_tracesys
@@ -148,6 +243,7 @@ ia32_do_syscall:
 	jae	ia32_badsys
 	IA32_ARG_FIXUP
 	call	*ia32_sys_call_table(,%rax,8) # xxx: rip relative
+ia32_sysret:
 	movq	%rax,RAX-ARGOFFSET(%rsp)
 	jmp	int_ret_from_sys_call

@@ -200,8 +296,7 @@ ENTRY(ia32_ptregs_common)
 	call	*%rax
 	movq	%r15, %r11
 	RESTORE_REST
-	cmpq	$cstar_sysret,%r11
-	je	int_ret_from_sys_call	/* misbalances the call/ret stack. sorry */
+	leaq	ia32_sysret(%rip),%r11
 	pushq	%r11
 	ret
 	CFI_ENDPROC
......
@@ -1876,18 +1876,9 @@ long sys32_quotactl(void)

 cond_syscall(sys32_ipc)

-struct exec_domain ia32_exec_domain = {
-	.name = "linux/x86",
-	.pers_low = PER_LINUX32,
-	.pers_high = PER_LINUX32,
-};
-
 static int __init ia32_init (void)
 {
 	printk("IA32 emulation $Id: sys_ia32.c,v 1.32 2002/03/24 13:02:28 ak Exp $\n");
-	ia32_exec_domain.signal_map = default_exec_domain.signal_map;
-	ia32_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
-	register_exec_domain(&ia32_exec_domain);
 	return 0;
 }
......
@@ -13,16 +13,22 @@
 #include <asm/tlbflush.h>
 #include <asm/ia32_unistd.h>

-/* 32bit VDSO mapped into user space. */
+/* 32bit VDSOs mapped into user space. */
 asm(".section \".init.data\",\"aw\"\n"
-    "syscall32:\n"
-    ".incbin \"arch/x86_64/ia32/vsyscall.so\"\n"
-    "syscall32_end:\n"
+    "syscall32_syscall:\n"
+    ".incbin \"arch/x86_64/ia32/vsyscall-syscall.so\"\n"
+    "syscall32_syscall_end:\n"
+    "syscall32_sysenter:\n"
+    ".incbin \"arch/x86_64/ia32/vsyscall-sysenter.so\"\n"
+    "syscall32_sysenter_end:\n"
     ".previous");

-extern unsigned char syscall32[], syscall32_end[];
+extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
+extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
+extern int sysctl_vsyscall32;

 char *syscall32_page;
+static int use_sysenter __initdata = -1;

 /* RED-PEN: This knows too much about high level VM */
 /* Alternative would be to generate a vma with appropriate backing options
@@ -58,8 +64,28 @@ static int __init init_syscall32(void)
 	if (!syscall32_page)
 		panic("Cannot allocate syscall32 page");
 	SetPageReserved(virt_to_page(syscall32_page));
-	memcpy(syscall32_page, syscall32, syscall32_end - syscall32);
+	if (use_sysenter > 0) {
+		memcpy(syscall32_page, syscall32_sysenter,
+		       syscall32_sysenter_end - syscall32_sysenter);
+	} else {
+		memcpy(syscall32_page, syscall32_syscall,
+		       syscall32_syscall_end - syscall32_syscall);
+	}
 	return 0;
 }

 __initcall(init_syscall32);
+
+void __init syscall32_cpu_init(void)
+{
+	if (use_sysenter < 0)
+		use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
+
+	/* Load these always in case some future AMD CPU supports
+	   SYSENTER from compat mode too. */
+	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+	wrmsr(MSR_IA32_SYSENTER_ESP, 0, 0);
+	wrmsrl(MSR_IA32_SYSENTER_EIP, ia32_sysenter_target);
+
+	wrmsrl(MSR_CSTAR, ia32_cstar_target);
+}
 /*
- * Code for the vsyscall page. This version uses the syscall instruction.
+ * Common code for the sigreturn entry points on the vsyscall page.
+ * This code uses SYSCALL_ENTER_KERNEL (either syscall or int $0x80)
+ * to enter the kernel.
+ * This file is #include'd by vsyscall-*.S to define them after the
+ * vsyscall entry point. The addresses we get for these entry points
+ * by doing ".balign 32" must match in both versions of the page.
  */
-#include <asm/ia32_unistd.h>
-#include <asm/offset.h>
-
-	.text
-	.section .text.vsyscall,"ax"
-	.globl __kernel_vsyscall
-	.type __kernel_vsyscall,@function
-__kernel_vsyscall:
-.LSTART_vsyscall:
-	push	%ebp
-.Lpush_ebp:
-	movl	%ecx, %ebp
-	syscall
-	popl	%ebp
-.Lpop_ebp:
-	ret
-.LEND_vsyscall:
-	.size __kernel_vsyscall,.-.LSTART_vsyscall

 	.section .text.sigreturn,"ax"
 	.balign 32
 	.globl __kernel_sigreturn
@@ -29,7 +15,7 @@ __kernel_sigreturn:
 .LSTART_sigreturn:
 	popl	%eax
 	movl	$__NR_ia32_sigreturn, %eax
-	syscall
+	SYSCALL_ENTER_KERNEL
 .LEND_sigreturn:
 	.size __kernel_sigreturn,.-.LSTART_sigreturn
@@ -40,49 +26,11 @@ __kernel_sigreturn:
 __kernel_rt_sigreturn:
 .LSTART_rt_sigreturn:
 	movl	$__NR_ia32_rt_sigreturn, %eax
-	syscall
+	SYSCALL_ENTER_KERNEL
 .LEND_rt_sigreturn:
 	.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn

 	.section .eh_frame,"a",@progbits
-.LSTARTFRAME:
-	.long .LENDCIE-.LSTARTCIE
-.LSTARTCIE:
-	.long 0			/* CIE ID */
-	.byte 1			/* Version number */
-	.string "zR"		/* NUL-terminated augmentation string */
-	.uleb128 1		/* Code alignment factor */
-	.sleb128 -4		/* Data alignment factor */
-	.byte 8			/* Return address register column */
-	.uleb128 1		/* Augmentation value length */
-	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
-	.byte 0x0c		/* DW_CFA_def_cfa */
-	.uleb128 4
-	.uleb128 4
-	.byte 0x88		/* DW_CFA_offset, column 0x8 */
-	.uleb128 1
-	.align 4
-.LENDCIE:
-
-	.long .LENDFDE1-.LSTARTFDE1	/* Length FDE */
-.LSTARTFDE1:
-	.long .LSTARTFDE1-.LSTARTFRAME	/* CIE pointer */
-	.long .LSTART_vsyscall-.	/* PC-relative start address */
-	.long .LEND_vsyscall-.LSTART_vsyscall
-	.uleb128 0			/* Augmentation length */
-	/* What follows are the instructions for the table generation.
-	   We have to record all changes of the stack pointer. */
-	.byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.uleb128 8
-	.byte 0x85, 0x02	/* DW_CFA_offset %ebp -8 */
-	.byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
-	.byte 0xc5		/* DW_CFA_restore %ebp */
-	.byte 0x0e		/* DW_CFA_def_cfa_offset */
-	.uleb128 4
-	.align 4
-.LENDFDE1:
-
 	.long .LENDFDE2-.LSTARTFDE2	/* Length FDE */
 .LSTARTFDE2:
 	.long .LSTARTFDE2-.LSTARTFRAME	/* CIE pointer */
......
/*
* Code for the vsyscall page. This version uses the syscall instruction.
*/
#include <asm/ia32_unistd.h>
#include <asm/offset.h>
#include <asm/segment.h>
.text
.section .text.vsyscall,"ax"
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
__kernel_vsyscall:
.LSTART_vsyscall:
push %ebp
.Lpush_ebp:
movl %ecx, %ebp
syscall
movl $__USER32_DS, %ecx
movl %ecx, %ss
movl %ebp, %ecx
popl %ebp
.Lpop_ebp:
ret
.LEND_vsyscall:
.size __kernel_vsyscall,.-.LSTART_vsyscall
.section .eh_frame,"a",@progbits
.LSTARTFRAME:
.long .LENDCIE-.LSTARTCIE
.LSTARTCIE:
.long 0 /* CIE ID */
.byte 1 /* Version number */
.string "zR" /* NUL-terminated augmentation string */
.uleb128 1 /* Code alignment factor */
.sleb128 -4 /* Data alignment factor */
.byte 8 /* Return address register column */
.uleb128 1 /* Augmentation value length */
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
.byte 0x0c /* DW_CFA_def_cfa */
.uleb128 4
.uleb128 4
.byte 0x88 /* DW_CFA_offset, column 0x8 */
.uleb128 1
.align 4
.LENDCIE:
.long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
.LSTARTFDE1:
.long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
.long .LSTART_vsyscall-. /* PC-relative start address */
.long .LEND_vsyscall-.LSTART_vsyscall
.uleb128 0 /* Augmentation length */
/* What follows are the instructions for the table generation.
We have to record all changes of the stack pointer. */
.byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.uleb128 8
.byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */
.byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
.byte 0xc5 /* DW_CFA_restore %ebp */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.uleb128 4
.align 4
.LENDFDE1:
#define SYSCALL_ENTER_KERNEL syscall
#include "vsyscall-sigreturn.S"
/*
* Code for the vsyscall page. This version uses the sysenter instruction.
*/
#include <asm/ia32_unistd.h>
#include <asm/offset.h>
.text
.section .text.vsyscall,"ax"
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
__kernel_vsyscall:
.LSTART_vsyscall:
push %ecx
.Lpush_ecx:
push %edx
.Lpush_edx:
push %ebp
.Lenter_kernel:
movl %esp,%ebp
sysenter
.space 7,0x90
jmp .Lenter_kernel
/* 16: System call normal return point is here! */
pop %ebp
.Lpop_ebp:
pop %edx
.Lpop_edx:
pop %ecx
.Lpop_ecx:
ret
.LEND_vsyscall:
.size __kernel_vsyscall,.-.LSTART_vsyscall
.section .eh_frame,"a",@progbits
.LSTARTFRAME:
.long .LENDCIE-.LSTARTCIE
.LSTARTCIE:
.long 0 /* CIE ID */
.byte 1 /* Version number */
.string "zR" /* NUL-terminated augmentation string */
.uleb128 1 /* Code alignment factor */
.sleb128 -4 /* Data alignment factor */
.byte 8 /* Return address register column */
.uleb128 1 /* Augmentation value length */
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
.byte 0x0c /* DW_CFA_def_cfa */
.uleb128 4
.uleb128 4
.byte 0x88 /* DW_CFA_offset, column 0x8 */
.uleb128 1
.align 4
.LENDCIE:
.long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
.LSTARTFDE1:
.long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
.long .LSTART_vsyscall-. /* PC-relative start address */
.long .LEND_vsyscall-.LSTART_vsyscall
.uleb128 0 /* Augmentation length */
/* What follows are the instructions for the table generation.
We have to record all changes of the stack pointer. */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpush_ecx-.LSTART_vsyscall
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x08 /* RA at offset 8 now */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpush_edx-.Lpush_ecx
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x0c /* RA at offset 12 now */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lenter_kernel-.Lpush_edx
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x10 /* RA at offset 16 now */
.byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
/* Finally the epilogue. */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpop_ebp-.Lenter_kernel
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x12 /* RA at offset 12 now */
.byte 0xc5 /* DW_CFA_restore %ebp */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpop_edx-.Lpop_ebp
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x08 /* RA at offset 8 now */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpop_ecx-.Lpop_edx
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x04 /* RA at offset 4 now */
.align 4
.LENDFDE1:
#define SYSCALL_ENTER_KERNEL int $0x80
#include "vsyscall-sigreturn.S"
@@ -8,10 +8,9 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
 		ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_x86_64.o \
 		x8664_ksyms.o i387.o syscall.o vsyscall.o \
 		setup64.o bootflag.o e820.o reboot.o warmreboot.o
-obj-y			+= mce.o
+obj-y			+= mce.o acpi/
 obj-$(CONFIG_MTRR)	+= ../../i386/kernel/cpu/mtrr/
-obj-$(CONFIG_ACPI)	+= acpi/
 obj-$(CONFIG_X86_MSR)	+= msr.o
 obj-$(CONFIG_MICROCODE)	+= microcode.o
 obj-$(CONFIG_X86_CPUID)	+= cpuid.o
......
@@ -78,6 +78,31 @@ __acpi_map_table (
 	return NULL;
 }

+#ifdef CONFIG_PCI_MMCONFIG
+static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_mcfg *mcfg;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
+	if (!mcfg) {
+		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
+		return -ENODEV;
+	}
+
+	if (mcfg->base_reserved) {
+		printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
+		return -ENODEV;
+	}
+
+	pci_mmcfg_base_addr = mcfg->base_address;
+
+	return 0;
+}
+#endif /* CONFIG_PCI_MMCONFIG */
+
 #ifdef CONFIG_X86_LOCAL_APIC

 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -234,6 +259,24 @@ acpi_parse_nmi_src (
 #endif /*CONFIG_X86_IO_APIC*/

+static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
+{
+	struct acpi_table_sbf *sb;
+
+	if (!phys_addr || !size)
+		return -EINVAL;
+
+	sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size);
+	if (!sb) {
+		printk(KERN_WARNING PREFIX "Unable to map SBF\n");
+		return -ENODEV;
+	}
+
+	sbf_port = sb->sbf_cmos; /* Save CMOS port */
+
+	return 0;
+}
+
 #ifdef CONFIG_HPET_TIMER

 static int __init
 acpi_parse_hpet (
@@ -404,6 +447,8 @@ acpi_boot_init (void)
 		return result;
 	}

+	(void) acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+
 	result = acpi_blacklisted();
 	if (result) {
 		printk(KERN_WARNING PREFIX "BIOS listed in blacklist, disabling ACPI support\n");
@@ -550,6 +595,12 @@ acpi_boot_init (void)
 		printk("ACPI: no HPET table found (%d).\n", result);
 #endif

+#ifdef CONFIG_PCI_MMCONFIG
+	result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
+	if (result)
+		printk(KERN_ERR PREFIX "Error %d parsing MCFG\n", result);
+#endif
+
 	return 0;
 }
......
@@ -226,7 +226,7 @@ sysret_careful:
 	/* Handle a signal */
 sysret_signal:
 	sti
-	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME),%edx
+	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    1f

 	/* Really a signal */
@@ -307,7 +307,7 @@ int_very_careful:
 	jmp int_restore_rest

 int_signal:
-	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING),%edx
+	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
 	jz 1f
 	movq %rsp,%rdi		# &ptregs -> arg1
 	xorl %esi,%esi		# oldset -> arg2
@@ -489,7 +489,7 @@ retint_careful:
 	jmp retint_check

 retint_signal:
-	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME),%edx
+	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    retint_swapgs
 	sti
 	SAVE_REST
@@ -572,6 +572,24 @@ ENTRY(spurious_interrupt)
 	jmp error_entry
 	.endm

+	/* error code is on the stack already */
+	/* handle NMI like exceptions that can happen everywhere */
+	.macro paranoidentry sym
+	SAVE_ALL
+	cld
+	movl $1,%ebx
+	movl  $MSR_GS_BASE,%ecx
+	rdmsr
+	testl %edx,%edx
+	js    1f
+	swapgs
+	xorl  %ebx,%ebx
+1:	movq %rsp,%rdi
+	movq ORIG_RAX(%rsp),%rsi
+	movq $-1,ORIG_RAX(%rsp)
+	call \sym
+	.endm
+
 /*
  * Exception entry point. This expects an error code/orig_rax on the stack
  * and the exception handler in %rax.
@@ -625,6 +643,7 @@ error_sti:
 	movq ORIG_RAX(%rsp),%rsi	/* get error code */
 	movq $-1,ORIG_RAX(%rsp)
 	call *%rax
+	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
 error_exit:
 	movl %ebx,%eax
 	RESTORE_REST
@@ -776,48 +795,59 @@ ENTRY(simd_coprocessor_error)
 	zeroentry do_simd_coprocessor_error

 ENTRY(device_not_available)
-	CFI_STARTPROC
-	pushq $-1	#error code
-	SAVE_ALL
-	movl  $1,%ebx
-	testl $3,CS(%rsp)
-	je  1f
-	xorl  %ebx,%ebx
-	swapgs
-1:	movq  %cr0,%rax
-	leaq  math_state_restore(%rip),%rcx
-	leaq  math_emulate(%rip),%rdx
-	testl $0x4,%eax
-	cmoveq %rcx,%rdx
-	call  *%rdx
-	jmp  error_exit
-	CFI_ENDPROC
+	zeroentry math_state_restore

+	/* runs on exception stack */
 ENTRY(debug)
-	zeroentry do_debug
+	CFI_STARTPROC
+	pushq $0
+	CFI_ADJUST_CFA_OFFSET 8
+	paranoidentry do_debug
+paranoid_stack_switch:
+	testq %rax,%rax
+	jz    paranoid_exit
+	/* switch back to process stack to restore the state ptrace touched */
+	movq  %rax,%rsp
+	jmp   paranoid_exit
+	CFI_ENDPROC

+	/* runs on exception stack */
 ENTRY(nmi)
 	CFI_STARTPROC
 	pushq $-1
-	SAVE_ALL
-	/* NMI could happen inside the critical section of a swapgs,
-	   so it is needed to use this expensive way to check. */
-	movl  $MSR_GS_BASE,%ecx
-	rdmsr
-	xorl  %ebx,%ebx
-	testl %edx,%edx
-	js    1f
-	swapgs
-	movl  $1,%ebx
-1:	movq %rsp,%rdi		# regs -> arg1
-	call do_nmi
-	/* XXX: should do preemption checks here */
-	cli
-	testl %ebx,%ebx
-	jz 2f
-	swapgs
-2:	RESTORE_ALL 8
+	CFI_ADJUST_CFA_OFFSET 8
+	paranoidentry do_nmi
+	/* ebx: no swapgs flag */
+paranoid_exit:
+	testl $3,CS(%rsp)
+	jnz   paranoid_userspace
+	testl %ebx,%ebx			/* swapgs needed? */
+	jnz   paranoid_restore
+paranoid_swapgs:
+	cli
+	swapgs
+paranoid_restore:
+	RESTORE_ALL 8
 	iretq
+paranoid_userspace:
+	cli
+	GET_THREAD_INFO(%rcx)
+	movl threadinfo_flags(%rcx),%edx
+	testl $_TIF_NEED_RESCHED,%edx
+	jnz paranoid_resched
+	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	jnz paranoid_signal
+	jmp paranoid_swapgs
+paranoid_resched:
+	sti
+	call schedule
+	jmp paranoid_exit
+paranoid_signal:
+	sti
+	xorl %esi,%esi			/* oldset */
+	movq %rsp,%rdi			/* &pt_regs */
+	call do_notify_resume
+	jmp paranoid_exit
 	CFI_ENDPROC

 ENTRY(int3)
@@ -838,8 +868,10 @@ ENTRY(coprocessor_segment_overrun)
 ENTRY(reserved)
 	zeroentry do_reserved

+	/* runs on exception stack */
 ENTRY(double_fault)
-	errorentry do_double_fault
+	paranoidentry do_double_fault
+	jmp paranoid_stack_switch

 ENTRY(invalid_TSS)
 	errorentry do_invalid_TSS
@@ -847,8 +879,10 @@ ENTRY(invalid_TSS)
 ENTRY(segment_not_present)
 	errorentry do_segment_not_present

+	/* runs on exception stack */
 ENTRY(stack_segment)
-	errorentry do_stack_segment
+	paranoidentry do_stack_segment
+	jmp paranoid_stack_switch

 ENTRY(general_protection)
 	errorentry do_general_protection
@@ -862,8 +896,14 @@ ENTRY(divide_error)
 ENTRY(spurious_interrupt_bug)
 	zeroentry do_spurious_interrupt_bug

+	/* runs on exception stack */
 ENTRY(machine_check)
-	zeroentry do_machine_check
+	CFI_STARTPROC
+	pushq $0
+	CFI_ADJUST_CFA_OFFSET 8
+	paranoidentry do_machine_check
+	jmp paranoid_exit
+	CFI_ENDPROC

 ENTRY(call_debug)
 	zeroentry do_call_debug
......
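
The swapgs decision in the paranoidentry macro above relies on the
kernel's per-CPU GS base being a negative (sign-extended) address: if
the high half of MSR_GS_BASE has its sign bit set, the interrupted
context was already running with the kernel GS. A C rendering of that
check (illustration only; rdmsr() is the kernel macro, swapgs() is a
hypothetical stand-in for the raw instruction):

    static inline void paranoid_swapgs_check(void)
    {
            unsigned int lo, hi;
            rdmsr(MSR_GS_BASE, lo, hi);     /* the asm reads this as edx:eax */
            if ((int)hi >= 0)               /* sign bit clear: user GS base */
                    swapgs();               /* install the kernel per-CPU base */
            /* sign bit set: kernel half of the address space, GS already ours */
    }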
@@ -95,7 +95,5 @@ asmlinkage long sys_iopl(unsigned int level, struct pt_regs regs)
 		return -EPERM;
 	}
 	regs.eflags = (regs.eflags &~ 0x3000UL) | (level << 12);
-	/* Make sure we return the long way (not sysenter) */
-	set_thread_flag(TIF_IRET);
 	return 0;
 }
@@ -880,6 +880,7 @@ extern FADT_DESCRIPTOR acpi_fadt;
 void __init mp_config_ioapic_for_sci(int irq)
 {
+#ifdef CONFIG_ACPI_INTERPRETER
 	int ioapic;
 	int ioapic_pin;
 	struct acpi_table_madt *madt;
@@ -939,6 +940,7 @@ void __init mp_config_ioapic_for_sci(int irq)
 	 */
 	io_apic_set_pci_routing(ioapic, ioapic_pin, irq,
 		(flags.trigger == 1 ? 0 : 1), (flags.polarity == 1 ? 0 : 1));
+#endif
 }

 #ifdef CONFIG_ACPI_PCI
......
@@ -50,6 +50,12 @@ int force_iommu = 0;
 #endif
 int iommu_merge = 0;
 int iommu_sac_force = 0;
+
+/* If this is disabled the IOMMU will use an optimized flushing strategy
+   of only flushing when a mapping is reused. With it true the GART is
+   flushed for every mapping. Problem is that doing the lazy flush seems
+   to trigger bugs with some popular PCI cards, in particular 3ware (but
+   has been also seen with Qlogic at least). */
 int iommu_fullflush = 1;

 #define MAX_NB 8
......
@@ -53,7 +53,7 @@ asmlinkage extern void ret_from_fork(void);

 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

-int hlt_counter;
+atomic_t hlt_counter = ATOMIC_INIT(0);

 /*
  * Powermanagement idle function, if any..
@@ -62,14 +62,14 @@ void (*pm_idle)(void);

 void disable_hlt(void)
 {
-	hlt_counter++;
+	atomic_inc(&hlt_counter);
 }

 EXPORT_SYMBOL(disable_hlt);

 void enable_hlt(void)
 {
-	hlt_counter--;
+	atomic_dec(&hlt_counter);
 }

 EXPORT_SYMBOL(enable_hlt);
@@ -80,7 +80,7 @@ EXPORT_SYMBOL(enable_hlt);
  */
 void default_idle(void)
 {
-	if (!hlt_counter) {
+	if (!atomic_read(&hlt_counter)) {
 		local_irq_disable();
 		if (!need_resched())
 			safe_halt();
......
@@ -218,6 +218,11 @@ static __init void parse_cmdline_early (char ** cmdline_p)
 		if (!memcmp(from, "acpi=ht", 7)) {
 			acpi_ht = 1;
 		}
+
+		/* acpi=strict disables out-of-spec workarounds */
+		else if (!memcmp(from, "acpi=strict", 11)) {
+			acpi_strict = 1;
+		}
 #endif

 		if (!memcmp(from, "nolapic", 7) ||
@@ -793,13 +798,12 @@ struct cpu_model_info {
 	char *model_names[16];
 };

-/*
- * This does the hard work of actually picking apart the CPU stuff...
- */
-void __init identify_cpu(struct cpuinfo_x86 *c)
+/* Do some early cpuid on the boot CPU to get some parameter that are
+   needed before check_bugs. Everything advanced is in identify_cpu
+   below. */
+void __init early_identify_cpu(struct cpuinfo_x86 *c)
 {
-	int i;
-	u32 xlvl, tfms;
+	u32 tfms;

 	c->loops_per_jiffy = loops_per_jiffy;
 	c->x86_cache_size = -1;
@@ -807,6 +811,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
 	c->x86_vendor_id[0] = '\0'; /* Unset */
 	c->x86_model_id[0] = '\0';  /* Unset */
+	c->x86_clflush_size = 64;
 	memset(&c->x86_capability, 0, sizeof c->x86_capability);

 	/* Get vendor name */
@@ -816,6 +821,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
 	      (int *)&c->x86_vendor_id[4]);

 	get_cpu_vendor(c);
+
 	/* Initialize the standard set of capabilities */
 	/* Note that the vendor-specific code below might override */
@@ -837,6 +843,17 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
 		/* Have CPUID level 0 only - unheard of */
 		c->x86 = 4;
 	}
+}
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+	int i;
+	u32 xlvl;
+
+	early_identify_cpu(c);

 	/* AMD-defined flags: level 0x80000001 */
 	xlvl = cpuid_eax(0x80000000);
@@ -854,7 +871,6 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
 		c->x86_capability[2] = cpuid_edx(0x80860001);
 	}
-
 	/*
 	 * Vendor-specific initialization. In this section we
 	 * canonicalize the feature flags, meaning if there are
......
@@ -202,7 +202,7 @@ void __init syscall_init(void)
 	wrmsrl(MSR_LSTAR, system_call);

 #ifdef CONFIG_IA32_EMULATION
-	wrmsrl(MSR_CSTAR, ia32_cstar_target);
+	syscall32_cpu_init ();
 #endif

 	/* Flags to clear on syscall */
@@ -274,6 +274,9 @@ void __init cpu_init (void)
 	asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax");

+	if (cpu == 0)
+		early_identify_cpu(&boot_cpu_data);
+
 	syscall_init();

 	wrmsrl(MSR_FS_BASE, 0);
@@ -287,7 +290,8 @@ void __init cpu_init (void)
 	 */
 	for (v = 0; v < N_EXCEPTION_STACKS; v++) {
 		if (cpu) {
-			estacks = (char *)__get_free_pages(GFP_ATOMIC, 0);
+			estacks = (char *)__get_free_pages(GFP_ATOMIC,
+						   EXCEPTION_STACK_ORDER);
 			if (!estacks)
 				panic("Cannot allocate exception stack %ld %d\n",
 				      v, cpu);
......
@@ -55,11 +55,16 @@

 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
-int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
+char phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */

 /* Bitmask of currently online CPUs */
 cpumask_t cpu_online_map;

+/* which CPU (physical APIC ID) maps to which logical CPU number */
+volatile char x86_apicid_to_cpu[NR_CPUS];
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+volatile char x86_cpu_to_apicid[NR_CPUS];
+
 static cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
 static cpumask_t smp_commenced_mask;
@@ -70,7 +75,7 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 /* Set when the idlers are all forked */
 int smp_threads_ready;

-int cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+char cpu_sibling_map[NR_CPUS] __cacheline_aligned;

 /*
  * Trampoline 80x86 program as an array.
@@ -574,6 +579,9 @@ static void __init do_boot_cpu (int apicid)
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
 	wake_up_forked_process(idle);
+
+	x86_cpu_to_apicid[cpu] = apicid;
+	x86_apicid_to_cpu[apicid] = cpu;

 	/*
 	 * We remove it from the pidhash and the runqueue
@@ -885,7 +893,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 				break;
 			}
 		}
-		if (cpu_sibling_map[cpu] == NO_PROC_ID) {
+		if (cpu_sibling_map[cpu] == (char)NO_PROC_ID) {
 			smp_num_siblings = 1;
 			printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu);
 		}
......
@@ -351,24 +351,19 @@ void oops_end(void)

 void __die(const char * str, struct pt_regs * regs, long err)
 {
-	int nl = 0;
 	static int die_counter;
-	printk(KERN_EMERG "%s: %04lx [%u]\n", str, err & 0xffff,++die_counter);
-	notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
+	printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
 #ifdef CONFIG_PREEMPT
 	printk("PREEMPT ");
-	nl = 1;
 #endif
 #ifdef CONFIG_SMP
 	printk("SMP ");
-	nl = 1;
 #endif
 #ifdef CONFIG_DEBUG_PAGEALLOC
 	printk("DEBUG_PAGEALLOC");
-	nl = 1;
 #endif
-	if (nl)
-		printk("\n");
+	printk("\n");
+	notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
 	show_registers(regs);
 	/* Executive summary in case the oops scrolled away */
 	printk("RIP ");
@@ -475,14 +470,27 @@ DO_ERROR( 4, SIGSEGV, "overflow", overflow)
 DO_ERROR( 5, SIGSEGV, "bounds", bounds)
 DO_ERROR_INFO( 6, SIGILL,  "invalid operand", invalid_op, ILL_ILLOPN, regs->rip)
 DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
-DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
 DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
-DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2())
 DO_ERROR(18, SIGSEGV, "reserved", reserved)

+#define DO_ERROR_STACK(trapnr, signr, str, name) \
+asmlinkage unsigned long do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	struct pt_regs *pr = ((struct pt_regs *)(current->thread.rsp0))-1; \
+	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
+		return 0; \
+	if (regs->cs & 3) \
+		memcpy(pr, regs, sizeof(struct pt_regs)); \
+	do_trap(trapnr, signr, str, regs, error_code, NULL); \
+	return (regs->cs & 3) ? (unsigned long)pr : 0; \
+}
+
+DO_ERROR_STACK(12, SIGBUS,  "stack segment", stack_segment)
+DO_ERROR_STACK( 8, SIGSEGV, "double fault", double_fault)
+
 asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
 {
 	conditional_sti(regs);
@@ -596,12 +604,18 @@ asmlinkage void default_do_nmi(struct pt_regs * regs)
 	inb(0x71);		/* dummy */
 }

-asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+/* runs on IST stack. */
+asmlinkage unsigned long do_debug(struct pt_regs * regs, unsigned long error_code)
 {
+	struct pt_regs *processregs;
 	unsigned long condition;
 	struct task_struct *tsk = current;
 	siginfo_t info;

+	processregs = (struct pt_regs *)(current->thread.rsp0)-1;
+	if (regs->cs & 3)
+		memcpy(processregs, regs, sizeof(struct pt_regs));
+
 #ifdef CONFIG_CHECKING
 	{
 		/* RED-PEN interaction with debugger - could destroy gs */
@@ -658,17 +672,21 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
 		force_sig_info(SIGTRAP, &info, tsk);
 clear_dr7:
 	asm volatile("movq %0,%%db7"::"r"(0UL));
-	notify_die(DIE_DEBUG, "debug", regs, error_code, 1, SIGTRAP);
-	return;
+	notify_die(DIE_DEBUG, "debug", regs, condition, 1, SIGTRAP);
+out:
+	return (regs->cs & 3) ? (unsigned long)processregs : 0;

 clear_TF_reenable:
+	printk("clear_tf_reenable\n");
 	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);

 clear_TF:
 	/* RED-PEN could cause spurious errors */
-	if (notify_die(DIE_DEBUG, "debug2", regs, error_code, 1, SIGTRAP) != NOTIFY_BAD)
+	if (notify_die(DIE_DEBUG, "debug2", regs, condition, 1, SIGTRAP)
+	    != NOTIFY_BAD)
 		regs->eflags &= ~TF_MASK;
-	return;
+	goto out;
 }

 /*
@@ -730,7 +748,7 @@ void math_error(void *rip)
 	force_sig_info(SIGFPE, &info, task);
 }

-asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
+asmlinkage void do_coprocessor_error(struct pt_regs * regs)
 {
 	conditional_sti(regs);
 	math_error((void *)regs->rip);
@@ -789,8 +807,7 @@ static inline void simd_math_error(void *rip)
 	force_sig_info(SIGFPE, &info, task);
 }

-asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs,
-					  long error_code)
+asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs)
 {
 	conditional_sti(regs);
 	simd_math_error((void *)regs->rip);
@@ -818,11 +835,6 @@ asmlinkage void math_state_restore(void)
 	me->thread_info->status |= TS_USEDFPU;
 }

-asmlinkage void math_emulate(void)
-{
-	BUG();
-}
-
 void do_call_debug(struct pt_regs *regs)
 {
 	notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT);
@@ -831,7 +843,7 @@ void do_call_debug(struct pt_regs *regs)
 void __init trap_init(void)
 {
 	set_intr_gate(0,&divide_error);
-	set_intr_gate(1,&debug);
+	set_intr_gate_ist(1,&debug,DEBUG_STACK);
 	set_intr_gate_ist(2,&nmi,NMI_STACK);
 	set_system_gate(3,&int3);	/* int3-5 can be called from all */
 	set_system_gate(4,&overflow);
@@ -848,7 +860,7 @@ void __init trap_init(void)
 	set_intr_gate(15,&spurious_interrupt_bug);
 	set_intr_gate(16,&coprocessor_error);
 	set_intr_gate(17,&alignment_check);
-	set_intr_gate(18,&machine_check);
+	set_intr_gate_ist(18,&machine_check, MCE_STACK);
 	set_intr_gate(19,&simd_coprocessor_error);
 #ifdef CONFIG_IA32_EMULATION
......
@@ -31,9 +31,6 @@
  * broken programs will segfault and there's no security risk until we choose to
  * fix it.
  *
- * Add HPET support (port from 2.4). Still needed?
- * Nop out vsyscall syscall to avoid anchor for buffer overflows when sysctl off.
- *
  * These are not urgent things that we need to address only before shipping the first
  * production binary kernels.
  */
@@ -89,7 +86,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
 		if (t < __vxtime.last_tsc) t = __vxtime.last_tsc;
 		usec += ((t - __vxtime.last_tsc) *
 			 __vxtime.tsc_quot) >> 32;
-		/* See comment in x86_64 do_gettimeopfday. */
+		/* See comment in x86_64 do_gettimeofday. */
 	} else {
 		usec += ((readl(fix_to_virt(VSYSCALL_HPET) + 0xf0) -
 			  __vxtime.last) * __vxtime.quot) >> 32;
@@ -106,6 +103,7 @@ static force_inline void do_get_tz(struct timezone * tz)
 	*tz = __sys_tz;
 }

+
 static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 	int ret;
@@ -115,6 +113,15 @@ static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
 	return ret;
 }

+static force_inline long time_syscall(long *t)
+{
+	long secs;
+	asm volatile("syscall"
+		: "=a" (secs)
+		: "0" (__NR_time),"D" (t) : __syscall_clobber);
+	return secs;
+}
+
 static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
 {
 	if (unlikely(!__sysctl_vsyscall))
@@ -126,16 +133,15 @@ static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz
 	return 0;
 }

-static time_t __vsyscall(1) vtime(time_t * t)
+/* This will break when the xtime seconds get inaccurate, but that is
+ * unlikely */
+static time_t __vsyscall(1) vtime(time_t *t)
 {
-	struct timeval tv;
 	if (unlikely(!__sysctl_vsyscall))
-		gettimeofday(&tv, NULL);
-	else
-		do_vgettimeofday(&tv);
-	if (t)
-		*t = tv.tv_sec;
-	return tv.tv_sec;
+		return time_syscall(t);
+	else if (t)
+		*t = __xtime.tv_sec;
+	return __xtime.tv_sec;
 }

 static long __vsyscall(2) venosys_0(void)
......
@@ -225,3 +225,6 @@ EXPORT_SYMBOL_GPL(flush_tlb_all);
 #endif

 EXPORT_SYMBOL(sys_ioctl);
+
+EXPORT_SYMBOL(memcpy_toio);
+EXPORT_SYMBOL(memcpy_fromio);
@@ -220,10 +220,14 @@ csum_partial_copy_generic:
 	/* Exception handlers. Very simple, zeroing is done in the wrappers */
 .Lbad_source:
 	movq (%rsp),%rax
+	testq %rax,%rax
+	jz   .Lende
 	movl $-EFAULT,(%rax)
 	jmp  .Lende

 .Lbad_dest:
 	movq 8(%rsp),%rax
+	testq %rax,%rax
+	jz   .Lende
 	movl $-EFAULT,(%rax)
 	jmp .Lende
@@ -11,7 +11,3 @@ void *memcpy_fromio(void *dst,const void*src,unsigned len)
 {
 	return __inline_memcpy(dst,__io_virt(src),len);
 }
-
-EXPORT_SYMBOL(memcpy_toio);
-EXPORT_SYMBOL(memcpy_fromio);
@@ -280,15 +280,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;

-	/* Work around K8 erratum #100.
-	   K8 in compat mode occasionally jumps to illegal addresses >4GB.
-	   We catch this here in the page fault handler because these
-	   addresses are not reachable. Just detect this case and return.
-	   Any code segment in LDT is compatibility mode. */
-	if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
-	    (address >> 32))
-		return;
-
 again:
 	down_read(&mm->mmap_sem);
@@ -373,6 +364,16 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (is_prefetch(regs, address))
 		return;

+	/* Work around K8 erratum #100. K8 in compat mode
+	   occasionally jumps to illegal addresses >4GB. We
+	   catch this here in the page fault handler because
+	   these addresses are not reachable. Just detect this
+	   case and return. Any code segment in LDT is
+	   compatibility mode. */
+	if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
+	    (address >> 32))
+		return;
+
 	if (exception_trace && !unhandled_signal(tsk, SIGSEGV)) {
 		printk(KERN_INFO
 		       "%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
......
@@ -37,7 +37,9 @@
 #include <asm/proto.h>
 #include <asm/smp.h>

+#ifndef Dprintk
 #define Dprintk(x...)
+#endif

 extern char _stext[];
@@ -577,3 +579,32 @@ static __init int x8664_sysctl_init(void)
 }
 __initcall(x8664_sysctl_init);
 #endif
/* Pseudo VMAs to allow ptrace access for the vsyscall pages. x86-64 has two
   different ones: one for 32bit and one for 64bit. Use the appropriate
   one for the target task. */
static struct vm_area_struct gate_vma = {
.vm_start = VSYSCALL_START,
.vm_end = VSYSCALL_END,
.vm_page_prot = PAGE_READONLY
};
static struct vm_area_struct gate32_vma = {
.vm_start = VSYSCALL32_BASE,
.vm_end = VSYSCALL32_END,
.vm_page_prot = PAGE_READONLY
};
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
return test_tsk_thread_flag(tsk, TIF_IA32) ? &gate32_vma : &gate_vma;
}
int in_gate_area(struct task_struct *task, unsigned long addr)
{
struct vm_area_struct *vma = &gate_vma;
if (test_tsk_thread_flag(task, TIF_IA32))
vma = &gate32_vma;
return (addr >= vma->vm_start) && (addr < vma->vm_end);
}
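
get_gate_vma() and in_gate_area() let the generic ptrace/core-dump code treat the per-task vsyscall page as a readable mapping even though no real VMA backs it. A userspace model of the range test, assuming the classic addresses (the 64bit gate near the top of the kernel address space, the 32bit one at VSYSCALL32_BASE):

    #include <stdio.h>

    struct range { unsigned long start, end; };

    /* Assumed values: 64bit vsyscall fixmap and the 32bit vsyscall page. */
    static const struct range gate64 = { 0xffffffffff600000UL, 0xffffffffff601000UL };
    static const struct range gate32 = { 0xffffe000UL, 0xfffff000UL };

    /* Half-open interval test, selecting the range by the task's 32bit
     * flag exactly as in_gate_area() selects gate32_vma vs gate_vma. */
    static int in_gate_area_model(int is_ia32, unsigned long addr)
    {
        const struct range *r = is_ia32 ? &gate32 : &gate64;
        return addr >= r->start && addr < r->end;
    }

    int main(void)
    {
        printf("%d\n", in_gate_area_model(1, 0xffffe400UL)); /* 1: 32bit entry */
        printf("%d\n", in_gate_area_model(0, 0xffffe400UL)); /* 0: wrong gate */
        return 0;
    }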
...@@ -14,7 +14,9 @@ ...@@ -14,7 +14,9 @@
#include <asm/dma.h> #include <asm/dma.h>
#include <asm/numa.h> #include <asm/numa.h>
#ifndef Dprintk
#define Dprintk(x...) #define Dprintk(x...)
#endif
struct pglist_data *node_data[MAXNODE]; struct pglist_data *node_data[MAXNODE];
bootmem_data_t plat_node_bdata[MAX_NUMNODES]; bootmem_data_t plat_node_bdata[MAX_NUMNODES];
......
# #
# Makefile for X86_64 specific PCI routines # Makefile for X86_64 specific PCI routines
# #
# Reuse the i386 PCI subsystem using symlinks # Reuse the i386 PCI subsystem
# #
CFLAGS += -I arch/i386/pci
obj-y := i386.o obj-y := i386.o
obj-$(CONFIG_PCI_DIRECT)+= direct.o obj-$(CONFIG_PCI_DIRECT)+= direct.o
obj-y += fixup.o obj-y += fixup.o
obj-$(CONFIG_ACPI_PCI) += acpi.o obj-$(CONFIG_ACPI_PCI) += acpi.o
obj-y += legacy.o irq.o common.o obj-y += legacy.o irq.o common.o
# mmconfig has a 64bit-specific version
obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
$(obj)/direct.c: $(obj)/pci.h direct-y += ../../i386/pci/direct.o
@ln -sf ../../i386/pci/direct.c $(obj)/direct.c acpi-y += ../../i386/pci/acpi.o
$(obj)/legacy.c: $(obj)/pci.h legacy-y += ../../i386/pci/legacy.o
@ln -sf ../../i386/pci/legacy.c $(obj)/legacy.c irq-y += ../../i386/pci/irq.o
$(obj)/common.c: $(obj)/pci.h common-y += ../../i386/pci/common.o
@ln -sf ../../i386/pci/common.c $(obj)/common.c fixup-y += ../../i386/pci/fixup.o
$(obj)/acpi.c: $(obj)/pci.h i386-y += ../../i386/pci/i386.o
@ln -sf ../../i386/pci/acpi.c $(obj)/acpi.c
$(obj)/pci.h:
@ln -sf ../../i386/pci/pci.h $(obj)/pci.h
$(obj)/irq.c: $(obj)/pci.h
@ln -sf ../../i386/pci/irq.c $(obj)/irq.c
$(obj)/fixup.c: $(obj)/pci.h
@ln -sf ../../i386/pci/fixup.c $(obj)/fixup.c
$(obj)/i386.c: $(obj)/pci.h
@ln -sf ../../i386/pci/i386.c $(obj)/i386.c
clean-files += i386.c legacy.c fixup.c acpi.c irq.c pci.h common.c direct.c
/*
* mmconfig.c - Low-level direct PCI config space access via MMCONFIG
*
 * This is a 64bit-optimized version that always keeps the full mmconfig
* space mapped. This allows lockless config space operation.
*/
#include <linux/pci.h>
#include <linux/init.h>
#include "pci.h"
#define MMCONFIG_APER_SIZE (256*1024*1024)
/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
u32 pci_mmcfg_base_addr;
/* Static virtual mapping of the MMCONFIG aperture */
char *pci_mmcfg_virt;
static inline char *pci_dev_base(int bus, int devfn)
{
return pci_mmcfg_virt + ((bus << 20) | (devfn << 12));
}
static int pci_mmcfg_read(int seg, int bus, int devfn, int reg, int len, u32 *value)
{
char *addr = pci_dev_base(bus, devfn);
if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095)))
return -EINVAL;
switch (len) {
case 1:
*value = readb(addr + reg);
break;
case 2:
*value = readw(addr + reg);
break;
case 4:
*value = readl(addr + reg);
break;
}
return 0;
}
static int pci_mmcfg_write(int seg, int bus, int devfn, int reg, int len, u32 value)
{
char *addr = pci_dev_base(bus,devfn);
if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))
return -EINVAL;
switch (len) {
case 1:
writeb(value, addr + reg);
break;
case 2:
writew(value, addr + reg);
break;
case 4:
writel(value, addr + reg);
break;
}
/* Dummy read to flush PCI write */
readl(addr);
return 0;
}
static struct pci_raw_ops pci_mmcfg = {
.read = pci_mmcfg_read,
.write = pci_mmcfg_write,
};
static int __init pci_mmcfg_init(void)
{
if ((pci_probe & PCI_PROBE_MMCONF) == 0)
return 0;
if (!pci_mmcfg_base_addr)
return 0;
/* RED-PEN i386 doesn't do _nocache right now */
pci_mmcfg_virt = ioremap_nocache(pci_mmcfg_base_addr, MMCONFIG_APER_SIZE);
if (!pci_mmcfg_virt) {
printk("PCI: Cannot map mmconfig aperture\n");
return 0;
}
printk(KERN_INFO "PCI: Using MMCONFIG at %lx\n", pci_mmcfg_base_addr);
raw_pci_ops = &pci_mmcfg;
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
return 0;
}
arch_initcall(pci_mmcfg_init);
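
To see why the single 256MB mapping suffices, work the address arithmetic from pci_dev_base(): each function gets a 4K page of config space, a bus gets 256 devfns x 4K = 1MB, and 256 buses fill exactly the 256MB aperture, so every (bus, devfn, reg) resolves inside the one ioremap. A standalone check of the computation:

    #include <stdio.h>

    /* Same layout as pci_dev_base(): bus in bits 27..20, devfn in 19..12,
     * register offset in bits 11..0.  Pure arithmetic, no hardware access. */
    static unsigned long mmcfg_offset(int bus, int dev, int fn, int reg)
    {
        int devfn = (dev << 3) | fn;   /* standard 5-bit dev / 3-bit fn */
        return ((unsigned long)bus << 20) | ((unsigned long)devfn << 12) | reg;
    }

    int main(void)
    {
        /* bus 1, device 2, function 0, register 0x10 (BAR0) */
        printf("offset = %#lx\n", mmcfg_offset(1, 2, 0, 0x10)); /* 0x110010 */
        return 0;
    }

Because the mapping is established once at init and never changes, readers take no lock: this is the "lockless config space operation" the header comment refers to.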
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#define ARGOFFSET R11 #define ARGOFFSET R11
#define SWFRAME ORIG_RAX #define SWFRAME ORIG_RAX
.macro SAVE_ARGS addskip=0,norcx=0 .macro SAVE_ARGS addskip=0,norcx=0,nor891011=0
subq $9*8+\addskip,%rsp subq $9*8+\addskip,%rsp
CFI_ADJUST_CFA_OFFSET 9*8+\addskip CFI_ADJUST_CFA_OFFSET 9*8+\addskip
movq %rdi,8*8(%rsp) movq %rdi,8*8(%rsp)
...@@ -47,6 +47,8 @@ ...@@ -47,6 +47,8 @@
.endif .endif
movq %rax,4*8(%rsp) movq %rax,4*8(%rsp)
CFI_OFFSET rax,4*8-(9*8+\addskip) CFI_OFFSET rax,4*8-(9*8+\addskip)
.if \nor891011
.else
movq %r8,3*8(%rsp) movq %r8,3*8(%rsp)
CFI_OFFSET r8,3*8-(9*8+\addskip) CFI_OFFSET r8,3*8-(9*8+\addskip)
movq %r9,2*8(%rsp) movq %r9,2*8(%rsp)
...@@ -55,17 +57,21 @@ ...@@ -55,17 +57,21 @@
CFI_OFFSET r10,1*8-(9*8+\addskip) CFI_OFFSET r10,1*8-(9*8+\addskip)
movq %r11,(%rsp) movq %r11,(%rsp)
CFI_OFFSET r11,-(9*8+\addskip) CFI_OFFSET r11,-(9*8+\addskip)
.endif
.endm .endm
#define ARG_SKIP 9*8 #define ARG_SKIP 9*8
.macro RESTORE_ARGS skiprax=0,addskip=0,skiprcx=0,skipr11=0 .macro RESTORE_ARGS skiprax=0,addskip=0,skiprcx=0,skipr11=0,skipr8910=0,skiprdx=0
.if \skipr11 .if \skipr11
.else .else
movq (%rsp),%r11 movq (%rsp),%r11
.endif .endif
.if \skipr8910
.else
movq 1*8(%rsp),%r10 movq 1*8(%rsp),%r10
movq 2*8(%rsp),%r9 movq 2*8(%rsp),%r9
movq 3*8(%rsp),%r8 movq 3*8(%rsp),%r8
.endif
.if \skiprax .if \skiprax
.else .else
movq 4*8(%rsp),%rax movq 4*8(%rsp),%rax
...@@ -74,7 +80,10 @@ ...@@ -74,7 +80,10 @@
.else .else
movq 5*8(%rsp),%rcx movq 5*8(%rsp),%rcx
.endif .endif
.if \skiprdx
.else
movq 6*8(%rsp),%rdx movq 6*8(%rsp),%rdx
.endif
movq 7*8(%rsp),%rsi movq 7*8(%rsp),%rsi
movq 8*8(%rsp),%rdi movq 8*8(%rsp),%rdi
.if ARG_SKIP+\addskip > 0 .if ARG_SKIP+\addskip > 0
......
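
For reference when reading the new skip flags, the frame SAVE_ARGS builds is fixed; the offsets (derivable from the stores and reloads above) are:

    (%rsp)      r11        4*8(%rsp)   rax        8*8(%rsp)   rdi
    1*8(%rsp)   r10        5*8(%rsp)   rcx
    2*8(%rsp)   r9         6*8(%rsp)   rdx
    3*8(%rsp)   r8         7*8(%rsp)   rsi

A path that saved with nor891011=1 must restore with skipr8910=1 and skipr11=1 (reloading any of those registers itself), since their slots were never written.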
...@@ -156,6 +156,10 @@ extern inline unsigned int cpuid_edx(unsigned int op) ...@@ -156,6 +156,10 @@ extern inline unsigned int cpuid_edx(unsigned int op)
#define MSR_MTRRcap 0x0fe #define MSR_MTRRcap 0x0fe
#define MSR_IA32_BBL_CR_CTL 0x119 #define MSR_IA32_BBL_CR_CTL 0x119
#define MSR_IA32_SYSENTER_CS 0x174
#define MSR_IA32_SYSENTER_ESP 0x175
#define MSR_IA32_SYSENTER_EIP 0x176
#define MSR_IA32_MCG_CAP 0x179 #define MSR_IA32_MCG_CAP 0x179
#define MSR_IA32_MCG_STATUS 0x17a #define MSR_IA32_MCG_STATUS 0x17a
#define MSR_IA32_MCG_CTL 0x17b #define MSR_IA32_MCG_CTL 0x17b
......
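
These SYSENTER MSRs define where a 32bit SYSENTER lands: a code segment, a stack pointer, and an instruction pointer that the CPU loads atomically. A userspace model of what a per-cpu init routine would program (the MSR constants match the defines above; the CS and entry values are placeholders, not the kernel's):

    #include <stdint.h>
    #include <stdio.h>

    #define MSR_IA32_SYSENTER_CS  0x174
    #define MSR_IA32_SYSENTER_ESP 0x175
    #define MSR_IA32_SYSENTER_EIP 0x176

    /* Stand-in for the kernel's wrmsrl(): just show the writes. */
    static void wrmsrl_model(uint32_t msr, uint64_t val)
    {
        printf("wrmsr %#x <- %#llx\n", msr, (unsigned long long)val);
    }

    int main(void)
    {
        uint64_t kernel_cs = 0x10;              /* placeholder kernel %cs */
        uint64_t entry = 0xffffffff80100000ULL; /* placeholder: ia32_sysenter_target */

        wrmsrl_model(MSR_IA32_SYSENTER_CS, kernel_cs);
        wrmsrl_model(MSR_IA32_SYSENTER_ESP, 0); /* real stack loaded at entry */
        wrmsrl_model(MSR_IA32_SYSENTER_EIP, entry);
        return 0;
    }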
...@@ -137,6 +137,13 @@ extern __inline__ int get_order(unsigned long size) ...@@ -137,6 +137,13 @@ extern __inline__ int get_order(unsigned long size)
#define VM_STACK_DEFAULT_FLAGS \ #define VM_STACK_DEFAULT_FLAGS \
(test_thread_flag(TIF_IA32) ? vm_stack_flags32 : vm_stack_flags) (test_thread_flag(TIF_IA32) ? vm_stack_flags32 : vm_stack_flags)
#define CONFIG_ARCH_GATE_AREA 1
#ifndef __ASSEMBLY__
struct task_struct;
struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
int in_gate_area(struct task_struct *task, unsigned long addr);
#endif
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
......
...@@ -173,7 +173,7 @@ static inline void clear_in_cr4 (unsigned long mask) ...@@ -173,7 +173,7 @@ static inline void clear_in_cr4 (unsigned long mask)
* space during mmap's. * space during mmap's.
*/ */
#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000) #define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
#define TASK_UNMAPPED_32 (PAGE_ALIGN(0xc5000000)) #define TASK_UNMAPPED_32 PAGE_ALIGN(IA32_PAGE_OFFSET/3)
#define TASK_UNMAPPED_64 PAGE_ALIGN(TASK_SIZE/3) #define TASK_UNMAPPED_64 PAGE_ALIGN(TASK_SIZE/3)
#define TASK_UNMAPPED_BASE \ #define TASK_UNMAPPED_BASE \
(test_thread_flag(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64) (test_thread_flag(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)
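
Worked through, the new formula fixes the 3GB personality: the old constant PAGE_ALIGN(0xc5000000) lies above a 0xc0000000 address limit, leaving those tasks no usable mmap base. Now TASK_UNMAPPED_32 scales with the limit, mirroring the TASK_SIZE/3 rule used for 64bit:

    default:        0xFFFFe000 / 3 = 0x55554aaa, PAGE_ALIGN -> 0x55555000 (~1.33GB)
    ADDR_LIMIT_3GB: 0xc0000000 / 3 = 0x40000000 (1GB, already page-aligned)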
...@@ -262,7 +262,9 @@ struct thread_struct { ...@@ -262,7 +262,9 @@ struct thread_struct {
#define STACKFAULT_STACK 1 #define STACKFAULT_STACK 1
#define DOUBLEFAULT_STACK 2 #define DOUBLEFAULT_STACK 2
#define NMI_STACK 3 #define NMI_STACK 3
#define N_EXCEPTION_STACKS 3 /* hw limit: 7 */ #define DEBUG_STACK 4
#define MCE_STACK 5
#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
#define EXCEPTION_STACK_ORDER 0 #define EXCEPTION_STACK_ORDER 0
...@@ -451,4 +453,6 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) ...@@ -451,4 +453,6 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
ti->task; \ ti->task; \
}) })
#define cache_line_size() (boot_cpu_data.x86_clflush_size)
#endif /* __ASM_X86_64_PROCESSOR_H */ #endif /* __ASM_X86_64_PROCESSOR_H */
...@@ -21,6 +21,7 @@ extern void syscall_init(void); ...@@ -21,6 +21,7 @@ extern void syscall_init(void);
extern void ia32_syscall(void); extern void ia32_syscall(void);
extern void ia32_cstar_target(void); extern void ia32_cstar_target(void);
extern void ia32_sysenter_target(void);
extern void calibrate_delay(void); extern void calibrate_delay(void);
extern void cpu_idle(void); extern void cpu_idle(void);
...@@ -37,6 +38,8 @@ extern int numa_setup(char *opt); ...@@ -37,6 +38,8 @@ extern int numa_setup(char *opt);
extern int setup_early_printk(char *); extern int setup_early_printk(char *);
extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2))); extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
extern void early_identify_cpu(struct cpuinfo_x86 *c);
extern int k8_scan_nodes(unsigned long start, unsigned long end); extern int k8_scan_nodes(unsigned long start, unsigned long end);
extern int numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn); extern int numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
...@@ -68,6 +71,7 @@ extern void show_regs(struct pt_regs * regs); ...@@ -68,6 +71,7 @@ extern void show_regs(struct pt_regs * regs);
extern int map_syscall32(struct mm_struct *mm, unsigned long address); extern int map_syscall32(struct mm_struct *mm, unsigned long address);
extern char *syscall32_page; extern char *syscall32_page;
extern void syscall32_cpu_init(void);
extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
......
...@@ -47,7 +47,7 @@ extern void smp_invalidate_rcv(void); /* Process an NMI */ ...@@ -47,7 +47,7 @@ extern void smp_invalidate_rcv(void); /* Process an NMI */
extern void (*mtrr_hook) (void); extern void (*mtrr_hook) (void);
extern void zap_low_mappings(void); extern void zap_low_mappings(void);
void smp_stop_cpu(void); void smp_stop_cpu(void);
extern int cpu_sibling_map[]; extern char cpu_sibling_map[];
#define SMP_TRAMPOLINE_BASE 0x6000 #define SMP_TRAMPOLINE_BASE 0x6000
...@@ -74,7 +74,15 @@ extern __inline int hard_smp_processor_id(void) ...@@ -74,7 +74,15 @@ extern __inline int hard_smp_processor_id(void)
return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
} }
#define safe_smp_processor_id() (disable_apic ? 0 : hard_smp_processor_id()) /*
* Some lowlevel functions might want to know about
* the real APIC ID <-> CPU # mapping.
* AK: why is this volatile?
*/
extern volatile char x86_apicid_to_cpu[NR_CPUS];
extern volatile char x86_cpu_to_apicid[NR_CPUS];
#define safe_smp_processor_id() (disable_apic ? 0 : x86_apicid_to_cpu[hard_smp_processor_id()])
#define cpu_online(cpu) cpu_isset(cpu, cpu_online_map) #define cpu_online(cpu) cpu_isset(cpu, cpu_online_map)
#endif /* !ASSEMBLY */ #endif /* !ASSEMBLY */
......
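
The point of the translation tables: APIC IDs handed out by the BIOS need not be dense or equal to Linux CPU numbers, so safe_smp_processor_id() must translate rather than use the hardware ID directly. A userspace model with a deliberately sparse assignment (values invented for illustration):

    #include <stdio.h>

    #define NR_CPUS 8

    /* Invented mapping: APIC IDs 0 and 6 back Linux CPUs 0 and 1. */
    static char x86_apicid_to_cpu[NR_CPUS] = { [0] = 0, [6] = 1 };

    /* Pretend the local APIC reports ID 6 (what hard_smp_processor_id()
     * would read from APIC_BASE+APIC_ID on the running CPU). */
    static int hard_smp_processor_id(void) { return 6; }

    int main(void)
    {
        printf("cpu = %d\n", x86_apicid_to_cpu[hard_smp_processor_id()]); /* cpu = 1 */
        return 0;
    }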
...@@ -82,7 +82,6 @@ static inline struct thread_info *stack_thread_info(void) ...@@ -82,7 +82,6 @@ static inline struct thread_info *stack_thread_info(void)
#else /* !__ASSEMBLY__ */ #else /* !__ASSEMBLY__ */
/* how to get the thread information struct from ASM */ /* how to get the thread information struct from ASM */
/* only works on the process stack. otherwise get it via the PDA. */
#define GET_THREAD_INFO(reg) \ #define GET_THREAD_INFO(reg) \
movq %gs:pda_kernelstack,reg ; \ movq %gs:pda_kernelstack,reg ; \
subq $(THREAD_SIZE-PDA_STACKOFFSET),reg subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
...@@ -118,8 +117,10 @@ static inline struct thread_info *stack_thread_info(void) ...@@ -118,8 +117,10 @@ static inline struct thread_info *stack_thread_info(void)
#define _TIF_FORK (1<<TIF_FORK) #define _TIF_FORK (1<<TIF_FORK)
#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING) #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ /* work to do on interrupt/exception return */
#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */ #define _TIF_WORK_MASK (0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SINGLESTEP))
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK 0x0000FFFF
#define PREEMPT_ACTIVE 0x4000000 #define PREEMPT_ACTIVE 0x4000000
......
...@@ -3,11 +3,18 @@ ...@@ -3,11 +3,18 @@
/* Values need to match arch/x86_64/ia32/vsyscall.lds */ /* Values need to match arch/x86_64/ia32/vsyscall.lds */
#ifdef __ASSEMBLY__
#define VSYSCALL32_BASE 0xffffe000
#define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x410)
#else
#define VSYSCALL32_BASE 0xffffe000UL #define VSYSCALL32_BASE 0xffffe000UL
#define VSYSCALL32_END (VSYSCALL32_BASE + PAGE_SIZE)
#define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE) #define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE)
#define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400) #define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400)
#define VSYSCALL32_SYSEXIT ((void *)VSYSCALL32_BASE + 0x410)
#define VSYSCALL32_SIGRETURN ((void *)VSYSCALL32_BASE + 0x500) #define VSYSCALL32_SIGRETURN ((void *)VSYSCALL32_BASE + 0x500)
#define VSYSCALL32_RTSIGRETURN ((void *)VSYSCALL32_BASE + 0x600) #define VSYSCALL32_RTSIGRETURN ((void *)VSYSCALL32_BASE + 0x600)
#endif
#endif #endif
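
The resulting 32bit vsyscall page layout, from the defines above (one page at the top of the 32bit address space):

    0xffffe000  VSYSCALL32_EHDR         ELF header of the vsyscall DSO
    0xffffe400  VSYSCALL32_VSYSCALL     syscall entry
    0xffffe410  VSYSCALL32_SYSEXIT      SYSENTER return point
    0xffffe500  VSYSCALL32_SIGRETURN    sigreturn trampoline
    0xffffe600  VSYSCALL32_RTSIGRETURN  rt_sigreturn trampoline
    0xfffff000  VSYSCALL32_END          base + PAGE_SIZE

The #ifdef __ASSEMBLY__ split exists because the pointer-cast forms are not valid in assembler source; the SYSENTER entry path needs VSYSCALL32_SYSEXIT as a plain numeric constant.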