Commit 3d614679, authored by Andi Kleen, committed by Linus Torvalds

[PATCH] x86-64 update for 2.5.5

This patch makes x86-64 compile in 2.5.5 and syncs it with changes in the i386
port. It also fixes some bugs that were discovered in recent testing:
- enhance 32bit emulation and fix bugs.
- fix security hole in vmalloc handling
- Do not use lockless gettimeofday for now because it is buggy.
The patch only changes x86_64 specific files.

-Andi
parent 4fff2c1a
...@@ -29,7 +29,7 @@ define_int CONFIG_X86_L1_CACHE_BYTES 64 ...@@ -29,7 +29,7 @@ define_int CONFIG_X86_L1_CACHE_BYTES 64
define_int CONFIG_X86_L1_CACHE_SHIFT 6 define_int CONFIG_X86_L1_CACHE_SHIFT 6
define_bool CONFIG_X86_TSC y define_bool CONFIG_X86_TSC y
define_bool CONFIG_X86_GOOD_APIC y define_bool CONFIG_X86_GOOD_APIC y
define_bool CONFIG_X86_CMPXCHG define_bool CONFIG_X86_CMPXCHG y
tristate '/dev/cpu/*/msr - Model-specific register support' CONFIG_X86_MSR tristate '/dev/cpu/*/msr - Model-specific register support' CONFIG_X86_MSR
tristate '/dev/cpu/*/cpuid - CPU information support' CONFIG_X86_CPUID tristate '/dev/cpu/*/cpuid - CPU information support' CONFIG_X86_CPUID
...@@ -72,6 +72,7 @@ bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG ...@@ -72,6 +72,7 @@ bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG
if [ "$CONFIG_HOTPLUG" = "y" ] ; then if [ "$CONFIG_HOTPLUG" = "y" ] ; then
source drivers/pcmcia/Config.in source drivers/pcmcia/Config.in
source drivers/hotplug/Config.in
else else
define_bool CONFIG_PCMCIA n define_bool CONFIG_PCMCIA n
fi fi
...@@ -80,8 +81,8 @@ if [ "$CONFIG_PROC_FS" = "y" ]; then ...@@ -80,8 +81,8 @@ if [ "$CONFIG_PROC_FS" = "y" ]; then
define_bool CONFIG_KCORE_ELF y define_bool CONFIG_KCORE_ELF y
fi fi
# We probably are not going to support a.out, are we? Or should we support a.out in i386 compatibility mode? # We probably are not going to support a.out, are we? Or should we support a.out in i386 compatibility mode?
#tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT #tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT
tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
bool 'Power Management support' CONFIG_PM bool 'Power Management support' CONFIG_PM
......
...@@ -37,6 +37,7 @@ CONFIG_X86_L1_CACHE_BYTES=64 ...@@ -37,6 +37,7 @@ CONFIG_X86_L1_CACHE_BYTES=64
CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_TSC=y CONFIG_X86_TSC=y
CONFIG_X86_GOOD_APIC=y CONFIG_X86_GOOD_APIC=y
CONFIG_X86_CMPXCHG=y
CONFIG_X86_MSR=y CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y CONFIG_X86_CPUID=y
# CONFIG_MATH_EMULATION is not set # CONFIG_MATH_EMULATION is not set
...@@ -59,16 +60,7 @@ CONFIG_BINFMT_ELF=y ...@@ -59,16 +60,7 @@ CONFIG_BINFMT_ELF=y
# CONFIG_BINFMT_MISC is not set # CONFIG_BINFMT_MISC is not set
CONFIG_PM=y CONFIG_PM=y
CONFIG_IA32_EMULATION=y CONFIG_IA32_EMULATION=y
CONFIG_ACPI=y # CONFIG_ACPI is not set
CONFIG_ACPI_DEBUG=y
CONFIG_ACPI_BUSMGR=y
CONFIG_ACPI_SYS=y
CONFIG_ACPI_CPU=y
CONFIG_ACPI_BUTTON=y
CONFIG_ACPI_AC=y
CONFIG_ACPI_EC=y
CONFIG_ACPI_CMBATT=y
CONFIG_ACPI_THERMAL=y
# #
# Memory Technology Devices (MTD) # Memory Technology Devices (MTD)
...@@ -99,9 +91,8 @@ CONFIG_ACPI_THERMAL=y ...@@ -99,9 +91,8 @@ CONFIG_ACPI_THERMAL=y
# CONFIG_BLK_DEV_DAC960 is not set # CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_LOOP is not set # CONFIG_BLK_DEV_LOOP is not set
# CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_NBD is not set
CONFIG_BLK_DEV_RAM=y # CONFIG_BLK_DEV_RAM is not set
CONFIG_BLK_DEV_RAM_SIZE=4096 # CONFIG_BLK_DEV_INITRD is not set
CONFIG_BLK_DEV_INITRD=y
# #
# Multi-device support (RAID and LVM) # Multi-device support (RAID and LVM)
...@@ -388,7 +379,6 @@ CONFIG_EXT2_FS=y ...@@ -388,7 +379,6 @@ CONFIG_EXT2_FS=y
# CONFIG_UDF_RW is not set # CONFIG_UDF_RW is not set
# CONFIG_UFS_FS is not set # CONFIG_UFS_FS is not set
# CONFIG_UFS_FS_WRITE is not set # CONFIG_UFS_FS_WRITE is not set
CONFIG_SIMICSFS=y
# #
# Network File Systems # Network File Systems
......
...@@ -12,6 +12,9 @@ ...@@ -12,6 +12,9 @@
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/processor.h> #include <asm/processor.h>
struct file;
struct elf_phdr;
#define IA32_EMULATOR 1 #define IA32_EMULATOR 1
#define IA32_PAGE_OFFSET 0xE0000000 #define IA32_PAGE_OFFSET 0xE0000000
...@@ -77,7 +80,6 @@ do { \ ...@@ -77,7 +80,6 @@ do { \
__asm__("movl %0,%%fs": :"r" (0)); \ __asm__("movl %0,%%fs": :"r" (0)); \
__asm__("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \ __asm__("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \
wrmsrl(MSR_KERNEL_GS_BASE, 0); \ wrmsrl(MSR_KERNEL_GS_BASE, 0); \
set_thread_flag(TIF_IA32); \
(regs)->rip = (new_rip); \ (regs)->rip = (new_rip); \
(regs)->rsp = (new_rsp); \ (regs)->rsp = (new_rsp); \
(regs)->eflags = 0x200; \ (regs)->eflags = 0x200; \
...@@ -87,6 +89,8 @@ do { \ ...@@ -87,6 +89,8 @@ do { \
} while(0) } while(0)
#define elf_map elf32_map
MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries."); MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries.");
MODULE_AUTHOR("Eric Youngdale, Andi Kleen"); MODULE_AUTHOR("Eric Youngdale, Andi Kleen");
...@@ -102,6 +106,7 @@ static void elf32_init(struct pt_regs *); ...@@ -102,6 +106,7 @@ static void elf32_init(struct pt_regs *);
static void elf32_init(struct pt_regs *regs) static void elf32_init(struct pt_regs *regs)
{ {
struct task_struct *me = current;
regs->rdi = 0; regs->rdi = 0;
regs->rsi = 0; regs->rsi = 0;
regs->rdx = 0; regs->rdx = 0;
...@@ -109,9 +114,13 @@ static void elf32_init(struct pt_regs *regs) ...@@ -109,9 +114,13 @@ static void elf32_init(struct pt_regs *regs)
regs->rax = 0; regs->rax = 0;
regs->rbx = 0; regs->rbx = 0;
regs->rbp = 0; regs->rbp = 0;
current->thread.fs = 0; current->thread.gs = 0; me->thread.fs = 0;
current->thread.fsindex = 0; current->thread.gsindex = 0; me->thread.gs = 0;
current->thread.ds = __USER_DS; current->thread.es == __USER_DS; me->thread.fsindex = 0;
me->thread.gsindex = 0;
me->thread.ds = __USER_DS;
me->thread.es = __USER_DS;
set_thread_flag(TIF_IA32);
} }
extern void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address); extern void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address);
...@@ -162,4 +171,17 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm) ...@@ -162,4 +171,17 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm)
return 0; return 0;
} }
/*
 * elf32_map - map one ELF program header for the IA32 loader.
 * @filep: file backing the mapping
 * @addr:  requested address; passed to do_mmap() as ELF_PAGESTART(addr)
 * @eppnt: program header supplying p_filesz, p_vaddr and p_offset
 * @prot:  protection bits, forwarded unchanged
 * @type:  mapping flags; MAP_32BIT is OR-ed in before the call
 *
 * The length is p_filesz plus ELF_PAGEOFFSET(p_vaddr) and the file offset
 * is p_offset minus the same ELF_PAGEOFFSET(p_vaddr).  The current task's
 * mm->mmap_sem is held for writing around the do_mmap() call.
 *
 * Returns the value produced by do_mmap().
 */
static unsigned long
elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
{
	struct task_struct *tsk = current;
	unsigned long mapped;

	down_write(&tsk->mm->mmap_sem);
	mapped = do_mmap(filep,
			 ELF_PAGESTART(addr),
			 eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr),
			 prot,
			 type | MAP_32BIT,
			 eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
	up_write(&tsk->mm->mmap_sem);

	return mapped;
}
...@@ -3083,8 +3083,6 @@ COMPATIBLE_IOCTL(BLKROSET) ...@@ -3083,8 +3083,6 @@ COMPATIBLE_IOCTL(BLKROSET)
COMPATIBLE_IOCTL(BLKROGET) COMPATIBLE_IOCTL(BLKROGET)
COMPATIBLE_IOCTL(BLKRRPART) COMPATIBLE_IOCTL(BLKRRPART)
COMPATIBLE_IOCTL(BLKFLSBUF) COMPATIBLE_IOCTL(BLKFLSBUF)
COMPATIBLE_IOCTL(BLKRASET)
COMPATIBLE_IOCTL(BLKFRASET)
COMPATIBLE_IOCTL(BLKSECTSET) COMPATIBLE_IOCTL(BLKSECTSET)
COMPATIBLE_IOCTL(BLKSSZGET) COMPATIBLE_IOCTL(BLKSSZGET)
...@@ -3596,10 +3594,8 @@ HANDLE_IOCTL(SIOCDELRT, routing_ioctl) ...@@ -3596,10 +3594,8 @@ HANDLE_IOCTL(SIOCDELRT, routing_ioctl)
HANDLE_IOCTL(SIOCRTMSG, ret_einval) HANDLE_IOCTL(SIOCRTMSG, ret_einval)
HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp) HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo) HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
HANDLE_IOCTL(BLKRAGET, w_long)
HANDLE_IOCTL(BLKGETSIZE, w_long) HANDLE_IOCTL(BLKGETSIZE, w_long)
HANDLE_IOCTL(0x1260, broken_blkgetsize) HANDLE_IOCTL(0x1260, broken_blkgetsize)
HANDLE_IOCTL(BLKFRAGET, w_long)
HANDLE_IOCTL(BLKSECTGET, w_long) HANDLE_IOCTL(BLKSECTGET, w_long)
HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans) HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans)
HANDLE_IOCTL(FBIOGETCMAP, fb_ioctl_trans) HANDLE_IOCTL(FBIOGETCMAP, fb_ioctl_trans)
......
...@@ -82,7 +82,7 @@ sys32_sigsuspend(int history0, int history1, old_sigset_t mask, struct pt_regs r ...@@ -82,7 +82,7 @@ sys32_sigsuspend(int history0, int history1, old_sigset_t mask, struct pt_regs r
spin_lock_irq(&current->sigmask_lock); spin_lock_irq(&current->sigmask_lock);
saveset = current->blocked; saveset = current->blocked;
siginitset(&current->blocked, mask); siginitset(&current->blocked, mask);
recalc_sigpending(current); recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock); spin_unlock_irq(&current->sigmask_lock);
regs.rax = -EINTR; regs.rax = -EINTR;
...@@ -225,7 +225,7 @@ asmlinkage int sys32_sigreturn(struct pt_regs regs) ...@@ -225,7 +225,7 @@ asmlinkage int sys32_sigreturn(struct pt_regs regs)
sigdelsetmask(&set, ~_BLOCKABLE); sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sigmask_lock); spin_lock_irq(&current->sigmask_lock);
current->blocked = set; current->blocked = set;
recalc_sigpending(current); recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock); spin_unlock_irq(&current->sigmask_lock);
if (restore_sigcontext(&regs, &frame->sc, &eax)) if (restore_sigcontext(&regs, &frame->sc, &eax))
...@@ -252,7 +252,7 @@ asmlinkage int sys32_rt_sigreturn(struct pt_regs regs) ...@@ -252,7 +252,7 @@ asmlinkage int sys32_rt_sigreturn(struct pt_regs regs)
sigdelsetmask(&set, ~_BLOCKABLE); sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sigmask_lock); spin_lock_irq(&current->sigmask_lock);
current->blocked = set; current->blocked = set;
recalc_sigpending(current); recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock); spin_unlock_irq(&current->sigmask_lock);
if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax)) if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax))
......
...@@ -140,7 +140,6 @@ void cpu_idle (void) ...@@ -140,7 +140,6 @@ void cpu_idle (void)
while (!need_resched()) while (!need_resched())
idle(); idle();
schedule(); schedule();
check_pgt_cache();
} }
} }
......
...@@ -420,9 +420,11 @@ asmlinkage void syscall_trace(struct pt_regs *regs) ...@@ -420,9 +420,11 @@ asmlinkage void syscall_trace(struct pt_regs *regs)
current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
? 0x80 : 0); ? 0x80 : 0);
preempt_disable();
current->state = TASK_STOPPED; current->state = TASK_STOPPED;
notify_parent(current, SIGCHLD); notify_parent(current, SIGCHLD);
schedule(); schedule();
preempt_enable();
/* /*
* this isn't the same as continuing with a signal, but it will do * this isn't the same as continuing with a signal, but it will do
* for normal use. strace only continues with a signal if the * for normal use. strace only continues with a signal if the
......
...@@ -89,7 +89,7 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs regs) ...@@ -89,7 +89,7 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs regs)
spin_lock_irq(&current->sigmask_lock); spin_lock_irq(&current->sigmask_lock);
saveset = current->blocked; saveset = current->blocked;
current->blocked = newset; current->blocked = newset;
recalc_sigpending(current); recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock); spin_unlock_irq(&current->sigmask_lock);
#if DEBUG_SIG #if DEBUG_SIG
printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n", printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
...@@ -200,7 +200,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs regs) ...@@ -200,7 +200,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs regs)
sigdelsetmask(&set, ~_BLOCKABLE); sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sigmask_lock); spin_lock_irq(&current->sigmask_lock);
current->blocked = set; current->blocked = set;
recalc_sigpending(current); recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock); spin_unlock_irq(&current->sigmask_lock);
if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax)) if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax))
...@@ -431,7 +431,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, ...@@ -431,7 +431,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
spin_lock_irq(&current->sigmask_lock); spin_lock_irq(&current->sigmask_lock);
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
sigaddset(&current->blocked,sig); sigaddset(&current->blocked,sig);
recalc_sigpending(current); recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock); spin_unlock_irq(&current->sigmask_lock);
} }
} }
...@@ -473,9 +473,11 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) ...@@ -473,9 +473,11 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
/* Let the debugger run. */ /* Let the debugger run. */
current->exit_code = signr; current->exit_code = signr;
preempt_disable();
current->state = TASK_STOPPED; current->state = TASK_STOPPED;
notify_parent(current, SIGCHLD); notify_parent(current, SIGCHLD);
schedule(); schedule();
preempt_enable();
/* We're back. Did the debugger cancel the sig? */ /* We're back. Did the debugger cancel the sig? */
if (!(signr = current->exit_code)) if (!(signr = current->exit_code))
...@@ -530,12 +532,14 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) ...@@ -530,12 +532,14 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
case SIGSTOP: { case SIGSTOP: {
struct signal_struct *sig; struct signal_struct *sig;
preempt_disable();
current->state = TASK_STOPPED; current->state = TASK_STOPPED;
current->exit_code = signr; current->exit_code = signr;
sig = current->p_pptr->sig; sig = current->p_pptr->sig;
if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
notify_parent(current, SIGCHLD); notify_parent(current, SIGCHLD);
schedule(); schedule();
preempt_enable();
continue; continue;
} }
......
...@@ -120,6 +120,62 @@ spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED; ...@@ -120,6 +120,62 @@ spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
extern spinlock_t i8259A_lock; extern spinlock_t i8259A_lock;
/*
 * do_fast_gettimeoffset - TSC-based time offset, in microseconds.
 *
 * Reads the Time Stamp Counter, takes the low word's distance from
 * last_tsc_low (the value relative to the previous jiffy) and scales it:
 *
 *   offset = delta * fast_gettimeoffset_quotient
 *          = delta * usecs_per_clock
 *          = delta * (usecs_per_jiffy / clocks_per_jiffy)
 *
 * The scaling is a multiply followed by a 32-bit right shift rather than
 * a divide, which keeps a slow division out of the critical path.
 *
 * NOTE(review): the delta is computed at full unsigned long width; confirm
 * against last_tsc_low's declaration that a wrap of the low TSC word is
 * handled as intended.
 *
 * Returns delay_at_last_interrupt plus the scaled delta.
 */
static inline unsigned long do_fast_gettimeoffset(void)
{
	register unsigned long tsc_lo, tsc_hi;
	unsigned long scaled;

	/* Sample the counter; only the low half is used below. */
	rdtsc(tsc_lo, tsc_hi);

	/* Distance from the value recorded as last_tsc_low. */
	tsc_lo -= last_tsc_low;

	/* Keep the upper part of the product (multiply, then >> 32). */
	scaled = (tsc_lo * fast_gettimeoffset_quotient) >> 32;

	return delay_at_last_interrupt + scaled;
}
/*
 * do_gettimeofday - fill @tv with the current wall-clock time.
 * @tv: receives seconds and microseconds
 *
 * Microsecond resolution, and better than microsecond precision on fast
 * x86 machines with a TSC.
 *
 * Under the xtime_lock read lock (interrupts saved/restored) the
 * microsecond count is built from do_gettimeoffset(), plus one tick's
 * worth of microseconds for every jiffy by which jiffies is ahead of
 * wall_jiffies, plus xtime.tv_usec.  After the lock is dropped, whole
 * seconds are carried out of the microsecond count.
 *
 * NOTE(review): do_gettimeoffset() is not defined in the visible part of
 * this file; presumably it resolves to do_fast_gettimeoffset() - confirm.
 */
void do_gettimeofday(struct timeval *tv)
{
	unsigned long irq_flags;
	unsigned long secs, usecs;
	unsigned long missed_ticks;

	read_lock_irqsave(&xtime_lock, irq_flags);
	usecs = do_gettimeoffset();
	/* Ticks counted in jiffies but not yet in wall_jiffies. */
	missed_ticks = jiffies - wall_jiffies;
	usecs += missed_ticks * (1000000 / HZ);
	secs = xtime.tv_sec;
	usecs += xtime.tv_usec;
	read_unlock_irqrestore(&xtime_lock, irq_flags);

	/* Carry whole seconds out of the microsecond count. */
	for (; usecs >= 1000000; usecs -= 1000000)
		secs++;

	tv->tv_sec = secs;
	tv->tv_usec = usecs;
}
void do_settimeofday(struct timeval *tv) void do_settimeofday(struct timeval *tv)
{ {
write_lock_irq(&xtime_lock); write_lock_irq(&xtime_lock);
...@@ -484,7 +540,7 @@ void __init time_init(void) ...@@ -484,7 +540,7 @@ void __init time_init(void)
* clock/second. Our precision is about 100 ppm. * clock/second. Our precision is about 100 ppm.
*/ */
{ {
cpu_khz = ((1000000*(1UL<<32)) / tsc_quotient); /* FIXME: is it right? */ cpu_khz = ((1000*(1UL<<32)) / tsc_quotient);
printk("Detected %ld Hz processor.\n", cpu_khz); printk("Detected %ld Hz processor.\n", cpu_khz);
} }
} }
......
...@@ -60,9 +60,6 @@ static inline void timeval_normalize(struct timeval * tv) ...@@ -60,9 +60,6 @@ static inline void timeval_normalize(struct timeval * tv)
long __vxtime_sequence[2] __section_vxtime_sequence; long __vxtime_sequence[2] __section_vxtime_sequence;
/* The rest of the kernel knows it as this. */
extern void do_gettimeofday(struct timeval *tv) __attribute__((alias("do_vgettimeofday")));
inline void do_vgettimeofday(struct timeval * tv) inline void do_vgettimeofday(struct timeval * tv)
{ {
long sequence; long sequence;
......
...@@ -89,7 +89,6 @@ EXPORT_SYMBOL_NOVERS(__put_user_4); ...@@ -89,7 +89,6 @@ EXPORT_SYMBOL_NOVERS(__put_user_4);
EXPORT_SYMBOL(strtok); EXPORT_SYMBOL(strtok);
EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(simple_strtol);
EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strncpy_from_user); EXPORT_SYMBOL(strncpy_from_user);
......
...@@ -112,7 +112,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -112,7 +112,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
mm = tsk->mm; mm = tsk->mm;
info.si_code = SEGV_MAPERR; info.si_code = SEGV_MAPERR;
if (address >= TASK_SIZE) if (address >= TASK_SIZE && !(error_code & 5))
goto vmalloc_fault; goto vmalloc_fault;
......
/* /*
* linux/arch/i386/mm/init.c * linux/arch/x86_64/mm/init.c
* *
* Copyright (C) 1995 Linus Torvalds * Copyright (C) 1995 Linus Torvalds
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz> * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
* Copyright (C) 2002 Andi Kleen <ak@suse.de>
*/ */
#include <linux/config.h> #include <linux/config.h>
...@@ -39,28 +40,6 @@ mmu_gather_t mmu_gathers[NR_CPUS]; ...@@ -39,28 +40,6 @@ mmu_gather_t mmu_gathers[NR_CPUS];
static unsigned long totalram_pages; static unsigned long totalram_pages;
int do_check_pgt_cache(int low, int high)
{
int freed = 0;
if(read_pda(pgtable_cache_sz) > high) {
do {
if (read_pda(pgd_quick)) {
pgd_free_slow(pgd_alloc_one_fast());
freed++;
}
if (read_pda(pmd_quick)) {
pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
freed++;
}
if (read_pda(pte_quick)) {
pte_free_slow(pte_alloc_one_fast(NULL, 0));
freed++;
}
} while(read_pda(pgtable_cache_sz) > low);
}
return freed;
}
/* /*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
* physical space so we can cache the place of the first one and move * physical space so we can cache the place of the first one and move
...@@ -89,7 +68,6 @@ void show_mem(void) ...@@ -89,7 +68,6 @@ void show_mem(void)
printk("%d reserved pages\n",reserved); printk("%d reserved pages\n",reserved);
printk("%d pages shared\n",shared); printk("%d pages shared\n",shared);
printk("%d pages swap cached\n",cached); printk("%d pages swap cached\n",cached);
printk("%ld pages in page table cache\n",read_pda(pgtable_cache_sz));
show_buffers(); show_buffers();
} }
...@@ -138,12 +116,12 @@ static void set_pte_phys(unsigned long vaddr, ...@@ -138,12 +116,12 @@ static void set_pte_phys(unsigned long vaddr,
if (pmd_none(*pmd)) { if (pmd_none(*pmd)) {
pte = (pte_t *) spp_getpage(); pte = (pte_t *) spp_getpage();
set_pmd(pmd, __pmd(__pa(pte) + 0x7)); set_pmd(pmd, __pmd(__pa(pte) + 0x7));
if (pte != pte_offset(pmd, 0)) { if (pte != pte_offset_kernel(pmd, 0)) {
printk("PAGETABLE BUG #02!\n"); printk("PAGETABLE BUG #02!\n");
return; return;
} }
} }
pte = pte_offset(pmd, vaddr); pte = pte_offset_kernel(pmd, vaddr);
if (pte_val(*pte)) if (pte_val(*pte))
pte_ERROR(*pte); pte_ERROR(*pte);
set_pte(pte, mk_pte_phys(phys, prot)); set_pte(pte, mk_pte_phys(phys, prot));
......
...@@ -49,7 +49,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo ...@@ -49,7 +49,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
if (address >= end) if (address >= end)
BUG(); BUG();
do { do {
pte_t * pte = pte_alloc(&init_mm, pmd, address); pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
if (!pte) if (!pte)
return -ENOMEM; return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, flags); remap_area_pte(pte, address, end - address, address + phys_addr, flags);
......
...@@ -42,10 +42,6 @@ int main(void) ...@@ -42,10 +42,6 @@ int main(void)
ENTRY(irqrsp); ENTRY(irqrsp);
ENTRY(irqcount); ENTRY(irqcount);
ENTRY(irqstack); ENTRY(irqstack);
ENTRY(pgd_quick);
ENTRY(pmd_quick);
ENTRY(pte_quick);
ENTRY(pgtable_cache_sz);
ENTRY(cpunumber); ENTRY(cpunumber);
ENTRY(irqstackptr); ENTRY(irqstackptr);
ENTRY(me); ENTRY(me);
......
...@@ -413,6 +413,16 @@ static __inline__ unsigned long __ffs(unsigned long word) ...@@ -413,6 +413,16 @@ static __inline__ unsigned long __ffs(unsigned long word)
#ifdef __KERNEL__ #ifdef __KERNEL__
/*
 * sched_find_first_bit - index of the first set bit in a three-word bitmap.
 * @b: bitmap made of three unsigned long words, b[0] holding the lowest bits
 *
 * The words are examined in order; the result is __ffs() of the first
 * non-zero word plus 64 for every word skipped.
 *
 * The final word is passed to __ffs() unconditionally so that every path
 * through this non-void function returns a value.  The earlier form tested
 * b[2] as well and ran off the end of the function when all three words
 * were zero.
 *
 * NOTE(review): callers are expected to pass a bitmap with at least one
 * bit set; the result of __ffs() on a zero word is presumably undefined -
 * confirm against __ffs().
 *
 * Returns the bit index, counted from bit 0 of b[0].
 */
static inline int sched_find_first_bit(unsigned long *b)
{
	if (b[0])
		return __ffs(b[0]);
	if (b[1])
		return __ffs(b[1]) + 64;
	return __ffs(b[2]) + 128;
}
/** /**
* ffs - find first bit set * ffs - find first bit set
* @x: the word to search * @x: the word to search
......
...@@ -6,33 +6,6 @@ ...@@ -6,33 +6,6 @@
#include <asm/atomic.h> #include <asm/atomic.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
/*
* Every architecture must define this function. It's the fastest
* way of searching a 168-bit bitmap where the first 128 bits are
* unlikely to be set. It's guaranteed that at least one of the 168
* bits is cleared.
*/
#if MAX_RT_PRIO != 128 || MAX_PRIO != 168
# error update this function.
#endif
static inline int __sched_find_first_bit(unsigned long *b)
{
if (b[0])
return __ffs(b[0]);
if (b[1])
return __ffs(b[1]) + 64;
if (b[2])
return __ffs(b[2]) + 128;
}
static inline int sched_find_first_bit(unsigned long *b)
{
int n = __sched_find_first_bit(b);
BUG_ON((unsigned)n > 167);
return n;
}
/* /*
* possibly do the LDT unload here? * possibly do the LDT unload here?
*/ */
......
...@@ -112,6 +112,8 @@ static unsigned long start_kernel_map __attribute__((unused)) = __START_KERNEL_m ...@@ -112,6 +112,8 @@ static unsigned long start_kernel_map __attribute__((unused)) = __START_KERNEL_m
#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) #define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT))
#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) #define VALID_PAGE(page) ((page - mem_map) < max_mapnr)
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
......
...@@ -19,11 +19,6 @@ struct x8664_pda { ...@@ -19,11 +19,6 @@ struct x8664_pda {
struct task_struct *pcurrent; /* Current process */ struct task_struct *pcurrent; /* Current process */
int irqcount; /* Irq nesting counter. Starts with -1 */ int irqcount; /* Irq nesting counter. Starts with -1 */
int cpunumber; /* Logical CPU number */ int cpunumber; /* Logical CPU number */
/* XXX: could be a single list */
unsigned long *pgd_quick;
unsigned long *pmd_quick;
unsigned long *pte_quick;
unsigned long pgtable_cache_sz;
char *irqstackptr; char *irqstackptr;
unsigned int __softirq_pending; unsigned int __softirq_pending;
unsigned int __local_irq_count; unsigned int __local_irq_count;
......
...@@ -8,173 +8,74 @@ ...@@ -8,173 +8,74 @@
#include <linux/threads.h> #include <linux/threads.h>
#include <linux/mm.h> #include <linux/mm.h>
#define inc_pgcache_size() add_pda(pgtable_cache_sz,1UL) #define pmd_populate_kernel(mm, pmd, pte) \
#define dec_pgcache_size() sub_pda(pgtable_cache_sz,1UL)
#define pmd_populate(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte))) set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
#define pgd_populate(mm, pgd, pmd) \ #define pgd_populate(mm, pgd, pmd) \
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pmd))) set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pmd)))
extern __inline__ pmd_t *get_pmd_slow(void) static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
{ {
pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL); set_pmd(pmd, __pmd(_PAGE_TABLE |
((u64)(pte - mem_map) << PAGE_SHIFT)));
if (ret)
memset(ret, 0, PAGE_SIZE);
return ret;
} }
extern __inline__ pmd_t *get_pmd_fast(void) extern __inline__ pmd_t *get_pmd(void)
{ {
unsigned long *ret; return (pmd_t *)get_zeroed_page(GFP_KERNEL);
preempt_disable();
ret = read_pda(pmd_quick);
if (ret) {
write_pda(pmd_quick, (unsigned long *)(*ret));
ret[0] = 0;
dec_pgcache_size();
}
preempt_enable();
if (!ret)
ret = (unsigned long *)get_pmd_slow();
return (pmd_t *)ret;
} }
extern __inline__ void pmd_free(pmd_t *pmd) extern __inline__ void pmd_free(pmd_t *pmd)
{
preempt_disable();
*(unsigned long *)pmd = (unsigned long) read_pda(pmd_quick);
write_pda(pmd_quick,(unsigned long *) pmd);
inc_pgcache_size();
preempt_enable();
}
extern __inline__ void pmd_free_slow(pmd_t *pmd)
{ {
if ((unsigned long)pmd & (PAGE_SIZE-1)) if ((unsigned long)pmd & (PAGE_SIZE-1))
BUG(); BUG();
free_page((unsigned long)pmd); free_page((unsigned long)pmd);
} }
static inline pmd_t *pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr)
{
unsigned long *ret;
preempt_disable();
ret = (unsigned long *)read_pda(pmd_quick);
if (__builtin_expect(ret != NULL, 1)) {
write_pda(pmd_quick, (unsigned long *)(*ret));
ret[0] = 0;
dec_pgcache_size();
}
preempt_enable();
return (pmd_t *)ret;
}
static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr) static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
{ {
pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL); return (pmd_t *) get_zeroed_page(GFP_KERNEL);
if (__builtin_expect(pmd != NULL, 1))
clear_page(pmd);
return pmd;
}
static inline pgd_t *pgd_alloc_one_fast (void)
{
unsigned long *ret;
preempt_disable();
ret = read_pda(pgd_quick);
if (likely(ret != NULL)) {
write_pda(pgd_quick,(unsigned long *)(*ret));
ret[0] = 0;
dec_pgcache_size();
}
preempt_enable();
return (pgd_t *) ret;
} }
static inline pgd_t *pgd_alloc (struct mm_struct *mm) static inline pgd_t *pgd_alloc (struct mm_struct *mm)
{ {
/* the VM system never calls pgd_alloc_one_fast(), so we do it here. */ return (pgd_t *)get_zeroed_page(GFP_KERNEL);
pgd_t *pgd = pgd_alloc_one_fast();
if (pgd == NULL) {
pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
if (__builtin_expect(pgd != NULL, 1))
clear_page(pgd);
}
return pgd;
} }
static inline void pgd_free (pgd_t *pgd) static inline void pgd_free (pgd_t *pgd)
{
preempt_disable();
*(unsigned long *)pgd = (unsigned long) read_pda(pgd_quick);
write_pda(pgd_quick,(unsigned long *) pgd);
inc_pgcache_size();
preempt_enable();
}
static inline void pgd_free_slow (pgd_t *pgd)
{ {
if ((unsigned long)pgd & (PAGE_SIZE-1)) if ((unsigned long)pgd & (PAGE_SIZE-1))
BUG(); BUG();
free_page((unsigned long)pgd); free_page((unsigned long)pgd);
} }
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{ {
pte_t *pte; return (pte_t *) get_zeroed_page(GFP_KERNEL);
pte = (pte_t *) __get_free_page(GFP_KERNEL);
if (pte)
clear_page(pte);
return pte;
} }
extern __inline__ pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{ {
unsigned long *ret; void *p = (void *)get_zeroed_page(GFP_KERNEL);
if (!p)
preempt_disable(); return NULL;
if(__builtin_expect((ret = read_pda(pte_quick)) != NULL, !0)) { return virt_to_page(p);
write_pda(pte_quick, (unsigned long *)(*ret));
ret[0] = ret[1];
dec_pgcache_size();
}
preempt_enable();
return (pte_t *)ret;
} }
/* Should really implement gc for free page table pages. This could be done with /* Should really implement gc for free page table pages. This could be
a reference count in struct page. */ done with a reference count in struct page. */
extern __inline__ void pte_free(pte_t *pte)
{
preempt_disable();
*(unsigned long *)pte = (unsigned long) read_pda(pte_quick);
write_pda(pte_quick, (unsigned long *) pte);
inc_pgcache_size();
preempt_enable();
}
extern __inline__ void pte_free_slow(pte_t *pte) extern __inline__ void pte_free_kernel(pte_t *pte)
{ {
if ((unsigned long)pte & (PAGE_SIZE-1)) if ((unsigned long)pte & (PAGE_SIZE-1))
BUG(); BUG();
free_page((unsigned long)pte); free_page((unsigned long)pte);
} }
/*
 * pte_free - release a page-table page identified by its struct page.
 * @pte: page to release; handed directly to __free_page()
 */
extern inline void pte_free(struct page *pte)
{
__free_page(pte);
}
extern int do_check_pgt_cache(int, int);
/* /*
* TLB flushing: * TLB flushing:
......
...@@ -26,7 +26,7 @@ extern pgd_t level3_ident_pgt[512], swapper_pg_dir[512]; ...@@ -26,7 +26,7 @@ extern pgd_t level3_ident_pgt[512], swapper_pg_dir[512];
extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_kernel_pgt[512];
extern void paging_init(void); extern void paging_init(void);
/* Caches aren't brain-dead on the intel. */ /* Caches aren't brain-dead. */
#define flush_cache_all() do { } while (0) #define flush_cache_all() do { } while (0)
#define flush_cache_mm(mm) do { } while (0) #define flush_cache_mm(mm) do { } while (0)
#define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0)
...@@ -35,6 +35,7 @@ extern void paging_init(void); ...@@ -35,6 +35,7 @@ extern void paging_init(void);
#define flush_dcache_page(page) do { } while (0) #define flush_dcache_page(page) do { } while (0)
#define flush_icache_range(start, end) do { } while (0) #define flush_icache_range(start, end) do { } while (0)
#define flush_icache_page(vma,pg) do { } while (0) #define flush_icache_page(vma,pg) do { } while (0)
#define flush_icache_user_range(vma,pg,adr,len) do { } while (0)
#define __flush_tlb() \ #define __flush_tlb() \
do { \ do { \
...@@ -341,8 +342,10 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) ...@@ -341,8 +342,10 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define page_pte(page) page_pte_prot(page, __pgprot(0)) #define page_pte(page) page_pte_prot(page, __pgprot(0))
#define pmd_page(pmd) \ #define pmd_page_kernel(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
#define pmd_page(pmd) \
(mem_map + (pmd_val(pmd) >> PAGE_SHIFT))
/* to find an entry in a page-table-directory. */ /* to find an entry in a page-table-directory. */
#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) #define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
...@@ -360,9 +363,15 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) ...@@ -360,9 +363,15 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
/* Find an entry in the third-level page table.. */ /* Find an entry in the third-level page table.. */
#define __pte_offset(address) \ #define __pte_offset(address) \
((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \ #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
__pte_offset(address)) __pte_offset(address))
#define pte_offset_map(dir,address) pte_offset_kernel(dir,address)
#define pte_offset_map_nested(dir,address) pte_offset_kernel(dir,address)
#define pte_unmap(pte) /* NOP */
#define pte_unmap_nested(pte) /* NOP */
/* never use these in the common code */ /* never use these in the common code */
#define level4_page(level4) ((unsigned long) __va(level4_val(level4) & PAGE_MASK)) #define level4_page(level4) ((unsigned long) __va(level4_val(level4) & PAGE_MASK))
#define level4_index(address) ((address >> LEVEL4_SHIFT) & (PTRS_PER_LEVEL4-1)) #define level4_index(address) ((address >> LEVEL4_SHIFT) & (PTRS_PER_LEVEL4-1))
......
...@@ -18,7 +18,7 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next); ...@@ -18,7 +18,7 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next);
#define prepare_to_switch() do { } while(0) #define prepare_to_switch() do { } while(0)
#define switch_to(prev,next,last) do { \ #define switch_to(prev,next) do { \
asm volatile("pushq %%rbp\n\t" \ asm volatile("pushq %%rbp\n\t" \
"pushq %%rbx\n\t" \ "pushq %%rbx\n\t" \
"pushq %%r8\n\t" \ "pushq %%r8\n\t" \
...@@ -30,10 +30,10 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next); ...@@ -30,10 +30,10 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next);
"pushq %%r14\n\t" \ "pushq %%r14\n\t" \
"pushq %%r15\n\t" \ "pushq %%r15\n\t" \
"movq %%rsp,%0\n\t" /* save RSP */ \ "movq %%rsp,%0\n\t" /* save RSP */ \
"movq %3,%%rsp\n\t" /* restore RSP */ \ "movq %2,%%rsp\n\t" /* restore RSP */ \
"leaq 1f(%%rip),%%rbp\n\t" \ "leaq 1f(%%rip),%%rbp\n\t" \
"movq %%rbp,%1\n\t" /* save RIP */ \ "movq %%rbp,%1\n\t" /* save RIP */ \
"pushq %4\n\t" /* setup new RIP */ \ "pushq %3\n\t" /* setup new RIP */ \
"jmp __switch_to\n\t" \ "jmp __switch_to\n\t" \
"1:\t" \ "1:\t" \
"popq %%r15\n\t" \ "popq %%r15\n\t" \
...@@ -46,8 +46,7 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next); ...@@ -46,8 +46,7 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next);
"popq %%r8\n\t" \ "popq %%r8\n\t" \
"popq %%rbx\n\t" \ "popq %%rbx\n\t" \
"popq %%rbp\n\t" \ "popq %%rbp\n\t" \
:"=m" (prev->thread.rsp),"=m" (prev->thread.rip), \ :"=m" (prev->thread.rsp),"=m" (prev->thread.rip) \
"=b" (last) \
:"m" (next->thread.rsp),"m" (next->thread.rip), \ :"m" (next->thread.rsp),"m" (next->thread.rip), \
"b" (prev), "S" (next), "D" (prev)); \ "b" (prev), "S" (next), "D" (prev)); \
} while (0) } while (0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment