Commit 3d614679 authored by Andi Kleen, committed by Linus Torvalds

[PATCH] x86-64 update for 2.5.5

This patch makes x86-64 compile in 2.5.5 and syncs it with changes in the i386
port. It also fixes some bugs that were discovered in recent testing:
- Enhance 32-bit emulation and fix bugs.
- Fix a security hole in vmalloc fault handling.
- Do not use the lockless gettimeofday for now because it is buggy.
The patch only changes x86_64-specific files.

-Andi
parent 4fff2c1a
......@@ -29,7 +29,7 @@ define_int CONFIG_X86_L1_CACHE_BYTES 64
define_int CONFIG_X86_L1_CACHE_SHIFT 6
define_bool CONFIG_X86_TSC y
define_bool CONFIG_X86_GOOD_APIC y
define_bool CONFIG_X86_CMPXCHG
define_bool CONFIG_X86_CMPXCHG y
tristate '/dev/cpu/*/msr - Model-specific register support' CONFIG_X86_MSR
tristate '/dev/cpu/*/cpuid - CPU information support' CONFIG_X86_CPUID
......@@ -72,6 +72,7 @@ bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG
if [ "$CONFIG_HOTPLUG" = "y" ] ; then
source drivers/pcmcia/Config.in
source drivers/hotplug/Config.in
else
define_bool CONFIG_PCMCIA n
fi
......@@ -80,8 +81,8 @@ if [ "$CONFIG_PROC_FS" = "y" ]; then
define_bool CONFIG_KCORE_ELF y
fi
# We probably are not going to support a.out, are we? Or should we support a.out in i386 compatibility mode?
#tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT
tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
#tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT
tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
bool 'Power Management support' CONFIG_PM
......
......@@ -37,6 +37,7 @@ CONFIG_X86_L1_CACHE_BYTES=64
CONFIG_X86_L1_CACHE_SHIFT=6
CONFIG_X86_TSC=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_CMPXCHG=y
CONFIG_X86_MSR=y
CONFIG_X86_CPUID=y
# CONFIG_MATH_EMULATION is not set
......@@ -59,16 +60,7 @@ CONFIG_BINFMT_ELF=y
# CONFIG_BINFMT_MISC is not set
CONFIG_PM=y
CONFIG_IA32_EMULATION=y
CONFIG_ACPI=y
CONFIG_ACPI_DEBUG=y
CONFIG_ACPI_BUSMGR=y
CONFIG_ACPI_SYS=y
CONFIG_ACPI_CPU=y
CONFIG_ACPI_BUTTON=y
CONFIG_ACPI_AC=y
CONFIG_ACPI_EC=y
CONFIG_ACPI_CMBATT=y
CONFIG_ACPI_THERMAL=y
# CONFIG_ACPI is not set
#
# Memory Technology Devices (MTD)
......@@ -99,9 +91,8 @@ CONFIG_ACPI_THERMAL=y
# CONFIG_BLK_DEV_DAC960 is not set
# CONFIG_BLK_DEV_LOOP is not set
# CONFIG_BLK_DEV_NBD is not set
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=4096
CONFIG_BLK_DEV_INITRD=y
# CONFIG_BLK_DEV_RAM is not set
# CONFIG_BLK_DEV_INITRD is not set
#
# Multi-device support (RAID and LVM)
......@@ -388,7 +379,6 @@ CONFIG_EXT2_FS=y
# CONFIG_UDF_RW is not set
# CONFIG_UFS_FS is not set
# CONFIG_UFS_FS_WRITE is not set
CONFIG_SIMICSFS=y
#
# Network File Systems
......
......@@ -12,6 +12,9 @@
#include <asm/ptrace.h>
#include <asm/processor.h>
struct file;
struct elf_phdr;
#define IA32_EMULATOR 1
#define IA32_PAGE_OFFSET 0xE0000000
......@@ -77,7 +80,6 @@ do { \
__asm__("movl %0,%%fs": :"r" (0)); \
__asm__("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \
wrmsrl(MSR_KERNEL_GS_BASE, 0); \
set_thread_flag(TIF_IA32); \
(regs)->rip = (new_rip); \
(regs)->rsp = (new_rsp); \
(regs)->eflags = 0x200; \
......@@ -87,6 +89,8 @@ do { \
} while(0)
#define elf_map elf32_map
MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries.");
MODULE_AUTHOR("Eric Youngdale, Andi Kleen");
......@@ -102,6 +106,7 @@ static void elf32_init(struct pt_regs *);
static void elf32_init(struct pt_regs *regs)
{
struct task_struct *me = current;
regs->rdi = 0;
regs->rsi = 0;
regs->rdx = 0;
......@@ -109,9 +114,13 @@ static void elf32_init(struct pt_regs *regs)
regs->rax = 0;
regs->rbx = 0;
regs->rbp = 0;
current->thread.fs = 0; current->thread.gs = 0;
current->thread.fsindex = 0; current->thread.gsindex = 0;
current->thread.ds = __USER_DS; current->thread.es == __USER_DS;
me->thread.fs = 0;
me->thread.gs = 0;
me->thread.fsindex = 0;
me->thread.gsindex = 0;
me->thread.ds = __USER_DS;
me->thread.es = __USER_DS;
set_thread_flag(TIF_IA32);
}
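Worth noting in the rewritten initialization above: the removed code used a comparison where an assignment was intended, so %es was never actually set up for 32-bit processes. The new version fixes that alongside the switch to the cached me pointer:

```c
/* old (removed) line: '==' compares and discards the result */
current->thread.es == __USER_DS;   /* no-op */
/* new line: actually assigns the 32-bit data segment */
me->thread.es = __USER_DS;
```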
extern void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address);
......@@ -162,4 +171,17 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm)
return 0;
}
static unsigned long
elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
{
unsigned long map_addr;
struct task_struct *me = current;
down_write(&me->mm->mmap_sem);
map_addr = do_mmap(filep, ELF_PAGESTART(addr),
eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type|MAP_32BIT,
eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
up_write(&me->mm->mmap_sem);
return(map_addr);
}
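The new elf32_map() forces MAP_32BIT so every segment of a 32-bit binary lands at an address representable in 32 bits. The same flag is available to userspace on x86-64 Linux; a minimal demo (assumes glibc, with _GNU_SOURCE for the MAP_32BIT definition):

```c
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
        /* MAP_32BIT places the mapping in the low 2 GiB of the address space */
        void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
        if (p == MAP_FAILED)
                return 1;
        printf("mapped at %p\n", p);    /* prints an address below 0x80000000 */
        munmap(p, 4096);
        return 0;
}
```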
......@@ -3083,8 +3083,6 @@ COMPATIBLE_IOCTL(BLKROSET)
COMPATIBLE_IOCTL(BLKROGET)
COMPATIBLE_IOCTL(BLKRRPART)
COMPATIBLE_IOCTL(BLKFLSBUF)
COMPATIBLE_IOCTL(BLKRASET)
COMPATIBLE_IOCTL(BLKFRASET)
COMPATIBLE_IOCTL(BLKSECTSET)
COMPATIBLE_IOCTL(BLKSSZGET)
......@@ -3596,10 +3594,8 @@ HANDLE_IOCTL(SIOCDELRT, routing_ioctl)
HANDLE_IOCTL(SIOCRTMSG, ret_einval)
HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
HANDLE_IOCTL(BLKRAGET, w_long)
HANDLE_IOCTL(BLKGETSIZE, w_long)
HANDLE_IOCTL(0x1260, broken_blkgetsize)
HANDLE_IOCTL(BLKFRAGET, w_long)
HANDLE_IOCTL(BLKSECTGET, w_long)
HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans)
HANDLE_IOCTL(FBIOGETCMAP, fb_ioctl_trans)
......
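For context on the ioctl tables touched above: COMPATIBLE_IOCTL registers a command whose argument layout is identical for 32-bit and 64-bit callers and can be passed through untouched, while HANDLE_IOCTL attaches a translation handler. The w_long handler used for BLKGETSIZE and friends bridges the case where the native ioctl writes a full kernel long but the 32-bit caller supplied room for 32 bits. Roughly, as a simplified sketch of the era's pattern with error handling trimmed:

```c
static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg)
{
        mm_segment_t old_fs = get_fs();
        unsigned long val;
        int err;

        /* let the native ioctl write into a kernel long ... */
        set_fs(KERNEL_DS);
        err = sys_ioctl(fd, cmd, (unsigned long)&val);
        set_fs(old_fs);
        /* ... then hand the low 32 bits back to the 32-bit caller */
        if (!err && put_user(val, (u32 *)arg))
                return -EFAULT;
        return err;
}
```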
......@@ -82,7 +82,7 @@ sys32_sigsuspend(int history0, int history1, old_sigset_t mask, struct pt_regs r
spin_lock_irq(&current->sigmask_lock);
saveset = current->blocked;
siginitset(&current->blocked, mask);
recalc_sigpending(current);
recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock);
regs.rax = -EINTR;
......@@ -225,7 +225,7 @@ asmlinkage int sys32_sigreturn(struct pt_regs regs)
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sigmask_lock);
current->blocked = set;
recalc_sigpending(current);
recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock);
if (restore_sigcontext(&regs, &frame->sc, &eax))
......@@ -252,7 +252,7 @@ asmlinkage int sys32_rt_sigreturn(struct pt_regs regs)
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sigmask_lock);
current->blocked = set;
recalc_sigpending(current);
recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock);
if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax))
......
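The recalc_sigpending() changes here (and in the native signal code below) are a mechanical sync with 2.5, where the function lost its task argument and always operates on the current task:

```c
/* 2.4-era interface */
void recalc_sigpending(struct task_struct *t);
/* 2.5-era interface: implicitly recalculates for current */
void recalc_sigpending(void);
```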
......@@ -140,7 +140,6 @@ void cpu_idle (void)
while (!need_resched())
idle();
schedule();
check_pgt_cache();
}
}
......
......@@ -420,9 +420,11 @@ asmlinkage void syscall_trace(struct pt_regs *regs)
current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
? 0x80 : 0);
preempt_disable();
current->state = TASK_STOPPED;
notify_parent(current, SIGCHLD);
schedule();
preempt_enable();
/*
* this isn't the same as continuing with a signal, but it will do
* for normal use. strace only continues with a signal if the
......
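The preempt_disable()/preempt_enable() pair added here (and twice more in do_signal() below) guards the stop sequence on preemptible kernels: the task marks itself TASK_STOPPED, notifies its parent, and must reach schedule() without being preempted in between. Roughly:

```c
preempt_disable();
current->state = TASK_STOPPED;   /* from here until schedule(), an      */
notify_parent(current, SIGCHLD); /* untimely preemption could catch the */
schedule();                      /* task half-stopped                   */
preempt_enable();
```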
......@@ -89,7 +89,7 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs regs)
spin_lock_irq(&current->sigmask_lock);
saveset = current->blocked;
current->blocked = newset;
recalc_sigpending(current);
recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock);
#if DEBUG_SIG
printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
......@@ -200,7 +200,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs regs)
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sigmask_lock);
current->blocked = set;
recalc_sigpending(current);
recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock);
if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax))
......@@ -431,7 +431,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
spin_lock_irq(&current->sigmask_lock);
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
sigaddset(&current->blocked,sig);
recalc_sigpending(current);
recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock);
}
}
......@@ -473,9 +473,11 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
/* Let the debugger run. */
current->exit_code = signr;
preempt_disable();
current->state = TASK_STOPPED;
notify_parent(current, SIGCHLD);
schedule();
preempt_enable();
/* We're back. Did the debugger cancel the sig? */
if (!(signr = current->exit_code))
......@@ -530,12 +532,14 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
case SIGSTOP: {
struct signal_struct *sig;
preempt_disable();
current->state = TASK_STOPPED;
current->exit_code = signr;
sig = current->p_pptr->sig;
if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
notify_parent(current, SIGCHLD);
schedule();
preempt_enable();
continue;
}
......
......@@ -120,6 +120,62 @@ spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
extern spinlock_t i8259A_lock;
static inline unsigned long do_fast_gettimeoffset(void)
{
register unsigned long eax, edx;
/* Read the Time Stamp Counter */
rdtsc(eax,edx);
/* .. relative to previous jiffy (32 bits is enough) */
eax -= last_tsc_low; /* tsc_low delta */
/*
* Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
* = (tsc_low delta) * (usecs_per_clock)
* = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
*
* Using a mull instead of a divl saves up to 31 clock cycles
* in the critical path.
*/
edx = (eax*fast_gettimeoffset_quotient) >> 32;
/* our adjusted time offset in microseconds */
return delay_at_last_interrupt + edx;
}
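The multiply-and-shift above is 32.32 fixed-point arithmetic: fast_gettimeoffset_quotient is precomputed as (microseconds per jiffy << 32) / (TSC clocks per jiffy), so scaling a clock delta costs one multiplication and a shift instead of a division. A standalone demo with illustrative numbers (1 GHz CPU, HZ=100):

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t clocks_per_jiffy = 10000000;   /* 1 GHz / 100 Hz */
        uint64_t usecs_per_jiffy  = 10000;      /* 1000000 / 100  */
        /* precomputed once at boot */
        uint64_t quotient = (usecs_per_jiffy << 32) / clocks_per_jiffy;
        uint64_t delta = 5000000;               /* TSC ticks since the tick */
        /* hot path: one mul + shift, no divide */
        uint64_t usec = (delta * quotient) >> 32;
        printf("%llu usec\n", (unsigned long long)usec);        /* ~5000 */
        return 0;
}
```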
/*
* This version of gettimeofday has microsecond resolution
* and better than microsecond precision on fast x86 machines with TSC.
*/
void do_gettimeofday(struct timeval *tv)
{
unsigned long flags;
unsigned long usec, sec;
read_lock_irqsave(&xtime_lock, flags);
usec = do_gettimeoffset();
{
unsigned long lost = jiffies - wall_jiffies;
if (lost)
usec += lost * (1000000 / HZ);
}
sec = xtime.tv_sec;
usec += xtime.tv_usec;
read_unlock_irqrestore(&xtime_lock, flags);
while (usec >= 1000000) {
usec -= 1000000;
sec++;
}
tv->tv_sec = sec;
tv->tv_usec = usec;
}
void do_settimeofday(struct timeval *tv)
{
write_lock_irq(&xtime_lock);
......@@ -484,7 +540,7 @@ void __init time_init(void)
* clock/second. Our precision is about 100 ppm.
*/
{
cpu_khz = ((1000000*(1UL<<32)) / tsc_quotient); /* FIXME: is it right? */
cpu_khz = ((1000*(1UL<<32)) / tsc_quotient);
printk("Detected %ld Hz processor.\n", cpu_khz);
}
}
......
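The corrected factor follows from what the calibration returns: tsc_quotient is (2^32 * microseconds) / clocks, i.e. 2^32 divided by clocks-per-microsecond, so (1000 << 32) / tsc_quotient yields clocks per millisecond, which is exactly kHz. The old factor of 1000000 made the variable hold Hz, and the printk string still saying "Hz" now appears to be a leftover units mismatch. A quick sanity check:

```c
/* for a 1 GHz CPU (clocks_per_usec = 1000):
 *   tsc_quotient = 2^32 / 1000          = 4294967
 *   cpu_khz = (1000 * 2^32) / 4294967   ≈ 1000000, i.e. 1 GHz in kHz
 * the old factor of 1000000 gave ≈ 1000000000, i.e. Hz */
```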
......@@ -60,9 +60,6 @@ static inline void timeval_normalize(struct timeval * tv)
long __vxtime_sequence[2] __section_vxtime_sequence;
/* The rest of the kernel knows it as this. */
extern void do_gettimeofday(struct timeval *tv) __attribute__((alias("do_vgettimeofday")));
inline void do_vgettimeofday(struct timeval * tv)
{
long sequence;
......
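This hunk implements the "do not use lockless gettimeofday" item from the changelog: deleting the GCC alias that made do_gettimeofday() resolve to do_vgettimeofday() reroutes all callers to the spinlock-protected version in time.c, while the vsyscall code itself stays in the tree. The lockless reader it disables is a sequence-counter scheme, roughly (details and barriers simplified):

```c
long sequence;
do {
        sequence = __vxtime_sequence[1]; /* one counter is bumped before,  */
        rmb();                           /* the other after, each update   */
        /* ... sample xtime and the TSC ... */
        rmb();
} while (sequence != __vxtime_sequence[0]); /* retry if a writer interleaved */
```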
......@@ -89,7 +89,6 @@ EXPORT_SYMBOL_NOVERS(__put_user_4);
EXPORT_SYMBOL(strtok);
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(simple_strtol);
EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strncpy_from_user);
......
......@@ -112,7 +112,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
mm = tsk->mm;
info.si_code = SEGV_MAPERR;
if (address >= TASK_SIZE)
if (address >= TASK_SIZE && !(error_code & 5))
goto vmalloc_fault;
......
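This is the vmalloc security fix from the changelog. The x86 page-fault error code has hardware-defined bits: bit 0 set means a protection violation on a present page, and bit 2 set means the fault came from user mode. Requiring !(error_code & 5) limits the vmalloc fixup path, which wires kernel mappings into the faulting page table, to kernel-mode faults on not-present pages; previously any fault above TASK_SIZE, including user-mode accesses and protection faults, took that path instead of being treated as a bad access. A sketch of the decoded check (the macro names are illustrative, not from this source):

```c
#define PF_PROT         0x1     /* set: protection fault on a present page */
#define PF_WRITE        0x2     /* set: the access was a write */
#define PF_USER         0x4     /* set: the fault happened in user mode */

static inline int kernel_vmalloc_fault(unsigned long address,
                                       unsigned long error_code)
{
        /* only kernel-mode faults on not-present pages may be fixed
         * up from the reference page tables */
        return address >= TASK_SIZE && !(error_code & (PF_PROT | PF_USER));
}
```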
/*
* linux/arch/i386/mm/init.c
* linux/arch/x86_64/mm/init.c
*
* Copyright (C) 1995 Linus Torvalds
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
* Copyright (C) 2002 Andi Kleen <ak@suse.de>
*/
#include <linux/config.h>
......@@ -39,28 +40,6 @@ mmu_gather_t mmu_gathers[NR_CPUS];
static unsigned long totalram_pages;
int do_check_pgt_cache(int low, int high)
{
int freed = 0;
if(read_pda(pgtable_cache_sz) > high) {
do {
if (read_pda(pgd_quick)) {
pgd_free_slow(pgd_alloc_one_fast());
freed++;
}
if (read_pda(pmd_quick)) {
pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
freed++;
}
if (read_pda(pte_quick)) {
pte_free_slow(pte_alloc_one_fast(NULL, 0));
freed++;
}
} while(read_pda(pgtable_cache_sz) > low);
}
return freed;
}
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
* physical space so we can cache the place of the first one and move
......@@ -89,7 +68,6 @@ void show_mem(void)
printk("%d reserved pages\n",reserved);
printk("%d pages shared\n",shared);
printk("%d pages swap cached\n",cached);
printk("%ld pages in page table cache\n",read_pda(pgtable_cache_sz));
show_buffers();
}
......@@ -138,12 +116,12 @@ static void set_pte_phys(unsigned long vaddr,
if (pmd_none(*pmd)) {
pte = (pte_t *) spp_getpage();
set_pmd(pmd, __pmd(__pa(pte) + 0x7));
if (pte != pte_offset(pmd, 0)) {
if (pte != pte_offset_kernel(pmd, 0)) {
printk("PAGETABLE BUG #02!\n");
return;
}
}
pte = pte_offset(pmd, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
if (pte_val(*pte))
pte_ERROR(*pte);
set_pte(pte, mk_pte_phys(phys, prot));
......
......@@ -49,7 +49,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
if (address >= end)
BUG();
do {
pte_t * pte = pte_alloc(&init_mm, pmd, address);
pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, flags);
......
......@@ -42,10 +42,6 @@ int main(void)
ENTRY(irqrsp);
ENTRY(irqcount);
ENTRY(irqstack);
ENTRY(pgd_quick);
ENTRY(pmd_quick);
ENTRY(pte_quick);
ENTRY(pgtable_cache_sz);
ENTRY(cpunumber);
ENTRY(irqstackptr);
ENTRY(me);
......
......@@ -413,6 +413,16 @@ static __inline__ unsigned long __ffs(unsigned long word)
#ifdef __KERNEL__
static inline int sched_find_first_bit(unsigned long *b)
{
if (b[0])
return __ffs(b[0]);
if (b[1])
return __ffs(b[1]) + 64;
if (b[2])
return __ffs(b[2]) + 128;
}
/**
* ffs - find first bit set
* @x: the word to search
......
......@@ -6,33 +6,6 @@
#include <asm/atomic.h>
#include <asm/pgalloc.h>
/*
* Every architecture must define this function. It's the fastest
* way of searching a 168-bit bitmap where the first 128 bits are
* unlikely to be set. It's guaranteed that at least one of the 168
* bits is cleared.
*/
#if MAX_RT_PRIO != 128 || MAX_PRIO != 168
# error update this function.
#endif
static inline int __sched_find_first_bit(unsigned long *b)
{
if (b[0])
return __ffs(b[0]);
if (b[1])
return __ffs(b[1]) + 64;
if (b[2])
return __ffs(b[2]) + 128;
}
static inline int sched_find_first_bit(unsigned long *b)
{
int n = __sched_find_first_bit(b);
BUG_ON((unsigned)n > 167);
return n;
}
/*
* possibly do the LDT unload here?
*/
......
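Both the new bitops.h version and the removed mmu_context.h version rely on the O(1) scheduler's invariant that at least one of the 168 priority bits is set; note the helper falls through without a return value when all three words are zero, so the guarantee is load-bearing (the removed comment's "at least one of the 168 bits is cleared" presumably means "set"). A userspace analogue using the GCC builtin:

```c
#include <stdio.h>

/* find the first set bit in a 168-bit priority map (3 x 64-bit words);
 * the caller must guarantee at least one bit is set */
static int find_first_bit168(const unsigned long *b)
{
        if (b[0])
                return __builtin_ctzl(b[0]);
        if (b[1])
                return __builtin_ctzl(b[1]) + 64;
        return __builtin_ctzl(b[2]) + 128;
}

int main(void)
{
        unsigned long b[3] = { 0, 0, 1UL << 11 };
        printf("%d\n", find_first_bit168(b));   /* 139 */
        return 0;
}
```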
......@@ -112,6 +112,8 @@ static unsigned long start_kernel_map __attribute__((unused)) = __START_KERNEL_m
#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT))
#define VALID_PAGE(page) ((page - mem_map) < max_mapnr)
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
#endif /* __KERNEL__ */
......
......@@ -19,11 +19,6 @@ struct x8664_pda {
struct task_struct *pcurrent; /* Current process */
int irqcount; /* Irq nesting counter. Starts with -1 */
int cpunumber; /* Logical CPU number */
/* XXX: could be a single list */
unsigned long *pgd_quick;
unsigned long *pmd_quick;
unsigned long *pte_quick;
unsigned long pgtable_cache_sz;
char *irqstackptr;
unsigned int __softirq_pending;
unsigned int __local_irq_count;
......
......@@ -8,173 +8,74 @@
#include <linux/threads.h>
#include <linux/mm.h>
#define inc_pgcache_size() add_pda(pgtable_cache_sz,1UL)
#define dec_pgcache_size() sub_pda(pgtable_cache_sz,1UL)
#define pmd_populate(mm, pmd, pte) \
#define pmd_populate_kernel(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
#define pgd_populate(mm, pgd, pmd) \
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pmd)))
extern __inline__ pmd_t *get_pmd_slow(void)
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
{
pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL);
if (ret)
memset(ret, 0, PAGE_SIZE);
return ret;
set_pmd(pmd, __pmd(_PAGE_TABLE |
((u64)(pte - mem_map) << PAGE_SHIFT)));
}
extern __inline__ pmd_t *get_pmd_fast(void)
extern __inline__ pmd_t *get_pmd(void)
{
unsigned long *ret;
preempt_disable();
ret = read_pda(pmd_quick);
if (ret) {
write_pda(pmd_quick, (unsigned long *)(*ret));
ret[0] = 0;
dec_pgcache_size();
}
preempt_enable();
if (!ret)
ret = (unsigned long *)get_pmd_slow();
return (pmd_t *)ret;
return (pmd_t *)get_zeroed_page(GFP_KERNEL);
}
extern __inline__ void pmd_free(pmd_t *pmd)
{
preempt_disable();
*(unsigned long *)pmd = (unsigned long) read_pda(pmd_quick);
write_pda(pmd_quick,(unsigned long *) pmd);
inc_pgcache_size();
preempt_enable();
}
extern __inline__ void pmd_free_slow(pmd_t *pmd)
{
if ((unsigned long)pmd & (PAGE_SIZE-1))
BUG();
free_page((unsigned long)pmd);
}
static inline pmd_t *pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr)
{
unsigned long *ret;
preempt_disable();
ret = (unsigned long *)read_pda(pmd_quick);
if (__builtin_expect(ret != NULL, 1)) {
write_pda(pmd_quick, (unsigned long *)(*ret));
ret[0] = 0;
dec_pgcache_size();
}
preempt_enable();
return (pmd_t *)ret;
}
static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
{
pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL);
if (__builtin_expect(pmd != NULL, 1))
clear_page(pmd);
return pmd;
}
static inline pgd_t *pgd_alloc_one_fast (void)
{
unsigned long *ret;
preempt_disable();
ret = read_pda(pgd_quick);
if (likely(ret != NULL)) {
write_pda(pgd_quick,(unsigned long *)(*ret));
ret[0] = 0;
dec_pgcache_size();
}
preempt_enable();
return (pgd_t *) ret;
return (pmd_t *) get_zeroed_page(GFP_KERNEL);
}
static inline pgd_t *pgd_alloc (struct mm_struct *mm)
{
/* the VM system never calls pgd_alloc_one_fast(), so we do it here. */
pgd_t *pgd = pgd_alloc_one_fast();
if (pgd == NULL) {
pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
if (__builtin_expect(pgd != NULL, 1))
clear_page(pgd);
}
return pgd;
return (pgd_t *)get_zeroed_page(GFP_KERNEL);
}
static inline void pgd_free (pgd_t *pgd)
{
preempt_disable();
*(unsigned long *)pgd = (unsigned long) read_pda(pgd_quick);
write_pda(pgd_quick,(unsigned long *) pgd);
inc_pgcache_size();
preempt_enable();
}
static inline void pgd_free_slow (pgd_t *pgd)
{
if ((unsigned long)pgd & (PAGE_SIZE-1))
BUG();
free_page((unsigned long)pgd);
}
static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte;
pte = (pte_t *) __get_free_page(GFP_KERNEL);
if (pte)
clear_page(pte);
return pte;
return (pte_t *) get_zeroed_page(GFP_KERNEL);
}
extern __inline__ pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
unsigned long *ret;
preempt_disable();
if(__builtin_expect((ret = read_pda(pte_quick)) != NULL, !0)) {
write_pda(pte_quick, (unsigned long *)(*ret));
ret[0] = ret[1];
dec_pgcache_size();
}
preempt_enable();
return (pte_t *)ret;
void *p = (void *)get_zeroed_page(GFP_KERNEL);
if (!p)
return NULL;
return virt_to_page(p);
}
/* Should really implement gc for free page table pages. This could be done with
a reference count in struct page. */
extern __inline__ void pte_free(pte_t *pte)
{
preempt_disable();
*(unsigned long *)pte = (unsigned long) read_pda(pte_quick);
write_pda(pte_quick, (unsigned long *) pte);
inc_pgcache_size();
preempt_enable();
}
/* Should really implement gc for free page table pages. This could be
done with a reference count in struct page. */
extern __inline__ void pte_free_slow(pte_t *pte)
extern __inline__ void pte_free_kernel(pte_t *pte)
{
if ((unsigned long)pte & (PAGE_SIZE-1))
BUG();
free_page((unsigned long)pte);
}
extern inline void pte_free(struct page *pte)
{
__free_page(pte);
}
extern int do_check_pgt_cache(int, int);
/*
* TLB flushing:
......
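The big pgalloc.h rewrite above removes the per-CPU quicklists (pgd_quick/pmd_quick/pte_quick plus the pgtable_cache_sz counter in the PDA) in favor of plain get_zeroed_page()/free_page(), which is why do_check_pgt_cache(), the cpu_idle() trimming call, and the PDA fields disappear elsewhere in this patch. The removed scheme chained free page-table pages through their first word; in simplified form:

```c
/* illustration of the removed quicklist scheme, not current code:
 * freed page-table pages are pushed on a per-CPU list, linked through
 * their first word, and popped on the next allocation */
static unsigned long *quick;    /* head; per-CPU via the PDA in the original */

static void *quick_alloc(void)
{
        unsigned long *p = quick;
        if (p) {
                quick = (unsigned long *)*p;    /* pop */
                p[0] = 0;                       /* re-zero the link word */
        }
        return p;       /* NULL means fall back to the page allocator */
}

static void quick_free(unsigned long *p)
{
        *p = (unsigned long)quick;              /* push */
        quick = p;
}
```

The hunk also syncs the pte interface with 2.5's i386 tree: pte_alloc_one() now returns a struct page * and pmd_populate() takes one, while the _kernel variants keep the old pointer-based interface.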
......@@ -26,7 +26,7 @@ extern pgd_t level3_ident_pgt[512], swapper_pg_dir[512];
extern pmd_t level2_kernel_pgt[512];
extern void paging_init(void);
/* Caches aren't brain-dead on the intel. */
/* Caches aren't brain-dead. */
#define flush_cache_all() do { } while (0)
#define flush_cache_mm(mm) do { } while (0)
#define flush_cache_range(vma, start, end) do { } while (0)
......@@ -35,6 +35,7 @@ extern void paging_init(void);
#define flush_dcache_page(page) do { } while (0)
#define flush_icache_range(start, end) do { } while (0)
#define flush_icache_page(vma,pg) do { } while (0)
#define flush_icache_user_range(vma,pg,adr,len) do { } while (0)
#define __flush_tlb() \
do { \
......@@ -341,8 +342,10 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define page_pte(page) page_pte_prot(page, __pgprot(0))
#define pmd_page(pmd) \
#define pmd_page_kernel(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
#define pmd_page(pmd) \
(mem_map + (pmd_val(pmd) >> PAGE_SHIFT))
/* to find an entry in a page-table-directory. */
#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
......@@ -360,9 +363,15 @@ extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
/* Find an entry in the third-level page table.. */
#define __pte_offset(address) \
((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
__pte_offset(address))
#define pte_offset_map(dir,address) pte_offset_kernel(dir,address)
#define pte_offset_map_nested(dir,address) pte_offset_kernel(dir,address)
#define pte_unmap(pte) /* NOP */
#define pte_unmap_nested(pte) /* NOP */
/* never use these in the common code */
#define level4_page(level4) ((unsigned long) __va(level4_val(level4) & PAGE_MASK))
#define level4_index(address) ((address >> LEVEL4_SHIFT) & (PTRS_PER_LEVEL4-1))
......
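The pte_offset() split mirrors 2.5's highmem-capable i386 interface: pte_offset_kernel() walks kernel page tables directly, pte_offset_map()/pte_offset_map_nested() may temporarily map the pte page on architectures that keep ptes in highmem, and pte_unmap() undoes that. Since x86-64 has no highmem, the map variants simply alias the kernel variant and unmapping is a no-op. A sketch of the renamed walk, assuming i386-style pgd_offset_k()/pmd_offset() helpers for illustration:

```c
/* resolve a kernel virtual address to its pte; error checks omitted */
static pte_t *lookup_kernel_pte(unsigned long vaddr)
{
        pgd_t *pgd = pgd_offset_k(vaddr);       /* walk init_mm's tables */
        pmd_t *pmd = pmd_offset(pgd, vaddr);
        return pte_offset_kernel(pmd, vaddr);   /* formerly pte_offset() */
}
```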
......@@ -18,7 +18,7 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next);
#define prepare_to_switch() do { } while(0)
#define switch_to(prev,next,last) do { \
#define switch_to(prev,next) do { \
asm volatile("pushq %%rbp\n\t" \
"pushq %%rbx\n\t" \
"pushq %%r8\n\t" \
......@@ -30,10 +30,10 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next);
"pushq %%r14\n\t" \
"pushq %%r15\n\t" \
"movq %%rsp,%0\n\t" /* save RSP */ \
"movq %3,%%rsp\n\t" /* restore RSP */ \
"movq %2,%%rsp\n\t" /* restore RSP */ \
"leaq 1f(%%rip),%%rbp\n\t" \
"movq %%rbp,%1\n\t" /* save RIP */ \
"pushq %4\n\t" /* setup new RIP */ \
"pushq %3\n\t" /* setup new RIP */ \
"jmp __switch_to\n\t" \
"1:\t" \
"popq %%r15\n\t" \
......@@ -46,8 +46,7 @@ extern void __switch_to(struct task_struct *prev, struct task_struct *next);
"popq %%r8\n\t" \
"popq %%rbx\n\t" \
"popq %%rbp\n\t" \
:"=m" (prev->thread.rsp),"=m" (prev->thread.rip), \
"=b" (last) \
:"=m" (prev->thread.rsp),"=m" (prev->thread.rip) \
:"m" (next->thread.rsp),"m" (next->thread.rip), \
"b" (prev), "S" (next), "D" (prev)); \
} while (0)
......
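The switch_to() change drops the third argument together with its "=b" (last) output operand, syncing with the i386 port, where the 2.5 scheduler now invokes switch_to(prev, next). Removing an output operand renumbers everything after it, which is why the asm references shift from %3/%4 to %2/%3. A toy illustration of the numbering rule:

```c
/* operands are numbered outputs first, then inputs:
 * with outputs "=m"(a), "=m"(b) and inputs "m"(c), "m"(d),
 * c is %2 and d is %3; remove one output and they become %1 and %2 */
int a, b = 42;
asm("movl %1, %0" : "=r"(a) : "r"(b));  /* %0 = output a, %1 = input b */
```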