Commit 7669a225 authored by Tony Luck

Pull context-bitmap into release branch

parents cb8a55e4 58cd9082
...@@ -461,6 +461,7 @@ setup_arch (char **cmdline_p) ...@@ -461,6 +461,7 @@ setup_arch (char **cmdline_p)
#endif #endif
cpu_init(); /* initialize the bootstrap CPU */ cpu_init(); /* initialize the bootstrap CPU */
mmu_context_init(); /* initialize context_id bitmap */
#ifdef CONFIG_ACPI #ifdef CONFIG_ACPI
acpi_boot_init(); acpi_boot_init();
......
...@@ -8,6 +8,8 @@ ...@@ -8,6 +8,8 @@
* Modified RID allocation for SMP * Modified RID allocation for SMP
* Goutham Rao <goutham.rao@intel.com> * Goutham Rao <goutham.rao@intel.com>
* IPI based ptc implementation and A-step IPI implementation. * IPI based ptc implementation and A-step IPI implementation.
* Rohit Seth <rohit.seth@intel.com>
* Ken Chen <kenneth.w.chen@intel.com>
*/ */
#include <linux/config.h> #include <linux/config.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -16,78 +18,75 @@ ...@@ -16,78 +18,75 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/bootmem.h>
#include <asm/delay.h> #include <asm/delay.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/pal.h> #include <asm/pal.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/dma.h>
static struct { static struct {
unsigned long mask; /* mask of supported purge page-sizes */ unsigned long mask; /* mask of supported purge page-sizes */
unsigned long max_bits; /* log2() of largest supported purge page-size */ unsigned long max_bits; /* log2 of largest supported purge page-size */
} purge; } purge;
struct ia64_ctx ia64_ctx = { struct ia64_ctx ia64_ctx = {
.lock = SPIN_LOCK_UNLOCKED, .lock = SPIN_LOCK_UNLOCKED,
.next = 1, .next = 1,
.limit = (1 << 15) - 1, /* start out with the safe (architected) limit */
.max_ctx = ~0U .max_ctx = ~0U
}; };
DEFINE_PER_CPU(u8, ia64_need_tlb_flush); DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
/*
 * Allocate the two context-number bitmaps, ia64_ctx.bitmap and
 * ia64_ctx.flushmap.  Each is sized at one bit per context number,
 * i.e. (ia64_ctx.max_ctx + 1) / 8 bytes.
 *
 * Must be called after cpu_init(), which sets ia64_ctx.max_ctx from the
 * maximum RID supported by the boot CPU.
 */
void __init
mmu_context_init (void)
{
	/* both maps have the same size, so compute the byte count once */
	unsigned long map_bytes = (ia64_ctx.max_ctx + 1) >> 3;

	ia64_ctx.bitmap = alloc_bootmem(map_bytes);
	ia64_ctx.flushmap = alloc_bootmem(map_bytes);
}
/* /*
* Acquire the ia64_ctx.lock before calling this function! * Acquire the ia64_ctx.lock before calling this function!
*/ */
void void
wrap_mmu_context (struct mm_struct *mm) wrap_mmu_context (struct mm_struct *mm)
{ {
unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx; int i, cpu;
struct task_struct *tsk; unsigned long flush_bit;
int i;
if (ia64_ctx.next > max_ctx) for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
ia64_ctx.next = 300; /* skip daemons */ flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
ia64_ctx.limit = max_ctx + 1; ia64_ctx.bitmap[i] ^= flush_bit;
}
/* use offset at 300 to skip daemons */
ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
ia64_ctx.max_ctx, 300);
ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
ia64_ctx.max_ctx, ia64_ctx.next);
/* /*
* Scan all the task's mm->context and set proper safe range * can't call flush_tlb_all() here because of race condition
* with O(1) scheduler [EF]
*/ */
cpu = get_cpu(); /* prevent preemption/migration */
read_lock(&tasklist_lock); for_each_online_cpu(i)
repeat: if (i != cpu)
for_each_process(tsk) { per_cpu(ia64_need_tlb_flush, i) = 1;
if (!tsk->mm) put_cpu();
continue;
tsk_context = tsk->mm->context;
if (tsk_context == ia64_ctx.next) {
if (++ia64_ctx.next >= ia64_ctx.limit) {
/* empty range: reset the range limit and start over */
if (ia64_ctx.next > max_ctx)
ia64_ctx.next = 300;
ia64_ctx.limit = max_ctx + 1;
goto repeat;
}
}
if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
ia64_ctx.limit = tsk_context;
}
read_unlock(&tasklist_lock);
/* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
{
int cpu = get_cpu(); /* prevent preemption/migration */
for_each_online_cpu(i) {
if (i != cpu)
per_cpu(ia64_need_tlb_flush, i) = 1;
}
put_cpu();
}
local_flush_tlb_all(); local_flush_tlb_all();
} }
void void
ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits) ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long nbits)
{ {
static DEFINE_SPINLOCK(ptcg_lock); static DEFINE_SPINLOCK(ptcg_lock);
...@@ -135,7 +134,8 @@ local_flush_tlb_all (void) ...@@ -135,7 +134,8 @@ local_flush_tlb_all (void)
} }
void void
flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end) flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{ {
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
unsigned long size = end - start; unsigned long size = end - start;
...@@ -149,7 +149,8 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long ...@@ -149,7 +149,8 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
#endif #endif
nbits = ia64_fls(size + 0xfff); nbits = ia64_fls(size + 0xfff);
while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
(nbits < purge.max_bits))
++nbits; ++nbits;
if (nbits > purge.max_bits) if (nbits > purge.max_bits)
nbits = purge.max_bits; nbits = purge.max_bits;
...@@ -191,5 +192,5 @@ ia64_tlb_init (void) ...@@ -191,5 +192,5 @@ ia64_tlb_init (void)
local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
} }
...@@ -7,12 +7,13 @@ ...@@ -7,12 +7,13 @@
*/ */
/* /*
* Routines to manage the allocation of task context numbers. Task context numbers are * Routines to manage the allocation of task context numbers. Task context
* used to reduce or eliminate the need to perform TLB flushes due to context switches. * numbers are used to reduce or eliminate the need to perform TLB flushes
* Context numbers are implemented using ia-64 region ids. Since the IA-64 TLB does not * due to context switches. Context numbers are implemented using ia-64
* consider the region number when performing a TLB lookup, we need to assign a unique * region ids. Since the IA-64 TLB does not consider the region number when
* region id to each region in a process. We use the least significant three bits in a * performing a TLB lookup, we need to assign a unique region id to each
* region id for this purpose. * region in a process. We use the least significant three bits in a region
* id for this purpose.
*/ */
#define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */ #define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */
...@@ -32,13 +33,17 @@ ...@@ -32,13 +33,17 @@
struct ia64_ctx { struct ia64_ctx {
spinlock_t lock; spinlock_t lock;
unsigned int next; /* next context number to use */ unsigned int next; /* next context number to use */
unsigned int limit; /* next >= limit => must call wrap_mmu_context() */ unsigned int limit; /* available free range */
unsigned int max_ctx; /* max. context value supported by all CPUs */ unsigned int max_ctx; /* max. context value supported by all CPUs */
/* call wrap_mmu_context when next >= max */
unsigned long *bitmap; /* bitmap size is max_ctx+1 */
unsigned long *flushmap;/* pending rid to be flushed */
}; };
extern struct ia64_ctx ia64_ctx; extern struct ia64_ctx ia64_ctx;
DECLARE_PER_CPU(u8, ia64_need_tlb_flush); DECLARE_PER_CPU(u8, ia64_need_tlb_flush);
extern void mmu_context_init (void);
extern void wrap_mmu_context (struct mm_struct *mm); extern void wrap_mmu_context (struct mm_struct *mm);
static inline void static inline void
...@@ -47,10 +52,10 @@ enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk) ...@@ -47,10 +52,10 @@ enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk)
} }
/* /*
* When the context counter wraps around all TLBs need to be flushed because an old * When the context counter wraps around all TLBs need to be flushed because
* context number might have been reused. This is signalled by the ia64_need_tlb_flush * an old context number might have been reused. This is signalled by the
* per-CPU variable, which is checked in the routine below. Called by activate_mm(). * ia64_need_tlb_flush per-CPU variable, which is checked in the routine
* <efocht@ess.nec.de> * below. Called by activate_mm(). <efocht@ess.nec.de>
*/ */
static inline void static inline void
delayed_tlb_flush (void) delayed_tlb_flush (void)
...@@ -60,11 +65,9 @@ delayed_tlb_flush (void) ...@@ -60,11 +65,9 @@ delayed_tlb_flush (void)
if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) { if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
spin_lock_irqsave(&ia64_ctx.lock, flags); spin_lock_irqsave(&ia64_ctx.lock, flags);
{ if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
if (__ia64_per_cpu_var(ia64_need_tlb_flush)) { local_flush_tlb_all();
local_flush_tlb_all(); __ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
__ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
}
} }
spin_unlock_irqrestore(&ia64_ctx.lock, flags); spin_unlock_irqrestore(&ia64_ctx.lock, flags);
} }
...@@ -76,20 +79,27 @@ get_mmu_context (struct mm_struct *mm) ...@@ -76,20 +79,27 @@ get_mmu_context (struct mm_struct *mm)
unsigned long flags; unsigned long flags;
nv_mm_context_t context = mm->context; nv_mm_context_t context = mm->context;
if (unlikely(!context)) { if (likely(context))
spin_lock_irqsave(&ia64_ctx.lock, flags); goto out;
{
/* re-check, now that we've got the lock: */ spin_lock_irqsave(&ia64_ctx.lock, flags);
context = mm->context; /* re-check, now that we've got the lock: */
if (context == 0) { context = mm->context;
cpus_clear(mm->cpu_vm_mask); if (context == 0) {
if (ia64_ctx.next >= ia64_ctx.limit) cpus_clear(mm->cpu_vm_mask);
wrap_mmu_context(mm); if (ia64_ctx.next >= ia64_ctx.limit) {
mm->context = context = ia64_ctx.next++; ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
} ia64_ctx.max_ctx, ia64_ctx.next);
ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
ia64_ctx.max_ctx, ia64_ctx.next);
if (ia64_ctx.next >= ia64_ctx.max_ctx)
wrap_mmu_context(mm);
} }
spin_unlock_irqrestore(&ia64_ctx.lock, flags); mm->context = context = ia64_ctx.next++;
__set_bit(context, ia64_ctx.bitmap);
} }
spin_unlock_irqrestore(&ia64_ctx.lock, flags);
out:
/* /*
* Ensure we're not starting to use "context" before any old * Ensure we're not starting to use "context" before any old
* uses of it are gone from our TLB. * uses of it are gone from our TLB.
...@@ -100,8 +110,8 @@ get_mmu_context (struct mm_struct *mm) ...@@ -100,8 +110,8 @@ get_mmu_context (struct mm_struct *mm)
} }
/* /*
* Initialize context number to some sane value. MM is guaranteed to be a brand-new * Initialize context number to some sane value. MM is guaranteed to be a
* address-space, so no TLB flushing is needed, ever. * brand-new address-space, so no TLB flushing is needed, ever.
*/ */
static inline int static inline int
init_new_context (struct task_struct *p, struct mm_struct *mm) init_new_context (struct task_struct *p, struct mm_struct *mm)
...@@ -162,7 +172,10 @@ activate_context (struct mm_struct *mm) ...@@ -162,7 +172,10 @@ activate_context (struct mm_struct *mm)
if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
cpu_set(smp_processor_id(), mm->cpu_vm_mask); cpu_set(smp_processor_id(), mm->cpu_vm_mask);
reload_context(context); reload_context(context);
/* in the unlikely event of a TLB-flush by another thread, redo the load: */ /*
* in the unlikely event of a TLB-flush by another thread,
* redo the load.
*/
} while (unlikely(context != mm->context)); } while (unlikely(context != mm->context));
} }
...@@ -175,8 +188,8 @@ static inline void ...@@ -175,8 +188,8 @@ static inline void
activate_mm (struct mm_struct *prev, struct mm_struct *next) activate_mm (struct mm_struct *prev, struct mm_struct *next)
{ {
/* /*
* We may get interrupts here, but that's OK because interrupt handlers cannot * We may get interrupts here, but that's OK because interrupt
* touch user-space. * handlers cannot touch user-space.
*/ */
ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd)); ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd));
activate_context(next); activate_context(next);
......
...@@ -51,6 +51,7 @@ flush_tlb_mm (struct mm_struct *mm) ...@@ -51,6 +51,7 @@ flush_tlb_mm (struct mm_struct *mm)
if (!mm) if (!mm)
return; return;
set_bit(mm->context, ia64_ctx.flushmap);
mm->context = 0; mm->context = 0;
if (atomic_read(&mm->mm_users) == 0) if (atomic_read(&mm->mm_users) == 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment