Commit fb8f6499 authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] remove the BKL by turning it into a semaphore

This is the current remove-BKL patch.  I test-booted it on x86 and x64, trying
every conceivable combination of SMP, PREEMPT and PREEMPT_BKL.  All other
architectures should compile as well.  (Most of the testing was done with the
zaphod patch undone, but it applies cleanly to vanilla -mm3 as well and should
work fine.)

This is the debugging-enabled variant of the patch, which has two main
debugging features:

 - debug potentially illegal smp_processor_id() use. This has caught a number
   of real bugs - e.g. see the printk.c fix in the patch. (A short sketch of
   this bug class follows this list.)

 - make it possible to enable/disable the BKL via a .config option. If this
   goes upstream we don't want this of course, but for now it gives
   people a chance to find out whether any particular problem was caused
   by this patch.
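To illustrate the first debugging feature, here is a minimal sketch of the
bug class it flags. (The per_cpu_buf array and the fill_buffer() helper are
made up for illustration; they are not part of this patch.)

	/*
	 * Preemption-UNSAFE: on a CONFIG_PREEMPT kernel the task can be
	 * migrated to another CPU right after smp_processor_id() returns,
	 * so "cpu" may be stale by the time the per-CPU data is touched.
	 */
	void buggy_example(void)
	{
		int cpu = smp_processor_id();	/* preemptible context */

		fill_buffer(&per_cpu_buf[cpu]);	/* may run on another CPU */
	}

	/*
	 * Preemption-safe variant: get_cpu() disables preemption while the
	 * per-CPU data is in use, put_cpu() re-enables it.
	 */
	void fixed_example(void)
	{
		int cpu = get_cpu();

		fill_buffer(&per_cpu_buf[cpu]);
		put_cpu();
	}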

This patch has one important fix over the previous BKL patch: on PREEMPT
kernels, if we preempted BKL-using code then that code still auto-dropped the
BKL by mistake.  This caused a number of breakages for testers, which went
away once this bug was fixed.
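Concretely, the fix keeps the BKL semaphore held across an involuntary
preemption and merely hides it from schedule() via ->lock_depth. A rough
sketch of the idea (the helper name is made up; the real code is in the
preempt_schedule() hunk below):

	/*
	 * Sketch of the CONFIG_PREEMPT_BKL preemption path: the semaphore
	 * stays held across the context switch, but ->lock_depth is cleared
	 * so that schedule() does not auto-release the BKL on this
	 * involuntary switch.
	 */
	static void preempt_while_holding_bkl(struct task_struct *task)
	{
		int saved_lock_depth = task->lock_depth;

		task->lock_depth = -1;			/* hide the BKL from schedule() */
		schedule();				/* kernel_sem stays held */
		task->lock_depth = saved_lock_depth;	/* restore the recursion depth */
	}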

The debugging mechanism has also been improved a lot relative to the previous
BKL patch.

It would be nice to test-drive this in -mm.  There will likely be some more
smp_processor_id() false positives, but they are 1) harmless and 2) easy to
fix up.  We may also find more real smp_processor_id()-related breakages.
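Fixing up a false positive just means switching the flagged reference over to
_smp_processor_id(), the non-debug variant, as the delay and statistics hunks
below do. For example (shape taken from the __const_udelay() hunk in this
patch):

	void __const_udelay(unsigned long n)
	{
		n *= 4;
		/* CPU-local by construction, so the non-debug variant is fine: */
		n *= (cpu_data(_smp_processor_id()).udelay_val * (HZ/4));
		n >>= 32;
		__delay(n + 1);
	}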

The most noteworthy fact is that no BKL-using code has been found yet that
relies on smp_processor_id(), which is promising from a compatibility POV.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 8a1a48b7
@@ -517,6 +517,17 @@ config PREEMPT
 	  Say Y here if you are building a kernel for a desktop, embedded
 	  or real-time system. Say N if you are unsure.
 
+config PREEMPT_BKL
+	bool "Preempt The Big Kernel Lock"
+	depends on PREEMPT || SMP
+	default y
+	help
+	  This option reduces the latency of the kernel by making the
+	  big kernel lock preemptible.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors" if !SMP
 	depends on !(X86_VISWS || X86_VOYAGER)
@@ -145,7 +145,7 @@ static void poll_idle (void)
  */
 void cpu_idle (void)
 {
-	int cpu = smp_processor_id();
+	int cpu = _smp_processor_id();
 
 	/* endless idle loop with no priority at all */
 	while (1) {
@@ -306,7 +306,7 @@ void die(const char * str, struct pt_regs * regs, long err)
 	};
 	static int die_counter;
 
-	if (die.lock_owner != smp_processor_id()) {
+	if (die.lock_owner != _smp_processor_id()) {
 		console_verbose();
 		spin_lock_irq(&die.lock);
 		die.lock_owner = smp_processor_id();
@@ -34,7 +34,7 @@ inline void __const_udelay(unsigned long xloops)
 	xloops *= 4;
 	__asm__("mull %0"
 		:"=d" (xloops), "=&a" (d0)
-		:"1" (xloops),"0" (current_cpu_data.loops_per_jiffy * (HZ/4)));
+		:"1" (xloops),"0" (cpu_data[_smp_processor_id()].loops_per_jiffy * (HZ/4)));
 	__delay(++xloops);
 }
@@ -24,7 +24,7 @@ inline void __const_udelay(unsigned long xloops)
 	__asm__("dmulu.l %0, %2\n\t"
 		"sts mach, %0"
 		: "=r" (xloops)
-		: "0" (xloops), "r" (current_cpu_data.loops_per_jiffy)
+		: "0" (xloops), "r" (cpu_data[_smp_processor_id()].loops_per_jiffy)
 		: "macl", "mach");
 	__delay(xloops * HZ);
 }
@@ -31,7 +31,7 @@ void __const_udelay(unsigned long n)
 {
 	n *= 4;
 
-	n *= (cpu_data(smp_processor_id()).udelay_val * (HZ/4));
+	n *= (cpu_data(_smp_processor_id()).udelay_val * (HZ/4));
 	n >>= 32;
 
 	__delay(n + 1);
@@ -249,6 +249,17 @@ config PREEMPT
 	  Say Y here if you are feeling brave and building a kernel for a
 	  desktop, embedded or real-time system. Say N if you are unsure.
 
+config PREEMPT_BKL
+	bool "Preempt The Big Kernel Lock"
+	depends on PREEMPT || SMP
+	default y
+	help
+	  This option reduces the latency of the kernel by making the
+	  big kernel lock preemptible.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on SMP
@@ -34,7 +34,7 @@ void __delay(unsigned long loops)
 
 inline void __const_udelay(unsigned long xloops)
 {
-	__delay(((xloops * current_cpu_data.loops_per_jiffy) >> 32) * HZ);
+	__delay(((xloops * cpu_data[_smp_processor_id()].loops_per_jiffy) >> 32) * HZ);
 }
 
 void __udelay(unsigned long usecs)
@@ -50,7 +50,7 @@ extern u8 x86_cpu_to_apicid[];
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define smp_processor_id()	(current_thread_info()->cpu)
+#define __smp_processor_id()	(current_thread_info()->cpu)
 
 extern cpumask_t cpu_callout_map;
 #define cpu_possible_map cpu_callout_map
@@ -66,7 +66,7 @@ static inline int num_booting_cpus(void)
 	return cpus_weight(cpu_callout_map);
 }
 
-#define smp_processor_id() read_pda(cpunumber)
+#define __smp_processor_id() read_pda(cpunumber)
 
 extern __inline int hard_smp_processor_id(void)
 {
@@ -61,12 +61,16 @@
 #define in_softirq()		(softirq_count())
 #define in_interrupt()		(irq_count())
 
-#ifdef CONFIG_PREEMPT
+#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
 # define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked())
+#else
+# define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+#endif
+
+#ifdef CONFIG_PREEMPT
 # define preemptible()	(preempt_count() == 0 && !irqs_disabled())
 # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
 #else
-# define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != 0)
 # define preemptible()	0
 # define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
 #endif
@@ -77,10 +81,10 @@ extern void synchronize_irq(unsigned int irq);
 # define synchronize_irq(irq)	barrier()
 #endif
 
-#define nmi_enter()		(preempt_count() += HARDIRQ_OFFSET)
-#define nmi_exit()		(preempt_count() -= HARDIRQ_OFFSET)
+#define nmi_enter()		irq_enter()
+#define nmi_exit()		sub_preempt_count(HARDIRQ_OFFSET)
 
-#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
+#define irq_enter()		add_preempt_count(HARDIRQ_OFFSET)
 extern void irq_exit(void);
 
 #endif /* LINUX_HARDIRQ_H */
@@ -70,9 +70,9 @@ extern void enable_irq(unsigned int irq);
 
 /* SoftIRQ primitives.  */
 #define local_bh_disable() \
-		do { preempt_count() += SOFTIRQ_OFFSET; barrier(); } while (0)
+		do { add_preempt_count(SOFTIRQ_OFFSET); barrier(); } while (0)
 #define __local_bh_enable() \
-		do { barrier(); preempt_count() -= SOFTIRQ_OFFSET; } while (0)
+		do { barrier(); sub_preempt_count(SOFTIRQ_OFFSET); } while (0)
 
 extern void local_bh_enable(void);
@@ -9,17 +9,18 @@
 #include <linux/config.h>
 #include <linux/linkage.h>
 
-#define preempt_count()	(current_thread_info()->preempt_count)
-
-#define inc_preempt_count() \
-do { \
-	preempt_count()++; \
-} while (0)
+#ifdef CONFIG_DEBUG_PREEMPT
+  extern void fastcall add_preempt_count(int val);
+  extern void fastcall sub_preempt_count(int val);
+#else
+# define add_preempt_count(val)	do { preempt_count() += (val); } while (0)
+# define sub_preempt_count(val)	do { preempt_count() -= (val); } while (0)
+#endif
 
-#define dec_preempt_count() \
-do { \
-	preempt_count()--; \
-} while (0)
+#define inc_preempt_count() add_preempt_count(1)
+#define dec_preempt_count() sub_preempt_count(1)
+
+#define preempt_count()	(current_thread_info()->preempt_count)
 
 #ifdef CONFIG_PREEMPT
@@ -98,7 +98,9 @@ void smp_prepare_boot_cpu(void);
 /*
  *	These macros fold the SMP functionality into a single CPU system
  */
-#define smp_processor_id()			0
+#if !defined(__smp_processor_id) || !defined(CONFIG_PREEMPT)
+# define smp_processor_id()			0
+#endif
 #define hard_smp_processor_id()			0
 #define smp_threads_ready			1
 #define smp_call_function(func,info,retry,wait)	({ 0; })
@@ -109,6 +111,33 @@ static inline void smp_send_reschedule(int cpu) { }
 
 #endif /* !SMP */
 
+/*
+ * DEBUG_PREEMPT support: check whether smp_processor_id() is being
+ * used in a preemption-safe way.
+ *
+ * An architecture has to enable this debugging code explicitly.
+ * It can do so by renaming the smp_processor_id() macro to
+ * __smp_processor_id().  This should only be done after some minimal
+ * testing, because usually there are a number of false positives
+ * that an architecture will trigger.
+ *
+ * To fix a false positive (i.e. smp_processor_id() use that the
+ * debugging code reports but which use for some reason is legal),
+ * change the smp_processor_id() reference to _smp_processor_id(),
+ * which is the nondebug variant.  NOTE: don't use this to hack around
+ * real bugs.
+ */
+#ifdef __smp_processor_id
+# if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
+   extern unsigned int smp_processor_id(void);
+# else
+#  define smp_processor_id() __smp_processor_id()
+# endif
+# define _smp_processor_id() __smp_processor_id()
+#else
+# define _smp_processor_id() smp_processor_id()
+#endif
+
 #define get_cpu()		({ preempt_disable(); smp_processor_id(); })
 #define put_cpu()		preempt_enable()
 #define put_cpu_no_resched()	preempt_enable_no_resched()
@@ -9,15 +9,15 @@
 
 #define kernel_locked()		(current->lock_depth >= 0)
 
-extern int __lockfunc get_kernel_lock(void);
-extern void __lockfunc put_kernel_lock(void);
+extern int __lockfunc __reacquire_kernel_lock(void);
+extern void __lockfunc __release_kernel_lock(void);
 
 /*
  * Release/re-acquire global kernel lock for the scheduler
  */
 #define release_kernel_lock(tsk) do { 		\
 	if (unlikely((tsk)->lock_depth >= 0))	\
-		put_kernel_lock();		\
+		__release_kernel_lock();	\
 } while (0)
 
 /*
@@ -26,16 +26,16 @@ extern void __lockfunc put_kernel_lock(void);
 * reacquire_kernel_lock() so that the compiler can see
 * it at compile-time.
 */
-#ifdef CONFIG_SMP
-#define return_value_on_smp return
+#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_BKL)
+# define return_value_on_smp return
 #else
-#define return_value_on_smp
+# define return_value_on_smp
 #endif
 
 static inline int reacquire_kernel_lock(struct task_struct *task)
 {
 	if (unlikely(task->lock_depth >= 0))
-		return_value_on_smp get_kernel_lock();
+		return_value_on_smp __reacquire_kernel_lock();
 	return 0;
 }
@@ -105,7 +105,7 @@ struct rt_cache_stat
 extern struct rt_cache_stat *rt_cache_stat;
 
 #define RT_CACHE_STAT_INC(field)					\
-		(per_cpu_ptr(rt_cache_stat, smp_processor_id())->field++)
+		(per_cpu_ptr(rt_cache_stat, _smp_processor_id())->field++)
 
 extern struct ip_rt_acct *ip_rt_acct;
@@ -128,18 +128,18 @@ struct linux_mib {
 #define SNMP_STAT_USRPTR(name)	(name[1])
 
 #define SNMP_INC_STATS_BH(mib, field) 	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field]++)
 #define SNMP_INC_STATS_OFFSET_BH(mib, field, offset)	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field + (offset)]++)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field + (offset)]++)
 #define SNMP_INC_STATS_USER(mib, field) \
-	(per_cpu_ptr(mib[1], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field]++)
 #define SNMP_INC_STATS(mib, field) 	\
-	(per_cpu_ptr(mib[!in_softirq()], smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]++)
 #define SNMP_DEC_STATS(mib, field) 	\
-	(per_cpu_ptr(mib[!in_softirq()], smp_processor_id())->mibs[field]--)
+	(per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]--)
 #define SNMP_ADD_STATS_BH(mib, field, addend) 	\
-	(per_cpu_ptr(mib[0], smp_processor_id())->mibs[field] += addend)
+	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field] += addend)
 #define SNMP_ADD_STATS_USER(mib, field, addend) 	\
-	(per_cpu_ptr(mib[1], smp_processor_id())->mibs[field] += addend)
+	(per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field] += addend)
 
 #endif
@@ -444,6 +444,10 @@ asmlinkage void __init start_kernel(void)
 	 * time - but meanwhile we still have a functioning scheduler.
 	 */
 	sched_init();
+	/*
+	 * Disable preemption - early bootup scheduling is extremely
+	 * fragile until we cpu_idle() for the first time.
+	 */
 	preempt_disable();
 	build_all_zonelists();
 	page_alloc_init();
@@ -379,7 +379,7 @@ static void module_unload_init(struct module *mod)
 	for (i = 0; i < NR_CPUS; i++)
 		local_set(&mod->ref[i].count, 0);
 	/* Hold reference count during initialization. */
-	local_set(&mod->ref[smp_processor_id()].count, 1);
+	local_set(&mod->ref[_smp_processor_id()].count, 1);
 	/* Backwards compatibility macros put refcount during init. */
 	mod->waiter = current;
 }
@@ -645,8 +645,9 @@ void release_console_sem(void)
 		_con_start = con_start;
 		_log_end = log_end;
 		con_start = log_end;		/* Flush */
-		spin_unlock_irqrestore(&logbuf_lock, flags);
+		spin_unlock(&logbuf_lock);
 		call_console_drivers(_con_start, _log_end);
+		local_irq_restore(flags);
 	}
 	console_locked = 0;
 	console_may_schedule = 0;
@@ -2506,6 +2506,38 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
 }
 #endif
 
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
+
+void fastcall add_preempt_count(int val)
+{
+	/*
+	 * Underflow?
+	 */
+	BUG_ON(((int)preempt_count() < 0));
+	preempt_count() += val;
+	/*
+	 * Spinlock count overflowing soon?
+	 */
+	BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
+}
+EXPORT_SYMBOL(add_preempt_count);
+
+void fastcall sub_preempt_count(int val)
+{
+	/*
+	 * Underflow?
+	 */
+	BUG_ON(val > preempt_count());
+	/*
+	 * Is the spinlock portion underflowing?
+	 */
+	BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK));
+	preempt_count() -= val;
+}
+EXPORT_SYMBOL(sub_preempt_count);
+
+#endif
+
 /*
  * schedule() is the main scheduler function.
  */
@@ -2688,7 +2720,10 @@ EXPORT_SYMBOL(schedule);
 asmlinkage void __sched preempt_schedule(void)
 {
 	struct thread_info *ti = current_thread_info();
-
+#ifdef CONFIG_PREEMPT_BKL
+	struct task_struct *task = current;
+	int saved_lock_depth;
+#endif
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
 	 * we do not want to preempt the current task.  Just return..
@@ -2697,9 +2732,21 @@ asmlinkage void __sched preempt_schedule(void)
 		return;
 
 need_resched:
-	ti->preempt_count = PREEMPT_ACTIVE;
+	add_preempt_count(PREEMPT_ACTIVE);
+	/*
+	 * We keep the big kernel semaphore locked, but we
+	 * clear ->lock_depth so that schedule() doesnt
+	 * auto-release the semaphore:
+	 */
+#ifdef CONFIG_PREEMPT_BKL
+	saved_lock_depth = task->lock_depth;
+	task->lock_depth = -1;
+#endif
 	schedule();
-	ti->preempt_count = 0;
+#ifdef CONFIG_PREEMPT_BKL
+	task->lock_depth = saved_lock_depth;
+#endif
+	sub_preempt_count(PREEMPT_ACTIVE);
 
 	/* we could miss a preemption opportunity between schedule and now */
 	barrier();
@@ -3436,9 +3483,9 @@ asmlinkage long sys_sched_yield(void)
 static inline void __cond_resched(void)
 {
 	do {
-		preempt_count() += PREEMPT_ACTIVE;
+		add_preempt_count(PREEMPT_ACTIVE);
 		schedule();
-		preempt_count() -= PREEMPT_ACTIVE;
+		sub_preempt_count(PREEMPT_ACTIVE);
 	} while (need_resched());
 }
 
@@ -3522,7 +3569,7 @@ EXPORT_SYMBOL(yield);
 */
 void __sched io_schedule(void)
 {
-	struct runqueue *rq = this_rq();
+	struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
 
 	atomic_inc(&rq->nr_iowait);
 	schedule();
@@ -3533,7 +3580,7 @@ EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
 {
-	struct runqueue *rq = this_rq();
+	struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
 	long ret;
 
 	atomic_inc(&rq->nr_iowait);
@@ -3741,7 +3788,7 @@ void __devinit init_idle(task_t *idle, int cpu)
 	spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
-#ifdef CONFIG_PREEMPT
+#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
 	idle->thread_info->preempt_count = (idle->lock_depth >= 0);
 #else
 	idle->thread_info->preempt_count = 0;
@@ -142,7 +142,7 @@ void local_bh_enable(void)
 	 * Keep preemption disabled until we are done with
 	 * softirq processing:
 	 */
-	preempt_count() -= SOFTIRQ_OFFSET - 1;
+	sub_preempt_count(SOFTIRQ_OFFSET - 1);
 
 	if (unlikely(!in_interrupt() && local_softirq_pending()))
 		do_softirq();
@@ -163,7 +163,7 @@ EXPORT_SYMBOL(local_bh_enable);
 */
 void irq_exit(void)
 {
-	preempt_count() -= IRQ_EXIT_OFFSET;
+	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
 	preempt_enable_no_resched();
@@ -95,7 +95,7 @@ static int stop_machine(void)
 	stopmachine_state = STOPMACHINE_WAIT;
 
 	for_each_online_cpu(i) {
-		if (i == smp_processor_id())
+		if (i == _smp_processor_id())
 			continue;
 		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
 		if (ret < 0)
@@ -177,7 +177,7 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
 
 	/* If they don't care which CPU fn runs on, bind to any online one. */
 	if (cpu == NR_CPUS)
-		cpu = smp_processor_id();
+		cpu = _smp_processor_id();
 
 	p = kthread_create(do_stop, &smdata, "kstopmachine");
 	if (!IS_ERR(p)) {
@@ -465,7 +465,14 @@ static inline void __run_timers(tvec_base_t *base)
 			smp_wmb();
 			timer->base = NULL;
 			spin_unlock_irq(&base->lock);
+			{
+				u32 preempt_count = preempt_count();
 			fn(data);
+				if (preempt_count != preempt_count()) {
+					printk("huh, entered %p with %08x, exited with %08x?\n", fn, preempt_count, preempt_count());
+					BUG();
+				}
+			}
 			spin_lock_irq(&base->lock);
 			goto repeat;
 		}
@@ -48,6 +48,16 @@ config DEBUG_SLAB
 	  allocation as well as poisoning memory on free to catch use of freed
 	  memory. This can make kmalloc/kfree-intensive workloads much slower.
 
+config DEBUG_PREEMPT
+	bool "Debug preemptible kernel"
+	depends on PREEMPT && X86
+	default y
+	help
+	  If you say Y here then the kernel will use a debug variant of the
+	  commonly used smp_processor_id() function and will print warnings
+	  if kernel code uses it in a preemption-unsafe way. Also, the kernel
+	  will detect preemption count underflows.
+
 config DEBUG_SPINLOCK
 	bool "Spinlock debugging"
 	depends on DEBUG_KERNEL && (ALPHA || ARM || X86 || IA64 || M32R || MIPS || PARISC || PPC32 || (SUPERH && !SUPERH64) || SPARC32 || SPARC64 || USERMODE || X86_64)
@@ -7,6 +7,141 @@
 */
 #include <linux/smp_lock.h>
 #include <linux/module.h>
+#include <linux/kallsyms.h>
+
+#if defined(CONFIG_PREEMPT) && defined(__smp_processor_id) && \
+		defined(CONFIG_DEBUG_PREEMPT)
+
+/*
+ * Debugging check.
+ */
+unsigned int smp_processor_id(void)
+{
+	unsigned long preempt_count = preempt_count();
+	int this_cpu = __smp_processor_id();
+	cpumask_t this_mask;
+
+	if (likely(preempt_count))
+		goto out;
+
+	if (irqs_disabled())
+		goto out;
+
+	/*
+	 * Kernel threads bound to a single CPU can safely use
+	 * smp_processor_id():
+	 */
+	this_mask = cpumask_of_cpu(this_cpu);
+
+	if (cpus_equal(current->cpus_allowed, this_mask))
+		goto out;
+
+	/*
+	 * It is valid to assume CPU-locality during early bootup:
+	 */
+	if (system_state != SYSTEM_RUNNING)
+		goto out;
+
+	/*
+	 * Avoid recursion:
+	 */
+	preempt_disable();
+
+	if (!printk_ratelimit())
+		goto out_enable;
+
+	printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] code: %s/%d\n", preempt_count(), current->comm, current->pid);
+	print_symbol("caller is %s\n", (long)__builtin_return_address(0));
+	dump_stack();
+
+out_enable:
+	preempt_enable_no_resched();
+out:
+	return this_cpu;
+}
+
+EXPORT_SYMBOL(smp_processor_id);
+
+#endif /* PREEMPT && __smp_processor_id && DEBUG_PREEMPT */
+
+#ifdef CONFIG_PREEMPT_BKL
+
+/*
+ * The 'big kernel semaphore'
+ *
+ * This mutex is taken and released recursively by lock_kernel()
+ * and unlock_kernel().  It is transparently dropped and reaquired
+ * over schedule().  It is used to protect legacy code that hasn't
+ * been migrated to a proper locking design yet.
+ *
+ * Note: code locked by this semaphore will only be serialized against
+ * other code using the same locking facility. The code guarantees that
+ * the task remains on the same CPU.
+ *
+ * Don't use in new code.
+ */
+DECLARE_MUTEX(kernel_sem);
+
+/*
+ * Re-acquire the kernel semaphore.
+ *
+ * This function is called with preemption off.
+ *
+ * We are executing in schedule() so the code must be extremely careful
+ * about recursion, both due to the down() and due to the enabling of
+ * preemption. schedule() will re-check the preemption flag after
+ * reacquiring the semaphore.
+ */
+int __lockfunc __reacquire_kernel_lock(void)
+{
+	struct task_struct *task = current;
+	int saved_lock_depth = task->lock_depth;
+
+	BUG_ON(saved_lock_depth < 0);
+
+	task->lock_depth = -1;
+	preempt_enable_no_resched();
+
+	down(&kernel_sem);
+
+	preempt_disable();
+	task->lock_depth = saved_lock_depth;
+
+	return 0;
+}
+
+void __lockfunc __release_kernel_lock(void)
+{
+	up(&kernel_sem);
+}
+
+/*
+ * Getting the big kernel semaphore.
+ */
+void __lockfunc lock_kernel(void)
+{
+	struct task_struct *task = current;
+	int depth = task->lock_depth + 1;
+
+	if (likely(!depth))
+		/*
+		 * No recursion worries - we set up lock_depth _after_
+		 */
+		down(&kernel_sem);
+
+	task->lock_depth = depth;
+}
+
+void __lockfunc unlock_kernel(void)
+{
+	struct task_struct *task = current;
+
+	BUG_ON(task->lock_depth < 0);
+
+	if (likely(--task->lock_depth < 0))
+		up(&kernel_sem);
+}
+
+#else
+
 /*
 * The 'big kernel lock'
@@ -34,7 +169,7 @@ static spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 * (This works on UP too - _raw_spin_trylock will never
 * return false in that case)
 */
-int __lockfunc get_kernel_lock(void)
+int __lockfunc __reacquire_kernel_lock(void)
 {
 	while (!_raw_spin_trylock(&kernel_flag)) {
 		if (test_thread_flag(TIF_NEED_RESCHED))
@@ -45,7 +180,7 @@ int __lockfunc get_kernel_lock(void)
 	return 0;
 }
 
-void __lockfunc put_kernel_lock(void)
+void __lockfunc __release_kernel_lock(void)
 {
 	_raw_spin_unlock(&kernel_flag);
 	preempt_enable_no_resched();
@@ -122,5 +257,8 @@ void __lockfunc unlock_kernel(void)
 	__unlock_kernel();
 }
 
+#endif
+
 EXPORT_SYMBOL(lock_kernel);
 EXPORT_SYMBOL(unlock_kernel);