Commit fbed0bc0 authored by Linus Torvalds

Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking changes from Ingo Molnar:
 "Various updates:

   - Futex scalability improvements: remove page lock use for shared
     futex get_futex_key(), which speeds up 'perf bench futex hash'
     benchmarks by over 40% on a 60-core Westmere.  This makes anon-mem
     shared futexes perform close to private futexes.  (Mel Gorman)

   - lockdep hash collision detection and fix (Alfredo Alvarez
     Fernandez)

   - lockdep testing enhancements (Alfredo Alvarez Fernandez)

   - robustify lockdep init by using hlists (Andrew Morton, Andrey
     Ryabinin)

   - mutex and csd_lock micro-optimizations (Davidlohr Bueso)

   - small x86 barriers tweaks (Michael S Tsirkin)

   - qspinlock updates (Waiman Long)"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (25 commits)
  locking/csd_lock: Use smp_cond_acquire() in csd_lock_wait()
  locking/csd_lock: Explicitly inline csd_lock*() helpers
  futex: Replace barrier() in unqueue_me() with READ_ONCE()
  locking/lockdep: Detect chain_key collisions
  locking/lockdep: Prevent chain_key collisions
  tools/lib/lockdep: Fix link creation warning
  tools/lib/lockdep: Add tests for AA and ABBA locking
  tools/lib/lockdep: Add userspace version of READ_ONCE()
  tools/lib/lockdep: Fix the build on recent kernels
  locking/qspinlock: Move __ARCH_SPIN_LOCK_UNLOCKED to qspinlock_types.h
  locking/mutex: Allow next waiter lockless wakeup
  locking/pvqspinlock: Enable slowpath locking count tracking
  locking/qspinlock: Use smp_cond_acquire() in pending code
  locking/pvqspinlock: Move lock stealing count tracking code into pv_queued_spin_steal_lock()
  locking/mcs: Fix mcs_spin_lock() ordering
  futex: Remove requirement for lock_page() in get_futex_key()
  futex: Rename barrier references in ordering guarantees
  locking/atomics: Update comment about READ_ONCE() and structures
  locking/lockdep: Eliminate lockdep_init()
  locking/lockdep: Convert hash tables to hlists
  ...
parents d37a14bb 38460a21
@@ -281,8 +281,6 @@ notrace void __init machine_init(unsigned long dt_ptr)
 	 */
 	set_ist(_vectors_start);
 
-	lockdep_init();
-
 	/*
 	 * dtb is passed in from bootloader.
 	 * fdt is linked in blob.
...
@@ -130,8 +130,6 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
 	memset(__bss_start, 0, __bss_stop-__bss_start);
 	memset(_ssbss, 0, _esbss-_ssbss);
 
-	lockdep_init();
-
 	/* initialize device tree for usage in early_printk */
 	early_init_devtree(_fdt_start);
...
@@ -114,8 +114,6 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
 notrace void __init machine_init(u64 dt_ptr)
 {
-	lockdep_init();
-
 	/* Enable early debugging if any specified (see udbg.h) */
 	udbg_early_init();
...
@@ -255,9 +255,6 @@ void __init early_setup(unsigned long dt_ptr)
 	setup_paca(&boot_paca);
 	fixup_boot_paca();
 
-	/* Initialize lockdep early or else spinlocks will blow */
-	lockdep_init();
-
 	/* -------- printk is now safe to use ------- */
 
 	/* Enable early debugging if any specified (see udbg.h) */
...
@@ -448,7 +448,6 @@ void __init startup_init(void)
 	rescue_initrd();
 	clear_bss_section();
 	init_kernel_storage_key();
-	lockdep_init();
 	lockdep_off();
 	setup_lowcore_early();
 	setup_facility_list();
...
@@ -696,14 +696,6 @@ tlb_fixup_done:
 	call	__bzero
 	 sub	%o1, %o0, %o1
 
-#ifdef CONFIG_LOCKDEP
-	/* We have this call this super early, as even prom_init can grab
-	 * spinlocks and thus call into the lockdep code.
-	 */
-	call	lockdep_init
-	 nop
-#endif
-
 	call	prom_init
 	 mov	%l7, %o0			! OpenPROM cif handler
...
@@ -6,18 +6,17 @@
 /*
  * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
+ * And yes, this might be required on UP too when we're talking
  * to devices.
  */
 #ifdef CONFIG_X86_32
-/*
- * Some non-Intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \
+				      X86_FEATURE_XMM2) ::: "memory", "cc")
+#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \
+				       X86_FEATURE_XMM2) ::: "memory", "cc")
+#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \
+				      X86_FEATURE_XMM2) ::: "memory", "cc")
 #else
 #define mb()	asm volatile("mfence":::"memory")
 #define rmb()	asm volatile("lfence":::"memory")
...
@@ -418,9 +418,9 @@ static void mwait_idle(void)
 	if (!current_set_polling_and_test()) {
 		trace_cpu_idle_rcuidle(1, smp_processor_id());
 		if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
-			smp_mb(); /* quirk */
+			mb(); /* quirk */
 			clflush((void *)&current_thread_info()->flags);
-			smp_mb(); /* quirk */
+			mb(); /* quirk */
 		}
 
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
...
@@ -1520,12 +1520,6 @@ __init void lguest_init(void)
 	 */
 	reserve_top_address(lguest_data.reserve_mem);
 
-	/*
-	 * If we don't initialize the lock dependency checker now, it crashes
-	 * atomic_notifier_chain_register, then paravirt_disable_iospace.
-	 */
-	lockdep_init();
-
 	/* Hook in our special panic hypercall code. */
 	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
...
@@ -119,11 +119,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 }
 #endif
 
-/*
- * Initializier
- */
-#define	__ARCH_SPIN_LOCK_UNLOCKED	{ ATOMIC_INIT(0) }
-
 /*
  * Remapping spinlock architecture specific functions to the corresponding
  * queued spinlock functions.
...
@@ -32,6 +32,11 @@ typedef struct qspinlock {
 	atomic_t	val;
 } arch_spinlock_t;
 
+/*
+ * Initializier
+ */
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ ATOMIC_INIT(0) }
+
 /*
  * Bitfields in the atomic value:
  *
...
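With the initializer and the arch_spinlock_t typedef now living in the same header, a statically initialized queued spinlock only needs qspinlock_types.h. A minimal sketch (illustrative only; the direct include path and the variable name are assumptions, not part of this series):

    #include <asm-generic/qspinlock_types.h>

    /* Statically initialized queued spinlock: both the arch_spinlock_t
     * typedef and the __ARCH_SPIN_LOCK_UNLOCKED initializer now come
     * from qspinlock_types.h alone.
     */
    static arch_spinlock_t demo_lock = __ARCH_SPIN_LOCK_UNLOCKED;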
@@ -263,8 +263,9 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
  * In contrast to ACCESS_ONCE these two macros will also work on aggregate
  * data types like structs or unions. If the size of the accessed data
  * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
- * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a
- * compile-time warning.
+ * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at
+ * least two memcpy()s: one for the __builtin_memcpy() and then one for
+ * the macro doing the copy of variable - '__u' allocated on the stack.
  *
  * Their two major use cases are: (1) Mediating communication between
  * process-level code and irq/NMI handlers, all running on the same CPU,
...
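As a side note on the memcpy() fallback the updated comment describes, here is a small sketch of an aggregate READ_ONCE() in practice (not from the kernel tree; 'struct pair' and 'snapshot_pair' are made-up names):

    #include <linux/compiler.h>

    struct pair {
            unsigned long a;
            unsigned long b;
    };

    static struct pair shared;

    static void snapshot_pair(struct pair *out)
    {
            /* Wider than a machine word, so this takes the memcpy() path:
             * the macro copies 'shared' into the on-stack '__u' temporary
             * and then into *out. Each word is read once, but the pair is
             * not read atomically and may tear.
             */
            *out = READ_ONCE(shared);
    }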
@@ -261,7 +261,6 @@ struct held_lock {
 /*
  * Initialization, self-test and debugging-output methods:
  */
-extern void lockdep_init(void);
 extern void lockdep_info(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
@@ -392,7 +391,6 @@ static inline void lockdep_on(void)
 # define lockdep_set_current_reclaim_state(g)	do { } while (0)
 # define lockdep_clear_current_reclaim_state()	do { } while (0)
 # define lockdep_trace_alloc(g)			do { } while (0)
-# define lockdep_init()				do { } while (0)
 # define lockdep_info()				do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
 		do { (void)(name); (void)(key); } while (0)
...
@@ -499,11 +499,6 @@ asmlinkage __visible void __init start_kernel(void)
 	char *command_line;
 	char *after_dashes;
 
-	/*
-	 * Need to run as early as possible, to initialize the
-	 * lockdep hash:
-	 */
-	lockdep_init();
 	set_task_stack_end_magic(&init_task);
 	smp_setup_processor_id();
 	debug_objects_early_init();
...
@@ -124,16 +124,16 @@
  *   futex_wait(futex, val);
  *
  *   waiters++; (a)
- *   mb(); (A) <-- paired with -.
+ *   smp_mb(); (A) <-- paired with -.
  *                                |
  *   lock(hash_bucket(futex));    |
  *                                |
  *   uval = *futex;               |
  *                                |        *futex = newval;
  *                                |        sys_futex(WAKE, futex);
  *                                |          futex_wake(futex);
  *                                |
- *                                `-------> mb(); (B)
+ *                                `--------> smp_mb(); (B)
  *   if (uval == val)
  *     queue();
  *     unlock(hash_bucket(futex));
@@ -334,7 +334,7 @@ static inline void futex_get_mm(union futex_key *key)
 	/*
 	 * Ensure futex_get_mm() implies a full barrier such that
 	 * get_futex_key() implies a full barrier. This is relied upon
-	 * as full barrier (B), see the ordering comment above.
+	 * as smp_mb(); (B), see the ordering comment above.
 	 */
 	smp_mb__after_atomic();
 }
@@ -407,10 +407,10 @@ static void get_futex_key_refs(union futex_key *key)
 	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
 	case FUT_OFF_INODE:
-		ihold(key->shared.inode); /* implies MB (B) */
+		ihold(key->shared.inode); /* implies smp_mb(); (B) */
 		break;
 	case FUT_OFF_MMSHARED:
-		futex_get_mm(key); /* implies MB (B) */
+		futex_get_mm(key); /* implies smp_mb(); (B) */
 		break;
 	default:
 		/*
@@ -418,7 +418,7 @@ static void get_futex_key_refs(union futex_key *key)
 		 * mm, therefore the only purpose of calling get_futex_key_refs
 		 * is because we need the barrier for the lockless waiter check.
 		 */
-		smp_mb(); /* explicit MB (B) */
+		smp_mb(); /* explicit smp_mb(); (B) */
 	}
 }
@@ -497,7 +497,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	if (!fshared) {
 		key->private.mm = mm;
 		key->private.address = address;
-		get_futex_key_refs(key);  /* implies MB (B) */
+		get_futex_key_refs(key);  /* implies smp_mb(); (B) */
 		return 0;
 	}
@@ -520,7 +520,20 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	else
 		err = 0;
 
-	lock_page(page);
+	/*
+	 * The treatment of mapping from this point on is critical. The page
+	 * lock protects many things but in this context the page lock
+	 * stabilizes mapping, prevents inode freeing in the shared
+	 * file-backed region case and guards against movement to swap cache.
+	 *
+	 * Strictly speaking the page lock is not needed in all cases being
+	 * considered here and page lock forces unnecessarily serialization
+	 * From this point on, mapping will be re-verified if necessary and
+	 * page lock will be acquired only if it is unavoidable
+	 */
+	page = compound_head(page);
+	mapping = READ_ONCE(page->mapping);
+
 	/*
 	 * If page->mapping is NULL, then it cannot be a PageAnon
 	 * page; but it might be the ZERO_PAGE or in the gate area or
@@ -536,19 +549,31 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	 * shmem_writepage move it from filecache to swapcache beneath us:
 	 * an unlikely race, but we do need to retry for page->mapping.
 	 */
-	mapping = compound_head(page)->mapping;
-	if (!mapping) {
-		int shmem_swizzled = PageSwapCache(page);
+	if (unlikely(!mapping)) {
+		int shmem_swizzled;
+
+		/*
+		 * Page lock is required to identify which special case above
+		 * applies. If this is really a shmem page then the page lock
+		 * will prevent unexpected transitions.
+		 */
+		lock_page(page);
+		shmem_swizzled = PageSwapCache(page) || page->mapping;
 		unlock_page(page);
 		put_page(page);
+
 		if (shmem_swizzled)
 			goto again;
+
 		return -EFAULT;
 	}
 
 	/*
 	 * Private mappings are handled in a simple way.
 	 *
+	 * If the futex key is stored on an anonymous page, then the associated
+	 * object is the mm which is implicitly pinned by the calling process.
+	 *
 	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
 	 * it's a read-only handle, it's expected that futexes attach to
 	 * the object not the particular process.
@@ -566,16 +591,74 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
 		key->private.mm = mm;
 		key->private.address = address;
+
+		get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
 	} else {
+		struct inode *inode;
+
+		/*
+		 * The associated futex object in this case is the inode and
+		 * the page->mapping must be traversed. Ordinarily this should
+		 * be stabilised under page lock but it's not strictly
+		 * necessary in this case as we just want to pin the inode, not
+		 * update the radix tree or anything like that.
+		 *
+		 * The RCU read lock is taken as the inode is finally freed
+		 * under RCU. If the mapping still matches expectations then the
+		 * mapping->host can be safely accessed as being a valid inode.
+		 */
+		rcu_read_lock();
+
+		if (READ_ONCE(page->mapping) != mapping) {
+			rcu_read_unlock();
+			put_page(page);
+
+			goto again;
+		}
+
+		inode = READ_ONCE(mapping->host);
+		if (!inode) {
+			rcu_read_unlock();
+			put_page(page);
+
+			goto again;
+		}
+
+		/*
+		 * Take a reference unless it is about to be freed. Previously
+		 * this reference was taken by ihold under the page lock
+		 * pinning the inode in place so i_lock was unnecessary. The
+		 * only way for this check to fail is if the inode was
+		 * truncated in parallel so warn for now if this happens.
+		 *
+		 * We are not calling into get_futex_key_refs() in file-backed
+		 * cases, therefore a successful atomic_inc return below will
+		 * guarantee that get_futex_key() will still imply smp_mb(); (B).
+		 */
+		if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
+			rcu_read_unlock();
+			put_page(page);
+
+			goto again;
+		}
+
+		/* Should be impossible but lets be paranoid for now */
+		if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+			err = -EFAULT;
+			rcu_read_unlock();
+			iput(inode);
+
+			goto out;
+		}
+
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-		key->shared.inode = mapping->host;
+		key->shared.inode = inode;
 		key->shared.pgoff = basepage_index(page);
+		rcu_read_unlock();
 	}
 
-	get_futex_key_refs(key); /* implies MB (B) */
-
 out:
-	unlock_page(page);
 	put_page(page);
 	return err;
 }
@@ -1864,7 +1947,7 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 	q->lock_ptr = &hb->lock;
 
-	spin_lock(&hb->lock); /* implies MB (A) */
+	spin_lock(&hb->lock); /* implies smp_mb(); (A) */
 	return hb;
 }
@@ -1927,8 +2010,12 @@ static int unqueue_me(struct futex_q *q)
 	/* In the common case we don't take the spinlock, which is nice. */
 retry:
-	lock_ptr = q->lock_ptr;
-	barrier();
+	/*
+	 * q->lock_ptr can change between this read and the following spin_lock.
+	 * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
+	 * optimizing lock_ptr out of the logic below.
+	 */
+	lock_ptr = READ_ONCE(q->lock_ptr);
 	if (lock_ptr != NULL) {
 		spin_lock(lock_ptr);
 		/*
...
@@ -123,8 +123,6 @@ static inline int debug_locks_off_graph_unlock(void)
 	return ret;
 }
 
-static int lockdep_initialized;
-
 unsigned long nr_list_entries;
 static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
@@ -433,19 +431,6 @@ unsigned int nr_process_chains;
 unsigned int max_lockdep_depth;
 
 #ifdef CONFIG_DEBUG_LOCKDEP
-/*
- * We cannot printk in early bootup code. Not even early_printk()
- * might work. So we mark any initialization errors and printk
- * about it later on, in lockdep_info().
- */
-static int lockdep_init_error;
-static const char *lock_init_error;
-static unsigned long lockdep_init_trace_data[20];
-static struct stack_trace lockdep_init_trace = {
-	.max_entries = ARRAY_SIZE(lockdep_init_trace_data),
-	.entries = lockdep_init_trace_data,
-};
-
 /*
  * Various lockdep statistics:
  */
@@ -669,20 +654,6 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
 	struct hlist_head *hash_head;
 	struct lock_class *class;
 
-#ifdef CONFIG_DEBUG_LOCKDEP
-	/*
-	 * If the architecture calls into lockdep before initializing
-	 * the hashes then we'll warn about it later. (we cannot printk
-	 * right now)
-	 */
-	if (unlikely(!lockdep_initialized)) {
-		lockdep_init();
-		lockdep_init_error = 1;
-		lock_init_error = lock->name;
-		save_stack_trace(&lockdep_init_trace);
-	}
-#endif
-
 	if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
 		debug_locks_off();
 		printk(KERN_ERR
@@ -2010,6 +1981,53 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i)
 	return lock_classes + chain_hlocks[chain->base + i];
 }
 
+/*
+ * Returns the index of the first held_lock of the current chain
+ */
+static inline int get_first_held_lock(struct task_struct *curr,
+					struct held_lock *hlock)
+{
+	int i;
+	struct held_lock *hlock_curr;
+
+	for (i = curr->lockdep_depth - 1; i >= 0; i--) {
+		hlock_curr = curr->held_locks + i;
+		if (hlock_curr->irq_context != hlock->irq_context)
+			break;
+	}
+
+	return ++i;
+}
+
+/*
+ * Checks whether the chain and the current held locks are consistent
+ * in depth and also in content. If they are not it most likely means
+ * that there was a collision during the calculation of the chain_key.
+ * Returns: 0 not passed, 1 passed
+ */
+static int check_no_collision(struct task_struct *curr,
+			struct held_lock *hlock,
+			struct lock_chain *chain)
+{
+#ifdef CONFIG_DEBUG_LOCKDEP
+	int i, j, id;
+
+	i = get_first_held_lock(curr, hlock);
+
+	if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1)))
+		return 0;
+
+	for (j = 0; j < chain->depth - 1; j++, i++) {
+		id = curr->held_locks[i].class_idx - 1;
+
+		if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id))
+			return 0;
+	}
+#endif
+	return 1;
+}
+
 /*
  * Look up a dependency chain. If the key is not present yet then
  * add it and return 1 - in this case the new dependency chain is
@@ -2023,7 +2041,6 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 	struct lock_class *class = hlock_class(hlock);
 	struct hlist_head *hash_head = chainhashentry(chain_key);
 	struct lock_chain *chain;
-	struct held_lock *hlock_curr;
 	int i, j;
 
 	/*
@@ -2041,6 +2058,9 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 		if (chain->chain_key == chain_key) {
 cache_hit:
 			debug_atomic_inc(chain_lookup_hits);
+			if (!check_no_collision(curr, hlock, chain))
+				return 0;
+
 			if (very_verbose(class))
 				printk("\nhash chain already cached, key: "
 					"%016Lx tail class: [%p] %s\n",
@@ -2078,13 +2098,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 	chain = lock_chains + nr_lock_chains++;
 	chain->chain_key = chain_key;
 	chain->irq_context = hlock->irq_context;
-	/* Find the first held_lock of current chain */
-	for (i = curr->lockdep_depth - 1; i >= 0; i--) {
-		hlock_curr = curr->held_locks + i;
-		if (hlock_curr->irq_context != hlock->irq_context)
-			break;
-	}
-	i++;
+	i = get_first_held_lock(curr, hlock);
 	chain->depth = curr->lockdep_depth + 1 - i;
 	if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
 		chain->base = nr_chain_hlocks;
@@ -2172,7 +2186,7 @@ static void check_chain_key(struct task_struct *curr)
 {
 #ifdef CONFIG_DEBUG_LOCKDEP
 	struct held_lock *hlock, *prev_hlock = NULL;
-	unsigned int i, id;
+	unsigned int i;
 	u64 chain_key = 0;
 
 	for (i = 0; i < curr->lockdep_depth; i++) {
@@ -2189,17 +2203,16 @@ static void check_chain_key(struct task_struct *curr)
 				(unsigned long long)hlock->prev_chain_key);
 			return;
 		}
-		id = hlock->class_idx - 1;
 		/*
 		 * Whoops ran out of static storage again?
 		 */
-		if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+		if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS))
 			return;
 
 		if (prev_hlock && (prev_hlock->irq_context !=
 				   hlock->irq_context))
 			chain_key = 0;
-		chain_key = iterate_chain_key(chain_key, id);
+		chain_key = iterate_chain_key(chain_key, hlock->class_idx);
 		prev_hlock = hlock;
 	}
 	if (chain_key != curr->curr_chain_key) {
@@ -3077,7 +3090,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	struct task_struct *curr = current;
 	struct lock_class *class = NULL;
 	struct held_lock *hlock;
-	unsigned int depth, id;
+	unsigned int depth;
 	int chain_head = 0;
 	int class_idx;
 	u64 chain_key;
@@ -3180,11 +3193,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	 * The 'key ID' is what is the most compact key value to drive
 	 * the hash, not class->key.
 	 */
-	id = class - lock_classes;
 	/*
 	 * Whoops, we did it again.. ran straight out of our static allocation.
 	 */
-	if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+	if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS))
 		return 0;
 
 	chain_key = curr->curr_chain_key;
@@ -3202,7 +3214,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		chain_key = 0;
 		chain_head = 1;
 	}
-	chain_key = iterate_chain_key(chain_key, id);
+	chain_key = iterate_chain_key(chain_key, class_idx);
 
 	if (nest_lock && !__lock_is_held(nest_lock))
 		return print_lock_nested_lock_not_held(curr, hlock, ip);
@@ -4013,28 +4025,6 @@ void lockdep_reset_lock(struct lockdep_map *lock)
 	raw_local_irq_restore(flags);
 }
 
-void lockdep_init(void)
-{
-	int i;
-
-	/*
-	 * Some architectures have their own start_kernel()
-	 * code which calls lockdep_init(), while we also
-	 * call lockdep_init() from the start_kernel() itself,
-	 * and we want to initialize the hashes only once:
-	 */
-	if (lockdep_initialized)
-		return;
-
-	for (i = 0; i < CLASSHASH_SIZE; i++)
-		INIT_HLIST_HEAD(classhash_table + i);
-
-	for (i = 0; i < CHAINHASH_SIZE; i++)
-		INIT_HLIST_HEAD(chainhash_table + i);
-
-	lockdep_initialized = 1;
-}
-
 void __init lockdep_info(void)
 {
 	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
@@ -4061,14 +4051,6 @@ void __init lockdep_info(void)
 	printk(" per task-struct memory footprint: %lu bytes\n",
 		sizeof(struct held_lock) * MAX_LOCK_DEPTH);
-
-#ifdef CONFIG_DEBUG_LOCKDEP
-	if (lockdep_init_error) {
-		printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error);
-		printk("Call stack leading to lockdep invocation was:\n");
-		print_stack_trace(&lockdep_init_trace, 0);
-	}
-#endif
 }
 
 static void
...
@@ -67,7 +67,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	node->locked = 0;
 	node->next   = NULL;
 
-	prev = xchg_acquire(lock, node);
+	/*
+	 * We rely on the full barrier with global transitivity implied by the
+	 * below xchg() to order the initialization stores above against any
+	 * observation of @node. And to provide the ACQUIRE ordering associated
+	 * with a LOCK primitive.
+	 */
+	prev = xchg(lock, node);
 	if (likely(prev == NULL)) {
 		/*
 		 * Lock acquired, don't need to set node->locked to 1. Threads
...
@@ -716,6 +716,7 @@ static inline void
 __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
 {
 	unsigned long flags;
+	WAKE_Q(wake_q);
 
 	/*
 	 * As a performance measurement, release the lock before doing other
@@ -743,11 +744,11 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
 					   struct mutex_waiter, list);
 
 		debug_mutex_wake_waiter(lock, waiter);
-		wake_up_process(waiter->task);
+		wake_q_add(&wake_q, waiter->task);
 	}
 
 	spin_unlock_mutex(&lock->wait_lock, flags);
+	wake_up_q(&wake_q);
 }
 
 /*
...
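The change above defers the actual wakeup until after wait_lock is dropped, using the wake_q machinery. A rough sketch of that pattern outside the mutex code (illustrative only; 'example_lock' and 'example_release' are made-up names, and the waiter bookkeeping is elided):

    #include <linux/sched.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(example_lock);

    static void example_release(struct task_struct *waiter)
    {
            unsigned long flags;
            WAKE_Q(wake_q);                 /* on-stack wake queue */

            spin_lock_irqsave(&example_lock, flags);
            wake_q_add(&wake_q, waiter);    /* only queued, no wakeup yet */
            spin_unlock_irqrestore(&example_lock, flags);

            wake_up_q(&wake_q);             /* wake up outside the critical section */
    }

Waking after the unlock keeps the woken task from immediately blocking on a lock the waker still holds.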
@@ -358,8 +358,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * sequentiality; this is because not all clear_pending_set_locked()
 	 * implementations imply full barriers.
 	 */
-	while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
-		cpu_relax();
+	smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
 
 	/*
 	 * take ownership and clear the pending bit.
@@ -435,7 +434,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 *
 	 * The PV pv_wait_head_or_lock function, if active, will acquire
 	 * the lock and return a non-zero value. So we have to skip the
-	 * smp_load_acquire() call. As the next PV queue head hasn't been
+	 * smp_cond_acquire() call. As the next PV queue head hasn't been
 	 * designated yet, there is no way for the locked value to become
 	 * _Q_SLOW_VAL. So both the set_locked() and the
 	 * atomic_cmpxchg_relaxed() calls will be safe.
@@ -466,7 +465,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 			break;
 	}
 
 	/*
-	 * The smp_load_acquire() call above has provided the necessary
+	 * The smp_cond_acquire() call above has provided the necessary
 	 * acquire semantics required for locking. At most two
 	 * iterations of this loop may be ran.
 	 */
...
@@ -54,6 +54,11 @@ struct pv_node {
 	u8			state;
 };
 
+/*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
 /*
  * By replacing the regular queued_spin_trylock() with the function below,
  * it will be called once when a lock waiter enter the PV slowpath before
@@ -65,9 +70,11 @@ struct pv_node {
 static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
 {
 	struct __qspinlock *l = (void *)lock;
+	int ret = !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
+		   (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
 
-	return !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
-		(cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
+	qstat_inc(qstat_pv_lock_stealing, ret);
+	return ret;
 }
 
 /*
@@ -137,11 +144,6 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
 }
 #endif /* _Q_PENDING_BITS == 8 */
 
-/*
- * Include queued spinlock statistics code
- */
-#include "qspinlock_stat.h"
-
 /*
  * Lock and MCS node addresses hash table for fast lookup
  *
@@ -398,6 +400,11 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 	if (READ_ONCE(pn->state) == vcpu_hashed)
 		lp = (struct qspinlock **)1;
 
+	/*
+	 * Tracking # of slowpath locking operations
+	 */
+	qstat_inc(qstat_pv_lock_slowpath, true);
+
 	for (;; waitcnt++) {
 		/*
 		 * Set correct vCPU state to be used by queue node wait-early
...
@@ -22,6 +22,7 @@
  *   pv_kick_wake	- # of vCPU kicks used for computing pv_latency_wake
  *   pv_latency_kick	- average latency (ns) of vCPU kick operation
  *   pv_latency_wake	- average latency (ns) from vCPU kick to wakeup
+ *   pv_lock_slowpath	- # of locking operations via the slowpath
  *   pv_lock_stealing	- # of lock stealing operations
  *   pv_spurious_wakeup	- # of spurious wakeups
  *   pv_wait_again	- # of vCPU wait's that happened after a vCPU kick
@@ -45,6 +46,7 @@ enum qlock_stats {
 	qstat_pv_kick_wake,
 	qstat_pv_latency_kick,
 	qstat_pv_latency_wake,
+	qstat_pv_lock_slowpath,
 	qstat_pv_lock_stealing,
 	qstat_pv_spurious_wakeup,
 	qstat_pv_wait_again,
@@ -70,6 +72,7 @@ static const char * const qstat_names[qstat_num + 1] = {
 	[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
 	[qstat_pv_latency_kick]    = "pv_latency_kick",
 	[qstat_pv_latency_wake]    = "pv_latency_wake",
+	[qstat_pv_lock_slowpath]   = "pv_lock_slowpath",
 	[qstat_pv_lock_stealing]   = "pv_lock_stealing",
 	[qstat_pv_wait_again]      = "pv_wait_again",
 	[qstat_pv_wait_early]      = "pv_wait_early",
@@ -279,19 +282,6 @@ static inline void __pv_wait(u8 *ptr, u8 val)
 #define pv_kick(c)	__pv_kick(c)
 #define pv_wait(p, v)	__pv_wait(p, v)
 
-/*
- * PV unfair trylock count tracking function
- */
-static inline int qstat_spin_steal_lock(struct qspinlock *lock)
-{
-	int ret = pv_queued_spin_steal_lock(lock);
-
-	qstat_inc(qstat_pv_lock_stealing, ret);
-	return ret;
-}
-
-#undef queued_spin_trylock
-#define queued_spin_trylock(l)	qstat_spin_steal_lock(l)
-
 #else /* CONFIG_QUEUED_LOCK_STAT */
 
 static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
...
@@ -105,13 +105,12 @@ void __init call_function_init(void)
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  */
-static void csd_lock_wait(struct call_single_data *csd)
+static __always_inline void csd_lock_wait(struct call_single_data *csd)
 {
-	while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK)
-		cpu_relax();
+	smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
 }
 
-static void csd_lock(struct call_single_data *csd)
+static __always_inline void csd_lock(struct call_single_data *csd)
 {
 	csd_lock_wait(csd);
 	csd->flags |= CSD_FLAG_LOCK;
@@ -124,7 +123,7 @@ static void csd_lock(struct call_single_data *csd)
 	smp_wmb();
 }
 
-static void csd_unlock(struct call_single_data *csd)
+static __always_inline void csd_unlock(struct call_single_data *csd)
 {
 	WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
...
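Both this csd_lock_wait() change and the qspinlock pending-wait change earlier in the series lean on smp_cond_acquire(): spin on an arbitrary condition, then upgrade the control dependency to ACQUIRE ordering. Roughly, the idiom looks like the sketch below (for orientation only; the authoritative definition lives in include/linux/compiler.h and may differ in detail):

    #define smp_cond_acquire(cond)  do {            \
            while (!(cond))                         \
                    cpu_relax();                    \
            smp_rmb(); /* ctrl + rmb := acquire */  \
    } while (0)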
@@ -46,8 +46,11 @@ struct test_key {
 	bool			(*test_key)(void);
 };
 
 #define test_key_func(key, branch) \
-	({bool func(void) { return branch(key); } func; })
+static bool key ## _ ## branch(void)	\
+{					\
+	return branch(&key);		\
+}
 
 static void invert_key(struct static_key *key)
 {
@@ -92,6 +95,25 @@ static int verify_keys(struct test_key *keys, int size, bool invert)
 	return 0;
 }
 
+test_key_func(old_true_key, static_key_true)
+test_key_func(old_false_key, static_key_false)
+test_key_func(true_key, static_branch_likely)
+test_key_func(true_key, static_branch_unlikely)
+test_key_func(false_key, static_branch_likely)
+test_key_func(false_key, static_branch_unlikely)
+test_key_func(base_old_true_key, static_key_true)
+test_key_func(base_inv_old_true_key, static_key_true)
+test_key_func(base_old_false_key, static_key_false)
+test_key_func(base_inv_old_false_key, static_key_false)
+test_key_func(base_true_key, static_branch_likely)
+test_key_func(base_true_key, static_branch_unlikely)
+test_key_func(base_inv_true_key, static_branch_likely)
+test_key_func(base_inv_true_key, static_branch_unlikely)
+test_key_func(base_false_key, static_branch_likely)
+test_key_func(base_false_key, static_branch_unlikely)
+test_key_func(base_inv_false_key, static_branch_likely)
+test_key_func(base_inv_false_key, static_branch_unlikely)
+
 static int __init test_static_key_init(void)
 {
 	int ret;
@@ -102,95 +124,95 @@
 		{
 			.init_state	= true,
 			.key		= &old_true_key,
-			.test_key	= test_key_func(&old_true_key, static_key_true),
+			.test_key	= &old_true_key_static_key_true,
 		},
 		{
 			.init_state	= false,
 			.key		= &old_false_key,
-			.test_key	= test_key_func(&old_false_key, static_key_false),
+			.test_key	= &old_false_key_static_key_false,
 		},
 		/* internal keys - new keys */
 		{
 			.init_state	= true,
 			.key		= &true_key.key,
-			.test_key	= test_key_func(&true_key, static_branch_likely),
+			.test_key	= &true_key_static_branch_likely,
 		},
 		{
 			.init_state	= true,
 			.key		= &true_key.key,
-			.test_key	= test_key_func(&true_key, static_branch_unlikely),
+			.test_key	= &true_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= false,
 			.key		= &false_key.key,
-			.test_key	= test_key_func(&false_key, static_branch_likely),
+			.test_key	= &false_key_static_branch_likely,
 		},
 		{
 			.init_state	= false,
 			.key		= &false_key.key,
-			.test_key	= test_key_func(&false_key, static_branch_unlikely),
+			.test_key	= &false_key_static_branch_unlikely,
 		},
 		/* external keys - old keys */
 		{
 			.init_state	= true,
 			.key		= &base_old_true_key,
-			.test_key	= test_key_func(&base_old_true_key, static_key_true),
+			.test_key	= &base_old_true_key_static_key_true,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_inv_old_true_key,
-			.test_key	= test_key_func(&base_inv_old_true_key, static_key_true),
+			.test_key	= &base_inv_old_true_key_static_key_true,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_old_false_key,
-			.test_key	= test_key_func(&base_old_false_key, static_key_false),
+			.test_key	= &base_old_false_key_static_key_false,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_inv_old_false_key,
-			.test_key	= test_key_func(&base_inv_old_false_key, static_key_false),
+			.test_key	= &base_inv_old_false_key_static_key_false,
 		},
 		/* external keys - new keys */
 		{
 			.init_state	= true,
 			.key		= &base_true_key.key,
-			.test_key	= test_key_func(&base_true_key, static_branch_likely),
+			.test_key	= &base_true_key_static_branch_likely,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_true_key.key,
-			.test_key	= test_key_func(&base_true_key, static_branch_unlikely),
+			.test_key	= &base_true_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_inv_true_key.key,
-			.test_key	= test_key_func(&base_inv_true_key, static_branch_likely),
+			.test_key	= &base_inv_true_key_static_branch_likely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_inv_true_key.key,
-			.test_key	= test_key_func(&base_inv_true_key, static_branch_unlikely),
+			.test_key	= &base_inv_true_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_false_key.key,
-			.test_key	= test_key_func(&base_false_key, static_branch_likely),
+			.test_key	= &base_false_key_static_branch_likely,
 		},
 		{
 			.init_state	= false,
 			.key		= &base_false_key.key,
-			.test_key	= test_key_func(&base_false_key, static_branch_unlikely),
+			.test_key	= &base_false_key_static_branch_unlikely,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_inv_false_key.key,
-			.test_key	= test_key_func(&base_inv_false_key, static_branch_likely),
+			.test_key	= &base_inv_false_key_static_branch_likely,
 		},
 		{
 			.init_state	= true,
 			.key		= &base_inv_false_key.key,
-			.test_key	= test_key_func(&base_inv_false_key, static_branch_unlikely),
+			.test_key	= &base_inv_false_key_static_branch_unlikely,
 		},
 	};
...
@@ -100,7 +100,7 @@ include $(srctree)/tools/build/Makefile.include
 do_compile_shared_library =			\
 	($(print_shared_lib_compile)		\
-	$(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -s $@ liblockdep.so))
+	$(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -sf $@ liblockdep.so))
 
 do_build_static_lib =				\
 	($(print_static_lib_build)		\
...
@@ -11,11 +11,6 @@ static __thread struct task_struct current_obj;
 bool debug_locks = true;
 bool debug_locks_silent;
 
-__attribute__((constructor)) static void liblockdep_init(void)
-{
-	lockdep_init();
-}
-
 __attribute__((destructor)) static void liblockdep_exit(void)
 {
 	debug_check_no_locks_held();
...
@@ -44,7 +44,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 void lock_release(struct lockdep_map *lock, int nested,
 			unsigned long ip);
 extern void debug_check_no_locks_freed(const void *from, unsigned long len);
-extern void lockdep_init(void);
 
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
 	{ .name = (_name), .key = (void *)(_key), }
...
 #include <linux/lockdep.h>
+
+/* Trivial API wrappers, we don't (yet) have RCU in user-space: */
+#define hlist_for_each_entry_rcu	hlist_for_each_entry
+#define hlist_add_head_rcu		hlist_add_head
+#define hlist_del_rcu			hlist_del
+
 #include "../../../kernel/locking/lockdep.c"
@@ -439,7 +439,5 @@ __attribute__((constructor)) static void init_preload(void)
 	ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock");
 #endif
 
-	lockdep_init();
-
 	__init_state = done;
 }
 #include <liblockdep/mutex.h>
 
-void main(void)
+int main(void)
 {
-	pthread_mutex_t a, b;
+	pthread_mutex_t a;
 
 	pthread_mutex_init(&a, NULL);
-	pthread_mutex_init(&b, NULL);
 
 	pthread_mutex_lock(&a);
-	pthread_mutex_lock(&b);
 	pthread_mutex_lock(&a);
+
+	return 0;
 }
#include <liblockdep/mutex.h>
void main(void)
{
pthread_mutex_t a, b;
pthread_mutex_init(&a, NULL);
pthread_mutex_init(&b, NULL);
pthread_mutex_lock(&a);
pthread_mutex_lock(&b);
pthread_mutex_lock(&a);
}
#include <stdio.h>
#include <pthread.h>
pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
pthread_barrier_t bar;
void *ba_lock(void *arg)
{
int ret, i;
pthread_mutex_lock(&b);
if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
pthread_barrier_destroy(&bar);
pthread_mutex_lock(&a);
pthread_mutex_unlock(&a);
pthread_mutex_unlock(&b);
}
int main(void)
{
pthread_t t;
pthread_barrier_init(&bar, NULL, 2);
if (pthread_create(&t, NULL, ba_lock, NULL)) {
fprintf(stderr, "pthread_create() failed\n");
return 1;
}
pthread_mutex_lock(&a);
if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
pthread_barrier_destroy(&bar);
pthread_mutex_lock(&b);
pthread_mutex_unlock(&b);
pthread_mutex_unlock(&a);
pthread_join(t, NULL);
return 0;
}
@@ -3,6 +3,7 @@
 #define __used		__attribute__((__unused__))
 #define unlikely
+#define READ_ONCE(x) (x)
 #define WRITE_ONCE(x, val) x=(val)
 #define RCU_INIT_POINTER(p, v) p=(v)
...