Commit 4b9fd8a8 authored by Linus Torvalds

Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:
 "The main changes in this cycle were:

   - Continued user-access cleanups in the futex code.

   - percpu-rwsem rewrite that uses its own waitqueue and atomic_t
     instead of an embedded rwsem. This addresses a couple of
     weaknesses, but the primary motivation was complications on the -rt
     kernel.

   - Introduce raw lock nesting detection on lockdep
     (CONFIG_PROVE_RAW_LOCK_NESTING=y), document the raw_lock vs. normal
     lock differences. This too originates from -rt.

   - Reuse lockdep zapped chain_hlocks entries, to conserve RAM
     footprint on distro-ish kernels running into the "BUG:
     MAX_LOCKDEP_CHAIN_HLOCKS too low!" depletion of the lockdep
     chain-entries pool.

   - Misc cleanups, smaller fixes and enhancements - see the changelog
     for details"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (55 commits)
  fs/buffer: Make BH_Uptodate_Lock bit_spin_lock a regular spinlock_t
  thermal/x86_pkg_temp: Make pkg_temp_lock a raw_spinlock_t
  Documentation/locking/locktypes: Minor copy editor fixes
  Documentation/locking/locktypes: Further clarifications and wordsmithing
  m68knommu: Remove mm.h include from uaccess_no.h
  x86: get rid of user_atomic_cmpxchg_inatomic()
  generic arch_futex_atomic_op_inuser() doesn't need access_ok()
  x86: don't reload after cmpxchg in unsafe_atomic_op2() loop
  x86: convert arch_futex_atomic_op_inuser() to user_access_begin/user_access_end()
  objtool: whitelist __sanitizer_cov_trace_switch()
  [parisc, s390, sparc64] no need for access_ok() in futex handling
  sh: no need of access_ok() in arch_futex_atomic_op_inuser()
  futex: arch_futex_atomic_op_inuser() calling conventions change
  completion: Use lockdep_assert_RT_in_threaded_ctx() in complete_all()
  lockdep: Add posixtimer context tracing bits
  lockdep: Annotate irq_work
  lockdep: Add hrtimer context tracing bits
  lockdep: Introduce wait-type checks
  completion: Use simple wait queues
  sched/swait: Prepare usage in completions
  ...
parents a776c270 f1e67e35
......@@ -7,6 +7,7 @@ locking
.. toctree::
:maxdepth: 1
locktypes
lockdep-design
lockstat
locktorture
......
This diff is collapsed.
......@@ -31,7 +31,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
{
int oldval = 0, ret;
pagefault_disable();
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
......@@ -53,8 +54,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -75,10 +75,12 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
{
int oldval = 0, ret;
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
#ifndef CONFIG_ARC_HAS_LLSC
preempt_disable(); /* to guarantee atomic r-m-w of futex op */
#endif
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
......@@ -101,7 +103,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
ret = -ENOSYS;
}
pagefault_enable();
#ifndef CONFIG_ARC_HAS_LLSC
preempt_enable();
#endif
......
......@@ -134,10 +134,12 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
int oldval = 0, ret, tmp;
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
#ifndef CONFIG_SMP
preempt_disable();
#endif
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
......@@ -159,7 +161,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
ret = -ENOSYS;
}
pagefault_enable();
#ifndef CONFIG_SMP
preempt_enable();
#endif
......
......@@ -48,7 +48,8 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr)
int oldval = 0, ret, tmp;
u32 __user *uaddr = __uaccess_mask_ptr(_uaddr);
pagefault_disable();
if (!access_ok(_uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
......@@ -75,8 +76,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr)
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -11,7 +11,6 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/version.h>
#include <asm/segment.h>
......
......@@ -36,7 +36,8 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
int oldval = 0, ret;
pagefault_disable();
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
......@@ -62,8 +63,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -10,7 +10,6 @@
/*
* User space memory access functions
*/
#include <linux/mm.h>
#include <asm/sections.h>
/*
......
......@@ -50,7 +50,8 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
int oldval = 0, ret;
pagefault_disable();
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
......@@ -74,8 +75,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -35,7 +35,6 @@
#include <linux/compiler.h>
#include <linux/page-flags.h>
#include <linux/mm.h>
#include <asm/intrinsics.h>
#include <asm/pgtable.h>
......
......@@ -681,3 +681,4 @@ machine_power_off (void)
machine_halt();
}
EXPORT_SYMBOL(ia64_delay_loop);
......@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/efi.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/io.h>
#include <asm/meminit.h>
......
......@@ -5,7 +5,6 @@
/*
* User space memory access functions
*/
#include <linux/mm.h>
#include <linux/string.h>
#include <asm/segment.h>
......
......@@ -34,7 +34,8 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
int oldval = 0, ret;
pagefault_disable();
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
......@@ -56,8 +57,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -12,7 +12,6 @@
#define _ASM_MICROBLAZE_UACCESS_H
#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/mmu.h>
#include <asm/page.h>
......
......@@ -89,7 +89,8 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
int oldval = 0, ret;
pagefault_disable();
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
......@@ -116,8 +117,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -66,8 +66,8 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
int oldval = 0, ret;
pagefault_disable();
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("move %0, %3", ret, oldval, tmp, uaddr,
......@@ -93,8 +93,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -11,7 +11,6 @@
#include <asm/errno.h>
#include <asm/memory.h>
#include <asm/types.h>
#include <linux/mm.h>
#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t"
......
......@@ -35,7 +35,8 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
int oldval = 0, ret;
pagefault_disable();
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
......@@ -57,8 +58,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -40,7 +40,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
u32 tmp;
_futex_spin_lock_irqsave(uaddr, &flags);
pagefault_disable();
ret = -EFAULT;
if (unlikely(get_user(oldval, uaddr) != 0))
......@@ -73,7 +72,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
ret = -EFAULT;
out_pagefault_enable:
pagefault_enable();
_futex_spin_unlock_irqrestore(uaddr, &flags);
if (!ret)
......
......@@ -35,8 +35,9 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
{
int oldval = 0, ret;
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
allow_read_write_user(uaddr, uaddr, sizeof(*uaddr));
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
......@@ -58,8 +59,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
ret = -ENOSYS;
}
pagefault_enable();
*oval = oldval;
prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr));
......
......@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/reboot.h>
#include <linux/rcuwait.h>
#include <asm/firmware.h>
#include <asm/lv1call.h>
......@@ -670,7 +671,8 @@ struct ps3_notification_device {
spinlock_t lock;
u64 tag;
u64 lv1_status;
struct completion done;
struct rcuwait wait;
bool done;
};
enum ps3_notify_type {
......@@ -712,7 +714,8 @@ static irqreturn_t ps3_notification_interrupt(int irq, void *data)
pr_debug("%s:%u: completed, status 0x%llx\n", __func__,
__LINE__, status);
dev->lv1_status = status;
complete(&dev->done);
dev->done = true;
rcuwait_wake_up(&dev->wait);
}
spin_unlock(&dev->lock);
return IRQ_HANDLED;
......@@ -725,12 +728,12 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev,
unsigned long flags;
int res;
init_completion(&dev->done);
spin_lock_irqsave(&dev->lock, flags);
res = write ? lv1_storage_write(dev->sbd.dev_id, 0, 0, 1, 0, lpar,
&dev->tag)
: lv1_storage_read(dev->sbd.dev_id, 0, 0, 1, 0, lpar,
&dev->tag);
dev->done = false;
spin_unlock_irqrestore(&dev->lock, flags);
if (res) {
pr_err("%s:%u: %s failed %d\n", __func__, __LINE__, op, res);
......@@ -738,14 +741,10 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev,
}
pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
res = wait_event_interruptible(dev->done.wait,
dev->done.done || kthread_should_stop());
rcuwait_wait_event(&dev->wait, dev->done || kthread_should_stop(), TASK_IDLE);
if (kthread_should_stop())
res = -EINTR;
if (res) {
pr_debug("%s:%u: interrupted %s\n", __func__, __LINE__, op);
return res;
}
if (dev->lv1_status) {
pr_err("%s:%u: %s not completed, status 0x%llx\n", __func__,
......@@ -810,6 +809,7 @@ static int ps3_probe_thread(void *data)
}
spin_lock_init(&dev.lock);
rcuwait_init(&dev.wait);
res = request_irq(irq, ps3_notification_interrupt, 0,
"ps3_notification", &dev);
......
......@@ -46,7 +46,8 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
int oldval = 0, ret = 0;
pagefault_disable();
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
......@@ -73,8 +74,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -29,7 +29,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
mm_segment_t old_fs;
old_fs = enable_sacf_uaccess();
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("lr %2,%5\n",
......@@ -54,7 +53,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
default:
ret = -ENOSYS;
}
pagefault_enable();
disable_sacf_uaccess(old_fs);
if (!ret)
......
......@@ -34,8 +34,6 @@ static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
u32 oldval, newval, prev;
int ret;
pagefault_disable();
do {
ret = get_user(oldval, uaddr);
......@@ -67,8 +65,6 @@ static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
ret = futex_atomic_cmpxchg_inatomic(&prev, uaddr, oldval, newval);
} while (!ret && prev != oldval);
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -38,8 +38,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
if (unlikely((((unsigned long) uaddr) & 0x3UL)))
return -EINVAL;
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
__futex_cas_op("mov\t%4, %1", ret, oldval, uaddr, oparg);
......@@ -60,8 +58,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -12,76 +12,103 @@
#include <asm/processor.h>
#include <asm/smap.h>
#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
asm volatile("\t" ASM_STAC "\n" \
"1:\t" insn "\n" \
"2:\t" ASM_CLAC "\n" \
#define unsafe_atomic_op1(insn, oval, uaddr, oparg, label) \
do { \
int oldval = 0, ret; \
asm volatile("1:\t" insn "\n" \
"2:\n" \
"\t.section .fixup,\"ax\"\n" \
"3:\tmov\t%3, %1\n" \
"\tjmp\t2b\n" \
"\t.previous\n" \
_ASM_EXTABLE_UA(1b, 3b) \
: "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
: "i" (-EFAULT), "0" (oparg), "1" (0))
: "i" (-EFAULT), "0" (oparg), "1" (0)); \
if (ret) \
goto label; \
*oval = oldval; \
} while(0)
#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
asm volatile("\t" ASM_STAC "\n" \
"1:\tmovl %2, %0\n" \
"\tmovl\t%0, %3\n" \
#define unsafe_atomic_op2(insn, oval, uaddr, oparg, label) \
do { \
int oldval = 0, ret, tem; \
asm volatile("1:\tmovl %2, %0\n" \
"2:\tmovl\t%0, %3\n" \
"\t" insn "\n" \
"2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \
"\tjnz\t1b\n" \
"3:\t" ASM_CLAC "\n" \
"3:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \
"\tjnz\t2b\n" \
"4:\n" \
"\t.section .fixup,\"ax\"\n" \
"4:\tmov\t%5, %1\n" \
"\tjmp\t3b\n" \
"5:\tmov\t%5, %1\n" \
"\tjmp\t4b\n" \
"\t.previous\n" \
_ASM_EXTABLE_UA(1b, 4b) \
_ASM_EXTABLE_UA(2b, 4b) \
_ASM_EXTABLE_UA(1b, 5b) \
_ASM_EXTABLE_UA(3b, 5b) \
: "=&a" (oldval), "=&r" (ret), \
"+m" (*uaddr), "=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "1" (0))
: "r" (oparg), "i" (-EFAULT), "1" (0)); \
if (ret) \
goto label; \
*oval = oldval; \
} while(0)
static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
u32 __user *uaddr)
{
int oldval = 0, ret, tem;
pagefault_disable();
if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
unsafe_atomic_op1("xchgl %0, %2", oval, uaddr, oparg, Efault);
break;
case FUTEX_OP_ADD:
__futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval,
uaddr, oparg);
unsafe_atomic_op1(LOCK_PREFIX "xaddl %0, %2", oval,
uaddr, oparg, Efault);
break;
case FUTEX_OP_OR:
__futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
unsafe_atomic_op2("orl %4, %3", oval, uaddr, oparg, Efault);
break;
case FUTEX_OP_ANDN:
__futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
unsafe_atomic_op2("andl %4, %3", oval, uaddr, ~oparg, Efault);
break;
case FUTEX_OP_XOR:
__futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
unsafe_atomic_op2("xorl %4, %3", oval, uaddr, oparg, Efault);
break;
default:
ret = -ENOSYS;
user_access_end();
return -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
return ret;
user_access_end();
return 0;
Efault:
user_access_end();
return -EFAULT;
}
static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
return user_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval);
int ret = 0;
if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
asm volatile("\n"
"1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
"2:\n"
"\t.section .fixup, \"ax\"\n"
"3:\tmov %3, %0\n"
"\tjmp 2b\n"
"\t.previous\n"
_ASM_EXTABLE_UA(1b, 3b)
: "+r" (ret), "=a" (oldval), "+m" (*uaddr)
: "i" (-EFAULT), "r" (newval), "1" (oldval)
: "memory"
);
user_access_end();
*uval = oldval;
return ret;
}
#endif
......
......@@ -584,99 +584,6 @@ extern __must_check long strnlen_user(const char __user *str, long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
extern void __cmpxchg_wrong_size(void)
__compiletime_error("Bad argument size for cmpxchg");
#define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \
({ \
int __ret = 0; \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__uaccess_begin_nospec(); \
switch (size) { \
case 1: \
{ \
asm volatile("\n" \
"1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \
"2:\n" \
"\t.section .fixup, \"ax\"\n" \
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
_ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "q" (__new), "1" (__old) \
: "memory" \
); \
break; \
} \
case 2: \
{ \
asm volatile("\n" \
"1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \
"2:\n" \
"\t.section .fixup, \"ax\"\n" \
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
_ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "r" (__new), "1" (__old) \
: "memory" \
); \
break; \
} \
case 4: \
{ \
asm volatile("\n" \
"1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \
"2:\n" \
"\t.section .fixup, \"ax\"\n" \
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
_ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "r" (__new), "1" (__old) \
: "memory" \
); \
break; \
} \
case 8: \
{ \
if (!IS_ENABLED(CONFIG_X86_64)) \
__cmpxchg_wrong_size(); \
\
asm volatile("\n" \
"1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \
"2:\n" \
"\t.section .fixup, \"ax\"\n" \
"3:\tmov %3, %0\n" \
"\tjmp 2b\n" \
"\t.previous\n" \
_ASM_EXTABLE_UA(1b, 3b) \
: "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \
: "i" (-EFAULT), "r" (__new), "1" (__old) \
: "memory" \
); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
__uaccess_end(); \
*(uval) = __old; \
__ret; \
})
#define user_atomic_cmpxchg_inatomic(uval, ptr, old, new) \
({ \
access_ok((ptr), sizeof(*(ptr))) ? \
__user_atomic_cmpxchg_inatomic((uval), (ptr), \
(old), (new), sizeof(*(ptr))) : \
-EFAULT; \
})
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
......
......@@ -72,7 +72,8 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
#if XCHAL_HAVE_S32C1I || XCHAL_HAVE_EXCLUSIVE
int oldval = 0, ret;
pagefault_disable();
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
case FUTEX_OP_SET:
......@@ -99,8 +100,6 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
ret = -ENOSYS;
}
pagefault_enable();
if (!ret)
*oval = oldval;
......
......@@ -365,17 +365,6 @@ static struct request_context *ezusb_alloc_ctx(struct ezusb_priv *upriv,
return ctx;
}
/* Hopefully the real complete_all will soon be exported, in the mean
* while this should work. */
static inline void ezusb_complete_all(struct completion *comp)
{
complete(comp);
complete(comp);
complete(comp);
complete(comp);
}
static void ezusb_ctx_complete(struct request_context *ctx)
{
struct ezusb_priv *upriv = ctx->upriv;
......@@ -409,7 +398,7 @@ static void ezusb_ctx_complete(struct request_context *ctx)
netif_wake_queue(dev);
}
ezusb_complete_all(&ctx->done);
complete_all(&ctx->done);
ezusb_request_context_put(ctx);
break;
......@@ -419,7 +408,7 @@ static void ezusb_ctx_complete(struct request_context *ctx)
/* This is normal, as all request contexts get flushed
* when the device is disconnected */
err("Called, CTX not terminating, but device gone");
ezusb_complete_all(&ctx->done);
complete_all(&ctx->done);
ezusb_request_context_put(ctx);
break;
}
......@@ -690,11 +679,11 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv,
* get the chance to run themselves. So we make sure
* that we don't sleep for ever */
int msecs = DEF_TIMEOUT * (1000 / HZ);
while (!ctx->done.done && msecs--)
while (!try_wait_for_completion(&ctx->done) && msecs--)
udelay(1000);
} else {
wait_event_interruptible(ctx->done.wait,
ctx->done.done);
wait_for_completion(&ctx->done);
}
break;
default:
......
......@@ -52,10 +52,11 @@ struct switchtec_user {
enum mrpc_state state;
struct completion comp;
wait_queue_head_t cmd_comp;
struct kref kref;
struct list_head list;
bool cmd_done;
u32 cmd;
u32 status;
u32 return_code;
......@@ -77,7 +78,7 @@ static struct switchtec_user *stuser_create(struct switchtec_dev *stdev)
stuser->stdev = stdev;
kref_init(&stuser->kref);
INIT_LIST_HEAD(&stuser->list);
init_completion(&stuser->comp);
init_waitqueue_head(&stuser->cmd_comp);
stuser->event_cnt = atomic_read(&stdev->event_cnt);
dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
......@@ -175,7 +176,7 @@ static int mrpc_queue_cmd(struct switchtec_user *stuser)
kref_get(&stuser->kref);
stuser->read_len = sizeof(stuser->data);
stuser_set_state(stuser, MRPC_QUEUED);
init_completion(&stuser->comp);
stuser->cmd_done = false;
list_add_tail(&stuser->list, &stdev->mrpc_queue);
mrpc_cmd_submit(stdev);
......@@ -222,7 +223,8 @@ static void mrpc_complete_cmd(struct switchtec_dev *stdev)
memcpy_fromio(stuser->data, &stdev->mmio_mrpc->output_data,
stuser->read_len);
out:
complete_all(&stuser->comp);
stuser->cmd_done = true;
wake_up_interruptible(&stuser->cmd_comp);
list_del_init(&stuser->list);
stuser_put(stuser);
stdev->mrpc_busy = 0;
......@@ -529,10 +531,11 @@ static ssize_t switchtec_dev_read(struct file *filp, char __user *data,
mutex_unlock(&stdev->mrpc_mutex);
if (filp->f_flags & O_NONBLOCK) {
if (!try_wait_for_completion(&stuser->comp))
if (!stuser->cmd_done)
return -EAGAIN;
} else {
rc = wait_for_completion_interruptible(&stuser->comp);
rc = wait_event_interruptible(stuser->cmd_comp,
stuser->cmd_done);
if (rc < 0)
return rc;
}
......@@ -580,7 +583,7 @@ static __poll_t switchtec_dev_poll(struct file *filp, poll_table *wait)
struct switchtec_dev *stdev = stuser->stdev;
__poll_t ret = 0;
poll_wait(filp, &stuser->comp.wait, wait);
poll_wait(filp, &stuser->cmd_comp, wait);
poll_wait(filp, &stdev->event_wq, wait);
if (lock_mutex_and_test_alive(stdev))
......@@ -588,7 +591,7 @@ static __poll_t switchtec_dev_poll(struct file *filp, poll_table *wait)
mutex_unlock(&stdev->mrpc_mutex);
if (try_wait_for_completion(&stuser->comp))
if (stuser->cmd_done)
ret |= EPOLLIN | EPOLLRDNORM;
if (stuser->event_cnt != atomic_read(&stdev->event_cnt))
......@@ -1272,7 +1275,8 @@ static void stdev_kill(struct switchtec_dev *stdev)
/* Wake up and kill any users waiting on an MRPC request */
list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) {
complete_all(&stuser->comp);
stuser->cmd_done = true;
wake_up_interruptible(&stuser->cmd_comp);
list_del_init(&stuser->list);
stuser_put(stuser);
}
......
......@@ -16,6 +16,7 @@
#include <linux/interrupt.h>
#include <linux/miscdevice.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
struct smo8800_device {
u32 irq; /* acpi device irq */
......
......@@ -29,6 +29,7 @@
#include <linux/uaccess.h>
#include <linux/uuid.h>
#include <linux/wmi.h>
#include <linux/fs.h>
#include <uapi/linux/wmi.h>
ACPI_MODULE_NAME("wmi");
......
......@@ -19,6 +19,7 @@
#include <linux/acpi.h>
#include <linux/uaccess.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include "acpi_thermal_rel.h"
static acpi_handle acpi_thermal_rel_handle;
......
......@@ -63,7 +63,7 @@ static int max_id __read_mostly;
/* Array of zone pointers */
static struct zone_device **zones;
/* Serializes interrupt notification, work and hotplug */
static DEFINE_SPINLOCK(pkg_temp_lock);
static DEFINE_RAW_SPINLOCK(pkg_temp_lock);
/* Protects zone operation in the work function against hotplug removal */
static DEFINE_MUTEX(thermal_zone_mutex);
......@@ -266,12 +266,12 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
u64 msr_val, wr_val;
mutex_lock(&thermal_zone_mutex);
spin_lock_irq(&pkg_temp_lock);
raw_spin_lock_irq(&pkg_temp_lock);
++pkg_work_cnt;
zonedev = pkg_temp_thermal_get_dev(cpu);
if (!zonedev) {
spin_unlock_irq(&pkg_temp_lock);
raw_spin_unlock_irq(&pkg_temp_lock);
mutex_unlock(&thermal_zone_mutex);
return;
}
......@@ -285,7 +285,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
}
enable_pkg_thres_interrupt();
spin_unlock_irq(&pkg_temp_lock);
raw_spin_unlock_irq(&pkg_temp_lock);
/*
* If tzone is not NULL, then thermal_zone_mutex will prevent the
......@@ -310,7 +310,7 @@ static int pkg_thermal_notify(u64 msr_val)
struct zone_device *zonedev;
unsigned long flags;
spin_lock_irqsave(&pkg_temp_lock, flags);
raw_spin_lock_irqsave(&pkg_temp_lock, flags);
++pkg_interrupt_cnt;
disable_pkg_thres_interrupt();
......@@ -322,7 +322,7 @@ static int pkg_thermal_notify(u64 msr_val)
pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
}
spin_unlock_irqrestore(&pkg_temp_lock, flags);
raw_spin_unlock_irqrestore(&pkg_temp_lock, flags);
return 0;
}
......@@ -368,9 +368,9 @@ static int pkg_temp_thermal_device_add(unsigned int cpu)
zonedev->msr_pkg_therm_high);
cpumask_set_cpu(cpu, &zonedev->cpumask);
spin_lock_irq(&pkg_temp_lock);
raw_spin_lock_irq(&pkg_temp_lock);
zones[id] = zonedev;
spin_unlock_irq(&pkg_temp_lock);
raw_spin_unlock_irq(&pkg_temp_lock);
return 0;
}
......@@ -407,7 +407,7 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
}
/* Protect against work and interrupts */
spin_lock_irq(&pkg_temp_lock);
raw_spin_lock_irq(&pkg_temp_lock);
/*
* Check whether this cpu was the current target and store the new
......@@ -439,9 +439,9 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
* To cancel the work we need to drop the lock, otherwise
* we might deadlock if the work needs to be flushed.
*/
spin_unlock_irq(&pkg_temp_lock);
raw_spin_unlock_irq(&pkg_temp_lock);
cancel_delayed_work_sync(&zonedev->work);
spin_lock_irq(&pkg_temp_lock);
raw_spin_lock_irq(&pkg_temp_lock);
/*
* If this is not the last cpu in the package and the work
* did not run after we dropped the lock above, then we
......@@ -452,7 +452,7 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
pkg_thermal_schedule_work(target, &zonedev->work);
}
spin_unlock_irq(&pkg_temp_lock);
raw_spin_unlock_irq(&pkg_temp_lock);
/* Final cleanup if this is the last cpu */
if (lastcpu)
......
......@@ -1704,7 +1704,7 @@ static void ffs_data_put(struct ffs_data *ffs)
pr_info("%s(): freeing\n", __func__);
ffs_data_clear(ffs);
BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
waitqueue_active(&ffs->ep0req_completion.wait) ||
swait_active(&ffs->ep0req_completion.wait) ||
waitqueue_active(&ffs->wait));
destroy_workqueue(ffs->io_completion_wq);
kfree(ffs->dev_name);
......
......@@ -344,7 +344,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
spin_unlock_irq (&epdata->dev->lock);
if (likely (value == 0)) {
value = wait_event_interruptible (done.wait, done.done);
value = wait_for_completion_interruptible(&done);
if (value != 0) {
spin_lock_irq (&epdata->dev->lock);
if (likely (epdata->ep != NULL)) {
......@@ -353,7 +353,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
usb_ep_dequeue (epdata->ep, epdata->req);
spin_unlock_irq (&epdata->dev->lock);
wait_event (done.wait, done.done);
wait_for_completion(&done);
if (epdata->status == -ECONNRESET)
epdata->status = -EINTR;
} else {
......
......@@ -274,8 +274,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
* decide that the page is now completely done.
*/
first = page_buffers(page);
local_irq_save(flags);
bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
spin_lock_irqsave(&first->b_uptodate_lock, flags);
clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
......@@ -288,8 +287,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
}
tmp = tmp->b_this_page;
} while (tmp != bh);
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
/*
* If none of the buffers had errors and they are all
......@@ -301,8 +299,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
return;
still_busy:
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
return;
}
......@@ -371,8 +368,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
}
first = page_buffers(page);
local_irq_save(flags);
bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
spin_lock_irqsave(&first->b_uptodate_lock, flags);
clear_buffer_async_write(bh);
unlock_buffer(bh);
......@@ -384,14 +380,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
}
tmp = tmp->b_this_page;
}
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
end_page_writeback(page);
return;
still_busy:
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
return;
}
EXPORT_SYMBOL(end_buffer_async_write);
......@@ -3342,6 +3336,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
if (ret) {
INIT_LIST_HEAD(&ret->b_assoc_buffers);
spin_lock_init(&ret->b_uptodate_lock);
preempt_disable();
__this_cpu_inc(bh_accounting.nr);
recalc_bh_state();
......
......@@ -125,11 +125,10 @@ static void ext4_finish_bio(struct bio *bio)
}
bh = head = page_buffers(page);
/*
* We check all buffers in the page under BH_Uptodate_Lock
* We check all buffers in the page under b_uptodate_lock
* to avoid races with other end io clearing async_write flags
*/
local_irq_save(flags);
bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
spin_lock_irqsave(&head->b_uptodate_lock, flags);
do {
if (bh_offset(bh) < bio_start ||
bh_offset(bh) + bh->b_size > bio_end) {
......@@ -141,8 +140,7 @@ static void ext4_finish_bio(struct bio *bio)
if (bio->bi_status)
buffer_io_error(bh);
} while ((bh = bh->b_this_page) != head);
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
local_irq_restore(flags);
spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
if (!under_io) {
fscrypt_free_bounce_page(bounce_page);
end_page_writeback(page);
......
......@@ -92,8 +92,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
"0x%llx.", (unsigned long long)bh->b_blocknr);
}
first = page_buffers(page);
local_irq_save(flags);
bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
spin_lock_irqsave(&first->b_uptodate_lock, flags);
clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
......@@ -108,8 +107,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
}
tmp = tmp->b_this_page;
} while (tmp != bh);
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
/*
* If none of the buffers had errors then we can set the page uptodate,
* but we first have to perform the post read mst fixups, if the
......@@ -142,8 +140,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
unlock_page(page);
return;
still_busy:
bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
local_irq_restore(flags);
spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
return;
}
......
......@@ -80,7 +80,7 @@ bool acpi_dev_present(const char *hid, const char *uid, s64 hrv);
#ifdef CONFIG_ACPI
#include <linux/proc_fs.h>
struct proc_dir_entry;
#define ACPI_BUS_FILE_ROOT "acpi"
extern struct proc_dir_entry *acpi_root_dir;
......
......@@ -4,8 +4,9 @@
/*
* For the benefit of those who are trying to port Linux to another
* architecture, here are some C-language equivalents. You should
* recode these in the native assembly language, if at all possible.
* architecture, here are some C-language equivalents. They should
* generate reasonable code, so take a look at what your compiler spits
* out before rolling your own buggy implementation in assembly language.
*
* C language equivalents written by Theodore Ts'o, 9/26/92
*/
......
......@@ -34,7 +34,6 @@ arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
u32 tmp;
preempt_disable();
pagefault_disable();
ret = -EFAULT;
if (unlikely(get_user(oldval, uaddr) != 0))
......@@ -67,7 +66,6 @@ arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
ret = -EFAULT;
out_pagefault_enable:
pagefault_enable();
preempt_enable();
if (ret == 0)
......
......@@ -22,9 +22,6 @@ enum bh_state_bits {
BH_Dirty, /* Is dirty */
BH_Lock, /* Is locked */
BH_Req, /* Has been submitted for I/O */
BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise
* IO completion of other buffers in the page
*/
BH_Mapped, /* Has a disk mapping */
BH_New, /* Disk mapping was newly created by get_block */
......@@ -76,6 +73,9 @@ struct buffer_head {
struct address_space *b_assoc_map; /* mapping this buffer is
associated with */
atomic_t b_count; /* users using this buffer_head */
spinlock_t b_uptodate_lock; /* Used by the first bh in a page, to
* serialise IO completion of other
* buffers in the page */
};
/*
......
......@@ -9,7 +9,7 @@
* See kernel/sched/completion.c for details.
*/
#include <linux/wait.h>
#include <linux/swait.h>
/*
* struct completion - structure used to maintain state for a "completion"
......@@ -25,7 +25,7 @@
*/
struct completion {
unsigned int done;
wait_queue_head_t wait;
struct swait_queue_head wait;
};
#define init_completion_map(x, m) __init_completion(x)
......@@ -34,7 +34,7 @@ static inline void complete_acquire(struct completion *x) {}
static inline void complete_release(struct completion *x) {}
#define COMPLETION_INITIALIZER(work) \
{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
{ 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
#define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \
(*({ init_completion_map(&(work), &(map)); &(work); }))
......@@ -85,7 +85,7 @@ static inline void complete_release(struct completion *x) {}
static inline void __init_completion(struct completion *x)
{
x->done = 0;
init_waitqueue_head(&x->wait);
init_swait_queue_head(&x->wait);
}
/**
......
......@@ -18,6 +18,8 @@
/* Doesn't want IPI, wait for tick: */
#define IRQ_WORK_LAZY BIT(2)
/* Run hard IRQ context, even on RT */
#define IRQ_WORK_HARD_IRQ BIT(3)
#define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY)
......
......@@ -37,7 +37,12 @@
# define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
# define trace_hardirq_enter() \
do { \
current->hardirq_context++; \
if (!current->hardirq_context++) \
current->hardirq_threaded = 0; \
} while (0)
# define trace_hardirq_threaded() \
do { \
current->hardirq_threaded = 1; \
} while (0)
# define trace_hardirq_exit() \
do { \
......@@ -51,6 +56,40 @@ do { \
do { \
current->softirq_context--; \
} while (0)
/*
 * lockdep_hrtimer_enter()/lockdep_hrtimer_exit() bracket the execution of
 * an hrtimer callback.  For a timer whose ->is_hard is clear, they set and
 * clear current->irq_config around the callback; for ->is_hard timers they
 * do nothing.
 *
 * @__hrtimer: pointer expression yielding an object with an ->is_hard
 *             member.  The argument is parenthesized so that expressions
 *             such as "&timer" or "base + i" bind as intended.
 */
# define lockdep_hrtimer_enter(__hrtimer)		\
do {							\
	if (!(__hrtimer)->is_hard)			\
		current->irq_config = 1;		\
} while (0)

# define lockdep_hrtimer_exit(__hrtimer)		\
do {							\
	if (!(__hrtimer)->is_hard)			\
		current->irq_config = 0;		\
} while (0)
/*
 * Mark entry into posix-timer processing: unconditionally sets
 * current->irq_config, the flag that lockdep_assert_RT_in_threaded_ctx()
 * accepts as an alternative to current->hardirq_threaded.
 */
# define lockdep_posixtimer_enter() \
do { \
current->irq_config = 1; \
} while (0)
/*
 * Counterpart of lockdep_posixtimer_enter(): unconditionally clears
 * current->irq_config again.
 */
# define lockdep_posixtimer_exit() \
do { \
current->irq_config = 0; \
} while (0)
/*
 * lockdep_irq_work_enter()/lockdep_irq_work_exit() bracket the invocation
 * of an irq_work callback.  Unless the work item has IRQ_WORK_HARD_IRQ set
 * in ->flags, they set and clear current->irq_config around the callback.
 *
 * @__work: pointer expression yielding an object with an atomic ->flags
 *          member.  The argument is parenthesized so that expressions such
 *          as "&work" or "works + i" bind as intended.
 */
# define lockdep_irq_work_enter(__work)					\
do {									\
	if (!(atomic_read(&(__work)->flags) & IRQ_WORK_HARD_IRQ))	\
		current->irq_config = 1;				\
} while (0)

# define lockdep_irq_work_exit(__work)					\
do {									\
	if (!(atomic_read(&(__work)->flags) & IRQ_WORK_HARD_IRQ))	\
		current->irq_config = 0;				\
} while (0)
#else
# define trace_hardirqs_on() do { } while (0)
# define trace_hardirqs_off() do { } while (0)
......@@ -59,9 +98,16 @@ do { \
# define trace_hardirqs_enabled(p) 0
# define trace_softirqs_enabled(p) 0
# define trace_hardirq_enter() do { } while (0)
# define trace_hardirq_threaded() do { } while (0)
# define trace_hardirq_exit() do { } while (0)
# define lockdep_softirq_enter() do { } while (0)
# define lockdep_softirq_exit() do { } while (0)
# define lockdep_hrtimer_enter(__hrtimer) do { } while (0)
# define lockdep_hrtimer_exit(__hrtimer) do { } while (0)
# define lockdep_posixtimer_enter() do { } while (0)
# define lockdep_posixtimer_exit() do { } while (0)
# define lockdep_irq_work_enter(__work) do { } while (0)
# define lockdep_irq_work_exit(__work) do { } while (0)
#endif
#if defined(CONFIG_IRQSOFF_TRACER) || \
......
......@@ -21,6 +21,22 @@ extern int lock_stat;
#include <linux/types.h>
/*
 * Wait-type classes that lock initializers record in
 * lockdep_map::wait_type_inner / wait_type_outer.  The enumerators are
 * ordered; LD_WAIT_CONFIG is a distinct level only when
 * CONFIG_PROVE_RAW_LOCK_NESTING is enabled, otherwise it is an alias of
 * LD_WAIT_SPIN.
 */
enum lockdep_wait_type {
	/* not checked, catch all */
	LD_WAIT_INV = 0,

	/* wait free, rcu etc. */
	LD_WAIT_FREE,

	/* spin loops, raw_spinlock_t etc. */
	LD_WAIT_SPIN,

#ifndef CONFIG_PROVE_RAW_LOCK_NESTING
	/* raw lock nesting is not being proven: same level as LD_WAIT_SPIN */
	LD_WAIT_CONFIG = LD_WAIT_SPIN,
#else
	/* spinlock_t, rwlock_t etc. */
	LD_WAIT_CONFIG,
#endif

	/* sleeping locks: mutex, rwsem etc. */
	LD_WAIT_SLEEP,

	/* must be last */
	LD_WAIT_MAX,
};
#ifdef CONFIG_LOCKDEP
#include <linux/linkage.h>
......@@ -111,6 +127,9 @@ struct lock_class {
int name_version;
const char *name;
short wait_type_inner;
short wait_type_outer;
#ifdef CONFIG_LOCK_STAT
unsigned long contention_point[LOCKSTAT_POINTS];
unsigned long contending_point[LOCKSTAT_POINTS];
......@@ -158,6 +177,8 @@ struct lockdep_map {
struct lock_class_key *key;
struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
const char *name;
short wait_type_outer; /* can be taken in this context */
short wait_type_inner; /* presents this context */
#ifdef CONFIG_LOCK_STAT
int cpu;
unsigned long ip;
......@@ -299,8 +320,21 @@ extern void lockdep_unregister_key(struct lock_class_key *key);
* to lockdep:
*/
extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, int subclass);
extern void lockdep_init_map_waits(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, int subclass, short inner, short outer);
/*
 * lockdep_init_map_wait - initialise a lockdep map with an inner wait type
 * @lock:     lockdep map to initialise
 * @name:     lock name, passed through unchanged
 * @key:      lock class key, passed through unchanged
 * @subclass: lock subclass, passed through unchanged
 * @inner:    value forwarded as the inner wait type
 *
 * Convenience wrapper around lockdep_init_map_waits() that supplies
 * LD_WAIT_INV as the outer wait type.
 */
static inline void
lockdep_init_map_wait(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, int subclass, short inner)
{
lockdep_init_map_waits(lock, name, key, subclass, inner, LD_WAIT_INV);
}
/*
 * lockdep_init_map - initialise a lockdep map without wait-type information
 * @lock:     lockdep map to initialise
 * @name:     lock name, passed through unchanged
 * @key:      lock class key, passed through unchanged
 * @subclass: lock subclass, passed through unchanged
 *
 * Forwards to lockdep_init_map_wait() with LD_WAIT_INV ("not checked,
 * catch all") as the inner wait type.
 */
static inline void lockdep_init_map(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, int subclass)
{
lockdep_init_map_wait(lock, name, key, subclass, LD_WAIT_INV);
}
/*
* Reinitialize a lock key - for cases where there is special locking or
......@@ -308,18 +342,29 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
* of dependencies wrong: they are either too broad (they need a class-split)
* or they are too narrow (they suffer from a false class-split):
*/
#define lockdep_set_class(lock, key) \
lockdep_init_map(&(lock)->dep_map, #key, key, 0)
#define lockdep_set_class_and_name(lock, key, name) \
lockdep_init_map(&(lock)->dep_map, name, key, 0)
#define lockdep_set_class_and_subclass(lock, key, sub) \
lockdep_init_map(&(lock)->dep_map, #key, key, sub)
#define lockdep_set_subclass(lock, sub) \
lockdep_init_map(&(lock)->dep_map, #lock, \
(lock)->dep_map.key, sub)
#define lockdep_set_class(lock, key) \
lockdep_init_map_waits(&(lock)->dep_map, #key, key, 0, \
(lock)->dep_map.wait_type_inner, \
(lock)->dep_map.wait_type_outer)
#define lockdep_set_class_and_name(lock, key, name) \
lockdep_init_map_waits(&(lock)->dep_map, name, key, 0, \
(lock)->dep_map.wait_type_inner, \
(lock)->dep_map.wait_type_outer)
#define lockdep_set_class_and_subclass(lock, key, sub) \
lockdep_init_map_waits(&(lock)->dep_map, #key, key, sub,\
(lock)->dep_map.wait_type_inner, \
(lock)->dep_map.wait_type_outer)
#define lockdep_set_subclass(lock, sub) \
lockdep_init_map_waits(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\
(lock)->dep_map.wait_type_inner, \
(lock)->dep_map.wait_type_outer)
#define lockdep_set_novalidate_class(lock) \
lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock)
/*
* Compare locking classes
*/
......@@ -432,6 +477,10 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
# define lock_set_class(l, n, k, s, i) do { } while (0)
# define lock_set_subclass(l, s, i) do { } while (0)
# define lockdep_init() do { } while (0)
# define lockdep_init_map_waits(lock, name, key, sub, inner, outer) \
do { (void)(name); (void)(key); } while (0)
# define lockdep_init_map_wait(lock, name, key, sub, inner) \
do { (void)(name); (void)(key); } while (0)
# define lockdep_init_map(lock, name, key, sub) \
do { (void)(name); (void)(key); } while (0)
# define lockdep_set_class(lock, key) do { (void)(key); } while (0)
......@@ -662,6 +711,21 @@ do { \
# define lockdep_assert_in_irq() do { } while (0)
#endif
/*
 * lockdep_assert_RT_in_threaded_ctx - warn when called from hard interrupt
 * context that is marked neither as threaded nor via irq_config.
 *
 * With CONFIG_PROVE_RAW_LOCK_NESTING the macro emits a one-time warning when
 * all of the following hold: lock debugging is still active (debug_locks),
 * the task is not inside lockdep itself (lockdep_recursion is zero),
 * current->hardirq_context is non-zero, and neither current->hardirq_threaded
 * nor current->irq_config is set.  Without that config option it expands to
 * an empty statement.
 */
#ifdef CONFIG_PROVE_RAW_LOCK_NESTING
# define lockdep_assert_RT_in_threaded_ctx() do { \
WARN_ONCE(debug_locks && !current->lockdep_recursion && \
current->hardirq_context && \
!(current->hardirq_threaded || current->irq_config), \
"Not in threaded context on PREEMPT_RT as expected\n"); \
} while (0)
#else
# define lockdep_assert_RT_in_threaded_ctx() do { } while (0)
#endif
#ifdef CONFIG_LOCKDEP
void lockdep_rcu_suspicious(const char *file, const int line, const char *s);
#else
......
......@@ -109,8 +109,11 @@ do { \
} while (0)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
, .dep_map = { .name = #lockname }
# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
, .dep_map = { \
.name = #lockname, \
.wait_type_inner = LD_WAIT_SLEEP, \
}
#else
# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
#endif
......
......@@ -3,41 +3,52 @@
#define _LINUX_PERCPU_RWSEM_H
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/rcuwait.h>
#include <linux/wait.h>
#include <linux/rcu_sync.h>
#include <linux/lockdep.h>
struct percpu_rw_semaphore {
struct rcu_sync rss;
unsigned int __percpu *read_count;
struct rw_semaphore rw_sem; /* slowpath */
struct rcuwait writer; /* blocked writer */
int readers_block;
struct rcuwait writer;
wait_queue_head_t waiters;
atomic_t block;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
};
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname },
#else
#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname)
#endif
#define __DEFINE_PERCPU_RWSEM(name, is_static) \
static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \
is_static struct percpu_rw_semaphore name = { \
.rss = __RCU_SYNC_INITIALIZER(name.rss), \
.read_count = &__percpu_rwsem_rc_##name, \
.rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
.writer = __RCUWAIT_INITIALIZER(name.writer), \
.waiters = __WAIT_QUEUE_HEAD_INITIALIZER(name.waiters), \
.block = ATOMIC_INIT(0), \
__PERCPU_RWSEM_DEP_MAP_INIT(name) \
}
#define DEFINE_PERCPU_RWSEM(name) \
__DEFINE_PERCPU_RWSEM(name, /* not static */)
#define DEFINE_STATIC_PERCPU_RWSEM(name) \
__DEFINE_PERCPU_RWSEM(name, static)
extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
extern void __percpu_up_read(struct percpu_rw_semaphore *);
extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
{
might_sleep();
rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
preempt_disable();
/*
......@@ -48,8 +59,9 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
* and that once the synchronize_rcu() is done, the writer will see
* anything we did within this RCU-sched read-side critical section.
*/
__this_cpu_inc(*sem->read_count);
if (unlikely(!rcu_sync_is_idle(&sem->rss)))
if (likely(rcu_sync_is_idle(&sem->rss)))
__this_cpu_inc(*sem->read_count);
else
__percpu_down_read(sem, false); /* Unconditional memory barrier */
/*
* The preempt_enable() prevents the compiler from
......@@ -58,16 +70,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
preempt_enable();
}
static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
int ret = 1;
bool ret = true;
preempt_disable();
/*
* Same as in percpu_down_read().
*/
__this_cpu_inc(*sem->read_count);
if (unlikely(!rcu_sync_is_idle(&sem->rss)))
if (likely(rcu_sync_is_idle(&sem->rss)))
__this_cpu_inc(*sem->read_count);
else
ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
preempt_enable();
/*
......@@ -76,24 +89,36 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
*/
if (ret)
rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
return ret;
}
static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
preempt_disable();
/*
* Same as in percpu_down_read().
*/
if (likely(rcu_sync_is_idle(&sem->rss)))
if (likely(rcu_sync_is_idle(&sem->rss))) {
__this_cpu_dec(*sem->read_count);
else
__percpu_up_read(sem); /* Unconditional memory barrier */
} else {
/*
* slowpath; reader will only ever wake a single blocked
* writer.
*/
smp_mb(); /* B matches C */
/*
* In other words, if they see our decrement (presumably to
* aggregate zero, as that is the only time it matters) they
* will also see our critical section.
*/
__this_cpu_dec(*sem->read_count);
rcuwait_wake_up(&sem->writer);
}
preempt_enable();
rwsem_release(&sem->rw_sem.dep_map, _RET_IP_);
}
extern void percpu_down_write(struct percpu_rw_semaphore *);
......@@ -110,29 +135,19 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
__percpu_init_rwsem(sem, #sem, &rwsem_key); \
})
#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
#define percpu_rwsem_assert_held(sem) \
lockdep_assert_held(&(sem)->rw_sem)
#define percpu_rwsem_is_held(sem) lockdep_is_held(sem)
#define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem)
static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
lock_release(&sem->rw_sem.dep_map, ip);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
if (!read)
atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
#endif
lock_release(&sem->dep_map, ip);
}
static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
bool read, unsigned long ip)
{
lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
if (!read)
atomic_long_set(&sem->rw_sem.owner, (long)current);
#endif
lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip);
}
#endif
......@@ -3,6 +3,7 @@
#define _LINUX_RCUWAIT_H_
#include <linux/rcupdate.h>
#include <linux/sched/signal.h>
/*
* rcuwait provides a way of blocking and waking up a single
......@@ -30,23 +31,30 @@ extern void rcuwait_wake_up(struct rcuwait *w);
* The caller is responsible for locking around rcuwait_wait_event(),
* such that writes to @task are properly serialized.
*/
#define rcuwait_wait_event(w, condition) \
#define rcuwait_wait_event(w, condition, state) \
({ \
int __ret = 0; \
rcu_assign_pointer((w)->task, current); \
for (;;) { \
/* \
* Implicit barrier (A) pairs with (B) in \
* rcuwait_wake_up(). \
*/ \
set_current_state(TASK_UNINTERRUPTIBLE); \
set_current_state(state); \
if (condition) \
break; \
\
if (signal_pending_state(state, current)) { \
__ret = -EINTR; \
break; \
} \
\
schedule(); \
} \
\
WRITE_ONCE((w)->task, NULL); \
__set_current_state(TASK_RUNNING); \
__ret; \
})
#endif /* _LINUX_RCUWAIT_H_ */
......@@ -22,7 +22,11 @@ typedef struct {
#define RWLOCK_MAGIC 0xdeaf1eed
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
# define RW_DEP_MAP_INIT(lockname) \
.dep_map = { \
.name = #lockname, \
.wait_type_inner = LD_WAIT_CONFIG, \
}
#else
# define RW_DEP_MAP_INIT(lockname)
#endif
......
......@@ -53,12 +53,6 @@ struct rw_semaphore {
#endif
};
/*
* Setting all bits of the owner field except bit 0 will indicate
* that the rwsem is writer-owned with an unknown owner.
*/
#define RWSEM_OWNER_UNKNOWN (-2L)
/* In all implementations count != 0 means locked */
static inline int rwsem_is_locked(struct rw_semaphore *sem)
{
......@@ -71,7 +65,11 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
/* Common initializer macros and functions */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
# define __RWSEM_DEP_MAP_INIT(lockname) \
, .dep_map = { \
.name = #lockname, \
.wait_type_inner = LD_WAIT_SLEEP, \
}
#else
# define __RWSEM_DEP_MAP_INIT(lockname)
#endif
......
......@@ -970,6 +970,7 @@ struct task_struct {
#ifdef CONFIG_TRACE_IRQFLAGS
unsigned int irq_events;
unsigned int hardirq_threaded;
unsigned long hardirq_enable_ip;
unsigned long hardirq_disable_ip;
unsigned int hardirq_enable_event;
......@@ -982,6 +983,7 @@ struct task_struct {
unsigned int softirq_enable_event;
int softirqs_enabled;
int softirq_context;
int irq_config;
#endif
#ifdef CONFIG_LOCKDEP
......
......@@ -93,12 +93,13 @@
#ifdef CONFIG_DEBUG_SPINLOCK
extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
struct lock_class_key *key);
# define raw_spin_lock_init(lock) \
do { \
static struct lock_class_key __key; \
\
__raw_spin_lock_init((lock), #lock, &__key); \
struct lock_class_key *key, short inner);
# define raw_spin_lock_init(lock) \
do { \
static struct lock_class_key __key; \
\
__raw_spin_lock_init((lock), #lock, &__key, LD_WAIT_SPIN); \
} while (0)
#else
......@@ -327,12 +328,26 @@ static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
return &lock->rlock;
}
#define spin_lock_init(_lock) \
do { \
spinlock_check(_lock); \
raw_spin_lock_init(&(_lock)->rlock); \
#ifdef CONFIG_DEBUG_SPINLOCK
# define spin_lock_init(lock) \
do { \
static struct lock_class_key __key; \
\
__raw_spin_lock_init(spinlock_check(lock), \
#lock, &__key, LD_WAIT_CONFIG); \
} while (0)
#else
# define spin_lock_init(_lock) \
do { \
spinlock_check(_lock); \
*(_lock) = __SPIN_LOCK_UNLOCKED(_lock); \
} while (0)
#endif
static __always_inline void spin_lock(spinlock_t *lock)
{
raw_spin_lock(&lock->rlock);
......
......@@ -33,8 +33,18 @@ typedef struct raw_spinlock {
#define SPINLOCK_OWNER_INIT ((void *)-1L)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
# define RAW_SPIN_DEP_MAP_INIT(lockname) \
.dep_map = { \
.name = #lockname, \
.wait_type_inner = LD_WAIT_SPIN, \
}
# define SPIN_DEP_MAP_INIT(lockname) \
.dep_map = { \
.name = #lockname, \
.wait_type_inner = LD_WAIT_CONFIG, \
}
#else
# define RAW_SPIN_DEP_MAP_INIT(lockname)
# define SPIN_DEP_MAP_INIT(lockname)
#endif
......@@ -51,7 +61,7 @@ typedef struct raw_spinlock {
{ \
.raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
SPIN_DEBUG_INIT(lockname) \
SPIN_DEP_MAP_INIT(lockname) }
RAW_SPIN_DEP_MAP_INIT(lockname) }
#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
(raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
......@@ -72,11 +82,17 @@ typedef struct spinlock {
};
} spinlock_t;
#define ___SPIN_LOCK_INITIALIZER(lockname) \
{ \
.raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
SPIN_DEBUG_INIT(lockname) \
SPIN_DEP_MAP_INIT(lockname) }
#define __SPIN_LOCK_INITIALIZER(lockname) \
{ { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
{ { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } }
#define __SPIN_LOCK_UNLOCKED(lockname) \
(spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
(spinlock_t) __SPIN_LOCK_INITIALIZER(lockname)
#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
......
......@@ -20,6 +20,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int
#define WQ_FLAG_EXCLUSIVE 0x01
#define WQ_FLAG_WOKEN 0x02
#define WQ_FLAG_BOOKMARK 0x04
#define WQ_FLAG_CUSTOM 0x08
/*
* A single wait-queue entry structure:
......
......@@ -331,12 +331,12 @@ void lockdep_assert_cpus_held(void)
static void lockdep_acquire_cpus_lock(void)
{
rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
}
static void lockdep_release_cpus_lock(void)
{
rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, _THIS_IP_);
rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
}
/*
......
......@@ -258,6 +258,7 @@ void rcuwait_wake_up(struct rcuwait *w)
wake_up_process(task);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rcuwait_wake_up);
/*
* Determine if a process group is "orphaned", according to the POSIX
......
......@@ -135,8 +135,7 @@
*
* Where (A) orders the waiters increment and the futex value read through
* atomic operations (see hb_waiters_inc) and where (B) orders the write
* to futex and the waiters read -- this is done by the barriers for both
* shared and private futexes in get_futex_key_refs().
* to futex and the waiters read (see hb_waiters_pending()).
*
* This yields the following case (where X:=waiters, Y:=futex):
*
......@@ -331,17 +330,6 @@ static void compat_exit_robust_list(struct task_struct *curr);
static inline void compat_exit_robust_list(struct task_struct *curr) { }
#endif
static inline void futex_get_mm(union futex_key *key)
{
mmgrab(key->private.mm);
/*
* Ensure futex_get_mm() implies a full barrier such that
* get_futex_key() implies a full barrier. This is relied upon
* as smp_mb(); (B), see the ordering comment above.
*/
smp_mb__after_atomic();
}
/*
* Reflects a new waiter being added to the waitqueue.
*/
......@@ -370,6 +358,10 @@ static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
/*
* Full barrier (B), see the ordering comment above.
*/
smp_mb();
return atomic_read(&hb->waiters);
#else
return 1;
......@@ -407,69 +399,6 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
&& key1->both.offset == key2->both.offset);
}
/*
* Take a reference to the resource addressed by a key.
* Can be called while holding spinlocks.
*
*/
static void get_futex_key_refs(union futex_key *key)
{
if (!key->both.ptr)
return;
/*
* On MMU less systems futexes are always "private" as there is no per
* process address space. We need the smp wmb nevertheless - yes,
* arch/blackfin has MMU less SMP ...
*/
if (!IS_ENABLED(CONFIG_MMU)) {
smp_mb(); /* explicit smp_mb(); (B) */
return;
}
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
smp_mb(); /* explicit smp_mb(); (B) */
break;
case FUT_OFF_MMSHARED:
futex_get_mm(key); /* implies smp_mb(); (B) */
break;
default:
/*
* Private futexes do not hold reference on an inode or
* mm, therefore the only purpose of calling get_futex_key_refs
* is because we need the barrier for the lockless waiter check.
*/
smp_mb(); /* explicit smp_mb(); (B) */
}
}
/*
* Drop a reference to the resource addressed by a key.
* The hash bucket spinlock must not be held. This is
* a no-op for private futexes, see comment in the get
* counterpart.
*/
static void drop_futex_key_refs(union futex_key *key)
{
if (!key->both.ptr) {
/* If we're here then we tried to put a key we failed to get */
WARN_ON_ONCE(1);
return;
}
if (!IS_ENABLED(CONFIG_MMU))
return;
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
break;
case FUT_OFF_MMSHARED:
mmdrop(key->private.mm);
break;
}
}
enum futex_access {
FUTEX_READ,
FUTEX_WRITE
......@@ -601,7 +530,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
if (!fshared) {
key->private.mm = mm;
key->private.address = address;
get_futex_key_refs(key); /* implies smp_mb(); (B) */
return 0;
}
......@@ -741,8 +669,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
rcu_read_unlock();
}
get_futex_key_refs(key); /* implies smp_mb(); (B) */
out:
put_page(page);
return err;
......@@ -750,7 +676,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
static inline void put_futex_key(union futex_key *key)
{
drop_futex_key_refs(key);
}
/**
......@@ -1740,10 +1665,9 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
oparg = 1 << oparg;
}
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
pagefault_enable();
if (ret)
return ret;
......@@ -1885,7 +1809,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
plist_add(&q->list, &hb2->chain);
q->lock_ptr = &hb2->lock;
}
get_futex_key_refs(key2);
q->key = *key2;
}
......@@ -1907,7 +1830,6 @@ static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
struct futex_hash_bucket *hb)
{
get_futex_key_refs(key);
q->key = *key;
__unqueue_futex(q);
......@@ -2018,7 +1940,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
u32 *cmpval, int requeue_pi)
{
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
int drop_count = 0, task_count = 0, ret;
int task_count = 0, ret;
struct futex_pi_state *pi_state = NULL;
struct futex_hash_bucket *hb1, *hb2;
struct futex_q *this, *next;
......@@ -2139,7 +2061,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
*/
if (ret > 0) {
WARN_ON(pi_state);
drop_count++;
task_count++;
/*
* If we acquired the lock, then the user space value
......@@ -2259,7 +2180,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* doing so.
*/
requeue_pi_wake_futex(this, &key2, hb2);
drop_count++;
continue;
} else if (ret) {
/*
......@@ -2280,7 +2200,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
}
}
requeue_futex(this, hb1, hb2, &key2);
drop_count++;
}
/*
......@@ -2295,15 +2214,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
wake_up_q(&wake_q);
hb_waiters_dec(hb2);
/*
* drop_futex_key_refs() must be called outside the spinlocks. During
* the requeue we moved futex_q's from the hash bucket at key1 to the
* one at key2 and updated their key pointer. We no longer need to
* hold the references to key1.
*/
while (--drop_count >= 0)
drop_futex_key_refs(&key1);
out_put_keys:
put_futex_key(&key2);
out_put_key1:
......@@ -2433,7 +2343,6 @@ static int unqueue_me(struct futex_q *q)
ret = 1;
}
drop_futex_key_refs(&q->key);
return ret;
}
......
......@@ -145,6 +145,13 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
for_each_action_of_desc(desc, action) {
irqreturn_t res;
/*
* If this IRQ would be threaded under force_irqthreads, mark it so.
*/
if (irq_settings_can_thread(desc) &&
!(action->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)))
trace_hardirq_threaded();
trace_irq_handler_entry(irq, action);
res = action->handler(irq, action->dev_id);
trace_irq_handler_exit(irq, action, res);
......
......@@ -153,7 +153,9 @@ static void irq_work_run_list(struct llist_head *list)
*/
flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
lockdep_irq_work_enter(work);
work->func(work);
lockdep_irq_work_exit(work);
/*
* Clear the BUSY bit and return to the free state if
* no-one else claimed it meanwhile.
......
This diff is collapsed.
......@@ -106,6 +106,12 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ =
#define STACK_TRACE_HASH_SIZE 16384
#endif
/*
* Bit definitions for lock_chain.irq_context
*/
#define LOCK_CHAIN_SOFTIRQ_CONTEXT (1 << 0)
#define LOCK_CHAIN_HARDIRQ_CONTEXT (1 << 1)
#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
......@@ -124,17 +130,21 @@ extern const char *__get_key_name(const struct lockdep_subclass_key *key,
struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i);
extern unsigned long nr_lock_classes;
extern unsigned long nr_zapped_classes;
extern unsigned long nr_zapped_lock_chains;
extern unsigned long nr_list_entries;
long lockdep_next_lockchain(long i);
unsigned long lock_chain_count(void);
extern int nr_chain_hlocks;
extern unsigned long nr_stack_trace_entries;
extern unsigned int nr_hardirq_chains;
extern unsigned int nr_softirq_chains;
extern unsigned int nr_process_chains;
extern unsigned int max_lockdep_depth;
extern unsigned int nr_free_chain_hlocks;
extern unsigned int nr_lost_chain_hlocks;
extern unsigned int nr_large_chain_blocks;
extern unsigned int max_lockdep_depth;
extern unsigned int max_bfs_queue_depth;
#ifdef CONFIG_PROVE_LOCKING
......
......@@ -128,15 +128,22 @@ static int lc_show(struct seq_file *m, void *v)
struct lock_chain *chain = v;
struct lock_class *class;
int i;
static const char * const irq_strs[] = {
[0] = "0",
[LOCK_CHAIN_HARDIRQ_CONTEXT] = "hardirq",
[LOCK_CHAIN_SOFTIRQ_CONTEXT] = "softirq",
[LOCK_CHAIN_SOFTIRQ_CONTEXT|
LOCK_CHAIN_HARDIRQ_CONTEXT] = "hardirq|softirq",
};
if (v == SEQ_START_TOKEN) {
if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)
if (!nr_free_chain_hlocks)
seq_printf(m, "(buggered) ");
seq_printf(m, "all lock chains:\n");
return 0;
}
seq_printf(m, "irq_context: %d\n", chain->irq_context);
seq_printf(m, "irq_context: %s\n", irq_strs[chain->irq_context]);
for (i = 0; i < chain->depth; i++) {
class = lock_chain_get_class(chain, i);
......@@ -271,8 +278,12 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
lock_chain_count(), MAX_LOCKDEP_CHAINS);
seq_printf(m, " dependency chain hlocks: %11d [max: %lu]\n",
nr_chain_hlocks, MAX_LOCKDEP_CHAIN_HLOCKS);
seq_printf(m, " dependency chain hlocks used: %11lu [max: %lu]\n",
MAX_LOCKDEP_CHAIN_HLOCKS -
(nr_free_chain_hlocks + nr_lost_chain_hlocks),
MAX_LOCKDEP_CHAIN_HLOCKS);
seq_printf(m, " dependency chain hlocks lost: %11u\n",
nr_lost_chain_hlocks);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
......@@ -336,6 +347,18 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
seq_printf(m, " debug_locks: %11u\n",
debug_locks);
/*
* Zapped classes and lockdep data buffers reuse statistics.
*/
seq_puts(m, "\n");
seq_printf(m, " zapped classes: %11lu\n",
nr_zapped_classes);
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " zapped lock chains: %11lu\n",
nr_zapped_lock_chains);
seq_printf(m, " large chain blocks: %11u\n",
nr_large_chain_blocks);
#endif
return 0;
}
......
......@@ -85,7 +85,7 @@ void debug_mutex_init(struct mutex *lock, const char *name,
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map(&lock->dep_map, name, key, 0);
lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
lock->magic = lock;
}
......
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/errno.h>
#include "rwsem.h"
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
const char *name, struct lock_class_key *rwsem_key)
const char *name, struct lock_class_key *key)
{
sem->read_count = alloc_percpu(int);
if (unlikely(!sem->read_count))
return -ENOMEM;
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
rcu_sync_init(&sem->rss);
__init_rwsem(&sem->rw_sem, name, rwsem_key);
rcuwait_init(&sem->writer);
sem->readers_block = 0;
init_waitqueue_head(&sem->waiters);
atomic_set(&sem->block, 0);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
......@@ -41,73 +43,139 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
{
__this_cpu_inc(*sem->read_count);
/*
* Due to having preemption disabled the decrement happens on
* the same CPU as the increment, avoiding the
* increment-on-one-CPU-and-decrement-on-another problem.
*
* If the reader misses the writer's assignment of readers_block, then
* the writer is guaranteed to see the reader's increment.
* If the reader misses the writer's assignment of sem->block, then the
* writer is guaranteed to see the reader's increment.
*
* Conversely, any readers that increment their sem->read_count after
* the writer looks are guaranteed to see the readers_block value,
* which in turn means that they are guaranteed to immediately
* decrement their sem->read_count, so that it doesn't matter that the
* writer missed them.
* the writer looks are guaranteed to see the sem->block value, which
* in turn means that they are guaranteed to immediately decrement
* their sem->read_count, so that it doesn't matter that the writer
* missed them.
*/
smp_mb(); /* A matches D */
/*
* If !readers_block the critical section starts here, matched by the
* If !sem->block the critical section starts here, matched by the
* release in percpu_up_write().
*/
if (likely(!smp_load_acquire(&sem->readers_block)))
if (likely(!atomic_read_acquire(&sem->block)))
return true;
__this_cpu_dec(*sem->read_count);
/* Prod writer to re-evaluate readers_active_check() */
rcuwait_wake_up(&sem->writer);
return false;
}
/*
 * Try to claim sem->block for a writer.
 *
 * A plain atomic_read() is done first so that an already-blocked semaphore
 * is rejected without issuing the atomic_xchg().  Otherwise sem->block is
 * exchanged with 1 and the attempt succeeds only if the previous value was 0.
 *
 * Returns true when this caller set sem->block, false otherwise.
 */
static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
{
	return !atomic_read(&sem->block) &&
	       atomic_xchg(&sem->block, 1) == 0;
}
/*
 * Common trylock entry point for both lock flavours.
 *
 * @sem:    the percpu rw-semaphore to try
 * @reader: true to attempt a read acquisition, false for a write acquisition
 *
 * Writers go straight to __percpu_down_write_trylock().  Readers call
 * __percpu_down_read_trylock() inside a preempt_disable()/preempt_enable()
 * pair.
 *
 * Returns true if the lock was taken.
 */
static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
{
	bool acquired;

	if (!reader)
		return __percpu_down_write_trylock(sem);

	preempt_disable();
	acquired = __percpu_down_read_trylock(sem);
	preempt_enable();

	return acquired;
}
/*
* The return value of wait_queue_entry::func means:
*
* <0 - error, wakeup is terminated and the error is returned
* 0 - no wakeup, a next waiter is tried
* >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
*
* We use EXCLUSIVE for both readers and writers to preserve FIFO order,
* and play games with the return value to allow waking multiple readers.
*
* Specifically, we wake readers until we've woken a single writer, or until a
* trylock fails.
*/
/*
 * Wake callback installed on every percpu_rwsem waiter.
 *
 * @wq_entry:   the waiter's queue entry; ->private holds its task_struct and
 *              WQ_FLAG_CUSTOM in ->flags marks it as a reader
 * @mode:       unused
 * @wake_flags: unused
 * @key:        the struct percpu_rw_semaphore being handed over
 *
 * Tries to take the lock on behalf of the waiter.  On failure returns 1
 * without touching the waiter.  On success the entry is unlinked, ->private
 * is cleared to tell the waiter it now owns the lock, and the task is woken.
 *
 * Returns 0 after waking a reader and 1 after waking a writer or when the
 * trylock failed.
 */
static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
				      unsigned int mode, int wake_flags,
				      void *key)
{
	bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
	struct percpu_rw_semaphore *sem = key;
	struct task_struct *p;

	/* concurrent against percpu_down_write(), can get stolen */
	if (!__percpu_rwsem_trylock(sem, reader))
		return 1;

	/*
	 * Take the task reference only after the trylock has succeeded.
	 * Grabbing it before the trylock would leak the reference on the
	 * early return above, which has no matching put_task_struct().
	 */
	p = get_task_struct(wq_entry->private);

	list_del_init(&wq_entry->entry);
	/*
	 * Publish the hand-over; percpu_rwsem_wait() leaves its wait loop
	 * once it observes ->private == NULL.  The entry is a local of the
	 * waiter, so it must not be touched after this store; only the
	 * pinned task pointer is used from here on.
	 */
	smp_store_release(&wq_entry->private, NULL);

	wake_up_process(p);
	put_task_struct(p);

	return !reader; /* wake (readers until) 1 writer */
}
static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
{
DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
bool wait;
spin_lock_irq(&sem->waiters.lock);
/*
* Avoid lockdep for the down/up_read() we already have them.
* Serialize against the wakeup in percpu_up_write(), if we fail
* the trylock, the wakeup must see us on the list.
*/
__down_read(&sem->rw_sem);
this_cpu_inc(*sem->read_count);
__up_read(&sem->rw_sem);
wait = !__percpu_rwsem_trylock(sem, reader);
if (wait) {
wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
__add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
}
spin_unlock_irq(&sem->waiters.lock);
preempt_disable();
return 1;
while (wait) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!smp_load_acquire(&wq_entry.private))
break;
schedule();
}
__set_current_state(TASK_RUNNING);
}
EXPORT_SYMBOL_GPL(__percpu_down_read);
void __percpu_up_read(struct percpu_rw_semaphore *sem)
bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
{
smp_mb(); /* B matches C */
/*
* In other words, if they see our decrement (presumably to aggregate
* zero, as that is the only time it matters) they will also see our
* critical section.
*/
__this_cpu_dec(*sem->read_count);
if (__percpu_down_read_trylock(sem))
return true;
/* Prod writer to recheck readers_active */
rcuwait_wake_up(&sem->writer);
if (try)
return false;
preempt_enable();
percpu_rwsem_wait(sem, /* .reader = */ true);
preempt_disable();
return true;
}
EXPORT_SYMBOL_GPL(__percpu_up_read);
EXPORT_SYMBOL_GPL(__percpu_down_read);
#define per_cpu_sum(var) \
({ \
......@@ -124,6 +192,8 @@ EXPORT_SYMBOL_GPL(__percpu_up_read);
* zero. If this sum is zero, then it is stable due to the fact that if any
* newly arriving readers increment a given counter, they will immediately
* decrement that same counter.
*
* Assumes sem->block is set.
*/
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
......@@ -142,32 +212,36 @@ static bool readers_active_check(struct percpu_rw_semaphore *sem)
void percpu_down_write(struct percpu_rw_semaphore *sem)
{
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
/* Notify readers to take the slow path. */
rcu_sync_enter(&sem->rss);
down_write(&sem->rw_sem);
/*
* Notify new readers to block; up until now, and thus throughout the
* longish rcu_sync_enter() above, new readers could still come in.
* Try set sem->block; this provides writer-writer exclusion.
* Having sem->block set makes new readers block.
*/
WRITE_ONCE(sem->readers_block, 1);
if (!__percpu_down_write_trylock(sem))
percpu_rwsem_wait(sem, /* .reader = */ false);
smp_mb(); /* D matches A */
/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
/*
* If they don't see our writer of readers_block, then we are
* guaranteed to see their sem->read_count increment, and therefore
* will wait for them.
* If they don't see our store of sem->block, then we are guaranteed to
* see their sem->read_count increment, and therefore will wait for
* them.
*/
/* Wait for all now active readers to complete. */
rcuwait_wait_event(&sem->writer, readers_active_check(sem));
/* Wait for all active readers to complete. */
rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL_GPL(percpu_down_write);
void percpu_up_write(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
/*
* Signal the writer is done, no fast path yet.
*
......@@ -178,12 +252,12 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
* Therefore we force it through the slow path which guarantees an
* acquire and thereby guarantees the critical section's consistency.
*/
smp_store_release(&sem->readers_block, 0);
atomic_set_release(&sem->block, 0);
/*
* Release the write lock, this will allow readers back in the game.
* Prod any pending reader/writer to make progress.
*/
up_write(&sem->rw_sem);
__wake_up(&sem->waiters, TASK_NORMAL, 1, sem);
/*
* Once this completes (at least one RCU-sched grace period hence) the
......
......@@ -28,7 +28,6 @@
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include "rwsem.h"
#include "lock_events.h"
/*
......@@ -329,7 +328,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
* Make sure we are not reinitializing a held semaphore:
*/
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map(&sem->dep_map, name, key, 0);
lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
#ifdef CONFIG_DEBUG_RWSEMS
sem->magic = sem;
......@@ -660,8 +659,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem,
unsigned long flags;
bool ret = true;
BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE));
if (need_resched()) {
lockevent_inc(rwsem_opt_fail);
return false;
......@@ -1338,7 +1335,7 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
/*
* lock for reading
*/
inline void __down_read(struct rw_semaphore *sem)
static inline void __down_read(struct rw_semaphore *sem)
{
if (!rwsem_read_trylock(sem)) {
rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
......@@ -1426,7 +1423,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
/*
* unlock after reading
*/
inline void __up_read(struct rw_semaphore *sem)
static inline void __up_read(struct rw_semaphore *sem)
{
long tmp;
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __INTERNAL_RWSEM_H
#define __INTERNAL_RWSEM_H
#include <linux/rwsem.h>
extern void __down_read(struct rw_semaphore *sem);
extern void __up_read(struct rw_semaphore *sem);
#endif /* __INTERNAL_RWSEM_H */
......@@ -14,14 +14,14 @@
#include <linux/export.h>
void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
struct lock_class_key *key)
struct lock_class_key *key, short inner)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map(&lock->dep_map, name, key, 0);
lockdep_init_map_wait(&lock->dep_map, name, key, 0, inner);
#endif
lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
lock->magic = SPINLOCK_MAGIC;
......@@ -39,7 +39,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lockdep_init_map(&lock->dep_map, name, key, 0);
lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_CONFIG);
#endif
lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
lock->magic = RWLOCK_MAGIC;
......
......@@ -1124,6 +1124,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
!rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
(rnp->ffmask & rdp->grpmask)) {
init_irq_work(&rdp->rcu_iw, rcu_iw_handler);
atomic_set(&rdp->rcu_iw.flags, IRQ_WORK_HARD_IRQ);
rdp->rcu_iw_pending = true;
rdp->rcu_iw_gp_seq = rnp->gp_seq;
irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
......
......@@ -239,18 +239,30 @@ core_initcall(rcu_set_runtime_mode);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
struct lockdep_map rcu_lock_map = {
.name = "rcu_read_lock",
.key = &rcu_lock_key,
.wait_type_outer = LD_WAIT_FREE,
.wait_type_inner = LD_WAIT_CONFIG, /* XXX PREEMPT_RCU ? */
};
EXPORT_SYMBOL_GPL(rcu_lock_map);
static struct lock_class_key rcu_bh_lock_key;
struct lockdep_map rcu_bh_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key);
struct lockdep_map rcu_bh_lock_map = {
.name = "rcu_read_lock_bh",
.key = &rcu_bh_lock_key,
.wait_type_outer = LD_WAIT_FREE,
.wait_type_inner = LD_WAIT_CONFIG, /* PREEMPT_LOCK also makes BH preemptible */
};
EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
static struct lock_class_key rcu_sched_lock_key;
struct lockdep_map rcu_sched_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
struct lockdep_map rcu_sched_lock_map = {
.name = "rcu_read_lock_sched",
.key = &rcu_sched_lock_key,
.wait_type_outer = LD_WAIT_FREE,
.wait_type_inner = LD_WAIT_SPIN,
};
EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
static struct lock_class_key rcu_callback_key;
......
......@@ -29,12 +29,12 @@ void complete(struct completion *x)
{
unsigned long flags;
spin_lock_irqsave(&x->wait.lock, flags);
raw_spin_lock_irqsave(&x->wait.lock, flags);
if (x->done != UINT_MAX)
x->done++;
__wake_up_locked(&x->wait, TASK_NORMAL, 1);
spin_unlock_irqrestore(&x->wait.lock, flags);
swake_up_locked(&x->wait);
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);
......@@ -58,10 +58,12 @@ void complete_all(struct completion *x)
{
unsigned long flags;
spin_lock_irqsave(&x->wait.lock, flags);
lockdep_assert_RT_in_threaded_ctx();
raw_spin_lock_irqsave(&x->wait.lock, flags);
x->done = UINT_MAX;
__wake_up_locked(&x->wait, TASK_NORMAL, 0);
spin_unlock_irqrestore(&x->wait.lock, flags);
swake_up_all_locked(&x->wait);
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);
......@@ -70,20 +72,20 @@ do_wait_for_common(struct completion *x,
long (*action)(long), long timeout, int state)
{
if (!x->done) {
DECLARE_WAITQUEUE(wait, current);
DECLARE_SWAITQUEUE(wait);
__add_wait_queue_entry_tail_exclusive(&x->wait, &wait);
do {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
}
__prepare_to_swait(&x->wait, &wait);
__set_current_state(state);
spin_unlock_irq(&x->wait.lock);
raw_spin_unlock_irq(&x->wait.lock);
timeout = action(timeout);
spin_lock_irq(&x->wait.lock);
raw_spin_lock_irq(&x->wait.lock);
} while (!x->done && timeout);
__remove_wait_queue(&x->wait, &wait);
__finish_swait(&x->wait, &wait);
if (!x->done)
return timeout;
}
......@@ -100,9 +102,9 @@ __wait_for_common(struct completion *x,
complete_acquire(x);
spin_lock_irq(&x->wait.lock);
raw_spin_lock_irq(&x->wait.lock);
timeout = do_wait_for_common(x, action, timeout, state);
spin_unlock_irq(&x->wait.lock);
raw_spin_unlock_irq(&x->wait.lock);
complete_release(x);
......@@ -291,12 +293,12 @@ bool try_wait_for_completion(struct completion *x)
if (!READ_ONCE(x->done))
return false;
spin_lock_irqsave(&x->wait.lock, flags);
raw_spin_lock_irqsave(&x->wait.lock, flags);
if (!x->done)
ret = false;
else if (x->done != UINT_MAX)
x->done--;
spin_unlock_irqrestore(&x->wait.lock, flags);
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return ret;
}
EXPORT_SYMBOL(try_wait_for_completion);
......@@ -322,8 +324,8 @@ bool completion_done(struct completion *x)
* otherwise we can end up freeing the completion before complete()
* is done referencing it.
*/
spin_lock_irqsave(&x->wait.lock, flags);
spin_unlock_irqrestore(&x->wait.lock, flags);
raw_spin_lock_irqsave(&x->wait.lock, flags);
raw_spin_unlock_irqrestore(&x->wait.lock, flags);
return true;
}
EXPORT_SYMBOL(completion_done);
......@@ -2492,3 +2492,6 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
return true;
}
#endif
void swake_up_all_locked(struct swait_queue_head *q);
void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
......@@ -32,6 +32,19 @@ void swake_up_locked(struct swait_queue_head *q)
}
EXPORT_SYMBOL(swake_up_locked);
/*
 * swake_up_all_locked - wake every waiter queued on @q
 * @q: the simple wait queue head to drain
 *
 * Repeatedly calls swake_up_locked() until q->task_list is empty.
 *
 * This interface is exposed solely for completions and is not meant for
 * general usage.  It intentionally differs from swake_up_all() so that it
 * can be used from hard interrupt context and interrupt disabled regions.
 */
void swake_up_all_locked(struct swait_queue_head *q)
{
	for (;;) {
		if (list_empty(&q->task_list))
			break;
		swake_up_locked(q);
	}
}
void swake_up_one(struct swait_queue_head *q)
{
unsigned long flags;
......@@ -69,7 +82,7 @@ void swake_up_all(struct swait_queue_head *q)
}
EXPORT_SYMBOL(swake_up_all);
static void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
{
wait->task = current;
if (list_empty(&wait->task_list))
......
......@@ -1404,7 +1404,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
base += hrtimer_clockid_to_base(clock_id);
timer->is_soft = softtimer;
timer->is_hard = !softtimer;
timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
timer->base = &cpu_base->clock_base[base];
timerqueue_init(&timer->node);
}
......@@ -1514,7 +1514,11 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
*/
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
trace_hrtimer_expire_entry(timer, now);
lockdep_hrtimer_enter(timer);
restart = fn(timer);
lockdep_hrtimer_exit(timer);
trace_hrtimer_expire_exit(timer);
raw_spin_lock_irq(&cpu_base->lock);
......
......@@ -58,7 +58,8 @@ static struct clocksource clocksource_jiffies = {
.max_cycles = 10,
};
__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
__cacheline_aligned_in_smp seqcount_t jiffies_seq;
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void)
......@@ -67,9 +68,9 @@ u64 get_jiffies_64(void)
u64 ret;
do {
seq = read_seqbegin(&jiffies_lock);
seq = read_seqcount_begin(&jiffies_seq);
ret = jiffies_64;
} while (read_seqretry(&jiffies_lock, seq));
} while (read_seqcount_retry(&jiffies_seq, seq));
return ret;
}
EXPORT_SYMBOL(get_jiffies_64);
......
......@@ -1126,8 +1126,11 @@ void run_posix_cpu_timers(void)
if (!fastpath_timer_check(tsk))
return;
if (!lock_task_sighand(tsk, &flags))
lockdep_posixtimer_enter();
if (!lock_task_sighand(tsk, &flags)) {
lockdep_posixtimer_exit();
return;
}
/*
* Here we take off tsk->signal->cpu_timers[N] and
* tsk->cpu_timers[N] all the timers that are firing, and
......@@ -1169,6 +1172,7 @@ void run_posix_cpu_timers(void)
cpu_timer_fire(timer);
spin_unlock(&timer->it_lock);
}
lockdep_posixtimer_exit();
}
/*
......
......@@ -84,13 +84,15 @@ int tick_is_oneshot_available(void)
static void tick_periodic(int cpu)
{
if (tick_do_timer_cpu == cpu) {
write_seqlock(&jiffies_lock);
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
/* Keep track of the next tick event */
tick_next_period = ktime_add(tick_next_period, tick_period);
do_timer(1);
write_sequnlock(&jiffies_lock);
write_seqcount_end(&jiffies_seq);
raw_spin_unlock(&jiffies_lock);
update_wall_time();
}
......@@ -162,9 +164,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
ktime_t next;
do {
seq = read_seqbegin(&jiffies_lock);
seq = read_seqcount_begin(&jiffies_seq);
next = tick_next_period;
} while (read_seqretry(&jiffies_lock, seq));
} while (read_seqcount_retry(&jiffies_seq, seq));
clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
......
......@@ -65,7 +65,8 @@ static void tick_do_update_jiffies64(ktime_t now)
return;
/* Reevaluate with jiffies_lock held */
write_seqlock(&jiffies_lock);
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
delta = ktime_sub(now, last_jiffies_update);
if (delta >= tick_period) {
......@@ -91,10 +92,12 @@ static void tick_do_update_jiffies64(ktime_t now)
/* Keep the tick_next_period variable up to date */
tick_next_period = ktime_add(last_jiffies_update, tick_period);
} else {
write_sequnlock(&jiffies_lock);
write_seqcount_end(&jiffies_seq);
raw_spin_unlock(&jiffies_lock);
return;
}
write_sequnlock(&jiffies_lock);
write_seqcount_end(&jiffies_seq);
raw_spin_unlock(&jiffies_lock);
update_wall_time();
}
......@@ -105,12 +108,14 @@ static ktime_t tick_init_jiffy_update(void)
{
ktime_t period;
write_seqlock(&jiffies_lock);
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
/* Did we start the jiffies update yet ? */
if (last_jiffies_update == 0)
last_jiffies_update = tick_next_period;
period = last_jiffies_update;
write_sequnlock(&jiffies_lock);
write_seqcount_end(&jiffies_seq);
raw_spin_unlock(&jiffies_lock);
return period;
}
......@@ -240,6 +245,7 @@ static void nohz_full_kick_func(struct irq_work *work)
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
.func = nohz_full_kick_func,
.flags = ATOMIC_INIT(IRQ_WORK_HARD_IRQ),
};
/*
......@@ -676,10 +682,10 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
/* Read jiffies and the time when jiffies were updated last */
do {
seq = read_seqbegin(&jiffies_lock);
seq = read_seqcount_begin(&jiffies_seq);
basemono = last_jiffies_update;
basejiff = jiffies;
} while (read_seqretry(&jiffies_lock, seq));
} while (read_seqcount_retry(&jiffies_seq, seq));
ts->last_jiffies = basejiff;
ts->timer_expires_base = basemono;
......
......@@ -2397,8 +2397,10 @@ EXPORT_SYMBOL(hardpps);
*/
void xtime_update(unsigned long ticks)
{
write_seqlock(&jiffies_lock);
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
do_timer(ticks);
write_sequnlock(&jiffies_lock);
write_seqcount_end(&jiffies_seq);
raw_spin_unlock(&jiffies_lock);
update_wall_time();
}
......@@ -25,7 +25,8 @@ static inline void sched_clock_resume(void) { }
extern void do_timer(unsigned long ticks);
extern void update_wall_time(void);
extern seqlock_t jiffies_lock;
extern raw_spinlock_t jiffies_lock;
extern seqcount_t jiffies_seq;
#define CS_NAME_LEN 32
......
......@@ -1086,6 +1086,23 @@ config PROVE_LOCKING
For more details, see Documentation/locking/lockdep-design.rst.
config PROVE_RAW_LOCK_NESTING
bool "Enable raw_spinlock - spinlock nesting checks"
depends on PROVE_LOCKING
default n
help
Enable the raw_spinlock vs. spinlock nesting checks which ensure
that the lock nesting rules for PREEMPT_RT enabled kernels are
not violated.
NOTE: There are known nesting problems. So if you enable this
option expect lockdep splats until these problems have been fully
addressed, which is work in progress. This config switch makes it
possible to identify and analyze these problems. It will be removed
and the check permanently enabled once the main issues have been fixed.
If unsure, select N.
config LOCK_STAT
bool "Lock usage statistics"
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
......
......@@ -488,6 +488,7 @@ static const char *uaccess_safe_builtin[] = {
"__sanitizer_cov_trace_cmp2",
"__sanitizer_cov_trace_cmp4",
"__sanitizer_cov_trace_cmp8",
"__sanitizer_cov_trace_switch",
/* UBSAN */
"ubsan_type_mismatch_common",
"__ubsan_handle_type_mismatch",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment