Commit ed016af5 authored by Linus Torvalds

Merge tag 'locking-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking updates from Ingo Molnar:
 "These are the locking updates for v5.10:

   - Add deadlock detection for recursive read-locks.

     The rationale is outlined in commit 224ec489 ("lockdep/
     Documention: Recursive read lock detection reasoning")

     The main deadlock pattern we want to detect is:

           TASK A:                 TASK B:

           read_lock(X);
                                   write_lock(X);
           read_lock_2(X);

   - Add "latch sequence counters" (seqcount_latch_t):

     A sequence counter variant where the counter even/odd value is used
     to switch between two copies of protected data. This allows the
     read path, typically NMIs, to safely interrupt the write side
     critical section.

     We utilize this new variant for sched-clock, and to make x86 TSC
     handling safer.

   - Other seqlock cleanups, fixes and enhancements

   - KCSAN updates

   - LKMM updates

   - Misc updates, cleanups and fixes"

* tag 'locking-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (67 commits)
  lockdep: Revert "lockdep: Use raw_cpu_*() for per-cpu variables"
  lockdep: Fix lockdep recursion
  lockdep: Fix usage_traceoverflow
  locking/atomics: Check atomic-arch-fallback.h too
  locking/seqlock: Tweak DEFINE_SEQLOCK() kernel doc
  lockdep: Optimize the memory usage of circular queue
  seqlock: Unbreak lockdep
  seqlock: PREEMPT_RT: Do not starve seqlock_t writers
  seqlock: seqcount_LOCKNAME_t: Introduce PREEMPT_RT support
  seqlock: seqcount_t: Implement all read APIs as statement expressions
  seqlock: Use unique prefix for seqcount_t property accessors
  seqlock: seqcount_LOCKNAME_t: Standardize naming convention
  seqlock: seqcount latch APIs: Only allow seqcount_latch_t
  rbtree_latch: Use seqcount_latch_t
  x86/tsc: Use seqcount_latch_t
  timekeeping: Use seqcount_latch_t
  time/sched_clock: Use seqcount_latch_t
  seqlock: Introduce seqcount_latch_t
  mm/swap: Do not abuse the seqcount_t latching API
  time/sched_clock: Use raw_read_seqcount_latch() during suspend
  ...
parents edaa5ddf 2116d708
This diff is collapsed.
......@@ -139,6 +139,24 @@ with the associated LOCKTYPE lock acquired.
Read path: same as in :ref:`seqcount_t`.
.. _seqcount_latch_t:
Latch sequence counters (``seqcount_latch_t``)
----------------------------------------------
Latch sequence counters are a multiversion concurrency control mechanism
where the embedded seqcount_t counter even/odd value is used to switch
between two copies of protected data. This allows the sequence counter
read path to safely interrupt its own write side critical section.
Use seqcount_latch_t when the write side sections cannot be protected
from interruption by readers. This is typically the case when the read
side can be invoked from NMI handlers.
Check `raw_write_seqcount_latch()` for more information.
.. _seqlock_t:
Sequential locks (``seqlock_t``)
......
......@@ -54,7 +54,7 @@ struct clocksource *art_related_clocksource;
struct cyc2ns {
struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */
seqcount_t seq; /* 32 + 4 = 36 */
seqcount_latch_t seq; /* 32 + 4 = 36 */
}; /* fits one cacheline */
......@@ -73,14 +73,14 @@ __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
preempt_disable_notrace();
do {
seq = this_cpu_read(cyc2ns.seq.sequence);
seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
idx = seq & 1;
data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence)));
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
}
__always_inline void cyc2ns_read_end(void)
......@@ -186,7 +186,7 @@ static void __init cyc2ns_init_boot_cpu(void)
{
struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
seqcount_init(&c2n->seq);
seqcount_latch_init(&c2n->seq);
__set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
}
......@@ -203,7 +203,7 @@ static void __init cyc2ns_init_secondary_cpus(void)
for_each_possible_cpu(cpu) {
if (cpu != this_cpu) {
seqcount_init(&c2n->seq);
seqcount_latch_init(&c2n->seq);
c2n = per_cpu_ptr(&cyc2ns, cpu);
c2n->data[0] = data[0];
c2n->data[1] = data[1];
......
This diff is collapsed.
......@@ -67,7 +67,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
*/
static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
{
instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
return arch_test_and_set_bit(nr, addr);
}
......@@ -80,7 +80,7 @@ static inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
*/
static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
{
instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
return arch_test_and_clear_bit(nr, addr);
}
......@@ -93,7 +93,7 @@ static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
{
instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
return arch_test_and_change_bit(nr, addr);
}
......
......@@ -52,7 +52,7 @@ static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
*/
static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
instrument_atomic_write(addr + BIT_WORD(nr), sizeof(long));
instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long));
return arch_test_and_set_bit_lock(nr, addr);
}
......
......@@ -58,6 +58,30 @@ static inline void __change_bit(long nr, volatile unsigned long *addr)
arch___change_bit(nr, addr);
}
/*
 * Instrument the word that holds bit @nr for a non-atomic read-modify-write
 * bitop (used by the __test_and_*_bit() helpers).
 */
static inline void __instrument_read_write_bitop(long nr, volatile unsigned long *addr)
{
	volatile unsigned long *word = addr + BIT_WORD(nr);

	if (!IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC)) {
		/* Default case: instrument as one regular read-write access. */
		instrument_read_write(word, sizeof(long));
		return;
	}

	/*
	 * Non-atomic read-write bitops get special treatment here. Since
	 * these operations only modify a single bit, it may be reasonable
	 * for certain usage to assume that non-atomicity of the writer is
	 * sufficient (this follows the permissive nature of the
	 * assume-plain-writes-atomic rule):
	 * 1. report read-modify-write races -> check read;
	 * 2. do not report races with marked readers, but do report
	 *    races with unmarked readers -> check "atomic" write.
	 */
	kcsan_check_read(word, sizeof(long));

	/*
	 * Go through the generic write instrumentation, in case other
	 * sanitizers or tools are enabled alongside KCSAN.
	 */
	instrument_write(word, sizeof(long));
}
/**
* __test_and_set_bit - Set a bit and return its old value
* @nr: Bit to set
......@@ -68,7 +92,7 @@ static inline void __change_bit(long nr, volatile unsigned long *addr)
*/
static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
{
instrument_write(addr + BIT_WORD(nr), sizeof(long));
__instrument_read_write_bitop(nr, addr);
return arch___test_and_set_bit(nr, addr);
}
......@@ -82,7 +106,7 @@ static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
*/
static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
instrument_write(addr + BIT_WORD(nr), sizeof(long));
__instrument_read_write_bitop(nr, addr);
return arch___test_and_clear_bit(nr, addr);
}
......@@ -96,7 +120,7 @@ static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
{
instrument_write(addr + BIT_WORD(nr), sizeof(long));
__instrument_read_write_bitop(nr, addr);
return arch___test_and_change_bit(nr, addr);
}
......
......@@ -42,6 +42,21 @@ static __always_inline void instrument_write(const volatile void *v, size_t size
kcsan_check_write(v, size);
}
/**
* instrument_read_write - instrument regular read-write access
*
* Instrument a regular read-write access. The instrumentation should be
* inserted before the actual write happens.
*
* @v address of access
* @size size of access
*/
static __always_inline void instrument_read_write(const volatile void *v, size_t size)
{
/* Reported to KASAN as a write and to KCSAN as a read-write. */
kasan_check_write(v, size);
kcsan_check_read_write(v, size);
}
/**
* instrument_atomic_read - instrument atomic read access
*
......@@ -72,6 +87,21 @@ static __always_inline void instrument_atomic_write(const volatile void *v, size
kcsan_check_atomic_write(v, size);
}
/**
* instrument_atomic_read_write - instrument atomic read-write access
*
* Instrument an atomic read-write access. The instrumentation should be
* inserted before the actual write happens.
*
* @v address of access
* @size size of access
*/
static __always_inline void instrument_atomic_read_write(const volatile void *v, size_t size)
{
/* Reported to KASAN as a write and to KCSAN as an atomic read-write. */
kasan_check_write(v, size);
kcsan_check_atomic_read_write(v, size);
}
/**
* instrument_copy_to_user - instrument reads of copy_to_user
*
......
......@@ -7,19 +7,13 @@
#include <linux/compiler_attributes.h>
#include <linux/types.h>
/*
* ACCESS TYPE MODIFIERS
*
* <none>: normal read access;
* WRITE : write access;
* ATOMIC: access is atomic;
* ASSERT: access is not a regular access, but an assertion;
* SCOPED: access is a scoped access;
*/
#define KCSAN_ACCESS_WRITE 0x1
#define KCSAN_ACCESS_ATOMIC 0x2
#define KCSAN_ACCESS_ASSERT 0x4
#define KCSAN_ACCESS_SCOPED 0x8
/* Access types -- if KCSAN_ACCESS_WRITE is not set, the access is a read. */
#define KCSAN_ACCESS_WRITE (1 << 0) /* Access is a write. */
#define KCSAN_ACCESS_COMPOUND (1 << 1) /* Compounded read-write instrumentation. */
#define KCSAN_ACCESS_ATOMIC (1 << 2) /* Access is atomic. */
/* The following are special, and never due to compiler instrumentation. */
#define KCSAN_ACCESS_ASSERT (1 << 3) /* Access is an assertion. */
#define KCSAN_ACCESS_SCOPED (1 << 4) /* Access is a scoped access. */
/*
* __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used
......@@ -204,6 +198,15 @@ static inline void __kcsan_disable_current(void) { }
#define __kcsan_check_write(ptr, size) \
__kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE)
/**
* __kcsan_check_read_write - check regular read-write access for races
*
* @ptr: address of access
* @size: size of access
*/
#define __kcsan_check_read_write(ptr, size) \
__kcsan_check_access(ptr, size, KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE)
/**
* kcsan_check_read - check regular read access for races
*
......@@ -221,6 +224,15 @@ static inline void __kcsan_disable_current(void) { }
#define kcsan_check_write(ptr, size) \
kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE)
/**
* kcsan_check_read_write - check regular read-write access for races
*
* @ptr: address of access
* @size: size of access
*/
#define kcsan_check_read_write(ptr, size) \
kcsan_check_access(ptr, size, KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE)
/*
* Check for atomic accesses: if atomic accesses are not ignored, this simply
* aliases to kcsan_check_access(), otherwise becomes a no-op.
......@@ -228,11 +240,14 @@ static inline void __kcsan_disable_current(void) { }
#ifdef CONFIG_KCSAN_IGNORE_ATOMICS
#define kcsan_check_atomic_read(...) do { } while (0)
#define kcsan_check_atomic_write(...) do { } while (0)
#define kcsan_check_atomic_read_write(...) do { } while (0)
#else
#define kcsan_check_atomic_read(ptr, size) \
kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC)
#define kcsan_check_atomic_write(ptr, size) \
kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC | KCSAN_ACCESS_WRITE)
#define kcsan_check_atomic_read_write(ptr, size) \
kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_COMPOUND)
#endif
/**
......
......@@ -54,7 +54,11 @@ struct lock_list {
struct lock_class *class;
struct lock_class *links_to;
const struct lock_trace *trace;
int distance;
u16 distance;
/* bitmap of different dependencies from head to this */
u8 dep;
/* used by BFS to record whether "prev -> this" only has -(*R)-> */
u8 only_xr;
/*
* The parent field is used to implement breadth-first search, and the
......@@ -469,6 +473,20 @@ static inline void print_irqtrace_events(struct task_struct *curr)
}
#endif
/* Variable used to make lockdep treat read_lock() as recursive in selftests */
#ifdef CONFIG_DEBUG_LOCKING_API_SELFTESTS
extern unsigned int force_read_lock_recursive;
#else /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */
#define force_read_lock_recursive 0
#endif /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */
#ifdef CONFIG_LOCKDEP
extern bool read_lock_is_recursive(void);
#else /* CONFIG_LOCKDEP */
/* If !LOCKDEP, the value is meaningless */
#define read_lock_is_recursive() 0
#endif
/*
* For trivial one-depth nesting of a lock-class, the following
* global define can be used. (Subsystems with multiple levels
......@@ -490,7 +508,14 @@ static inline void print_irqtrace_events(struct task_struct *curr)
#define spin_release(l, i) lock_release(l, i)
#define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i)
#define rwlock_acquire_read(l, s, t, i) \
do { \
if (read_lock_is_recursive()) \
lock_acquire_shared_recursive(l, s, t, NULL, i); \
else \
lock_acquire_shared(l, s, t, NULL, i); \
} while (0)
#define rwlock_release(l, i) lock_release(l, i)
#define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
......@@ -534,44 +559,39 @@ do { \
DECLARE_PER_CPU(int, hardirqs_enabled);
DECLARE_PER_CPU(int, hardirq_context);
DECLARE_PER_CPU(unsigned int, lockdep_recursion);
/*
* The below lockdep_assert_*() macros use raw_cpu_read() to access the above
* per-cpu variables. This is required because this_cpu_read() will potentially
* call into preempt/irq-disable and that obviously isn't right. This is also
* correct because when IRQs are enabled, it doesn't matter if we accidentally
* read the value from our previous CPU.
*/
#define __lockdep_enabled (debug_locks && !this_cpu_read(lockdep_recursion))
#define lockdep_assert_irqs_enabled() \
do { \
WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirqs_enabled)); \
WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirqs_enabled)); \
} while (0)
#define lockdep_assert_irqs_disabled() \
do { \
WARN_ON_ONCE(debug_locks && raw_cpu_read(hardirqs_enabled)); \
WARN_ON_ONCE(__lockdep_enabled && this_cpu_read(hardirqs_enabled)); \
} while (0)
#define lockdep_assert_in_irq() \
do { \
WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirq_context)); \
WARN_ON_ONCE(__lockdep_enabled && !this_cpu_read(hardirq_context)); \
} while (0)
#define lockdep_assert_preemption_enabled() \
do { \
WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
debug_locks && \
__lockdep_enabled && \
(preempt_count() != 0 || \
!raw_cpu_read(hardirqs_enabled))); \
!this_cpu_read(hardirqs_enabled))); \
} while (0)
#define lockdep_assert_preemption_disabled() \
do { \
WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \
debug_locks && \
__lockdep_enabled && \
(preempt_count() == 0 && \
raw_cpu_read(hardirqs_enabled))); \
this_cpu_read(hardirqs_enabled))); \
} while (0)
#else
......
......@@ -35,8 +35,12 @@ enum lockdep_wait_type {
/*
* We'd rather not expose kernel/lockdep_states.h this wide, but we do need
* the total number of states... :-(
*
* XXX_LOCK_USAGE_STATES is the number of lines in lockdep_states.h; for each
* of those we generate 4 states. Additionally, we report on USED and USED_READ.
*/
#define XXX_LOCK_USAGE_STATES (1+2*4)
#define XXX_LOCK_USAGE_STATES 2
#define LOCK_TRACE_STATES (XXX_LOCK_USAGE_STATES*4 + 2)
/*
* NR_LOCKDEP_CACHING_CLASSES ... Number of classes
......@@ -106,7 +110,7 @@ struct lock_class {
* IRQ/softirq usage tracking bits:
*/
unsigned long usage_mask;
const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];
const struct lock_trace *usage_traces[LOCK_TRACE_STATES];
/*
* Generation counter, when doing certain classes of graph walking,
......
......@@ -42,7 +42,7 @@ struct latch_tree_node {
};
struct latch_tree_root {
seqcount_t seq;
seqcount_latch_t seq;
struct rb_root tree[2];
};
......@@ -206,7 +206,7 @@ latch_tree_find(void *key, struct latch_tree_root *root,
do {
seq = raw_read_seqcount_latch(&root->seq);
node = __lt_find(key, root, seq & 1, ops->comp);
} while (read_seqcount_retry(&root->seq, seq));
} while (read_seqcount_latch_retry(&root->seq, seq));
return node;
}
......
......@@ -165,7 +165,7 @@ static inline unsigned int refcount_read(const refcount_t *r)
*
* Return: false if the passed refcount is 0, true otherwise
*/
static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
{
int old = refcount_read(r);
......@@ -174,12 +174,20 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
break;
} while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i));
if (oldp)
*oldp = old;
if (unlikely(old < 0 || old + i < 0))
refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF);
return old;
}
/* Same as __refcount_add_not_zero(), for callers that do not need the old value. */
static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
{
return __refcount_add_not_zero(i, r, NULL);
}
/**
* refcount_add - add a value to a refcount
* @i: the value to add to the refcount
......@@ -196,16 +204,24 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
* cases, refcount_inc(), or one of its variants, should instead be used to
* increment a reference count.
*/
static inline void refcount_add(int i, refcount_t *r)
static inline void __refcount_add(int i, refcount_t *r, int *oldp)
{
int old = atomic_fetch_add_relaxed(i, &r->refs);
if (oldp)
*oldp = old;
if (unlikely(!old))
refcount_warn_saturate(r, REFCOUNT_ADD_UAF);
else if (unlikely(old < 0 || old + i < 0))
refcount_warn_saturate(r, REFCOUNT_ADD_OVF);
}
/* Same as __refcount_add(), for callers that do not need the old value. */
static inline void refcount_add(int i, refcount_t *r)
{
__refcount_add(i, r, NULL);
}
/**
* refcount_inc_not_zero - increment a refcount unless it is 0
* @r: the refcount to increment
......@@ -219,9 +235,14 @@ static inline void refcount_add(int i, refcount_t *r)
*
* Return: true if the increment was successful, false otherwise
*/
static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp)
{
/* Add-by-one form of __refcount_add_not_zero(); @oldp is forwarded unchanged. */
return __refcount_add_not_zero(1, r, oldp);
}
static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
{
return refcount_add_not_zero(1, r);
return __refcount_inc_not_zero(r, NULL);
}
/**
......@@ -236,9 +257,14 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
* Will WARN if the refcount is 0, as this represents a possible use-after-free
* condition.
*/
static inline void __refcount_inc(refcount_t *r, int *oldp)
{
/* Add-by-one form of __refcount_add(); @oldp is forwarded unchanged. */
__refcount_add(1, r, oldp);
}
static inline void refcount_inc(refcount_t *r)
{
refcount_add(1, r);
__refcount_inc(r, NULL);
}
/**
......@@ -261,10 +287,13 @@ static inline void refcount_inc(refcount_t *r)
*
* Return: true if the resulting refcount is 0, false otherwise
*/
static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
{
int old = atomic_fetch_sub_release(i, &r->refs);
if (oldp)
*oldp = old;
if (old == i) {
smp_acquire__after_ctrl_dep();
return true;
......@@ -276,6 +305,11 @@ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
return false;
}
/* Same as __refcount_sub_and_test(), for callers that do not need the old value. */
static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
{
return __refcount_sub_and_test(i, r, NULL);
}
/**
* refcount_dec_and_test - decrement a refcount and test if it is 0
* @r: the refcount
......@@ -289,9 +323,14 @@ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
*
* Return: true if the resulting refcount is 0, false otherwise
*/
static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp)
{
/* Subtract-by-one form of __refcount_sub_and_test(); @oldp is forwarded unchanged. */
return __refcount_sub_and_test(1, r, oldp);
}
static inline __must_check bool refcount_dec_and_test(refcount_t *r)
{
return refcount_sub_and_test(1, r);
return __refcount_dec_and_test(r, NULL);
}
/**
......@@ -304,12 +343,22 @@ static inline __must_check bool refcount_dec_and_test(refcount_t *r)
* Provides release memory ordering, such that prior loads and stores are done
* before.
*/
static inline void refcount_dec(refcount_t *r)
static inline void __refcount_dec(refcount_t *r, int *oldp)
{
if (unlikely(atomic_fetch_sub_release(1, &r->refs) <= 1))
int old = atomic_fetch_sub_release(1, &r->refs);
if (oldp)
*oldp = old;
if (unlikely(old <= 1))
refcount_warn_saturate(r, REFCOUNT_DEC_LEAK);
}
/* Same as __refcount_dec(), for callers that do not need the old value. */
static inline void refcount_dec(refcount_t *r)
{
__refcount_dec(r, NULL);
}
extern __must_check bool refcount_dec_if_one(refcount_t *r);
extern __must_check bool refcount_dec_not_one(refcount_t *r);
extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock);
......
This diff is collapsed.
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) "kcsan: " fmt
#include <linux/atomic.h>
#include <linux/bsearch.h>
#include <linux/bug.h>
......@@ -15,10 +17,19 @@
#include "kcsan.h"
/*
* Statistics counters.
*/
static atomic_long_t counters[KCSAN_COUNTER_COUNT];
atomic_long_t kcsan_counters[KCSAN_COUNTER_COUNT];
/* Printable name of each statistics counter, indexed by its KCSAN_COUNTER_* id. */
static const char *const counter_names[] = {
[KCSAN_COUNTER_USED_WATCHPOINTS] = "used_watchpoints",
[KCSAN_COUNTER_SETUP_WATCHPOINTS] = "setup_watchpoints",
[KCSAN_COUNTER_DATA_RACES] = "data_races",
[KCSAN_COUNTER_ASSERT_FAILURES] = "assert_failures",
[KCSAN_COUNTER_NO_CAPACITY] = "no_capacity",
[KCSAN_COUNTER_REPORT_RACES] = "report_races",
[KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN] = "races_unknown_origin",
[KCSAN_COUNTER_UNENCODABLE_ACCESSES] = "unencodable_accesses",
[KCSAN_COUNTER_ENCODING_FALSE_POSITIVES] = "encoding_false_positives",
};
/* Build-time check that the table has exactly KCSAN_COUNTER_COUNT entries. */
static_assert(ARRAY_SIZE(counter_names) == KCSAN_COUNTER_COUNT);
/*
* Addresses for filtering functions from reporting. This list can be used as a
......@@ -39,34 +50,6 @@ static struct {
};
static DEFINE_SPINLOCK(report_filterlist_lock);
static const char *counter_to_name(enum kcsan_counter_id id)
{
switch (id) {
case KCSAN_COUNTER_USED_WATCHPOINTS: return "used_watchpoints";
case KCSAN_COUNTER_SETUP_WATCHPOINTS: return "setup_watchpoints";
case KCSAN_COUNTER_DATA_RACES: return "data_races";
case KCSAN_COUNTER_ASSERT_FAILURES: return "assert_failures";
case KCSAN_COUNTER_NO_CAPACITY: return "no_capacity";
case KCSAN_COUNTER_REPORT_RACES: return "report_races";
case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN: return "races_unknown_origin";
case KCSAN_COUNTER_UNENCODABLE_ACCESSES: return "unencodable_accesses";
case KCSAN_COUNTER_ENCODING_FALSE_POSITIVES: return "encoding_false_positives";
case KCSAN_COUNTER_COUNT:
BUG();
}
return NULL;
}
void kcsan_counter_inc(enum kcsan_counter_id id)
{
atomic_long_inc(&counters[id]);
}
void kcsan_counter_dec(enum kcsan_counter_id id)
{
atomic_long_dec(&counters[id]);
}
/*
* The microbenchmark allows benchmarking KCSAN core runtime only. To run
* multiple threads, pipe 'microbench=<iters>' from multiple tasks into the
......@@ -86,7 +69,7 @@ static noinline void microbenchmark(unsigned long iters)
*/
WRITE_ONCE(kcsan_enabled, false);
pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters);
pr_info("%s begin | iters: %lu\n", __func__, iters);
cycles = get_cycles();
while (iters--) {
......@@ -97,73 +80,13 @@ static noinline void microbenchmark(unsigned long iters)
}
cycles = get_cycles() - cycles;
pr_info("KCSAN: %s end | cycles: %llu\n", __func__, cycles);
pr_info("%s end | cycles: %llu\n", __func__, cycles);
WRITE_ONCE(kcsan_enabled, was_enabled);
/* restore context */
current->kcsan_ctx = ctx_save;
}
/*
* Simple test to create conflicting accesses. Write 'test=<iters>' to KCSAN's
* debugfs file from multiple tasks to generate real conflicts and show reports.
*/
static long test_dummy;
static long test_flags;
static long test_scoped;
static noinline void test_thread(unsigned long iters)
{
const long CHANGE_BITS = 0xff00ff00ff00ff00L;
const struct kcsan_ctx ctx_save = current->kcsan_ctx;
cycles_t cycles;
/* We may have been called from an atomic region; reset context. */
memset(&current->kcsan_ctx, 0, sizeof(current->kcsan_ctx));
pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters);
pr_info("test_dummy@%px, test_flags@%px, test_scoped@%px,\n",
&test_dummy, &test_flags, &test_scoped);
cycles = get_cycles();
while (iters--) {
/* These all should generate reports. */
__kcsan_check_read(&test_dummy, sizeof(test_dummy));
ASSERT_EXCLUSIVE_WRITER(test_dummy);
ASSERT_EXCLUSIVE_ACCESS(test_dummy);
ASSERT_EXCLUSIVE_BITS(test_flags, ~CHANGE_BITS); /* no report */
__kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */
ASSERT_EXCLUSIVE_BITS(test_flags, CHANGE_BITS); /* report */
__kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */
/* not actually instrumented */
WRITE_ONCE(test_dummy, iters); /* to observe value-change */
__kcsan_check_write(&test_dummy, sizeof(test_dummy));
test_flags ^= CHANGE_BITS; /* generate value-change */
__kcsan_check_write(&test_flags, sizeof(test_flags));
BUG_ON(current->kcsan_ctx.scoped_accesses.prev);
{
/* Should generate reports anywhere in this block. */
ASSERT_EXCLUSIVE_WRITER_SCOPED(test_scoped);
ASSERT_EXCLUSIVE_ACCESS_SCOPED(test_scoped);
BUG_ON(!current->kcsan_ctx.scoped_accesses.prev);
/* Unrelated accesses. */
__kcsan_check_access(&cycles, sizeof(cycles), 0);
__kcsan_check_access(&cycles, sizeof(cycles), KCSAN_ACCESS_ATOMIC);
}
BUG_ON(current->kcsan_ctx.scoped_accesses.prev);
}
cycles = get_cycles() - cycles;
pr_info("KCSAN: %s end | cycles: %llu\n", __func__, cycles);
/* restore context */
current->kcsan_ctx = ctx_save;
}
static int cmp_filterlist_addrs(const void *rhs, const void *lhs)
{
const unsigned long a = *(const unsigned long *)rhs;
......@@ -220,7 +143,7 @@ static ssize_t insert_report_filterlist(const char *func)
ssize_t ret = 0;
if (!addr) {
pr_err("KCSAN: could not find function: '%s'\n", func);
pr_err("could not find function: '%s'\n", func);
return -ENOENT;
}
......@@ -270,9 +193,10 @@ static int show_info(struct seq_file *file, void *v)
/* show stats */
seq_printf(file, "enabled: %i\n", READ_ONCE(kcsan_enabled));
for (i = 0; i < KCSAN_COUNTER_COUNT; ++i)
seq_printf(file, "%s: %ld\n", counter_to_name(i),
atomic_long_read(&counters[i]));
for (i = 0; i < KCSAN_COUNTER_COUNT; ++i) {
seq_printf(file, "%s: %ld\n", counter_names[i],
atomic_long_read(&kcsan_counters[i]));
}
/* show filter functions, and filter type */
spin_lock_irqsave(&report_filterlist_lock, flags);
......@@ -307,18 +231,12 @@ debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *o
WRITE_ONCE(kcsan_enabled, true);
} else if (!strcmp(arg, "off")) {
WRITE_ONCE(kcsan_enabled, false);
} else if (!strncmp(arg, "microbench=", sizeof("microbench=") - 1)) {
} else if (str_has_prefix(arg, "microbench=")) {
unsigned long iters;
if (kstrtoul(&arg[sizeof("microbench=") - 1], 0, &iters))
if (kstrtoul(&arg[strlen("microbench=")], 0, &iters))
return -EINVAL;
microbenchmark(iters);
} else if (!strncmp(arg, "test=", sizeof("test=") - 1)) {
unsigned long iters;
if (kstrtoul(&arg[sizeof("test=") - 1], 0, &iters))
return -EINVAL;
test_thread(iters);
} else if (!strcmp(arg, "whitelist")) {
set_report_filterlist_whitelist(true);
} else if (!strcmp(arg, "blacklist")) {
......
......@@ -27,6 +27,12 @@
#include <linux/types.h>
#include <trace/events/printk.h>
/*
* Expected access type for accesses that may be instrumented as a compound
* read-write: KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE when
* CONFIG_CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE is defined, otherwise the
* caller-supplied @alt.
*/
#ifdef CONFIG_CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE
#define __KCSAN_ACCESS_RW(alt) (KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE)
#else
#define __KCSAN_ACCESS_RW(alt) (alt)
#endif
/* Points to current test-case memory access "kernels". */
static void (*access_kernels[2])(void);
......@@ -186,20 +192,21 @@ static bool report_matches(const struct expect_report *r)
/* Access 1 & 2 */
for (i = 0; i < 2; ++i) {
const int ty = r->access[i].type;
const char *const access_type =
(r->access[i].type & KCSAN_ACCESS_ASSERT) ?
((r->access[i].type & KCSAN_ACCESS_WRITE) ?
(ty & KCSAN_ACCESS_ASSERT) ?
((ty & KCSAN_ACCESS_WRITE) ?
"assert no accesses" :
"assert no writes") :
((r->access[i].type & KCSAN_ACCESS_WRITE) ?
"write" :
((ty & KCSAN_ACCESS_WRITE) ?
((ty & KCSAN_ACCESS_COMPOUND) ?
"read-write" :
"write") :
"read");
const char *const access_type_aux =
(r->access[i].type & KCSAN_ACCESS_ATOMIC) ?
(ty & KCSAN_ACCESS_ATOMIC) ?
" (marked)" :
((r->access[i].type & KCSAN_ACCESS_SCOPED) ?
" (scoped)" :
"");
((ty & KCSAN_ACCESS_SCOPED) ? " (scoped)" : "");
if (i == 1) {
/* Access 2 */
......@@ -277,6 +284,12 @@ static noinline void test_kernel_write_atomic(void)
WRITE_ONCE(test_var, READ_ONCE_NOCHECK(test_sink) + 1);
}
/* Access kernel: relaxed atomic fetch-add of 1 on test_var. */
static noinline void test_kernel_atomic_rmw(void)
{
/* Use builtin, so we can set up the "bad" atomic/non-atomic scenario. */
__atomic_fetch_add(&test_var, 1, __ATOMIC_RELAXED);
}
__no_kcsan
static noinline void test_kernel_write_uninstrumented(void) { test_var++; }
......@@ -390,6 +403,15 @@ static noinline void test_kernel_seqlock_writer(void)
write_sequnlock_irqrestore(&test_seqlock, flags);
}
/* Access kernel: relaxed atomic load of test_var; the loaded value is discarded. */
static noinline void test_kernel_atomic_builtins(void)
{
/*
* Generate concurrent accesses, expecting no reports, ensuring KCSAN
* treats builtin atomics as actually atomic.
*/
__atomic_load_n(&test_var, __ATOMIC_RELAXED);
}
/* ===== Test cases ===== */
/* Simple test with normal data race. */
......@@ -430,8 +452,8 @@ static void test_concurrent_races(struct kunit *test)
const struct expect_report expect = {
.access = {
/* NULL will match any address. */
{ test_kernel_rmw_array, NULL, 0, KCSAN_ACCESS_WRITE },
{ test_kernel_rmw_array, NULL, 0, 0 },
{ test_kernel_rmw_array, NULL, 0, __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) },
{ test_kernel_rmw_array, NULL, 0, __KCSAN_ACCESS_RW(0) },
},
};
static const struct expect_report never = {
......@@ -620,6 +642,29 @@ static void test_read_plain_atomic_write(struct kunit *test)
KUNIT_EXPECT_TRUE(test, match_expect);
}
/*
* Test that atomic RMWs generate correct report: a plain read of test_var in
* test_kernel_read() racing with the atomic fetch-add in
* test_kernel_atomic_rmw() must be reported with the latter typed as an
* atomic compound read-write.
*/
__no_kcsan
static void test_read_plain_atomic_rmw(struct kunit *test)
{
const struct expect_report expect = {
.access = {
/* Plain read: no access-type flags set. */
{ test_kernel_read, &test_var, sizeof(test_var), 0 },
/* Atomic RMW: compound read-write, marked atomic. */
{ test_kernel_atomic_rmw, &test_var, sizeof(test_var),
KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC },
},
};
bool match_expect = false;
/*
* Skipped when CONFIG_KCSAN_IGNORE_ATOMICS is enabled.
* NOTE(review): presumably no such report is produced in that
* configuration -- confirm.
*/
if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS))
return;
begin_test_checks(test_kernel_read, test_kernel_atomic_rmw);
/* Poll until end_test_checks() says to stop; it is told whether we matched. */
do {
match_expect = report_matches(&expect);
} while (!end_test_checks(match_expect));
KUNIT_EXPECT_TRUE(test, match_expect);
}
/* Zero-sized accesses should never cause data race reports. */
__no_kcsan
static void test_zero_size_access(struct kunit *test)
......@@ -852,6 +897,59 @@ static void test_seqlock_noreport(struct kunit *test)
KUNIT_EXPECT_FALSE(test, match_never);
}
/*
* Test atomic builtins work and required instrumentation functions exist. We
* also test that KCSAN understands they're atomic by racing with them via
* test_kernel_atomic_builtins(), and expect no reports.
*
* The atomic builtins _SHOULD NOT_ be used in normal kernel code!
*/
static void test_atomic_builtins(struct kunit *test)
{
bool match_never = false;
/* Both concurrent access kernels do relaxed atomic loads of test_var. */
begin_test_checks(test_kernel_atomic_builtins, test_kernel_atomic_builtins);
do {
long tmp;
kcsan_enable_current();
/* store/load/exchange: test_var goes 42 -> 20 (exchange returns the old 42). */
__atomic_store_n(&test_var, 42L, __ATOMIC_RELAXED);
KUNIT_EXPECT_EQ(test, 42L, __atomic_load_n(&test_var, __ATOMIC_RELAXED));
KUNIT_EXPECT_EQ(test, 42L, __atomic_exchange_n(&test_var, 20, __ATOMIC_RELAXED));
KUNIT_EXPECT_EQ(test, 20L, test_var);
/* Strong cmpxchg with matching expected value: succeeds, test_var 20 -> 30, tmp untouched. */
tmp = 20L;
KUNIT_EXPECT_TRUE(test, __atomic_compare_exchange_n(&test_var, &tmp, 30L,
0, __ATOMIC_RELAXED,
__ATOMIC_RELAXED));
KUNIT_EXPECT_EQ(test, tmp, 20L);
KUNIT_EXPECT_EQ(test, test_var, 30L);
/* Weak cmpxchg with stale expected value (20 != 30): fails, tmp is updated to 30, test_var unchanged. */
KUNIT_EXPECT_FALSE(test, __atomic_compare_exchange_n(&test_var, &tmp, 40L,
1, __ATOMIC_RELAXED,
__ATOMIC_RELAXED));
KUNIT_EXPECT_EQ(test, tmp, 30L);
KUNIT_EXPECT_EQ(test, test_var, 30L);
/*
* Each fetch_<op> returns the value before the operation:
* add 1: 30 -> 31
* sub 1: 31 -> 30
* and 0xf: 30 -> 14
* xor 0xf: 14 -> 1
* or 0xf0: 1 -> 241
* nand 0xf: 241 -> ~(241 & 0xf) = -2
*/
KUNIT_EXPECT_EQ(test, 30L, __atomic_fetch_add(&test_var, 1, __ATOMIC_RELAXED));
KUNIT_EXPECT_EQ(test, 31L, __atomic_fetch_sub(&test_var, 1, __ATOMIC_RELAXED));
KUNIT_EXPECT_EQ(test, 30L, __atomic_fetch_and(&test_var, 0xf, __ATOMIC_RELAXED));
KUNIT_EXPECT_EQ(test, 14L, __atomic_fetch_xor(&test_var, 0xf, __ATOMIC_RELAXED));
KUNIT_EXPECT_EQ(test, 1L, __atomic_fetch_or(&test_var, 0xf0, __ATOMIC_RELAXED));
KUNIT_EXPECT_EQ(test, 241L, __atomic_fetch_nand(&test_var, 0xf, __ATOMIC_RELAXED));
KUNIT_EXPECT_EQ(test, -2L, test_var);
__atomic_thread_fence(__ATOMIC_SEQ_CST);
__atomic_signal_fence(__ATOMIC_SEQ_CST);
kcsan_disable_current();
/* Any report at all counts as a failure for this test. */
match_never = report_available();
} while (!end_test_checks(match_never));
KUNIT_EXPECT_FALSE(test, match_never);
}
/*
* Each test case is run with different numbers of threads. Until KUnit supports
* passing arguments for each test case, we encode #threads in the test case
......@@ -880,6 +978,7 @@ static struct kunit_case kcsan_test_cases[] = {
KCSAN_KUNIT_CASE(test_write_write_struct_part),
KCSAN_KUNIT_CASE(test_read_atomic_write_atomic),
KCSAN_KUNIT_CASE(test_read_plain_atomic_write),
KCSAN_KUNIT_CASE(test_read_plain_atomic_rmw),
KCSAN_KUNIT_CASE(test_zero_size_access),
KCSAN_KUNIT_CASE(test_data_race),
KCSAN_KUNIT_CASE(test_assert_exclusive_writer),
......@@ -891,6 +990,7 @@ static struct kunit_case kcsan_test_cases[] = {
KCSAN_KUNIT_CASE(test_assert_exclusive_access_scoped),
KCSAN_KUNIT_CASE(test_jiffies_noreport),
KCSAN_KUNIT_CASE(test_seqlock_noreport),
KCSAN_KUNIT_CASE(test_atomic_builtins),
{},
};
......
......@@ -8,6 +8,7 @@
#ifndef _KERNEL_KCSAN_KCSAN_H
#define _KERNEL_KCSAN_KCSAN_H
#include <linux/atomic.h>
#include <linux/kcsan.h>
#include <linux/sched.h>
......@@ -34,6 +35,10 @@ void kcsan_restore_irqtrace(struct task_struct *task);
*/
void kcsan_debugfs_init(void);
/*
* Statistics counters displayed via debugfs; should only be modified in
* slow-paths.
*/
enum kcsan_counter_id {
/*
* Number of watchpoints currently in use.
......@@ -86,12 +91,7 @@ enum kcsan_counter_id {
KCSAN_COUNTER_COUNT, /* number of counters */
};
/*
* Increment/decrement counter with given id; avoid calling these in fast-path.
*/
extern void kcsan_counter_inc(enum kcsan_counter_id id);
extern void kcsan_counter_dec(enum kcsan_counter_id id);
extern atomic_long_t kcsan_counters[KCSAN_COUNTER_COUNT];
/*
* Returns true if data races in the function symbol that maps to func_addr
......
......@@ -228,6 +228,10 @@ static const char *get_access_type(int type)
return "write";
case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
return "write (marked)";
case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE:
return "read-write";
case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC:
return "read-write (marked)";
case KCSAN_ACCESS_SCOPED:
return "read (scoped)";
case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC:
......@@ -275,8 +279,8 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
cur = strnstr(buf, "kcsan_", len);
if (cur) {
cur += sizeof("kcsan_") - 1;
if (strncmp(cur, "test", sizeof("test") - 1))
cur += strlen("kcsan_");
if (!str_has_prefix(cur, "test"))
continue; /* KCSAN runtime function. */
/* KCSAN related test. */
}
......@@ -555,7 +559,7 @@ static bool prepare_report_consumer(unsigned long *flags,
* If the actual accesses do not match, this was a false
* positive due to watchpoint encoding.
*/
kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES);
atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ENCODING_FALSE_POSITIVES]);
goto discard;
}
......
// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) "kcsan: " fmt
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/printk.h>
......@@ -116,16 +118,16 @@ static int __init kcsan_selftest(void)
if (do_test()) \
++passed; \
else \
pr_err("KCSAN selftest: " #do_test " failed"); \
pr_err("selftest: " #do_test " failed"); \
} while (0)
RUN_TEST(test_requires);
RUN_TEST(test_encode_decode);
RUN_TEST(test_matching_access);
pr_info("KCSAN selftest: %d/%d tests passed\n", passed, total);
pr_info("selftest: %d/%d tests passed\n", passed, total);
if (passed != total)
panic("KCSAN selftests failed");
panic("selftests failed");
return 0;
}
postcore_initcall(kcsan_selftest);
This diff is collapsed.
......@@ -20,9 +20,12 @@ enum lock_usage_bit {
#undef LOCKDEP_STATE
LOCK_USED,
LOCK_USED_READ,
LOCK_USAGE_STATES
LOCK_USAGE_STATES,
};
/* states after LOCK_USED_READ are not traced and printed */
static_assert(LOCK_TRACE_STATES == LOCK_USAGE_STATES);
#define LOCK_USAGE_READ_MASK 1
#define LOCK_USAGE_DIR_MASK 2
#define LOCK_USAGE_STATE_MASK (~(LOCK_USAGE_READ_MASK | LOCK_USAGE_DIR_MASK))
......@@ -121,7 +124,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ =
extern struct list_head all_lock_classes;
extern struct lock_chain lock_chains[];
#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2)
#define LOCK_USAGE_CHARS (2*XXX_LOCK_USAGE_STATES + 1)
extern void get_usage_chars(struct lock_class *class,
char usage[LOCK_USAGE_CHARS]);
......
......@@ -35,7 +35,7 @@
* into a single 64-byte cache line.
*/
struct clock_data {
seqcount_t seq;
seqcount_latch_t seq;
struct clock_read_data read_data[2];
ktime_t wrap_kt;
unsigned long rate;
......@@ -76,7 +76,7 @@ struct clock_read_data *sched_clock_read_begin(unsigned int *seq)
int sched_clock_read_retry(unsigned int seq)
{
return read_seqcount_retry(&cd.seq, seq);
return read_seqcount_latch_retry(&cd.seq, seq);
}
unsigned long long notrace sched_clock(void)
......@@ -258,7 +258,7 @@ void __init generic_sched_clock_init(void)
*/
static u64 notrace suspended_sched_clock_read(void)
{
unsigned int seq = raw_read_seqcount(&cd.seq);
unsigned int seq = raw_read_seqcount_latch(&cd.seq);
return cd.read_data[seq & 1].epoch_cyc;
}
......
......@@ -67,7 +67,7 @@ int __read_mostly timekeeping_suspended;
* See @update_fast_timekeeper() below.
*/
struct tk_fast {
seqcount_raw_spinlock_t seq;
seqcount_latch_t seq;
struct tk_read_base base[2];
};
......@@ -101,13 +101,13 @@ static struct clocksource dummy_clock = {
}
static struct tk_fast tk_fast_mono ____cacheline_aligned = {
.seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_mono.seq, &timekeeper_lock),
.seq = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
.base[0] = FAST_TK_INIT,
.base[1] = FAST_TK_INIT,
};
static struct tk_fast tk_fast_raw ____cacheline_aligned = {
.seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_fast_raw.seq, &timekeeper_lock),
.seq = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
.base[0] = FAST_TK_INIT,
.base[1] = FAST_TK_INIT,
};
......@@ -484,7 +484,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
tk_clock_read(tkr),
tkr->cycle_last,
tkr->mask));
} while (read_seqcount_retry(&tkf->seq, seq));
} while (read_seqcount_latch_retry(&tkf->seq, seq));
return now;
}
......@@ -548,7 +548,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
delta = timekeeping_delta_to_ns(tkr,
clocksource_delta(tk_clock_read(tkr),
tkr->cycle_last, tkr->mask));
} while (read_seqcount_retry(&tkf->seq, seq));
} while (read_seqcount_latch_retry(&tkf->seq, seq));
if (mono)
*mono = basem + delta;
......
......@@ -40,6 +40,11 @@ menuconfig KCSAN
if KCSAN
# Compiler capabilities that should not fail the test if they are unavailable.
config CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE
def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-compound-read-before-write=1)) || \
(CC_IS_GCC && $(cc-option,-fsanitize=thread --param tsan-compound-read-before-write=1))
config KCSAN_VERBOSE
bool "Show verbose reports with more information about system state"
depends on PROVE_LOCKING
......
This diff is collapsed.
......@@ -763,10 +763,20 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
*/
void lru_add_drain_all(void)
{
static seqcount_t seqcount = SEQCNT_ZERO(seqcount);
static DEFINE_MUTEX(lock);
/*
* lru_drain_gen - Global pages generation number
*
* (A) Definition: global lru_drain_gen = x implies that all generations
* 0 < n <= x are already *scheduled* for draining.
*
* This is an optimization for the highly-contended use case where a
* user space workload keeps constantly generating a flow of pages for
* each CPU.
*/
static unsigned int lru_drain_gen;
static struct cpumask has_work;
int cpu, seq;
static DEFINE_MUTEX(lock);
unsigned cpu, this_gen;
/*
* Make sure nobody triggers this path before mm_percpu_wq is fully
......@@ -775,21 +785,54 @@ void lru_add_drain_all(void)
if (WARN_ON(!mm_percpu_wq))
return;
seq = raw_read_seqcount_latch(&seqcount);
/*
* Guarantee pagevec counter stores visible by this CPU are visible to
* other CPUs before loading the current drain generation.
*/
smp_mb();
/*
* (B) Locally cache global LRU draining generation number
*
* The read barrier ensures that the counter is loaded before the mutex
* is taken. It pairs with smp_mb() inside the mutex critical section
* at (D).
*/
this_gen = smp_load_acquire(&lru_drain_gen);
mutex_lock(&lock);
/*
* Piggyback on drain started and finished while we waited for lock:
* all pages pended at the time of our enter were drained from vectors.
* (C) Exit the draining operation if a newer generation, from another
* lru_add_drain_all(), was already scheduled for draining. Check (A).
*/
if (__read_seqcount_retry(&seqcount, seq))
if (unlikely(this_gen != lru_drain_gen))
goto done;
raw_write_seqcount_latch(&seqcount);
/*
* (D) Increment global generation number
*
* Pairs with smp_load_acquire() at (B), outside of the critical
* section. Use a full memory barrier to guarantee that the new global
* drain generation number is stored before loading pagevec counters.
*
* This pairing must be done here, before the for_each_online_cpu loop
* below which drains the page vectors.
*
* Let x, y, and z represent some system CPU numbers, where x < y < z.
* Assume CPU #z is in the middle of the for_each_online_cpu loop
* below and has already reached CPU #y's per-cpu data. CPU #x comes
* along, adds some pages to its per-cpu vectors, then calls
* lru_add_drain_all().
*
* If the paired barrier is done at any later step, e.g. after the
* loop, CPU #x will just exit at (C) and miss flushing out all of its
* added pages.
*/
WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1);
smp_mb();
cpumask_clear(&has_work);
for_each_online_cpu(cpu) {
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
......@@ -801,7 +844,7 @@ void lru_add_drain_all(void)
need_activate_page_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
queue_work_on(cpu, mm_percpu_wq, work);
cpumask_set_cpu(cpu, &has_work);
__cpumask_set_cpu(cpu, &has_work);
}
}
......@@ -816,7 +859,7 @@ void lru_add_drain_all(void)
{
lru_add_drain();
}
#endif
#endif /* CONFIG_SMP */
/**
* release_pages - batched put_page()
......
......@@ -11,5 +11,5 @@ endif
# of some options does not break KCSAN nor causes false positive reports.
CFLAGS_KCSAN := -fsanitize=thread \
$(call cc-option,$(call cc-param,tsan-instrument-func-entry-exit=0) -fno-optimize-sibling-calls) \
$(call cc-option,$(call cc-param,tsan-instrument-read-before-write=1)) \
$(call cc-option,$(call cc-param,tsan-compound-read-before-write=1),$(call cc-option,$(call cc-param,tsan-instrument-read-before-write=1))) \
$(call cc-param,tsan-distinguish-volatile=1)
......@@ -16,6 +16,7 @@ fi
cat <<EOF |
asm-generic/atomic-instrumented.h
asm-generic/atomic-long.h
linux/atomic-arch-fallback.h
linux/atomic-fallback.h
EOF
while read header; do
......
......@@ -5,9 +5,10 @@ ATOMICDIR=$(dirname $0)
. ${ATOMICDIR}/atomic-tbl.sh
#gen_param_check(arg)
#gen_param_check(meta, arg)
gen_param_check()
{
local meta="$1"; shift
local arg="$1"; shift
local type="${arg%%:*}"
local name="$(gen_param_name "${arg}")"
......@@ -17,17 +18,25 @@ gen_param_check()
i) return;;
esac
# We don't write to constant parameters
[ ${type#c} != ${type} ] && rw="read"
if [ ${type#c} != ${type} ]; then
# We don't write to constant parameters.
rw="read"
elif [ "${meta}" != "s" ]; then
# An atomic RMW: if this parameter is not a constant, and this atomic is
# not just a 's'tore, this parameter is both read from and written to.
rw="read_write"
fi
printf "\tinstrument_atomic_${rw}(${name}, sizeof(*${name}));\n"
}
#gen_param_check(arg...)
#gen_params_checks(meta, arg...)
gen_params_checks()
{
local meta="$1"; shift
while [ "$#" -gt 0 ]; do
gen_param_check "$1"
gen_param_check "$meta" "$1"
shift;
done
}
......@@ -77,7 +86,7 @@ gen_proto_order_variant()
local ret="$(gen_ret_type "${meta}" "${int}")"
local params="$(gen_params "${int}" "${atomic}" "$@")"
local checks="$(gen_params_checks "$@")"
local checks="$(gen_params_checks "${meta}" "$@")"
local args="$(gen_args "$@")"
local retstmt="$(gen_ret_stmt "${meta}")"
......
......@@ -205,6 +205,8 @@ regex_c=(
'/\<DEVICE_ATTR_\(RW\|RO\|WO\)(\([[:alnum:]_]\+\)/dev_attr_\2/'
'/\<DRIVER_ATTR_\(RW\|RO\|WO\)(\([[:alnum:]_]\+\)/driver_attr_\2/'
'/\<\(DEFINE\|DECLARE\)_STATIC_KEY_\(TRUE\|FALSE\)\(\|_RO\)(\([[:alnum:]_]\+\)/\4/'
'/^SEQCOUNT_LOCKTYPE(\([^,]*\),[[:space:]]*\([^,]*\),[^)]*)/seqcount_\2_t/'
'/^SEQCOUNT_LOCKTYPE(\([^,]*\),[[:space:]]*\([^,]*\),[^)]*)/seqcount_\2_init/'
)
regex_kconfig=(
'/^[[:blank:]]*\(menu\|\)config[[:blank:]]\+\([[:alnum:]_]\+\)/\2/'
......
......@@ -3,9 +3,9 @@
C Self R W RMW Self R W DR DW RMW SV
-- ---- - - --- ---- - - -- -- --- --
Store, e.g., WRITE_ONCE() Y Y
Load, e.g., READ_ONCE() Y Y Y Y
Unsuccessful RMW operation Y Y Y Y
Relaxed store Y Y
Relaxed load Y Y Y Y
Relaxed RMW operation Y Y Y Y
rcu_dereference() Y Y Y Y
Successful *_acquire() R Y Y Y Y Y Y
Successful *_release() C Y Y Y W Y
......@@ -17,7 +17,12 @@ smp_mb__before_atomic() CP Y Y Y a a a a Y
smp_mb__after_atomic() CP a a Y Y Y Y Y Y
Key: C: Ordering is cumulative
Key: Relaxed: A relaxed operation is either READ_ONCE(), WRITE_ONCE(),
a *_relaxed() RMW operation, an unsuccessful RMW
operation, a non-value-returning RMW operation such
as atomic_inc(), or one of the atomic*_read() and
atomic*_set() family of operations.
C: Ordering is cumulative
P: Ordering propagates
R: Read, for example, READ_ONCE(), or read portion of RMW
W: Write, for example, WRITE_ONCE(), or write portion of RMW
......
This diff is collapsed.
This document provides "recipes", that is, litmus tests for commonly
occurring situations, as well as a few that illustrate subtly broken but
attractive nuisances. Many of these recipes include example code from
v4.13 of the Linux kernel.
v5.7 of the Linux kernel.
The first section covers simple special cases, the second section
takes off the training wheels to cover more involved examples,
......@@ -278,7 +278,7 @@ is present if the value loaded determines the address of a later access
first place (control dependency). Note that the term "data dependency"
is sometimes casually used to cover both address and data dependencies.
In lib/prime_numbers.c, the expand_to_next_prime() function invokes
In lib/math/prime_numbers.c, the expand_to_next_prime() function invokes
rcu_assign_pointer(), and the next_prime_number() function invokes
rcu_dereference(). This combination mediates access to a bit vector
that is expanded as additional primes are needed.
......
......@@ -120,7 +120,7 @@ o Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding
o Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and
semantics of the weak consistency model specification language
cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531
cat". CoRR abs/1608.07531 (2016). https://arxiv.org/abs/1608.07531
Memory-model comparisons
......
This diff is collapsed.
......@@ -63,10 +63,32 @@ BASIC USAGE: HERD7
==================
The memory model is used, in conjunction with "herd7", to exhaustively
explore the state space of small litmus tests.
explore the state space of small litmus tests. Documentation describing
the format, features, capabilities and limitations of these litmus
tests is available in tools/memory-model/Documentation/litmus-tests.txt.
For example, to run SB+fencembonceonces.litmus against the memory model:
Example litmus tests may be found in the Linux-kernel source tree:
tools/memory-model/litmus-tests/
Documentation/litmus-tests/
Several thousand more example litmus tests are available here:
https://github.com/paulmckrcu/litmus
https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
Documentation describing litmus tests and how to use them may be found
here:
tools/memory-model/Documentation/litmus-tests.txt
The remainder of this section uses the SB+fencembonceonces.litmus test
located in the tools/memory-model directory.
To run SB+fencembonceonces.litmus against the memory model:
$ cd $LINUX_SOURCE_TREE/tools/memory-model
$ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus
Here is the corresponding output:
......@@ -87,7 +109,11 @@ Here is the corresponding output:
The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that
this litmus test's "exists" clause can not be satisfied.
See "herd7 -help" or "herdtools7/doc/" for more information.
See "herd7 -help" or "herdtools7/doc/" for more information on running the
tool itself, but please be aware that this documentation is intended for
people who work on the memory model itself, that is, people making changes
to the tools/memory-model/linux-kernel.* files. It is not intended for
people focusing on writing, understanding, and running LKMM litmus tests.
=====================
......@@ -124,7 +150,11 @@ that during two million trials, the state specified in this litmus
test's "exists" clause was not reached.
And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/"
for more information.
for more information. And again, please be aware that this documentation
is intended for people who work on the memory model itself, that is,
people making changes to the tools/memory-model/linux-kernel.* files.
It is not intended for people focusing on writing, understanding, and
running LKMM litmus tests.
====================
......@@ -137,12 +167,21 @@ Documentation/cheatsheet.txt
Documentation/explanation.txt
Describes the memory model in detail.
Documentation/litmus-tests.txt
Describes the format, features, capabilities, and limitations
of the litmus tests that LKMM can evaluate.
Documentation/recipes.txt
Lists common memory-ordering patterns.
Documentation/references.txt
Provides background reading.
Documentation/simple.txt
Starting point for someone new to Linux-kernel concurrency.
And also for those needing a reminder of the simpler approaches
to concurrency!
linux-kernel.bell
Categorizes the relevant instructions, including memory
references, memory barriers, atomic read-modify-write operations,
......@@ -187,116 +226,3 @@ README
This file.
scripts Various scripts, see scripts/README.
===========
LIMITATIONS
===========
The Linux-kernel memory model (LKMM) has the following limitations:
1. Compiler optimizations are not accurately modeled. Of course,
the use of READ_ONCE() and WRITE_ONCE() limits the compiler's
ability to optimize, but under some circumstances it is possible
for the compiler to undermine the memory model. For more
information, see Documentation/explanation.txt (in particular,
the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING"
sections).
Note that this limitation in turn limits LKMM's ability to
accurately model address, control, and data dependencies.
For example, if the compiler can deduce the value of some variable
carrying a dependency, then the compiler can break that dependency
by substituting a constant of that value.
2. Multiple access sizes for a single variable are not supported,
and neither are misaligned or partially overlapping accesses.
3. Exceptions and interrupts are not modeled. In some cases,
this limitation can be overcome by modeling the interrupt or
exception with an additional process.
4. I/O such as MMIO or DMA is not supported.
5. Self-modifying code (such as that found in the kernel's
alternatives mechanism, function tracer, Berkeley Packet Filter
JIT compiler, and module loader) is not supported.
6. Complete modeling of all variants of atomic read-modify-write
operations, locking primitives, and RCU is not provided.
For example, call_rcu() and rcu_barrier() are not supported.
However, a substantial amount of support is provided for these
operations, as shown in the linux-kernel.def file.
a. When rcu_assign_pointer() is passed NULL, the Linux
kernel provides no ordering, but LKMM models this
case as a store release.
b. The "unless" RMW operations are not currently modeled:
atomic_long_add_unless(), atomic_inc_unless_negative(),
and atomic_dec_unless_positive(). These can be emulated
in litmus tests, for example, by using atomic_cmpxchg().
One exception to this limitation is atomic_add_unless(),
which is provided directly by herd7 (so there is no corresponding
definition in linux-kernel.def). Because atomic_add_unless() is
modeled by herd7, it can be used in litmus tests.
c. The call_rcu() function is not modeled. It can be
emulated in litmus tests by adding another process that
invokes synchronize_rcu() and the body of the callback
function, with (for example) a release-acquire from
the site of the emulated call_rcu() to the beginning
of the additional process.
d. The rcu_barrier() function is not modeled. It can be
emulated in litmus tests emulating call_rcu() via
(for example) a release-acquire from the end of each
additional call_rcu() process to the site of the
emulated rcu-barrier().
e. Although sleepable RCU (SRCU) is now modeled, there
are some subtle differences between its semantics and
those in the Linux kernel. For example, the kernel
might interpret the following sequence as two partially
overlapping SRCU read-side critical sections:
1 r1 = srcu_read_lock(&my_srcu);
2 do_something_1();
3 r2 = srcu_read_lock(&my_srcu);
4 do_something_2();
5 srcu_read_unlock(&my_srcu, r1);
6 do_something_3();
7 srcu_read_unlock(&my_srcu, r2);
In contrast, LKMM will interpret this as a nested pair of
SRCU read-side critical sections, with the outer critical
section spanning lines 1-7 and the inner critical section
spanning lines 3-5.
This difference would be more of a concern had anyone
identified a reasonable use case for partially overlapping
SRCU read-side critical sections. For more information,
please see: https://paulmck.livejournal.com/40593.html
f. Reader-writer locking is not modeled. It can be
emulated in litmus tests using atomic read-modify-write
operations.
The "herd7" tool has some additional limitations of its own, apart from
the memory model:
1. Non-trivial data structures such as arrays or structures are
not supported. However, pointers are supported, allowing trivial
linked lists to be constructed.
2. Dynamic memory allocation is not supported, although this can
be worked around in some cases by supplying multiple statically
allocated variables.
Some of these limitations may be overcome in the future, but others are
more likely to be addressed by incorporating the Linux-kernel memory model
into other tools.
Finally, please note that LKMM is subject to change as hardware, use cases,
and compilers evolve.
......@@ -528,6 +528,61 @@ static const char *uaccess_safe_builtin[] = {
"__tsan_write4",
"__tsan_write8",
"__tsan_write16",
"__tsan_read_write1",
"__tsan_read_write2",
"__tsan_read_write4",
"__tsan_read_write8",
"__tsan_read_write16",
"__tsan_atomic8_load",
"__tsan_atomic16_load",
"__tsan_atomic32_load",
"__tsan_atomic64_load",
"__tsan_atomic8_store",
"__tsan_atomic16_store",
"__tsan_atomic32_store",
"__tsan_atomic64_store",
"__tsan_atomic8_exchange",
"__tsan_atomic16_exchange",
"__tsan_atomic32_exchange",
"__tsan_atomic64_exchange",
"__tsan_atomic8_fetch_add",
"__tsan_atomic16_fetch_add",
"__tsan_atomic32_fetch_add",
"__tsan_atomic64_fetch_add",
"__tsan_atomic8_fetch_sub",
"__tsan_atomic16_fetch_sub",
"__tsan_atomic32_fetch_sub",
"__tsan_atomic64_fetch_sub",
"__tsan_atomic8_fetch_and",
"__tsan_atomic16_fetch_and",
"__tsan_atomic32_fetch_and",
"__tsan_atomic64_fetch_and",
"__tsan_atomic8_fetch_or",
"__tsan_atomic16_fetch_or",
"__tsan_atomic32_fetch_or",
"__tsan_atomic64_fetch_or",
"__tsan_atomic8_fetch_xor",
"__tsan_atomic16_fetch_xor",
"__tsan_atomic32_fetch_xor",
"__tsan_atomic64_fetch_xor",
"__tsan_atomic8_fetch_nand",
"__tsan_atomic16_fetch_nand",
"__tsan_atomic32_fetch_nand",
"__tsan_atomic64_fetch_nand",
"__tsan_atomic8_compare_exchange_strong",
"__tsan_atomic16_compare_exchange_strong",
"__tsan_atomic32_compare_exchange_strong",
"__tsan_atomic64_compare_exchange_strong",
"__tsan_atomic8_compare_exchange_weak",
"__tsan_atomic16_compare_exchange_weak",
"__tsan_atomic32_compare_exchange_weak",
"__tsan_atomic64_compare_exchange_weak",
"__tsan_atomic8_compare_exchange_val",
"__tsan_atomic16_compare_exchange_val",
"__tsan_atomic32_compare_exchange_val",
"__tsan_atomic64_compare_exchange_val",
"__tsan_atomic_thread_fence",
"__tsan_atomic_signal_fence",
/* KCOV */
"write_comp_data",
"check_kcov_mode",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment