Commit 66e1c94d authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/pti updates from Thomas Gleixner:
 "A mixed bag of fixes and updates for the ghosts which are hunting us.

  The scheduler fixes have been pulled into that branch to avoid
  conflicts.

   - A set of fixes to address a kthread_parkme() race which caused lost
     wakeups and loss of state.

   - A deadlock fix for stop_machine() solved by moving the wakeups
     outside of the stopper_lock held region.

   - A set of Spectre V1 array access restrictions. The possible
     problematic spots were discovered by Dan Carpenter's new checks in
     smatch.

   - Removal of an unused file which was forgotten when the rest of that
     functionality was removed"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vdso: Remove unused file
  perf/x86/cstate: Fix possible Spectre-v1 indexing for pkg_msr
  perf/x86/msr: Fix possible Spectre-v1 indexing in the MSR driver
  perf/x86: Fix possible Spectre-v1 indexing for x86_pmu::event_map()
  perf/x86: Fix possible Spectre-v1 indexing for hw_perf_event cache_*
  perf/core: Fix possible Spectre-v1 indexing for ->aux_pages[]
  sched/autogroup: Fix possible Spectre-v1 indexing for sched_prio_to_weight[]
  sched/core: Fix possible Spectre-v1 indexing for sched_prio_to_weight[]
  sched/core: Introduce set_special_state()
  kthread, sched/wait: Fix kthread_parkme() completion issue
  kthread, sched/wait: Fix kthread_parkme() wait-loop
  sched/fair: Fix the update of blocked load when newly idle
  stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock
parents 86a4ac43 e0f6d1a5
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/nospec.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/stacktrace.h> #include <asm/stacktrace.h>
...@@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) ...@@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
config = attr->config; config = attr->config;
cache_type = (config >> 0) & 0xff; cache_type = (config >> 0) & 0xff;
if (cache_type >= PERF_COUNT_HW_CACHE_MAX) if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
return -EINVAL; return -EINVAL;
cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX);
cache_op = (config >> 8) & 0xff; cache_op = (config >> 8) & 0xff;
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
return -EINVAL; return -EINVAL;
cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
cache_result = (config >> 16) & 0xff; cache_result = (config >> 16) & 0xff;
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL; return -EINVAL;
cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX);
val = hw_cache_event_ids[cache_type][cache_op][cache_result]; val = hw_cache_event_ids[cache_type][cache_op][cache_result];
...@@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event) ...@@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event)
if (attr->config >= x86_pmu.max_events) if (attr->config >= x86_pmu.max_events)
return -EINVAL; return -EINVAL;
attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events);
/* /*
* The generic map: * The generic map:
*/ */
......
...@@ -92,6 +92,7 @@ ...@@ -92,6 +92,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/perf_event.h> #include <linux/perf_event.h>
#include <linux/nospec.h>
#include <asm/cpu_device_id.h> #include <asm/cpu_device_id.h>
#include <asm/intel-family.h> #include <asm/intel-family.h>
#include "../perf_event.h" #include "../perf_event.h"
...@@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event) ...@@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
} else if (event->pmu == &cstate_pkg_pmu) { } else if (event->pmu == &cstate_pkg_pmu) {
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL; return -EINVAL;
cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
if (!pkg_msr[cfg].attr) if (!pkg_msr[cfg].attr)
return -EINVAL; return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr; event->hw.event_base = pkg_msr[cfg].msr;
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h> #include <linux/perf_event.h>
#include <linux/nospec.h>
#include <asm/intel-family.h> #include <asm/intel-family.h>
enum perf_msr_id { enum perf_msr_id {
...@@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event) ...@@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type) if (event->attr.type != event->pmu->type)
return -ENOENT; return -ENOENT;
if (cfg >= PERF_MSR_EVENT_MAX)
return -EINVAL;
/* unsupported modes and filters */ /* unsupported modes and filters */
if (event->attr.exclude_user || if (event->attr.exclude_user ||
event->attr.exclude_kernel || event->attr.exclude_kernel ||
...@@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event) ...@@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event)
event->attr.sample_period) /* no sampling */ event->attr.sample_period) /* no sampling */
return -EINVAL; return -EINVAL;
if (cfg >= PERF_MSR_EVENT_MAX)
return -EINVAL;
cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
if (!msr[cfg].attr) if (!msr[cfg].attr)
return -EINVAL; return -EINVAL;
......
...@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k); ...@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k);
int kthread_park(struct task_struct *k); int kthread_park(struct task_struct *k);
void kthread_unpark(struct task_struct *k); void kthread_unpark(struct task_struct *k);
void kthread_parkme(void); void kthread_parkme(void);
void kthread_park_complete(struct task_struct *k);
int kthreadd(void *unused); int kthreadd(void *unused);
extern struct task_struct *kthreadd_task; extern struct task_struct *kthreadd_task;
......
...@@ -112,17 +112,36 @@ struct task_group; ...@@ -112,17 +112,36 @@ struct task_group;
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
/*
* Special states are those that do not use the normal wait-loop pattern. See
* the comment with set_special_state().
*/
#define is_special_task_state(state) \
((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
#define __set_current_state(state_value) \ #define __set_current_state(state_value) \
do { \ do { \
WARN_ON_ONCE(is_special_task_state(state_value));\
current->task_state_change = _THIS_IP_; \ current->task_state_change = _THIS_IP_; \
current->state = (state_value); \ current->state = (state_value); \
} while (0) } while (0)
#define set_current_state(state_value) \ #define set_current_state(state_value) \
do { \ do { \
WARN_ON_ONCE(is_special_task_state(state_value));\
current->task_state_change = _THIS_IP_; \ current->task_state_change = _THIS_IP_; \
smp_store_mb(current->state, (state_value)); \ smp_store_mb(current->state, (state_value)); \
} while (0) } while (0)
#define set_special_state(state_value) \
do { \
unsigned long flags; /* may shadow */ \
WARN_ON_ONCE(!is_special_task_state(state_value)); \
raw_spin_lock_irqsave(&current->pi_lock, flags); \
current->task_state_change = _THIS_IP_; \
current->state = (state_value); \
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
#else #else
/* /*
* set_current_state() includes a barrier so that the write of current->state * set_current_state() includes a barrier so that the write of current->state
...@@ -144,8 +163,8 @@ struct task_group; ...@@ -144,8 +163,8 @@ struct task_group;
* *
* The above is typically ordered against the wakeup, which does: * The above is typically ordered against the wakeup, which does:
* *
* need_sleep = false; * need_sleep = false;
* wake_up_state(p, TASK_UNINTERRUPTIBLE); * wake_up_state(p, TASK_UNINTERRUPTIBLE);
* *
* Where wake_up_state() (and all other wakeup primitives) imply enough * Where wake_up_state() (and all other wakeup primitives) imply enough
* barriers to order the store of the variable against wakeup. * barriers to order the store of the variable against wakeup.
...@@ -154,12 +173,33 @@ struct task_group; ...@@ -154,12 +173,33 @@ struct task_group;
* once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
* TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
* *
* This is obviously fine, since they both store the exact same value. * However, with slightly different timing the wakeup TASK_RUNNING store can
* also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not
* a problem either because that will result in one extra go around the loop
* and our @cond test will save the day.
* *
* Also see the comments of try_to_wake_up(). * Also see the comments of try_to_wake_up().
*/ */
#define __set_current_state(state_value) do { current->state = (state_value); } while (0) #define __set_current_state(state_value) \
#define set_current_state(state_value) smp_store_mb(current->state, (state_value)) current->state = (state_value)
#define set_current_state(state_value) \
smp_store_mb(current->state, (state_value))
/*
* set_special_state() should be used for those states when the blocking task
* can not use the regular condition based wait-loop. In that case we must
* serialize against wakeups such that any possible in-flight TASK_RUNNING stores
* will not collide with our state change.
*/
#define set_special_state(state_value) \
do { \
unsigned long flags; /* may shadow */ \
raw_spin_lock_irqsave(&current->pi_lock, flags); \
current->state = (state_value); \
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
} while (0)
#endif #endif
/* Task command name length: */ /* Task command name length: */
......
...@@ -280,7 +280,7 @@ static inline void kernel_signal_stop(void) ...@@ -280,7 +280,7 @@ static inline void kernel_signal_stop(void)
{ {
spin_lock_irq(&current->sighand->siglock); spin_lock_irq(&current->sighand->siglock);
if (current->jobctl & JOBCTL_STOP_DEQUEUED) if (current->jobctl & JOBCTL_STOP_DEQUEUED)
__set_current_state(TASK_STOPPED); set_special_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock); spin_unlock_irq(&current->sighand->siglock);
schedule(); schedule();
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/circ_buf.h> #include <linux/circ_buf.h>
#include <linux/poll.h> #include <linux/poll.h>
#include <linux/nospec.h>
#include "internal.h" #include "internal.h"
...@@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) ...@@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
return NULL; return NULL;
/* AUX space */ /* AUX space */
if (pgoff >= rb->aux_pgoff) if (pgoff >= rb->aux_pgoff) {
return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]); int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages);
return virt_to_page(rb->aux_pages[aux_pgoff]);
}
} }
return __perf_mmap_to_page(rb, pgoff); return __perf_mmap_to_page(rb, pgoff);
......
...@@ -55,7 +55,6 @@ enum KTHREAD_BITS { ...@@ -55,7 +55,6 @@ enum KTHREAD_BITS {
KTHREAD_IS_PER_CPU = 0, KTHREAD_IS_PER_CPU = 0,
KTHREAD_SHOULD_STOP, KTHREAD_SHOULD_STOP,
KTHREAD_SHOULD_PARK, KTHREAD_SHOULD_PARK,
KTHREAD_IS_PARKED,
}; };
static inline void set_kthread_struct(void *kthread) static inline void set_kthread_struct(void *kthread)
...@@ -177,14 +176,12 @@ void *kthread_probe_data(struct task_struct *task) ...@@ -177,14 +176,12 @@ void *kthread_probe_data(struct task_struct *task)
static void __kthread_parkme(struct kthread *self) static void __kthread_parkme(struct kthread *self)
{ {
__set_current_state(TASK_PARKED); for (;;) {
while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) { set_current_state(TASK_PARKED);
if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags)) if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
complete(&self->parked); break;
schedule(); schedule();
__set_current_state(TASK_PARKED);
} }
clear_bit(KTHREAD_IS_PARKED, &self->flags);
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
} }
...@@ -194,6 +191,11 @@ void kthread_parkme(void) ...@@ -194,6 +191,11 @@ void kthread_parkme(void)
} }
EXPORT_SYMBOL_GPL(kthread_parkme); EXPORT_SYMBOL_GPL(kthread_parkme);
void kthread_park_complete(struct task_struct *k)
{
complete(&to_kthread(k)->parked);
}
static int kthread(void *_create) static int kthread(void *_create)
{ {
/* Copy data: it's on kthread's stack */ /* Copy data: it's on kthread's stack */
...@@ -450,22 +452,15 @@ void kthread_unpark(struct task_struct *k) ...@@ -450,22 +452,15 @@ void kthread_unpark(struct task_struct *k)
{ {
struct kthread *kthread = to_kthread(k); struct kthread *kthread = to_kthread(k);
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
/* /*
* We clear the IS_PARKED bit here as we don't wait * Newly created kthread was parked when the CPU was offline.
* until the task has left the park code. So if we'd * The binding was lost and we need to set it again.
* park before that happens we'd see the IS_PARKED bit
* which might be about to be cleared.
*/ */
if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
/* __kthread_bind(k, kthread->cpu, TASK_PARKED);
* Newly created kthread was parked when the CPU was offline.
* The binding was lost and we need to set it again. clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
*/ wake_up_state(k, TASK_PARKED);
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
__kthread_bind(k, kthread->cpu, TASK_PARKED);
wake_up_state(k, TASK_PARKED);
}
} }
EXPORT_SYMBOL_GPL(kthread_unpark); EXPORT_SYMBOL_GPL(kthread_unpark);
...@@ -488,12 +483,13 @@ int kthread_park(struct task_struct *k) ...@@ -488,12 +483,13 @@ int kthread_park(struct task_struct *k)
if (WARN_ON(k->flags & PF_EXITING)) if (WARN_ON(k->flags & PF_EXITING))
return -ENOSYS; return -ENOSYS;
if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) { if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); return -EBUSY;
if (k != current) {
wake_up_process(k); set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
wait_for_completion(&kthread->parked); if (k != current) {
} wake_up_process(k);
wait_for_completion(&kthread->parked);
} }
return 0; return 0;
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
/* /*
* Auto-group scheduling implementation: * Auto-group scheduling implementation:
*/ */
#include <linux/nospec.h>
#include "sched.h" #include "sched.h"
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
...@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) ...@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
static unsigned long next = INITIAL_JIFFIES; static unsigned long next = INITIAL_JIFFIES;
struct autogroup *ag; struct autogroup *ag;
unsigned long shares; unsigned long shares;
int err; int err, idx;
if (nice < MIN_NICE || nice > MAX_NICE) if (nice < MIN_NICE || nice > MAX_NICE)
return -EINVAL; return -EINVAL;
...@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) ...@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
next = HZ / 10 + jiffies; next = HZ / 10 + jiffies;
ag = autogroup_task_get(p); ag = autogroup_task_get(p);
shares = scale_load(sched_prio_to_weight[nice + 20]);
idx = array_index_nospec(nice + 20, 40);
shares = scale_load(sched_prio_to_weight[idx]);
down_write(&ag->lock); down_write(&ag->lock);
err = sched_group_set_shares(ag->tg, shares); err = sched_group_set_shares(ag->tg, shares);
......
...@@ -7,6 +7,9 @@ ...@@ -7,6 +7,9 @@
*/ */
#include "sched.h" #include "sched.h"
#include <linux/kthread.h>
#include <linux/nospec.h>
#include <asm/switch_to.h> #include <asm/switch_to.h>
#include <asm/tlb.h> #include <asm/tlb.h>
...@@ -2718,20 +2721,28 @@ static struct rq *finish_task_switch(struct task_struct *prev) ...@@ -2718,20 +2721,28 @@ static struct rq *finish_task_switch(struct task_struct *prev)
membarrier_mm_sync_core_before_usermode(mm); membarrier_mm_sync_core_before_usermode(mm);
mmdrop(mm); mmdrop(mm);
} }
if (unlikely(prev_state == TASK_DEAD)) { if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
if (prev->sched_class->task_dead) switch (prev_state) {
prev->sched_class->task_dead(prev); case TASK_DEAD:
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
/* /*
* Remove function-return probe instances associated with this * Remove function-return probe instances associated with this
* task and put them back on the free list. * task and put them back on the free list.
*/ */
kprobe_flush_task(prev); kprobe_flush_task(prev);
/* Task is done with its stack. */
put_task_stack(prev);
/* Task is done with its stack. */ put_task_struct(prev);
put_task_stack(prev); break;
put_task_struct(prev); case TASK_PARKED:
kthread_park_complete(prev);
break;
}
} }
tick_nohz_task_switch(); tick_nohz_task_switch();
...@@ -3498,23 +3509,8 @@ static void __sched notrace __schedule(bool preempt) ...@@ -3498,23 +3509,8 @@ static void __sched notrace __schedule(bool preempt)
void __noreturn do_task_dead(void) void __noreturn do_task_dead(void)
{ {
/*
* The setting of TASK_RUNNING by try_to_wake_up() may be delayed
* when the following two conditions become true.
* - There is race condition of mmap_sem (It is acquired by
* exit_mm()), and
* - SMI occurs before setting TASK_RUNINNG.
* (or hypervisor of virtual machine switches to other guest)
* As a result, we may become TASK_RUNNING after becoming TASK_DEAD
*
* To avoid it, we have to wait for releasing tsk->pi_lock which
* is held by try_to_wake_up()
*/
raw_spin_lock_irq(&current->pi_lock);
raw_spin_unlock_irq(&current->pi_lock);
/* Causes final put_task_struct in finish_task_switch(): */ /* Causes final put_task_struct in finish_task_switch(): */
__set_current_state(TASK_DEAD); set_special_state(TASK_DEAD);
/* Tell freezer to ignore us: */ /* Tell freezer to ignore us: */
current->flags |= PF_NOFREEZE; current->flags |= PF_NOFREEZE;
...@@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css, ...@@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
struct cftype *cft, s64 nice) struct cftype *cft, s64 nice)
{ {
unsigned long weight; unsigned long weight;
int idx;
if (nice < MIN_NICE || nice > MAX_NICE) if (nice < MIN_NICE || nice > MAX_NICE)
return -ERANGE; return -ERANGE;
weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO]; idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
idx = array_index_nospec(idx, 40);
weight = sched_prio_to_weight[idx];
return sched_group_set_shares(css_tg(css), scale_load(weight)); return sched_group_set_shares(css_tg(css), scale_load(weight));
} }
#endif #endif
......
...@@ -9792,6 +9792,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) ...@@ -9792,6 +9792,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
if (curr_cost > this_rq->max_idle_balance_cost) if (curr_cost > this_rq->max_idle_balance_cost)
this_rq->max_idle_balance_cost = curr_cost; this_rq->max_idle_balance_cost = curr_cost;
out:
/* /*
* While browsing the domains, we released the rq lock, a task could * While browsing the domains, we released the rq lock, a task could
* have been enqueued in the meantime. Since we're not going idle, * have been enqueued in the meantime. Since we're not going idle,
...@@ -9800,7 +9801,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) ...@@ -9800,7 +9801,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
if (this_rq->cfs.h_nr_running && !pulled_task) if (this_rq->cfs.h_nr_running && !pulled_task)
pulled_task = 1; pulled_task = 1;
out:
/* Move the next balance forward */ /* Move the next balance forward */
if (time_after(this_rq->next_balance, next_balance)) if (time_after(this_rq->next_balance, next_balance))
this_rq->next_balance = next_balance; this_rq->next_balance = next_balance;
......
...@@ -1961,14 +1961,27 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) ...@@ -1961,14 +1961,27 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
return; return;
} }
set_special_state(TASK_TRACED);
/* /*
* We're committing to trapping. TRACED should be visible before * We're committing to trapping. TRACED should be visible before
* TRAPPING is cleared; otherwise, the tracer might fail do_wait(). * TRAPPING is cleared; otherwise, the tracer might fail do_wait().
* Also, transition to TRACED and updates to ->jobctl should be * Also, transition to TRACED and updates to ->jobctl should be
* atomic with respect to siglock and should be done after the arch * atomic with respect to siglock and should be done after the arch
* hook as siglock is released and regrabbed across it. * hook as siglock is released and regrabbed across it.
*
* TRACER TRACEE
*
* ptrace_attach()
* [L] wait_on_bit(JOBCTL_TRAPPING) [S] set_special_state(TRACED)
* do_wait()
* set_current_state() smp_wmb();
* ptrace_do_wait()
* wait_task_stopped()
* task_stopped_code()
* [L] task_is_traced() [S] task_clear_jobctl_trapping();
*/ */
set_current_state(TASK_TRACED); smp_wmb();
current->last_siginfo = info; current->last_siginfo = info;
current->exit_code = exit_code; current->exit_code = exit_code;
...@@ -2176,7 +2189,7 @@ static bool do_signal_stop(int signr) ...@@ -2176,7 +2189,7 @@ static bool do_signal_stop(int signr)
if (task_participate_group_stop(current)) if (task_participate_group_stop(current))
notify = CLD_STOPPED; notify = CLD_STOPPED;
__set_current_state(TASK_STOPPED); set_special_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock); spin_unlock_irq(&current->sighand->siglock);
/* /*
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <linux/smpboot.h> #include <linux/smpboot.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/nmi.h> #include <linux/nmi.h>
#include <linux/sched/wake_q.h>
/* /*
* Structure to determine completion condition and record errors. May * Structure to determine completion condition and record errors. May
...@@ -65,27 +66,31 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done) ...@@ -65,27 +66,31 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done)
} }
static void __cpu_stop_queue_work(struct cpu_stopper *stopper, static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
struct cpu_stop_work *work) struct cpu_stop_work *work,
struct wake_q_head *wakeq)
{ {
list_add_tail(&work->list, &stopper->works); list_add_tail(&work->list, &stopper->works);
wake_up_process(stopper->thread); wake_q_add(wakeq, stopper->thread);
} }
/* queue @work to @stopper. if offline, @work is completed immediately */ /* queue @work to @stopper. if offline, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{ {
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
DEFINE_WAKE_Q(wakeq);
unsigned long flags; unsigned long flags;
bool enabled; bool enabled;
spin_lock_irqsave(&stopper->lock, flags); spin_lock_irqsave(&stopper->lock, flags);
enabled = stopper->enabled; enabled = stopper->enabled;
if (enabled) if (enabled)
__cpu_stop_queue_work(stopper, work); __cpu_stop_queue_work(stopper, work, &wakeq);
else if (work->done) else if (work->done)
cpu_stop_signal_done(work->done); cpu_stop_signal_done(work->done);
spin_unlock_irqrestore(&stopper->lock, flags); spin_unlock_irqrestore(&stopper->lock, flags);
wake_up_q(&wakeq);
return enabled; return enabled;
} }
...@@ -229,6 +234,7 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, ...@@ -229,6 +234,7 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
{ {
struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
DEFINE_WAKE_Q(wakeq);
int err; int err;
retry: retry:
spin_lock_irq(&stopper1->lock); spin_lock_irq(&stopper1->lock);
...@@ -252,8 +258,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, ...@@ -252,8 +258,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
goto unlock; goto unlock;
err = 0; err = 0;
__cpu_stop_queue_work(stopper1, work1); __cpu_stop_queue_work(stopper1, work1, &wakeq);
__cpu_stop_queue_work(stopper2, work2); __cpu_stop_queue_work(stopper2, work2, &wakeq);
unlock: unlock:
spin_unlock(&stopper2->lock); spin_unlock(&stopper2->lock);
spin_unlock_irq(&stopper1->lock); spin_unlock_irq(&stopper1->lock);
...@@ -263,6 +269,9 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, ...@@ -263,6 +269,9 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
cpu_relax(); cpu_relax();
goto retry; goto retry;
} }
wake_up_q(&wakeq);
return err; return err;
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment