Commit a961e409 authored by Mathieu Desnoyers, committed by Linus Torvalds

membarrier: Provide register expedited private command

This introduces a "register private expedited" membarrier command which
allows eventual removal of important memory barrier constraints on the
scheduler fast-paths. It changes how the "private expedited" membarrier
command (new to 4.14) is used from user-space.

This new command allows processes to register their intent to use the
private expedited command.  This affects how the expedited private
command introduced in 4.14-rc is meant to be used, and should be merged
before 4.14 final.

Processes are now required to register before using
MEMBARRIER_CMD_PRIVATE_EXPEDITED, otherwise that command returns EPERM.

This fixes a problem that arose when designing requested extensions to
sys_membarrier() to allow JITs to efficiently flush old code from
instruction caches.  Several potential algorithms are much less painful
if the user registers intent to use this functionality early on, for
example, before the process spawns the second thread.  Registering at
this time removes the need to interrupt each and every thread in that
process at the first expedited sys_membarrier() system call.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 96f893ab
...@@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename, ...@@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename,
/* execve succeeded */ /* execve succeeded */
current->fs->in_exec = 0; current->fs->in_exec = 0;
current->in_execve = 0; current->in_execve = 0;
membarrier_execve(current);
acct_update_integrals(current); acct_update_integrals(current);
task_numa_free(current); task_numa_free(current);
free_bprm(bprm); free_bprm(bprm);
......
...@@ -445,6 +445,9 @@ struct mm_struct { ...@@ -445,6 +445,9 @@ struct mm_struct {
unsigned long flags; /* Must use atomic bitops to access the bits */ unsigned long flags; /* Must use atomic bitops to access the bits */
struct core_state *core_state; /* coredumping support */ struct core_state *core_state; /* coredumping support */
#ifdef CONFIG_MEMBARRIER
atomic_t membarrier_state;
#endif
#ifdef CONFIG_AIO #ifdef CONFIG_AIO
spinlock_t ioctx_lock; spinlock_t ioctx_lock;
struct kioctx_table __rcu *ioctx_table; struct kioctx_table __rcu *ioctx_table;
......
...@@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) ...@@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
current->flags = (current->flags & ~PF_MEMALLOC) | flags; current->flags = (current->flags & ~PF_MEMALLOC) | flags;
} }
#ifdef CONFIG_MEMBARRIER
/*
 * Bit values kept in mm_struct::membarrier_state (an atomic_t).
 */
enum {
/*
 * Set by MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED; the private expedited
 * command returns -EPERM while this bit is clear.
 */
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0),
/* NOTE(review): not referenced in the code visible here; purpose to be confirmed. */
MEMBARRIER_STATE_SWITCH_MM = (1U << 1),
};
static inline void membarrier_execve(struct task_struct *t)
{
atomic_set(&t->mm->membarrier_state, 0);
}
#else
/*
 * Without CONFIG_MEMBARRIER the mm carries no membarrier_state field,
 * so there is nothing to reset on exec: intentionally a no-op.
 */
static inline void membarrier_execve(struct task_struct *t) { }
#endif
#endif /* _LINUX_SCHED_MM_H */ #endif /* _LINUX_SCHED_MM_H */
...@@ -52,10 +52,18 @@ ...@@ -52,10 +52,18 @@
* (non-running threads are de facto in such a * (non-running threads are de facto in such a
* state). This only covers threads from the * state). This only covers threads from the
* same processes as the caller thread. This * same processes as the caller thread. This
* command returns 0. The "expedited" commands * command returns 0 on success. The
* complete faster than the non-expedited ones, * "expedited" commands complete faster than
* they never block, but have the downside of * the non-expedited ones, they never block,
* causing extra overhead. * but have the downside of causing extra
* overhead. A process needs to register its
* intent to use the private expedited command
* prior to using it, otherwise this command
* returns -EPERM.
* @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
* Register the process intent to use
* MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always
* returns 0.
* *
* Command to be passed to the membarrier system call. The commands need to * Command to be passed to the membarrier system call. The commands need to
* be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
...@@ -67,6 +75,7 @@ enum membarrier_cmd { ...@@ -67,6 +75,7 @@ enum membarrier_cmd {
/* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */ /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
/* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */ /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3),
MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4),
}; };
#endif /* _UAPI_LINUX_MEMBARRIER_H */ #endif /* _UAPI_LINUX_MEMBARRIER_H */
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/membarrier.h> #include <linux/membarrier.h>
#include <linux/tick.h> #include <linux/tick.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/atomic.h>
#include "sched.h" /* for cpu_rq(). */ #include "sched.h" /* for cpu_rq(). */
...@@ -26,21 +27,26 @@ ...@@ -26,21 +27,26 @@
* except MEMBARRIER_CMD_QUERY. * except MEMBARRIER_CMD_QUERY.
*/ */
#define MEMBARRIER_CMD_BITMASK \ #define MEMBARRIER_CMD_BITMASK \
(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED) (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
static void ipi_mb(void *info) static void ipi_mb(void *info)
{ {
smp_mb(); /* IPIs should be serializing but paranoid. */ smp_mb(); /* IPIs should be serializing but paranoid. */
} }
static void membarrier_private_expedited(void) static int membarrier_private_expedited(void)
{ {
int cpu; int cpu;
bool fallback = false; bool fallback = false;
cpumask_var_t tmpmask; cpumask_var_t tmpmask;
if (!(atomic_read(&current->mm->membarrier_state)
& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
return -EPERM;
if (num_online_cpus() == 1) if (num_online_cpus() == 1)
return; return 0;
/* /*
* Matches memory barriers around rq->curr modification in * Matches memory barriers around rq->curr modification in
...@@ -94,6 +100,24 @@ static void membarrier_private_expedited(void) ...@@ -94,6 +100,24 @@ static void membarrier_private_expedited(void)
* rq->curr modification in scheduler. * rq->curr modification in scheduler.
*/ */
smp_mb(); /* exit from system call is not a mb */ smp_mb(); /* exit from system call is not a mb */
return 0;
}
/*
 * Register the calling task's mm for use of the private expedited
 * membarrier command by setting MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY
 * in mm->membarrier_state. Returns nothing; if the bit is already set the
 * function returns without writing to the state word.
 */
static void membarrier_register_private_expedited(void)
{
struct task_struct *p = current;
struct mm_struct *mm = p->mm;
/*
 * We need to consider threads belonging to different thread
 * groups, which use the same mm. (CLONE_VM but not
 * CLONE_THREAD).
 */
/* Already registered: nothing to do. */
if (atomic_read(&mm->membarrier_state)
& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
return;
/* OR the bit in so that any other state bits are left untouched. */
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
&mm->membarrier_state);
} }
/** /**
...@@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) ...@@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
synchronize_sched(); synchronize_sched();
return 0; return 0;
case MEMBARRIER_CMD_PRIVATE_EXPEDITED: case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
membarrier_private_expedited(); return membarrier_private_expedited();
case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
membarrier_register_private_expedited();
return 0; return 0;
default: default:
return -EINVAL; return -EINVAL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment