Commit cbae6bac authored by Mathieu Desnoyers's avatar Mathieu Desnoyers Committed by Peter Zijlstra

rseq: Extend struct rseq with numa node id

Adding the NUMA node id to struct rseq is a straightforward thing to do,
and a good way to figure out if anything in the user-space ecosystem
prevents extending struct rseq.

This NUMA node id field allows memory allocators such as tcmalloc to
take advantage of fast access to the current NUMA node id to perform
NUMA-aware memory allocation.

It can also be useful for implementing fast-paths for NUMA-aware
user-space mutexes.

It also allows implementing getcpu(2) purely in user-space.
Signed-off-by: default avatarMathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221122203932.231377-5-mathieu.desnoyers@efficios.com
parent ee3e3ac0
...@@ -16,13 +16,15 @@ TRACE_EVENT(rseq_update, ...@@ -16,13 +16,15 @@ TRACE_EVENT(rseq_update,
TP_STRUCT__entry( TP_STRUCT__entry(
__field(s32, cpu_id) __field(s32, cpu_id)
__field(s32, node_id)
), ),
TP_fast_assign( TP_fast_assign(
__entry->cpu_id = raw_smp_processor_id(); __entry->cpu_id = raw_smp_processor_id();
__entry->node_id = cpu_to_node(__entry->cpu_id);
), ),
TP_printk("cpu_id=%d", __entry->cpu_id) TP_printk("cpu_id=%d node_id=%d", __entry->cpu_id, __entry->node_id)
); );
TRACE_EVENT(rseq_ip_fixup, TRACE_EVENT(rseq_ip_fixup,
......
...@@ -131,6 +131,14 @@ struct rseq { ...@@ -131,6 +131,14 @@ struct rseq {
*/ */
__u32 flags; __u32 flags;
/*
* Restartable sequences node_id field. Updated by the kernel. Read by
* user-space with single-copy atomicity semantics. This field should
* only be read by the thread which registered this data structure.
* Aligned on 32-bit. Contains the current NUMA node ID.
*/
__u32 node_id;
/* /*
* Flexible array member at end of structure, after last feature field. * Flexible array member at end of structure, after last feature field.
*/ */
......
...@@ -85,15 +85,17 @@ ...@@ -85,15 +85,17 @@
* F1. <failure> * F1. <failure>
*/ */
static int rseq_update_cpu_id(struct task_struct *t) static int rseq_update_cpu_node_id(struct task_struct *t)
{ {
u32 cpu_id = raw_smp_processor_id();
struct rseq __user *rseq = t->rseq; struct rseq __user *rseq = t->rseq;
u32 cpu_id = raw_smp_processor_id();
u32 node_id = cpu_to_node(cpu_id);
if (!user_write_access_begin(rseq, t->rseq_len)) if (!user_write_access_begin(rseq, t->rseq_len))
goto efault; goto efault;
unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end); unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end);
unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end); unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end);
unsafe_put_user(node_id, &rseq->node_id, efault_end);
/* /*
* Additional feature fields added after ORIG_RSEQ_SIZE * Additional feature fields added after ORIG_RSEQ_SIZE
* need to be conditionally updated only if * need to be conditionally updated only if
...@@ -109,9 +111,9 @@ static int rseq_update_cpu_id(struct task_struct *t) ...@@ -109,9 +111,9 @@ static int rseq_update_cpu_id(struct task_struct *t)
return -EFAULT; return -EFAULT;
} }
static int rseq_reset_rseq_cpu_id(struct task_struct *t) static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
{ {
u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED; u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0;
/* /*
* Reset cpu_id_start to its initial state (0). * Reset cpu_id_start to its initial state (0).
...@@ -125,6 +127,11 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t) ...@@ -125,6 +127,11 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
*/ */
if (put_user(cpu_id, &t->rseq->cpu_id)) if (put_user(cpu_id, &t->rseq->cpu_id))
return -EFAULT; return -EFAULT;
/*
* Reset node_id to its initial state (0).
*/
if (put_user(node_id, &t->rseq->node_id))
return -EFAULT;
/* /*
* Additional feature fields added after ORIG_RSEQ_SIZE * Additional feature fields added after ORIG_RSEQ_SIZE
* need to be conditionally reset only if * need to be conditionally reset only if
...@@ -314,7 +321,7 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) ...@@ -314,7 +321,7 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
if (unlikely(ret < 0)) if (unlikely(ret < 0))
goto error; goto error;
} }
if (unlikely(rseq_update_cpu_id(t))) if (unlikely(rseq_update_cpu_node_id(t)))
goto error; goto error;
return; return;
...@@ -361,7 +368,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, ...@@ -361,7 +368,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
return -EINVAL; return -EINVAL;
if (current->rseq_sig != sig) if (current->rseq_sig != sig)
return -EPERM; return -EPERM;
ret = rseq_reset_rseq_cpu_id(current); ret = rseq_reset_rseq_cpu_node_id(current);
if (ret) if (ret)
return ret; return ret;
current->rseq = NULL; current->rseq = NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment