Commit a57eb940 authored by Paul E. McKenney's avatar Paul E. McKenney

rcu: Add a TINY_PREEMPT_RCU

Implement a small-memory-footprint uniprocessor-only implementation of
preemptible RCU.  This implementation uses but a single blocked-tasks
list rather than the combinatorial number used per leaf rcu_node by
TREE_PREEMPT_RCU, which reduces memory consumption and greatly simplifies
processing.  This version also takes advantage of uniprocessor execution
to accelerate grace periods in the case where there are no readers.

The general design is otherwise broadly similar to that of TREE_PREEMPT_RCU.

This implementation is a step towards having RCU implementation driven
off of the SMP and PREEMPT kernel configuration variables, which can
happen once this implementation has accumulated sufficient experience.

Removed ACCESS_ONCE() from __rcu_read_unlock() and added barrier() as
suggested by Steve Rostedt in order to avoid the compiler-reordering
issue noted by Mathieu Desnoyers (http://lkml.org/lkml/2010/8/16/183).

As can be seen below, CONFIG_TINY_PREEMPT_RCU represents almost 5Kbyte
savings compared to CONFIG_TREE_PREEMPT_RCU.  Of course, for non-real-time
workloads, CONFIG_TINY_RCU is even better.

	CONFIG_TREE_PREEMPT_RCU

	   text	   data	    bss	    dec	   filename
	     13	      0	      0	     13	   kernel/rcupdate.o
	   6170	    825	     28	   7023	   kernel/rcutree.o
				   ----
				   7026    Total

	CONFIG_TINY_PREEMPT_RCU

	   text	   data	    bss	    dec	   filename
	     13	      0	      0	     13	   kernel/rcupdate.o
	   2081	     81	      8	   2170	   kernel/rcutiny.o
				   ----
				   2183    Total

	CONFIG_TINY_RCU (non-preemptible)

	   text	   data	    bss	    dec	   filename
	     13	      0	      0	     13	   kernel/rcupdate.o
	    719	     25	      0	    744	   kernel/rcutiny.o
				    ---
				    757    Total
Requested-by: default avatarLoïc Minier <loic.minier@canonical.com>
Signed-off-by: default avatarPaul E. McKenney <paulmck@linux.vnet.ibm.com>
parent 4d87ffad
......@@ -139,7 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
#endif
#if defined(CONFIG_NO_HZ)
#if defined(CONFIG_TINY_RCU)
#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
extern void rcu_enter_nohz(void);
extern void rcu_exit_nohz(void);
......
......@@ -82,11 +82,17 @@ extern struct group_info init_groups;
# define CAP_INIT_BSET CAP_FULL_SET
#ifdef CONFIG_TREE_PREEMPT_RCU
#define INIT_TASK_RCU_TREE_PREEMPT() \
.rcu_blocked_node = NULL,
#else
#define INIT_TASK_RCU_TREE_PREEMPT(tsk)
#endif
#ifdef CONFIG_PREEMPT_RCU
#define INIT_TASK_RCU_PREEMPT(tsk) \
.rcu_read_lock_nesting = 0, \
.rcu_read_unlock_special = 0, \
.rcu_blocked_node = NULL, \
.rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry),
.rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \
INIT_TASK_RCU_TREE_PREEMPT()
#else
#define INIT_TASK_RCU_PREEMPT(tsk)
#endif
......
......@@ -58,7 +58,6 @@ struct rcu_head {
};
/* Exported common interfaces */
extern void rcu_barrier(void);
extern void rcu_barrier_bh(void);
extern void rcu_barrier_sched(void);
extern void synchronize_sched_expedited(void);
......@@ -69,7 +68,7 @@ extern void rcu_init(void);
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
#include <linux/rcutree.h>
#elif defined(CONFIG_TINY_RCU)
#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
#include <linux/rcutiny.h>
#else
#error "Unknown RCU implementation specified to kernel configuration"
......
......@@ -29,66 +29,51 @@
void rcu_sched_qs(int cpu);
void rcu_bh_qs(int cpu);
static inline void rcu_note_context_switch(int cpu)
{
rcu_sched_qs(cpu);
}
#ifdef CONFIG_TINY_RCU
#define __rcu_read_lock() preempt_disable()
#define __rcu_read_unlock() preempt_enable()
#else /* #ifdef CONFIG_TINY_RCU */
void __rcu_read_lock(void);
void __rcu_read_unlock(void);
#endif /* #else #ifdef CONFIG_TINY_RCU */
#define __rcu_read_lock_bh() local_bh_disable()
#define __rcu_read_unlock_bh() local_bh_enable()
#define call_rcu_sched call_rcu
extern void call_rcu_sched(struct rcu_head *head,
void (*func)(struct rcu_head *rcu));
#define rcu_init_sched() do { } while (0)
extern void rcu_check_callbacks(int cpu, int user);
static inline int rcu_needs_cpu(int cpu)
{
return 0;
}
extern void synchronize_sched(void);
/*
* Return the number of grace periods.
*/
static inline long rcu_batches_completed(void)
{
return 0;
}
#ifdef CONFIG_TINY_RCU
/*
* Return the number of bottom-half grace periods.
*/
static inline long rcu_batches_completed_bh(void)
{
return 0;
}
#define call_rcu call_rcu_sched
static inline void rcu_force_quiescent_state(void)
static inline void synchronize_rcu(void)
{
synchronize_sched();
}
static inline void rcu_bh_force_quiescent_state(void)
static inline void synchronize_rcu_expedited(void)
{
synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */
}
static inline void rcu_sched_force_quiescent_state(void)
static inline void rcu_barrier(void)
{
rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */
}
extern void synchronize_sched(void);
#else /* #ifdef CONFIG_TINY_RCU */
static inline void synchronize_rcu(void)
{
synchronize_sched();
}
void synchronize_rcu(void);
void rcu_barrier(void);
void synchronize_rcu_expedited(void);
static inline void synchronize_rcu_bh(void)
{
synchronize_sched();
}
#endif /* #else #ifdef CONFIG_TINY_RCU */
static inline void synchronize_rcu_expedited(void)
static inline void synchronize_rcu_bh(void)
{
synchronize_sched();
}
......@@ -117,15 +102,82 @@ static inline void rcu_exit_nohz(void)
#endif /* #else #ifdef CONFIG_NO_HZ */
#ifdef CONFIG_TINY_RCU
static inline void rcu_preempt_note_context_switch(void)
{
}
static inline void exit_rcu(void)
{
}
static inline int rcu_needs_cpu(int cpu)
{
return 0;
}
static inline int rcu_preempt_depth(void)
{
return 0;
}
#else /* #ifdef CONFIG_TINY_RCU */
void rcu_preempt_note_context_switch(void);
extern void exit_rcu(void);
int rcu_preempt_needs_cpu(void);
static inline int rcu_needs_cpu(int cpu)
{
return rcu_preempt_needs_cpu();
}
/*
* Defined as macro as it is a very low level header
* included from areas that don't even know about current
* FIXME: combine with include/linux/rcutree.h into rcupdate.h.
*/
#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
#endif /* #else #ifdef CONFIG_TINY_RCU */
static inline void rcu_note_context_switch(int cpu)
{
rcu_sched_qs(cpu);
rcu_preempt_note_context_switch();
}
extern void rcu_check_callbacks(int cpu, int user);
/*
* Return the number of grace periods.
*/
static inline long rcu_batches_completed(void)
{
return 0;
}
/*
* Return the number of bottom-half grace periods.
*/
static inline long rcu_batches_completed_bh(void)
{
return 0;
}
static inline void rcu_force_quiescent_state(void)
{
}
static inline void rcu_bh_force_quiescent_state(void)
{
}
static inline void rcu_sched_force_quiescent_state(void)
{
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
extern int rcu_scheduler_active __read_mostly;
......
......@@ -95,6 +95,8 @@ static inline void synchronize_rcu_bh_expedited(void)
synchronize_sched_expedited();
}
extern void rcu_barrier(void);
extern void rcu_check_callbacks(int cpu, int user);
extern long rcu_batches_completed(void);
......
......@@ -1202,11 +1202,13 @@ struct task_struct {
unsigned int policy;
cpumask_t cpus_allowed;
#ifdef CONFIG_TREE_PREEMPT_RCU
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
char rcu_read_unlock_special;
struct rcu_node *rcu_blocked_node;
struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCU
struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
......@@ -1740,7 +1742,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)
#ifdef CONFIG_TREE_PREEMPT_RCU
#ifdef CONFIG_PREEMPT_RCU
#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
......@@ -1749,7 +1751,9 @@ static inline void rcu_copy_process(struct task_struct *p)
{
p->rcu_read_lock_nesting = 0;
p->rcu_read_unlock_special = 0;
#ifdef CONFIG_TREE_PREEMPT_RCU
p->rcu_blocked_node = NULL;
#endif
INIT_LIST_HEAD(&p->rcu_node_entry);
}
......
......@@ -348,7 +348,7 @@ config TREE_RCU
smaller systems.
config TREE_PREEMPT_RCU
bool "Preemptable tree-based hierarchical RCU"
bool "Preemptible tree-based hierarchical RCU"
depends on PREEMPT
help
This option selects the RCU implementation that is
......@@ -366,8 +366,22 @@ config TINY_RCU
is not required. This option greatly reduces the
memory footprint of RCU.
config TINY_PREEMPT_RCU
bool "Preemptible UP-only small-memory-footprint RCU"
depends on !SMP && PREEMPT
help
This option selects the RCU implementation that is designed
for real-time UP systems. This option greatly reduces the
memory footprint of RCU.
endchoice
config PREEMPT_RCU
def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU )
help
This option enables preemptible-RCU code that is common between
the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
config RCU_TRACE
bool "Enable tracing for RCU"
depends on TREE_RCU || TREE_PREEMPT_RCU
......
......@@ -86,6 +86,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o
obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
obj-$(CONFIG_TINY_RCU) += rcutiny.o
obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
......
......@@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/* Forward declarations for rcutiny_plugin.h. */
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
static void __call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu),
struct rcu_ctrlblk *rcp);
#include "rcutiny_plugin.h"
#ifdef CONFIG_NO_HZ
static long rcu_dynticks_nesting = 1;
......@@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user)
rcu_sched_qs(cpu);
else if (!in_softirq())
rcu_bh_qs(cpu);
rcu_preempt_check_callbacks();
}
/*
......@@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
*rcp->donetail = NULL;
if (rcp->curtail == rcp->donetail)
rcp->curtail = &rcp->rcucblist;
rcu_preempt_remove_callbacks(rcp);
rcp->donetail = &rcp->rcucblist;
local_irq_restore(flags);
......@@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
{
__rcu_process_callbacks(&rcu_sched_ctrlblk);
__rcu_process_callbacks(&rcu_bh_ctrlblk);
rcu_preempt_process_callbacks();
}
/*
......@@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head,
}
/*
* Post an RCU callback to be invoked after the end of an RCU grace
* Post an RCU callback to be invoked after the end of an RCU-sched grace
* period. But since we have but one CPU, that would be after any
* quiescent state.
*/
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_sched_ctrlblk);
}
EXPORT_SYMBOL_GPL(call_rcu);
EXPORT_SYMBOL_GPL(call_rcu_sched);
/*
* Post an RCU bottom-half callback to be invoked after any subsequent
......@@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
void rcu_barrier(void)
{
struct rcu_synchronize rcu;
init_rcu_head_on_stack(&rcu.head);
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
void rcu_barrier_bh(void)
{
struct rcu_synchronize rcu;
......@@ -289,5 +286,3 @@ void __init rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}
#include "rcutiny_plugin.h"
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment