Commit 9b5b7751 authored by Srivatsa Vaddagiri, committed by Ingo Molnar

sched: clean up code under CONFIG_FAIR_GROUP_SCHED

With a view to supporting user-id based fair scheduling (and not just
container-based fair scheduling), this patch renames several functions
and makes them independent of whether they are being used for container-
or user-id based fair scheduling.

Also fix a problem reported by KAMEZAWA Hiroyuki (wrt allocating an
undersized array for tg->cfs_rq[] and tg->se[]).
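
As a rough sketch of how the renamed interface is expected to be driven
(illustration only, not part of this patch; the wrapper function and the
share value 1024 are made-up examples), a user-id or container based
grouping layer could do something like:

    /* illustrative caller of the new group-scheduling interface */
    static int example_setup_group(struct task_struct *tsk)
    {
    	struct task_grp *tg;

    	tg = sched_create_group();	/* returns ERR_PTR(-ENOMEM) on failure */
    	if (IS_ERR(tg))
    		return PTR_ERR(tg);

    	/* 1024 == default (nice-0) weight; any sane share value works */
    	sched_group_set_shares(tg, 1024);

    	/* caller records tsk's new group first, then notifies the scheduler */
    	sched_move_task(tsk);

    	return 0;
    }

A group created this way is torn down with sched_destroy_group(tg), which
defers freeing of the per-cpu cfs_rq's via RCU.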
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
parent 75c28ace
include/linux/sched.h
@@ -136,6 +136,7 @@ extern unsigned long weighted_cpuload(const int cpu);
 
 struct seq_file;
 struct cfs_rq;
+struct task_grp;
 #ifdef CONFIG_SCHED_DEBUG
 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
 extern void proc_sched_set_task(struct task_struct *p);
@@ -1834,6 +1835,17 @@ extern int sched_mc_power_savings, sched_smt_power_savings;
 
 extern void normalize_rt_tasks(void);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+extern struct task_grp init_task_grp;
+
+extern struct task_grp *sched_create_group(void);
+extern void sched_destroy_group(struct task_grp *tg);
+extern void sched_move_task(struct task_struct *tsk);
+extern int sched_group_set_shares(struct task_grp *tg, unsigned long shares);
+
+#endif
+
 #ifdef CONFIG_TASK_XACCT
 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
 {
init/Kconfig
@@ -282,13 +282,12 @@ config CPUSETS
 	  Say N if unsure.
 
 config FAIR_GROUP_SCHED
-	bool "Fair group scheduler"
-	depends on EXPERIMENTAL && CONTAINERS
+	bool "Fair group cpu scheduler"
+	default n
+	depends on EXPERIMENTAL
 	help
-	  This option enables you to group tasks and control CPU resource
-	  allocation to such groups.
+	  This feature lets cpu scheduler recognize task groups and control cpu
+	  bandwidth allocation to such task groups.
 
-	  Say N if unsure.
-
 config SYSFS_DEPRECATED
 	bool "Create deprecated sysfs files"
kernel/sched.c
@@ -173,13 +173,10 @@ struct rt_prio_array {
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-#include <linux/container.h>
-
 struct cfs_rq;
 
 /* task group related information */
 struct task_grp {
-	struct container_subsys_state css;
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
 	/* runqueue "owned" by this group on each cpu */
@@ -192,22 +189,28 @@ static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 
-static struct sched_entity *init_sched_entity_p[CONFIG_NR_CPUS];
-static struct cfs_rq *init_cfs_rq_p[CONFIG_NR_CPUS];
+static struct sched_entity *init_sched_entity_p[NR_CPUS];
+static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
 
 /* Default task group.
  *	Every task in system belong to this group at bootup.
  */
-static struct task_grp init_task_grp = {
-	.se     = init_sched_entity_p,
-	.cfs_rq = init_cfs_rq_p,
-};
+struct task_grp init_task_grp = {
+	.se     = init_sched_entity_p,
+	.cfs_rq = init_cfs_rq_p,
+};
+
+#define INIT_TASK_GRP_LOAD	NICE_0_LOAD
+static int init_task_grp_load = INIT_TASK_GRP_LOAD;
 
 /* return group to which a task belongs */
 static inline struct task_grp *task_grp(struct task_struct *p)
 {
-	return container_of(task_subsys_state(p, cpu_subsys_id),
-				struct task_grp, css);
+	struct task_grp *tg;
+
+	tg = &init_task_grp;
+
+	return tg;
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -250,6 +253,7 @@ struct cfs_rq {
 	 */
 	struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */
 	struct task_grp *tg;	/* group that "owns" this runqueue */
+	struct rcu_head rcu;
 #endif
 };
@@ -6513,11 +6517,12 @@ void __init sched_init(void)
 			init_sched_entity_p[i] = se;
 			se->cfs_rq = &rq->cfs;
 			se->my_q = cfs_rq;
-			se->load.weight = NICE_0_LOAD;
-			se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD);
+			se->load.weight = init_task_grp_load;
+			se->load.inv_weight =
+				div64_64(1ULL<<32, init_task_grp_load);
 			se->parent = NULL;
 		}
-		init_task_grp.shares = NICE_0_LOAD;
+		init_task_grp.shares = init_task_grp_load;
 #endif
 
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -6707,45 +6712,28 @@ void set_curr_task(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-/* return corresponding task_grp object of a container */
-static inline struct task_grp *container_tg(struct container *cont)
-{
-	return container_of(container_subsys_state(cont, cpu_subsys_id),
-				struct task_grp, css);
-}
-
 /* allocate runqueue etc for a new task group */
-static struct container_subsys_state *
-sched_create_group(struct container_subsys *ss, struct container *cont)
+struct task_grp *sched_create_group(void)
 {
 	struct task_grp *tg;
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
+	struct rq *rq;
 	int i;
 
-	if (!cont->parent) {
-		/* This is early initialization for the top container */
-		init_task_grp.css.container = cont;
-		return &init_task_grp.css;
-	}
-
-	/* we support only 1-level deep hierarchical scheduler atm */
-	if (cont->parent->parent)
-		return ERR_PTR(-EINVAL);
-
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
 		return ERR_PTR(-ENOMEM);
 
-	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * num_possible_cpus(), GFP_KERNEL);
+	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
 	if (!tg->cfs_rq)
 		goto err;
-	tg->se = kzalloc(sizeof(se) * num_possible_cpus(), GFP_KERNEL);
+	tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
 	if (!tg->se)
 		goto err;
 
 	for_each_possible_cpu(i) {
-		struct rq *rq = cpu_rq(i);
+		rq = cpu_rq(i);
 
 		cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
 							 cpu_to_node(i));
@@ -6763,7 +6751,6 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
 		tg->cfs_rq[i] = cfs_rq;
 		init_cfs_rq(cfs_rq, rq);
 		cfs_rq->tg = tg;
-		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 
 		tg->se[i] = se;
 		se->cfs_rq = &rq->cfs;
@@ -6773,12 +6760,15 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
 		se->parent = NULL;
 	}
 
-	tg->shares = NICE_0_LOAD;
+	for_each_possible_cpu(i) {
+		rq = cpu_rq(i);
+		cfs_rq = tg->cfs_rq[i];
+		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+	}
 
-	/* Bind the container to task_grp object we just created */
-	tg->css.container = cont;
+	tg->shares = NICE_0_LOAD;
 
-	return &tg->css;
+	return tg;
 
 err:
 	for_each_possible_cpu(i) {
@@ -6797,24 +6787,14 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
 	return ERR_PTR(-ENOMEM);
 }
 
-/* destroy runqueue etc associated with a task group */
-static void sched_destroy_group(struct container_subsys *ss,
-					struct container *cont)
+/* rcu callback to free various structures associated with a task group */
+static void free_sched_group(struct rcu_head *rhp)
 {
-	struct task_grp *tg = container_tg(cont);
-	struct cfs_rq *cfs_rq;
+	struct cfs_rq *cfs_rq = container_of(rhp, struct cfs_rq, rcu);
+	struct task_grp *tg = cfs_rq->tg;
 	struct sched_entity *se;
 	int i;
 
-	for_each_possible_cpu(i) {
-		cfs_rq = tg->cfs_rq[i];
-		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-	}
-
-	/* wait for possible concurrent references to cfs_rqs complete */
-	synchronize_sched();
-
 	/* now it should be safe to free those cfs_rqs */
 	for_each_possible_cpu(i) {
 		cfs_rq = tg->cfs_rq[i];
@@ -6829,19 +6809,29 @@ static void sched_destroy_group(struct container_subsys *ss,
 	kfree(tg);
 }
 
-static int sched_can_attach(struct container_subsys *ss,
-			struct container *cont, struct task_struct *tsk)
+/* Destroy runqueue etc associated with a task group */
+void sched_destroy_group(struct task_grp *tg)
 {
-	/* We don't support RT-tasks being in separate groups */
-	if (tsk->sched_class != &fair_sched_class)
-		return -EINVAL;
+	struct cfs_rq *cfs_rq;
+	int i;
 
-	return 0;
+	for_each_possible_cpu(i) {
+		cfs_rq = tg->cfs_rq[i];
+		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+	}
+
+	cfs_rq = tg->cfs_rq[0];
+
+	/* wait for possible concurrent references to cfs_rqs complete */
+	call_rcu(&cfs_rq->rcu, free_sched_group);
 }
 
-/* change task's runqueue when it moves between groups */
-static void sched_move_task(struct container_subsys *ss, struct container *cont,
-			struct container *old_cont, struct task_struct *tsk)
+/* change task's runqueue when it moves between groups.
+ *	The caller of this function should have put the task in its new group
+ *	by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
+ *	reflect its new group.
+ */
+void sched_move_task(struct task_struct *tsk)
 {
 	int on_rq, running;
 	unsigned long flags;
@@ -6896,58 +6886,20 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
 	spin_unlock_irq(&rq->lock);
 }
 
-static ssize_t cpu_shares_write(struct container *cont, struct cftype *cftype,
-				struct file *file, const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+int sched_group_set_shares(struct task_grp *tg, unsigned long shares)
 {
 	int i;
-	unsigned long shareval;
-	struct task_grp *tg = container_tg(cont);
-	char buffer[2*sizeof(unsigned long) + 1];
 
-	if (nbytes > 2*sizeof(unsigned long))	/* safety check */
-		return -E2BIG;
-
-	if (copy_from_user(buffer, userbuf, nbytes))
-		return -EFAULT;
+	if (tg->shares == shares)
+		return 0;
 
-	buffer[nbytes] = 0;	/* nul-terminate */
-	shareval = simple_strtoul(buffer, NULL, 10);
+	/* return -EINVAL if the new value is not sane */
 
-	tg->shares = shareval;
+	tg->shares = shares;
 	for_each_possible_cpu(i)
-		set_se_shares(tg->se[i], shareval);
+		set_se_shares(tg->se[i], shares);
 
-	return nbytes;
-}
-
-static u64 cpu_shares_read_uint(struct container *cont, struct cftype *cft)
-{
-	struct task_grp *tg = container_tg(cont);
-
-	return (u64) tg->shares;
-}
-
-struct cftype cpuctl_share = {
-	.name = "shares",
-	.read_uint = cpu_shares_read_uint,
-	.write = cpu_shares_write,
-};
-
-static int sched_populate(struct container_subsys *ss, struct container *cont)
-{
-	return container_add_file(cont, ss, &cpuctl_share);
-}
-
-struct container_subsys cpu_subsys = {
-	.name = "cpu",
-	.create = sched_create_group,
-	.destroy = sched_destroy_group,
-	.can_attach = sched_can_attach,
-	.attach = sched_move_task,
-	.populate = sched_populate,
-	.subsys_id = cpu_subsys_id,
-	.early_init = 1,
-};
-
+	return 0;
+}
+
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
kernel/sched_fair.c
@@ -877,7 +877,10 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 	if (!cfs_rq->nr_running)
 		return MAX_PRIO;
 
-	curr = __pick_next_entity(cfs_rq);
+	curr = cfs_rq->curr;
+	if (!curr)
+		curr = __pick_next_entity(cfs_rq);
+
 	p = task_of(curr);
 
 	return p->prio;