Commit f317ff9e authored by Linus Torvalds

Merge branch 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue changes from Tejun Heo:
 "Surprisingly, Lai and I didn't break too many things implementing
  custom pools and stuff last time around and there aren't any follow-up
  changes necessary at this point.

   The only change in this pull request is Viresh's patches to make some
   per-cpu workqueues behave as unbound workqueues depending on a boot
   param whose default can be configured via a config option.  This leads
   to higher processing overhead / lower bandwidth as more work items are
   bounced across CPUs; however, it can lead to noticeable power savings in
   certain configurations - ~10% w/ an idlish constant workload on a
   big.LITTLE configuration according to Viresh.

  This is because per-cpu workqueues interfere with how the scheduler
  perceives whether or not each CPU is idle by forcing pinned tasks on
  them, which makes the scheduler's power-aware scheduling decisions
  less effective.

   Its effectiveness is likely less pronounced on homogeneous
  configurations and this type of optimization can probably be made
  automatic; however, the changes are pretty minimal and the affected
  workqueues are clearly marked, so it's an easy gain for some
  configurations for the time being with pretty unintrusive changes."

* 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  fbcon: queue work on power efficient wq
  block: queue work on power efficient wq
  PHYLIB: queue work on system_power_efficient_wq
  workqueue: Add system wide power_efficient workqueues
  workqueues: Introduce new flag WQ_POWER_EFFICIENT for power oriented workqueues
parents 13cc5601 a85f1a41
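
The opt-in pattern used throughout the series below is small: a driver either queues its work on one of the two new system-wide power-efficient workqueues, or passes WQ_POWER_EFFICIENT when allocating its own workqueue. The following is a minimal sketch of the second form; the driver, function, and workqueue names (example_*) are hypothetical and only illustrate the flag usage described in this pull.

	/* Sketch only: a hypothetical driver opting into the power-efficient mode. */
	#include <linux/errno.h>
	#include <linux/init.h>
	#include <linux/module.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *example_wq;	/* hypothetical name */

	static void example_work_fn(struct work_struct *work)
	{
		/* the deferred work itself would go here */
	}
	static DECLARE_WORK(example_work, example_work_fn);

	static int __init example_init(void)
	{
		/*
		 * Stays per-cpu by default; promoted to WQ_UNBOUND when the
		 * workqueue.power_efficient boot parameter (or
		 * CONFIG_WQ_POWER_EFFICIENT_DEFAULT) is enabled.
		 */
		example_wq = alloc_workqueue("example_wq", WQ_POWER_EFFICIENT, 0);
		if (!example_wq)
			return -ENOMEM;

		queue_work(example_wq, &example_work);
		return 0;
	}
	module_init(example_init);
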
@@ -3341,6 +3341,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			that this also can be controlled per-workqueue for
 			workqueues visible under /sys/bus/workqueue/.
 
+	workqueue.power_efficient
+			Per-cpu workqueues are generally preferred because
+			they show better performance thanks to cache
+			locality; unfortunately, per-cpu workqueues tend to
+			be more power hungry than unbound workqueues.
+
+			Enabling this makes the per-cpu workqueues which
+			were observed to contribute significantly to power
+			consumption unbound, leading to measurably lower
+			power usage at the cost of small performance
+			overhead.
+
+			The default value of this parameter is determined by
+			the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
+
 	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
 			default x2apic cluster mode on platforms
 			supporting x2apic.
......
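
For reference, the parameter documented above lives in the core workqueue code, so on the kernel command line it takes the workqueue. prefix; a boot entry enabling the mode would presumably carry something like the line below (a value of 0 would instead force the mode off on kernels built with CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y).

	workqueue.power_efficient=1
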
@@ -3180,7 +3180,8 @@ int __init blk_dev_init(void)
 
 	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
 	kblockd_workqueue = alloc_workqueue("kblockd",
-					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+					    WQ_MEM_RECLAIM | WQ_HIGHPRI |
+					    WQ_POWER_EFFICIENT, 0);
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
......
@@ -144,7 +144,8 @@ void put_io_context(struct io_context *ioc)
 	if (atomic_long_dec_and_test(&ioc->refcount)) {
 		spin_lock_irqsave(&ioc->lock, flags);
 		if (!hlist_empty(&ioc->icq_list))
-			schedule_work(&ioc->release_work);
+			queue_work(system_power_efficient_wq,
+					&ioc->release_work);
 		else
 			free_ioc = true;
 		spin_unlock_irqrestore(&ioc->lock, flags);
......
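
A note on the conversion pattern used in this and the following hunks: schedule_work() and schedule_delayed_work() are thin wrappers that queue on the default system_wq, so switching to queue_work()/queue_delayed_work() on a power-efficient workqueue only changes which workqueue the item lands on, not the semantics of the work item itself. Roughly, paraphrasing the existing helpers in include/linux/workqueue.h:

	static inline bool schedule_work(struct work_struct *work)
	{
		return queue_work(system_wq, work);
	}

	static inline bool schedule_delayed_work(struct delayed_work *dwork,
						 unsigned long delay)
	{
		return queue_delayed_work(system_wq, dwork, delay);
	}
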
@@ -1489,9 +1489,11 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
 	intv = disk_events_poll_jiffies(disk);
 	set_timer_slack(&ev->dwork.timer, intv / 4);
 	if (check_now)
-		queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
+		queue_delayed_work(system_freezable_power_efficient_wq,
+				   &ev->dwork, 0);
 	else if (intv)
-		queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
+		queue_delayed_work(system_freezable_power_efficient_wq,
+				   &ev->dwork, intv);
 out_unlock:
 	spin_unlock_irqrestore(&ev->lock, flags);
 }
@@ -1534,7 +1536,8 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
 	spin_lock_irq(&ev->lock);
 	ev->clearing |= mask;
 	if (!ev->block)
-		mod_delayed_work(system_freezable_wq, &ev->dwork, 0);
+		mod_delayed_work(system_freezable_power_efficient_wq,
+				 &ev->dwork, 0);
 	spin_unlock_irq(&ev->lock);
 }
@@ -1627,7 +1630,8 @@ static void disk_check_events(struct disk_events *ev,
 	intv = disk_events_poll_jiffies(disk);
 	if (!ev->block && intv)
-		queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
+		queue_delayed_work(system_freezable_power_efficient_wq,
+				   &ev->dwork, intv);
 	spin_unlock_irq(&ev->lock);
......
@@ -439,7 +439,7 @@ void phy_start_machine(struct phy_device *phydev,
 {
 	phydev->adjust_state = handler;
 
-	schedule_delayed_work(&phydev->state_queue, HZ);
+	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ);
 }
 
 /**
@@ -500,7 +500,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
 	disable_irq_nosync(irq);
 	atomic_inc(&phydev->irq_disable);
 
-	schedule_work(&phydev->phy_queue);
+	queue_work(system_power_efficient_wq, &phydev->phy_queue);
 
 	return IRQ_HANDLED;
 }
@@ -655,7 +655,7 @@ static void phy_change(struct work_struct *work)
 
 	/* reschedule state queue work to run as soon as possible */
 	cancel_delayed_work_sync(&phydev->state_queue);
-	schedule_delayed_work(&phydev->state_queue, 0);
+	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0);
 
 	return;
@@ -918,7 +918,8 @@ void phy_state_machine(struct work_struct *work)
 	if (err < 0)
 		phy_error(phydev);
 
-	schedule_delayed_work(&phydev->state_queue, PHY_STATE_TIME * HZ);
+	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
+			   PHY_STATE_TIME * HZ);
 }
 
 static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad,
......
@@ -404,7 +404,7 @@ static void cursor_timer_handler(unsigned long dev_addr)
 	struct fb_info *info = (struct fb_info *) dev_addr;
 	struct fbcon_ops *ops = info->fbcon_par;
 
-	schedule_work(&info->queue);
+	queue_work(system_power_efficient_wq, &info->queue);
 	mod_timer(&ops->cursor_timer, jiffies + HZ/5);
 }
......
@@ -303,6 +303,33 @@ enum {
 	WQ_CPU_INTENSIVE	= 1 << 5,  /* cpu intensive workqueue */
 	WQ_SYSFS		= 1 << 6,  /* visible in sysfs, see wq_sysfs_register() */
 
+	/*
+	 * Per-cpu workqueues are generally preferred because they tend to
+	 * show better performance thanks to cache locality.  Per-cpu
+	 * workqueues exclude the scheduler from choosing the CPU to
+	 * execute the worker threads, which has an unfortunate side effect
+	 * of increasing power consumption.
+	 *
+	 * The scheduler considers a CPU idle if it doesn't have any task
+	 * to execute and tries to keep idle cores idle to conserve power;
+	 * however, for example, a per-cpu work item scheduled from an
+	 * interrupt handler on an idle CPU will force the scheduler to
+	 * execute the work item on that CPU, breaking the idleness, which
+	 * in turn may lead to more scheduling choices which are sub-optimal
+	 * in terms of power consumption.
+	 *
+	 * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default
+	 * but become unbound if the workqueue.power_efficient kernel param
+	 * is specified.  Per-cpu workqueues which are identified to
+	 * contribute significantly to power consumption are marked with
+	 * this flag, and enabling the power_efficient mode leads to
+	 * noticeable power saving at the cost of a small performance
+	 * disadvantage.
+	 *
+	 * http://thread.gmane.org/gmane.linux.kernel/1480396
+	 */
+	WQ_POWER_EFFICIENT	= 1 << 7,
+
 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
@@ -333,11 +360,19 @@ enum {
  *
  * system_freezable_wq is equivalent to system_wq except that it's
  * freezable.
+ *
+ * *_power_efficient_wq are inclined towards saving power and converted
+ * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise,
+ * they are the same as their non-power-efficient counterparts - e.g.
+ * system_power_efficient_wq is identical to system_wq if
+ * 'wq_power_efficient' is disabled.  See WQ_POWER_EFFICIENT for more info.
  */
 extern struct workqueue_struct *system_wq;
 extern struct workqueue_struct *system_long_wq;
 extern struct workqueue_struct *system_unbound_wq;
 extern struct workqueue_struct *system_freezable_wq;
+extern struct workqueue_struct *system_power_efficient_wq;
+extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
 static inline struct workqueue_struct * __deprecated __system_nrt_wq(void)
 {
......
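
With the declarations above, users that do not need a private workqueue can simply target the two new system-wide queues. A hedged sketch follows; the work items and functions (example_poll_*) are hypothetical, only the workqueue names come from this patch.

	#include <linux/jiffies.h>
	#include <linux/workqueue.h>

	static void example_poll_fn(struct work_struct *work)
	{
		/* periodic polling work would go here */
	}
	static DECLARE_WORK(example_poll_work, example_poll_fn);
	static DECLARE_DELAYED_WORK(example_poll_dwork, example_poll_fn);

	static void example_kick(void)
	{
		/* runs soon; may execute on any CPU once power_efficient is enabled */
		queue_work(system_power_efficient_wq, &example_poll_work);

		/* delayed variant that is additionally frozen across suspend/resume */
		queue_delayed_work(system_freezable_power_efficient_wq,
				   &example_poll_dwork, HZ);
	}
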
@@ -262,6 +262,26 @@ config PM_GENERIC_DOMAINS
 	bool
 	depends on PM
 
+config WQ_POWER_EFFICIENT_DEFAULT
+	bool "Enable workqueue power-efficient mode by default"
+	depends on PM
+	default n
+	help
+	  Per-cpu workqueues are generally preferred because they show
+	  better performance thanks to cache locality; unfortunately,
+	  per-cpu workqueues tend to be more power hungry than unbound
+	  workqueues.
+
+	  Enabling the workqueue.power_efficient kernel parameter makes the
+	  per-cpu workqueues which were observed to contribute
+	  significantly to power consumption unbound, leading to measurably
+	  lower power usage at the cost of small performance overhead.
+
+	  This config option determines whether workqueue.power_efficient
+	  is enabled by default.
+
+	  If in doubt, say N.
+
 config PM_GENERIC_DOMAINS_SLEEP
 	def_bool y
 	depends on PM_SLEEP && PM_GENERIC_DOMAINS
......
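
The Kconfig knob above only flips the boot-time default; a build that wants the power-efficient behaviour out of the box would carry a fragment like the following in its .config (the option is only offered when CONFIG_PM is enabled):

	CONFIG_PM=y
	CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
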
@@ -272,6 +272,15 @@ static cpumask_var_t *wq_numa_possible_cpumask;
 static bool wq_disable_numa;
 module_param_named(disable_numa, wq_disable_numa, bool, 0444);
 
+/* see the comment above the definition of WQ_POWER_EFFICIENT */
+#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
+static bool wq_power_efficient = true;
+#else
+static bool wq_power_efficient;
+#endif
+
+module_param_named(power_efficient, wq_power_efficient, bool, 0444);
+
 static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
 
 /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -305,6 +314,10 @@ struct workqueue_struct *system_unbound_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_unbound_wq);
 struct workqueue_struct *system_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_freezable_wq);
+struct workqueue_struct *system_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_power_efficient_wq);
+struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
 
 static int worker_thread(void *__worker);
 static void copy_workqueue_attrs(struct workqueue_attrs *to,
@@ -4086,6 +4099,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	struct workqueue_struct *wq;
 	struct pool_workqueue *pwq;
 
+	/* see the comment above the definition of WQ_POWER_EFFICIENT */
+	if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
+		flags |= WQ_UNBOUND;
+
 	/* allocate wq and format name */
 	if (flags & WQ_UNBOUND)
 		tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
@@ -4985,8 +5002,15 @@ static int __init init_workqueues(void)
 					    WQ_UNBOUND_MAX_ACTIVE);
 	system_freezable_wq = alloc_workqueue("events_freezable",
 					      WQ_FREEZABLE, 0);
+	system_power_efficient_wq = alloc_workqueue("events_power_efficient",
+					      WQ_POWER_EFFICIENT, 0);
+	system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
+					      WQ_FREEZABLE | WQ_POWER_EFFICIENT,
+					      0);
 	BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
-	       !system_unbound_wq || !system_freezable_wq);
+	       !system_unbound_wq || !system_freezable_wq ||
+	       !system_power_efficient_wq ||
+	       !system_freezable_power_efficient_wq);
 	return 0;
 }
 early_initcall(init_workqueues);
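
Because the parameter is registered with module_param_named(..., 0444), its effective value should also be visible read-only at runtime; on a booted system it can presumably be checked with something like the following (Y when the mode is on, N otherwise):

	$ cat /sys/module/workqueue/parameters/power_efficient
	Y
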