Commit 3bc1e711 authored by Tejun Heo

workqueue: Don't implicitly make UNBOUND workqueues w/ @max_active==1 ordered

5c0338c6 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered")
automoatically promoted UNBOUND workqueues w/ @max_active==1 to ordered
workqueues because UNBOUND workqueues w/ @max_active==1 used to be the way
to create ordered workqueues and the new NUMA support broke it. These
problems can be subtle and the fact that they can only trigger on NUMA
machines made them even more difficult to debug.

However, overloading the UNBOUND allocation interface this way creates other
issues. It's difficult to tell whether a given workqueue actually needs to
be ordered, and users that legitimately want a minimum concurrency level wq
unexpectedly get an ordered one instead. With planned UNBOUND workqueue
updates to improve execution locality and the growing prevalence of chiplet
designs which can benefit from such improvements, this isn't a state we want
to be in forever.
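
For illustration, a hedged sketch of the two kinds of call sites this change
disambiguates; the workqueue names and the init function are hypothetical:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *ctrl_wq;  /* needs strict ordering */
static struct workqueue_struct *io_wq;    /* only wants a concurrency limit */

static int __init example_init(void)
{
        /* Explicitly ordered: one item in flight, in queueing order, everywhere. */
        ctrl_wq = alloc_ordered_workqueue("ex_ctrl", 0);
        if (!ctrl_wq)
                return -ENOMEM;

        /*
         * After this patch, UNBOUND + @max_active==1 means exactly what it
         * says: at most one concurrent work item, with no implicit
         * execution-ordering guarantee.
         */
        io_wq = alloc_workqueue("ex_io", WQ_UNBOUND, 1);
        if (!io_wq) {
                destroy_workqueue(ctrl_wq);
                return -ENOMEM;
        }
        return 0;
}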

There aren't that many UNBOUND w/ @max_active==1 users in the tree, and the
preceding patches audited them all and converted them to
alloc_ordered_workqueue() where appropriate. This patch removes the implicit
promotion of UNBOUND w/ @max_active==1 workqueues to ordered ones.
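
As a sketch of the conversion pattern those audits applied (the driver name
is hypothetical; note that alloc_ordered_workqueue() itself implies
WQ_UNBOUND and @max_active==1):

/* Before: depended on the implicit promotion to ordered. */
wq = alloc_workqueue("mydrv", WQ_UNBOUND | WQ_MEM_RECLAIM, 1);

/* After: the ordering requirement is stated explicitly. */
wq = alloc_ordered_workqueue("mydrv", WQ_MEM_RECLAIM);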

v2: The v1 patch incorrectly dropped the !list_empty(&wq->pwqs) condition in
    apply_workqueue_attrs_locked(), which spuriously triggered the WARNING
    and failed workqueue creation. Fix it.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: kernel test robot <oliver.sang@intel.com>
Link: https://lore.kernel.org/oe-lkp/202304251050.45a5df1f-oliver.sang@intel.com
parent 8eb17dc1
Documentation/core-api/workqueue.rst
@@ -256,15 +256,11 @@ may queue at the same time. Unless there is a specific need for
 throttling the number of active work items, specifying '0' is
 recommended.
 
-Some users depend on the strict execution ordering of ST wq.  The
-combination of ``@max_active`` of 1 and ``WQ_UNBOUND`` used to
-achieve this behavior.  Work items on such wq were always queued to the
-unbound worker-pools and only one work item could be active at any given
-time thus achieving the same ordering property as ST wq.
-
-In the current implementation the above configuration only guarantees
-ST behavior within a given NUMA node. Instead ``alloc_ordered_workqueue()`` should
-be used to achieve system-wide ST behavior.
+Some users depend on strict execution ordering where only one work item
+is in flight at any given time and the work items are processed in
+queueing order.  While the combination of ``@max_active`` of 1 and
+``WQ_UNBOUND`` used to achieve this behavior, this is no longer the
+case.  Use ``alloc_ordered_workqueue()`` instead.
 
 
 Example Execution Scenarios
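
A minimal usage sketch of the guarantee described above, with hypothetical
work functions: on an ordered workqueue, step1_fn() finishes before
step2_fn() starts.

#include <linux/workqueue.h>

static void step1_fn(struct work_struct *work) { /* first step */ }
static void step2_fn(struct work_struct *work) { /* second step */ }

static DECLARE_WORK(step1, step1_fn);
static DECLARE_WORK(step2, step2_fn);

static void run_steps(struct workqueue_struct *ordered_wq)
{
        /*
         * On a workqueue from alloc_ordered_workqueue(), at most one work
         * item is in flight and items run in queueing order.
         */
        queue_work(ordered_wq, &step1);
        queue_work(ordered_wq, &step2);
}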
include/linux/workqueue.h
@@ -392,7 +392,6 @@ enum wq_flags {
 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
 	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
-	__WQ_ORDERED_EXPLICIT	= 1 << 19, /* internal: alloc_ordered_workqueue() */
 
 	/* BH wq only allows the following flags */
 	__WQ_BH_ALLOWS		= WQ_BH | WQ_HIGHPRI,
@@ -507,8 +506,7 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...);
  * Pointer to the allocated workqueue on success, %NULL on failure.
  */
 #define alloc_ordered_workqueue(fmt, flags, args...)			\
-	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED |		\
-			__WQ_ORDERED_EXPLICIT | (flags), 1, ##args)
+	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
 
 #define create_workqueue(name)						\
 	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
kernel/workqueue.c
@@ -5007,13 +5007,9 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 		return -EINVAL;
 
 	/* creating multiple pwqs breaks ordering guarantee */
-	if (!list_empty(&wq->pwqs)) {
-		if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
-			return -EINVAL;
-
-		wq->flags &= ~__WQ_ORDERED;
-	}
+	if (!list_empty(&wq->pwqs) && WARN_ON(wq->flags & __WQ_ORDERED))
+		return -EINVAL;
 
 	ctx = apply_wqattrs_prepare(wq, attrs, wq_unbound_cpumask);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
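
To illustrate the v2 fix noted in the changelog: keeping the
!list_empty(&wq->pwqs) condition lets the initial attrs application (which
happens before any pwq exists) succeed, while later attribute changes on an
ordered workqueue now fail loudly. A hedged kernel-internal sketch, since
apply_workqueue_attrs() is not exported to modules:

struct workqueue_struct *wq = alloc_ordered_workqueue("ex_ordered", 0);
struct workqueue_attrs *attrs = alloc_workqueue_attrs();

if (wq && attrs) {
        cpumask_copy(attrs->cpumask, cpu_possible_mask);
        /*
         * wq already has its initial pwq, so this WARNs and returns
         * -EINVAL instead of silently clearing __WQ_ORDERED.
         */
        WARN_ON(apply_workqueue_attrs(wq, attrs) != -EINVAL);
        free_workqueue_attrs(attrs);
        destroy_workqueue(wq);
}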
@@ -5333,15 +5329,6 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
 		return NULL;
 	}
 
-	/*
-	 * Unbound && max_active == 1 used to imply ordered, which is no longer
-	 * the case on many machines due to per-pod pools. While
-	 * alloc_ordered_workqueue() is the right way to create an ordered
-	 * workqueue, keep the previous behavior to avoid subtle breakages.
-	 */
-	if ((flags & WQ_UNBOUND) && max_active == 1)
-		flags |= __WQ_ORDERED;
-
 	/* see the comment above the definition of WQ_POWER_EFFICIENT */
 	if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
 		flags |= WQ_UNBOUND;
@@ -5564,14 +5551,13 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
 	if (WARN_ON(wq->flags & WQ_BH))
 		return;
 	/* disallow meddling with max_active for ordered workqueues */
-	if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
+	if (WARN_ON(wq->flags & __WQ_ORDERED))
 		return;
 
 	max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
 
 	mutex_lock(&wq->mutex);
 
-	wq->flags &= ~__WQ_ORDERED;
 	wq->saved_max_active = max_active;
 	if (wq->flags & WQ_UNBOUND)
 		wq->saved_min_active = min(wq->saved_min_active, max_active);
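
A sketch of the resulting behavior at this interface (workqueue variables
hypothetical): raising max_active later is still fine for a plain unbound
workqueue, while any ordered workqueue now trips the WARN and keeps
max_active at 1, where an implicitly ordered one previously lost
__WQ_ORDERED silently.

static void adjust_concurrency(struct workqueue_struct *plain_wq,
                               struct workqueue_struct *ordered_wq)
{
        /* plain_wq from alloc_workqueue(..., WQ_UNBOUND, 1): allowed. */
        workqueue_set_max_active(plain_wq, 16);

        /* ordered_wq from alloc_ordered_workqueue(): WARNs and returns. */
        workqueue_set_max_active(ordered_wq, 16);
}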
@@ -7028,7 +7014,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
 	 * attributes breaks ordering guarantee.  Disallow exposing ordered
 	 * workqueues.
 	 */
-	if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
+	if (WARN_ON(wq->flags & __WQ_ORDERED))
 		return -EINVAL;
 
 	wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);