Commit 8aa7e847, authored and committed by Jens Axboe

Fix congestion_wait() sync/async vs read/write confusion

Commit 1faa16d2 accidentally broke
the bdi congestion wait queue logic, causing us to wait on congestion
for WRITE (== 1) when we really wanted BLK_RW_ASYNC (== 0) instead.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent c2cc49a2
...@@ -751,7 +751,7 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from, ...@@ -751,7 +751,7 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from,
if (retval == -ENOMEM && is_global_init(current)) { if (retval == -ENOMEM && is_global_init(current)) {
up_read(&current->mm->mmap_sem); up_read(&current->mm->mmap_sem);
congestion_wait(WRITE, HZ/50); congestion_wait(BLK_RW_ASYNC, HZ/50);
goto survive; goto survive;
} }
......
...@@ -1372,8 +1372,10 @@ static int pkt_handle_queue(struct pktcdvd_device *pd) ...@@ -1372,8 +1372,10 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
wakeup = (pd->write_congestion_on > 0 wakeup = (pd->write_congestion_on > 0
&& pd->bio_queue_size <= pd->write_congestion_off); && pd->bio_queue_size <= pd->write_congestion_off);
spin_unlock(&pd->lock); spin_unlock(&pd->lock);
if (wakeup) if (wakeup) {
clear_bdi_congested(&pd->disk->queue->backing_dev_info, WRITE); clear_bdi_congested(&pd->disk->queue->backing_dev_info,
BLK_RW_ASYNC);
}
pkt->sleep_time = max(PACKET_WAIT_TIME, 1); pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
pkt_set_state(pkt, PACKET_WAITING_STATE); pkt_set_state(pkt, PACKET_WAITING_STATE);
...@@ -2592,10 +2594,10 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio) ...@@ -2592,10 +2594,10 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
spin_lock(&pd->lock); spin_lock(&pd->lock);
if (pd->write_congestion_on > 0 if (pd->write_congestion_on > 0
&& pd->bio_queue_size >= pd->write_congestion_on) { && pd->bio_queue_size >= pd->write_congestion_on) {
set_bdi_congested(&q->backing_dev_info, WRITE); set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC);
do { do {
spin_unlock(&pd->lock); spin_unlock(&pd->lock);
congestion_wait(WRITE, HZ); congestion_wait(BLK_RW_ASYNC, HZ);
spin_lock(&pd->lock); spin_lock(&pd->lock);
} while(pd->bio_queue_size > pd->write_congestion_off); } while(pd->bio_queue_size > pd->write_congestion_off);
} }
......
...@@ -776,7 +776,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) ...@@ -776,7 +776,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
* But don't wait if split was due to the io size restriction * But don't wait if split was due to the io size restriction
*/ */
if (unlikely(out_of_pages)) if (unlikely(out_of_pages))
congestion_wait(WRITE, HZ/100); congestion_wait(BLK_RW_ASYNC, HZ/100);
/* /*
* With async crypto it is unsafe to share the crypto context * With async crypto it is unsafe to share the crypto context
......
...@@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp) ...@@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp)
if ((filp->f_mode & FMODE_WRITE) && if ((filp->f_mode & FMODE_WRITE) &&
MSDOS_SB(inode->i_sb)->options.flush) { MSDOS_SB(inode->i_sb)->options.flush) {
fat_flush_inodes(inode->i_sb, inode, NULL); fat_flush_inodes(inode->i_sb, inode, NULL);
congestion_wait(WRITE, HZ/10); congestion_wait(BLK_RW_ASYNC, HZ/10);
} }
return 0; return 0;
} }
......
...@@ -286,8 +286,8 @@ __releases(&fc->lock) ...@@ -286,8 +286,8 @@ __releases(&fc->lock)
} }
if (fc->num_background == FUSE_CONGESTION_THRESHOLD && if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
fc->connected && fc->bdi_initialized) { fc->connected && fc->bdi_initialized) {
clear_bdi_congested(&fc->bdi, READ); clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
clear_bdi_congested(&fc->bdi, WRITE); clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
} }
fc->num_background--; fc->num_background--;
fc->active_background--; fc->active_background--;
...@@ -414,8 +414,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, ...@@ -414,8 +414,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
fc->blocked = 1; fc->blocked = 1;
if (fc->num_background == FUSE_CONGESTION_THRESHOLD && if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
fc->bdi_initialized) { fc->bdi_initialized) {
set_bdi_congested(&fc->bdi, READ); set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
set_bdi_congested(&fc->bdi, WRITE); set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
} }
list_add_tail(&req->list, &fc->bg_queue); list_add_tail(&req->list, &fc->bg_queue);
flush_bg_queue(fc); flush_bg_queue(fc);
......
...@@ -202,8 +202,10 @@ static int nfs_set_page_writeback(struct page *page) ...@@ -202,8 +202,10 @@ static int nfs_set_page_writeback(struct page *page)
struct nfs_server *nfss = NFS_SERVER(inode); struct nfs_server *nfss = NFS_SERVER(inode);
if (atomic_long_inc_return(&nfss->writeback) > if (atomic_long_inc_return(&nfss->writeback) >
NFS_CONGESTION_ON_THRESH) NFS_CONGESTION_ON_THRESH) {
set_bdi_congested(&nfss->backing_dev_info, WRITE); set_bdi_congested(&nfss->backing_dev_info,
BLK_RW_ASYNC);
}
} }
return ret; return ret;
} }
...@@ -215,7 +217,7 @@ static void nfs_end_page_writeback(struct page *page) ...@@ -215,7 +217,7 @@ static void nfs_end_page_writeback(struct page *page)
end_page_writeback(page); end_page_writeback(page);
if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
clear_bdi_congested(&nfss->backing_dev_info, WRITE); clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
} }
/* /*
......
...@@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s) ...@@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s)
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
struct reiserfs_journal *j = SB_JOURNAL(s); struct reiserfs_journal *j = SB_JOURNAL(s);
if (atomic_read(&j->j_async_throttle)) if (atomic_read(&j->j_async_throttle))
congestion_wait(WRITE, HZ / 10); congestion_wait(BLK_RW_ASYNC, HZ / 10);
return 0; return 0;
} }
......
...@@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) ...@@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
printk(KERN_ERR "XFS: possible memory allocation " printk(KERN_ERR "XFS: possible memory allocation "
"deadlock in %s (mode:0x%x)\n", "deadlock in %s (mode:0x%x)\n",
__func__, lflags); __func__, lflags);
congestion_wait(WRITE, HZ/50); congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (1); } while (1);
} }
...@@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) ...@@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
printk(KERN_ERR "XFS: possible memory allocation " printk(KERN_ERR "XFS: possible memory allocation "
"deadlock in %s (mode:0x%x)\n", "deadlock in %s (mode:0x%x)\n",
__func__, lflags); __func__, lflags);
congestion_wait(WRITE, HZ/50); congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (1); } while (1);
} }
......
...@@ -412,7 +412,7 @@ _xfs_buf_lookup_pages( ...@@ -412,7 +412,7 @@ _xfs_buf_lookup_pages(
XFS_STATS_INC(xb_page_retries); XFS_STATS_INC(xb_page_retries);
xfsbufd_wakeup(0, gfp_mask); xfsbufd_wakeup(0, gfp_mask);
congestion_wait(WRITE, HZ/50); congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry; goto retry;
} }
......
...@@ -229,9 +229,9 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) ...@@ -229,9 +229,9 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
(1 << BDI_async_congested)); (1 << BDI_async_congested));
} }
void clear_bdi_congested(struct backing_dev_info *bdi, int rw); void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
void set_bdi_congested(struct backing_dev_info *bdi, int rw); void set_bdi_congested(struct backing_dev_info *bdi, int sync);
long congestion_wait(int rw, long timeout); long congestion_wait(int sync, long timeout);
static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
......
...@@ -779,18 +779,18 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, ...@@ -779,18 +779,18 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
* congested queues, and wake up anyone who was waiting for requests to be * congested queues, and wake up anyone who was waiting for requests to be
* put back. * put back.
*/ */
static inline void blk_clear_queue_congested(struct request_queue *q, int rw) static inline void blk_clear_queue_congested(struct request_queue *q, int sync)
{ {
clear_bdi_congested(&q->backing_dev_info, rw); clear_bdi_congested(&q->backing_dev_info, sync);
} }
/* /*
* A queue has just entered congestion. Flag that in the queue's VM-visible * A queue has just entered congestion. Flag that in the queue's VM-visible
* state flags and increment the global counter of congested queues. * state flags and increment the global counter of congested queues.
*/ */
static inline void blk_set_queue_congested(struct request_queue *q, int rw) static inline void blk_set_queue_congested(struct request_queue *q, int sync)
{ {
set_bdi_congested(&q->backing_dev_info, rw); set_bdi_congested(&q->backing_dev_info, sync);
} }
extern void blk_start_queue(struct request_queue *q); extern void blk_start_queue(struct request_queue *q);
......
...@@ -283,7 +283,6 @@ static wait_queue_head_t congestion_wqh[2] = { ...@@ -283,7 +283,6 @@ static wait_queue_head_t congestion_wqh[2] = {
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
}; };
void clear_bdi_congested(struct backing_dev_info *bdi, int sync) void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
{ {
enum bdi_state bit; enum bdi_state bit;
...@@ -308,18 +307,18 @@ EXPORT_SYMBOL(set_bdi_congested); ...@@ -308,18 +307,18 @@ EXPORT_SYMBOL(set_bdi_congested);
/** /**
* congestion_wait - wait for a backing_dev to become uncongested * congestion_wait - wait for a backing_dev to become uncongested
* @rw: READ or WRITE * @sync: SYNC or ASYNC IO
* @timeout: timeout in jiffies * @timeout: timeout in jiffies
* *
* Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
* write congestion. If no backing_devs are congested then just wait for the * write congestion. If no backing_devs are congested then just wait for the
* next write to be completed. * next write to be completed.
*/ */
long congestion_wait(int rw, long timeout) long congestion_wait(int sync, long timeout)
{ {
long ret; long ret;
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
wait_queue_head_t *wqh = &congestion_wqh[rw]; wait_queue_head_t *wqh = &congestion_wqh[sync];
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
ret = io_schedule_timeout(timeout); ret = io_schedule_timeout(timeout);
......
...@@ -1973,7 +1973,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) ...@@ -1973,7 +1973,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
if (!progress) { if (!progress) {
nr_retries--; nr_retries--;
/* maybe some writeback is necessary */ /* maybe some writeback is necessary */
congestion_wait(WRITE, HZ/10); congestion_wait(BLK_RW_ASYNC, HZ/10);
} }
} }
......
...@@ -575,7 +575,7 @@ static void balance_dirty_pages(struct address_space *mapping) ...@@ -575,7 +575,7 @@ static void balance_dirty_pages(struct address_space *mapping)
if (pages_written >= write_chunk) if (pages_written >= write_chunk)
break; /* We've done our duty */ break; /* We've done our duty */
congestion_wait(WRITE, HZ/10); congestion_wait(BLK_RW_ASYNC, HZ/10);
} }
if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
...@@ -669,7 +669,7 @@ void throttle_vm_writeout(gfp_t gfp_mask) ...@@ -669,7 +669,7 @@ void throttle_vm_writeout(gfp_t gfp_mask)
if (global_page_state(NR_UNSTABLE_NFS) + if (global_page_state(NR_UNSTABLE_NFS) +
global_page_state(NR_WRITEBACK) <= dirty_thresh) global_page_state(NR_WRITEBACK) <= dirty_thresh)
break; break;
congestion_wait(WRITE, HZ/10); congestion_wait(BLK_RW_ASYNC, HZ/10);
/* /*
* The caller might hold locks which can prevent IO completion * The caller might hold locks which can prevent IO completion
...@@ -715,7 +715,7 @@ static void background_writeout(unsigned long _min_pages) ...@@ -715,7 +715,7 @@ static void background_writeout(unsigned long _min_pages)
if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
/* Wrote less than expected */ /* Wrote less than expected */
if (wbc.encountered_congestion || wbc.more_io) if (wbc.encountered_congestion || wbc.more_io)
congestion_wait(WRITE, HZ/10); congestion_wait(BLK_RW_ASYNC, HZ/10);
else else
break; break;
} }
...@@ -787,7 +787,7 @@ static void wb_kupdate(unsigned long arg) ...@@ -787,7 +787,7 @@ static void wb_kupdate(unsigned long arg)
writeback_inodes(&wbc); writeback_inodes(&wbc);
if (wbc.nr_to_write > 0) { if (wbc.nr_to_write > 0) {
if (wbc.encountered_congestion || wbc.more_io) if (wbc.encountered_congestion || wbc.more_io)
congestion_wait(WRITE, HZ/10); congestion_wait(BLK_RW_ASYNC, HZ/10);
else else
break; /* All the old data is written */ break; /* All the old data is written */
} }
......
...@@ -1666,7 +1666,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, ...@@ -1666,7 +1666,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
preferred_zone, migratetype); preferred_zone, migratetype);
if (!page && gfp_mask & __GFP_NOFAIL) if (!page && gfp_mask & __GFP_NOFAIL)
congestion_wait(WRITE, HZ/50); congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (!page && (gfp_mask & __GFP_NOFAIL)); } while (!page && (gfp_mask & __GFP_NOFAIL));
return page; return page;
...@@ -1831,7 +1831,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, ...@@ -1831,7 +1831,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
pages_reclaimed += did_some_progress; pages_reclaimed += did_some_progress;
if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
/* Wait for some write requests to complete then retry */ /* Wait for some write requests to complete then retry */
congestion_wait(WRITE, HZ/50); congestion_wait(BLK_RW_ASYNC, HZ/50);
goto rebalance; goto rebalance;
} }
......
...@@ -1104,7 +1104,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, ...@@ -1104,7 +1104,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
*/ */
if (nr_freed < nr_taken && !current_is_kswapd() && if (nr_freed < nr_taken && !current_is_kswapd() &&
lumpy_reclaim) { lumpy_reclaim) {
congestion_wait(WRITE, HZ/10); congestion_wait(BLK_RW_ASYNC, HZ/10);
/* /*
* The attempt at page out may have made some * The attempt at page out may have made some
...@@ -1721,7 +1721,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, ...@@ -1721,7 +1721,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
/* Take a nap, wait for some writeback to complete */ /* Take a nap, wait for some writeback to complete */
if (sc->nr_scanned && priority < DEF_PRIORITY - 2) if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
congestion_wait(WRITE, HZ/10); congestion_wait(BLK_RW_ASYNC, HZ/10);
} }
/* top priority shrink_zones still had more to do? don't OOM, then */ /* top priority shrink_zones still had more to do? don't OOM, then */
if (!sc->all_unreclaimable && scanning_global_lru(sc)) if (!sc->all_unreclaimable && scanning_global_lru(sc))
...@@ -1960,7 +1960,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) ...@@ -1960,7 +1960,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
* another pass across the zones. * another pass across the zones.
*/ */
if (total_scanned && priority < DEF_PRIORITY - 2) if (total_scanned && priority < DEF_PRIORITY - 2)
congestion_wait(WRITE, HZ/10); congestion_wait(BLK_RW_ASYNC, HZ/10);
/* /*
* We do this so kswapd doesn't build up large priorities for * We do this so kswapd doesn't build up large priorities for
...@@ -2233,7 +2233,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) ...@@ -2233,7 +2233,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
goto out; goto out;
if (sc.nr_scanned && prio < DEF_PRIORITY - 2) if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
congestion_wait(WRITE, HZ / 10); congestion_wait(BLK_RW_ASYNC, HZ / 10);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment