Commit f07c2250, authored by Nathan Scott, committed by Tim Shimmin

[XFS] Improve xfsbufd delayed write submission patterns, after blktrace analysis.

Under a sequential create+allocate workload, blktrace reported backward
writes being issued by xfsbufd, and frequent inappropriate queue unplugs.
We now insert at the tail when moving from the delwri lists to the temp
lists, which maintains correct ordering, and we avoid unplugging queues
deep in the submit paths when we'd shortly do it at a higher level anyway.
blktrace now reports much healthier write patterns from xfsbufd for this
workload (and likely many others).

SGI-PV: 954310
SGI-Modid: xfs-linux-melb:xfs-kern:26396a
Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
parent f37ea149
/* /*
* Copyright (c) 2000-2005 Silicon Graphics, Inc. * Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved. * All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
...@@ -1681,6 +1681,7 @@ xfsbufd( ...@@ -1681,6 +1681,7 @@ xfsbufd(
xfs_buf_t *bp, *n; xfs_buf_t *bp, *n;
struct list_head *dwq = &target->bt_delwrite_queue; struct list_head *dwq = &target->bt_delwrite_queue;
spinlock_t *dwlk = &target->bt_delwrite_lock; spinlock_t *dwlk = &target->bt_delwrite_lock;
int count;
current->flags |= PF_MEMALLOC; current->flags |= PF_MEMALLOC;
...@@ -1696,6 +1697,7 @@ xfsbufd( ...@@ -1696,6 +1697,7 @@ xfsbufd(
schedule_timeout_interruptible( schedule_timeout_interruptible(
xfs_buf_timer_centisecs * msecs_to_jiffies(10)); xfs_buf_timer_centisecs * msecs_to_jiffies(10));
count = 0;
age = xfs_buf_age_centisecs * msecs_to_jiffies(10); age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
spin_lock(dwlk); spin_lock(dwlk);
list_for_each_entry_safe(bp, n, dwq, b_list) { list_for_each_entry_safe(bp, n, dwq, b_list) {
...@@ -1711,9 +1713,11 @@ xfsbufd( ...@@ -1711,9 +1713,11 @@ xfsbufd(
break; break;
} }
bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
_XBF_RUN_QUEUES);
bp->b_flags |= XBF_WRITE; bp->b_flags |= XBF_WRITE;
list_move(&bp->b_list, &tmp); list_move_tail(&bp->b_list, &tmp);
count++;
} }
} }
spin_unlock(dwlk); spin_unlock(dwlk);
...@@ -1724,12 +1728,12 @@ xfsbufd( ...@@ -1724,12 +1728,12 @@ xfsbufd(
list_del_init(&bp->b_list); list_del_init(&bp->b_list);
xfs_buf_iostrategy(bp); xfs_buf_iostrategy(bp);
blk_run_address_space(target->bt_mapping);
} }
if (as_list_len > 0) if (as_list_len > 0)
purge_addresses(); purge_addresses();
if (count)
blk_run_address_space(target->bt_mapping);
clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
} while (!kthread_should_stop()); } while (!kthread_should_stop());
...@@ -1767,7 +1771,7 @@ xfs_flush_buftarg( ...@@ -1767,7 +1771,7 @@ xfs_flush_buftarg(
continue; continue;
} }
list_move(&bp->b_list, &tmp); list_move_tail(&bp->b_list, &tmp);
} }
spin_unlock(dwlk); spin_unlock(dwlk);
...@@ -1776,7 +1780,7 @@ xfs_flush_buftarg( ...@@ -1776,7 +1780,7 @@ xfs_flush_buftarg(
*/ */
list_for_each_entry_safe(bp, n, &tmp, b_list) { list_for_each_entry_safe(bp, n, &tmp, b_list) {
xfs_buf_lock(bp); xfs_buf_lock(bp);
bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|_XBF_RUN_QUEUES);
bp->b_flags |= XBF_WRITE; bp->b_flags |= XBF_WRITE;
if (wait) if (wait)
bp->b_flags &= ~XBF_ASYNC; bp->b_flags &= ~XBF_ASYNC;
...@@ -1786,6 +1790,9 @@ xfs_flush_buftarg( ...@@ -1786,6 +1790,9 @@ xfs_flush_buftarg(
xfs_buf_iostrategy(bp); xfs_buf_iostrategy(bp);
} }
if (wait)
blk_run_address_space(target->bt_mapping);
/* /*
* Remaining list items must be flushed before returning * Remaining list items must be flushed before returning
*/ */
...@@ -1797,9 +1804,6 @@ xfs_flush_buftarg( ...@@ -1797,9 +1804,6 @@ xfs_flush_buftarg(
xfs_buf_relse(bp); xfs_buf_relse(bp);
} }
if (wait)
blk_run_address_space(target->bt_mapping);
return pincount; return pincount;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment