Commit 33177f05 authored by Dave Chinner's avatar Dave Chinner Committed by Ben Myers

xfs: swalloc doesn't align allocations properly

When swalloc is specified as a mount option, allocations are
supposed to be aligned to the stripe width rather than the stripe
unit of the underlying filesystem. However, it does not do this.

What the implementation does is round up the allocation size to a
stripe width, hence ensuring that all allocations span a full stripe
width. It does not, however, ensure that that allocation is aligned
to a stripe width, and hence the allocations can span multiple
underlying stripes and so still see RMW cycles for things like
direct IO on MD RAID.

So, if the swalloc mount option is set, change the allocation
alignment in xfs_bmap_btalloc() to use the stripe width rather than
the stripe unit.
Signed-off-by: default avatarDave Chinner <dchinner@redhat.com>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarBen Myers <bpm@sgi.com>
Signed-off-by: default avatarBen Myers <bpm@sgi.com>
parent 83a0adc3
...@@ -3648,10 +3648,19 @@ xfs_bmap_btalloc( ...@@ -3648,10 +3648,19 @@ xfs_bmap_btalloc(
int isaligned; int isaligned;
int tryagain; int tryagain;
int error; int error;
int stripe_align;
ASSERT(ap->length); ASSERT(ap->length);
mp = ap->ip->i_mount; mp = ap->ip->i_mount;
/* stripe alignment for allocation is determined by mount parameters */
stripe_align = 0;
if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
stripe_align = mp->m_swidth;
else if (mp->m_dalign)
stripe_align = mp->m_dalign;
align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
if (unlikely(align)) { if (unlikely(align)) {
error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
...@@ -3660,6 +3669,8 @@ xfs_bmap_btalloc( ...@@ -3660,6 +3669,8 @@ xfs_bmap_btalloc(
ASSERT(!error); ASSERT(!error);
ASSERT(ap->length); ASSERT(ap->length);
} }
nullfb = *ap->firstblock == NULLFSBLOCK; nullfb = *ap->firstblock == NULLFSBLOCK;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
if (nullfb) { if (nullfb) {
...@@ -3735,7 +3746,7 @@ xfs_bmap_btalloc( ...@@ -3735,7 +3746,7 @@ xfs_bmap_btalloc(
*/ */
if (!ap->flist->xbf_low && ap->aeof) { if (!ap->flist->xbf_low && ap->aeof) {
if (!ap->offset) { if (!ap->offset) {
args.alignment = mp->m_dalign; args.alignment = stripe_align;
atype = args.type; atype = args.type;
isaligned = 1; isaligned = 1;
/* /*
...@@ -3760,13 +3771,13 @@ xfs_bmap_btalloc( ...@@ -3760,13 +3771,13 @@ xfs_bmap_btalloc(
* of minlen+alignment+slop doesn't go up * of minlen+alignment+slop doesn't go up
* between the calls. * between the calls.
*/ */
if (blen > mp->m_dalign && blen <= args.maxlen) if (blen > stripe_align && blen <= args.maxlen)
nextminlen = blen - mp->m_dalign; nextminlen = blen - stripe_align;
else else
nextminlen = args.minlen; nextminlen = args.minlen;
if (nextminlen + mp->m_dalign > args.minlen + 1) if (nextminlen + stripe_align > args.minlen + 1)
args.minalignslop = args.minalignslop =
nextminlen + mp->m_dalign - nextminlen + stripe_align -
args.minlen - 1; args.minlen - 1;
else else
args.minalignslop = 0; args.minalignslop = 0;
...@@ -3788,7 +3799,7 @@ xfs_bmap_btalloc( ...@@ -3788,7 +3799,7 @@ xfs_bmap_btalloc(
*/ */
args.type = atype; args.type = atype;
args.fsbno = ap->blkno; args.fsbno = ap->blkno;
args.alignment = mp->m_dalign; args.alignment = stripe_align;
args.minlen = nextminlen; args.minlen = nextminlen;
args.minalignslop = 0; args.minalignslop = 0;
isaligned = 1; isaligned = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment