Commit 214d55ef authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Greg Kroah-Hartman

xfs: fix COW writeback race

commit d2b3964a upstream.

Due to the way how xfs_iomap_write_allocate tries to convert the whole
found extents from delalloc to real space we can run into a race
condition with multiple threads doing writes to this same extent.
For the non-COW case that is harmless as the only thing that can happen
is that we call xfs_bmapi_write on an extent that has already been
converted to a real allocation.  For COW writes where we move the extent
from the COW to the data fork after I/O completion the race is, however,
not quite as harmless.  In the worst case we are now calling
xfs_bmapi_write on a region that contains hole in the COW work, which
will trip up an assert in debug builds or lead to file system corruption
in non-debug builds.  This seems to be reproducible with workloads of
small O_DSYNC write, although so far I've not managed to come up with
a with an isolated reproducer.

The fix for the issue is relatively simple:  tell xfs_bmapi_write
that we are only asked to convert delayed allocations and skip holes
in that case.
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarBrian Foster <bfoster@redhat.com>
Reviewed-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 29f31927
...@@ -4607,8 +4607,6 @@ xfs_bmapi_write( ...@@ -4607,8 +4607,6 @@ xfs_bmapi_write(
int n; /* current extent index */ int n; /* current extent index */
xfs_fileoff_t obno; /* old block number (offset) */ xfs_fileoff_t obno; /* old block number (offset) */
int whichfork; /* data or attr fork */ int whichfork; /* data or attr fork */
char inhole; /* current location is hole in file */
char wasdelay; /* old extent was delayed */
#ifdef DEBUG #ifdef DEBUG
xfs_fileoff_t orig_bno; /* original block number value */ xfs_fileoff_t orig_bno; /* original block number value */
...@@ -4694,22 +4692,44 @@ xfs_bmapi_write( ...@@ -4694,22 +4692,44 @@ xfs_bmapi_write(
bma.firstblock = firstblock; bma.firstblock = firstblock;
while (bno < end && n < *nmap) { while (bno < end && n < *nmap) {
inhole = eof || bma.got.br_startoff > bno; bool need_alloc = false, wasdelay = false;
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
/* /* in hole or beyoned EOF? */
* Make sure we only reflink into a hole. if (eof || bma.got.br_startoff > bno) {
*/ if (flags & XFS_BMAPI_DELALLOC) {
if (flags & XFS_BMAPI_REMAP) /*
ASSERT(inhole); * For the COW fork we can reasonably get a
if (flags & XFS_BMAPI_COWFORK) * request for converting an extent that races
ASSERT(!inhole); * with other threads already having converted
* part of it, as there converting COW to
* regular blocks is not protected using the
* IOLOCK.
*/
ASSERT(flags & XFS_BMAPI_COWFORK);
if (!(flags & XFS_BMAPI_COWFORK)) {
error = -EIO;
goto error0;
}
if (eof || bno >= end)
break;
} else {
need_alloc = true;
}
} else {
/*
* Make sure we only reflink into a hole.
*/
ASSERT(!(flags & XFS_BMAPI_REMAP));
if (isnullstartblock(bma.got.br_startblock))
wasdelay = true;
}
/* /*
* First, deal with the hole before the allocated space * First, deal with the hole before the allocated space
* that we found, if any. * that we found, if any.
*/ */
if (inhole || wasdelay) { if (need_alloc || wasdelay) {
bma.eof = eof; bma.eof = eof;
bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.conv = !!(flags & XFS_BMAPI_CONVERT);
bma.wasdel = wasdelay; bma.wasdel = wasdelay;
......
...@@ -110,6 +110,9 @@ struct xfs_extent_free_item ...@@ -110,6 +110,9 @@ struct xfs_extent_free_item
/* Map something in the CoW fork. */ /* Map something in the CoW fork. */
#define XFS_BMAPI_COWFORK 0x200 #define XFS_BMAPI_COWFORK 0x200
/* Only convert delalloc space, don't allocate entirely new extents */
#define XFS_BMAPI_DELALLOC 0x400
#define XFS_BMAPI_FLAGS \ #define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \
...@@ -120,7 +123,8 @@ struct xfs_extent_free_item ...@@ -120,7 +123,8 @@ struct xfs_extent_free_item
{ XFS_BMAPI_CONVERT, "CONVERT" }, \ { XFS_BMAPI_CONVERT, "CONVERT" }, \
{ XFS_BMAPI_ZERO, "ZERO" }, \ { XFS_BMAPI_ZERO, "ZERO" }, \
{ XFS_BMAPI_REMAP, "REMAP" }, \ { XFS_BMAPI_REMAP, "REMAP" }, \
{ XFS_BMAPI_COWFORK, "COWFORK" } { XFS_BMAPI_COWFORK, "COWFORK" }, \
{ XFS_BMAPI_DELALLOC, "DELALLOC" }
static inline int xfs_bmapi_aflag(int w) static inline int xfs_bmapi_aflag(int w)
......
...@@ -681,7 +681,7 @@ xfs_iomap_write_allocate( ...@@ -681,7 +681,7 @@ xfs_iomap_write_allocate(
xfs_trans_t *tp; xfs_trans_t *tp;
int nimaps; int nimaps;
int error = 0; int error = 0;
int flags = 0; int flags = XFS_BMAPI_DELALLOC;
int nres; int nres;
if (whichfork == XFS_COW_FORK) if (whichfork == XFS_COW_FORK)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment