Commit 2dd3d709, authored by Brian Foster and committed by Darrick J. Wong

xfs: relog dirty buffers during swapext bmbt owner change

The owner change bmbt scan that occurs during extent swap operations
does not handle ordered buffer failures. Buffers that cannot be
marked ordered must be physically logged so previously dirty ranges
of the buffer can be relogged in the transaction.

Since the bmbt scan may need to process and potentially log a large
number of blocks, we can't expect to complete this operation in a
single transaction. Update extent swap to use a permanent
transaction with enough log reservation to physically log a buffer.
Update the bmbt scan to physically log any buffers that cannot be
ordered and to terminate the scan with -EAGAIN. On -EAGAIN, the
caller rolls the transaction and restarts the scan. Finally, update
the bmbt scan helper function to skip bmbt blocks that already match
the expected owner so they are not reprocessed after scan restarts.
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
[darrick: fix the xfs_trans_roll call]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
parent a5814bce
...@@ -4452,10 +4452,15 @@ xfs_btree_block_change_owner( ...@@ -4452,10 +4452,15 @@ xfs_btree_block_change_owner(
/* modify the owner */ /* modify the owner */
block = xfs_btree_get_block(cur, level, &bp); block = xfs_btree_get_block(cur, level, &bp);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
return 0;
block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
else } else {
if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
return 0;
block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner); block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
}
/* /*
* If the block is a root block hosted in an inode, we might not have a * If the block is a root block hosted in an inode, we might not have a
...@@ -4464,14 +4469,19 @@ xfs_btree_block_change_owner( ...@@ -4464,14 +4469,19 @@ xfs_btree_block_change_owner(
* block is formatted into the on-disk inode fork. We still change it, * block is formatted into the on-disk inode fork. We still change it,
* though, so everything is consistent in memory. * though, so everything is consistent in memory.
*/ */
if (bp) { if (!bp) {
if (cur->bc_tp)
xfs_trans_ordered_buf(cur->bc_tp, bp);
else
xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
} else {
ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
ASSERT(level == cur->bc_nlevels - 1); ASSERT(level == cur->bc_nlevels - 1);
return 0;
}
if (cur->bc_tp) {
if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
return -EAGAIN;
}
} else {
xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
} }
return 0; return 0;
......
...@@ -1931,6 +1931,48 @@ xfs_swap_extent_forks( ...@@ -1931,6 +1931,48 @@ xfs_swap_extent_forks(
return 0; return 0;
} }
/*
 * Rewrite the owner field of the data fork bmbt blocks of @ip so that they
 * carry @ip's own inode number.
 *
 * The owner change scan tries to attach each buffer it modifies to the
 * transaction as an ordered buffer. When a buffer cannot be ordered, the scan
 * physically logs it instead and bails out with -EAGAIN. That fallback
 * consumes log reservation, so on -EAGAIN we roll the transaction to get a
 * fresh reservation and then run the scan again. The loop ends when the scan
 * returns anything other than -EAGAIN, or when the roll itself fails.
 *
 * @tpp:   in/out transaction pointer; updated by every roll
 * @ip:    inode whose data fork bmbt blocks get the new owner
 * @tmpip: the other inode taking part in the extent swap; it is rejoined and
 *         relogged together with @ip after each roll
 *
 * Returns 0 on success or a negative errno from the scan or the roll.
 */
static int
xfs_swap_change_owner(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	struct xfs_inode	*tmpip)
{
	int			ret;

	for (;;) {
		ret = xfs_bmbt_change_owner(*tpp, ip, XFS_DATA_FORK,
					    ip->i_ino, NULL);
		/* Anything but -EAGAIN is final: either done or a hard error. */
		if (ret != -EAGAIN)
			return ret;

		/* Rolling replaces *tpp with the follow-on transaction. */
		ret = xfs_trans_roll(tpp);
		if (ret)
			return ret;

		/*
		 * Join and relog both inodes in the new transaction so that
		 * they are redirtied and the log tail keeps moving forward.
		 */
		xfs_trans_ijoin(*tpp, ip, 0);
		xfs_trans_ijoin(*tpp, tmpip, 0);
		xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
		xfs_trans_log_inode(*tpp, tmpip, XFS_ILOG_CORE);
	}
}
int int
xfs_swap_extents( xfs_swap_extents(
struct xfs_inode *ip, /* target inode */ struct xfs_inode *ip, /* target inode */
...@@ -1945,7 +1987,7 @@ xfs_swap_extents( ...@@ -1945,7 +1987,7 @@ xfs_swap_extents(
int lock_flags; int lock_flags;
struct xfs_ifork *cowfp; struct xfs_ifork *cowfp;
uint64_t f; uint64_t f;
int resblks; int resblks = 0;
/* /*
* Lock the inodes against other IO, page faults and truncate to * Lock the inodes against other IO, page faults and truncate to
...@@ -1993,11 +2035,8 @@ xfs_swap_extents( ...@@ -1993,11 +2035,8 @@ xfs_swap_extents(
XFS_SWAP_RMAP_SPACE_RES(mp, XFS_SWAP_RMAP_SPACE_RES(mp,
XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK), XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
XFS_DATA_FORK); XFS_DATA_FORK);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, }
0, 0, &tp); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
} else
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0,
0, 0, &tp);
if (error) if (error)
goto out_unlock; goto out_unlock;
...@@ -2089,14 +2128,12 @@ xfs_swap_extents( ...@@ -2089,14 +2128,12 @@ xfs_swap_extents(
* inode number of the current inode. * inode number of the current inode.
*/ */
if (src_log_flags & XFS_ILOG_DOWNER) { if (src_log_flags & XFS_ILOG_DOWNER) {
error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, error = xfs_swap_change_owner(&tp, ip, tip);
ip->i_ino, NULL);
if (error) if (error)
goto out_trans_cancel; goto out_trans_cancel;
} }
if (target_log_flags & XFS_ILOG_DOWNER) { if (target_log_flags & XFS_ILOG_DOWNER) {
error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, error = xfs_swap_change_owner(&tp, tip, ip);
tip->i_ino, NULL);
if (error) if (error)
goto out_trans_cancel; goto out_trans_cancel;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment