Commit a349e4c6 authored by Linus Torvalds

Merge tag 'xfs-5.10-fixes-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
 "The critical fixes are for a crash that someone reported in the xattr
  code on 32-bit arm last week; and a revert of the rmap key comparison
  change from last week as it was totally wrong. I need a vacation. :(

  Summary:

   - Fix various deficiencies in online fsck's metadata checking code

   - Fix an integer casting bug in the xattr code on 32-bit systems

   - Fix a hang in an inode walk when the inode index is corrupt

   - Fix error codes being dropped when initializing per-AG structures

   - Fix nowait directio writes that partially succeed but return EAGAIN

   - Revert last week's rmap comparison patch because it was wrong"

* tag 'xfs-5.10-fixes-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: revert "xfs: fix rmap key and record comparison functions"
  xfs: don't allow NOWAIT DIO across extent boundaries
  xfs: return corresponding errcode if xfs_initialize_perag() fail
  xfs: ensure inobt record walks always make forward progress
  xfs: fix forkoff miscalculation related to XFS_LITINO(mp)
  xfs: directory scrub should check the null bestfree entries too
  xfs: strengthen rmap record flags checking
  xfs: fix the minrecs logic when dealing with inode root child blocks
parents ba911108 eb840907
fs/xfs/libxfs/xfs_attr_leaf.c:

@@ -515,7 +515,7 @@ xfs_attr_copy_value(
  *========================================================================*/

 /*
- * Query whether the requested number of additional bytes of extended
+ * Query whether the total requested number of attr fork bytes of extended
  * attribute space will be able to fit inline.
  *
  * Returns zero if not, else the di_forkoff fork offset to be used in the
@@ -535,6 +535,12 @@ xfs_attr_shortform_bytesfit(
         int maxforkoff;
         int offset;

+        /*
+         * Check if the new size could fit at all first:
+         */
+        if (bytes > XFS_LITINO(mp))
+                return 0;
+
         /* rounded down */
         offset = (XFS_LITINO(mp) - bytes) >> 3;
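The new early return matters because XFS_LITINO(mp) is an unsigned quantity, so once bytes exceeds the inline space the subtraction on the following line wraps around before the shift instead of going negative. A minimal sketch of that failure mode in plain C, not XFS code (forkoff_candidate and its parameters are illustrative names):

#include <stdio.h>

static unsigned int forkoff_candidate(unsigned int litino, unsigned int bytes)
{
        if (bytes > litino)             /* the added check: cannot fit at all */
                return 0;
        return (litino - bytes) >> 3;   /* rounded down, as in the patch */
}

int main(void)
{
        /* Without the guard, 400u - 500u wraps to ~4 billion before >> 3. */
        printf("%u\n", (400u - 500u) >> 3);             /* huge bogus offset */
        printf("%u\n", forkoff_candidate(400, 500));    /* 0: does not fit */
        return 0;
}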
fs/xfs/libxfs/xfs_rmap_btree.c:

@@ -243,8 +243,8 @@ xfs_rmapbt_key_diff(
         else if (y > x)
                 return -1;

-        x = be64_to_cpu(kp->rm_offset);
-        y = xfs_rmap_irec_offset_pack(rec);
+        x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
+        y = rec->rm_offset;
         if (x > y)
                 return 1;
         else if (y > x)
@@ -275,8 +275,8 @@ xfs_rmapbt_diff_two_keys(
         else if (y > x)
                 return -1;

-        x = be64_to_cpu(kp1->rm_offset);
-        y = be64_to_cpu(kp2->rm_offset);
+        x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
+        y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
         if (x > y)
                 return 1;
         else if (y > x)
@@ -390,8 +390,8 @@ xfs_rmapbt_keys_inorder(
                 return 1;
         else if (a > b)
                 return 0;
-        a = be64_to_cpu(k1->rmap.rm_offset);
-        b = be64_to_cpu(k2->rmap.rm_offset);
+        a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
+        b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
         if (a <= b)
                 return 1;
         return 0;
@@ -420,8 +420,8 @@ xfs_rmapbt_recs_inorder(
                 return 1;
         else if (a > b)
                 return 0;
-        a = be64_to_cpu(r1->rmap.rm_offset);
-        b = be64_to_cpu(r2->rmap.rm_offset);
+        a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
+        b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
         if (a <= b)
                 return 1;
         return 0;
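The revert restores the XFS_RMAP_OFF() masking: the on-disk rm_offset field packs flag bits into its high bits, so ordering comparisons must strip those bits before comparing the logical offsets. A hedged sketch of the idiom; the flag layout below is assumed for illustration, not the authoritative XFS format:

#include <stdint.h>
#include <stdio.h>

#define FLAG_BITS       (3ULL << 62)    /* assumed flag area in the top bits */
#define OFF_MASK        (~FLAG_BITS)

static int cmp_offsets(uint64_t a, uint64_t b)
{
        a &= OFF_MASK;          /* compare logical offsets, not flags */
        b &= OFF_MASK;
        if (a > b)
                return 1;
        if (b > a)
                return -1;
        return 0;
}

int main(void)
{
        uint64_t x = 100 | (1ULL << 62);        /* offset 100, flag set */
        uint64_t y = 200;                       /* offset 200, no flags */

        /* A raw comparison would sort x after y; masking orders correctly. */
        printf("%d\n", cmp_offsets(x, y));      /* -1: 100 < 200 */
        return 0;
}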
fs/xfs/scrub/bmap.c:

@@ -218,13 +218,13 @@ xchk_bmap_xref_rmap(
          * which doesn't track unwritten state.
          */
         if (owner != XFS_RMAP_OWN_COW &&
-            irec->br_state == XFS_EXT_UNWRITTEN &&
-            !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
+            !!(irec->br_state == XFS_EXT_UNWRITTEN) !=
+            !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
                 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
                                 irec->br_startoff);

-        if (info->whichfork == XFS_ATTR_FORK &&
-            !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
+        if (!!(info->whichfork == XFS_ATTR_FORK) !=
+            !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
                 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
                                 irec->br_startoff);

         if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
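The strengthened checks replace a one-way implication (extent unwritten but rmap flag clear) with a two-way equivalence: normalizing each condition with !! and requiring the results to agree also catches the opposite mismatch, where the flag is set but the extent is written. A minimal standalone illustration of the idiom:

#include <stdbool.h>
#include <stdio.h>

static bool mismatch(int ext_unwritten, unsigned int flags, unsigned int bit)
{
        /* exclusive-or of two conditions, each normalized to 0 or 1 */
        return !!ext_unwritten != !!(flags & bit);
}

int main(void)
{
        /* Flag set but the extent is written: the old check missed this. */
        printf("%d\n", mismatch(0, 0x1, 0x1));  /* 1: corruption flagged */
        printf("%d\n", mismatch(1, 0x1, 0x1));  /* 0: states agree */
        return 0;
}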
fs/xfs/scrub/btree.c:

@@ -452,32 +452,41 @@ xchk_btree_check_minrecs(
         int level,
         struct xfs_btree_block *block)
 {
-        unsigned int numrecs;
-        int ok_level;
-
-        numrecs = be16_to_cpu(block->bb_numrecs);
+        struct xfs_btree_cur *cur = bs->cur;
+        unsigned int root_level = cur->bc_nlevels - 1;
+        unsigned int numrecs = be16_to_cpu(block->bb_numrecs);

         /* More records than minrecs means the block is ok. */
-        if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level))
+        if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
                 return;

         /*
-         * Certain btree blocks /can/ have fewer than minrecs records. Any
-         * level greater than or equal to the level of the highest dedicated
-         * btree block are allowed to violate this constraint.
-         *
-         * For a btree rooted in a block, the btree root can have fewer than
-         * minrecs records. If the btree is rooted in an inode and does not
-         * store records in the root, the direct children of the root and the
-         * root itself can have fewer than minrecs records.
+         * For btrees rooted in the inode, it's possible that the root block
+         * contents spilled into a regular ondisk block because there wasn't
+         * enough space in the inode root. The number of records in that
+         * child block might be less than the standard minrecs, but that's ok
+         * provided that there's only one direct child of the root.
          */
-        ok_level = bs->cur->bc_nlevels - 1;
-        if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
-                ok_level--;
-        if (level >= ok_level)
+        if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+            level == cur->bc_nlevels - 2) {
+                struct xfs_btree_block *root_block;
+                struct xfs_buf *root_bp;
+                int root_maxrecs;
+
+                root_block = xfs_btree_get_block(cur, root_level, &root_bp);
+                root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
+                if (be16_to_cpu(root_block->bb_numrecs) != 1 ||
+                    numrecs <= root_maxrecs)
+                        xchk_btree_set_corrupt(bs->sc, cur, level);
                 return;
+        }

-        xchk_btree_set_corrupt(bs->sc, bs->cur, level);
+        /*
+         * Otherwise, only the root level is allowed to have fewer than
+         * minrecs records or keyptrs.
+         */
+        if (level < root_level)
+                xchk_btree_set_corrupt(bs->sc, cur, level);
 }

 /*
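Condensed, the rewritten check enforces three cases: a block holding at least minrecs records is fine; the lone child of an inode root may be undersized only if the root has exactly one child and that child could not have fit back into the root; otherwise only the root level itself may be short. A sketch of that decision tree (illustrative, not the scrub code):

#include <stdio.h>

/* Returns 1 if the block violates the minrecs rule, 0 if it is acceptable. */
static int minrecs_violation(int level, int root_level, int root_in_inode,
                             int numrecs, int minrecs,
                             int root_numrecs, int root_maxrecs)
{
        if (numrecs >= minrecs)
                return 0;               /* enough records: always fine */
        if (root_in_inode && level == root_level - 1)
                /*
                 * Undersized child of an inode root: legitimate only when
                 * the root has one child and that child's records would
                 * not have fit back into the inode root.
                 */
                return root_numrecs != 1 || numrecs <= root_maxrecs;
        return level < root_level;      /* only the root may be short */
}

int main(void)
{
        /* Lone child too big for the inode root: acceptable. */
        printf("%d\n", minrecs_violation(1, 2, 1, 5, 10, 1, 3));  /* 0 */
        /* Same child, but it would have fit in the root: corrupt. */
        printf("%d\n", minrecs_violation(1, 2, 1, 2, 10, 1, 3));  /* 1 */
        return 0;
}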
fs/xfs/scrub/dir.c:

@@ -558,14 +558,27 @@ xchk_directory_leaf1_bestfree(
         /* Check all the bestfree entries. */
         for (i = 0; i < bestcount; i++, bestp++) {
                 best = be16_to_cpu(*bestp);
-                if (best == NULLDATAOFF)
-                        continue;
                 error = xfs_dir3_data_read(sc->tp, sc->ip,
-                                i * args->geo->fsbcount, 0, &dbp);
+                                xfs_dir2_db_to_da(args->geo, i),
+                                XFS_DABUF_MAP_HOLE_OK,
+                                &dbp);
                 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
                                 &error))
                         break;
-                xchk_directory_check_freesp(sc, lblk, dbp, best);
+
+                if (!dbp) {
+                        if (best != NULLDATAOFF) {
+                                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
+                                                lblk);
+                                break;
+                        }
+                        continue;
+                }
+
+                if (best == NULLDATAOFF)
+                        xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+                else
+                        xchk_directory_check_freesp(sc, lblk, dbp, best);
                 xfs_trans_brelse(sc->tp, dbp);
                 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                         break;
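The scrub now checks both directions of the bestfree invariant: a NULLDATAOFF entry must correspond to a hole in the directory data fork, and a mapped block must carry a real bestfree value. A small sketch of that invariant; the NULLDATAOFF value here is assumed for illustration:

#include <stdbool.h>
#include <stdio.h>

#define NULLDATAOFF     0xffffU         /* assumed "no block" sentinel */

static bool bestfree_consistent(bool block_mapped, unsigned int best)
{
        if (!block_mapped)
                return best == NULLDATAOFF;     /* hole: entry must be null */
        return best != NULLDATAOFF;             /* mapped: entry must be real */
}

int main(void)
{
        printf("%d\n", bestfree_consistent(false, NULLDATAOFF)); /* 1: ok */
        printf("%d\n", bestfree_consistent(true, NULLDATAOFF));  /* 0: corrupt */
        return 0;
}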
fs/xfs/xfs_iomap.c:

@@ -706,6 +706,23 @@ xfs_ilock_for_iomap(
         return 0;
 }

+/*
+ * Check that the imap we are going to return to the caller spans the entire
+ * range that the caller requested for the IO.
+ */
+static bool
+imap_spans_range(
+        struct xfs_bmbt_irec *imap,
+        xfs_fileoff_t offset_fsb,
+        xfs_fileoff_t end_fsb)
+{
+        if (imap->br_startoff > offset_fsb)
+                return false;
+        if (imap->br_startoff + imap->br_blockcount < end_fsb)
+                return false;
+        return true;
+}
+
 static int
 xfs_direct_write_iomap_begin(
         struct inode *inode,
@@ -766,6 +783,18 @@ xfs_direct_write_iomap_begin(
         if (imap_needs_alloc(inode, flags, &imap, nimaps))
                 goto allocate_blocks;

+        /*
+         * NOWAIT IO needs to span the entire requested IO with a single map
+         * so that we avoid partial IO failures due to the rest of the IO
+         * range not covered by this map triggering an EAGAIN condition when
+         * it is subsequently mapped and aborting the IO.
+         */
+        if ((flags & IOMAP_NOWAIT) &&
+            !imap_spans_range(&imap, offset_fsb, end_fsb)) {
+                error = -EAGAIN;
+                goto out_unlock;
+        }
+
         xfs_iunlock(ip, lockmode);
         trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
         return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
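The effect of the span check, with concrete numbers and simplified types (the real code uses xfs_fileoff_t and struct xfs_bmbt_irec): an extent covering blocks [0, 8) does not span a request for [4, 12), so a NOWAIT write over that range must fail with -EAGAIN up front rather than succeed partially:

#include <stdbool.h>
#include <stdio.h>

struct extent {
        unsigned long long start;       /* first block of the mapping */
        unsigned long long count;       /* number of mapped blocks */
};

static bool spans_range(const struct extent *e,
                        unsigned long long off, unsigned long long end)
{
        return e->start <= off && e->start + e->count >= end;
}

int main(void)
{
        struct extent map = { .start = 0, .count = 8 };

        printf("%d\n", spans_range(&map, 4, 12));       /* 0: would be partial */
        printf("%d\n", spans_range(&map, 0, 8));        /* 1: fully covered */
        return 0;
}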
fs/xfs/xfs_iwalk.c:

@@ -55,6 +55,9 @@ struct xfs_iwalk_ag {
         /* Where do we start the traversal? */
         xfs_ino_t startino;

+        /* What was the last inode number we saw when iterating the inobt? */
+        xfs_ino_t lastino;
+
         /* Array of inobt records we cache. */
         struct xfs_inobt_rec_incore *recs;

@@ -301,6 +304,9 @@ xfs_iwalk_ag_start(
         if (XFS_IS_CORRUPT(mp, *has_more != 1))
                 return -EFSCORRUPTED;

+        iwag->lastino = XFS_AGINO_TO_INO(mp, agno,
+                        irec->ir_startino + XFS_INODES_PER_CHUNK - 1);
+
         /*
          * If the LE lookup yielded an inobt record before the cursor position,
          * skip it and see if there's another one after it.
@@ -347,15 +353,17 @@ xfs_iwalk_run_callbacks(
         struct xfs_mount *mp = iwag->mp;
         struct xfs_trans *tp = iwag->tp;
         struct xfs_inobt_rec_incore *irec;
-        xfs_agino_t restart;
+        xfs_agino_t next_agino;
         int error;

+        next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1;
+
         ASSERT(iwag->nr_recs > 0);

         /* Delete cursor but remember the last record we cached... */
         xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
         irec = &iwag->recs[iwag->nr_recs - 1];
-        restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
+        ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK);

         error = xfs_iwalk_ag_recs(iwag);
         if (error)
@@ -372,7 +380,7 @@ xfs_iwalk_run_callbacks(
         if (error)
                 return error;

-        return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
+        return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more);
 }

 /* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
@@ -396,6 +404,7 @@ xfs_iwalk_ag(
         while (!error && has_more) {
                 struct xfs_inobt_rec_incore *irec;
+                xfs_ino_t rec_fsino;

                 cond_resched();
                 if (xfs_pwork_want_abort(&iwag->pwork))
@@ -407,6 +416,15 @@ xfs_iwalk_ag(
                 if (error || !has_more)
                         break;

+                /* Make sure that we always move forward. */
+                rec_fsino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino);
+                if (iwag->lastino != NULLFSINO &&
+                    XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) {
+                        error = -EFSCORRUPTED;
+                        goto out;
+                }
+                iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1;
+
                 /* No allocated inodes in this chunk; skip it. */
                 if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
                         error = xfs_btree_increment(cur, 0, &has_more);
@@ -535,6 +553,7 @@ xfs_iwalk(
                 .trim_start = 1,
                 .skip_empty = 1,
                 .pwork = XFS_PWORK_SINGLE_THREADED,
+                .lastino = NULLFSINO,
         };
         xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
         int error;
@@ -623,6 +642,7 @@ xfs_iwalk_threaded(
         iwag->data = data;
         iwag->startino = startino;
         iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
+        iwag->lastino = NULLFSINO;
         xfs_pwork_queue(&pctl, &iwag->pwork);
         startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
         if (flags & XFS_INOBT_WALK_SAME_AG)
@@ -696,6 +716,7 @@ xfs_inobt_walk(
                 .startino = startino,
                 .sz_recs = xfs_inobt_walk_prefetch(inobt_records),
                 .pwork = XFS_PWORK_SINGLE_THREADED,
+                .lastino = NULLFSINO,
         };
         xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
         int error;
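The forward-progress guard boils down to a monotonic cursor: remember the highest inode already visited and treat any inobt record at or below it as corruption instead of looping forever over a bad index. A simplified sketch of the pattern with generic types, not the XFS structures:

#include <stdio.h>

#define CURSOR_NONE     (~0ULL)         /* stands in for NULLFSINO */

static int walk_step(unsigned long long *last,
                     unsigned long long rec_start, unsigned long long chunk)
{
        if (*last != CURSOR_NONE && *last >= rec_start)
                return -1;              /* index went backwards: corrupt */
        *last = rec_start + chunk - 1;  /* advance past this record */
        return 0;
}

int main(void)
{
        unsigned long long last = CURSOR_NONE;

        walk_step(&last, 0, 64);                        /* first record: ok */
        printf("%d\n", walk_step(&last, 64, 64));       /* 0: moves forward */
        printf("%d\n", walk_step(&last, 64, 64));       /* -1: repeat caught */
        return 0;
}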
fs/xfs/xfs_mount.c:

@@ -194,20 +194,25 @@ xfs_initialize_perag(
         }

         pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
-        if (!pag)
+        if (!pag) {
+                error = -ENOMEM;
                 goto out_unwind_new_pags;
+        }
         pag->pag_agno = index;
         pag->pag_mount = mp;
         spin_lock_init(&pag->pag_ici_lock);
         INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
-        if (xfs_buf_hash_init(pag))
+
+        error = xfs_buf_hash_init(pag);
+        if (error)
                 goto out_free_pag;
         init_waitqueue_head(&pag->pagb_wait);
         spin_lock_init(&pag->pagb_lock);
         pag->pagb_count = 0;
         pag->pagb_tree = RB_ROOT;

-        if (radix_tree_preload(GFP_NOFS))
+        error = radix_tree_preload(GFP_NOFS);
+        if (error)
                 goto out_hash_destroy;

         spin_lock(&mp->m_perag_lock);
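The shape of the fix, in miniature: capture each helper's return value in error before jumping to the unwind labels, so the function can no longer return the zero it was initialized with. A self-contained sketch with hypothetical step/undo helpers:

#include <errno.h>
#include <stdio.h>

static int step_one(void) { return 0; }
static int step_two(void) { return -ENOMEM; }   /* simulate a failure */
static void undo_one(void) { }

static int init_thing(void)
{
        int error;

        error = step_one();     /* was the bug: if (step_one()) goto out; */
        if (error)
                goto out;
        error = step_two();
        if (error)
                goto out_undo_one;
        return 0;

out_undo_one:
        undo_one();
out:
        return error;
}

int main(void)
{
        printf("%d\n", init_thing());   /* -12: error code propagated */
        return 0;
}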