Commit a349e4c6 authored by Linus Torvalds

Merge tag 'xfs-5.10-fixes-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
 "The critical fixes are for a crash that someone reported in the xattr
  code on 32-bit arm last week; and a revert of the rmap key comparison
  change from last week as it was totally wrong. I need a vacation. :(

  Summary:

   - Fix various deficiencies in online fsck's metadata checking code

   - Fix an integer casting bug in the xattr code on 32-bit systems

   - Fix a hang in an inode walk when the inode index is corrupt

   - Fix error codes being dropped when initializing per-AG structures

   - Fix nowait directio writes that partially succeed but return EAGAIN

   - Revert last week's rmap comparison patch because it was wrong"

* tag 'xfs-5.10-fixes-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: revert "xfs: fix rmap key and record comparison functions"
  xfs: don't allow NOWAIT DIO across extent boundaries
  xfs: return corresponding errcode if xfs_initialize_perag() fail
  xfs: ensure inobt record walks always make forward progress
  xfs: fix forkoff miscalculation related to XFS_LITINO(mp)
  xfs: directory scrub should check the null bestfree entries too
  xfs: strengthen rmap record flags checking
  xfs: fix the minrecs logic when dealing with inode root child blocks
parents ba911108 eb840907
......@@ -515,7 +515,7 @@ xfs_attr_copy_value(
*========================================================================*/
/*
* Query whether the requested number of additional bytes of extended
* Query whether the total requested number of attr fork bytes of extended
* attribute space will be able to fit inline.
*
* Returns zero if not, else the di_forkoff fork offset to be used in the
......@@ -535,6 +535,12 @@ xfs_attr_shortform_bytesfit(
int maxforkoff;
int offset;
/*
* Check if the new size could fit at all first:
*/
if (bytes > XFS_LITINO(mp))
return 0;
/* rounded down */
offset = (XFS_LITINO(mp) - bytes) >> 3;
......
......@@ -243,8 +243,8 @@ xfs_rmapbt_key_diff(
else if (y > x)
return -1;
x = be64_to_cpu(kp->rm_offset);
y = xfs_rmap_irec_offset_pack(rec);
x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
y = rec->rm_offset;
if (x > y)
return 1;
else if (y > x)
......@@ -275,8 +275,8 @@ xfs_rmapbt_diff_two_keys(
else if (y > x)
return -1;
x = be64_to_cpu(kp1->rm_offset);
y = be64_to_cpu(kp2->rm_offset);
x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
if (x > y)
return 1;
else if (y > x)
......@@ -390,8 +390,8 @@ xfs_rmapbt_keys_inorder(
return 1;
else if (a > b)
return 0;
a = be64_to_cpu(k1->rmap.rm_offset);
b = be64_to_cpu(k2->rmap.rm_offset);
a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
if (a <= b)
return 1;
return 0;
......@@ -420,8 +420,8 @@ xfs_rmapbt_recs_inorder(
return 1;
else if (a > b)
return 0;
a = be64_to_cpu(r1->rmap.rm_offset);
b = be64_to_cpu(r2->rmap.rm_offset);
a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
if (a <= b)
return 1;
return 0;
......
......@@ -218,13 +218,13 @@ xchk_bmap_xref_rmap(
* which doesn't track unwritten state.
*/
if (owner != XFS_RMAP_OWN_COW &&
irec->br_state == XFS_EXT_UNWRITTEN &&
!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
!!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
if (info->whichfork == XFS_ATTR_FORK &&
!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
if (!!(info->whichfork == XFS_ATTR_FORK) !=
!!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
......
......@@ -452,32 +452,41 @@ xchk_btree_check_minrecs(
int level,
struct xfs_btree_block *block)
{
unsigned int numrecs;
int ok_level;
numrecs = be16_to_cpu(block->bb_numrecs);
struct xfs_btree_cur *cur = bs->cur;
unsigned int root_level = cur->bc_nlevels - 1;
unsigned int numrecs = be16_to_cpu(block->bb_numrecs);
/* More records than minrecs means the block is ok. */
if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level))
if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
return;
/*
* Certain btree blocks /can/ have fewer than minrecs records. Any
* level greater than or equal to the level of the highest dedicated
* btree block are allowed to violate this constraint.
*
* For a btree rooted in a block, the btree root can have fewer than
* minrecs records. If the btree is rooted in an inode and does not
* store records in the root, the direct children of the root and the
* root itself can have fewer than minrecs records.
* For btrees rooted in the inode, it's possible that the root block
* contents spilled into a regular ondisk block because there wasn't
* enough space in the inode root. The number of records in that
* child block might be less than the standard minrecs, but that's ok
* provided that there's only one direct child of the root.
*/
ok_level = bs->cur->bc_nlevels - 1;
if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
ok_level--;
if (level >= ok_level)
if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
level == cur->bc_nlevels - 2) {
struct xfs_btree_block *root_block;
struct xfs_buf *root_bp;
int root_maxrecs;
root_block = xfs_btree_get_block(cur, root_level, &root_bp);
root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
if (be16_to_cpu(root_block->bb_numrecs) != 1 ||
numrecs <= root_maxrecs)
xchk_btree_set_corrupt(bs->sc, cur, level);
return;
}
xchk_btree_set_corrupt(bs->sc, bs->cur, level);
/*
* Otherwise, only the root level is allowed to have fewer than minrecs
* records or keyptrs.
*/
if (level < root_level)
xchk_btree_set_corrupt(bs->sc, cur, level);
}
/*
......
......@@ -558,14 +558,27 @@ xchk_directory_leaf1_bestfree(
/* Check all the bestfree entries. */
for (i = 0; i < bestcount; i++, bestp++) {
best = be16_to_cpu(*bestp);
if (best == NULLDATAOFF)
continue;
error = xfs_dir3_data_read(sc->tp, sc->ip,
i * args->geo->fsbcount, 0, &dbp);
xfs_dir2_db_to_da(args->geo, i),
XFS_DABUF_MAP_HOLE_OK,
&dbp);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
&error))
break;
xchk_directory_check_freesp(sc, lblk, dbp, best);
if (!dbp) {
if (best != NULLDATAOFF) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
lblk);
break;
}
continue;
}
if (best == NULLDATAOFF)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
else
xchk_directory_check_freesp(sc, lblk, dbp, best);
xfs_trans_brelse(sc->tp, dbp);
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
break;
......
......@@ -706,6 +706,23 @@ xfs_ilock_for_iomap(
return 0;
}
/*
 * Decide whether a single extent mapping covers the whole block range the
 * caller asked for.
 *
 * Returns true only when @imap begins at or before @offset_fsb and the end of
 * the mapping (br_startoff + br_blockcount) reaches at least @end_fsb;
 * otherwise returns false.
 */
static bool
imap_spans_range(
	struct xfs_bmbt_irec	*imap,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	/* Both edges of the request must fall inside the mapping. */
	return imap->br_startoff <= offset_fsb &&
	       imap->br_startoff + imap->br_blockcount >= end_fsb;
}
static int
xfs_direct_write_iomap_begin(
struct inode *inode,
......@@ -766,6 +783,18 @@ xfs_direct_write_iomap_begin(
if (imap_needs_alloc(inode, flags, &imap, nimaps))
goto allocate_blocks;
/*
* NOWAIT IO needs to span the entire requested IO with a single map so
* that we avoid partial IO failures due to the rest of the IO range not
* covered by this map triggering an EAGAIN condition when it is
* subsequently mapped and aborting the IO.
*/
if ((flags & IOMAP_NOWAIT) &&
!imap_spans_range(&imap, offset_fsb, end_fsb)) {
error = -EAGAIN;
goto out_unlock;
}
xfs_iunlock(ip, lockmode);
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
......
......@@ -55,6 +55,9 @@ struct xfs_iwalk_ag {
/* Where do we start the traversal? */
xfs_ino_t startino;
/* What was the last inode number we saw when iterating the inobt? */
xfs_ino_t lastino;
/* Array of inobt records we cache. */
struct xfs_inobt_rec_incore *recs;
......@@ -301,6 +304,9 @@ xfs_iwalk_ag_start(
if (XFS_IS_CORRUPT(mp, *has_more != 1))
return -EFSCORRUPTED;
iwag->lastino = XFS_AGINO_TO_INO(mp, agno,
irec->ir_startino + XFS_INODES_PER_CHUNK - 1);
/*
* If the LE lookup yielded an inobt record before the cursor position,
* skip it and see if there's another one after it.
......@@ -347,15 +353,17 @@ xfs_iwalk_run_callbacks(
struct xfs_mount *mp = iwag->mp;
struct xfs_trans *tp = iwag->tp;
struct xfs_inobt_rec_incore *irec;
xfs_agino_t restart;
xfs_agino_t next_agino;
int error;
next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1;
ASSERT(iwag->nr_recs > 0);
/* Delete cursor but remember the last record we cached... */
xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
irec = &iwag->recs[iwag->nr_recs - 1];
restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK);
error = xfs_iwalk_ag_recs(iwag);
if (error)
......@@ -372,7 +380,7 @@ xfs_iwalk_run_callbacks(
if (error)
return error;
return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more);
}
/* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
......@@ -396,6 +404,7 @@ xfs_iwalk_ag(
while (!error && has_more) {
struct xfs_inobt_rec_incore *irec;
xfs_ino_t rec_fsino;
cond_resched();
if (xfs_pwork_want_abort(&iwag->pwork))
......@@ -407,6 +416,15 @@ xfs_iwalk_ag(
if (error || !has_more)
break;
/* Make sure that we always move forward. */
rec_fsino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino);
if (iwag->lastino != NULLFSINO &&
XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) {
error = -EFSCORRUPTED;
goto out;
}
iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1;
/* No allocated inodes in this chunk; skip it. */
if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
error = xfs_btree_increment(cur, 0, &has_more);
......@@ -535,6 +553,7 @@ xfs_iwalk(
.trim_start = 1,
.skip_empty = 1,
.pwork = XFS_PWORK_SINGLE_THREADED,
.lastino = NULLFSINO,
};
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
int error;
......@@ -623,6 +642,7 @@ xfs_iwalk_threaded(
iwag->data = data;
iwag->startino = startino;
iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
iwag->lastino = NULLFSINO;
xfs_pwork_queue(&pctl, &iwag->pwork);
startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
if (flags & XFS_INOBT_WALK_SAME_AG)
......@@ -696,6 +716,7 @@ xfs_inobt_walk(
.startino = startino,
.sz_recs = xfs_inobt_walk_prefetch(inobt_records),
.pwork = XFS_PWORK_SINGLE_THREADED,
.lastino = NULLFSINO,
};
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
int error;
......
......@@ -194,20 +194,25 @@ xfs_initialize_perag(
}
pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
if (!pag)
if (!pag) {
error = -ENOMEM;
goto out_unwind_new_pags;
}
pag->pag_agno = index;
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
if (xfs_buf_hash_init(pag))
error = xfs_buf_hash_init(pag);
if (error)
goto out_free_pag;
init_waitqueue_head(&pag->pagb_wait);
spin_lock_init(&pag->pagb_lock);
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
if (radix_tree_preload(GFP_NOFS))
error = radix_tree_preload(GFP_NOFS);
if (error)
goto out_hash_destroy;
spin_lock(&mp->m_perag_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment