Commit 1aef882f authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'xfs-for-linus-4.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs update from Dave Chinner:
 "This update contains:

   - RENAME_WHITEOUT support

   - conversion of per-cpu superblock accounting to use generic counters

   - new inode mmap lock so that we can lock page faults out of
     truncate, hole punch and other direct extent manipulation functions
     to avoid racing mmap writes from causing data corruption

   - rework of direct IO submission and completion to solve data
     corruption issue when running concurrent extending DIO writes.
     Also solves problem of running IO completion transactions in
     interrupt context during size extending AIO writes.

   - FALLOC_FL_INSERT_RANGE support for inserting holes into a file via
     direct extent manipulation to avoid needing to copy data within the
     file

   - attribute block header field overflow fix for 64k block size
     filesystems

   - Lots of changes to log messaging to be more informative and concise
     when errors occur.  Also prevent a lot of unnecessary log spamming
     due to cascading failures in error conditions.

   - lots of cleanups and bug fixes

  One thing of note is the direct IO fixes that we merged last week
  after the window opened.  Even though a little late, they fix a user
  reported data corruption and have been pretty well tested.  I figured
  there was not much point waiting another 2 weeks for -rc1 to be
  released just so I could send them to you..."

* tag 'xfs-for-linus-4.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (49 commits)
  xfs: using generic_file_direct_write() is unnecessary
  xfs: direct IO EOF zeroing needs to drain AIO
  xfs: DIO write completion size updates race
  xfs: DIO writes within EOF don't need an ioend
  xfs: handle DIO overwrite EOF update completion correctly
  xfs: DIO needs an ioend for writes
  xfs: move DIO mapping size calculation
  xfs: factor DIO write mapping from get_blocks
  xfs: unlock i_mutex in xfs_break_layouts
  xfs: kill unnecessary firstused overflow check on attr3 leaf removal
  xfs: use larger in-core attr firstused field and detect overflow
  xfs: pass attr geometry to attr leaf header conversion functions
  xfs: disallow ro->rw remount on norecovery mount
  xfs: xfs_shift_file_space can be static
  xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate
  fs: Add support FALLOC_FL_INSERT_RANGE for fallocate
  xfs: Fix incorrect positive ENOMEM return
  xfs: xfs_mru_cache_insert() should use GFP_NOFS
  xfs: %pF is only for function pointers
  xfs: fix shadow warning in xfs_da3_root_split()
  ...
parents d869844b 542c3118
......@@ -228,30 +228,19 @@ default behaviour.
Deprecated Mount Options
========================
delaylog/nodelaylog
Delayed logging is the only logging method that XFS supports
now, so these mount options are now ignored.
Due for removal in 3.12.
ihashsize=value
In memory inode hashes have been removed, so this option has
no function as of August 2007. Option is deprecated.
Due for removal in 3.12.
None at present.
irixsgid
This behaviour is now controlled by a sysctl, so the mount
option is ignored.
Due for removal in 3.12.
Removed Mount Options
=====================
osyncisdsync
osyncisosync
O_SYNC and O_DSYNC are fully supported, so there is no need
for these options any more.
Name Removed
---- -------
delaylog/nodelaylog v3.20
ihashsize v3.20
irixsgid v3.20
osyncisdsync/osyncisosync v3.20
Due for removal in 3.12.
sysctls
=======
......
......@@ -231,8 +231,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
return -EINVAL;
/* Return error if mode is not supported */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
if (mode & ~FALLOC_FL_SUPPORTED_MASK)
return -EOPNOTSUPP;
/* Punch hole and zero range are mutually exclusive */
......@@ -250,6 +249,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
(mode & ~FALLOC_FL_COLLAPSE_RANGE))
return -EINVAL;
/* Insert range should only be used exclusively. */
if ((mode & FALLOC_FL_INSERT_RANGE) &&
(mode & ~FALLOC_FL_INSERT_RANGE))
return -EINVAL;
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
......
This diff is collapsed.
This diff is collapsed.
......@@ -100,9 +100,11 @@ int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);
int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp);
void xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
void xfs_attr3_leaf_hdr_from_disk(struct xfs_da_geometry *geo,
struct xfs_attr3_icleaf_hdr *to,
struct xfs_attr_leafblock *from);
void xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to,
void xfs_attr3_leaf_hdr_to_disk(struct xfs_da_geometry *geo,
struct xfs_attr_leafblock *to,
struct xfs_attr3_icleaf_hdr *from);
#endif /* __XFS_ATTR_LEAF_H__ */
This diff is collapsed.
......@@ -166,6 +166,11 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
*/
#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
enum shift_direction {
SHIFT_LEFT = 0,
SHIFT_RIGHT,
};
#ifdef DEBUG
void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
int whichfork, unsigned long caller_ip);
......@@ -211,8 +216,10 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip);
int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb,
int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock,
struct xfs_bmap_free *flist, int num_exts);
xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
struct xfs_bmap_free *flist, enum shift_direction direction,
int num_exts);
int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
#endif /* __XFS_BMAP_H__ */
......@@ -168,7 +168,7 @@ xfs_btree_check_lptr(
xfs_fsblock_t bno, /* btree block disk address */
int level) /* btree block level */
{
XFS_WANT_CORRUPTED_RETURN(
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
level > 0 &&
bno != NULLFSBLOCK &&
XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
......@@ -187,7 +187,7 @@ xfs_btree_check_sptr(
{
xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
XFS_WANT_CORRUPTED_RETURN(
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
level > 0 &&
bno != NULLAGBLOCK &&
bno != 0 &&
......@@ -1825,7 +1825,7 @@ xfs_btree_lookup(
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto error0;
XFS_WANT_CORRUPTED_RETURN(i == 1);
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
......@@ -2285,7 +2285,7 @@ xfs_btree_rshift(
if (error)
goto error0;
i = xfs_btree_lastrec(tcur, level);
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
error = xfs_btree_increment(tcur, level, &i);
if (error)
......@@ -3138,7 +3138,7 @@ xfs_btree_insert(
goto error0;
}
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
level++;
/*
......@@ -3582,15 +3582,15 @@ xfs_btree_delrec(
* Actually any entry but the first would suffice.
*/
i = xfs_btree_lastrec(tcur, level);
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
error = xfs_btree_increment(tcur, level, &i);
if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
i = xfs_btree_lastrec(tcur, level);
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
/* Grab a pointer to the block. */
right = xfs_btree_get_block(tcur, level, &rbp);
......@@ -3634,12 +3634,12 @@ xfs_btree_delrec(
rrecs = xfs_btree_get_numrecs(right);
if (!xfs_btree_ptr_is_null(cur, &lptr)) {
i = xfs_btree_firstrec(tcur, level);
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
error = xfs_btree_decrement(tcur, level, &i);
if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
}
}
......@@ -3653,13 +3653,13 @@ xfs_btree_delrec(
* previous block.
*/
i = xfs_btree_firstrec(tcur, level);
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
error = xfs_btree_decrement(tcur, level, &i);
if (error)
goto error0;
i = xfs_btree_firstrec(tcur, level);
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
/* Grab a pointer to the block. */
left = xfs_btree_get_block(tcur, level, &lbp);
......
......@@ -538,12 +538,12 @@ xfs_da3_root_split(
oldroot = blk1->bp->b_addr;
if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
struct xfs_da3_icnode_hdr nodehdr;
struct xfs_da3_icnode_hdr icnodehdr;
dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot);
dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
btree = dp->d_ops->node_tree_p(oldroot);
size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot);
level = nodehdr.level;
size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
level = icnodehdr.level;
/*
* we are about to copy oldroot to bp, so set up the type
......
......@@ -725,7 +725,13 @@ struct xfs_attr3_icleaf_hdr {
__uint16_t magic;
__uint16_t count;
__uint16_t usedbytes;
__uint16_t firstused;
/*
* firstused is 32-bit here instead of 16-bit like the on-disk variant
* to support maximum fsb size of 64k without overflow issues throughout
* the attr code. Instead, the overflow condition is handled on
* conversion to/from disk.
*/
__uint32_t firstused;
__u8 holes;
struct {
__uint16_t base;
......@@ -733,6 +739,12 @@ struct xfs_attr3_icleaf_hdr {
} freemap[XFS_ATTR_LEAF_MAPSIZE];
};
/*
* Special value to represent fs block size in the leaf header firstused field.
* Only used when block size overflows the 2-bytes available on disk.
*/
#define XFS_ATTR3_LEAF_NULLOFF 0
/*
* Flags used in the leaf_entry[i].flags field.
* NOTE: the INCOMPLETE bit must not collide with the flags bits specified
......
......@@ -89,7 +89,7 @@ __xfs_dir3_data_check(
* so just ensure that the count falls somewhere inside the
* block right now.
*/
XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) <
XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) <
((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
break;
case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
......@@ -107,21 +107,21 @@ __xfs_dir3_data_check(
bf = ops->data_bestfree_p(hdr);
count = lastfree = freeseen = 0;
if (!bf[0].length) {
XFS_WANT_CORRUPTED_RETURN(!bf[0].offset);
XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset);
freeseen |= 1 << 0;
}
if (!bf[1].length) {
XFS_WANT_CORRUPTED_RETURN(!bf[1].offset);
XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset);
freeseen |= 1 << 1;
}
if (!bf[2].length) {
XFS_WANT_CORRUPTED_RETURN(!bf[2].offset);
XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset);
freeseen |= 1 << 2;
}
XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >=
XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >=
be16_to_cpu(bf[1].length));
XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >=
XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >=
be16_to_cpu(bf[2].length));
/*
* Loop over the data/unused entries.
......@@ -134,18 +134,18 @@ __xfs_dir3_data_check(
* doesn't need to be there.
*/
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
XFS_WANT_CORRUPTED_RETURN(lastfree == 0);
XFS_WANT_CORRUPTED_RETURN(
XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0);
XFS_WANT_CORRUPTED_RETURN(mp,
be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
(char *)dup - (char *)hdr);
dfp = xfs_dir2_data_freefind(hdr, bf, dup);
if (dfp) {
i = (int)(dfp - bf);
XFS_WANT_CORRUPTED_RETURN(
XFS_WANT_CORRUPTED_RETURN(mp,
(freeseen & (1 << i)) == 0);
freeseen |= 1 << i;
} else {
XFS_WANT_CORRUPTED_RETURN(
XFS_WANT_CORRUPTED_RETURN(mp,
be16_to_cpu(dup->length) <=
be16_to_cpu(bf[2].length));
}
......@@ -160,13 +160,13 @@ __xfs_dir3_data_check(
* The linear search is crude but this is DEBUG code.
*/
dep = (xfs_dir2_data_entry_t *)p;
XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0);
XFS_WANT_CORRUPTED_RETURN(
XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0);
XFS_WANT_CORRUPTED_RETURN(mp,
!xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
XFS_WANT_CORRUPTED_RETURN(
XFS_WANT_CORRUPTED_RETURN(mp,
be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
(char *)dep - (char *)hdr);
XFS_WANT_CORRUPTED_RETURN(
XFS_WANT_CORRUPTED_RETURN(mp,
ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
count++;
lastfree = 0;
......@@ -183,14 +183,15 @@ __xfs_dir3_data_check(
be32_to_cpu(lep[i].hashval) == hash)
break;
}
XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));
XFS_WANT_CORRUPTED_RETURN(mp,
i < be32_to_cpu(btp->count));
}
p += ops->data_entsize(dep->namelen);
}
/*
* Need to have seen all the entries and all the bestfree slots.
*/
XFS_WANT_CORRUPTED_RETURN(freeseen == 7);
XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7);
if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
......@@ -198,13 +199,13 @@ __xfs_dir3_data_check(
cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
if (i > 0)
XFS_WANT_CORRUPTED_RETURN(
XFS_WANT_CORRUPTED_RETURN(mp,
be32_to_cpu(lep[i].hashval) >=
be32_to_cpu(lep[i - 1].hashval));
}
XFS_WANT_CORRUPTED_RETURN(count ==
XFS_WANT_CORRUPTED_RETURN(mp, count ==
be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale));
XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale));
}
return 0;
}
......
......@@ -264,68 +264,6 @@ typedef struct xfs_dsb {
/* must be padded to 64 bit alignment */
} xfs_dsb_t;
/*
* Sequence number values for the fields.
*/
typedef enum {
XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
XFS_SBS_PQUOTINO, XFS_SBS_LSN,
XFS_SBS_FIELDCOUNT
} xfs_sb_field_t;
/*
* Mask values, defined based on the xfs_sb_field_t values.
* Only define the ones we're using.
*/
#define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x)
#define XFS_SB_UUID XFS_SB_MVAL(UUID)
#define XFS_SB_FNAME XFS_SB_MVAL(FNAME)
#define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO)
#define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO)
#define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO)
#define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM)
#define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO)
#define XFS_SB_GQUOTINO XFS_SB_MVAL(GQUOTINO)
#define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS)
#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN)
#define XFS_SB_UNIT XFS_SB_MVAL(UNIT)
#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH)
#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT)
#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
#define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \
XFS_SB_MVAL(BAD_FEATURES2))
#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
#define XFS_SB_CRC XFS_SB_MVAL(CRC)
#define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO)
#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
#define XFS_SB_MOD_BITS \
(XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \
XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \
XFS_SB_PQUOTINO)
/*
* Misc. Flags - warning - these will be cleared by xfs_repair unless
......
......@@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc(
*/
newlen = args.mp->m_ialloc_inos;
if (args.mp->m_maxicount &&
args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
percpu_counter_read(&args.mp->m_icount) + newlen >
args.mp->m_maxicount)
return -ENOSPC;
args.minlen = args.maxlen = args.mp->m_ialloc_blks;
/*
......@@ -700,7 +701,7 @@ xfs_ialloc_next_rec(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
}
return 0;
......@@ -724,7 +725,7 @@ xfs_ialloc_get_rec(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
}
return 0;
......@@ -783,12 +784,12 @@ xfs_dialloc_ag_inobt(
error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
error = xfs_inobt_get_rec(cur, &rec, &j);
if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(j == 1, error0);
XFS_WANT_CORRUPTED_GOTO(mp, j == 1, error0);
if (rec.ir_freecount > 0) {
/*
......@@ -944,19 +945,19 @@ xfs_dialloc_ag_inobt(
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
for (;;) {
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
if (rec.ir_freecount > 0)
break;
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
}
alloc_inode:
......@@ -1016,7 +1017,7 @@ xfs_dialloc_ag_finobt_near(
error = xfs_inobt_get_rec(lcur, rec, &i);
if (error)
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
XFS_WANT_CORRUPTED_RETURN(lcur->bc_mp, i == 1);
/*
* See if we've landed in the parent inode record. The finobt
......@@ -1039,10 +1040,10 @@ xfs_dialloc_ag_finobt_near(
error = xfs_inobt_get_rec(rcur, &rrec, &j);
if (error)
goto error_rcur;
XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, j == 1, error_rcur);
}
XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, i == 1 || j == 1, error_rcur);
if (i == 1 && j == 1) {
/*
* Both the left and right records are valid. Choose the closer
......@@ -1095,7 +1096,7 @@ xfs_dialloc_ag_finobt_newino(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
return 0;
}
}
......@@ -1106,12 +1107,12 @@ xfs_dialloc_ag_finobt_newino(
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
if (error)
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
return 0;
}
......@@ -1133,19 +1134,19 @@ xfs_dialloc_ag_update_inobt(
error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
if (error)
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error)
return error;
XFS_WANT_CORRUPTED_RETURN(i == 1);
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
rec.ir_free &= ~XFS_INOBT_MASK(offset);
rec.ir_freecount--;
XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, (rec.ir_free == frec->ir_free) &&
(rec.ir_freecount == frec->ir_freecount));
return xfs_inobt_update(cur, &rec);
......@@ -1340,7 +1341,8 @@ xfs_dialloc(
* inode.
*/
if (mp->m_maxicount &&
mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
mp->m_maxicount) {
noroom = 1;
okalloc = 0;
}
......@@ -1475,14 +1477,14 @@ xfs_difree_inobt(
__func__, error);
goto error0;
}
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error) {
xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
__func__, error);
goto error0;
}
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
/*
* Get the offset in the inode chunk.
*/
......@@ -1592,7 +1594,7 @@ xfs_difree_finobt(
* freed an inode in a previously fully allocated chunk. If not,
* something is out of sync.
*/
XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);
XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
ibtrec->ir_free, &i);
......@@ -1613,12 +1615,12 @@ xfs_difree_finobt(
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error)
goto error;
XFS_WANT_CORRUPTED_GOTO(i == 1, error);
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
rec.ir_free |= XFS_INOBT_MASK(offset);
rec.ir_freecount++;
XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
XFS_WANT_CORRUPTED_GOTO(mp, (rec.ir_free == ibtrec->ir_free) &&
(rec.ir_freecount == ibtrec->ir_freecount),
error);
......
......@@ -111,14 +111,6 @@ xfs_mount_validate_sb(
bool check_inprogress,
bool check_version)
{
/*
* If the log device and data device have the
* same device number, the log is internal.
* Consequently, the sb_logstart should be non-zero. If
* we have a zero sb_logstart in this case, we may be trying to mount
* a volume filesystem in a non-volume manner.
*/
if (sbp->sb_magicnum != XFS_SB_MAGIC) {
xfs_warn(mp, "bad magic number");
return -EWRONGFS;
......@@ -743,17 +735,15 @@ xfs_initialize_perag_data(
btree += pag->pagf_btreeblks;
xfs_perag_put(pag);
}
/*
* Overwrite incore superblock counters with just-read data
*/
/* Overwrite incore superblock counters with just-read data */
spin_lock(&mp->m_sb_lock);
sbp->sb_ifree = ifree;
sbp->sb_icount = ialloc;
sbp->sb_fdblocks = bfree + bfreelst + btree;
spin_unlock(&mp->m_sb_lock);
/* Fixup the per-cpu counters as well. */
xfs_icsb_reinit_counters(mp);
xfs_reinit_percpu_counters(mp);
return 0;
}
......@@ -771,6 +761,10 @@ xfs_log_sb(
struct xfs_mount *mp = tp->t_mountp;
struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0);
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
......
This diff is collapsed.
......@@ -132,9 +132,10 @@ xfs_attr3_leaf_inactive(
int size;
int tmp;
int i;
struct xfs_mount *mp = bp->b_target->bt_mount;
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
/*
* Count the number of "remote" value extents.
......
......@@ -225,6 +225,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
int error, i;
struct xfs_buf *bp;
struct xfs_inode *dp = context->dp;
struct xfs_mount *mp = dp->i_mount;
trace_xfs_attr_node_list(context);
......@@ -256,7 +257,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
case XFS_ATTR_LEAF_MAGIC:
case XFS_ATTR3_LEAF_MAGIC:
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo,
&leafhdr, leaf);
entries = xfs_attr3_leaf_entryp(leaf);
if (cursor->hashval > be32_to_cpu(
entries[leafhdr.count - 1].hashval)) {
......@@ -340,7 +342,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
xfs_trans_brelse(NULL, bp);
return error;
}
xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
if (context->seen_enough || leafhdr.forw == 0)
break;
cursor->blkno = leafhdr.forw;
......@@ -368,11 +370,12 @@ xfs_attr3_leaf_list_int(
struct xfs_attr_leaf_entry *entry;
int retval;
int i;
struct xfs_mount *mp = context->dp->i_mount;
trace_xfs_attr_list_leaf(context);
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
entries = xfs_attr3_leaf_entryp(leaf);
cursor = context->cursor;
......
......@@ -1376,22 +1376,19 @@ xfs_zero_file_space(
}
/*
* xfs_collapse_file_space()
* This routine frees disk space and shift extent for the given file.
* The first thing we do is to free data blocks in the specified range
* by calling xfs_free_file_space(). It would also sync dirty data
* and invalidate page cache over the region on which collapse range
* is working. And Shift extent records to the left to cover a hole.
* RETURNS:
* 0 on success
* errno on error
*
* @next_fsb will keep track of the extent currently undergoing shift.
* @stop_fsb will keep track of the extent at which we have to stop.
* If we are shifting left, we will start with block (offset + len) and
* shift each extent till last extent.
* If we are shifting right, we will start with last extent inside file space
* and continue until we reach the block corresponding to offset.
*/
int
xfs_collapse_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
xfs_off_t len)
static int
xfs_shift_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
xfs_off_t len,
enum shift_direction direction)
{
int done = 0;
struct xfs_mount *mp = ip->i_mount;
......@@ -1400,21 +1397,26 @@ xfs_collapse_file_space(
struct xfs_bmap_free free_list;
xfs_fsblock_t first_block;
int committed;
xfs_fileoff_t start_fsb;
xfs_fileoff_t stop_fsb;
xfs_fileoff_t next_fsb;
xfs_fileoff_t shift_fsb;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
trace_xfs_collapse_file_space(ip);
if (direction == SHIFT_LEFT) {
next_fsb = XFS_B_TO_FSB(mp, offset + len);
stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
} else {
/*
* If right shift, delegate the work of initialization of
* next_fsb to xfs_bmap_shift_extent as it has ilock held.
*/
next_fsb = NULLFSBLOCK;
stop_fsb = XFS_B_TO_FSB(mp, offset);
}
next_fsb = XFS_B_TO_FSB(mp, offset + len);
shift_fsb = XFS_B_TO_FSB(mp, len);
error = xfs_free_file_space(ip, offset, len);
if (error)
return error;
/*
* Trim eofblocks to avoid shifting uninitialized post-eof preallocation
* into the accessible region of the file.
......@@ -1427,20 +1429,28 @@ xfs_collapse_file_space(
/*
* Writeback and invalidate cache for the remainder of the file as we're
* about to shift down every extent from the collapse range to EOF. The
* free of the collapse range above might have already done some of
* this, but we shouldn't rely on it to do anything outside of the range
* that was freed.
* about to shift down every extent from offset to EOF.
*/
error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
offset + len, -1);
offset, -1);
if (error)
return error;
error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
(offset + len) >> PAGE_CACHE_SHIFT, -1);
offset >> PAGE_CACHE_SHIFT, -1);
if (error)
return error;
/*
* The extent shiting code works on extent granularity. So, if
* stop_fsb is not the starting block of extent, we need to split
* the extent at stop_fsb.
*/
if (direction == SHIFT_RIGHT) {
error = xfs_bmap_split_extent(ip, stop_fsb);
if (error)
return error;
}
while (!error && !done) {
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
/*
......@@ -1464,7 +1474,7 @@ xfs_collapse_file_space(
if (error)
goto out;
xfs_trans_ijoin(tp, ip, 0);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_bmap_init(&free_list, &first_block);
......@@ -1472,10 +1482,9 @@ xfs_collapse_file_space(
* We are using the write transaction in which max 2 bmbt
* updates are allowed
*/
start_fsb = next_fsb;
error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb,
&done, &next_fsb, &first_block, &free_list,
XFS_BMAP_MAX_SHIFT_EXTENTS);
error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
&done, stop_fsb, &first_block, &free_list,
direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
if (error)
goto out;
......@@ -1484,17 +1493,69 @@ xfs_collapse_file_space(
goto out;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
return error;
out:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
/*
* xfs_collapse_file_space()
* This routine frees disk space and shift extent for the given file.
* The first thing we do is to free data blocks in the specified range
* by calling xfs_free_file_space(). It would also sync dirty data
* and invalidate page cache over the region on which collapse range
* is working. And Shift extent records to the left to cover a hole.
* RETURNS:
* 0 on success
* errno on error
*
*/
int
xfs_collapse_file_space(
struct xfs_inode *ip,
xfs_off_t offset,
xfs_off_t len)
{
int error;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
trace_xfs_collapse_file_space(ip);
error = xfs_free_file_space(ip, offset, len);
if (error)
return error;
return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
}
/*
* xfs_insert_file_space()
* This routine create hole space by shifting extents for the given file.
* The first thing we do is to sync dirty data and invalidate page cache
* over the region on which insert range is working. And split an extent
* to two extents at given offset by calling xfs_bmap_split_extent.
* And shift all extent records which are laying between [offset,
* last allocated extent] to the right to reserve hole range.
* RETURNS:
* 0 on success
* errno on error
*/
int
xfs_insert_file_space(
struct xfs_inode *ip,
loff_t offset,
loff_t len)
{
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
trace_xfs_insert_file_space(ip);
return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT);
}
/*
* We need to check that the format of the data fork in the temporary inode is
* valid for the target inode before doing the swap. This is not a problem with
......@@ -1599,13 +1660,6 @@ xfs_swap_extent_flush(
/* Verify O_DIRECT for ftmp */
if (VFS_I(ip)->i_mapping->nrpages)
return -EINVAL;
/*
* Don't try to swap extents on mmap()d files because we can't lock
* out races against page faults safely.
*/
if (mapping_mapped(VFS_I(ip)->i_mapping))
return -EBUSY;
return 0;
}
......@@ -1633,13 +1687,14 @@ xfs_swap_extents(
}
/*
* Lock up the inodes against other IO and truncate to begin with.
* Then we can ensure the inodes are flushed and have no page cache
* safely. Once we have done this we can take the ilocks and do the rest
* of the checks.
* Lock the inodes against other IO, page faults and truncate to
* begin with. Then we can ensure the inodes are flushed and have no
* page cache safely. Once we have done this we can take the ilocks and
* do the rest of the checks.
*/
lock_flags = XFS_IOLOCK_EXCL;
lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
/* Verify that both files have the same format */
if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
......@@ -1666,8 +1721,16 @@ xfs_swap_extents(
xfs_trans_cancel(tp, 0);
goto out_unlock;
}
/*
* Lock and join the inodes to the tansaction so that transaction commit
* or cancel will unlock the inodes from this point onwards.
*/
xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
lock_flags |= XFS_ILOCK_EXCL;
xfs_trans_ijoin(tp, ip, lock_flags);
xfs_trans_ijoin(tp, tip, lock_flags);
/* Verify all data are being swapped */
if (sxp->sx_offset != 0 ||
......@@ -1720,9 +1783,6 @@ xfs_swap_extents(
goto out_trans_cancel;
}
xfs_trans_ijoin(tp, ip, lock_flags);
xfs_trans_ijoin(tp, tip, lock_flags);
/*
* Before we've swapped the forks, lets set the owners of the forks
* appropriately. We have to do this as we are demand paging the btree
......@@ -1856,5 +1916,5 @@ xfs_swap_extents(
out_trans_cancel:
xfs_trans_cancel(tp, 0);
goto out_unlock;
goto out;
}
......@@ -63,6 +63,8 @@ int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
xfs_off_t len);
int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
xfs_off_t len);
/* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
......
......@@ -537,9 +537,9 @@ xfs_buf_item_push(
/* has a previous flush failed due to IO errors? */
if ((bp->b_flags & XBF_WRITE_FAIL) &&
___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) {
___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
xfs_warn(bp->b_target->bt_mount,
"Detected failing async write on buffer block 0x%llx. Retrying async write.",
"Failing async write on buffer block 0x%llx. Retrying async write.",
(long long)bp->b_bn);
}
......
......@@ -84,7 +84,7 @@ xfs_trim_extents(
error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
if (error)
goto out_del_cursor;
XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_del_cursor);
ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
/*
......
......@@ -131,7 +131,7 @@ xfs_error_report(
{
if (level <= xfs_error_level) {
xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
"Internal error %s at line %d of file %s. Caller %pF",
"Internal error %s at line %d of file %s. Caller %pS",
tag, linenum, filename, ra);
xfs_stack_trace();
......
......@@ -40,25 +40,25 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
/*
* Macros to set EFSCORRUPTED & return/branch.
*/
#define XFS_WANT_CORRUPTED_GOTO(x,l) \
#define XFS_WANT_CORRUPTED_GOTO(mp, x, l) \
{ \
int fs_is_ok = (x); \
ASSERT(fs_is_ok); \
if (unlikely(!fs_is_ok)) { \
XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
XFS_ERRLEVEL_LOW, NULL); \
XFS_ERRLEVEL_LOW, mp); \
error = -EFSCORRUPTED; \
goto l; \
} \
}
#define XFS_WANT_CORRUPTED_RETURN(x) \
#define XFS_WANT_CORRUPTED_RETURN(mp, x) \
{ \
int fs_is_ok = (x); \
ASSERT(fs_is_ok); \
if (unlikely(!fs_is_ok)) { \
XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
XFS_ERRLEVEL_LOW, NULL); \
XFS_ERRLEVEL_LOW, mp); \
return -EFSCORRUPTED; \
} \
}
......
......@@ -559,7 +559,7 @@ xfs_file_aio_write_checks(
if (error <= 0)
return error;
error = xfs_break_layouts(inode, iolock);
error = xfs_break_layouts(inode, iolock, true);
if (error)
return error;
......@@ -569,21 +569,42 @@ xfs_file_aio_write_checks(
* write. If zeroing is needed and we are currently holding the
* iolock shared, we need to update it to exclusive which implies
* having to redo all checks before.
*
* We need to serialise against EOF updates that occur in IO
* completions here. We want to make sure that nobody is changing the
* size while we do this check until we have placed an IO barrier (i.e.
* hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
* The spinlock effectively forms a memory barrier once we have the
* XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
* and hence be able to correctly determine if we need to run zeroing.
*/
spin_lock(&ip->i_flags_lock);
if (iocb->ki_pos > i_size_read(inode)) {
bool zero = false;
spin_unlock(&ip->i_flags_lock);
if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
iov_iter_reexpand(from, count);
/*
* We now have an IO submission barrier in place, but
* AIO can do EOF updates during IO completion and hence
* we now need to wait for all of them to drain. Non-AIO
* DIO will have drained before we are given the
* XFS_IOLOCK_EXCL, and so for most cases this wait is a
* no-op.
*/
inode_dio_wait(inode);
goto restart;
}
error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
if (error)
return error;
}
} else
spin_unlock(&ip->i_flags_lock);
/*
* Updating the timestamps will grab the ilock again from
......@@ -645,6 +666,8 @@ xfs_file_dio_aio_write(
int iolock;
size_t count = iov_iter_count(from);
loff_t pos = iocb->ki_pos;
loff_t end;
struct iov_iter data;
struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
......@@ -685,10 +708,11 @@ xfs_file_dio_aio_write(
goto out;
count = iov_iter_count(from);
pos = iocb->ki_pos;
end = pos + count - 1;
if (mapping->nrpages) {
ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
pos, pos + count - 1);
pos, end);
if (ret)
goto out;
/*
......@@ -698,7 +722,7 @@ xfs_file_dio_aio_write(
*/
ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
pos >> PAGE_CACHE_SHIFT,
(pos + count - 1) >> PAGE_CACHE_SHIFT);
end >> PAGE_CACHE_SHIFT);
WARN_ON_ONCE(ret);
ret = 0;
}
......@@ -715,8 +739,22 @@ xfs_file_dio_aio_write(
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
ret = generic_file_direct_write(iocb, from, pos);
data = *from;
ret = mapping->a_ops->direct_IO(iocb, &data, pos);
/* see generic_file_direct_write() for why this is necessary */
if (mapping->nrpages) {
invalidate_inode_pages2_range(mapping,
pos >> PAGE_CACHE_SHIFT,
end >> PAGE_CACHE_SHIFT);
}
if (ret > 0) {
pos += ret;
iov_iter_advance(from, ret);
iocb->ki_pos = pos;
}
out:
xfs_rw_iunlock(ip, iolock);
......@@ -822,6 +860,11 @@ xfs_file_write_iter(
return ret;
}
#define XFS_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
FALLOC_FL_INSERT_RANGE)
STATIC long
xfs_file_fallocate(
struct file *file,
......@@ -835,18 +878,21 @@ xfs_file_fallocate(
enum xfs_prealloc_flags flags = 0;
uint iolock = XFS_IOLOCK_EXCL;
loff_t new_size = 0;
bool do_file_insert = 0;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
if (mode & ~XFS_FALLOC_FL_SUPPORTED)
return -EOPNOTSUPP;
xfs_ilock(ip, iolock);
error = xfs_break_layouts(inode, &iolock);
error = xfs_break_layouts(inode, &iolock, false);
if (error)
goto out_unlock;
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
iolock |= XFS_MMAPLOCK_EXCL;
if (mode & FALLOC_FL_PUNCH_HOLE) {
error = xfs_free_file_space(ip, offset, len);
if (error)
......@@ -873,6 +919,27 @@ xfs_file_fallocate(
error = xfs_collapse_file_space(ip, offset, len);
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
new_size = i_size_read(inode) + len;
if (offset & blksize_mask || len & blksize_mask) {
error = -EINVAL;
goto out_unlock;
}
/* check the new inode size does not wrap through zero */
if (new_size > inode->i_sb->s_maxbytes) {
error = -EFBIG;
goto out_unlock;
}
/* Offset should be less than i_size */
if (offset >= i_size_read(inode)) {
error = -EINVAL;
goto out_unlock;
}
do_file_insert = 1;
} else {
flags |= XFS_PREALLOC_SET;
......@@ -907,8 +974,19 @@ xfs_file_fallocate(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
error = xfs_setattr_size(ip, &iattr);
if (error)
goto out_unlock;
}
/*
* Perform hole insertion now that the file size has been
* updated so that if we crash during the operation we don't
* leave shifted extents past EOF and hence losing access to
* the data that is contained within them.
*/
if (do_file_insert)
error = xfs_insert_file_space(ip, offset, len);
out_unlock:
xfs_iunlock(ip, iolock);
return error;
......@@ -996,20 +1074,6 @@ xfs_file_mmap(
return 0;
}
/*
* mmap()d file has taken write protection fault and is being made
* writable. We can set the page state up correctly for a writable
* page, which means we can do correct delalloc accounting (ENOSPC
* checking!) and unwritten extent mapping.
*/
STATIC int
xfs_vm_page_mkwrite(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
return block_page_mkwrite(vma, vmf, xfs_get_blocks);
}
/*
* This type is designed to indicate the type of offset we would like
* to search from page cache for xfs_seek_hole_data().
......@@ -1385,6 +1449,55 @@ xfs_file_llseek(
}
}
/*
* Locking for serialisation of IO during page faults. This results in a lock
* ordering of:
*
* mmap_sem (MM)
* i_mmap_lock (XFS - truncate serialisation)
* page_lock (MM)
* i_lock (XFS - extent map serialisation)
*/
STATIC int
xfs_filemap_fault(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
int error;
trace_xfs_filemap_fault(ip);
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
error = filemap_fault(vma, vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
return error;
}
/*
* mmap()d file has taken write protection fault and is being made writable. We
* can set the page state up correctly for a writable page, which means we can
* do correct delalloc accounting (ENOSPC checking!) and unwritten extent
* mapping.
*/
STATIC int
xfs_filemap_page_mkwrite(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
int error;
trace_xfs_filemap_page_mkwrite(ip);
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
return error;
}
const struct file_operations xfs_file_operations = {
.llseek = xfs_file_llseek,
.read_iter = xfs_file_read_iter,
......@@ -1415,7 +1528,7 @@ const struct file_operations xfs_dir_file_operations = {
};
static const struct vm_operations_struct xfs_file_vm_ops = {
.fault = filemap_fault,
.fault = xfs_filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = xfs_vm_page_mkwrite,
.page_mkwrite = xfs_filemap_page_mkwrite,
};
......@@ -322,7 +322,7 @@ xfs_filestream_lookup_ag(
pip = xfs_filestream_get_parent(ip);
if (!pip)
goto out;
return NULLAGNUMBER;
mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
if (mru) {
......
......@@ -637,12 +637,13 @@ xfs_fs_counts(
xfs_mount_t *mp,
xfs_fsop_counts_t *cnt)
{
xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
XFS_ALLOC_SET_ASIDE(mp);
spin_lock(&mp->m_sb_lock);
cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
cnt->freertx = mp->m_sb.sb_frextents;
cnt->freeino = mp->m_sb.sb_ifree;
cnt->allocino = mp->m_sb.sb_icount;
spin_unlock(&mp->m_sb_lock);
return 0;
}
......@@ -692,14 +693,9 @@ xfs_reserve_blocks(
* what to do. This means that the amount of free space can
* change while we do this, so we need to retry if we end up
* trying to reserve more space than is available.
*
* We also use the xfs_mod_incore_sb() interface so that we
* don't have to care about whether per cpu counter are
* enabled, disabled or even compiled in....
*/
retry:
spin_lock(&mp->m_sb_lock);
xfs_icsb_sync_counters_locked(mp, 0);
/*
* If our previous reservation was larger than the current value,
......@@ -716,7 +712,8 @@ xfs_reserve_blocks(
} else {
__int64_t free;
free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
free = percpu_counter_sum(&mp->m_fdblocks) -
XFS_ALLOC_SET_ASIDE(mp);
if (!free)
goto out; /* ENOSPC and fdblks_delta = 0 */
......@@ -755,8 +752,7 @@ xfs_reserve_blocks(
* the extra reserve blocks from the reserve.....
*/
int error;
error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
fdblks_delta, 0);
error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
if (error == -ENOSPC)
goto retry;
}
......
......@@ -439,11 +439,11 @@ xfs_iget(
*ipp = ip;
/*
* If we have a real type for an on-disk inode, we can set ops(&unlock)
* If we have a real type for an on-disk inode, we can setup the inode
* now. If it's a new inode being created, xfs_ialloc will handle it.
*/
if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
xfs_setup_inode(ip);
xfs_setup_existing_inode(ip);
return 0;
out_error_or_again:
......
This diff is collapsed.
......@@ -56,6 +56,7 @@ typedef struct xfs_inode {
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */
mrlock_t i_mmaplock; /* inode mmap IO lock */
atomic_t i_pincount; /* inode pin count */
spinlock_t i_flags_lock; /* inode i_flags lock */
/* Miscellaneous state. */
......@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
#define XFS_IOLOCK_SHARED (1<<1)
#define XFS_ILOCK_EXCL (1<<2)
#define XFS_ILOCK_SHARED (1<<3)
#define XFS_MMAPLOCK_EXCL (1<<4)
#define XFS_MMAPLOCK_SHARED (1<<5)
#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
| XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
#define XFS_LOCK_FLAGS \
{ XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
{ XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
{ XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
{ XFS_ILOCK_SHARED, "ILOCK_SHARED" }
{ XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
{ XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \
{ XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" }
/*
......@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
#define XFS_IOLOCK_SHIFT 16
#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
#define XFS_MMAPLOCK_SHIFT 20
#define XFS_ILOCK_SHIFT 24
#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
#define XFS_IOLOCK_DEP_MASK 0x00ff0000
#define XFS_IOLOCK_DEP_MASK 0x000f0000
#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
#define XFS_ILOCK_DEP_MASK 0xff000000
#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
XFS_MMAPLOCK_DEP_MASK | \
XFS_ILOCK_DEP_MASK)
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \
>> XFS_IOLOCK_SHIFT)
#define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \
>> XFS_MMAPLOCK_SHIFT)
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \
>> XFS_ILOCK_SHIFT)
/*
* For multiple groups support: if S_ISGID bit is set in the parent
......@@ -391,6 +406,28 @@ int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
/* from xfs_iops.c */
/*
* When setting up a newly allocated inode, we need to call
* xfs_finish_inode_setup() once the inode is fully instantiated at
* the VFS level to prevent the rest of the world seeing the inode
* before we've completed instantiation. Otherwise we can do it
* the moment the inode lookup is complete.
*/
extern void xfs_setup_inode(struct xfs_inode *ip);
static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
{
xfs_iflags_clear(ip, XFS_INEW);
barrier();
unlock_new_inode(VFS_I(ip));
}
static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
{
xfs_setup_inode(ip);
xfs_finish_inode_setup(ip);
}
#define IHOLD(ip) \
do { \
ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
......
......@@ -631,7 +631,7 @@ xfs_ioc_space(
if (filp->f_flags & O_DSYNC)
flags |= XFS_PREALLOC_SYNC;
if (ioflags & XFS_IO_INVIS)
if (ioflags & XFS_IO_INVIS)
flags |= XFS_PREALLOC_INVISIBLE;
error = mnt_want_write_file(filp);
......@@ -639,10 +639,13 @@ xfs_ioc_space(
return error;
xfs_ilock(ip, iolock);
error = xfs_break_layouts(inode, &iolock);
error = xfs_break_layouts(inode, &iolock, false);
if (error)
goto out_unlock;
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
iolock |= XFS_MMAPLOCK_EXCL;
switch (bf->l_whence) {
case 0: /*SEEK_SET*/
break;
......
......@@ -460,8 +460,7 @@ xfs_iomap_prealloc_size(
alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
alloc_blocks);
xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
freesp = mp->m_sb.sb_fdblocks;
freesp = percpu_counter_read_positive(&mp->m_fdblocks);
if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
shift = 2;
if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
......
......@@ -187,6 +187,8 @@ xfs_generic_create(
else
d_instantiate(dentry, inode);
xfs_finish_inode_setup(ip);
out_free_acl:
if (default_acl)
posix_acl_release(default_acl);
......@@ -195,6 +197,7 @@ xfs_generic_create(
return error;
out_cleanup_inode:
xfs_finish_inode_setup(ip);
if (!tmpfile)
xfs_cleanup_inode(dir, inode, dentry);
iput(inode);
......@@ -367,9 +370,11 @@ xfs_vn_symlink(
goto out_cleanup_inode;
d_instantiate(dentry, inode);
xfs_finish_inode_setup(cip);
return 0;
out_cleanup_inode:
xfs_finish_inode_setup(cip);
xfs_cleanup_inode(dir, inode, dentry);
iput(inode);
out:
......@@ -389,7 +394,7 @@ xfs_vn_rename(
struct xfs_name oname;
struct xfs_name nname;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
/* if we are exchanging files, we need to set i_mode of both files */
......@@ -766,6 +771,7 @@ xfs_setattr_size(
return error;
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
ASSERT(S_ISREG(ip->i_d.di_mode));
ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
......@@ -829,55 +835,27 @@ xfs_setattr_size(
inode_dio_wait(inode);
/*
* Do all the page cache truncate work outside the transaction context
* as the "lock" order is page lock->log space reservation. i.e.
* locking pages inside the transaction can ABBA deadlock with
* writeback. We have to do the VFS inode size update before we truncate
* the pagecache, however, to avoid racing with page faults beyond the
* new EOF they are not serialised against truncate operations except by
* page locks and size updates.
* We've already locked out new page faults, so now we can safely remove
* pages from the page cache knowing they won't get refaulted until we
* drop the XFS_MMAP_EXCL lock after the extent manipulations are
* complete. The truncate_setsize() call also cleans partial EOF page
* PTEs on extending truncates and hence ensures sub-page block size
* filesystems are correctly handled, too.
*
* Hence we are in a situation where a truncate can fail with ENOMEM
* from xfs_trans_reserve(), but having already truncated the in-memory
* version of the file (i.e. made user visible changes). There's not
* much we can do about this, except to hope that the caller sees ENOMEM
* and retries the truncate operation.
* We have to do all the page cache truncate work outside the
* transaction context as the "lock" order is page lock->log space
* reservation as defined by extent allocation in the writeback path.
* Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
* having already truncated the in-memory version of the file (i.e. made
* user visible changes). There's not much we can do about this, except
* to hope that the caller sees ENOMEM and retries the truncate
* operation.
*/
error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
if (error)
return error;
truncate_setsize(inode, newsize);
/*
* The "we can't serialise against page faults" pain gets worse.
*
* If the file is mapped then we have to clean the page at the old EOF
* when extending the file. Extending the file can expose changes the
* underlying page mapping (e.g. from beyond EOF to a hole or
* unwritten), and so on the next attempt to write to that page we need
* to remap it for write. i.e. we need .page_mkwrite() to be called.
* Hence we need to clean the page to clean the pte and so a new write
* fault will be triggered appropriately.
*
* If we do it before we change the inode size, then we can race with a
* page fault that maps the page with exactly the same problem. If we do
* it after we change the file size, then a new page fault can come in
* and allocate space before we've run the rest of the truncate
* transaction. That's kinda grotesque, but it's better than have data
* over a hole, and so that's the lesser evil that has been chosen here.
*
* The real solution, however, is to have some mechanism for locking out
* page faults while a truncate is in progress.
*/
if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
error = filemap_write_and_wait_range(
VFS_I(ip)->i_mapping,
round_down(oldsize, PAGE_CACHE_SIZE),
round_up(oldsize, PAGE_CACHE_SIZE) - 1);
if (error)
return error;
}
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
if (error)
......@@ -975,9 +953,13 @@ xfs_vn_setattr(
uint iolock = XFS_IOLOCK_EXCL;
xfs_ilock(ip, iolock);
error = xfs_break_layouts(dentry->d_inode, &iolock);
if (!error)
error = xfs_break_layouts(dentry->d_inode, &iolock, true);
if (!error) {
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
iolock |= XFS_MMAPLOCK_EXCL;
error = xfs_setattr_size(ip, iattr);
}
xfs_iunlock(ip, iolock);
} else {
error = xfs_setattr_nonsize(ip, iattr, 0);
......@@ -1228,16 +1210,12 @@ xfs_diflags_to_iflags(
}
/*
* Initialize the Linux inode, set up the operation vectors and
* unlock the inode.
* Initialize the Linux inode and set up the operation vectors.
*
* When reading existing inodes from disk this is called directly
* from xfs_iget, when creating a new inode it is called from
* xfs_ialloc after setting up the inode.
*
* We are always called with an uninitialised linux inode here.
* We need to initialise the necessary fields and take a reference
* on it.
* When reading existing inodes from disk this is called directly from xfs_iget,
* when creating a new inode it is called from xfs_ialloc after setting up the
* inode. These callers have different criteria for clearing XFS_INEW, so leave
* it up to the caller to deal with unlocking the inode appropriately.
*/
void
xfs_setup_inode(
......@@ -1324,9 +1302,4 @@ xfs_setup_inode(
inode_has_no_xattr(inode);
cache_no_acl(inode);
}
xfs_iflags_clear(ip, XFS_INEW);
barrier();
unlock_new_inode(inode);
}
......@@ -25,8 +25,6 @@ extern const struct file_operations xfs_dir_file_operations;
extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
extern void xfs_setup_inode(struct xfs_inode *);
/*
* Internal setattr interfaces.
*/
......
......@@ -229,7 +229,7 @@ xfs_bulkstat_grab_ichunk(
error = xfs_inobt_get_rec(cur, irec, &stat);
if (error)
return error;
XFS_WANT_CORRUPTED_RETURN(stat == 1);
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1);
/* Check if the record contains the inode in request */
if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
......
......@@ -116,15 +116,6 @@ typedef __uint64_t __psunsigned_t;
#undef XFS_NATIVE_HOST
#endif
/*
* Feature macros (disable/enable)
*/
#ifdef CONFIG_SMP
#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
#else
#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
#endif
#define irix_sgid_inherit xfs_params.sgid_inherit.val
#define irix_symlink_mode xfs_params.symlink_mode.val
#define xfs_panic_mask xfs_params.panic_mask.val
......
......@@ -4463,10 +4463,10 @@ xlog_do_recover(
xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
ASSERT(xfs_sb_good_version(sbp));
xfs_reinit_percpu_counters(log->l_mp);
xfs_buf_relse(bp);
/* We've re-read the superblock so re-initialize per-cpu counters */
xfs_icsb_reinit_counters(log->l_mp);
xlog_recover_check_summary(log);
......
This diff is collapsed.
......@@ -18,8 +18,6 @@
#ifndef __XFS_MOUNT_H__
#define __XFS_MOUNT_H__
#ifdef __KERNEL__
struct xlog;
struct xfs_inode;
struct xfs_mru_cache;
......@@ -29,44 +27,6 @@ struct xfs_quotainfo;
struct xfs_dir_ops;
struct xfs_da_geometry;
#ifdef HAVE_PERCPU_SB
/*
* Valid per-cpu incore superblock counters. Note that if you add new counters,
* you may need to define new counter disabled bit field descriptors as there
* are more possible fields in the superblock that can fit in a bitfield on a
* 32 bit platform. The XFS_SBS_* values for the current current counters just
* fit.
*/
typedef struct xfs_icsb_cnts {
uint64_t icsb_fdblocks;
uint64_t icsb_ifree;
uint64_t icsb_icount;
unsigned long icsb_flags;
} xfs_icsb_cnts_t;
#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */
#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
extern int xfs_icsb_init_counters(struct xfs_mount *);
extern void xfs_icsb_reinit_counters(struct xfs_mount *);
extern void xfs_icsb_destroy_counters(struct xfs_mount *);
extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
int64_t, int);
#else
#define xfs_icsb_init_counters(mp) (0)
#define xfs_icsb_destroy_counters(mp) do { } while (0)
#define xfs_icsb_reinit_counters(mp) do { } while (0)
#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
xfs_mod_incore_sb(mp, field, delta, rsvd)
#endif
/* dynamic preallocation free space thresholds, 5% down to 1% */
enum {
XFS_LOWSP_1_PCNT = 0,
......@@ -81,8 +41,13 @@ typedef struct xfs_mount {
struct super_block *m_super;
xfs_tid_t m_tid; /* next unused tid for fs */
struct xfs_ail *m_ail; /* fs active log item list */
xfs_sb_t m_sb; /* copy of fs superblock */
struct xfs_sb m_sb; /* copy of fs superblock */
spinlock_t m_sb_lock; /* sb counter lock */
struct percpu_counter m_icount; /* allocated inodes counter */
struct percpu_counter m_ifree; /* free inodes counter */
struct percpu_counter m_fdblocks; /* free block counter */
struct xfs_buf *m_sb_bp; /* buffer for superblock */
char *m_fsname; /* filesystem name */
int m_fsname_len; /* strlen of fs name */
......@@ -152,12 +117,6 @@ typedef struct xfs_mount {
const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
uint m_chsize; /* size of next field */
atomic_t m_active_trans; /* number trans frozen */
#ifdef HAVE_PERCPU_SB
xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */
unsigned long m_icsb_counters; /* disabled per-cpu counters */
struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */
struct mutex m_icsb_mutex; /* balancer sync lock */
#endif
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_reclaim_work; /* background inode reclaim */
struct delayed_work m_eofblocks_work; /* background eof blocks
......@@ -300,35 +259,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}
/*
* Per-cpu superblock locking functions
*/
#ifdef HAVE_PERCPU_SB
static inline void
xfs_icsb_lock(xfs_mount_t *mp)
{
mutex_lock(&mp->m_icsb_mutex);
}
static inline void
xfs_icsb_unlock(xfs_mount_t *mp)
{
mutex_unlock(&mp->m_icsb_mutex);
}
#else
#define xfs_icsb_lock(mp)
#define xfs_icsb_unlock(mp)
#endif
/*
* This structure is for use by the xfs_mod_incore_sb_batch() routine.
* xfs_growfs can specify a few fields which are more than int limit
*/
typedef struct xfs_mod_sb {
xfs_sb_field_t msb_field; /* Field to modify, see below */
int64_t msb_delta; /* Change to make to specified field */
} xfs_mod_sb_t;
/*
* Per-ag incore structure, copies of information in agf and agi, to improve the
* performance of allocation group selection.
......@@ -383,11 +313,14 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
extern int xfs_mountfs(xfs_mount_t *mp);
extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
xfs_agnumber_t *maxagi);
extern void xfs_unmountfs(xfs_mount_t *);
extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
uint, int);
extern int xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
extern int xfs_mod_ifree(struct xfs_mount *mp, int64_t delta);
extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
bool reserved);
extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
extern int xfs_mount_log_sb(xfs_mount_t *);
extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
extern int xfs_readsb(xfs_mount_t *, int);
......@@ -399,6 +332,4 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
extern void xfs_set_low_space_thresholds(struct xfs_mount *);
#endif /* __KERNEL__ */
#endif /* __XFS_MOUNT_H__ */
......@@ -437,7 +437,7 @@ xfs_mru_cache_insert(
if (!mru || !mru->lists)
return -EINVAL;
if (radix_tree_preload(GFP_KERNEL))
if (radix_tree_preload(GFP_NOFS))
return -ENOMEM;
INIT_LIST_HEAD(&elem->list_node);
......
......@@ -31,7 +31,8 @@
int
xfs_break_layouts(
struct inode *inode,
uint *iolock)
uint *iolock,
bool with_imutex)
{
struct xfs_inode *ip = XFS_I(inode);
int error;
......@@ -40,8 +41,12 @@ xfs_break_layouts(
while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
xfs_iunlock(ip, *iolock);
if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
mutex_unlock(&inode->i_mutex);
error = break_layout(inode, true);
*iolock = XFS_IOLOCK_EXCL;
if (with_imutex)
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, *iolock);
}
......
......@@ -8,9 +8,10 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps,
struct iattr *iattr);
int xfs_break_layouts(struct inode *inode, uint *iolock);
int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex);
#else
static inline int xfs_break_layouts(struct inode *inode, uint *iolock)
static inline int
xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex)
{
return 0;
}
......
......@@ -719,6 +719,7 @@ xfs_qm_qino_alloc(
xfs_trans_t *tp;
int error;
int committed;
bool need_alloc = true;
*ip = NULL;
/*
......@@ -747,6 +748,7 @@ xfs_qm_qino_alloc(
return error;
mp->m_sb.sb_gquotino = NULLFSINO;
mp->m_sb.sb_pquotino = NULLFSINO;
need_alloc = false;
}
}
......@@ -758,7 +760,7 @@ xfs_qm_qino_alloc(
return error;
}
if (!*ip) {
if (need_alloc) {
error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
&committed);
if (error) {
......@@ -794,11 +796,14 @@ xfs_qm_qino_alloc(
spin_unlock(&mp->m_sb_lock);
xfs_log_sb(tp);
if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_alert(mp, "%s failed (error %d)!", __func__, error);
return error;
}
return 0;
if (need_alloc)
xfs_finish_inode_setup(*ip);
return error;
}
......
This diff is collapsed.
......@@ -72,6 +72,8 @@ extern const struct export_operations xfs_export_operations;
extern const struct xattr_handler *xfs_xattr_handlers[];
extern const struct quotactl_ops xfs_quotactl_operations;
extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
#endif /* __XFS_SUPER_H__ */
......@@ -177,7 +177,7 @@ xfs_symlink(
int pathlen;
struct xfs_bmap_free free_list;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
bool unlock_dp_on_error = false;
uint cancel_flags;
int committed;
xfs_fileoff_t first_fsb;
......@@ -221,7 +221,7 @@ xfs_symlink(
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
goto std_return;
return error;
tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
......@@ -241,7 +241,7 @@ xfs_symlink(
}
if (error) {
cancel_flags = 0;
goto error_return;
goto out_trans_cancel;
}
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
......@@ -252,7 +252,7 @@ xfs_symlink(
*/
if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
error = -EPERM;
goto error_return;
goto out_trans_cancel;
}
/*
......@@ -261,7 +261,7 @@ xfs_symlink(
error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
pdqp, resblks, 1, 0);
if (error)
goto error_return;
goto out_trans_cancel;
/*
* Check for ability to enter directory entry, if no space reserved.
......@@ -269,7 +269,7 @@ xfs_symlink(
if (!resblks) {
error = xfs_dir_canenter(tp, dp, link_name);
if (error)
goto error_return;
goto out_trans_cancel;
}
/*
* Initialize the bmap freelist prior to calling either
......@@ -282,15 +282,14 @@ xfs_symlink(
*/
error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
prid, resblks > 0, &ip, NULL);
if (error) {
if (error == -ENOSPC)
goto error_return;
goto error1;
}
if (error)
goto out_trans_cancel;
/*
* An error after we've joined dp to the transaction will result in the
* transaction cancel unlocking dp so don't do it explicitly in the
* Now we join the directory inode to the transaction. We do not do it
* earlier because xfs_dir_ialloc might commit the previous transaction
* (and release all the locks). An error from here on will result in
* the transaction cancel unlocking dp so don't do it explicitly in the
* error path.
*/
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
......@@ -330,7 +329,7 @@ xfs_symlink(
XFS_BMAPI_METADATA, &first_block, resblks,
mval, &nmaps, &free_list);
if (error)
goto error2;
goto out_bmap_cancel;
if (resblks)
resblks -= fs_blocks;
......@@ -348,7 +347,7 @@ xfs_symlink(
BTOBB(byte_cnt), 0);
if (!bp) {
error = -ENOMEM;
goto error2;
goto out_bmap_cancel;
}
bp->b_ops = &xfs_symlink_buf_ops;
......@@ -378,7 +377,7 @@ xfs_symlink(
error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
&first_block, &free_list, resblks);
if (error)
goto error2;
goto out_bmap_cancel;
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
......@@ -392,10 +391,13 @@ xfs_symlink(
}
error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) {
goto error2;
}
if (error)
goto out_bmap_cancel;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
if (error)
goto out_release_inode;
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
xfs_qm_dqrele(pdqp);
......@@ -403,20 +405,28 @@ xfs_symlink(
*ipp = ip;
return 0;
error2:
IRELE(ip);
error1:
out_bmap_cancel:
xfs_bmap_cancel(&free_list);
cancel_flags |= XFS_TRANS_ABORT;
error_return:
out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags);
out_release_inode:
/*
* Wait until after the current transaction is aborted to finish the
* setup of the inode and release the inode. This prevents recursive
* transactions and deadlocks from xfs_inactive.
*/
if (ip) {
xfs_finish_inode_setup(ip);
IRELE(ip);
}
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
xfs_qm_dqrele(pdqp);
if (unlock_dp_on_error)
xfs_iunlock(dp, XFS_ILOCK_EXCL);
std_return:
return error;
}
......
This diff is collapsed.
This diff is collapsed.
......@@ -21,4 +21,10 @@ struct space_resv {
#define FS_IOC_RESVSP _IOW('X', 40, struct space_resv)
#define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv)
#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \
FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | \
FALLOC_FL_ZERO_RANGE | \
FALLOC_FL_INSERT_RANGE)
#endif /* _FALLOC_H_ */
......@@ -41,4 +41,21 @@
*/
#define FALLOC_FL_ZERO_RANGE 0x10
/*
* FALLOC_FL_INSERT_RANGE is use to insert space within the file size without
* overwriting any existing data. The contents of the file beyond offset are
* shifted towards right by len bytes to create a hole. As such, this
* operation will increase the size of the file by len bytes.
*
* Different filesystems may implement different limitations on the granularity
* of the operation. Most will limit operations to filesystem block size
* boundaries, but this boundary may be larger or smaller depending on
* the filesystem and/or the configuration of the filesystem or file.
*
* Attempting to insert space using this flag at OR beyond the end of
* the file is considered an illegal operation - just use ftruncate(2) or
* fallocate(2) with mode 0 for such type of operations.
*/
#define FALLOC_FL_INSERT_RANGE 0x20
#endif /* _UAPI_FALLOC_H_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment