Commit c5850150 authored by Linus Torvalds

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: stop using the page cache to back the buffer cache
  xfs: register the inode cache shrinker before quotachecks
  xfs: xfs_trans_read_buf() should return an error on failure
  xfs: introduce inode cluster buffer trylocks for xfs_iflush
  vmap: flush vmap aliases when mapping fails
  xfs: preallocation transactions do not need to be synchronous

Fix up trivial conflicts in fs/xfs/linux-2.6/xfs_buf.c due to plug removal.
parents 243b422a 0e6e847f
This diff is collapsed.
...@@ -61,30 +61,11 @@ typedef enum { ...@@ -61,30 +61,11 @@ typedef enum {
#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ #define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
/* flags used only internally */ /* flags used only internally */
#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ #define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ #define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
#define _XBF_KMEM (1 << 20)/* backed by heap memory */
#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ #define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
/*
* Special flag for supporting metadata blocks smaller than a FSB.
*
* In this case we can have multiple xfs_buf_t on a single page and
* need to lock out concurrent xfs_buf_t readers as they only
* serialise access to the buffer.
*
* If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
* between reads of the page. Hence we can have one thread read the
* page and modify it, but then race with another thread that thinks
* the page is not up-to-date and hence reads it again.
*
* The result is that the first modifcation to the page is lost.
* This sort of AGF/AGI reading race can happen when unlinking inodes
* that require truncation and results in the AGI unlinked list
* modifications being lost.
*/
#define _XBF_PAGE_LOCKED (1 << 22)
typedef unsigned int xfs_buf_flags_t; typedef unsigned int xfs_buf_flags_t;
#define XFS_BUF_FLAGS \ #define XFS_BUF_FLAGS \
...@@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t; ...@@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_LOCK, "LOCK" }, /* should never be set */\ { XBF_LOCK, "LOCK" }, /* should never be set */\
{ XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
{ XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
{ _XBF_PAGE_CACHE, "PAGE_CACHE" }, \
{ _XBF_PAGES, "PAGES" }, \ { _XBF_PAGES, "PAGES" }, \
{ _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \ { _XBF_KMEM, "KMEM" }, \
{ _XBF_PAGE_LOCKED, "PAGE_LOCKED" } { _XBF_DELWRI_Q, "DELWRI_Q" }
typedef enum { typedef enum {
XBT_FORCE_SLEEP = 0, XBT_FORCE_SLEEP = 0,
...@@ -120,7 +99,7 @@ typedef struct xfs_bufhash { ...@@ -120,7 +99,7 @@ typedef struct xfs_bufhash {
typedef struct xfs_buftarg { typedef struct xfs_buftarg {
dev_t bt_dev; dev_t bt_dev;
struct block_device *bt_bdev; struct block_device *bt_bdev;
struct address_space *bt_mapping; struct backing_dev_info *bt_bdi;
struct xfs_mount *bt_mount; struct xfs_mount *bt_mount;
unsigned int bt_bsize; unsigned int bt_bsize;
unsigned int bt_sshift; unsigned int bt_sshift;
...@@ -139,17 +118,6 @@ typedef struct xfs_buftarg { ...@@ -139,17 +118,6 @@ typedef struct xfs_buftarg {
unsigned int bt_lru_nr; unsigned int bt_lru_nr;
} xfs_buftarg_t; } xfs_buftarg_t;
/*
* xfs_buf_t: Buffer structure for pagecache-based buffers
*
* This buffer structure is used by the pagecache buffer management routines
* to refer to an assembly of pages forming a logical buffer.
*
* The buffer structure is used on a temporary basis only, and discarded when
* released. The real data storage is recorded in the pagecache. Buffers are
* hashed to the block device on which the file system resides.
*/
struct xfs_buf; struct xfs_buf;
typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
......
...@@ -896,6 +896,7 @@ xfs_file_fallocate( ...@@ -896,6 +896,7 @@ xfs_file_fallocate(
xfs_flock64_t bf; xfs_flock64_t bf;
xfs_inode_t *ip = XFS_I(inode); xfs_inode_t *ip = XFS_I(inode);
int cmd = XFS_IOC_RESVSP; int cmd = XFS_IOC_RESVSP;
int attr_flags = XFS_ATTR_NOLOCK;
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -918,7 +919,10 @@ xfs_file_fallocate( ...@@ -918,7 +919,10 @@ xfs_file_fallocate(
goto out_unlock; goto out_unlock;
} }
error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK); if (file->f_flags & O_DSYNC)
attr_flags |= XFS_ATTR_SYNC;
error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
if (error) if (error)
goto out_unlock; goto out_unlock;
......
...@@ -624,6 +624,10 @@ xfs_ioc_space( ...@@ -624,6 +624,10 @@ xfs_ioc_space(
if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
attr_flags |= XFS_ATTR_NONBLOCK; attr_flags |= XFS_ATTR_NONBLOCK;
if (filp->f_flags & O_DSYNC)
attr_flags |= XFS_ATTR_SYNC;
if (ioflags & IO_INVIS) if (ioflags & IO_INVIS)
attr_flags |= XFS_ATTR_DMI; attr_flags |= XFS_ATTR_DMI;
......
...@@ -1078,7 +1078,7 @@ xfs_fs_write_inode( ...@@ -1078,7 +1078,7 @@ xfs_fs_write_inode(
error = 0; error = 0;
goto out_unlock; goto out_unlock;
} }
error = xfs_iflush(ip, 0); error = xfs_iflush(ip, SYNC_TRYLOCK);
} }
out_unlock: out_unlock:
...@@ -1539,10 +1539,14 @@ xfs_fs_fill_super( ...@@ -1539,10 +1539,14 @@ xfs_fs_fill_super(
if (error) if (error)
goto out_free_sb; goto out_free_sb;
error = xfs_mountfs(mp); /*
if (error) * we must configure the block size in the superblock before we run the
goto out_filestream_unmount; * full mount process as the mount process can lookup and cache inodes.
* For the same reason we must also initialise the syncd and register
* the inode cache shrinker so that inodes can be reclaimed during
* operations like a quotacheck that iterate all inodes in the
* filesystem.
*/
sb->s_magic = XFS_SB_MAGIC; sb->s_magic = XFS_SB_MAGIC;
sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize = mp->m_sb.sb_blocksize;
sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
...@@ -1550,6 +1554,16 @@ xfs_fs_fill_super( ...@@ -1550,6 +1554,16 @@ xfs_fs_fill_super(
sb->s_time_gran = 1; sb->s_time_gran = 1;
set_posix_acl_flag(sb); set_posix_acl_flag(sb);
error = xfs_syncd_init(mp);
if (error)
goto out_filestream_unmount;
xfs_inode_shrinker_register(mp);
error = xfs_mountfs(mp);
if (error)
goto out_syncd_stop;
root = igrab(VFS_I(mp->m_rootip)); root = igrab(VFS_I(mp->m_rootip));
if (!root) { if (!root) {
error = ENOENT; error = ENOENT;
...@@ -1565,14 +1579,11 @@ xfs_fs_fill_super( ...@@ -1565,14 +1579,11 @@ xfs_fs_fill_super(
goto fail_vnrele; goto fail_vnrele;
} }
error = xfs_syncd_init(mp);
if (error)
goto fail_vnrele;
xfs_inode_shrinker_register(mp);
return 0; return 0;
out_syncd_stop:
xfs_inode_shrinker_unregister(mp);
xfs_syncd_stop(mp);
out_filestream_unmount: out_filestream_unmount:
xfs_filestream_unmount(mp); xfs_filestream_unmount(mp);
out_free_sb: out_free_sb:
...@@ -1596,6 +1607,9 @@ xfs_fs_fill_super( ...@@ -1596,6 +1607,9 @@ xfs_fs_fill_super(
} }
fail_unmount: fail_unmount:
xfs_inode_shrinker_unregister(mp);
xfs_syncd_stop(mp);
/* /*
* Blow away any referenced inode in the filestreams cache. * Blow away any referenced inode in the filestreams cache.
* This can and will cause log traffic as inodes go inactive * This can and will cause log traffic as inodes go inactive
......
...@@ -761,8 +761,10 @@ xfs_reclaim_inode( ...@@ -761,8 +761,10 @@ xfs_reclaim_inode(
struct xfs_perag *pag, struct xfs_perag *pag,
int sync_mode) int sync_mode)
{ {
int error = 0; int error;
restart:
error = 0;
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
if (!xfs_iflock_nowait(ip)) { if (!xfs_iflock_nowait(ip)) {
if (!(sync_mode & SYNC_WAIT)) if (!(sync_mode & SYNC_WAIT))
...@@ -788,9 +790,31 @@ xfs_reclaim_inode( ...@@ -788,9 +790,31 @@ xfs_reclaim_inode(
if (xfs_inode_clean(ip)) if (xfs_inode_clean(ip))
goto reclaim; goto reclaim;
/* Now we have an inode that needs flushing */ /*
error = xfs_iflush(ip, sync_mode); * Now we have an inode that needs flushing.
*
* We do a nonblocking flush here even if we are doing a SYNC_WAIT
* reclaim as we can deadlock with inode cluster removal.
* xfs_ifree_cluster() can lock the inode buffer before it locks the
* ip->i_lock, and we are doing the exact opposite here. As a result,
* doing a blocking xfs_itobp() to get the cluster buffer will result
* in an ABBA deadlock with xfs_ifree_cluster().
*
* As xfs_ifree_cluster() must gather all inodes that are active in the
* cache to mark them stale, if we hit this case we don't actually want
* to do IO here - we want the inode marked stale so we can simply
* reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
* just unlock the inode, back off and try again. Hopefully the next
* pass through will see the stale flag set on the inode.
*/
error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
if (sync_mode & SYNC_WAIT) { if (sync_mode & SYNC_WAIT) {
if (error == EAGAIN) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/* backoff longer than in xfs_ifree_cluster */
delay(2);
goto restart;
}
xfs_iflock(ip); xfs_iflock(ip);
goto reclaim; goto reclaim;
} }
......
...@@ -2835,7 +2835,7 @@ xfs_iflush( ...@@ -2835,7 +2835,7 @@ xfs_iflush(
* Get the buffer containing the on-disk inode. * Get the buffer containing the on-disk inode.
*/ */
error = xfs_itobp(mp, NULL, ip, &dip, &bp, error = xfs_itobp(mp, NULL, ip, &dip, &bp,
(flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK); (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
if (error || !bp) { if (error || !bp) {
xfs_ifunlock(ip); xfs_ifunlock(ip);
return error; return error;
......
...@@ -760,11 +760,11 @@ xfs_inode_item_push( ...@@ -760,11 +760,11 @@ xfs_inode_item_push(
* Push the inode to its backing buffer. This will not remove the * Push the inode to its backing buffer. This will not remove the
* inode from the AIL - a further push will be required to trigger a * inode from the AIL - a further push will be required to trigger a
* buffer push. However, this allows all the dirty inodes to be pushed * buffer push. However, this allows all the dirty inodes to be pushed
* to the buffer before it is pushed to disk. THe buffer IO completion * to the buffer before it is pushed to disk. The buffer IO completion
* will pull th einode from the AIL, mark it clean and unlock the flush * will pull the inode from the AIL, mark it clean and unlock the flush
* lock. * lock.
*/ */
(void) xfs_iflush(ip, 0); (void) xfs_iflush(ip, SYNC_TRYLOCK);
xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_iunlock(ip, XFS_ILOCK_SHARED);
} }
......
...@@ -383,7 +383,8 @@ xfs_trans_read_buf( ...@@ -383,7 +383,8 @@ xfs_trans_read_buf(
bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
if (bp == NULL) { if (bp == NULL) {
*bpp = NULL; *bpp = NULL;
return 0; return (flags & XBF_TRYLOCK) ?
0 : XFS_ERROR(ENOMEM);
} }
if (XFS_BUF_GETERROR(bp) != 0) { if (XFS_BUF_GETERROR(bp) != 0) {
XFS_BUF_SUPER_STALE(bp); XFS_BUF_SUPER_STALE(bp);
......
...@@ -2831,7 +2831,8 @@ xfs_change_file_space( ...@@ -2831,7 +2831,8 @@ xfs_change_file_space(
ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_set_sync(tp); if (attr_flags & XFS_ATTR_SYNC)
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0); error = xfs_trans_commit(tp, 0);
......
...@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); ...@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ #define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
#define XFS_ATTR_SYNC 0x10 /* synchronous operation required */
int xfs_readlink(struct xfs_inode *ip, char *link); int xfs_readlink(struct xfs_inode *ip, char *link);
int xfs_release(struct xfs_inode *ip); int xfs_release(struct xfs_inode *ip);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment