Commit 9b1f56d6 authored by Alex Elder's avatar Alex Elder

Merge branch 'for-2.6.34-rc1-batch2' into for-linus

parents 64ba9926 07000ee6
......@@ -105,7 +105,6 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
xfs_globals.o \
xfs_ioctl.o \
xfs_iops.o \
xfs_lrw.o \
xfs_super.o \
xfs_sync.o \
xfs_xattr.o)
......
......@@ -39,6 +39,7 @@
#include "xfs_iomap.h"
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include <linux/mpage.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
......@@ -163,14 +164,17 @@ xfs_ioend_new_eof(
}
/*
* Update on-disk file size now that data has been written to disk.
* The current in-memory file size is i_size. If a write is beyond
* eof i_new_size will be the intended file size until i_size is
* updated. If this write does not extend all the way to the valid
* file size then restrict this update to the end of the write.
* Update on-disk file size now that data has been written to disk. The
* current in-memory file size is i_size. If a write is beyond eof i_new_size
* will be the intended file size until i_size is updated. If this write does
* not extend all the way to the valid file size then restrict this update to
* the end of the write.
*
* This function does not block as blocking on the inode lock in IO completion
* can lead to IO completion order dependency deadlocks.. If it can't get the
* inode ilock it will return EAGAIN. Callers must handle this.
*/
STATIC void
STATIC int
xfs_setfilesize(
xfs_ioend_t *ioend)
{
......@@ -181,16 +185,40 @@ xfs_setfilesize(
ASSERT(ioend->io_type != IOMAP_READ);
if (unlikely(ioend->io_error))
return;
return 0;
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
return EAGAIN;
xfs_ilock(ip, XFS_ILOCK_EXCL);
isize = xfs_ioend_new_eof(ioend);
if (isize) {
ip->i_d.di_size = isize;
xfs_mark_inode_dirty_sync(ip);
xfs_mark_inode_dirty(ip);
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return 0;
}
/*
* Schedule IO completion handling on a xfsdatad if this was
* the final hold on this ioend. If we are asked to wait,
* flush the workqueue.
*/
STATIC void
xfs_finish_ioend(
xfs_ioend_t *ioend,
int wait)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
struct workqueue_struct *wq;
wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
xfsconvertd_workqueue : xfsdatad_workqueue;
queue_work(wq, &ioend->io_work);
if (wait)
flush_workqueue(wq);
}
}
/*
......@@ -200,9 +228,9 @@ STATIC void
xfs_end_io(
struct work_struct *work)
{
xfs_ioend_t *ioend =
container_of(work, xfs_ioend_t, io_work);
xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work);
struct xfs_inode *ip = XFS_I(ioend->io_inode);
int error = 0;
/*
* For unwritten extents we need to issue transactions to convert a
......@@ -210,7 +238,6 @@ xfs_end_io(
*/
if (ioend->io_type == IOMAP_UNWRITTEN &&
likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
int error;
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
......@@ -222,30 +249,23 @@ xfs_end_io(
* We might have to update the on-disk file size after extending
* writes.
*/
if (ioend->io_type != IOMAP_READ)
xfs_setfilesize(ioend);
xfs_destroy_ioend(ioend);
}
if (ioend->io_type != IOMAP_READ) {
error = xfs_setfilesize(ioend);
ASSERT(!error || error == EAGAIN);
}
/*
* Schedule IO completion handling on a xfsdatad if this was
* the final hold on this ioend. If we are asked to wait,
* flush the workqueue.
/*
* If we didn't complete processing of the ioend, requeue it to the
* tail of the workqueue for another attempt later. Otherwise destroy
* it.
*/
STATIC void
xfs_finish_ioend(
xfs_ioend_t *ioend,
int wait)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
struct workqueue_struct *wq;
wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
xfsconvertd_workqueue : xfsdatad_workqueue;
queue_work(wq, &ioend->io_work);
if (wait)
flush_workqueue(wq);
}
if (error == EAGAIN) {
atomic_inc(&ioend->io_remaining);
xfs_finish_ioend(ioend, 0);
/* ensure we don't spin on blocked ioends */
delay(1);
} else
xfs_destroy_ioend(ioend);
}
/*
......@@ -341,7 +361,7 @@ xfs_submit_ioend_bio(
* but don't update the inode size until I/O completion.
*/
if (xfs_ioend_new_eof(ioend))
xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC_PLUG : WRITE, bio);
......@@ -874,6 +894,118 @@ xfs_cluster_write(
}
}
STATIC void
xfs_vm_invalidatepage(
struct page *page,
unsigned long offset)
{
trace_xfs_invalidatepage(page->mapping->host, page, offset);
block_invalidatepage(page, offset);
}
/*
* If the page has delalloc buffers on it, we need to punch them out before we
* invalidate the page. If we don't, we leave a stale delalloc mapping on the
* inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
* is done on that same region - the delalloc extent is returned when none is
* supposed to be there.
*
* We prevent this by truncating away the delalloc regions on the page before
* invalidating it. Because they are delalloc, we can do this without needing a
* transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
* truncation without a transaction as there is no space left for block
* reservation (typically why we see a ENOSPC in writeback).
*
* This is not a performance critical path, so for now just do the punching a
* buffer head at a time.
*/
STATIC void
xfs_aops_discard_page(
struct page *page)
{
struct inode *inode = page->mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct buffer_head *bh, *head;
loff_t offset = page_offset(page);
ssize_t len = 1 << inode->i_blkbits;
if (!xfs_is_delayed_page(page, IOMAP_DELAY))
goto out_invalidate;
xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
"page discard on page %p, inode 0x%llx, offset %llu.",
page, ip->i_ino, offset);
xfs_ilock(ip, XFS_ILOCK_EXCL);
bh = head = page_buffers(page);
do {
int done;
xfs_fileoff_t offset_fsb;
xfs_bmbt_irec_t imap;
int nimaps = 1;
int error;
xfs_fsblock_t firstblock;
xfs_bmap_free_t flist;
if (!buffer_delay(bh))
goto next_buffer;
offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
/*
* Map the range first and check that it is a delalloc extent
* before trying to unmap the range. Otherwise we will be
* trying to remove a real extent (which requires a
* transaction) or a hole, which is probably a bad idea...
*/
error = xfs_bmapi(NULL, ip, offset_fsb, 1,
XFS_BMAPI_ENTIRE, NULL, 0, &imap,
&nimaps, NULL, NULL);
if (error) {
/* something screwed, just bail */
xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
"page discard failed delalloc mapping lookup.");
break;
}
if (!nimaps) {
/* nothing there */
goto next_buffer;
}
if (imap.br_startblock != DELAYSTARTBLOCK) {
/* been converted, ignore */
goto next_buffer;
}
WARN_ON(imap.br_blockcount == 0);
/*
* Note: while we initialise the firstblock/flist pair, they
* should never be used because blocks should never be
* allocated or freed for a delalloc extent and hence we need
* don't cancel or finish them after the xfs_bunmapi() call.
*/
xfs_bmap_init(&flist, &firstblock);
error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
&flist, NULL, &done);
ASSERT(!flist.xbf_count && !flist.xbf_first);
if (error) {
/* something screwed, just bail */
xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
"page discard unable to remove delalloc mapping.");
break;
}
next_buffer:
offset += len;
} while ((bh = bh->b_this_page) != head);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_invalidate:
xfs_vm_invalidatepage(page, 0);
return;
}
/*
* Calling this without startio set means we are being asked to make a dirty
* page ready for freeing it's buffers. When called with startio set then
......@@ -1125,7 +1257,7 @@ xfs_page_state_convert(
*/
if (err != -EAGAIN) {
if (!unmapped)
block_invalidatepage(page, 0);
xfs_aops_discard_page(page);
ClearPageUptodate(page);
}
return err;
......@@ -1535,15 +1667,6 @@ xfs_vm_readpages(
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
STATIC void
xfs_vm_invalidatepage(
struct page *page,
unsigned long offset)
{
trace_xfs_invalidatepage(page->mapping->host, page, offset);
block_invalidatepage(page, offset);
}
const struct address_space_operations xfs_address_space_operations = {
.readpage = xfs_vm_readpage,
.readpages = xfs_vm_readpages,
......
......@@ -16,6 +16,7 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
......@@ -34,52 +35,279 @@
#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_vnodeops.h"
#include "xfs_da_btree.h"
#include "xfs_ioctl.h"
#include "xfs_trace.h"
#include <linux/dcache.h>
static const struct vm_operations_struct xfs_file_vm_ops;
STATIC ssize_t
xfs_file_aio_read(
struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs,
loff_t pos)
/*
* xfs_iozero
*
* xfs_iozero clears the specified range of buffer supplied,
* and marks all the affected blocks as valid and modified. If
* an affected block is not allocated, it will be allocated. If
* an affected block is not completely overwritten, and is not
* valid before the operation, it will be read from disk before
* being partially zeroed.
*/
STATIC int
xfs_iozero(
struct xfs_inode *ip, /* inode */
loff_t pos, /* offset in file */
size_t count) /* size of data to zero */
{
struct file *file = iocb->ki_filp;
int ioflags = 0;
struct page *page;
struct address_space *mapping;
int status;
BUG_ON(iocb->ki_pos != pos);
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
nr_segs, &iocb->ki_pos, ioflags);
mapping = VFS_I(ip)->i_mapping;
do {
unsigned offset, bytes;
void *fsdata;
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
bytes = PAGE_CACHE_SIZE - offset;
if (bytes > count)
bytes = count;
status = pagecache_write_begin(NULL, mapping, pos, bytes,
AOP_FLAG_UNINTERRUPTIBLE,
&page, &fsdata);
if (status)
break;
zero_user(page, offset, bytes);
status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
page, fsdata);
WARN_ON(status <= 0); /* can't return less than zero! */
pos += bytes;
count -= bytes;
status = 0;
} while (count);
return (-status);
}
STATIC int
xfs_file_fsync(
struct file *file,
struct dentry *dentry,
int datasync)
{
struct xfs_inode *ip = XFS_I(dentry->d_inode);
struct xfs_trans *tp;
int error = 0;
int log_flushed = 0;
xfs_itrace_entry(ip);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -XFS_ERROR(EIO);
xfs_iflags_clear(ip, XFS_ITRUNCATED);
/*
* We always need to make sure that the required inode state is safe on
* disk. The inode might be clean but we still might need to force the
* log because of committed transactions that haven't hit the disk yet.
* Likewise, there could be unflushed non-transactional changes to the
* inode core that have to go to disk and this requires us to issue
* a synchronous transaction to capture these changes correctly.
*
* This code relies on the assumption that if the i_update_core field
* of the inode is clear and the inode is unpinned then it is clean
* and no action is required.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
/*
* First check if the VFS inode is marked dirty. All the dirtying
* of non-transactional updates no goes through mark_inode_dirty*,
* which allows us to distinguish beteeen pure timestamp updates
* and i_size updates which need to be caught for fdatasync.
* After that also theck for the dirty state in the XFS inode, which
* might gets cleared when the inode gets written out via the AIL
* or xfs_iflush_cluster.
*/
if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) ||
((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
ip->i_update_core) {
/*
* Kick off a transaction to log the inode core to get the
* updates. The sync transaction will also force the log.
*/
xfs_iunlock(ip, XFS_ILOCK_SHARED);
tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
error = xfs_trans_reserve(tp, 0,
XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
return -error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
/*
* Note - it's possible that we might have pushed ourselves out
* of the way during trans_reserve which would flush the inode.
* But there's no guarantee that the inode buffer has actually
* gone out yet (it's delwri). Plus the buffer could be pinned
* anyway if it's part of an inode in another recent
* transaction. So we play it safe and fire off the
* transaction anyway.
*/
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_set_sync(tp);
error = _xfs_trans_commit(tp, 0, &log_flushed);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
} else {
/*
* Timestamps/size haven't changed since last inode flush or
* inode transaction commit. That means either nothing got
* written or a transaction committed which caught the updates.
* If the latter happened and the transaction hasn't hit the
* disk yet, the inode will be still be pinned. If it is,
* force the log.
*/
if (xfs_ipincount(ip)) {
error = _xfs_log_force_lsn(ip->i_mount,
ip->i_itemp->ili_last_lsn,
XFS_LOG_SYNC, &log_flushed);
}
xfs_iunlock(ip, XFS_ILOCK_SHARED);
}
if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
/*
* If the log write didn't issue an ordered tag we need
* to flush the disk cache for the data device now.
*/
if (!log_flushed)
xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
/*
* If this inode is on the RT dev we need to flush that
* cache as well.
*/
if (XFS_IS_REALTIME_INODE(ip))
xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
}
return -error;
}
STATIC ssize_t
xfs_file_aio_write(
xfs_file_aio_read(
struct kiocb *iocb,
const struct iovec *iov,
const struct iovec *iovp,
unsigned long nr_segs,
loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
size_t size = 0;
ssize_t ret = 0;
int ioflags = 0;
xfs_fsize_t n;
unsigned long seg;
XFS_STATS_INC(xs_read_calls);
BUG_ON(iocb->ki_pos != pos);
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
&iocb->ki_pos, ioflags);
/* START copy & waste from filemap.c */
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *iv = &iovp[seg];
/*
* If any segment has a negative length, or the cumulative
* length ever wraps negative then return -EINVAL.
*/
size += iv->iov_len;
if (unlikely((ssize_t)(size|iv->iov_len) < 0))
return XFS_ERROR(-EINVAL);
}
/* END copy & waste from filemap.c */
if (unlikely(ioflags & IO_ISDIRECT)) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
if ((iocb->ki_pos & target->bt_smask) ||
(size & target->bt_smask)) {
if (iocb->ki_pos == ip->i_size)
return 0;
return -XFS_ERROR(EINVAL);
}
}
n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
if (n <= 0 || size == 0)
return 0;
if (n < size)
size = n;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
if (unlikely(ioflags & IO_ISDIRECT))
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
int iolock = XFS_IOLOCK_SHARED;
ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size,
dmflags, &iolock);
if (ret) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
if (unlikely(ioflags & IO_ISDIRECT))
mutex_unlock(&inode->i_mutex);
return ret;
}
}
if (unlikely(ioflags & IO_ISDIRECT)) {
if (inode->i_mapping->nrpages) {
ret = -xfs_flushinval_pages(ip,
(iocb->ki_pos & PAGE_CACHE_MASK),
-1, FI_REMAPF_LOCKED);
}
mutex_unlock(&inode->i_mutex);
if (ret) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
}
trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
STATIC ssize_t
......@@ -87,16 +315,44 @@ xfs_file_splice_read(
struct file *infilp,
loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len,
size_t count,
unsigned int flags)
{
struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
struct xfs_mount *mp = ip->i_mount;
int ioflags = 0;
ssize_t ret;
XFS_STATS_INC(xs_read_calls);
if (infilp->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
infilp, ppos, pipe, len, flags, ioflags);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
int iolock = XFS_IOLOCK_SHARED;
int error;
error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
FILP_DELAY_FLAG(infilp), &iolock);
if (error) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return -error;
}
}
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
STATIC ssize_t
......@@ -104,16 +360,538 @@ xfs_file_splice_write(
struct pipe_inode_info *pipe,
struct file *outfilp,
loff_t *ppos,
size_t len,
size_t count,
unsigned int flags)
{
struct inode *inode = outfilp->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
xfs_fsize_t isize, new_size;
int ioflags = 0;
ssize_t ret;
XFS_STATS_INC(xs_write_calls);
if (outfilp->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
pipe, outfilp, ppos, len, flags, ioflags);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
xfs_ilock(ip, XFS_IOLOCK_EXCL);
if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
int iolock = XFS_IOLOCK_EXCL;
int error;
error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
FILP_DELAY_FLAG(outfilp), &iolock);
if (error) {
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return -error;
}
}
new_size = *ppos + count;
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (new_size > ip->i_size)
ip->i_new_size = new_size;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
if (ret > 0)
XFS_STATS_ADD(xs_write_bytes, ret);
isize = i_size_read(inode);
if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
*ppos = isize;
if (*ppos > ip->i_size) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (*ppos > ip->i_size)
ip->i_size = *ppos;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
if (ip->i_new_size) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
ip->i_new_size = 0;
if (ip->i_d.di_size > ip->i_size)
ip->i_d.di_size = ip->i_size;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
/*
* This routine is called to handle zeroing any space in the last
* block of the file that is beyond the EOF. We do this since the
* size is being increased without writing anything to that block
* and we don't want anyone to read the garbage on the disk.
*/
STATIC int /* error (positive) */
xfs_zero_last_block(
xfs_inode_t *ip,
xfs_fsize_t offset,
xfs_fsize_t isize)
{
xfs_fileoff_t last_fsb;
xfs_mount_t *mp = ip->i_mount;
int nimaps;
int zero_offset;
int zero_len;
int error = 0;
xfs_bmbt_irec_t imap;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
zero_offset = XFS_B_FSB_OFFSET(mp, isize);
if (zero_offset == 0) {
/*
* There are no extra bytes in the last block on disk to
* zero, so return.
*/
return 0;
}
last_fsb = XFS_B_TO_FSBT(mp, isize);
nimaps = 1;
error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
&nimaps, NULL, NULL);
if (error) {
return error;
}
ASSERT(nimaps > 0);
/*
* If the block underlying isize is just a hole, then there
* is nothing to zero.
*/
if (imap.br_startblock == HOLESTARTBLOCK) {
return 0;
}
/*
* Zero the part of the last block beyond the EOF, and write it
* out sync. We need to drop the ilock while we do this so we
* don't deadlock when the buffer cache calls back to us.
*/
xfs_iunlock(ip, XFS_ILOCK_EXCL);
zero_len = mp->m_sb.sb_blocksize - zero_offset;
if (isize + zero_len > offset)
zero_len = offset - isize;
error = xfs_iozero(ip, isize, zero_len);
xfs_ilock(ip, XFS_ILOCK_EXCL);
ASSERT(error >= 0);
return error;
}
/*
* Zero any on disk space between the current EOF and the new,
* larger EOF. This handles the normal case of zeroing the remainder
* of the last block in the file and the unusual case of zeroing blocks
* out beyond the size of the file. This second case only happens
* with fixed size extents and when the system crashes before the inode
* size was updated but after blocks were allocated. If fill is set,
* then any holes in the range are filled and zeroed. If not, the holes
* are left alone as holes.
*/
int /* error (positive) */
xfs_zero_eof(
xfs_inode_t *ip,
xfs_off_t offset, /* starting I/O offset */
xfs_fsize_t isize) /* current inode size */
{
xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t start_zero_fsb;
xfs_fileoff_t end_zero_fsb;
xfs_fileoff_t zero_count_fsb;
xfs_fileoff_t last_fsb;
xfs_fileoff_t zero_off;
xfs_fsize_t zero_len;
int nimaps;
int error = 0;
xfs_bmbt_irec_t imap;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
ASSERT(offset > isize);
/*
* First handle zeroing the block on which isize resides.
* We only zero a part of that block so it is handled specially.
*/
error = xfs_zero_last_block(ip, offset, isize);
if (error) {
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
return error;
}
/*
* Calculate the range between the new size and the old
* where blocks needing to be zeroed may exist. To get the
* block where the last byte in the file currently resides,
* we need to subtract one from the size and truncate back
* to a block boundary. We subtract 1 in case the size is
* exactly on a block boundary.
*/
last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
if (last_fsb == end_zero_fsb) {
/*
* The size was only incremented on its last block.
* We took care of that above, so just return.
*/
return 0;
}
ASSERT(start_zero_fsb <= end_zero_fsb);
while (start_zero_fsb <= end_zero_fsb) {
nimaps = 1;
zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
0, NULL, 0, &imap, &nimaps, NULL, NULL);
if (error) {
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
return error;
}
ASSERT(nimaps > 0);
if (imap.br_state == XFS_EXT_UNWRITTEN ||
imap.br_startblock == HOLESTARTBLOCK) {
/*
* This loop handles initializing pages that were
* partially initialized by the code below this
* loop. It basically zeroes the part of the page
* that sits on a hole and sets the page as P_HOLE
* and calls remapf if it is a mapped file.
*/
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
continue;
}
/*
* There are blocks we need to zero.
* Drop the inode lock while we're doing the I/O.
* We'll still have the iolock to protect us.
*/
xfs_iunlock(ip, XFS_ILOCK_EXCL);
zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
if ((zero_off + zero_len) > offset)
zero_len = offset - zero_off;
error = xfs_iozero(ip, zero_off, zero_len);
if (error) {
goto out_lock;
}
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
xfs_ilock(ip, XFS_ILOCK_EXCL);
}
return 0;
out_lock:
xfs_ilock(ip, XFS_ILOCK_EXCL);
ASSERT(error >= 0);
return error;
}
STATIC ssize_t
xfs_file_aio_write(
struct kiocb *iocb,
const struct iovec *iovp,
unsigned long nr_segs,
loff_t pos)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
ssize_t ret = 0, error = 0;
int ioflags = 0;
xfs_fsize_t isize, new_size;
int iolock;
int eventsent = 0;
size_t ocount = 0, count;
int need_i_mutex;
XFS_STATS_INC(xs_write_calls);
BUG_ON(iocb->ki_pos != pos);
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
if (error)
return error;
count = ocount;
if (count == 0)
return 0;
xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
relock:
if (ioflags & IO_ISDIRECT) {
iolock = XFS_IOLOCK_SHARED;
need_i_mutex = 0;
} else {
iolock = XFS_IOLOCK_EXCL;
need_i_mutex = 1;
mutex_lock(&inode->i_mutex);
}
xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
start:
error = -generic_write_checks(file, &pos, &count,
S_ISBLK(inode->i_mode));
if (error) {
xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
goto out_unlock_mutex;
}
if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
!(ioflags & IO_INVIS) && !eventsent)) {
int dmflags = FILP_DELAY_FLAG(file);
if (need_i_mutex)
dmflags |= DM_FLAGS_IMUX;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
pos, count, dmflags, &iolock);
if (error) {
goto out_unlock_internal;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
eventsent = 1;
/*
* The iolock was dropped and reacquired in XFS_SEND_DATA
* so we have to recheck the size when appending.
* We will only "goto start;" once, since having sent the
* event prevents another call to XFS_SEND_DATA, which is
* what allows the size to change in the first place.
*/
if ((file->f_flags & O_APPEND) && pos != ip->i_size)
goto start;
}
if (ioflags & IO_ISDIRECT) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
if ((pos & target->bt_smask) || (count & target->bt_smask)) {
xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
return XFS_ERROR(-EINVAL);
}
if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
iolock = XFS_IOLOCK_EXCL;
need_i_mutex = 1;
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
goto start;
}
}
new_size = pos + count;
if (new_size > ip->i_size)
ip->i_new_size = new_size;
if (likely(!(ioflags & IO_INVIS)))
file_update_time(file);
/*
* If the offset is beyond the size of the file, we have a couple
* of things to do. First, if there is already space allocated
* we need to either create holes or zero the disk or ...
*
* If there is a page where the previous size lands, we need
* to zero it out up to the new size.
*/
if (pos > ip->i_size) {
error = xfs_zero_eof(ip, pos, ip->i_size);
if (error) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
goto out_unlock_internal;
}
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/*
* If we're writing the file then make sure to clear the
* setuid and setgid bits if the process is not being run
* by root. This keeps people from modifying setuid and
* setgid binaries.
*/
error = -file_remove_suid(file);
if (unlikely(error))
goto out_unlock_internal;
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
if ((ioflags & IO_ISDIRECT)) {
if (mapping->nrpages) {
WARN_ON(need_i_mutex == 0);
error = xfs_flushinval_pages(ip,
(pos & PAGE_CACHE_MASK),
-1, FI_REMAPF_LOCKED);
if (error)
goto out_unlock_internal;
}
if (need_i_mutex) {
/* demote the lock now the cached pages are gone */
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
mutex_unlock(&inode->i_mutex);
iolock = XFS_IOLOCK_SHARED;
need_i_mutex = 0;
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
ret = generic_file_direct_write(iocb, iovp,
&nr_segs, pos, &iocb->ki_pos, count, ocount);
/*
* direct-io write to a hole: fall through to buffered I/O
* for completing the rest of the request.
*/
if (ret >= 0 && ret != count) {
XFS_STATS_ADD(xs_write_bytes, ret);
pos += ret;
count -= ret;
ioflags &= ~IO_ISDIRECT;
xfs_iunlock(ip, iolock);
goto relock;
}
} else {
int enospc = 0;
ssize_t ret2 = 0;
write_retry:
trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags);
ret2 = generic_file_buffered_write(iocb, iovp, nr_segs,
pos, &iocb->ki_pos, count, ret);
/*
* if we just got an ENOSPC, flush the inode now we
* aren't holding any page locks and retry *once*
*/
if (ret2 == -ENOSPC && !enospc) {
error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
if (error)
goto out_unlock_internal;
enospc = 1;
goto write_retry;
}
ret = ret2;
}
current->backing_dev_info = NULL;
isize = i_size_read(inode);
if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize))
iocb->ki_pos = isize;
if (iocb->ki_pos > ip->i_size) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (iocb->ki_pos > ip->i_size)
ip->i_size = iocb->ki_pos;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
if (ret == -ENOSPC &&
DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
xfs_iunlock(ip, iolock);
if (need_i_mutex)
mutex_unlock(&inode->i_mutex);
error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
0, 0, 0); /* Delay flag intentionally unused */
if (need_i_mutex)
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, iolock);
if (error)
goto out_unlock_internal;
goto start;
}
error = -ret;
if (ret <= 0)
goto out_unlock_internal;
XFS_STATS_ADD(xs_write_bytes, ret);
/* Handle various SYNC-type writes */
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
loff_t end = pos + ret - 1;
int error2;
xfs_iunlock(ip, iolock);
if (need_i_mutex)
mutex_unlock(&inode->i_mutex);
error2 = filemap_write_and_wait_range(mapping, pos, end);
if (!error)
error = error2;
if (need_i_mutex)
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, iolock);
error2 = -xfs_file_fsync(file, file->f_path.dentry,
(file->f_flags & __O_SYNC) ? 0 : 1);
if (!error)
error = error2;
}
out_unlock_internal:
if (ip->i_new_size) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
ip->i_new_size = 0;
/*
* If this was a direct or synchronous I/O that failed (such
* as ENOSPC) then part of the I/O may have been written to
* disk before the error occured. In this case the on-disk
* file size may have been adjusted beyond the in-memory file
* size and now needs to be truncated back.
*/
if (ip->i_d.di_size > ip->i_size)
ip->i_d.di_size = ip->i_size;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
xfs_iunlock(ip, iolock);
out_unlock_mutex:
if (need_i_mutex)
mutex_unlock(&inode->i_mutex);
return -error;
}
STATIC int
......@@ -160,28 +938,6 @@ xfs_file_release(
return -xfs_release(XFS_I(inode));
}
/*
* We ignore the datasync flag here because a datasync is effectively
* identical to an fsync. That is, datasync implies that we need to write
* only the metadata needed to be able to access the data that is written
* if we crash after the call completes. Hence if we are writing beyond
* EOF we have to log the inode size change as well, which makes it a
* full fsync. If we don't write beyond EOF, the inode core will be
* clean in memory and so we don't need to log the inode, just like
* fsync.
*/
STATIC int
xfs_file_fsync(
struct file *file,
struct dentry *dentry,
int datasync)
{
struct xfs_inode *ip = XFS_I(dentry->d_inode);
xfs_iflags_clear(ip, XFS_ITRUNCATED);
return -xfs_fsync(ip);
}
STATIC int
xfs_file_readdir(
struct file *filp,
......@@ -203,9 +959,9 @@ xfs_file_readdir(
*
* Try to give it an estimate that's good enough, maybe at some
* point we can change the ->readdir prototype to include the
* buffer size.
* buffer size. For now we use the current glibc buffer size.
*/
bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size);
bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
error = xfs_readdir(ip, dirent, bufsize,
(xfs_off_t *)&filp->f_pos, filldir);
......
......@@ -91,6 +91,16 @@ xfs_mark_inode_dirty_sync(
mark_inode_dirty_sync(inode);
}
void
xfs_mark_inode_dirty(
xfs_inode_t *ip)
{
struct inode *inode = VFS_I(ip);
if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
mark_inode_dirty(inode);
}
/*
* Change the requested timestamp in the given inode.
* We don't lock across timestamp updates, and we don't log them but
......
......@@ -88,7 +88,6 @@
#include <xfs_super.h>
#include <xfs_globals.h>
#include <xfs_fs_subr.h>
#include <xfs_lrw.h>
#include <xfs_buf.h>
/*
......
/*
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_alloc.h"
#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_inode_item.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
#include "xfs_iomap.h"
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
#include <linux/capability.h>
#include <linux/writeback.h>
/*
* xfs_iozero
*
* xfs_iozero clears the specified range of buffer supplied,
* and marks all the affected blocks as valid and modified. If
* an affected block is not allocated, it will be allocated. If
* an affected block is not completely overwritten, and is not
* valid before the operation, it will be read from disk before
* being partially zeroed.
*/
STATIC int
xfs_iozero(
struct xfs_inode *ip, /* inode */
loff_t pos, /* offset in file */
size_t count) /* size of data to zero */
{
struct page *page;
struct address_space *mapping;
int status;
mapping = VFS_I(ip)->i_mapping;
do {
unsigned offset, bytes;
void *fsdata;
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
bytes = PAGE_CACHE_SIZE - offset;
if (bytes > count)
bytes = count;
status = pagecache_write_begin(NULL, mapping, pos, bytes,
AOP_FLAG_UNINTERRUPTIBLE,
&page, &fsdata);
if (status)
break;
zero_user(page, offset, bytes);
status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
page, fsdata);
WARN_ON(status <= 0); /* can't return less than zero! */
pos += bytes;
count -= bytes;
status = 0;
} while (count);
return (-status);
}
ssize_t /* bytes read, or (-) error */
xfs_read(
xfs_inode_t *ip,
struct kiocb *iocb,
const struct iovec *iovp,
unsigned int segs,
loff_t *offset,
int ioflags)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
xfs_mount_t *mp = ip->i_mount;
size_t size = 0;
ssize_t ret = 0;
xfs_fsize_t n;
unsigned long seg;
XFS_STATS_INC(xs_read_calls);
/* START copy & waste from filemap.c */
for (seg = 0; seg < segs; seg++) {
const struct iovec *iv = &iovp[seg];
/*
* If any segment has a negative length, or the cumulative
* length ever wraps negative then return -EINVAL.
*/
size += iv->iov_len;
if (unlikely((ssize_t)(size|iv->iov_len) < 0))
return XFS_ERROR(-EINVAL);
}
/* END copy & waste from filemap.c */
if (unlikely(ioflags & IO_ISDIRECT)) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
if ((*offset & target->bt_smask) ||
(size & target->bt_smask)) {
if (*offset == ip->i_size) {
return (0);
}
return -XFS_ERROR(EINVAL);
}
}
n = XFS_MAXIOFFSET(mp) - *offset;
if ((n <= 0) || (size == 0))
return 0;
if (n < size)
size = n;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
if (unlikely(ioflags & IO_ISDIRECT))
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
int iolock = XFS_IOLOCK_SHARED;
ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
dmflags, &iolock);
if (ret) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
if (unlikely(ioflags & IO_ISDIRECT))
mutex_unlock(&inode->i_mutex);
return ret;
}
}
if (unlikely(ioflags & IO_ISDIRECT)) {
if (inode->i_mapping->nrpages)
ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
-1, FI_REMAPF_LOCKED);
mutex_unlock(&inode->i_mutex);
if (ret) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
}
trace_xfs_file_read(ip, size, *offset, ioflags);
iocb->ki_pos = *offset;
ret = generic_file_aio_read(iocb, iovp, segs, *offset);
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
ssize_t
xfs_splice_read(
xfs_inode_t *ip,
struct file *infilp,
loff_t *ppos,
struct pipe_inode_info *pipe,
size_t count,
int flags,
int ioflags)
{
xfs_mount_t *mp = ip->i_mount;
ssize_t ret;
XFS_STATS_INC(xs_read_calls);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
int iolock = XFS_IOLOCK_SHARED;
int error;
error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
FILP_DELAY_FLAG(infilp), &iolock);
if (error) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return -error;
}
}
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
ssize_t
xfs_splice_write(
xfs_inode_t *ip,
struct pipe_inode_info *pipe,
struct file *outfilp,
loff_t *ppos,
size_t count,
int flags,
int ioflags)
{
xfs_mount_t *mp = ip->i_mount;
ssize_t ret;
struct inode *inode = outfilp->f_mapping->host;
xfs_fsize_t isize, new_size;
XFS_STATS_INC(xs_write_calls);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
xfs_ilock(ip, XFS_IOLOCK_EXCL);
if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
int iolock = XFS_IOLOCK_EXCL;
int error;
error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
FILP_DELAY_FLAG(outfilp), &iolock);
if (error) {
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return -error;
}
}
new_size = *ppos + count;
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (new_size > ip->i_size)
ip->i_new_size = new_size;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
if (ret > 0)
XFS_STATS_ADD(xs_write_bytes, ret);
isize = i_size_read(inode);
if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
*ppos = isize;
if (*ppos > ip->i_size) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (*ppos > ip->i_size)
ip->i_size = *ppos;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
if (ip->i_new_size) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
ip->i_new_size = 0;
if (ip->i_d.di_size > ip->i_size)
ip->i_d.di_size = ip->i_size;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
/*
* This routine is called to handle zeroing any space in the last
* block of the file that is beyond the EOF. We do this since the
* size is being increased without writing anything to that block
* and we don't want anyone to read the garbage on the disk.
*/
STATIC int /* error (positive) */
xfs_zero_last_block(
xfs_inode_t *ip,
xfs_fsize_t offset,
xfs_fsize_t isize)
{
xfs_fileoff_t last_fsb;
xfs_mount_t *mp = ip->i_mount;
int nimaps;
int zero_offset;
int zero_len;
int error = 0;
xfs_bmbt_irec_t imap;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
zero_offset = XFS_B_FSB_OFFSET(mp, isize);
if (zero_offset == 0) {
/*
* There are no extra bytes in the last block on disk to
* zero, so return.
*/
return 0;
}
last_fsb = XFS_B_TO_FSBT(mp, isize);
nimaps = 1;
error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
&nimaps, NULL, NULL);
if (error) {
return error;
}
ASSERT(nimaps > 0);
/*
* If the block underlying isize is just a hole, then there
* is nothing to zero.
*/
if (imap.br_startblock == HOLESTARTBLOCK) {
return 0;
}
/*
* Zero the part of the last block beyond the EOF, and write it
* out sync. We need to drop the ilock while we do this so we
* don't deadlock when the buffer cache calls back to us.
*/
xfs_iunlock(ip, XFS_ILOCK_EXCL);
zero_len = mp->m_sb.sb_blocksize - zero_offset;
if (isize + zero_len > offset)
zero_len = offset - isize;
error = xfs_iozero(ip, isize, zero_len);
xfs_ilock(ip, XFS_ILOCK_EXCL);
ASSERT(error >= 0);
return error;
}
/*
* Zero any on disk space between the current EOF and the new,
* larger EOF. This handles the normal case of zeroing the remainder
* of the last block in the file and the unusual case of zeroing blocks
* out beyond the size of the file. This second case only happens
* with fixed size extents and when the system crashes before the inode
* size was updated but after blocks were allocated. If fill is set,
* then any holes in the range are filled and zeroed. If not, the holes
* are left alone as holes.
*/
int /* error (positive) */
xfs_zero_eof(
xfs_inode_t *ip,
xfs_off_t offset, /* starting I/O offset */
xfs_fsize_t isize) /* current inode size */
{
xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t start_zero_fsb;
xfs_fileoff_t end_zero_fsb;
xfs_fileoff_t zero_count_fsb;
xfs_fileoff_t last_fsb;
xfs_fileoff_t zero_off;
xfs_fsize_t zero_len;
int nimaps;
int error = 0;
xfs_bmbt_irec_t imap;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
ASSERT(offset > isize);
/*
* First handle zeroing the block on which isize resides.
* We only zero a part of that block so it is handled specially.
*/
error = xfs_zero_last_block(ip, offset, isize);
if (error) {
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
return error;
}
/*
* Calculate the range between the new size and the old
* where blocks needing to be zeroed may exist. To get the
* block where the last byte in the file currently resides,
* we need to subtract one from the size and truncate back
* to a block boundary. We subtract 1 in case the size is
* exactly on a block boundary.
*/
last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
if (last_fsb == end_zero_fsb) {
/*
* The size was only incremented on its last block.
* We took care of that above, so just return.
*/
return 0;
}
ASSERT(start_zero_fsb <= end_zero_fsb);
while (start_zero_fsb <= end_zero_fsb) {
nimaps = 1;
zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
0, NULL, 0, &imap, &nimaps, NULL, NULL);
if (error) {
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
return error;
}
ASSERT(nimaps > 0);
if (imap.br_state == XFS_EXT_UNWRITTEN ||
imap.br_startblock == HOLESTARTBLOCK) {
/*
* This loop handles initializing pages that were
* partially initialized by the code below this
* loop. It basically zeroes the part of the page
* that sits on a hole and sets the page as P_HOLE
* and calls remapf if it is a mapped file.
*/
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
continue;
}
/*
* There are blocks we need to zero.
* Drop the inode lock while we're doing the I/O.
* We'll still have the iolock to protect us.
*/
xfs_iunlock(ip, XFS_ILOCK_EXCL);
zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
if ((zero_off + zero_len) > offset)
zero_len = offset - zero_off;
error = xfs_iozero(ip, zero_off, zero_len);
if (error) {
goto out_lock;
}
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
xfs_ilock(ip, XFS_ILOCK_EXCL);
}
return 0;
out_lock:
xfs_ilock(ip, XFS_ILOCK_EXCL);
ASSERT(error >= 0);
return error;
}
ssize_t /* bytes written, or (-) error */
xfs_write(
struct xfs_inode *xip,
struct kiocb *iocb,
const struct iovec *iovp,
unsigned int nsegs,
loff_t *offset,
int ioflags)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
unsigned long segs = nsegs;
xfs_mount_t *mp;
ssize_t ret = 0, error = 0;
xfs_fsize_t isize, new_size;
int iolock;
int eventsent = 0;
size_t ocount = 0, count;
loff_t pos;
int need_i_mutex;
XFS_STATS_INC(xs_write_calls);
error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
if (error)
return error;
count = ocount;
pos = *offset;
if (count == 0)
return 0;
mp = xip->i_mount;
xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
relock:
if (ioflags & IO_ISDIRECT) {
iolock = XFS_IOLOCK_SHARED;
need_i_mutex = 0;
} else {
iolock = XFS_IOLOCK_EXCL;
need_i_mutex = 1;
mutex_lock(&inode->i_mutex);
}
xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
start:
error = -generic_write_checks(file, &pos, &count,
S_ISBLK(inode->i_mode));
if (error) {
xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
goto out_unlock_mutex;
}
if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) &&
!(ioflags & IO_INVIS) && !eventsent)) {
int dmflags = FILP_DELAY_FLAG(file);
if (need_i_mutex)
dmflags |= DM_FLAGS_IMUX;
xfs_iunlock(xip, XFS_ILOCK_EXCL);
error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
pos, count, dmflags, &iolock);
if (error) {
goto out_unlock_internal;
}
xfs_ilock(xip, XFS_ILOCK_EXCL);
eventsent = 1;
/*
* The iolock was dropped and reacquired in XFS_SEND_DATA
* so we have to recheck the size when appending.
* We will only "goto start;" once, since having sent the
* event prevents another call to XFS_SEND_DATA, which is
* what allows the size to change in the first place.
*/
if ((file->f_flags & O_APPEND) && pos != xip->i_size)
goto start;
}
if (ioflags & IO_ISDIRECT) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(xip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
if ((pos & target->bt_smask) || (count & target->bt_smask)) {
xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
return XFS_ERROR(-EINVAL);
}
if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
iolock = XFS_IOLOCK_EXCL;
need_i_mutex = 1;
mutex_lock(&inode->i_mutex);
xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
goto start;
}
}
new_size = pos + count;
if (new_size > xip->i_size)
xip->i_new_size = new_size;
if (likely(!(ioflags & IO_INVIS)))
file_update_time(file);
/*
* If the offset is beyond the size of the file, we have a couple
* of things to do. First, if there is already space allocated
* we need to either create holes or zero the disk or ...
*
* If there is a page where the previous size lands, we need
* to zero it out up to the new size.
*/
if (pos > xip->i_size) {
error = xfs_zero_eof(xip, pos, xip->i_size);
if (error) {
xfs_iunlock(xip, XFS_ILOCK_EXCL);
goto out_unlock_internal;
}
}
xfs_iunlock(xip, XFS_ILOCK_EXCL);
/*
* If we're writing the file then make sure to clear the
* setuid and setgid bits if the process is not being run
* by root. This keeps people from modifying setuid and
* setgid binaries.
*/
error = -file_remove_suid(file);
if (unlikely(error))
goto out_unlock_internal;
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
if ((ioflags & IO_ISDIRECT)) {
if (mapping->nrpages) {
WARN_ON(need_i_mutex == 0);
error = xfs_flushinval_pages(xip,
(pos & PAGE_CACHE_MASK),
-1, FI_REMAPF_LOCKED);
if (error)
goto out_unlock_internal;
}
if (need_i_mutex) {
/* demote the lock now the cached pages are gone */
xfs_ilock_demote(xip, XFS_IOLOCK_EXCL);
mutex_unlock(&inode->i_mutex);
iolock = XFS_IOLOCK_SHARED;
need_i_mutex = 0;
}
trace_xfs_file_direct_write(xip, count, *offset, ioflags);
ret = generic_file_direct_write(iocb, iovp,
&segs, pos, offset, count, ocount);
/*
* direct-io write to a hole: fall through to buffered I/O
* for completing the rest of the request.
*/
if (ret >= 0 && ret != count) {
XFS_STATS_ADD(xs_write_bytes, ret);
pos += ret;
count -= ret;
ioflags &= ~IO_ISDIRECT;
xfs_iunlock(xip, iolock);
goto relock;
}
} else {
int enospc = 0;
ssize_t ret2 = 0;
write_retry:
trace_xfs_file_buffered_write(xip, count, *offset, ioflags);
ret2 = generic_file_buffered_write(iocb, iovp, segs,
pos, offset, count, ret);
/*
* if we just got an ENOSPC, flush the inode now we
* aren't holding any page locks and retry *once*
*/
if (ret2 == -ENOSPC && !enospc) {
error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
if (error)
goto out_unlock_internal;
enospc = 1;
goto write_retry;
}
ret = ret2;
}
current->backing_dev_info = NULL;
isize = i_size_read(inode);
if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
*offset = isize;
if (*offset > xip->i_size) {
xfs_ilock(xip, XFS_ILOCK_EXCL);
if (*offset > xip->i_size)
xip->i_size = *offset;
xfs_iunlock(xip, XFS_ILOCK_EXCL);
}
if (ret == -ENOSPC &&
DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
xfs_iunlock(xip, iolock);
if (need_i_mutex)
mutex_unlock(&inode->i_mutex);
error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
0, 0, 0); /* Delay flag intentionally unused */
if (need_i_mutex)
mutex_lock(&inode->i_mutex);
xfs_ilock(xip, iolock);
if (error)
goto out_unlock_internal;
goto start;
}
error = -ret;
if (ret <= 0)
goto out_unlock_internal;
XFS_STATS_ADD(xs_write_bytes, ret);
/* Handle various SYNC-type writes */
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
loff_t end = pos + ret - 1;
int error2;
xfs_iunlock(xip, iolock);
if (need_i_mutex)
mutex_unlock(&inode->i_mutex);
error2 = filemap_write_and_wait_range(mapping, pos, end);
if (!error)
error = error2;
if (need_i_mutex)
mutex_lock(&inode->i_mutex);
xfs_ilock(xip, iolock);
error2 = xfs_fsync(xip);
if (!error)
error = error2;
}
out_unlock_internal:
if (xip->i_new_size) {
xfs_ilock(xip, XFS_ILOCK_EXCL);
xip->i_new_size = 0;
/*
* If this was a direct or synchronous I/O that failed (such
* as ENOSPC) then part of the I/O may have been written to
* disk before the error occured. In this case the on-disk
* file size may have been adjusted beyond the in-memory file
* size and now needs to be truncated back.
*/
if (xip->i_d.di_size > xip->i_size)
xip->i_d.di_size = xip->i_size;
xfs_iunlock(xip, XFS_ILOCK_EXCL);
}
xfs_iunlock(xip, iolock);
out_unlock_mutex:
if (need_i_mutex)
mutex_unlock(&inode->i_mutex);
return -error;
}
/*
* If the underlying (data/log/rt) device is readonly, there are some
* operations that cannot proceed.
*/
int
xfs_dev_is_read_only(
xfs_mount_t *mp,
char *message)
{
if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
xfs_readonly_buftarg(mp->m_logdev_targp) ||
(mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
cmn_err(CE_NOTE,
"XFS: %s required on read-only device.", message);
cmn_err(CE_NOTE,
"XFS: write access unavailable, cannot proceed.");
return EROFS;
}
return 0;
}
/*
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_LRW_H__
#define __XFS_LRW_H__
struct xfs_mount;
struct xfs_inode;
struct xfs_buf;
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
#endif /* __XFS_LRW_H__ */
......@@ -607,6 +607,7 @@ xfssyncd(
set_freezable();
timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
for (;;) {
if (list_empty(&mp->m_sync_list))
timeleft = schedule_timeout_interruptible(timeleft);
/* swsusp */
try_to_freeze();
......@@ -627,8 +628,7 @@ xfssyncd(
list_add_tail(&mp->m_sync_work.w_list,
&mp->m_sync_list);
}
list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
list_move(&work->w_list, &tmp);
list_splice_init(&mp->m_sync_list, &tmp);
spin_unlock(&mp->m_sync_lock);
list_for_each_entry_safe(work, n, &tmp, w_list) {
......@@ -688,12 +688,12 @@ xfs_inode_set_reclaim_tag(
struct xfs_perag *pag;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
read_lock(&pag->pag_ici_lock);
write_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
__xfs_inode_set_reclaim_tag(pag, ip);
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
write_unlock(&pag->pag_ici_lock);
xfs_perag_put(pag);
}
......
......@@ -51,22 +51,6 @@
#include "quota/xfs_dquot_item.h"
#include "quota/xfs_dquot.h"
/*
* Format fsblock number into a static buffer & return it.
*/
STATIC char *xfs_fmtfsblock(xfs_fsblock_t bno)
{
static char rval[50];
if (bno == NULLFSBLOCK)
sprintf(rval, "NULLFSBLOCK");
else if (isnullstartblock(bno))
sprintf(rval, "NULLSTARTBLOCK(%lld)", startblockval(bno));
else
sprintf(rval, "%lld", (xfs_dfsbno_t)bno);
return rval;
}
/*
* We include this last to have the helpers above available for the trace
* event implementations.
......
......@@ -197,13 +197,13 @@ TRACE_EVENT(xfs_iext_insert,
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
"offset %lld block %s count %lld flag %d caller %pf",
"offset %lld block %lld count %lld flag %d caller %pf",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
(long)__entry->idx,
__entry->startoff,
xfs_fmtfsblock(__entry->startblock),
(__int64_t)__entry->startblock,
__entry->blockcount,
__entry->state,
(char *)__entry->caller_ip)
......@@ -241,13 +241,13 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
"offset %lld block %s count %lld flag %d caller %pf",
"offset %lld block %lld count %lld flag %d caller %pf",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
(long)__entry->idx,
__entry->startoff,
xfs_fmtfsblock(__entry->startblock),
(__int64_t)__entry->startblock,
__entry->blockcount,
__entry->state,
(char *)__entry->caller_ip)
......@@ -593,7 +593,7 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
TP_ARGS(dqp),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(__be32, id)
__field(u32, id)
__field(unsigned, flags)
__field(unsigned, nrefs)
__field(unsigned long long, res_bcount)
......@@ -606,7 +606,7 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
), \
TP_fast_assign(
__entry->dev = dqp->q_mount->m_super->s_dev;
__entry->id = dqp->q_core.d_id;
__entry->id = be32_to_cpu(dqp->q_core.d_id);
__entry->flags = dqp->dq_flags;
__entry->nrefs = dqp->q_nrefs;
__entry->res_bcount = dqp->q_res_bcount;
......@@ -622,10 +622,10 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
be64_to_cpu(dqp->q_core.d_ino_softlimit);
),
TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
"bcnt 0x%llx [hard 0x%llx | soft 0x%llx] "
"icnt 0x%llx [hard 0x%llx | soft 0x%llx]",
"bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
"icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
MAJOR(__entry->dev), MINOR(__entry->dev),
be32_to_cpu(__entry->id),
__entry->id,
__print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
__entry->nrefs,
__entry->res_bcount,
......@@ -881,7 +881,7 @@ TRACE_EVENT(name, \
), \
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
"offset 0x%llx count %zd flags %s " \
"startoff 0x%llx startblock %s blockcount 0x%llx", \
"startoff 0x%llx startblock %lld blockcount 0x%llx", \
MAJOR(__entry->dev), MINOR(__entry->dev), \
__entry->ino, \
__entry->size, \
......@@ -890,7 +890,7 @@ TRACE_EVENT(name, \
__entry->count, \
__print_flags(__entry->flags, "|", BMAPI_FLAGS), \
__entry->startoff, \
xfs_fmtfsblock(__entry->startblock), \
(__int64_t)__entry->startblock, \
__entry->blockcount) \
)
DEFINE_IOMAP_EVENT(xfs_iomap_enter);
......
......@@ -2550,106 +2550,56 @@ xfs_bmap_rtalloc(
}
STATIC int
xfs_bmap_btalloc(
xfs_bmalloca_t *ap) /* bmap alloc argument struct */
xfs_bmap_btalloc_nullfb(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
xfs_extlen_t *blen)
{
xfs_mount_t *mp; /* mount point structure */
xfs_alloctype_t atype = 0; /* type for allocation routines */
xfs_extlen_t align; /* minimum allocation alignment */
xfs_agnumber_t ag;
xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
xfs_agnumber_t startag;
xfs_alloc_arg_t args;
xfs_extlen_t blen;
xfs_extlen_t nextminlen = 0;
xfs_perag_t *pag;
int nullfb; /* true if ap->firstblock isn't set */
int isaligned;
int notinit;
int tryagain;
struct xfs_mount *mp = ap->ip->i_mount;
struct xfs_perag *pag;
xfs_agnumber_t ag, startag;
int notinit = 0;
int error;
mp = ap->ip->i_mount;
align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
if (unlikely(align)) {
error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
align, 0, ap->eof, 0, ap->conv,
&ap->off, &ap->alen);
ASSERT(!error);
ASSERT(ap->alen);
}
nullfb = ap->firstblock == NULLFSBLOCK;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
if (nullfb) {
if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
ag = xfs_filestream_lookup_ag(ap->ip);
ag = (ag != NULLAGNUMBER) ? ag : 0;
ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
} else {
ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
}
} else
ap->rval = ap->firstblock;
xfs_bmap_adjacent(ap);
/*
* If allowed, use ap->rval; otherwise must use firstblock since
* it's in the right allocation group.
*/
if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
;
else
ap->rval = ap->firstblock;
/*
* Normal allocation, done through xfs_alloc_vextent.
*/
tryagain = isaligned = 0;
args.tp = ap->tp;
args.mp = mp;
args.fsbno = ap->rval;
args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
args.firstblock = ap->firstblock;
blen = 0;
if (nullfb) {
if (ap->userdata && xfs_inode_is_filestream(ap->ip))
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args->type = XFS_ALLOCTYPE_NEAR_BNO;
else
args.type = XFS_ALLOCTYPE_START_BNO;
args.total = ap->total;
args->type = XFS_ALLOCTYPE_START_BNO;
args->total = ap->total;
/*
* Search for an allocation group with a single extent
* large enough for the request.
*
* If one isn't found, then adjust the minimum allocation
* size to the largest space found.
* Search for an allocation group with a single extent large enough
* for the request. If one isn't found, then adjust the minimum
* allocation size to the largest space found.
*/
startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
if (startag == NULLAGNUMBER)
startag = ag = 0;
notinit = 0;
pag = xfs_perag_get(mp, ag);
while (blen < ap->alen) {
if (!pag->pagf_init &&
(error = xfs_alloc_pagf_init(mp, args.tp,
ag, XFS_ALLOC_FLAG_TRYLOCK))) {
while (*blen < ap->alen) {
if (!pag->pagf_init) {
error = xfs_alloc_pagf_init(mp, args->tp, ag,
XFS_ALLOC_FLAG_TRYLOCK);
if (error) {
xfs_perag_put(pag);
return error;
}
}
/*
* See xfs_alloc_fix_freelist...
*/
if (pag->pagf_init) {
xfs_extlen_t longest;
longest = xfs_alloc_longest_free_extent(mp, pag);
if (blen < longest)
blen = longest;
if (*blen < longest)
*blen = longest;
} else
notinit = 1;
if (xfs_inode_is_filestream(ap->ip)) {
if (blen >= ap->alen)
if (*blen >= ap->alen)
break;
if (ap->userdata) {
......@@ -2684,31 +2634,99 @@ xfs_bmap_btalloc(
pag = xfs_perag_get(mp, ag);
}
xfs_perag_put(pag);
/*
* Since the above loop did a BUF_TRYLOCK, it is
* possible that there is space for this request.
*/
if (notinit || blen < ap->minlen)
args.minlen = ap->minlen;
if (notinit || *blen < ap->minlen)
args->minlen = ap->minlen;
/*
* If the best seen length is less than the request
* length, use the best as the minimum.
*/
else if (blen < ap->alen)
args.minlen = blen;
else if (*blen < ap->alen)
args->minlen = *blen;
/*
* Otherwise we've seen an extent as big as alen,
* use that as the minimum.
*/
else
args.minlen = ap->alen;
args->minlen = ap->alen;
/*
* set the failure fallback case to look in the selected
* AG as the stream may have moved.
*/
if (xfs_inode_is_filestream(ap->ip))
ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
ap->rval = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
return 0;
}
STATIC int
xfs_bmap_btalloc(
xfs_bmalloca_t *ap) /* bmap alloc argument struct */
{
xfs_mount_t *mp; /* mount point structure */
xfs_alloctype_t atype = 0; /* type for allocation routines */
xfs_extlen_t align; /* minimum allocation alignment */
xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
xfs_agnumber_t ag;
xfs_alloc_arg_t args;
xfs_extlen_t blen;
xfs_extlen_t nextminlen = 0;
int nullfb; /* true if ap->firstblock isn't set */
int isaligned;
int tryagain;
int error;
mp = ap->ip->i_mount;
align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
if (unlikely(align)) {
error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
align, 0, ap->eof, 0, ap->conv,
&ap->off, &ap->alen);
ASSERT(!error);
ASSERT(ap->alen);
}
nullfb = ap->firstblock == NULLFSBLOCK;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
if (nullfb) {
if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
ag = xfs_filestream_lookup_ag(ap->ip);
ag = (ag != NULLAGNUMBER) ? ag : 0;
ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
} else {
ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
}
} else
ap->rval = ap->firstblock;
xfs_bmap_adjacent(ap);
/*
* If allowed, use ap->rval; otherwise must use firstblock since
* it's in the right allocation group.
*/
if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
;
else
ap->rval = ap->firstblock;
/*
* Normal allocation, done through xfs_alloc_vextent.
*/
tryagain = isaligned = 0;
args.tp = ap->tp;
args.mp = mp;
args.fsbno = ap->rval;
args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
args.firstblock = ap->firstblock;
blen = 0;
if (nullfb) {
error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
if (error)
return error;
} else if (ap->low) {
if (xfs_inode_is_filestream(ap->ip))
args.type = XFS_ALLOCTYPE_FIRST_AG;
......
......@@ -292,7 +292,8 @@ typedef struct xfs_bstat {
__s32 bs_extents; /* number of extents */
__u32 bs_gen; /* generation count */
__u16 bs_projid; /* project id */
unsigned char bs_pad[14]; /* pad space, unused */
__u16 bs_forkoff; /* inode fork offset in bytes */
unsigned char bs_pad[12]; /* pad space, unused */
__u32 bs_dmevmask; /* DMIG event mask */
__u16 bs_dmstate; /* DMIG state info */
__u16 bs_aextents; /* attribute number of extents */
......
......@@ -190,13 +190,12 @@ xfs_iget_cache_hit(
trace_xfs_iget_reclaim(ip);
/*
* We need to set XFS_INEW atomically with clearing the
* reclaimable tag so that we do have an indicator of the
* inode still being initialized.
* We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
* from stomping over us while we recycle the inode. We can't
* clear the radix tree reclaimable tag yet as it requires
* pag_ici_lock to be held exclusive.
*/
ip->i_flags |= XFS_INEW;
ip->i_flags &= ~XFS_IRECLAIMABLE;
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
ip->i_flags |= XFS_IRECLAIM;
spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
......@@ -216,7 +215,15 @@ xfs_iget_cache_hit(
trace_xfs_iget_reclaim(ip);
goto out_error;
}
write_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
ip->i_flags |= XFS_INEW;
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
inode->i_state = I_NEW;
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);
} else {
/* If the VFS inode is being torn down, pause and try again. */
if (!igrab(inode)) {
......
......@@ -2439,75 +2439,31 @@ xfs_idestroy_fork(
}
/*
* Increment the pin count of the given buffer.
* This value is protected by ipinlock spinlock in the mount structure.
* This is called to unpin an inode. The caller must have the inode locked
* in at least shared mode so that the buffer cannot be subsequently pinned
* once someone is waiting for it to be unpinned.
*/
void
xfs_ipin(
xfs_inode_t *ip)
{
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
atomic_inc(&ip->i_pincount);
}
/*
* Decrement the pin count of the given inode, and wake up
* anyone in xfs_iwait_unpin() if the count goes to 0. The
* inode must have been previously pinned with a call to xfs_ipin().
*/
void
xfs_iunpin(
xfs_inode_t *ip)
{
ASSERT(atomic_read(&ip->i_pincount) > 0);
if (atomic_dec_and_test(&ip->i_pincount))
wake_up(&ip->i_ipin_wait);
}
/*
* This is called to unpin an inode. It can be directed to wait or to return
* immediately without waiting for the inode to be unpinned. The caller must
* have the inode locked in at least shared mode so that the buffer cannot be
* subsequently pinned once someone is waiting for it to be unpinned.
*/
STATIC void
__xfs_iunpin_wait(
xfs_inode_t *ip,
int wait)
static void
xfs_iunpin_nowait(
struct xfs_inode *ip)
{
xfs_inode_log_item_t *iip = ip->i_itemp;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
if (atomic_read(&ip->i_pincount) == 0)
return;
/* Give the log a push to start the unpinning I/O */
if (iip && iip->ili_last_lsn)
xfs_log_force_lsn(ip->i_mount, iip->ili_last_lsn, 0);
else
xfs_log_force(ip->i_mount, 0);
xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
if (wait)
wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
}
void
xfs_iunpin_wait(
xfs_inode_t *ip)
struct xfs_inode *ip)
{
__xfs_iunpin_wait(ip, 1);
}
static inline void
xfs_iunpin_nowait(
xfs_inode_t *ip)
{
__xfs_iunpin_wait(ip, 0);
if (xfs_ipincount(ip)) {
xfs_iunpin_nowait(ip);
wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0));
}
}
/*
* xfs_iextents_copy()
*
......
......@@ -471,8 +471,6 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int);
void xfs_ipin(xfs_inode_t *);
void xfs_iunpin(xfs_inode_t *);
void xfs_iunpin_wait(xfs_inode_t *);
int xfs_iflush(xfs_inode_t *, uint);
void xfs_ichgtime(xfs_inode_t *, int);
......@@ -480,6 +478,7 @@ void xfs_lock_inodes(xfs_inode_t **, int, uint);
void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
void xfs_synchronize_times(xfs_inode_t *);
void xfs_mark_inode_dirty(xfs_inode_t *);
void xfs_mark_inode_dirty_sync(xfs_inode_t *);
#define IHOLD(ip) \
......
......@@ -535,23 +535,23 @@ xfs_inode_item_format(
/*
* This is called to pin the inode associated with the inode log
* item in memory so it cannot be written out. Do this by calling
* xfs_ipin() to bump the pin count in the inode while holding the
* inode pin lock.
* item in memory so it cannot be written out.
*/
STATIC void
xfs_inode_item_pin(
xfs_inode_log_item_t *iip)
{
ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
xfs_ipin(iip->ili_inode);
atomic_inc(&iip->ili_inode->i_pincount);
}
/*
* This is called to unpin the inode associated with the inode log
* item which was previously pinned with a call to xfs_inode_item_pin().
* Just call xfs_iunpin() on the inode to do this.
*
* Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
*/
/* ARGSUSED */
STATIC void
......@@ -559,7 +559,11 @@ xfs_inode_item_unpin(
xfs_inode_log_item_t *iip,
int stale)
{
xfs_iunpin(iip->ili_inode);
struct xfs_inode *ip = iip->ili_inode;
ASSERT(atomic_read(&ip->i_pincount) > 0);
if (atomic_dec_and_test(&ip->i_pincount))
wake_up(&ip->i_ipin_wait);
}
/* ARGSUSED */
......@@ -568,7 +572,7 @@ xfs_inode_item_unpin_remove(
xfs_inode_log_item_t *iip,
xfs_trans_t *tp)
{
xfs_iunpin(iip->ili_inode);
xfs_inode_item_unpin(iip, 0);
}
/*
......
......@@ -106,6 +106,7 @@ xfs_bulkstat_one_iget(
buf->bs_dmevmask = dic->di_dmevmask;
buf->bs_dmstate = dic->di_dmstate;
buf->bs_aextents = dic->di_anextents;
buf->bs_forkoff = XFS_IFORK_BOFF(ip);
switch (dic->di_format) {
case XFS_DINODE_FMT_DEV:
......@@ -176,6 +177,7 @@ xfs_bulkstat_one_dinode(
buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
buf->bs_aextents = be16_to_cpu(dic->di_anextents);
buf->bs_forkoff = XFS_DFORK_BOFF(dic);
switch (dic->di_format) {
case XFS_DINODE_FMT_DEV:
......
......@@ -60,7 +60,7 @@ STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
STATIC void xlog_dealloc_log(xlog_t *log);
STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
int nentries, xfs_log_ticket_t tic,
int nentries, struct xlog_ticket *tic,
xfs_lsn_t *start_lsn,
xlog_in_core_t **commit_iclog,
uint flags);
......@@ -243,13 +243,13 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
* out when the next write occurs.
*/
xfs_lsn_t
xfs_log_done(xfs_mount_t *mp,
xfs_log_ticket_t xtic,
void **iclog,
xfs_log_done(
struct xfs_mount *mp,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
uint flags)
{
xlog_t *log = mp->m_log;
xlog_ticket_t *ticket = (xfs_log_ticket_t) xtic;
struct log *log = mp->m_log;
xfs_lsn_t lsn = 0;
if (XLOG_FORCED_SHUTDOWN(log) ||
......@@ -258,8 +258,7 @@ xfs_log_done(xfs_mount_t *mp,
* If we get an error, just continue and give back the log ticket.
*/
(((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
(xlog_commit_record(mp, ticket,
(xlog_in_core_t **)iclog, &lsn)))) {
(xlog_commit_record(mp, ticket, iclog, &lsn)))) {
lsn = (xfs_lsn_t) -1;
if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
flags |= XFS_LOG_REL_PERM_RESERV;
......@@ -289,7 +288,7 @@ xfs_log_done(xfs_mount_t *mp,
}
return lsn;
} /* xfs_log_done */
}
/*
* Attaches a new iclog I/O completion callback routine during
......@@ -298,11 +297,11 @@ xfs_log_done(xfs_mount_t *mp,
* executing the callback at an appropriate time.
*/
int
xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
void *iclog_hndl, /* iclog to hang callback off */
xfs_log_notify(
struct xfs_mount *mp,
struct xlog_in_core *iclog,
xfs_log_callback_t *cb)
{
xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
int abortflg;
spin_lock(&iclog->ic_callback_lock);
......@@ -316,16 +315,14 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
}
spin_unlock(&iclog->ic_callback_lock);
return abortflg;
} /* xfs_log_notify */
}
int
xfs_log_release_iclog(xfs_mount_t *mp,
void *iclog_hndl)
xfs_log_release_iclog(
struct xfs_mount *mp,
struct xlog_in_core *iclog)
{
xlog_t *log = mp->m_log;
xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
if (xlog_state_release_iclog(log, iclog)) {
if (xlog_state_release_iclog(mp->m_log, iclog)) {
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
return EIO;
}
......@@ -344,16 +341,17 @@ xfs_log_release_iclog(xfs_mount_t *mp,
* reservation, we prevent over allocation problems.
*/
int
xfs_log_reserve(xfs_mount_t *mp,
xfs_log_reserve(
struct xfs_mount *mp,
int unit_bytes,
int cnt,
xfs_log_ticket_t *ticket,
struct xlog_ticket **ticket,
__uint8_t client,
uint flags,
uint t_type)
{
xlog_t *log = mp->m_log;
xlog_ticket_t *internal_ticket;
struct log *log = mp->m_log;
struct xlog_ticket *internal_ticket;
int retval = 0;
ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
......@@ -367,7 +365,7 @@ xfs_log_reserve(xfs_mount_t *mp,
if (*ticket != NULL) {
ASSERT(flags & XFS_LOG_PERM_RESERV);
internal_ticket = (xlog_ticket_t *)*ticket;
internal_ticket = *ticket;
trace_xfs_log_reserve(log, internal_ticket);
......@@ -519,7 +517,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
xlog_in_core_t *first_iclog;
#endif
xfs_log_iovec_t reg[1];
xfs_log_ticket_t tic = NULL;
xlog_ticket_t *tic = NULL;
xfs_lsn_t lsn;
int error;
......@@ -656,24 +654,24 @@ xfs_log_unmount(xfs_mount_t *mp)
* transaction occur with one call to xfs_log_write().
*/
int
xfs_log_write(xfs_mount_t * mp,
xfs_log_iovec_t reg[],
xfs_log_write(
struct xfs_mount *mp,
struct xfs_log_iovec reg[],
int nentries,
xfs_log_ticket_t tic,
struct xlog_ticket *tic,
xfs_lsn_t *start_lsn)
{
struct log *log = mp->m_log;
int error;
xlog_t *log = mp->m_log;
if (XLOG_FORCED_SHUTDOWN(log))
return XFS_ERROR(EIO);
if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) {
error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0);
if (error)
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
}
return error;
} /* xfs_log_write */
}
void
xfs_log_move_tail(xfs_mount_t *mp,
......@@ -1642,16 +1640,16 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
* bytes have been written out.
*/
STATIC int
xlog_write(xfs_mount_t * mp,
xfs_log_iovec_t reg[],
xlog_write(
struct xfs_mount *mp,
struct xfs_log_iovec reg[],
int nentries,
xfs_log_ticket_t tic,
struct xlog_ticket *ticket,
xfs_lsn_t *start_lsn,
xlog_in_core_t **commit_iclog,
struct xlog_in_core **commit_iclog,
uint flags)
{
xlog_t *log = mp->m_log;
xlog_ticket_t *ticket = (xlog_ticket_t *)tic;
xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */
xlog_op_header_t *logop_head; /* ptr to log operation header */
__psint_t ptr; /* copy address into data region */
......@@ -1765,7 +1763,7 @@ xlog_write(xfs_mount_t * mp,
default:
xfs_fs_cmn_err(CE_WARN, mp,
"Bad XFS transaction clientid 0x%x in ticket 0x%p",
logop_head->oh_clientid, tic);
logop_head->oh_clientid, ticket);
return XFS_ERROR(EIO);
}
......
......@@ -110,8 +110,6 @@ typedef struct xfs_log_iovec {
uint i_type; /* type of region */
} xfs_log_iovec_t;
typedef void* xfs_log_ticket_t;
/*
* Structure used to pass callback function and the function's argument
* to the log manager.
......@@ -126,10 +124,12 @@ typedef struct xfs_log_callback {
#ifdef __KERNEL__
/* Log manager interfaces */
struct xfs_mount;
struct xlog_in_core;
struct xlog_ticket;
xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
xfs_log_ticket_t ticket,
void **iclog,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
uint flags);
int _xfs_log_force(struct xfs_mount *mp,
uint flags,
......@@ -151,21 +151,21 @@ int xfs_log_mount_finish(struct xfs_mount *mp);
void xfs_log_move_tail(struct xfs_mount *mp,
xfs_lsn_t tail_lsn);
int xfs_log_notify(struct xfs_mount *mp,
void *iclog,
struct xlog_in_core *iclog,
xfs_log_callback_t *callback_entry);
int xfs_log_release_iclog(struct xfs_mount *mp,
void *iclog_hndl);
struct xlog_in_core *iclog);
int xfs_log_reserve(struct xfs_mount *mp,
int length,
int count,
xfs_log_ticket_t *ticket,
struct xlog_ticket **ticket,
__uint8_t clientid,
uint flags,
uint t_type);
int xfs_log_write(struct xfs_mount *mp,
xfs_log_iovec_t region[],
int nentries,
xfs_log_ticket_t ticket,
struct xlog_ticket *ticket,
xfs_lsn_t *start_lsn);
int xfs_log_unmount_write(struct xfs_mount *mp);
void xfs_log_unmount(struct xfs_mount *mp);
......
......@@ -1097,13 +1097,15 @@ xfs_default_resblks(xfs_mount_t *mp)
__uint64_t resblks;
/*
* We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
* This may drive us straight to ENOSPC on mount, but that implies
* we were already there on the last unmount. Warn if this occurs.
* We default to 5% or 8192 fsbs of space reserved, whichever is
* smaller. This is intended to cover concurrent allocation
* transactions when we initially hit enospc. These each require a 4
* block reservation. Hence by default we cover roughly 2000 concurrent
* allocation reservations.
*/
resblks = mp->m_sb.sb_dblocks;
do_div(resblks, 20);
resblks = min_t(__uint64_t, resblks, 1024);
resblks = min_t(__uint64_t, resblks, 8192);
return resblks;
}
......@@ -1417,6 +1419,9 @@ xfs_mountfs(
* when at ENOSPC. This is needed for operations like create with
* attr, unwritten extent conversion at ENOSPC, etc. Data allocations
* are not allowed to use this reserved space.
*
* This may drive us straight to ENOSPC on mount, but that implies
* we were already there on the last unmount. Warn if this occurs.
*/
if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
resblks = xfs_default_resblks(mp);
......@@ -1725,26 +1730,30 @@ xfs_mod_incore_sb_unlocked(
lcounter += rem;
}
} else { /* Taking blocks away */
lcounter += delta;
if (lcounter >= 0) {
mp->m_sb.sb_fdblocks = lcounter +
XFS_ALLOC_SET_ASIDE(mp);
return 0;
}
/*
* If were out of blocks, use any available reserved blocks if
* were allowed to.
* We are out of blocks, use any available reserved
* blocks if were allowed to.
*/
if (!rsvd)
return XFS_ERROR(ENOSPC);
if (lcounter < 0) {
if (rsvd) {
lcounter = (long long)mp->m_resblks_avail + delta;
if (lcounter < 0) {
return XFS_ERROR(ENOSPC);
}
if (lcounter >= 0) {
mp->m_resblks_avail = lcounter;
return 0;
} else { /* not reserved */
return XFS_ERROR(ENOSPC);
}
}
printk_once(KERN_WARNING
"Filesystem \"%s\": reserve blocks depleted! "
"Consider increasing reserve pool size.",
mp->m_fsname);
return XFS_ERROR(ENOSPC);
}
mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
......@@ -2052,6 +2061,26 @@ xfs_mount_log_sb(
return error;
}
/*
* If the underlying (data/log/rt) device is readonly, there are some
* operations that cannot proceed.
*/
int
xfs_dev_is_read_only(
struct xfs_mount *mp,
char *message)
{
if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
xfs_readonly_buftarg(mp->m_logdev_targp) ||
(mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
cmn_err(CE_NOTE,
"XFS: %s required on read-only device.", message);
cmn_err(CE_NOTE,
"XFS: write access unavailable, cannot proceed.");
return EROFS;
}
return 0;
}
#ifdef HAVE_PERCPU_SB
/*
......
......@@ -436,6 +436,8 @@ extern void xfs_freesb(xfs_mount_t *);
extern int xfs_fs_writable(xfs_mount_t *);
extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
extern int xfs_dmops_get(struct xfs_mount *);
extern void xfs_dmops_put(struct xfs_mount *);
......
......@@ -796,7 +796,7 @@ _xfs_trans_commit(
int sync;
#define XFS_TRANS_LOGVEC_COUNT 16
xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
void *commit_iclog;
struct xlog_in_core *commit_iclog;
int shutdown;
commit_lsn = -1;
......
......@@ -910,7 +910,7 @@ typedef struct xfs_trans {
unsigned int t_blk_res_used; /* # of resvd blocks used */
unsigned int t_rtx_res; /* # of rt extents resvd */
unsigned int t_rtx_res_used; /* # of resvd rt extents used */
xfs_log_ticket_t t_ticket; /* log mgr ticket */
struct xlog_ticket *t_ticket; /* log mgr ticket */
xfs_lsn_t t_lsn; /* log seq num of start of
* transaction. */
xfs_lsn_t t_commit_lsn; /* log seq num of end of
......
......@@ -46,6 +46,65 @@ STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *,
xfs_daddr_t, int);
/*
* Add the locked buffer to the transaction.
*
* The buffer must be locked, and it cannot be associated with any
* transaction.
*
* If the buffer does not yet have a buf log item associated with it,
* then allocate one for it. Then add the buf item to the transaction.
*/
STATIC void
_xfs_trans_bjoin(
struct xfs_trans *tp,
struct xfs_buf *bp,
int reset_recur)
{
struct xfs_buf_log_item *bip;
ASSERT(XFS_BUF_ISBUSY(bp));
ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
* it doesn't have one yet, then allocate one and initialize it.
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
if (reset_recur)
bip->bli_recur = 0;
/*
* Take a reference for this transaction on the buf item.
*/
atomic_inc(&bip->bli_refcount);
/*
* Get a log_item_desc to point at the new item.
*/
(void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
/*
* Initialize b_fsprivate2 so we can find it with incore_match()
* in xfs_trans_get_buf() and friends above.
*/
XFS_BUF_SET_FSPRIVATE2(bp, tp);
}
void
xfs_trans_bjoin(
struct xfs_trans *tp,
struct xfs_buf *bp)
{
_xfs_trans_bjoin(tp, bp, 0);
trace_xfs_trans_bjoin(bp->b_fspriv);
}
/*
* Get and lock the buffer for the caller if it is not already
......@@ -132,40 +191,8 @@ xfs_trans_get_buf(xfs_trans_t *tp,
ASSERT(!XFS_BUF_GETERROR(bp));
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
* it doesn't have one yet, then allocate one and initialize it.
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
/*
* Set the recursion count for the buffer within this transaction
* to 0.
*/
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
bip->bli_recur = 0;
/*
* Take a reference for this transaction on the buf item.
*/
atomic_inc(&bip->bli_refcount);
/*
* Get a log_item_desc to point at the new item.
*/
(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
/*
* Initialize b_fsprivate2 so we can find it with incore_match()
* above.
*/
XFS_BUF_SET_FSPRIVATE2(bp, tp);
trace_xfs_trans_get_buf(bip);
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_get_buf(bp->b_fspriv);
return (bp);
}
......@@ -210,44 +237,11 @@ xfs_trans_getsb(xfs_trans_t *tp,
}
bp = xfs_getsb(mp, flags);
if (bp == NULL) {
if (bp == NULL)
return NULL;
}
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
* it doesn't have one yet, then allocate one and initialize it.
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, mp);
/*
* Set the recursion count for the buffer within this transaction
* to 0.
*/
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
bip->bli_recur = 0;
/*
* Take a reference for this transaction on the buf item.
*/
atomic_inc(&bip->bli_refcount);
/*
* Get a log_item_desc to point at the new item.
*/
(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
/*
* Initialize b_fsprivate2 so we can find it with incore_match()
* above.
*/
XFS_BUF_SET_FSPRIVATE2(bp, tp);
trace_xfs_trans_getsb(bip);
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_getsb(bp->b_fspriv);
return (bp);
}
......@@ -425,40 +419,9 @@ xfs_trans_read_buf(
if (XFS_FORCED_SHUTDOWN(mp))
goto shutdown_abort;
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
* it doesn't have one yet, then allocate one and initialize it.
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_read_buf(bp->b_fspriv);
/*
* Set the recursion count for the buffer within this transaction
* to 0.
*/
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
bip->bli_recur = 0;
/*
* Take a reference for this transaction on the buf item.
*/
atomic_inc(&bip->bli_refcount);
/*
* Get a log_item_desc to point at the new item.
*/
(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
/*
* Initialize b_fsprivate2 so we can find it with incore_match()
* above.
*/
XFS_BUF_SET_FSPRIVATE2(bp, tp);
trace_xfs_trans_read_buf(bip);
*bpp = bp;
return 0;
......@@ -622,53 +585,6 @@ xfs_trans_brelse(xfs_trans_t *tp,
return;
}
/*
* Add the locked buffer to the transaction.
* The buffer must be locked, and it cannot be associated with any
* transaction.
*
* If the buffer does not yet have a buf log item associated with it,
* then allocate one for it. Then add the buf item to the transaction.
*/
void
xfs_trans_bjoin(xfs_trans_t *tp,
xfs_buf_t *bp)
{
xfs_buf_log_item_t *bip;
ASSERT(XFS_BUF_ISBUSY(bp));
ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
* it doesn't have one yet, then allocate one and initialize it.
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
/*
* Take a reference for this transaction on the buf item.
*/
atomic_inc(&bip->bli_refcount);
/*
* Get a log_item_desc to point at the new item.
*/
(void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
/*
* Initialize b_fsprivate2 so we can find it with incore_match()
* in xfs_trans_get_buf() and friends above.
*/
XFS_BUF_SET_FSPRIVATE2(bp, tp);
trace_xfs_trans_bjoin(bip);
}
/*
* Mark the buffer as not needing to be unlocked when the buf item's
* IOP_UNLOCK() routine is called. The buffer must already be locked
......
......@@ -583,113 +583,6 @@ xfs_readlink(
return error;
}
/*
* xfs_fsync
*
* This is called to sync the inode and its data out to disk. We need to hold
* the I/O lock while flushing the data, and the inode lock while flushing the
* inode. The inode lock CANNOT be held while flushing the data, so acquire
* after we're done with that.
*/
int
xfs_fsync(
xfs_inode_t *ip)
{
xfs_trans_t *tp;
int error = 0;
int log_flushed = 0;
xfs_itrace_entry(ip);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return XFS_ERROR(EIO);
/*
* We always need to make sure that the required inode state is safe on
* disk. The inode might be clean but we still might need to force the
* log because of committed transactions that haven't hit the disk yet.
* Likewise, there could be unflushed non-transactional changes to the
* inode core that have to go to disk and this requires us to issue
* a synchronous transaction to capture these changes correctly.
*
* This code relies on the assumption that if the update_* fields
* of the inode are clear and the inode is unpinned then it is clean
* and no action is required.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (!ip->i_update_core) {
/*
* Timestamps/size haven't changed since last inode flush or
* inode transaction commit. That means either nothing got
* written or a transaction committed which caught the updates.
* If the latter happened and the transaction hasn't hit the
* disk yet, the inode will be still be pinned. If it is,
* force the log.
*/
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (xfs_ipincount(ip)) {
if (ip->i_itemp->ili_last_lsn) {
error = _xfs_log_force_lsn(ip->i_mount,
ip->i_itemp->ili_last_lsn,
XFS_LOG_SYNC, &log_flushed);
} else {
error = _xfs_log_force(ip->i_mount,
XFS_LOG_SYNC, &log_flushed);
}
}
} else {
/*
* Kick off a transaction to log the inode core to get the
* updates. The sync transaction will also force the log.
*/
xfs_iunlock(ip, XFS_ILOCK_SHARED);
tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
error = xfs_trans_reserve(tp, 0,
XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
/*
* Note - it's possible that we might have pushed ourselves out
* of the way during trans_reserve which would flush the inode.
* But there's no guarantee that the inode buffer has actually
* gone out yet (it's delwri). Plus the buffer could be pinned
* anyway if it's part of an inode in another recent
* transaction. So we play it safe and fire off the
* transaction anyway.
*/
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_set_sync(tp);
error = _xfs_trans_commit(tp, 0, &log_flushed);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
/*
* If the log write didn't issue an ordered tag we need
* to flush the disk cache for the data device now.
*/
if (!log_flushed)
xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
/*
* If this inode is on the RT dev we need to flush that
* cache as well.
*/
if (XFS_IS_REALTIME_INODE(ip))
xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
}
return error;
}
/*
* Flags for xfs_free_eofblocks
*/
......
......@@ -21,7 +21,6 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
int xfs_readlink(struct xfs_inode *ip, char *link);
int xfs_fsync(struct xfs_inode *ip);
int xfs_release(struct xfs_inode *ip);
int xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
......@@ -50,18 +49,6 @@ int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
int flags, struct attrlist_cursor_kern *cursor);
ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
const struct iovec *iovp, unsigned int segs,
loff_t *offset, int ioflags);
ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp,
loff_t *ppos, struct pipe_inode_info *pipe, size_t count,
int flags, int ioflags);
ssize_t xfs_splice_write(struct xfs_inode *ip,
struct pipe_inode_info *pipe, struct file *outfilp,
loff_t *ppos, size_t count, int flags, int ioflags);
ssize_t xfs_write(struct xfs_inode *xip, struct kiocb *iocb,
const struct iovec *iovp, unsigned int nsegs,
loff_t *offset, int ioflags);
int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
int flags, struct xfs_iomap *iomapp, int *niomaps);
void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first,
......@@ -72,4 +59,6 @@ int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first,
xfs_off_t last, uint64_t flags, int fiopt);
int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last);
int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
#endif /* _XFS_VNODEOPS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment