Commit c4293751 authored by Stephen Lord, committed by Stephen Lord

Rework XFS read/write path so that there is one common read and one common
write path for all the different I/O variants. This means we can now support
true async I/O.
parent 48d86a41
...@@ -59,105 +59,102 @@ static struct vm_operations_struct linvfs_file_vm_ops; ...@@ -59,105 +59,102 @@ static struct vm_operations_struct linvfs_file_vm_ops;
STATIC ssize_t STATIC ssize_t
linvfs_readv( linvfs_read(
struct file *filp, struct kiocb *iocb,
const struct iovec *iovp, char __user *buf,
unsigned long nr_segs, size_t count,
loff_t *ppos) loff_t pos)
{ {
vnode_t *vp = LINVFS_GET_VP(filp->f_dentry->d_inode); struct iovec iov = {buf, count};
vnode_t *vp;
int error; int error;
VOP_READ(vp, filp, iovp, nr_segs, ppos, NULL, error); BUG_ON(iocb->ki_pos != pos);
vp = LINVFS_GET_VP(iocb->ki_filp->f_dentry->d_inode);
VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, NULL, error);
return error; return error;
} }
STATIC ssize_t STATIC ssize_t
linvfs_writev( linvfs_write(
struct file *filp, struct kiocb *iocb,
const struct iovec *iovp, const char *buf,
unsigned long nr_segs, size_t count,
loff_t *ppos) loff_t pos)
{ {
struct inode *inode = filp->f_dentry->d_inode; struct iovec iov = {(void *)buf, count};
vnode_t *vp = LINVFS_GET_VP(inode); struct file *file = iocb->ki_filp;
int error = filp->f_error; struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
vnode_t *vp = LINVFS_GET_VP(inode);
int error;
int direct = file->f_flags & O_DIRECT;
if (unlikely(error)) { BUG_ON(iocb->ki_pos != pos);
filp->f_error = 0;
return error;
}
/* if (direct) {
* We allow multiple direct writers in, there is no VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, NULL, error);
* potential call to vmtruncate in that path.
*/
if (filp->f_flags & O_DIRECT) {
VOP_WRITE(vp, filp, iovp, nr_segs, ppos, NULL, error);
} else { } else {
down(&inode->i_sem); down(&inode->i_sem);
VOP_WRITE(vp, filp, iovp, nr_segs, ppos, NULL, error); VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, NULL, error);
up(&inode->i_sem); up(&inode->i_sem);
} }
return error; return error;
} }
STATIC ssize_t STATIC ssize_t
linvfs_read( linvfs_readv(
struct file *filp, struct file *file,
char *buf, const struct iovec *iov,
size_t count, unsigned long nr_segs,
loff_t *ppos) loff_t *ppos)
{ {
struct iovec iov = {buf, count}; struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
vnode_t *vp = LINVFS_GET_VP(inode);
struct kiocb kiocb;
int error;
return linvfs_readv(filp, &iov, 1, ppos); init_sync_kiocb(&kiocb, file);
} kiocb.ki_pos = *ppos;
VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, NULL, error);
if (-EIOCBQUEUED == error)
error = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
return error;
}
STATIC ssize_t STATIC ssize_t
linvfs_write( linvfs_writev(
struct file *file, struct file *file,
const char *buf, const struct iovec *iov,
size_t count, unsigned long nr_segs,
loff_t *ppos) loff_t *ppos)
{ {
struct iovec iov = {(void *)buf, count}; struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
vnode_t *vp = LINVFS_GET_VP(inode);
return linvfs_writev(file, &iov, 1, ppos); struct kiocb kiocb;
} int error;
int direct = file->f_flags & O_DIRECT;
STATIC ssize_t
linvfs_aio_read(
struct kiocb *iocb,
char *buf,
size_t count,
loff_t pos)
{
struct iovec iov = {buf, count};
return linvfs_readv(iocb->ki_filp, &iov, 1, &iocb->ki_pos);
}
STATIC ssize_t init_sync_kiocb(&kiocb, file);
linvfs_aio_write( kiocb.ki_pos = *ppos;
struct kiocb *iocb, if (direct) {
const char *buf, VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, NULL, error);
size_t count, } else {
loff_t pos) down(&inode->i_sem);
{ VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, NULL, error);
struct iovec iov = {(void *)buf, count}; up(&inode->i_sem);
}
if (-EIOCBQUEUED == error)
error = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
return linvfs_writev(iocb->ki_filp, &iov, 1, &iocb->ki_pos); return error;
} }
STATIC ssize_t STATIC ssize_t
linvfs_sendfile( linvfs_sendfile(
struct file *filp, struct file *filp,
...@@ -381,12 +378,12 @@ linvfs_mprotect( ...@@ -381,12 +378,12 @@ linvfs_mprotect(
struct file_operations linvfs_file_operations = { struct file_operations linvfs_file_operations = {
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
.read = linvfs_read, .read = do_sync_read,
.write = linvfs_write, .write = do_sync_write,
.readv = linvfs_readv, .readv = linvfs_readv,
.writev = linvfs_writev, .writev = linvfs_writev,
.aio_read = linvfs_aio_read, .aio_read = linvfs_read,
.aio_write = linvfs_aio_write, .aio_write = linvfs_write,
.sendfile = linvfs_sendfile, .sendfile = linvfs_sendfile,
.ioctl = linvfs_ioctl, .ioctl = linvfs_ioctl,
.mmap = linvfs_file_mmap, .mmap = linvfs_file_mmap,
......
...@@ -127,8 +127,8 @@ xfs_iozero( ...@@ -127,8 +127,8 @@ xfs_iozero(
if (!status) { if (!status) {
pos += bytes; pos += bytes;
count -= bytes; count -= bytes;
if (pos > ip->i_size) if (pos > i_size_read(ip))
ip->i_size = pos < end_size ? pos : end_size; i_size_write(ip, pos < end_size ? pos : end_size);
} }
unlock: unlock:
...@@ -145,12 +145,13 @@ xfs_iozero( ...@@ -145,12 +145,13 @@ xfs_iozero(
ssize_t /* bytes read, or (-) error */ ssize_t /* bytes read, or (-) error */
xfs_read( xfs_read(
bhv_desc_t *bdp, bhv_desc_t *bdp,
struct file *filp, struct kiocb *iocb,
const struct iovec *iovp, const struct iovec *iovp,
unsigned long segs, unsigned int segs,
loff_t *offp, loff_t *offset,
cred_t *credp) cred_t *credp)
{ {
struct file *file = iocb->ki_filp;
size_t size = 0; size_t size = 0;
ssize_t ret; ssize_t ret;
xfs_fsize_t n; xfs_fsize_t n;
...@@ -158,8 +159,8 @@ xfs_read( ...@@ -158,8 +159,8 @@ xfs_read(
xfs_mount_t *mp; xfs_mount_t *mp;
vnode_t *vp; vnode_t *vp;
unsigned long seg; unsigned long seg;
int direct = (filp->f_flags & O_DIRECT); int direct = (file->f_flags & O_DIRECT);
int invisible = (filp->f_mode & FINVIS); int invisible = (file->f_mode & FINVIS);
ip = XFS_BHVTOI(bdp); ip = XFS_BHVTOI(bdp);
vp = BHV_TO_VNODE(bdp); vp = BHV_TO_VNODE(bdp);
...@@ -179,33 +180,20 @@ xfs_read( ...@@ -179,33 +180,20 @@ xfs_read(
size += iv->iov_len; size += iv->iov_len;
if (unlikely((ssize_t)(size|iv->iov_len) < 0)) if (unlikely((ssize_t)(size|iv->iov_len) < 0))
return XFS_ERROR(-EINVAL); return XFS_ERROR(-EINVAL);
if (direct) { /* XFS specific check */
if ((__psint_t)iv->iov_base & BBMASK) {
if (*offp == ip->i_d.di_size)
return 0;
return XFS_ERROR(-EINVAL);
}
}
if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
continue;
if (seg == 0)
return XFS_ERROR(-EFAULT);
segs = seg;
break;
} }
/* END copy & waste from filemap.c */ /* END copy & waste from filemap.c */
if (direct) { if (direct) {
if ((*offp & mp->m_blockmask) || if ((*offset & mp->m_blockmask) ||
(size & mp->m_blockmask)) { (size & mp->m_blockmask)) {
if (*offp == ip->i_d.di_size) { if (*offset == ip->i_d.di_size) {
return (0); return (0);
} }
return -XFS_ERROR(EINVAL); return -XFS_ERROR(EINVAL);
} }
} }
n = XFS_MAXIOFFSET(mp) - *offp; n = XFS_MAXIOFFSET(mp) - *offset;
if ((n <= 0) || (size == 0)) if ((n <= 0) || (size == 0))
return 0; return 0;
...@@ -216,21 +204,27 @@ xfs_read( ...@@ -216,21 +204,27 @@ xfs_read(
return -EIO; return -EIO;
} }
/* OK so we are holding the I/O lock for the duration
* of the submission, then what happens if the I/O
* does not really happen here, but is scheduled
* later?
*/
xfs_ilock(ip, XFS_IOLOCK_SHARED); xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && !invisible) { if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && !invisible) {
int error; int error;
vrwlock_t locktype = VRWLOCK_READ; vrwlock_t locktype = VRWLOCK_READ;
error = XFS_SEND_DATA(mp, DM_EVENT_READ, bdp, *offp, size, error = XFS_SEND_DATA(mp, DM_EVENT_READ, bdp, *offset, size,
FILP_DELAY_FLAG(filp), &locktype); FILP_DELAY_FLAG(file), &locktype);
if (error) { if (error) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED); xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return -error; return -error;
} }
} }
ret = generic_file_readv(filp, iovp, segs, offp); /* We need to deal with the iovec case seperately here */
ret = __generic_file_aio_read(iocb, iovp, segs, offset);
xfs_iunlock(ip, XFS_IOLOCK_SHARED); xfs_iunlock(ip, XFS_IOLOCK_SHARED);
XFS_STATS_ADD(xfsstats.xs_read_bytes, ret); XFS_STATS_ADD(xfsstats.xs_read_bytes, ret);
...@@ -245,7 +239,7 @@ ssize_t ...@@ -245,7 +239,7 @@ ssize_t
xfs_sendfile( xfs_sendfile(
bhv_desc_t *bdp, bhv_desc_t *bdp,
struct file *filp, struct file *filp,
loff_t *offp, loff_t *offset,
size_t count, size_t count,
read_actor_t actor, read_actor_t actor,
void *target, void *target,
...@@ -265,7 +259,7 @@ xfs_sendfile( ...@@ -265,7 +259,7 @@ xfs_sendfile(
XFS_STATS_INC(xfsstats.xs_read_calls); XFS_STATS_INC(xfsstats.xs_read_calls);
n = XFS_MAXIOFFSET(mp) - *offp; n = XFS_MAXIOFFSET(mp) - *offset;
if ((n <= 0) || (count == 0)) if ((n <= 0) || (count == 0))
return 0; return 0;
...@@ -280,14 +274,14 @@ xfs_sendfile( ...@@ -280,14 +274,14 @@ xfs_sendfile(
vrwlock_t locktype = VRWLOCK_READ; vrwlock_t locktype = VRWLOCK_READ;
int error; int error;
error = XFS_SEND_DATA(mp, DM_EVENT_READ, bdp, *offp, count, error = XFS_SEND_DATA(mp, DM_EVENT_READ, bdp, *offset, count,
FILP_DELAY_FLAG(filp), &locktype); FILP_DELAY_FLAG(filp), &locktype);
if (error) { if (error) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED); xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return -error; return -error;
} }
} }
ret = generic_file_sendfile(filp, offp, count, actor, target); ret = generic_file_sendfile(filp, offset, count, actor, target);
xfs_iunlock(ip, XFS_IOLOCK_SHARED); xfs_iunlock(ip, XFS_IOLOCK_SHARED);
XFS_STATS_ADD(xfsstats.xs_read_bytes, ret); XFS_STATS_ADD(xfsstats.xs_read_bytes, ret);
...@@ -516,12 +510,13 @@ xfs_zero_eof( ...@@ -516,12 +510,13 @@ xfs_zero_eof(
ssize_t /* bytes written, or (-) error */ ssize_t /* bytes written, or (-) error */
xfs_write( xfs_write(
bhv_desc_t *bdp, bhv_desc_t *bdp,
struct file *file, struct kiocb *iocb,
const struct iovec *iovp, const struct iovec *iovp,
unsigned long segs, unsigned int segs,
loff_t *offset, loff_t *offset,
cred_t *credp) cred_t *credp)
{ {
struct file *file = iocb->ki_filp;
size_t size = 0; size_t size = 0;
xfs_inode_t *xip; xfs_inode_t *xip;
xfs_mount_t *mp; xfs_mount_t *mp;
...@@ -555,16 +550,6 @@ xfs_write( ...@@ -555,16 +550,6 @@ xfs_write(
size += iv->iov_len; size += iv->iov_len;
if (unlikely((ssize_t)(size|iv->iov_len) < 0)) if (unlikely((ssize_t)(size|iv->iov_len) < 0))
return XFS_ERROR(-EINVAL); return XFS_ERROR(-EINVAL);
if (direct) { /* XFS specific check */
if ((__psint_t)iv->iov_base & BBMASK)
return XFS_ERROR(-EINVAL);
}
if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
continue;
if (seg == 0)
return XFS_ERROR(-EFAULT);
segs = seg;
break;
} }
/* END copy & waste from filemap.c */ /* END copy & waste from filemap.c */
...@@ -576,7 +561,7 @@ xfs_write( ...@@ -576,7 +561,7 @@ xfs_write(
xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE); xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE);
if (XFS_FORCED_SHUTDOWN(xip->i_mount)) { if (XFS_FORCED_SHUTDOWN(mp)) {
return -EIO; return -EIO;
} }
...@@ -694,7 +679,7 @@ xfs_write( ...@@ -694,7 +679,7 @@ xfs_write(
xfs_inval_cached_pages(vp, &xip->i_iocore, *offset, 1, 1); xfs_inval_cached_pages(vp, &xip->i_iocore, *offset, 1, 1);
} }
ret = generic_file_write_nolock(file, iovp, segs, offset); ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset);
if ((ret == -ENOSPC) && if ((ret == -ENOSPC) &&
DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !invisible) { DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !invisible) {
...@@ -711,25 +696,26 @@ xfs_write( ...@@ -711,25 +696,26 @@ xfs_write(
} }
if (ret <= 0) {
xfs_rwunlock(bdp, locktype);
return ret;
}
XFS_STATS_ADD(xfsstats.xs_write_bytes, ret);
if (*offset > xip->i_d.di_size) { if (*offset > xip->i_d.di_size) {
xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_ilock(xip, XFS_ILOCK_EXCL);
if (*offset > xip->i_d.di_size) { if (*offset > xip->i_d.di_size) {
struct inode *inode = LINVFS_GET_IP(vp); struct inode *inode = LINVFS_GET_IP(vp);
inode->i_size = xip->i_d.di_size = *offset; xip->i_d.di_size = *offset;
i_size_write(inode, *offset);
xip->i_update_core = 1; xip->i_update_core = 1;
xip->i_update_size = 1; xip->i_update_size = 1;
} }
xfs_iunlock(xip, XFS_ILOCK_EXCL); xfs_iunlock(xip, XFS_ILOCK_EXCL);
} }
if (ret <= 0) {
xfs_rwunlock(bdp, locktype);
return ret;
}
XFS_STATS_ADD(xfsstats.xs_write_bytes, ret);
/* Handle various SYNC-type writes */ /* Handle various SYNC-type writes */
if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) { if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) {
......
...@@ -54,11 +54,11 @@ extern int xfs_bdstrat_cb(struct page_buf_s *); ...@@ -54,11 +54,11 @@ extern int xfs_bdstrat_cb(struct page_buf_s *);
extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t, extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
xfs_fsize_t, xfs_fsize_t); xfs_fsize_t, xfs_fsize_t);
extern ssize_t xfs_read(struct bhv_desc *, struct file *, extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
const struct iovec *, unsigned long, const struct iovec *, unsigned int,
loff_t *, struct cred *); loff_t *, struct cred *);
extern ssize_t xfs_write(struct bhv_desc *, struct file *, extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
const struct iovec *, unsigned long, const struct iovec *, unsigned int,
loff_t *, struct cred *); loff_t *, struct cred *);
extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *, extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
loff_t *, size_t, read_actor_t, loff_t *, size_t, read_actor_t,
......
...@@ -158,11 +158,11 @@ typedef enum vchange { ...@@ -158,11 +158,11 @@ typedef enum vchange {
typedef int (*vop_open_t)(bhv_desc_t *, struct cred *); typedef int (*vop_open_t)(bhv_desc_t *, struct cred *);
typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct file *, typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
const struct iovec *, unsigned long, const struct iovec *, unsigned int,
loff_t *, struct cred *); loff_t *, struct cred *);
typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct file *, typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
const struct iovec *, unsigned long, const struct iovec *, unsigned int,
loff_t *, struct cred *); loff_t *, struct cred *);
typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *, typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
loff_t *, size_t, read_actor_t, loff_t *, size_t, read_actor_t,
......
...@@ -1276,6 +1276,7 @@ int generic_write_checks(struct inode *inode, struct file *file, ...@@ -1276,6 +1276,7 @@ int generic_write_checks(struct inode *inode, struct file *file,
loff_t *pos, size_t *count, int isblk); loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t); extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t);
extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t); extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t);
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
unsigned long, loff_t *); unsigned long, loff_t *);
......
...@@ -724,7 +724,7 @@ int file_read_actor(read_descriptor_t *desc, struct page *page, ...@@ -724,7 +724,7 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
* This is the "read()" routine for all filesystems * This is the "read()" routine for all filesystems
* that can use the page cache directly. * that can use the page cache directly.
*/ */
static ssize_t ssize_t
__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos) unsigned long nr_segs, loff_t *ppos)
{ {
...@@ -809,6 +809,7 @@ generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t ...@@ -809,6 +809,7 @@ generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos); return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
} }
EXPORT_SYMBOL(generic_file_aio_read); EXPORT_SYMBOL(generic_file_aio_read);
EXPORT_SYMBOL(__generic_file_aio_read);
ssize_t ssize_t
generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment