Commit c4293751 authored by Stephen Lord, committed by Stephen Lord

Rework XFS read/write path so that there is one common read and one common

write path for all the different I/O variants. This means we can now support
true async I/O.
parent 48d86a41
......@@ -59,105 +59,102 @@ static struct vm_operations_struct linvfs_file_vm_ops;
STATIC ssize_t
linvfs_readv(
struct file *filp,
const struct iovec *iovp,
unsigned long nr_segs,
loff_t *ppos)
linvfs_read(
struct kiocb *iocb,
char __user *buf,
size_t count,
loff_t pos)
{
vnode_t *vp = LINVFS_GET_VP(filp->f_dentry->d_inode);
struct iovec iov = {buf, count};
vnode_t *vp;
int error;
VOP_READ(vp, filp, iovp, nr_segs, ppos, NULL, error);
BUG_ON(iocb->ki_pos != pos);
vp = LINVFS_GET_VP(iocb->ki_filp->f_dentry->d_inode);
VOP_READ(vp, iocb, &iov, 1, &iocb->ki_pos, NULL, error);
return error;
}
STATIC ssize_t
linvfs_writev(
struct file *filp,
const struct iovec *iovp,
unsigned long nr_segs,
loff_t *ppos)
linvfs_write(
struct kiocb *iocb,
const char *buf,
size_t count,
loff_t pos)
{
struct inode *inode = filp->f_dentry->d_inode;
struct iovec iov = {(void *)buf, count};
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
vnode_t *vp = LINVFS_GET_VP(inode);
int error = filp->f_error;
int error;
int direct = file->f_flags & O_DIRECT;
if (unlikely(error)) {
filp->f_error = 0;
return error;
}
BUG_ON(iocb->ki_pos != pos);
/*
* We allow multiple direct writers in, there is no
* potential call to vmtruncate in that path.
*/
if (filp->f_flags & O_DIRECT) {
VOP_WRITE(vp, filp, iovp, nr_segs, ppos, NULL, error);
if (direct) {
VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, NULL, error);
} else {
down(&inode->i_sem);
VOP_WRITE(vp, filp, iovp, nr_segs, ppos, NULL, error);
VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, NULL, error);
up(&inode->i_sem);
}
return error;
}
STATIC ssize_t
linvfs_read(
struct file *filp,
char *buf,
size_t count,
linvfs_readv(
struct file *file,
const struct iovec *iov,
unsigned long nr_segs,
loff_t *ppos)
{
struct iovec iov = {buf, count};
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
vnode_t *vp = LINVFS_GET_VP(inode);
struct kiocb kiocb;
int error;
return linvfs_readv(filp, &iov, 1, ppos);
}
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = *ppos;
VOP_READ(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, NULL, error);
if (-EIOCBQUEUED == error)
error = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
return error;
}
STATIC ssize_t
linvfs_write(
linvfs_writev(
struct file *file,
const char *buf,
size_t count,
const struct iovec *iov,
unsigned long nr_segs,
loff_t *ppos)
{
struct iovec iov = {(void *)buf, count};
return linvfs_writev(file, &iov, 1, ppos);
}
STATIC ssize_t
linvfs_aio_read(
struct kiocb *iocb,
char *buf,
size_t count,
loff_t pos)
{
struct iovec iov = {buf, count};
return linvfs_readv(iocb->ki_filp, &iov, 1, &iocb->ki_pos);
}
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
vnode_t *vp = LINVFS_GET_VP(inode);
struct kiocb kiocb;
int error;
int direct = file->f_flags & O_DIRECT;
STATIC ssize_t
linvfs_aio_write(
struct kiocb *iocb,
const char *buf,
size_t count,
loff_t pos)
{
struct iovec iov = {(void *)buf, count};
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = *ppos;
if (direct) {
VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, NULL, error);
} else {
down(&inode->i_sem);
VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, NULL, error);
up(&inode->i_sem);
}
if (-EIOCBQUEUED == error)
error = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
return linvfs_writev(iocb->ki_filp, &iov, 1, &iocb->ki_pos);
return error;
}
STATIC ssize_t
linvfs_sendfile(
struct file *filp,
......@@ -381,12 +378,12 @@ linvfs_mprotect(
struct file_operations linvfs_file_operations = {
.llseek = generic_file_llseek,
.read = linvfs_read,
.write = linvfs_write,
.read = do_sync_read,
.write = do_sync_write,
.readv = linvfs_readv,
.writev = linvfs_writev,
.aio_read = linvfs_aio_read,
.aio_write = linvfs_aio_write,
.aio_read = linvfs_read,
.aio_write = linvfs_write,
.sendfile = linvfs_sendfile,
.ioctl = linvfs_ioctl,
.mmap = linvfs_file_mmap,
......
......@@ -127,8 +127,8 @@ xfs_iozero(
if (!status) {
pos += bytes;
count -= bytes;
if (pos > ip->i_size)
ip->i_size = pos < end_size ? pos : end_size;
if (pos > i_size_read(ip))
i_size_write(ip, pos < end_size ? pos : end_size);
}
unlock:
......@@ -145,12 +145,13 @@ xfs_iozero(
ssize_t /* bytes read, or (-) error */
xfs_read(
bhv_desc_t *bdp,
struct file *filp,
struct kiocb *iocb,
const struct iovec *iovp,
unsigned long segs,
loff_t *offp,
unsigned int segs,
loff_t *offset,
cred_t *credp)
{
struct file *file = iocb->ki_filp;
size_t size = 0;
ssize_t ret;
xfs_fsize_t n;
......@@ -158,8 +159,8 @@ xfs_read(
xfs_mount_t *mp;
vnode_t *vp;
unsigned long seg;
int direct = (filp->f_flags & O_DIRECT);
int invisible = (filp->f_mode & FINVIS);
int direct = (file->f_flags & O_DIRECT);
int invisible = (file->f_mode & FINVIS);
ip = XFS_BHVTOI(bdp);
vp = BHV_TO_VNODE(bdp);
......@@ -179,33 +180,20 @@ xfs_read(
size += iv->iov_len;
if (unlikely((ssize_t)(size|iv->iov_len) < 0))
return XFS_ERROR(-EINVAL);
if (direct) { /* XFS specific check */
if ((__psint_t)iv->iov_base & BBMASK) {
if (*offp == ip->i_d.di_size)
return 0;
return XFS_ERROR(-EINVAL);
}
}
if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
continue;
if (seg == 0)
return XFS_ERROR(-EFAULT);
segs = seg;
break;
}
/* END copy & waste from filemap.c */
if (direct) {
if ((*offp & mp->m_blockmask) ||
if ((*offset & mp->m_blockmask) ||
(size & mp->m_blockmask)) {
if (*offp == ip->i_d.di_size) {
if (*offset == ip->i_d.di_size) {
return (0);
}
return -XFS_ERROR(EINVAL);
}
}
n = XFS_MAXIOFFSET(mp) - *offp;
n = XFS_MAXIOFFSET(mp) - *offset;
if ((n <= 0) || (size == 0))
return 0;
......@@ -216,21 +204,27 @@ xfs_read(
return -EIO;
}
/* OK so we are holding the I/O lock for the duration
* of the submission, then what happens if the I/O
* does not really happen here, but is scheduled
* later?
*/
xfs_ilock(ip, XFS_IOLOCK_SHARED);
if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && !invisible) {
int error;
vrwlock_t locktype = VRWLOCK_READ;
error = XFS_SEND_DATA(mp, DM_EVENT_READ, bdp, *offp, size,
FILP_DELAY_FLAG(filp), &locktype);
error = XFS_SEND_DATA(mp, DM_EVENT_READ, bdp, *offset, size,
FILP_DELAY_FLAG(file), &locktype);
if (error) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return -error;
}
}
ret = generic_file_readv(filp, iovp, segs, offp);
/* We need to deal with the iovec case seperately here */
ret = __generic_file_aio_read(iocb, iovp, segs, offset);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
XFS_STATS_ADD(xfsstats.xs_read_bytes, ret);
......@@ -245,7 +239,7 @@ ssize_t
xfs_sendfile(
bhv_desc_t *bdp,
struct file *filp,
loff_t *offp,
loff_t *offset,
size_t count,
read_actor_t actor,
void *target,
......@@ -265,7 +259,7 @@ xfs_sendfile(
XFS_STATS_INC(xfsstats.xs_read_calls);
n = XFS_MAXIOFFSET(mp) - *offp;
n = XFS_MAXIOFFSET(mp) - *offset;
if ((n <= 0) || (count == 0))
return 0;
......@@ -280,14 +274,14 @@ xfs_sendfile(
vrwlock_t locktype = VRWLOCK_READ;
int error;
error = XFS_SEND_DATA(mp, DM_EVENT_READ, bdp, *offp, count,
error = XFS_SEND_DATA(mp, DM_EVENT_READ, bdp, *offset, count,
FILP_DELAY_FLAG(filp), &locktype);
if (error) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return -error;
}
}
ret = generic_file_sendfile(filp, offp, count, actor, target);
ret = generic_file_sendfile(filp, offset, count, actor, target);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
XFS_STATS_ADD(xfsstats.xs_read_bytes, ret);
......@@ -516,12 +510,13 @@ xfs_zero_eof(
ssize_t /* bytes written, or (-) error */
xfs_write(
bhv_desc_t *bdp,
struct file *file,
struct kiocb *iocb,
const struct iovec *iovp,
unsigned long segs,
unsigned int segs,
loff_t *offset,
cred_t *credp)
{
struct file *file = iocb->ki_filp;
size_t size = 0;
xfs_inode_t *xip;
xfs_mount_t *mp;
......@@ -555,16 +550,6 @@ xfs_write(
size += iv->iov_len;
if (unlikely((ssize_t)(size|iv->iov_len) < 0))
return XFS_ERROR(-EINVAL);
if (direct) { /* XFS specific check */
if ((__psint_t)iv->iov_base & BBMASK)
return XFS_ERROR(-EINVAL);
}
if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
continue;
if (seg == 0)
return XFS_ERROR(-EFAULT);
segs = seg;
break;
}
/* END copy & waste from filemap.c */
......@@ -576,7 +561,7 @@ xfs_write(
xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE);
if (XFS_FORCED_SHUTDOWN(xip->i_mount)) {
if (XFS_FORCED_SHUTDOWN(mp)) {
return -EIO;
}
......@@ -694,7 +679,7 @@ xfs_write(
xfs_inval_cached_pages(vp, &xip->i_iocore, *offset, 1, 1);
}
ret = generic_file_write_nolock(file, iovp, segs, offset);
ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset);
if ((ret == -ENOSPC) &&
DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !invisible) {
......@@ -711,25 +696,26 @@ xfs_write(
}
if (ret <= 0) {
xfs_rwunlock(bdp, locktype);
return ret;
}
XFS_STATS_ADD(xfsstats.xs_write_bytes, ret);
if (*offset > xip->i_d.di_size) {
xfs_ilock(xip, XFS_ILOCK_EXCL);
if (*offset > xip->i_d.di_size) {
struct inode *inode = LINVFS_GET_IP(vp);
inode->i_size = xip->i_d.di_size = *offset;
xip->i_d.di_size = *offset;
i_size_write(inode, *offset);
xip->i_update_core = 1;
xip->i_update_size = 1;
}
xfs_iunlock(xip, XFS_ILOCK_EXCL);
}
if (ret <= 0) {
xfs_rwunlock(bdp, locktype);
return ret;
}
XFS_STATS_ADD(xfsstats.xs_write_bytes, ret);
/* Handle various SYNC-type writes */
if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) {
......
......@@ -54,11 +54,11 @@ extern int xfs_bdstrat_cb(struct page_buf_s *);
extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t,
xfs_fsize_t, xfs_fsize_t);
/* Post-rework prototypes: read/write take a kiocb (not a struct file)
 * and an unsigned int segment count, enabling true async I/O. */
extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
			const struct iovec *, unsigned int,
			loff_t *, struct cred *);
extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
			const struct iovec *, unsigned int,
			loff_t *, struct cred *);
extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
loff_t *, size_t, read_actor_t,
......
......@@ -158,11 +158,11 @@ typedef enum vchange {
typedef int (*vop_open_t)(bhv_desc_t *, struct cred *);
/* Vnode read/write ops now carry a kiocb so the behavior layer can
 * support asynchronous submission. */
typedef ssize_t	(*vop_read_t)(bhv_desc_t *, struct kiocb *,
				const struct iovec *, unsigned int,
				loff_t *, struct cred *);
typedef ssize_t	(*vop_write_t)(bhv_desc_t *, struct kiocb *,
				const struct iovec *, unsigned int,
				loff_t *, struct cred *);
typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
loff_t *, size_t, read_actor_t,
......
......@@ -1276,6 +1276,7 @@ int generic_write_checks(struct inode *inode, struct file *file,
loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t);
extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *);
extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t);
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
unsigned long, loff_t *);
......
......@@ -724,7 +724,7 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
* This is the "read()" routine for all filesystems
* that can use the page cache directly.
*/
static ssize_t
ssize_t
__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos)
{
......@@ -809,6 +809,7 @@ generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
}
EXPORT_SYMBOL(generic_file_aio_read);
EXPORT_SYMBOL(__generic_file_aio_read);
ssize_t
generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment