Commit 8e4473bb authored by Xiubo Li's avatar Xiubo Li Committed by Ilya Dryomov

ceph: do not execute direct write in parallel if O_APPEND is specified

In O_APPEND & O_DIRECT mode, the data from different writers will
be possibly overlapping each other since they take the shared lock.

For example, both Writer1 and Writer2 are in O_APPEND and O_DIRECT
mode:

          Writer1                         Writer2

     shared_lock()                   shared_lock()
     getattr(CAP_SIZE)               getattr(CAP_SIZE)
     iocb->ki_pos = EOF              iocb->ki_pos = EOF
     write(data1)
                                     write(data2)
     shared_unlock()                 shared_unlock()

The data2 will overlap the data1 from the same file offset, the
old EOF.

Switch to exclusive lock instead when O_APPEND is specified.
Signed-off-by: default avatarXiubo Li <xiubli@redhat.com>
Reviewed-by: default avatarJeff Layton <jlayton@kernel.org>
Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
parent bb6d3fb3
...@@ -1418,6 +1418,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1418,6 +1418,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_cap_flush *prealloc_cf; struct ceph_cap_flush *prealloc_cf;
ssize_t count, written = 0; ssize_t count, written = 0;
int err, want, got; int err, want, got;
bool direct_lock = false;
loff_t pos; loff_t pos;
loff_t limit = max(i_size_read(inode), fsc->max_file_size); loff_t limit = max(i_size_read(inode), fsc->max_file_size);
...@@ -1428,8 +1429,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1428,8 +1429,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (!prealloc_cf) if (!prealloc_cf)
return -ENOMEM; return -ENOMEM;
if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT)
direct_lock = true;
retry_snap: retry_snap:
if (iocb->ki_flags & IOCB_DIRECT) if (direct_lock)
ceph_start_io_direct(inode); ceph_start_io_direct(inode);
else else
ceph_start_io_write(inode); ceph_start_io_write(inode);
...@@ -1519,14 +1523,15 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1519,14 +1523,15 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* we might need to revert back to that point */ /* we might need to revert back to that point */
data = *from; data = *from;
if (iocb->ki_flags & IOCB_DIRECT) { if (iocb->ki_flags & IOCB_DIRECT)
written = ceph_direct_read_write(iocb, &data, snapc, written = ceph_direct_read_write(iocb, &data, snapc,
&prealloc_cf); &prealloc_cf);
ceph_end_io_direct(inode); else
} else {
written = ceph_sync_write(iocb, &data, pos, snapc); written = ceph_sync_write(iocb, &data, pos, snapc);
if (direct_lock)
ceph_end_io_direct(inode);
else
ceph_end_io_write(inode); ceph_end_io_write(inode);
}
if (written > 0) if (written > 0)
iov_iter_advance(from, written); iov_iter_advance(from, written);
ceph_put_snap_context(snapc); ceph_put_snap_context(snapc);
...@@ -1577,7 +1582,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1577,7 +1582,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out_unlocked; goto out_unlocked;
out: out:
if (iocb->ki_flags & IOCB_DIRECT) if (direct_lock)
ceph_end_io_direct(inode); ceph_end_io_direct(inode);
else else
ceph_end_io_write(inode); ceph_end_io_write(inode);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment