ceph: do not execute direct write in parallel if O_APPEND is specified
In O_APPEND & O_DIRECT mode, data from different writers may overlap,
because the writers all take the shared lock.

For example, when both Writer1 and Writer2 are in O_APPEND and O_DIRECT
mode:

        Writer1                      Writer2

    shared_lock()                shared_lock()
    getattr(CAP_SIZE)            getattr(CAP_SIZE)
    iocb->ki_pos = EOF           iocb->ki_pos = EOF
    write(data1)                 write(data2)
    shared_unlock()              shared_unlock()

data2 will overlap data1, since both are written starting at the same
file offset, the old EOF.

Switch to the exclusive lock instead when O_APPEND is specified.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
bb6d3fb354
commit
8e4473bb50
|
@ -1418,6 +1418,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||||
struct ceph_cap_flush *prealloc_cf;
|
struct ceph_cap_flush *prealloc_cf;
|
||||||
ssize_t count, written = 0;
|
ssize_t count, written = 0;
|
||||||
int err, want, got;
|
int err, want, got;
|
||||||
|
bool direct_lock = false;
|
||||||
loff_t pos;
|
loff_t pos;
|
||||||
loff_t limit = max(i_size_read(inode), fsc->max_file_size);
|
loff_t limit = max(i_size_read(inode), fsc->max_file_size);
|
||||||
|
|
||||||
|
@ -1428,8 +1429,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||||
if (!prealloc_cf)
|
if (!prealloc_cf)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT)
|
||||||
|
direct_lock = true;
|
||||||
|
|
||||||
retry_snap:
|
retry_snap:
|
||||||
if (iocb->ki_flags & IOCB_DIRECT)
|
if (direct_lock)
|
||||||
ceph_start_io_direct(inode);
|
ceph_start_io_direct(inode);
|
||||||
else
|
else
|
||||||
ceph_start_io_write(inode);
|
ceph_start_io_write(inode);
|
||||||
|
@ -1519,14 +1523,15 @@ retry_snap:
|
||||||
|
|
||||||
/* we might need to revert back to that point */
|
/* we might need to revert back to that point */
|
||||||
data = *from;
|
data = *from;
|
||||||
if (iocb->ki_flags & IOCB_DIRECT) {
|
if (iocb->ki_flags & IOCB_DIRECT)
|
||||||
written = ceph_direct_read_write(iocb, &data, snapc,
|
written = ceph_direct_read_write(iocb, &data, snapc,
|
||||||
&prealloc_cf);
|
&prealloc_cf);
|
||||||
ceph_end_io_direct(inode);
|
else
|
||||||
} else {
|
|
||||||
written = ceph_sync_write(iocb, &data, pos, snapc);
|
written = ceph_sync_write(iocb, &data, pos, snapc);
|
||||||
|
if (direct_lock)
|
||||||
|
ceph_end_io_direct(inode);
|
||||||
|
else
|
||||||
ceph_end_io_write(inode);
|
ceph_end_io_write(inode);
|
||||||
}
|
|
||||||
if (written > 0)
|
if (written > 0)
|
||||||
iov_iter_advance(from, written);
|
iov_iter_advance(from, written);
|
||||||
ceph_put_snap_context(snapc);
|
ceph_put_snap_context(snapc);
|
||||||
|
@ -1577,7 +1582,7 @@ retry_snap:
|
||||||
|
|
||||||
goto out_unlocked;
|
goto out_unlocked;
|
||||||
out:
|
out:
|
||||||
if (iocb->ki_flags & IOCB_DIRECT)
|
if (direct_lock)
|
||||||
ceph_end_io_direct(inode);
|
ceph_end_io_direct(inode);
|
||||||
else
|
else
|
||||||
ceph_end_io_write(inode);
|
ceph_end_io_write(inode);
|
||||||
|
|
Loading…
Reference in New Issue