xfs: don't allocate an ioend for direct I/O completions
Back in the days when the direct I/O ->end_io callback could be called from interrupt context for AIO we needed a structure to hand off to the workqueue, and reused the ioend structure for this purpose. These days ->end_io is always called from user or workqueue context, which allows us to avoid this memory allocation and simplify the code significantly. [dchinner: removed now unused xfs_finish_ioend_sync() function after Brian Foster did an initial review. ] Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
parent
f3d215526e
commit
2ba6623702
|
@ -135,30 +135,22 @@ xfs_setfilesize_trans_alloc(
|
|||
*/
|
||||
STATIC int
|
||||
xfs_setfilesize(
|
||||
struct xfs_ioend *ioend)
|
||||
struct xfs_inode *ip,
|
||||
struct xfs_trans *tp,
|
||||
xfs_off_t offset,
|
||||
size_t size)
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(ioend->io_inode);
|
||||
struct xfs_trans *tp = ioend->io_append_trans;
|
||||
xfs_fsize_t isize;
|
||||
|
||||
/*
|
||||
* The transaction may have been allocated in the I/O submission thread,
|
||||
* thus we need to mark ourselves as beeing in a transaction manually.
|
||||
* Similarly for freeze protection.
|
||||
*/
|
||||
current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
|
||||
rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
|
||||
0, 1, _THIS_IP_);
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
|
||||
isize = xfs_new_eof(ip, offset + size);
|
||||
if (!isize) {
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
xfs_trans_cancel(tp, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
|
||||
trace_xfs_setfilesize(ip, offset, size);
|
||||
|
||||
ip->i_d.di_size = isize;
|
||||
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
|
||||
|
@ -167,6 +159,25 @@ xfs_setfilesize(
|
|||
return xfs_trans_commit(tp, 0);
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_setfilesize_ioend(
|
||||
struct xfs_ioend *ioend)
|
||||
{
|
||||
struct xfs_inode *ip = XFS_I(ioend->io_inode);
|
||||
struct xfs_trans *tp = ioend->io_append_trans;
|
||||
|
||||
/*
|
||||
* The transaction may have been allocated in the I/O submission thread,
|
||||
* thus we need to mark ourselves as being in a transaction manually.
|
||||
* Similarly for freeze protection.
|
||||
*/
|
||||
current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
|
||||
rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
|
||||
0, 1, _THIS_IP_);
|
||||
|
||||
return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Schedule IO completion handling on the final put of an ioend.
|
||||
*
|
||||
|
@ -182,8 +193,7 @@ xfs_finish_ioend(
|
|||
|
||||
if (ioend->io_type == XFS_IO_UNWRITTEN)
|
||||
queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
|
||||
else if (ioend->io_append_trans ||
|
||||
(ioend->io_isdirect && xfs_ioend_is_append(ioend)))
|
||||
else if (ioend->io_append_trans)
|
||||
queue_work(mp->m_data_workqueue, &ioend->io_work);
|
||||
else
|
||||
xfs_destroy_ioend(ioend);
|
||||
|
@ -215,22 +225,8 @@ xfs_end_io(
|
|||
if (ioend->io_type == XFS_IO_UNWRITTEN) {
|
||||
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
|
||||
ioend->io_size);
|
||||
} else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) {
|
||||
/*
|
||||
* For direct I/O we do not know if we need to allocate blocks
|
||||
* or not so we can't preallocate an append transaction as that
|
||||
* results in nested reservations and log space deadlocks. Hence
|
||||
* allocate the transaction here. While this is sub-optimal and
|
||||
* can block IO completion for some time, we're stuck with doing
|
||||
* it this way until we can pass the ioend to the direct IO
|
||||
* allocation callbacks and avoid nesting that way.
|
||||
*/
|
||||
error = xfs_setfilesize_trans_alloc(ioend);
|
||||
if (error)
|
||||
goto done;
|
||||
error = xfs_setfilesize(ioend);
|
||||
} else if (ioend->io_append_trans) {
|
||||
error = xfs_setfilesize(ioend);
|
||||
error = xfs_setfilesize_ioend(ioend);
|
||||
} else {
|
||||
ASSERT(!xfs_ioend_is_append(ioend));
|
||||
}
|
||||
|
@ -241,17 +237,6 @@ done:
|
|||
xfs_destroy_ioend(ioend);
|
||||
}
|
||||
|
||||
/*
|
||||
* Call IO completion handling in caller context on the final put of an ioend.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_finish_ioend_sync(
|
||||
struct xfs_ioend *ioend)
|
||||
{
|
||||
if (atomic_dec_and_test(&ioend->io_remaining))
|
||||
xfs_end_io(&ioend->io_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate and initialise an IO completion structure.
|
||||
* We need to track unwritten extent write completion here initially.
|
||||
|
@ -273,7 +258,6 @@ xfs_alloc_ioend(
|
|||
* all the I/O from calling the completion routine too early.
|
||||
*/
|
||||
atomic_set(&ioend->io_remaining, 1);
|
||||
ioend->io_isdirect = 0;
|
||||
ioend->io_error = 0;
|
||||
ioend->io_list = NULL;
|
||||
ioend->io_type = type;
|
||||
|
@ -1459,11 +1443,7 @@ xfs_get_blocks_direct(
|
|||
*
|
||||
* If the private argument is non-NULL __xfs_get_blocks signals us that we
|
||||
* need to issue a transaction to convert the range from unwritten to written
|
||||
* extents. In case this is regular synchronous I/O we just call xfs_end_io
|
||||
* to do this and we are done. But in case this was a successful AIO
|
||||
* request this handler is called from interrupt context, from which we
|
||||
* can't start transactions. In that case offload the I/O completion to
|
||||
* the workqueues we also use for buffered I/O completion.
|
||||
* extents.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_end_io_direct_write(
|
||||
|
@ -1472,7 +1452,12 @@ xfs_end_io_direct_write(
|
|||
ssize_t size,
|
||||
void *private)
|
||||
{
|
||||
struct xfs_ioend *ioend = iocb->private;
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(mp))
|
||||
return;
|
||||
|
||||
/*
|
||||
* While the generic direct I/O code updates the inode size, it does
|
||||
|
@ -1480,22 +1465,33 @@ xfs_end_io_direct_write(
|
|||
* end_io handler thinks the on-disk size is outside the in-core
|
||||
* size. To prevent this just update it a little bit earlier here.
|
||||
*/
|
||||
if (offset + size > i_size_read(ioend->io_inode))
|
||||
i_size_write(ioend->io_inode, offset + size);
|
||||
if (offset + size > i_size_read(inode))
|
||||
i_size_write(inode, offset + size);
|
||||
|
||||
/*
|
||||
* blockdev_direct_IO can return an error even after the I/O
|
||||
* completion handler was called. Thus we need to protect
|
||||
* against double-freeing.
|
||||
* For direct I/O we do not know if we need to allocate blocks or not,
|
||||
* so we can't preallocate an append transaction, as that results in
|
||||
* nested reservations and log space deadlocks. Hence allocate the
|
||||
* transaction here. While this is sub-optimal and can block IO
|
||||
* completion for some time, we're stuck with doing it this way until
|
||||
* we can pass the ioend to the direct IO allocation callbacks and
|
||||
* avoid nesting that way.
|
||||
*/
|
||||
iocb->private = NULL;
|
||||
if (private && size > 0) {
|
||||
xfs_iomap_write_unwritten(ip, offset, size);
|
||||
} else if (offset + size > ip->i_d.di_size) {
|
||||
struct xfs_trans *tp;
|
||||
int error;
|
||||
|
||||
ioend->io_offset = offset;
|
||||
ioend->io_size = size;
|
||||
if (private && size > 0)
|
||||
ioend->io_type = XFS_IO_UNWRITTEN;
|
||||
tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
|
||||
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
|
||||
if (error) {
|
||||
xfs_trans_cancel(tp, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
xfs_finish_ioend_sync(ioend);
|
||||
xfs_setfilesize(ip, tp, offset, size);
|
||||
}
|
||||
}
|
||||
|
||||
STATIC ssize_t
|
||||
|
@ -1507,39 +1503,16 @@ xfs_vm_direct_IO(
|
|||
{
|
||||
struct inode *inode = iocb->ki_filp->f_mapping->host;
|
||||
struct block_device *bdev = xfs_find_bdev_for_inode(inode);
|
||||
struct xfs_ioend *ioend = NULL;
|
||||
ssize_t ret;
|
||||
|
||||
if (rw & WRITE) {
|
||||
size_t size = iov_iter_count(iter);
|
||||
|
||||
/*
|
||||
* We cannot preallocate a size update transaction here as we
|
||||
* don't know whether allocation is necessary or not. Hence we
|
||||
* can only tell IO completion that one is necessary if we are
|
||||
* not doing unwritten extent conversion.
|
||||
*/
|
||||
iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT);
|
||||
if (offset + size > XFS_I(inode)->i_d.di_size)
|
||||
ioend->io_isdirect = 1;
|
||||
|
||||
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
|
||||
return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
|
||||
offset, xfs_get_blocks_direct,
|
||||
xfs_end_io_direct_write, NULL,
|
||||
DIO_ASYNC_EXTEND);
|
||||
if (ret != -EIOCBQUEUED && iocb->private)
|
||||
goto out_destroy_ioend;
|
||||
} else {
|
||||
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
|
||||
offset, xfs_get_blocks_direct,
|
||||
NULL, NULL, 0);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
out_destroy_ioend:
|
||||
xfs_destroy_ioend(ioend);
|
||||
return ret;
|
||||
return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
|
||||
offset, xfs_get_blocks_direct,
|
||||
NULL, NULL, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -24,14 +24,12 @@ extern mempool_t *xfs_ioend_pool;
|
|||
* Types of I/O for bmap clustering and I/O completion tracking.
|
||||
*/
|
||||
enum {
|
||||
XFS_IO_DIRECT = 0, /* special case for direct I/O ioends */
|
||||
XFS_IO_DELALLOC, /* covers delalloc region */
|
||||
XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */
|
||||
XFS_IO_OVERWRITE, /* covers already allocated extent */
|
||||
};
|
||||
|
||||
#define XFS_IO_TYPES \
|
||||
{ 0, "" }, \
|
||||
{ XFS_IO_DELALLOC, "delalloc" }, \
|
||||
{ XFS_IO_UNWRITTEN, "unwritten" }, \
|
||||
{ XFS_IO_OVERWRITE, "overwrite" }
|
||||
|
@ -45,7 +43,6 @@ typedef struct xfs_ioend {
|
|||
unsigned int io_type; /* delalloc / unwritten */
|
||||
int io_error; /* I/O error code */
|
||||
atomic_t io_remaining; /* hold count */
|
||||
unsigned int io_isdirect : 1;/* direct I/O */
|
||||
struct inode *io_inode; /* file being written to */
|
||||
struct buffer_head *io_buffer_head;/* buffer linked list head */
|
||||
struct buffer_head *io_buffer_tail;/* buffer linked list tail */
|
||||
|
|
Loading…
Reference in New Issue