xfs: remove the i_new_size field in struct xfs_inode
Now that we use the VFS i_size field throughout XFS there is no need for the i_new_size field any more given that the VFS i_size field gets updated in ->write_end before unlocking the page, and thus is always uptodate when writeback could see a page. Removing i_new_size also has the advantage that we will never have to trim back di_size during a failed buffered write, given that it never gets updated past i_size.

Note that currently the generic direct I/O code only updates i_size after calling our end_io handler, which requires a small workaround to make sure di_size actually makes it to disk. I hope to fix this properly in the generic code.

A downside is that we lose the support for parallel non-overlapping O_DIRECT appending writes that recently was added. I don't think keeping the complex and fragile i_new_size infrastructure for this is a good tradeoff - if we really care about parallel appending writers we should investigate turning the iolock into a range lock, which would also allow for parallel non-overlapping buffered writers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
This commit is contained in:
parent
ce7ae151dd
commit
2813d682e8
|
@ -111,8 +111,7 @@ xfs_ioend_new_eof(
|
|||
xfs_fsize_t bsize;
|
||||
|
||||
bsize = ioend->io_offset + ioend->io_size;
|
||||
isize = MAX(i_size_read(VFS_I(ip)), ip->i_new_size);
|
||||
isize = MIN(isize, bsize);
|
||||
isize = MIN(i_size_read(VFS_I(ip)), bsize);
|
||||
return isize > ip->i_d.di_size ? isize : 0;
|
||||
}
|
||||
|
||||
|
@ -126,11 +125,7 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
|
|||
}
|
||||
|
||||
/*
|
||||
* Update on-disk file size now that data has been written to disk. The
|
||||
* current in-memory file size is i_size. If a write is beyond eof i_new_size
|
||||
* will be the intended file size until i_size is updated. If this write does
|
||||
* not extend all the way to the valid file size then restrict this update to
|
||||
* the end of the write.
|
||||
* Update on-disk file size now that data has been written to disk.
|
||||
*
|
||||
* This function does not block as blocking on the inode lock in IO completion
|
||||
* can lead to IO completion order dependency deadlocks.. If it can't get the
|
||||
|
@ -1278,6 +1273,15 @@ xfs_end_io_direct_write(
|
|||
{
|
||||
struct xfs_ioend *ioend = iocb->private;
|
||||
|
||||
/*
|
||||
* While the generic direct I/O code updates the inode size, it does
|
||||
* so only after the end_io handler is called, which means our
|
||||
* end_io handler thinks the on-disk size is outside the in-core
|
||||
* size. To prevent this just update it a little bit earlier here.
|
||||
*/
|
||||
if (offset + size > i_size_read(ioend->io_inode))
|
||||
i_size_write(ioend->io_inode, offset + size);
|
||||
|
||||
/*
|
||||
* blockdev_direct_IO can return an error even after the I/O
|
||||
* completion handler was called. Thus we need to protect
|
||||
|
@ -1340,12 +1344,11 @@ xfs_vm_write_failed(
|
|||
|
||||
if (to > inode->i_size) {
|
||||
/*
|
||||
* punch out the delalloc blocks we have already allocated. We
|
||||
* don't call xfs_setattr() to do this as we may be in the
|
||||
* middle of a multi-iovec write and so the vfs inode->i_size
|
||||
* will not match the xfs ip->i_size and so it will zero too
|
||||
* much. Hence we jus truncate the page cache to zero what is
|
||||
* necessary and punch the delalloc blocks directly.
|
||||
* Punch out the delalloc blocks we have already allocated.
|
||||
*
|
||||
* Don't bother with xfs_setattr given that nothing can have
|
||||
* made it to disk yet as the page is still locked at this
|
||||
* point.
|
||||
*/
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
xfs_fileoff_t start_fsb;
|
||||
|
|
|
@ -412,27 +412,6 @@ xfs_file_splice_read(
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this was a direct or synchronous I/O that failed (such as ENOSPC) then
|
||||
* part of the I/O may have been written to disk before the error occurred. In
|
||||
* this case the on-disk file size may have been adjusted beyond the in-memory
|
||||
* file size and now needs to be truncated back.
|
||||
*/
|
||||
STATIC void
|
||||
xfs_aio_write_newsize_update(
|
||||
struct xfs_inode *ip,
|
||||
xfs_fsize_t new_size)
|
||||
{
|
||||
if (new_size == ip->i_new_size) {
|
||||
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
|
||||
if (new_size == ip->i_new_size)
|
||||
ip->i_new_size = 0;
|
||||
if (ip->i_d.di_size > i_size_read(VFS_I(ip)))
|
||||
ip->i_d.di_size = i_size_read(VFS_I(ip));
|
||||
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* xfs_file_splice_write() does not use xfs_rw_ilock() because
|
||||
* generic_file_splice_write() takes the i_mutex itself. This, in theory,
|
||||
|
@ -451,7 +430,6 @@ xfs_file_splice_write(
|
|||
{
|
||||
struct inode *inode = outfilp->f_mapping->host;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
xfs_fsize_t new_size;
|
||||
int ioflags = 0;
|
||||
ssize_t ret;
|
||||
|
||||
|
@ -465,20 +443,12 @@ xfs_file_splice_write(
|
|||
|
||||
xfs_ilock(ip, XFS_IOLOCK_EXCL);
|
||||
|
||||
new_size = *ppos + count;
|
||||
|
||||
xfs_ilock(ip, XFS_ILOCK_EXCL);
|
||||
if (new_size > i_size_read(inode))
|
||||
ip->i_new_size = new_size;
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
|
||||
trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
|
||||
|
||||
ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
|
||||
if (ret > 0)
|
||||
XFS_STATS_ADD(xs_write_bytes, ret);
|
||||
|
||||
xfs_aio_write_newsize_update(ip, new_size);
|
||||
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
|
||||
return ret;
|
||||
}
|
||||
|
@ -673,16 +643,13 @@ xfs_file_aio_write_checks(
|
|||
struct file *file,
|
||||
loff_t *pos,
|
||||
size_t *count,
|
||||
xfs_fsize_t *new_sizep,
|
||||
int *iolock)
|
||||
{
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct xfs_inode *ip = XFS_I(inode);
|
||||
xfs_fsize_t new_size;
|
||||
int error = 0;
|
||||
|
||||
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
|
||||
*new_sizep = 0;
|
||||
restart:
|
||||
error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
|
||||
if (error) {
|
||||
|
@ -697,15 +664,13 @@ restart:
|
|||
/*
|
||||
* If the offset is beyond the size of the file, we need to zero any
|
||||
* blocks that fall between the existing EOF and the start of this
|
||||
* write. There is no need to issue zeroing if another in-flght IO ends
|
||||
* at or before this one If zeronig is needed and we are currently
|
||||
* holding the iolock shared, we need to update it to exclusive which
|
||||
* involves dropping all locks and relocking to maintain correct locking
|
||||
* order. If we do this, restart the function to ensure all checks and
|
||||
* values are still valid.
|
||||
* write. If zeroing is needed and we are currently holding the
|
||||
* iolock shared, we need to update it to exclusive which involves
|
||||
* dropping all locks and relocking to maintain correct locking order.
|
||||
* If we do this, restart the function to ensure all checks and values
|
||||
* are still valid.
|
||||
*/
|
||||
if ((ip->i_new_size && *pos > ip->i_new_size) ||
|
||||
(!ip->i_new_size && *pos > i_size_read(inode))) {
|
||||
if (*pos > i_size_read(inode)) {
|
||||
if (*iolock == XFS_IOLOCK_SHARED) {
|
||||
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
|
||||
*iolock = XFS_IOLOCK_EXCL;
|
||||
|
@ -714,19 +679,6 @@ restart:
|
|||
}
|
||||
error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
|
||||
}
|
||||
|
||||
/*
|
||||
* If this IO extends beyond EOF, we may need to update ip->i_new_size.
|
||||
* We have already zeroed space beyond EOF (if necessary). Only update
|
||||
* ip->i_new_size if this IO ends beyond any other in-flight writes.
|
||||
*/
|
||||
new_size = *pos + *count;
|
||||
if (new_size > i_size_read(inode)) {
|
||||
if (new_size > ip->i_new_size)
|
||||
ip->i_new_size = new_size;
|
||||
*new_sizep = new_size;
|
||||
}
|
||||
|
||||
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
if (error)
|
||||
return error;
|
||||
|
@ -772,7 +724,6 @@ xfs_file_dio_aio_write(
|
|||
unsigned long nr_segs,
|
||||
loff_t pos,
|
||||
size_t ocount,
|
||||
xfs_fsize_t *new_size,
|
||||
int *iolock)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
|
@ -817,7 +768,7 @@ xfs_file_dio_aio_write(
|
|||
xfs_rw_ilock(ip, *iolock);
|
||||
}
|
||||
|
||||
ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock);
|
||||
ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -855,7 +806,6 @@ xfs_file_buffered_aio_write(
|
|||
unsigned long nr_segs,
|
||||
loff_t pos,
|
||||
size_t ocount,
|
||||
xfs_fsize_t *new_size,
|
||||
int *iolock)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
|
@ -869,7 +819,7 @@ xfs_file_buffered_aio_write(
|
|||
*iolock = XFS_IOLOCK_EXCL;
|
||||
xfs_rw_ilock(ip, *iolock);
|
||||
|
||||
ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock);
|
||||
ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -909,7 +859,6 @@ xfs_file_aio_write(
|
|||
ssize_t ret;
|
||||
int iolock;
|
||||
size_t ocount = 0;
|
||||
xfs_fsize_t new_size = 0;
|
||||
|
||||
XFS_STATS_INC(xs_write_calls);
|
||||
|
||||
|
@ -929,10 +878,10 @@ xfs_file_aio_write(
|
|||
|
||||
if (unlikely(file->f_flags & O_DIRECT))
|
||||
ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
|
||||
ocount, &new_size, &iolock);
|
||||
ocount, &iolock);
|
||||
else
|
||||
ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
|
||||
ocount, &new_size, &iolock);
|
||||
ocount, &iolock);
|
||||
|
||||
if (ret <= 0)
|
||||
goto out_unlock;
|
||||
|
@ -953,7 +902,6 @@ xfs_file_aio_write(
|
|||
}
|
||||
|
||||
out_unlock:
|
||||
xfs_aio_write_newsize_update(ip, new_size);
|
||||
xfs_rw_iunlock(ip, iolock);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -94,7 +94,6 @@ xfs_inode_alloc(
|
|||
ip->i_update_core = 0;
|
||||
ip->i_delayed_blks = 0;
|
||||
memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
|
||||
ip->i_new_size = 0;
|
||||
|
||||
return ip;
|
||||
}
|
||||
|
|
|
@ -246,8 +246,6 @@ typedef struct xfs_inode {
|
|||
|
||||
xfs_icdinode_t i_d; /* most of ondisk inode */
|
||||
|
||||
xfs_fsize_t i_new_size; /* size when write completes */
|
||||
|
||||
/* VFS inode */
|
||||
struct inode i_vnode; /* embedded VFS inode */
|
||||
} xfs_inode_t;
|
||||
|
|
|
@ -891,7 +891,6 @@ DECLARE_EVENT_CLASS(xfs_file_class,
|
|||
__field(dev_t, dev)
|
||||
__field(xfs_ino_t, ino)
|
||||
__field(xfs_fsize_t, size)
|
||||
__field(xfs_fsize_t, new_size)
|
||||
__field(loff_t, offset)
|
||||
__field(size_t, count)
|
||||
__field(int, flags)
|
||||
|
@ -900,17 +899,15 @@ DECLARE_EVENT_CLASS(xfs_file_class,
|
|||
__entry->dev = VFS_I(ip)->i_sb->s_dev;
|
||||
__entry->ino = ip->i_ino;
|
||||
__entry->size = ip->i_d.di_size;
|
||||
__entry->new_size = ip->i_new_size;
|
||||
__entry->offset = offset;
|
||||
__entry->count = count;
|
||||
__entry->flags = flags;
|
||||
),
|
||||
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
|
||||
TP_printk("dev %d:%d ino 0x%llx size 0x%llx "
|
||||
"offset 0x%llx count 0x%zx ioflags %s",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->ino,
|
||||
__entry->size,
|
||||
__entry->new_size,
|
||||
__entry->offset,
|
||||
__entry->count,
|
||||
__print_flags(__entry->flags, "|", XFS_IO_FLAGS))
|
||||
|
@ -978,7 +975,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
|
|||
__field(dev_t, dev)
|
||||
__field(xfs_ino_t, ino)
|
||||
__field(loff_t, size)
|
||||
__field(loff_t, new_size)
|
||||
__field(loff_t, offset)
|
||||
__field(size_t, count)
|
||||
__field(int, type)
|
||||
|
@ -990,7 +986,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
|
|||
__entry->dev = VFS_I(ip)->i_sb->s_dev;
|
||||
__entry->ino = ip->i_ino;
|
||||
__entry->size = ip->i_d.di_size;
|
||||
__entry->new_size = ip->i_new_size;
|
||||
__entry->offset = offset;
|
||||
__entry->count = count;
|
||||
__entry->type = type;
|
||||
|
@ -998,13 +993,11 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
|
|||
__entry->startblock = irec ? irec->br_startblock : 0;
|
||||
__entry->blockcount = irec ? irec->br_blockcount : 0;
|
||||
),
|
||||
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
|
||||
"offset 0x%llx count %zd type %s "
|
||||
"startoff 0x%llx startblock %lld blockcount 0x%llx",
|
||||
TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd "
|
||||
"type %s startoff 0x%llx startblock %lld blockcount 0x%llx",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->ino,
|
||||
__entry->size,
|
||||
__entry->new_size,
|
||||
__entry->offset,
|
||||
__entry->count,
|
||||
__print_symbolic(__entry->type, XFS_IO_TYPES),
|
||||
|
@ -1031,7 +1024,6 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
|
|||
__field(xfs_ino_t, ino)
|
||||
__field(loff_t, isize)
|
||||
__field(loff_t, disize)
|
||||
__field(loff_t, new_size)
|
||||
__field(loff_t, offset)
|
||||
__field(size_t, count)
|
||||
),
|
||||
|
@ -1040,17 +1032,15 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
|
|||
__entry->ino = ip->i_ino;
|
||||
__entry->isize = VFS_I(ip)->i_size;
|
||||
__entry->disize = ip->i_d.di_size;
|
||||
__entry->new_size = ip->i_new_size;
|
||||
__entry->offset = offset;
|
||||
__entry->count = count;
|
||||
),
|
||||
TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
|
||||
TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx "
|
||||
"offset 0x%llx count %zd",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->ino,
|
||||
__entry->isize,
|
||||
__entry->disize,
|
||||
__entry->new_size,
|
||||
__entry->offset,
|
||||
__entry->count)
|
||||
);
|
||||
|
|
Loading…
Reference in New Issue