Merge branch 'xfs-mmap-lock' into for-next
This commit is contained in:
commit
88e8fda99a
|
@ -1599,13 +1599,6 @@ xfs_swap_extent_flush(
|
||||||
/* Verify O_DIRECT for ftmp */
|
/* Verify O_DIRECT for ftmp */
|
||||||
if (VFS_I(ip)->i_mapping->nrpages)
|
if (VFS_I(ip)->i_mapping->nrpages)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
/*
|
|
||||||
* Don't try to swap extents on mmap()d files because we can't lock
|
|
||||||
* out races against page faults safely.
|
|
||||||
*/
|
|
||||||
if (mapping_mapped(VFS_I(ip)->i_mapping))
|
|
||||||
return -EBUSY;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1633,13 +1626,14 @@ xfs_swap_extents(
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lock up the inodes against other IO and truncate to begin with.
|
* Lock the inodes against other IO, page faults and truncate to
|
||||||
* Then we can ensure the inodes are flushed and have no page cache
|
* begin with. Then we can ensure the inodes are flushed and have no
|
||||||
* safely. Once we have done this we can take the ilocks and do the rest
|
* page cache safely. Once we have done this we can take the ilocks and
|
||||||
* of the checks.
|
* do the rest of the checks.
|
||||||
*/
|
*/
|
||||||
lock_flags = XFS_IOLOCK_EXCL;
|
lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
|
||||||
xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
|
xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
|
||||||
|
xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
|
||||||
|
|
||||||
/* Verify that both files have the same format */
|
/* Verify that both files have the same format */
|
||||||
if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
|
if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
|
||||||
|
@ -1666,8 +1660,16 @@ xfs_swap_extents(
|
||||||
xfs_trans_cancel(tp, 0);
|
xfs_trans_cancel(tp, 0);
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Lock and join the inodes to the transaction so that transaction commit
|
||||||
|
* or cancel will unlock the inodes from this point onwards.
|
||||||
|
*/
|
||||||
xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
|
xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
|
||||||
lock_flags |= XFS_ILOCK_EXCL;
|
lock_flags |= XFS_ILOCK_EXCL;
|
||||||
|
xfs_trans_ijoin(tp, ip, lock_flags);
|
||||||
|
xfs_trans_ijoin(tp, tip, lock_flags);
|
||||||
|
|
||||||
|
|
||||||
/* Verify all data are being swapped */
|
/* Verify all data are being swapped */
|
||||||
if (sxp->sx_offset != 0 ||
|
if (sxp->sx_offset != 0 ||
|
||||||
|
@ -1720,9 +1722,6 @@ xfs_swap_extents(
|
||||||
goto out_trans_cancel;
|
goto out_trans_cancel;
|
||||||
}
|
}
|
||||||
|
|
||||||
xfs_trans_ijoin(tp, ip, lock_flags);
|
|
||||||
xfs_trans_ijoin(tp, tip, lock_flags);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Before we've swapped the forks, lets set the owners of the forks
|
* Before we've swapped the forks, lets set the owners of the forks
|
||||||
* appropriately. We have to do this as we are demand paging the btree
|
* appropriately. We have to do this as we are demand paging the btree
|
||||||
|
@ -1856,5 +1855,5 @@ out_unlock:
|
||||||
|
|
||||||
out_trans_cancel:
|
out_trans_cancel:
|
||||||
xfs_trans_cancel(tp, 0);
|
xfs_trans_cancel(tp, 0);
|
||||||
goto out_unlock;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
|
@ -847,6 +847,9 @@ xfs_file_fallocate(
|
||||||
if (error)
|
if (error)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
|
||||||
|
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
|
||||||
|
iolock |= XFS_MMAPLOCK_EXCL;
|
||||||
|
|
||||||
if (mode & FALLOC_FL_PUNCH_HOLE) {
|
if (mode & FALLOC_FL_PUNCH_HOLE) {
|
||||||
error = xfs_free_file_space(ip, offset, len);
|
error = xfs_free_file_space(ip, offset, len);
|
||||||
if (error)
|
if (error)
|
||||||
|
@ -996,20 +999,6 @@ xfs_file_mmap(
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* mmap()d file has taken write protection fault and is being made
|
|
||||||
* writable. We can set the page state up correctly for a writable
|
|
||||||
* page, which means we can do correct delalloc accounting (ENOSPC
|
|
||||||
* checking!) and unwritten extent mapping.
|
|
||||||
*/
|
|
||||||
STATIC int
|
|
||||||
xfs_vm_page_mkwrite(
|
|
||||||
struct vm_area_struct *vma,
|
|
||||||
struct vm_fault *vmf)
|
|
||||||
{
|
|
||||||
return block_page_mkwrite(vma, vmf, xfs_get_blocks);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This type is designed to indicate the type of offset we would like
|
* This type is designed to indicate the type of offset we would like
|
||||||
* to search from page cache for xfs_seek_hole_data().
|
* to search from page cache for xfs_seek_hole_data().
|
||||||
|
@ -1385,6 +1374,55 @@ xfs_file_llseek(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Locking for serialisation of IO during page faults. This results in a lock
|
||||||
|
* ordering of:
|
||||||
|
*
|
||||||
|
* mmap_sem (MM)
|
||||||
|
* i_mmap_lock (XFS - truncate serialisation)
|
||||||
|
* page_lock (MM)
|
||||||
|
* i_lock (XFS - extent map serialisation)
|
||||||
|
*/
|
||||||
|
STATIC int
|
||||||
|
xfs_filemap_fault(
|
||||||
|
struct vm_area_struct *vma,
|
||||||
|
struct vm_fault *vmf)
|
||||||
|
{
|
||||||
|
struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
|
||||||
|
int error;
|
||||||
|
|
||||||
|
trace_xfs_filemap_fault(ip);
|
||||||
|
|
||||||
|
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
|
||||||
|
error = filemap_fault(vma, vmf);
|
||||||
|
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* mmap()d file has taken write protection fault and is being made writable. We
|
||||||
|
* can set the page state up correctly for a writable page, which means we can
|
||||||
|
* do correct delalloc accounting (ENOSPC checking!) and unwritten extent
|
||||||
|
* mapping.
|
||||||
|
*/
|
||||||
|
STATIC int
|
||||||
|
xfs_filemap_page_mkwrite(
|
||||||
|
struct vm_area_struct *vma,
|
||||||
|
struct vm_fault *vmf)
|
||||||
|
{
|
||||||
|
struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
|
||||||
|
int error;
|
||||||
|
|
||||||
|
trace_xfs_filemap_page_mkwrite(ip);
|
||||||
|
|
||||||
|
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
|
||||||
|
error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
|
||||||
|
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
const struct file_operations xfs_file_operations = {
|
const struct file_operations xfs_file_operations = {
|
||||||
.llseek = xfs_file_llseek,
|
.llseek = xfs_file_llseek,
|
||||||
.read = new_sync_read,
|
.read = new_sync_read,
|
||||||
|
@ -1417,7 +1455,7 @@ const struct file_operations xfs_dir_file_operations = {
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct vm_operations_struct xfs_file_vm_ops = {
|
static const struct vm_operations_struct xfs_file_vm_ops = {
|
||||||
.fault = filemap_fault,
|
.fault = xfs_filemap_fault,
|
||||||
.map_pages = filemap_map_pages,
|
.map_pages = filemap_map_pages,
|
||||||
.page_mkwrite = xfs_vm_page_mkwrite,
|
.page_mkwrite = xfs_filemap_page_mkwrite,
|
||||||
};
|
};
|
||||||
|
|
|
@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The xfs inode contains 2 locks: a multi-reader lock called the
|
* The xfs inode contains 3 multi-reader locks: the i_iolock, the i_mmap_lock and
|
||||||
* i_iolock and a multi-reader lock called the i_lock. This routine
|
* the i_lock. This routine allows various combinations of the locks to be
|
||||||
* allows either or both of the locks to be obtained.
|
* obtained.
|
||||||
*
|
*
|
||||||
* The 2 locks should always be ordered so that the IO lock is
|
* The 3 locks should always be ordered so that the IO lock is obtained first,
|
||||||
* obtained first in order to prevent deadlock.
|
* the mmap lock second and the ilock last in order to prevent deadlock.
|
||||||
*
|
*
|
||||||
* ip -- the inode being locked
|
* Basic locking order:
|
||||||
* lock_flags -- this parameter indicates the inode's locks
|
*
|
||||||
* to be locked. It can be:
|
* i_iolock -> i_mmap_lock -> page_lock -> i_ilock
|
||||||
* XFS_IOLOCK_SHARED,
|
*
|
||||||
* XFS_IOLOCK_EXCL,
|
* mmap_sem locking order:
|
||||||
* XFS_ILOCK_SHARED,
|
*
|
||||||
* XFS_ILOCK_EXCL,
|
* i_iolock -> page lock -> mmap_sem
|
||||||
* XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
|
* mmap_sem -> i_mmap_lock -> page_lock
|
||||||
* XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
|
*
|
||||||
* XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
|
* The difference in mmap_sem locking order means that we cannot hold the
|
||||||
* XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
|
* i_mmap_lock over syscall based read(2)/write(2) IO. These IO paths can
|
||||||
|
* fault in pages during copy in/out (for buffered IO) or require the mmap_sem
|
||||||
|
* in get_user_pages() to map the user pages into the kernel address space for
|
||||||
|
* direct IO. Similarly the i_iolock cannot be taken inside a page fault because
|
||||||
|
* page faults already hold the mmap_sem.
|
||||||
|
*
|
||||||
|
* Hence to serialise fully against both syscall and mmap based IO, we need to
|
||||||
|
* take both the i_iolock and the i_mmap_lock. These locks should *only* be both
|
||||||
|
* taken in places where we need to invalidate the page cache in a race
|
||||||
|
* free manner (e.g. truncate, hole punch and other extent manipulation
|
||||||
|
* functions).
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
xfs_ilock(
|
xfs_ilock(
|
||||||
|
@ -150,6 +160,8 @@ xfs_ilock(
|
||||||
*/
|
*/
|
||||||
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
|
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
|
||||||
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
|
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
|
||||||
|
ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
|
||||||
|
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
|
||||||
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
|
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
|
||||||
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
|
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
|
||||||
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
|
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
|
||||||
|
@ -159,6 +171,11 @@ xfs_ilock(
|
||||||
else if (lock_flags & XFS_IOLOCK_SHARED)
|
else if (lock_flags & XFS_IOLOCK_SHARED)
|
||||||
mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
|
mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
|
||||||
|
|
||||||
|
if (lock_flags & XFS_MMAPLOCK_EXCL)
|
||||||
|
mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
|
||||||
|
else if (lock_flags & XFS_MMAPLOCK_SHARED)
|
||||||
|
mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
|
||||||
|
|
||||||
if (lock_flags & XFS_ILOCK_EXCL)
|
if (lock_flags & XFS_ILOCK_EXCL)
|
||||||
mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
|
mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
|
||||||
else if (lock_flags & XFS_ILOCK_SHARED)
|
else if (lock_flags & XFS_ILOCK_SHARED)
|
||||||
|
@ -191,6 +208,8 @@ xfs_ilock_nowait(
|
||||||
*/
|
*/
|
||||||
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
|
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
|
||||||
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
|
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
|
||||||
|
ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
|
||||||
|
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
|
||||||
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
|
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
|
||||||
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
|
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
|
||||||
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
|
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
|
||||||
|
@ -202,21 +221,35 @@ xfs_ilock_nowait(
|
||||||
if (!mrtryaccess(&ip->i_iolock))
|
if (!mrtryaccess(&ip->i_iolock))
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (lock_flags & XFS_MMAPLOCK_EXCL) {
|
||||||
|
if (!mrtryupdate(&ip->i_mmaplock))
|
||||||
|
goto out_undo_iolock;
|
||||||
|
} else if (lock_flags & XFS_MMAPLOCK_SHARED) {
|
||||||
|
if (!mrtryaccess(&ip->i_mmaplock))
|
||||||
|
goto out_undo_iolock;
|
||||||
|
}
|
||||||
|
|
||||||
if (lock_flags & XFS_ILOCK_EXCL) {
|
if (lock_flags & XFS_ILOCK_EXCL) {
|
||||||
if (!mrtryupdate(&ip->i_lock))
|
if (!mrtryupdate(&ip->i_lock))
|
||||||
goto out_undo_iolock;
|
goto out_undo_mmaplock;
|
||||||
} else if (lock_flags & XFS_ILOCK_SHARED) {
|
} else if (lock_flags & XFS_ILOCK_SHARED) {
|
||||||
if (!mrtryaccess(&ip->i_lock))
|
if (!mrtryaccess(&ip->i_lock))
|
||||||
goto out_undo_iolock;
|
goto out_undo_mmaplock;
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
out_undo_iolock:
|
out_undo_mmaplock:
|
||||||
|
if (lock_flags & XFS_MMAPLOCK_EXCL)
|
||||||
|
mrunlock_excl(&ip->i_mmaplock);
|
||||||
|
else if (lock_flags & XFS_MMAPLOCK_SHARED)
|
||||||
|
mrunlock_shared(&ip->i_mmaplock);
|
||||||
|
out_undo_iolock:
|
||||||
if (lock_flags & XFS_IOLOCK_EXCL)
|
if (lock_flags & XFS_IOLOCK_EXCL)
|
||||||
mrunlock_excl(&ip->i_iolock);
|
mrunlock_excl(&ip->i_iolock);
|
||||||
else if (lock_flags & XFS_IOLOCK_SHARED)
|
else if (lock_flags & XFS_IOLOCK_SHARED)
|
||||||
mrunlock_shared(&ip->i_iolock);
|
mrunlock_shared(&ip->i_iolock);
|
||||||
out:
|
out:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -244,6 +277,8 @@ xfs_iunlock(
|
||||||
*/
|
*/
|
||||||
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
|
ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
|
||||||
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
|
(XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
|
||||||
|
ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
|
||||||
|
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
|
||||||
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
|
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
|
||||||
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
|
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
|
||||||
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
|
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
|
||||||
|
@ -254,6 +289,11 @@ xfs_iunlock(
|
||||||
else if (lock_flags & XFS_IOLOCK_SHARED)
|
else if (lock_flags & XFS_IOLOCK_SHARED)
|
||||||
mrunlock_shared(&ip->i_iolock);
|
mrunlock_shared(&ip->i_iolock);
|
||||||
|
|
||||||
|
if (lock_flags & XFS_MMAPLOCK_EXCL)
|
||||||
|
mrunlock_excl(&ip->i_mmaplock);
|
||||||
|
else if (lock_flags & XFS_MMAPLOCK_SHARED)
|
||||||
|
mrunlock_shared(&ip->i_mmaplock);
|
||||||
|
|
||||||
if (lock_flags & XFS_ILOCK_EXCL)
|
if (lock_flags & XFS_ILOCK_EXCL)
|
||||||
mrunlock_excl(&ip->i_lock);
|
mrunlock_excl(&ip->i_lock);
|
||||||
else if (lock_flags & XFS_ILOCK_SHARED)
|
else if (lock_flags & XFS_ILOCK_SHARED)
|
||||||
|
@ -271,11 +311,14 @@ xfs_ilock_demote(
|
||||||
xfs_inode_t *ip,
|
xfs_inode_t *ip,
|
||||||
uint lock_flags)
|
uint lock_flags)
|
||||||
{
|
{
|
||||||
ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
|
ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
|
||||||
ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
|
ASSERT((lock_flags &
|
||||||
|
~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
|
||||||
|
|
||||||
if (lock_flags & XFS_ILOCK_EXCL)
|
if (lock_flags & XFS_ILOCK_EXCL)
|
||||||
mrdemote(&ip->i_lock);
|
mrdemote(&ip->i_lock);
|
||||||
|
if (lock_flags & XFS_MMAPLOCK_EXCL)
|
||||||
|
mrdemote(&ip->i_mmaplock);
|
||||||
if (lock_flags & XFS_IOLOCK_EXCL)
|
if (lock_flags & XFS_IOLOCK_EXCL)
|
||||||
mrdemote(&ip->i_iolock);
|
mrdemote(&ip->i_iolock);
|
||||||
|
|
||||||
|
@ -294,6 +337,12 @@ xfs_isilocked(
|
||||||
return rwsem_is_locked(&ip->i_lock.mr_lock);
|
return rwsem_is_locked(&ip->i_lock.mr_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
|
||||||
|
if (!(lock_flags & XFS_MMAPLOCK_SHARED))
|
||||||
|
return !!ip->i_mmaplock.mr_writer;
|
||||||
|
return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
|
||||||
|
}
|
||||||
|
|
||||||
if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
|
if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
|
||||||
if (!(lock_flags & XFS_IOLOCK_SHARED))
|
if (!(lock_flags & XFS_IOLOCK_SHARED))
|
||||||
return !!ip->i_iolock.mr_writer;
|
return !!ip->i_iolock.mr_writer;
|
||||||
|
@ -314,14 +363,27 @@ int xfs_lock_delays;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Bump the subclass so xfs_lock_inodes() acquires each lock with
|
* Bump the subclass so xfs_lock_inodes() acquires each lock with a different
|
||||||
* a different value
|
* value. This shouldn't be called for page fault locking, but we also need to
|
||||||
|
* ensure we don't overrun the number of lockdep subclasses for the iolock or
|
||||||
|
* mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
|
||||||
*/
|
*/
|
||||||
static inline int
|
static inline int
|
||||||
xfs_lock_inumorder(int lock_mode, int subclass)
|
xfs_lock_inumorder(int lock_mode, int subclass)
|
||||||
{
|
{
|
||||||
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
|
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
|
||||||
|
ASSERT(subclass + XFS_LOCK_INUMORDER <
|
||||||
|
(1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
|
||||||
lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
|
lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
|
||||||
|
ASSERT(subclass + XFS_LOCK_INUMORDER <
|
||||||
|
(1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
|
||||||
|
lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
|
||||||
|
XFS_MMAPLOCK_SHIFT;
|
||||||
|
}
|
||||||
|
|
||||||
if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
|
if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
|
||||||
lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
|
lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
|
||||||
|
|
||||||
|
@ -440,10 +502,10 @@ again:
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* xfs_lock_two_inodes() can only be used to lock one type of lock
|
* xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
|
||||||
* at a time - the iolock or the ilock, but not both at once. If
|
* the iolock, the mmaplock or the ilock, but not more than one at a time. If we
|
||||||
* we lock both at once, lockdep will report false positives saying
|
* lock more than one at a time, lockdep will report false positives saying we
|
||||||
* we have violated locking orders.
|
* have violated locking orders.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
xfs_lock_two_inodes(
|
xfs_lock_two_inodes(
|
||||||
|
@ -455,8 +517,12 @@ xfs_lock_two_inodes(
|
||||||
int attempts = 0;
|
int attempts = 0;
|
||||||
xfs_log_item_t *lp;
|
xfs_log_item_t *lp;
|
||||||
|
|
||||||
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
|
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
|
||||||
ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
|
ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
|
||||||
|
ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
|
||||||
|
} else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
|
||||||
|
ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
|
||||||
|
|
||||||
ASSERT(ip0->i_ino != ip1->i_ino);
|
ASSERT(ip0->i_ino != ip1->i_ino);
|
||||||
|
|
||||||
if (ip0->i_ino > ip1->i_ino) {
|
if (ip0->i_ino > ip1->i_ino) {
|
||||||
|
|
|
@ -56,6 +56,7 @@ typedef struct xfs_inode {
|
||||||
struct xfs_inode_log_item *i_itemp; /* logging information */
|
struct xfs_inode_log_item *i_itemp; /* logging information */
|
||||||
mrlock_t i_lock; /* inode lock */
|
mrlock_t i_lock; /* inode lock */
|
||||||
mrlock_t i_iolock; /* inode IO lock */
|
mrlock_t i_iolock; /* inode IO lock */
|
||||||
|
mrlock_t i_mmaplock; /* inode mmap IO lock */
|
||||||
atomic_t i_pincount; /* inode pin count */
|
atomic_t i_pincount; /* inode pin count */
|
||||||
spinlock_t i_flags_lock; /* inode i_flags lock */
|
spinlock_t i_flags_lock; /* inode i_flags lock */
|
||||||
/* Miscellaneous state. */
|
/* Miscellaneous state. */
|
||||||
|
@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
|
||||||
#define XFS_IOLOCK_SHARED (1<<1)
|
#define XFS_IOLOCK_SHARED (1<<1)
|
||||||
#define XFS_ILOCK_EXCL (1<<2)
|
#define XFS_ILOCK_EXCL (1<<2)
|
||||||
#define XFS_ILOCK_SHARED (1<<3)
|
#define XFS_ILOCK_SHARED (1<<3)
|
||||||
|
#define XFS_MMAPLOCK_EXCL (1<<4)
|
||||||
|
#define XFS_MMAPLOCK_SHARED (1<<5)
|
||||||
|
|
||||||
#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
|
#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
|
||||||
| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
|
| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
|
||||||
|
| XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
|
||||||
|
|
||||||
#define XFS_LOCK_FLAGS \
|
#define XFS_LOCK_FLAGS \
|
||||||
{ XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
|
{ XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
|
||||||
{ XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
|
{ XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
|
||||||
{ XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
|
{ XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
|
||||||
{ XFS_ILOCK_SHARED, "ILOCK_SHARED" }
|
{ XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
|
||||||
|
{ XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \
|
||||||
|
{ XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" }
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
|
||||||
#define XFS_IOLOCK_SHIFT 16
|
#define XFS_IOLOCK_SHIFT 16
|
||||||
#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
|
#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
|
||||||
|
|
||||||
|
#define XFS_MMAPLOCK_SHIFT 20
|
||||||
|
|
||||||
#define XFS_ILOCK_SHIFT 24
|
#define XFS_ILOCK_SHIFT 24
|
||||||
#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
|
#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
|
||||||
#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
|
#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
|
||||||
#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
|
#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
|
||||||
|
|
||||||
#define XFS_IOLOCK_DEP_MASK 0x00ff0000
|
#define XFS_IOLOCK_DEP_MASK 0x000f0000
|
||||||
|
#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
|
||||||
#define XFS_ILOCK_DEP_MASK 0xff000000
|
#define XFS_ILOCK_DEP_MASK 0xff000000
|
||||||
#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
|
#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
|
||||||
|
XFS_MMAPLOCK_DEP_MASK | \
|
||||||
|
XFS_ILOCK_DEP_MASK)
|
||||||
|
|
||||||
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
|
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \
|
||||||
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
|
>> XFS_IOLOCK_SHIFT)
|
||||||
|
#define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \
|
||||||
|
>> XFS_MMAPLOCK_SHIFT)
|
||||||
|
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \
|
||||||
|
>> XFS_ILOCK_SHIFT)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For multiple groups support: if S_ISGID bit is set in the parent
|
* For multiple groups support: if S_ISGID bit is set in the parent
|
||||||
|
|
|
@ -631,7 +631,7 @@ xfs_ioc_space(
|
||||||
|
|
||||||
if (filp->f_flags & O_DSYNC)
|
if (filp->f_flags & O_DSYNC)
|
||||||
flags |= XFS_PREALLOC_SYNC;
|
flags |= XFS_PREALLOC_SYNC;
|
||||||
if (ioflags & XFS_IO_INVIS)
|
if (ioflags & XFS_IO_INVIS)
|
||||||
flags |= XFS_PREALLOC_INVISIBLE;
|
flags |= XFS_PREALLOC_INVISIBLE;
|
||||||
|
|
||||||
error = mnt_want_write_file(filp);
|
error = mnt_want_write_file(filp);
|
||||||
|
@ -643,6 +643,9 @@ xfs_ioc_space(
|
||||||
if (error)
|
if (error)
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
|
|
||||||
|
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
|
||||||
|
iolock |= XFS_MMAPLOCK_EXCL;
|
||||||
|
|
||||||
switch (bf->l_whence) {
|
switch (bf->l_whence) {
|
||||||
case 0: /*SEEK_SET*/
|
case 0: /*SEEK_SET*/
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -771,6 +771,7 @@ xfs_setattr_size(
|
||||||
return error;
|
return error;
|
||||||
|
|
||||||
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
|
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
|
||||||
|
ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
|
||||||
ASSERT(S_ISREG(ip->i_d.di_mode));
|
ASSERT(S_ISREG(ip->i_d.di_mode));
|
||||||
ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
|
ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
|
||||||
ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
|
ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
|
||||||
|
@ -834,55 +835,27 @@ xfs_setattr_size(
|
||||||
inode_dio_wait(inode);
|
inode_dio_wait(inode);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Do all the page cache truncate work outside the transaction context
|
* We've already locked out new page faults, so now we can safely remove
|
||||||
* as the "lock" order is page lock->log space reservation. i.e.
|
* pages from the page cache knowing they won't get refaulted until we
|
||||||
* locking pages inside the transaction can ABBA deadlock with
|
* drop the XFS_MMAPLOCK_EXCL lock after the extent manipulations are
|
||||||
* writeback. We have to do the VFS inode size update before we truncate
|
* complete. The truncate_setsize() call also cleans partial EOF page
|
||||||
* the pagecache, however, to avoid racing with page faults beyond the
|
* PTEs on extending truncates and hence ensures sub-page block size
|
||||||
* new EOF they are not serialised against truncate operations except by
|
* filesystems are correctly handled, too.
|
||||||
* page locks and size updates.
|
|
||||||
*
|
*
|
||||||
* Hence we are in a situation where a truncate can fail with ENOMEM
|
* We have to do all the page cache truncate work outside the
|
||||||
* from xfs_trans_reserve(), but having already truncated the in-memory
|
* transaction context as the "lock" order is page lock->log space
|
||||||
* version of the file (i.e. made user visible changes). There's not
|
* reservation as defined by extent allocation in the writeback path.
|
||||||
* much we can do about this, except to hope that the caller sees ENOMEM
|
* Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
|
||||||
* and retries the truncate operation.
|
* having already truncated the in-memory version of the file (i.e. made
|
||||||
|
* user visible changes). There's not much we can do about this, except
|
||||||
|
* to hope that the caller sees ENOMEM and retries the truncate
|
||||||
|
* operation.
|
||||||
*/
|
*/
|
||||||
error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
|
error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
|
||||||
if (error)
|
if (error)
|
||||||
return error;
|
return error;
|
||||||
truncate_setsize(inode, newsize);
|
truncate_setsize(inode, newsize);
|
||||||
|
|
||||||
/*
|
|
||||||
* The "we can't serialise against page faults" pain gets worse.
|
|
||||||
*
|
|
||||||
* If the file is mapped then we have to clean the page at the old EOF
|
|
||||||
* when extending the file. Extending the file can expose changes the
|
|
||||||
* underlying page mapping (e.g. from beyond EOF to a hole or
|
|
||||||
* unwritten), and so on the next attempt to write to that page we need
|
|
||||||
* to remap it for write. i.e. we need .page_mkwrite() to be called.
|
|
||||||
* Hence we need to clean the page to clean the pte and so a new write
|
|
||||||
* fault will be triggered appropriately.
|
|
||||||
*
|
|
||||||
* If we do it before we change the inode size, then we can race with a
|
|
||||||
* page fault that maps the page with exactly the same problem. If we do
|
|
||||||
* it after we change the file size, then a new page fault can come in
|
|
||||||
* and allocate space before we've run the rest of the truncate
|
|
||||||
* transaction. That's kinda grotesque, but it's better than have data
|
|
||||||
* over a hole, and so that's the lesser evil that has been chosen here.
|
|
||||||
*
|
|
||||||
* The real solution, however, is to have some mechanism for locking out
|
|
||||||
* page faults while a truncate is in progress.
|
|
||||||
*/
|
|
||||||
if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
|
|
||||||
error = filemap_write_and_wait_range(
|
|
||||||
VFS_I(ip)->i_mapping,
|
|
||||||
round_down(oldsize, PAGE_CACHE_SIZE),
|
|
||||||
round_up(oldsize, PAGE_CACHE_SIZE) - 1);
|
|
||||||
if (error)
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
|
|
||||||
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
|
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
|
||||||
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
|
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
|
||||||
if (error)
|
if (error)
|
||||||
|
@ -981,8 +954,12 @@ xfs_vn_setattr(
|
||||||
|
|
||||||
xfs_ilock(ip, iolock);
|
xfs_ilock(ip, iolock);
|
||||||
error = xfs_break_layouts(dentry->d_inode, &iolock);
|
error = xfs_break_layouts(dentry->d_inode, &iolock);
|
||||||
if (!error)
|
if (!error) {
|
||||||
|
xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
|
||||||
|
iolock |= XFS_MMAPLOCK_EXCL;
|
||||||
|
|
||||||
error = xfs_setattr_size(ip, iattr);
|
error = xfs_setattr_size(ip, iattr);
|
||||||
|
}
|
||||||
xfs_iunlock(ip, iolock);
|
xfs_iunlock(ip, iolock);
|
||||||
} else {
|
} else {
|
||||||
error = xfs_setattr_nonsize(ip, iattr, 0);
|
error = xfs_setattr_nonsize(ip, iattr, 0);
|
||||||
|
|
|
@ -966,6 +966,8 @@ xfs_fs_inode_init_once(
|
||||||
atomic_set(&ip->i_pincount, 0);
|
atomic_set(&ip->i_pincount, 0);
|
||||||
spin_lock_init(&ip->i_flags_lock);
|
spin_lock_init(&ip->i_flags_lock);
|
||||||
|
|
||||||
|
mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
|
||||||
|
"xfsino", ip->i_ino);
|
||||||
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
|
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
|
||||||
"xfsino", ip->i_ino);
|
"xfsino", ip->i_ino);
|
||||||
}
|
}
|
||||||
|
|
|
@ -685,6 +685,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
|
||||||
DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
|
DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
|
||||||
DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
|
DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
|
||||||
|
|
||||||
|
DEFINE_INODE_EVENT(xfs_filemap_fault);
|
||||||
|
DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
|
||||||
|
|
||||||
DECLARE_EVENT_CLASS(xfs_iref_class,
|
DECLARE_EVENT_CLASS(xfs_iref_class,
|
||||||
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
|
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
|
||||||
TP_ARGS(ip, caller_ip),
|
TP_ARGS(ip, caller_ip),
|
||||||
|
|
Loading…
Reference in New Issue