Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull DAX updates from Dan Williams:
 "The completion of Jan's DAX work for 4.10.

  As I mentioned in the libnvdimm-for-4.10 pull request, these are some
  final fixes for the DAX dirty-cacheline-tracking invalidation work
  that was merged through the -mm, ext4, and xfs trees in -rc1. These
  patches were prepared prior to the merge window, but we waited for
  4.10-rc1 to have a stable merge base after all the prerequisites were
  merged.

  Quoting Jan on the overall changes in these patches:

     "So I'd like all these 6 patches to go for rc2. The first three
      patches fix invalidation of exceptional DAX entries (a bug which
      is there for a long time) - without these patches data loss can
      occur on power failure even though user called fsync(2). The
      other three patches change locking of DAX faults so that
      ->iomap_begin() is called in a more relaxed locking context and
      we are safe to start a transaction there for ext4"

  These have received a build success notification from the kbuild
  robot, and pass the latest libnvdimm unit tests. There have not been
  any -next releases since -rc1, so they have not appeared there"

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  ext4: Simplify DAX fault path
  dax: Call ->iomap_begin without entry lock during dax fault
  dax: Finish fault completely when loading holes
  dax: Avoid page invalidation races and unnecessary radix tree traversals
  mm: Invalidate DAX radix tree entries only if appropriate
  ext2: Return BH_New buffers for zeroed blocks
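Illustrative sketch, not part of the commit: the persistence contract the first three patches protect is that a store through a shared DAX mapping followed by fsync(2) must survive a power failure, which requires the dirty radix tree entry to stay in the tree until fsync flushes the dirtied cachelines. A minimal userspace example of that contract, assuming a file on a hypothetical DAX mount at /mnt/dax/data:

/* Sketch only: persist a store made through a DAX mmap. The path and
 * the 4096-byte mapping size are illustrative assumptions. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/dax/data", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}
	memcpy(p, "persist me", 10);	/* dirties a DAX radix tree entry */
	/*
	 * fsync() walks the radix tree for entries tagged dirty and flushes
	 * their cachelines; the invalidation fixes below ensure a racing
	 * page invalidation cannot drop a dirty entry before this happens.
	 */
	if (fsync(fd))
		perror("fsync");
	munmap(p, 4096);
	close(fd);
	return 0;
}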
commit 4759d386d5

fs/dax.c | 249
@@ -451,16 +451,37 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 	__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 }
 
+static int __dax_invalidate_mapping_entry(struct address_space *mapping,
+					  pgoff_t index, bool trunc)
+{
+	int ret = 0;
+	void *entry;
+	struct radix_tree_root *page_tree = &mapping->page_tree;
+
+	spin_lock_irq(&mapping->tree_lock);
+	entry = get_unlocked_mapping_entry(mapping, index, NULL);
+	if (!entry || !radix_tree_exceptional_entry(entry))
+		goto out;
+	if (!trunc &&
+	    (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
+	     radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
+		goto out;
+	radix_tree_delete(page_tree, index);
+	mapping->nrexceptional--;
+	ret = 1;
+out:
+	put_unlocked_mapping_entry(mapping, index, entry);
+	spin_unlock_irq(&mapping->tree_lock);
+	return ret;
+}
 /*
  * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
  * entry to get unlocked before deleting it.
  */
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
-	void *entry;
+	int ret = __dax_invalidate_mapping_entry(mapping, index, true);
 
-	spin_lock_irq(&mapping->tree_lock);
-	entry = get_unlocked_mapping_entry(mapping, index, NULL);
 	/*
 	 * This gets called from truncate / punch_hole path. As such, the caller
 	 * must hold locks protecting against concurrent modifications of the
@@ -468,16 +489,46 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	 * caller has seen exceptional entry for this index, we better find it
 	 * at that index as well...
 	 */
-	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry))) {
-		spin_unlock_irq(&mapping->tree_lock);
-		return 0;
-	}
-	radix_tree_delete(&mapping->page_tree, index);
-	mapping->nrexceptional--;
-	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, entry, true);
+	WARN_ON_ONCE(!ret);
+	return ret;
+}
+
+/*
+ * Invalidate exceptional DAX entry if easily possible. This handles DAX
+ * entries for invalidate_inode_pages() so we evict the entry only if we can
+ * do so without blocking.
+ */
+int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
+{
+	int ret = 0;
+	void *entry, **slot;
+	struct radix_tree_root *page_tree = &mapping->page_tree;
+
+	spin_lock_irq(&mapping->tree_lock);
+	entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
+	if (!entry || !radix_tree_exceptional_entry(entry) ||
+	    slot_locked(mapping, slot))
+		goto out;
+	if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
+	    radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+		goto out;
+	radix_tree_delete(page_tree, index);
+	mapping->nrexceptional--;
+	ret = 1;
+out:
+	spin_unlock_irq(&mapping->tree_lock);
+	if (ret)
+		dax_wake_mapping_entry_waiter(mapping, index, entry, true);
+	return ret;
+}
 
-	return 1;
+/*
+ * Invalidate exceptional DAX entry if it is clean.
+ */
+int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
+				      pgoff_t index)
+{
+	return __dax_invalidate_mapping_entry(mapping, index, false);
 }
 
 /*
@@ -488,15 +539,16 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
  * otherwise it will simply fall out of the page cache under memory
  * pressure without ever having been dirtied.
  */
-static int dax_load_hole(struct address_space *mapping, void *entry,
+static int dax_load_hole(struct address_space *mapping, void **entry,
 			 struct vm_fault *vmf)
 {
 	struct page *page;
+	int ret;
 
 	/* Hole page already exists? Return it... */
-	if (!radix_tree_exceptional_entry(entry)) {
-		vmf->page = entry;
-		return VM_FAULT_LOCKED;
+	if (!radix_tree_exceptional_entry(*entry)) {
+		page = *entry;
+		goto out;
 	}
 
 	/* This will replace locked radix tree entry with a hole page */
@@ -504,8 +556,17 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 			vmf->gfp_mask | __GFP_ZERO);
 	if (!page)
 		return VM_FAULT_OOM;
+out:
 	vmf->page = page;
-	return VM_FAULT_LOCKED;
+	ret = finish_fault(vmf);
+	vmf->page = NULL;
+	*entry = page;
+	if (!ret) {
+		/* Grab reference for PTE that is now referencing the page */
+		get_page(page);
+		return VM_FAULT_NOPAGE;
+	}
+	return ret;
 }
 
 static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
@@ -934,6 +995,17 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
 		return -EIO;
 
+	/*
+	 * Write can allocate block for an area which has a hole page mapped
+	 * into page tables. We have to tear down these mappings so that data
+	 * written by write(2) is visible in mmap.
+	 */
+	if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) {
+		invalidate_inode_pages2_range(inode->i_mapping,
+					      pos >> PAGE_SHIFT,
+					      (end - 1) >> PAGE_SHIFT);
+	}
+
 	while (pos < end) {
 		unsigned offset = pos & (PAGE_SIZE - 1);
 		struct blk_dax_ctl dax = { 0 };
@@ -992,23 +1064,6 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (iov_iter_rw(iter) == WRITE)
 		flags |= IOMAP_WRITE;
 
-	/*
-	 * Yes, even DAX files can have page cache attached to them: A zeroed
-	 * page is inserted into the pagecache when we have to serve a write
-	 * fault on a hole. It should never be dirtied and can simply be
-	 * dropped from the pagecache once we get real data for the page.
-	 *
-	 * XXX: This is racy against mmap, and there's nothing we can do about
-	 * it. We'll eventually need to shift this down even further so that
-	 * we can check if we allocated blocks over a hole first.
-	 */
-	if (mapping->nrpages) {
-		ret = invalidate_inode_pages2_range(mapping,
-				pos >> PAGE_SHIFT,
-				(pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-	}
-
 	while (iov_iter_count(iter)) {
 		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
 				iter, dax_iomap_actor);
@@ -1023,6 +1078,15 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 }
 EXPORT_SYMBOL_GPL(dax_iomap_rw);
 
+static int dax_fault_return(int error)
+{
+	if (error == 0)
+		return VM_FAULT_NOPAGE;
+	if (error == -ENOMEM)
+		return VM_FAULT_OOM;
+	return VM_FAULT_SIGBUS;
+}
+
 /**
  * dax_iomap_fault - handle a page fault on a DAX file
  * @vma: The virtual memory area where the fault occurred
@@ -1055,12 +1119,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (pos >= i_size_read(inode))
 		return VM_FAULT_SIGBUS;
 
-	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
-	if (IS_ERR(entry)) {
-		error = PTR_ERR(entry);
-		goto out;
-	}
-
 	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
 		flags |= IOMAP_WRITE;
 
@@ -1071,9 +1129,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	 */
 	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
 	if (error)
-		goto unlock_entry;
+		return dax_fault_return(error);
 	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-		error = -EIO;		/* fs corruption? */
+		vmf_ret = dax_fault_return(-EIO);	/* fs corruption? */
+		goto finish_iomap;
+	}
+
+	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
+	if (IS_ERR(entry)) {
+		vmf_ret = dax_fault_return(PTR_ERR(entry));
 		goto finish_iomap;
 	}
 
@@ -1096,13 +1160,13 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		}
 
 		if (error)
-			goto finish_iomap;
+			goto error_unlock_entry;
 
 		__SetPageUptodate(vmf->cow_page);
 		vmf_ret = finish_fault(vmf);
 		if (!vmf_ret)
 			vmf_ret = VM_FAULT_DONE_COW;
-		goto finish_iomap;
+		goto unlock_entry;
 	}
 
 	switch (iomap.type) {
@@ -1114,12 +1178,15 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		}
 		error = dax_insert_mapping(mapping, iomap.bdev, sector,
 				PAGE_SIZE, &entry, vma, vmf);
+		/* -EBUSY is fine, somebody else faulted on the same PTE */
+		if (error == -EBUSY)
+			error = 0;
 		break;
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (!(vmf->flags & FAULT_FLAG_WRITE)) {
-			vmf_ret = dax_load_hole(mapping, entry, vmf);
-			break;
+			vmf_ret = dax_load_hole(mapping, &entry, vmf);
+			goto unlock_entry;
 		}
 		/*FALLTHRU*/
 	default:
@@ -1128,31 +1195,25 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 		break;
 	}
 
+ error_unlock_entry:
+	vmf_ret = dax_fault_return(error) | major;
+ unlock_entry:
+	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
  finish_iomap:
 	if (ops->iomap_end) {
-		if (error || (vmf_ret & VM_FAULT_ERROR)) {
-			/* keep previous error */
-			ops->iomap_end(inode, pos, PAGE_SIZE, 0, flags,
-					&iomap);
-		} else {
-			error = ops->iomap_end(inode, pos, PAGE_SIZE,
-					PAGE_SIZE, flags, &iomap);
-		}
+		int copied = PAGE_SIZE;
+
+		if (vmf_ret & VM_FAULT_ERROR)
+			copied = 0;
+		/*
+		 * The fault is done by now and there's no way back (other
+		 * thread may be already happily using PTE we have installed).
+		 * Just ignore error from ->iomap_end since we cannot do much
+		 * with it.
+		 */
+		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
 	}
- unlock_entry:
-	if (vmf_ret != VM_FAULT_LOCKED || error)
-		put_locked_mapping_entry(mapping, vmf->pgoff, entry);
- out:
-	if (error == -ENOMEM)
-		return VM_FAULT_OOM | major;
-	/* -EBUSY is fine, somebody else faulted on the same PTE */
-	if (error < 0 && error != -EBUSY)
-		return VM_FAULT_SIGBUS | major;
-	if (vmf_ret) {
-		WARN_ON_ONCE(error);	/* -EBUSY from ops->iomap_end? */
-		return vmf_ret;
-	}
-	return VM_FAULT_NOPAGE | major;
+	return vmf_ret;
 }
 EXPORT_SYMBOL_GPL(dax_iomap_fault);
 
@@ -1276,16 +1337,6 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
 		goto fallback;
 
-	/*
-	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
-	 * PMD or a HZP entry. If it can't (because a 4k page is already in
-	 * the tree, for instance), it will return -EEXIST and we just fall
-	 * back to 4k entries.
-	 */
-	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
-	if (IS_ERR(entry))
-		goto fallback;
-
 	/*
 	 * Note that we don't use iomap_apply here. We aren't doing I/O, only
 	 * setting up a mapping, so really we're using iomap_begin() as a way
@@ -1294,10 +1345,21 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	pos = (loff_t)pgoff << PAGE_SHIFT;
 	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
 	if (error)
-		goto unlock_entry;
+		goto fallback;
+
 	if (iomap.offset + iomap.length < pos + PMD_SIZE)
 		goto finish_iomap;
 
+	/*
+	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
+	 * PMD or a HZP entry. If it can't (because a 4k page is already in
+	 * the tree, for instance), it will return -EEXIST and we just fall
+	 * back to 4k entries.
+	 */
+	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+	if (IS_ERR(entry))
+		goto finish_iomap;
+
 	vmf.pgoff = pgoff;
 	vmf.flags = flags;
 	vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
@@ -1310,7 +1372,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
 		if (WARN_ON_ONCE(write))
-			goto finish_iomap;
+			goto unlock_entry;
 		result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
 				&entry);
 		break;
@@ -1319,20 +1381,23 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 		break;
 	}
 
- finish_iomap:
-	if (ops->iomap_end) {
-		if (result == VM_FAULT_FALLBACK) {
-			ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags,
-					&iomap);
-		} else {
-			error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE,
-					iomap_flags, &iomap);
-			if (error)
-				result = VM_FAULT_FALLBACK;
-		}
-	}
  unlock_entry:
 	put_locked_mapping_entry(mapping, pgoff, entry);
+ finish_iomap:
+	if (ops->iomap_end) {
+		int copied = PMD_SIZE;
+
+		if (result == VM_FAULT_FALLBACK)
+			copied = 0;
+		/*
+		 * The fault is done by now and there's no way back (other
+		 * thread may be already happily using PMD we have installed).
+		 * Just ignore error from ->iomap_end since we cannot do much
+		 * with it.
+		 */
+		ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
+				&iomap);
+	}
  fallback:
 	if (result == VM_FAULT_FALLBACK) {
 		split_huge_pmd(vma, pmd, address);
fs/ext2/inode.c

@@ -751,9 +751,8 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			goto cleanup;
 		}
-	} else {
-		*new = true;
 	}
+	*new = true;
 
 	ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
 	mutex_unlock(&ei->truncate_mutex);
fs/ext4/file.c

@@ -258,7 +258,6 @@ out:
 static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	int result;
-	handle_t *handle = NULL;
 	struct inode *inode = file_inode(vma->vm_file);
 	struct super_block *sb = inode->i_sb;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -266,24 +265,12 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vma->vm_file);
-		down_read(&EXT4_I(inode)->i_mmap_sem);
-		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
-						EXT4_DATA_TRANS_BLOCKS(sb));
-	} else
-		down_read(&EXT4_I(inode)->i_mmap_sem);
-
-	if (IS_ERR(handle))
-		result = VM_FAULT_SIGBUS;
-	else
-		result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
-
-	if (write) {
-		if (!IS_ERR(handle))
-			ext4_journal_stop(handle);
-		up_read(&EXT4_I(inode)->i_mmap_sem);
+	}
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	if (write)
 		sb_end_pagefault(sb);
-	} else
-		up_read(&EXT4_I(inode)->i_mmap_sem);
 
 	return result;
 }
@@ -292,7 +279,6 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 		pmd_t *pmd, unsigned int flags)
 {
 	int result;
-	handle_t *handle = NULL;
 	struct inode *inode = file_inode(vma->vm_file);
 	struct super_block *sb = inode->i_sb;
 	bool write = flags & FAULT_FLAG_WRITE;
@@ -300,27 +286,13 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vma->vm_file);
-		down_read(&EXT4_I(inode)->i_mmap_sem);
-		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
-				ext4_chunk_trans_blocks(inode,
-							PMD_SIZE / PAGE_SIZE));
-	} else
-		down_read(&EXT4_I(inode)->i_mmap_sem);
-
-	if (IS_ERR(handle))
-		result = VM_FAULT_SIGBUS;
-	else {
-		result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
-					     &ext4_iomap_ops);
-	}
-
-	if (write) {
-		if (!IS_ERR(handle))
-			ext4_journal_stop(handle);
-		up_read(&EXT4_I(inode)->i_mmap_sem);
+	}
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
+				     &ext4_iomap_ops);
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	if (write)
 		sb_end_pagefault(sb);
-	} else
-		up_read(&EXT4_I(inode)->i_mmap_sem);
 
 	return result;
 }
include/linux/dax.h

@@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 			struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
+int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
+int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
+				      pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		pgoff_t index, void *entry, bool wake_all);
mm/truncate.c

@@ -24,20 +24,12 @@
 #include <linux/rmap.h>
 #include "internal.h"
 
-static void clear_exceptional_entry(struct address_space *mapping,
-				    pgoff_t index, void *entry)
+static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
+			       void *entry)
 {
 	struct radix_tree_node *node;
 	void **slot;
 
-	/* Handled by shmem itself */
-	if (shmem_mapping(mapping))
-		return;
-
-	if (dax_mapping(mapping)) {
-		dax_delete_mapping_entry(mapping, index);
-		return;
-	}
 	spin_lock_irq(&mapping->tree_lock);
 	/*
 	 * Regular page slots are stabilized by the page lock even
@@ -55,6 +47,56 @@ unlock:
 	spin_unlock_irq(&mapping->tree_lock);
 }
 
+/*
+ * Unconditionally remove exceptional entry. Usually called from truncate path.
+ */
+static void truncate_exceptional_entry(struct address_space *mapping,
+				       pgoff_t index, void *entry)
+{
+	/* Handled by shmem itself */
+	if (shmem_mapping(mapping))
+		return;
+
+	if (dax_mapping(mapping)) {
+		dax_delete_mapping_entry(mapping, index);
+		return;
+	}
+	clear_shadow_entry(mapping, index, entry);
+}
+
+/*
+ * Invalidate exceptional entry if easily possible. This handles exceptional
+ * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
+ * clean entries.
+ */
+static int invalidate_exceptional_entry(struct address_space *mapping,
+					pgoff_t index, void *entry)
+{
+	/* Handled by shmem itself */
+	if (shmem_mapping(mapping))
+		return 1;
+	if (dax_mapping(mapping))
+		return dax_invalidate_mapping_entry(mapping, index);
+	clear_shadow_entry(mapping, index, entry);
+	return 1;
+}
+
+/*
+ * Invalidate exceptional entry if clean. This handles exceptional entries for
+ * invalidate_inode_pages2() so for DAX it evicts only clean entries.
+ */
+static int invalidate_exceptional_entry2(struct address_space *mapping,
+					 pgoff_t index, void *entry)
+{
+	/* Handled by shmem itself */
+	if (shmem_mapping(mapping))
+		return 1;
+	if (dax_mapping(mapping))
+		return dax_invalidate_mapping_entry_sync(mapping, index);
+	clear_shadow_entry(mapping, index, entry);
+	return 1;
+}
+
 /**
  * do_invalidatepage - invalidate part or all of a page
  * @page: the page which is affected
@@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			break;
 
 		if (radix_tree_exceptional_entry(page)) {
-			clear_exceptional_entry(mapping, index, page);
+			truncate_exceptional_entry(mapping, index,
+						   page);
 			continue;
 		}
 
@@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		}
 
 		if (radix_tree_exceptional_entry(page)) {
-			clear_exceptional_entry(mapping, index, page);
+			truncate_exceptional_entry(mapping, index,
+						   page);
 			continue;
 		}
 
@@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			break;
 
 		if (radix_tree_exceptional_entry(page)) {
-			clear_exceptional_entry(mapping, index, page);
+			invalidate_exceptional_entry(mapping, index,
+						     page);
 			continue;
 		}
 
@@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 			break;
 
 		if (radix_tree_exceptional_entry(page)) {
-			clear_exceptional_entry(mapping, index, page);
+			if (!invalidate_exceptional_entry2(mapping,
+							   index, page))
+				ret = -EBUSY;
 			continue;
 		}
 