ext4: punch_hole should wait for DIO writers
punch_hole is the place where we have to wait for all existing writers (writeback, aio, dio), but currently we simply flush pended end_io request which is not sufficient. Other issue is that punch_hole performed w/o i_mutex held which obviously result in dangerous data corruption due to write-after-free. This patch performs following changes: - Guard punch_hole with i_mutex - Recheck inode flags under i_mutex - Block all new dio readers in order to prevent information leak caused by read-after-free pattern. - punch_hole now wait for all writers in flight NOTE: XXX write-after-free race is still possible because new dirty pages may appear due to mmap(), and currently there is no easy way to stop writeback while punch_hole is in progress. [ Fixed error return from ext4_ext_punch_hole() to make sure that we release i_mutex before returning EPERM or ETXTBUSY -- Ted ] Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
parent
1f555cfa29
commit
02d262dffc
|
@ -4794,9 +4794,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
|
|||
loff_t first_page_offset, last_page_offset;
|
||||
int credits, err = 0;
|
||||
|
||||
/*
|
||||
* Write out all dirty pages to avoid race conditions
|
||||
* Then release them.
|
||||
*/
|
||||
if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
|
||||
err = filemap_write_and_wait_range(mapping,
|
||||
offset, offset + length - 1);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
/* It's not possible punch hole on append only file */
|
||||
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
|
||||
err = -EPERM;
|
||||
goto out_mutex;
|
||||
}
|
||||
if (IS_SWAPFILE(inode)) {
|
||||
err = -ETXTBSY;
|
||||
goto out_mutex;
|
||||
}
|
||||
|
||||
/* No need to punch hole beyond i_size */
|
||||
if (offset >= inode->i_size)
|
||||
return 0;
|
||||
goto out_mutex;
|
||||
|
||||
/*
|
||||
* If the hole extends beyond i_size, set the hole
|
||||
|
@ -4814,33 +4837,25 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
|
|||
first_page_offset = first_page << PAGE_CACHE_SHIFT;
|
||||
last_page_offset = last_page << PAGE_CACHE_SHIFT;
|
||||
|
||||
/*
|
||||
* Write out all dirty pages to avoid race conditions
|
||||
* Then release them.
|
||||
*/
|
||||
if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
|
||||
err = filemap_write_and_wait_range(mapping,
|
||||
offset, offset + length - 1);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Now release the pages */
|
||||
if (last_page_offset > first_page_offset) {
|
||||
truncate_pagecache_range(inode, first_page_offset,
|
||||
last_page_offset - 1);
|
||||
}
|
||||
|
||||
/* finish any pending end_io work */
|
||||
/* Wait all existing dio workers, newcomers will block on i_mutex */
|
||||
ext4_inode_block_unlocked_dio(inode);
|
||||
inode_dio_wait(inode);
|
||||
err = ext4_flush_completed_IO(inode);
|
||||
if (err)
|
||||
return err;
|
||||
goto out_dio;
|
||||
|
||||
credits = ext4_writepage_trans_blocks(inode);
|
||||
handle = ext4_journal_start(inode, credits);
|
||||
if (IS_ERR(handle))
|
||||
return PTR_ERR(handle);
|
||||
if (IS_ERR(handle)) {
|
||||
err = PTR_ERR(handle);
|
||||
goto out_dio;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
@ -4930,6 +4945,10 @@ out:
|
|||
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
ext4_journal_stop(handle);
|
||||
out_dio:
|
||||
ext4_inode_resume_unlocked_dio(inode);
|
||||
out_mutex:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return err;
|
||||
}
|
||||
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
|
|
Loading…
Reference in New Issue