Fix read/truncate race
do_generic_mapping_read currently samples the i_size at the start and doesn't do so again unless it needs to call ->readpage to load a page. After ->readpage it has to re-sample i_size as a truncate may have caused that page to be filled with zeros, and the read() call should not see these. However there are other activities that might cause ->readpage to be called on a page between the time that do_generic_mapping_read samples i_size and when it finds that it has an uptodate page. These include at least read-ahead and possibly another thread performing a read. So do_generic_mapping_read must sample i_size *after* it has an uptodate page. Thus the current sampling at the start and after a read can be replaced with a sampling before the copy-out. The same change applied to __generic_file_splice_read. Note that this fixes any race with truncate_complete_page, but does not fix a possible race with truncate_partial_page. If a partial truncate happens after do_generic_mapping_read samples i_size and before the copy_out, the nuls that truncate_partial_page place in the page could be copied out incorrectly. I think the best fix for that is to *not* zero out parts of the page in truncate_partial_page, but rather to zero out the tail of a page when increasing i_size. Signed-off-by: Neil Brown <neilb@suse.de> Cc: Jens Axboe <jens.axboe@oracle.com> Acked-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
e21ea246bc
commit
a32ea1e1f9
72
mm/filemap.c
72
mm/filemap.c
|
@ -867,13 +867,11 @@ void do_generic_mapping_read(struct address_space *mapping,
|
|||
{
|
||||
struct inode *inode = mapping->host;
|
||||
unsigned long index;
|
||||
unsigned long end_index;
|
||||
unsigned long offset;
|
||||
unsigned long last_index;
|
||||
unsigned long next_index;
|
||||
unsigned long prev_index;
|
||||
unsigned int prev_offset;
|
||||
loff_t isize;
|
||||
struct page *cached_page;
|
||||
int error;
|
||||
struct file_ra_state ra = *_ra;
|
||||
|
@ -886,27 +884,12 @@ void do_generic_mapping_read(struct address_space *mapping,
|
|||
last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
|
||||
offset = *ppos & ~PAGE_CACHE_MASK;
|
||||
|
||||
isize = i_size_read(inode);
|
||||
if (!isize)
|
||||
goto out;
|
||||
|
||||
end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
|
||||
for (;;) {
|
||||
struct page *page;
|
||||
unsigned long end_index;
|
||||
loff_t isize;
|
||||
unsigned long nr, ret;
|
||||
|
||||
/* nr is the maximum number of bytes to copy from this page */
|
||||
nr = PAGE_CACHE_SIZE;
|
||||
if (index >= end_index) {
|
||||
if (index > end_index)
|
||||
goto out;
|
||||
nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
|
||||
if (nr <= offset) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
nr = nr - offset;
|
||||
|
||||
cond_resched();
|
||||
if (index == next_index)
|
||||
next_index = page_cache_readahead(mapping, &ra, filp,
|
||||
|
@ -921,6 +904,32 @@ find_page:
|
|||
if (!PageUptodate(page))
|
||||
goto page_not_up_to_date;
|
||||
page_ok:
|
||||
/*
|
||||
* i_size must be checked after we know the page is Uptodate.
|
||||
*
|
||||
* Checking i_size after the check allows us to calculate
|
||||
* the correct value for "nr", which means the zero-filled
|
||||
* part of the page is not copied back to userspace (unless
|
||||
* another truncate extends the file - this is desired though).
|
||||
*/
|
||||
|
||||
isize = i_size_read(inode);
|
||||
end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
|
||||
if (unlikely(!isize || index > end_index)) {
|
||||
page_cache_release(page);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* nr is the maximum number of bytes to copy from this page */
|
||||
nr = PAGE_CACHE_SIZE;
|
||||
if (index == end_index) {
|
||||
nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
|
||||
if (nr <= offset) {
|
||||
page_cache_release(page);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
nr = nr - offset;
|
||||
|
||||
/* If users can be writing to this page using arbitrary
|
||||
* virtual addresses, take care about potential aliasing
|
||||
|
@ -1007,31 +1016,6 @@ readpage:
|
|||
unlock_page(page);
|
||||
}
|
||||
|
||||
/*
|
||||
* i_size must be checked after we have done ->readpage.
|
||||
*
|
||||
* Checking i_size after the readpage allows us to calculate
|
||||
* the correct value for "nr", which means the zero-filled
|
||||
* part of the page is not copied back to userspace (unless
|
||||
* another truncate extends the file - this is desired though).
|
||||
*/
|
||||
isize = i_size_read(inode);
|
||||
end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
|
||||
if (unlikely(!isize || index > end_index)) {
|
||||
page_cache_release(page);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* nr is the maximum number of bytes to copy from this page */
|
||||
nr = PAGE_CACHE_SIZE;
|
||||
if (index == end_index) {
|
||||
nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
|
||||
if (nr <= offset) {
|
||||
page_cache_release(page);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
nr = nr - offset;
|
||||
goto page_ok;
|
||||
|
||||
readpage_error:
|
||||
|
|
Loading…
Reference in New Issue