ext4: more efficient SEEK_DATA implementation
Using SEEK_DATA in a huge sparse file can easily lead to softlockups as ext4_seek_data() iterates over a hole block-by-block. Fix the problem by using the hole size returned from ext4_map_blocks() and thus skipping the hole in one go. Also update the SEEK_HOLE implementation to follow the same pattern as SEEK_DATA to make future maintenance easier. Furthermore, we add cond_resched() to both ext4_seek_data() and ext4_seek_hole() to avoid softlockups in case an evil user creates a huge fragmented file and we have to go through lots of extents. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
parent
e3fb8eb14e
commit
2d90c160e5
|
@ -2546,6 +2546,9 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
|
||||||
int used, int quota_claim);
|
int used, int quota_claim);
|
||||||
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
|
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
|
||||||
ext4_fsblk_t pblk, ext4_lblk_t len);
|
ext4_fsblk_t pblk, ext4_lblk_t len);
|
||||||
|
extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
|
unsigned int map_len,
|
||||||
|
struct extent_status *result);
|
||||||
|
|
||||||
/* indirect.c */
|
/* indirect.c */
|
||||||
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
|
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
|
||||||
|
|
|
@ -426,7 +426,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
|
||||||
*/
|
*/
|
||||||
static int ext4_find_unwritten_pgoff(struct inode *inode,
|
static int ext4_find_unwritten_pgoff(struct inode *inode,
|
||||||
int whence,
|
int whence,
|
||||||
struct ext4_map_blocks *map,
|
ext4_lblk_t end_blk,
|
||||||
loff_t *offset)
|
loff_t *offset)
|
||||||
{
|
{
|
||||||
struct pagevec pvec;
|
struct pagevec pvec;
|
||||||
|
@ -441,7 +441,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
|
||||||
blkbits = inode->i_sb->s_blocksize_bits;
|
blkbits = inode->i_sb->s_blocksize_bits;
|
||||||
startoff = *offset;
|
startoff = *offset;
|
||||||
lastoff = startoff;
|
lastoff = startoff;
|
||||||
endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
|
endoff = (loff_t)end_blk << blkbits;
|
||||||
|
|
||||||
index = startoff >> PAGE_CACHE_SHIFT;
|
index = startoff >> PAGE_CACHE_SHIFT;
|
||||||
end = endoff >> PAGE_CACHE_SHIFT;
|
end = endoff >> PAGE_CACHE_SHIFT;
|
||||||
|
@ -559,12 +559,11 @@ out:
|
||||||
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
|
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
|
||||||
{
|
{
|
||||||
struct inode *inode = file->f_mapping->host;
|
struct inode *inode = file->f_mapping->host;
|
||||||
struct ext4_map_blocks map;
|
|
||||||
struct extent_status es;
|
struct extent_status es;
|
||||||
ext4_lblk_t start, last, end;
|
ext4_lblk_t start, last, end;
|
||||||
loff_t dataoff, isize;
|
loff_t dataoff, isize;
|
||||||
int blkbits;
|
int blkbits;
|
||||||
int ret = 0;
|
int ret;
|
||||||
|
|
||||||
inode_lock(inode);
|
inode_lock(inode);
|
||||||
|
|
||||||
|
@ -581,41 +580,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
|
||||||
dataoff = offset;
|
dataoff = offset;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
map.m_lblk = last;
|
ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
|
||||||
map.m_len = end - last + 1;
|
if (ret <= 0) {
|
||||||
ret = ext4_map_blocks(NULL, inode, &map, 0);
|
/* No extent found -> no data */
|
||||||
if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
|
if (ret == 0)
|
||||||
if (last != start)
|
ret = -ENXIO;
|
||||||
dataoff = (loff_t)last << blkbits;
|
inode_unlock(inode);
|
||||||
break;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
last = es.es_lblk;
|
||||||
* If there is a delay extent at this offset,
|
if (last != start)
|
||||||
* it will be as a data.
|
dataoff = (loff_t)last << blkbits;
|
||||||
*/
|
if (!ext4_es_is_unwritten(&es))
|
||||||
ext4_es_find_delayed_extent_range(inode, last, last, &es);
|
|
||||||
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
|
|
||||||
if (last != start)
|
|
||||||
dataoff = (loff_t)last << blkbits;
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there is a unwritten extent at this offset,
|
* If there is a unwritten extent at this offset,
|
||||||
* it will be as a data or a hole according to page
|
* it will be as a data or a hole according to page
|
||||||
* cache that has data or not.
|
* cache that has data or not.
|
||||||
*/
|
*/
|
||||||
if (map.m_flags & EXT4_MAP_UNWRITTEN) {
|
if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
|
||||||
int unwritten;
|
es.es_lblk + es.es_len, &dataoff))
|
||||||
unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
|
break;
|
||||||
&map, &dataoff);
|
last += es.es_len;
|
||||||
if (unwritten)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
last++;
|
|
||||||
dataoff = (loff_t)last << blkbits;
|
dataoff = (loff_t)last << blkbits;
|
||||||
|
cond_resched();
|
||||||
} while (last <= end);
|
} while (last <= end);
|
||||||
|
|
||||||
inode_unlock(inode);
|
inode_unlock(inode);
|
||||||
|
@ -632,12 +622,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
|
||||||
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
|
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
|
||||||
{
|
{
|
||||||
struct inode *inode = file->f_mapping->host;
|
struct inode *inode = file->f_mapping->host;
|
||||||
struct ext4_map_blocks map;
|
|
||||||
struct extent_status es;
|
struct extent_status es;
|
||||||
ext4_lblk_t start, last, end;
|
ext4_lblk_t start, last, end;
|
||||||
loff_t holeoff, isize;
|
loff_t holeoff, isize;
|
||||||
int blkbits;
|
int blkbits;
|
||||||
int ret = 0;
|
int ret;
|
||||||
|
|
||||||
inode_lock(inode);
|
inode_lock(inode);
|
||||||
|
|
||||||
|
@ -654,44 +643,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
|
||||||
holeoff = offset;
|
holeoff = offset;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
map.m_lblk = last;
|
ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
|
||||||
map.m_len = end - last + 1;
|
if (ret < 0) {
|
||||||
ret = ext4_map_blocks(NULL, inode, &map, 0);
|
inode_unlock(inode);
|
||||||
if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
|
return ret;
|
||||||
last += ret;
|
|
||||||
holeoff = (loff_t)last << blkbits;
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
/* Found a hole? */
|
||||||
/*
|
if (ret == 0 || es.es_lblk > last) {
|
||||||
* If there is a delay extent at this offset,
|
if (last != start)
|
||||||
* we will skip this extent.
|
holeoff = (loff_t)last << blkbits;
|
||||||
*/
|
break;
|
||||||
ext4_es_find_delayed_extent_range(inode, last, last, &es);
|
|
||||||
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
|
|
||||||
last = es.es_lblk + es.es_len;
|
|
||||||
holeoff = (loff_t)last << blkbits;
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there is a unwritten extent at this offset,
|
* If there is a unwritten extent at this offset,
|
||||||
* it will be as a data or a hole according to page
|
* it will be as a data or a hole according to page
|
||||||
* cache that has data or not.
|
* cache that has data or not.
|
||||||
*/
|
*/
|
||||||
if (map.m_flags & EXT4_MAP_UNWRITTEN) {
|
if (ext4_es_is_unwritten(&es) &&
|
||||||
int unwritten;
|
ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
|
||||||
unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
|
last + es.es_len, &holeoff))
|
||||||
&map, &holeoff);
|
break;
|
||||||
if (!unwritten) {
|
|
||||||
last += ret;
|
|
||||||
holeoff = (loff_t)last << blkbits;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* find a hole */
|
last += es.es_len;
|
||||||
break;
|
holeoff = (loff_t)last << blkbits;
|
||||||
|
cond_resched();
|
||||||
} while (last <= end);
|
} while (last <= end);
|
||||||
|
|
||||||
inode_unlock(inode);
|
inode_unlock(inode);
|
||||||
|
|
|
@ -5596,3 +5596,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find the first extent at or after @lblk in an inode that is not a hole.
|
||||||
|
* Search for @map_len blocks at most. The extent is returned in @result.
|
||||||
|
*
* @inode:   inode whose block mapping is searched
* @lblk:    first logical block to consider
* @map_len: maximum number of blocks to search, starting at @lblk
* @result:  filled with the start block, length, physical block and status
*           of the extent found; the reported start is never before @lblk
*
* Two kinds of ranges count as "not a hole": ranges that ext4_map_blocks()
* reports as mapped (returned with written or unwritten status), and
* delayed extents reported by ext4_es_find_delayed_extent_range().
|
||||||
|
* The function returns 1 if we found an extent. The function returns 0 in
|
||||||
|
* case there is no extent at or after @lblk and in that case also sets
|
||||||
|
* @result->es_len to 0. In case of error, the error code is returned.
|
||||||
|
*/
|
||||||
|
int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
|
unsigned int map_len, struct extent_status *result)
|
||||||
|
{
|
||||||
|
struct ext4_map_blocks map;
|
||||||
|
struct extent_status es = {};
|
||||||
|
int ret;
|

||||||
|
/* Start the lookup at @lblk with the whole search budget. */
map.m_lblk = lblk;
|
||||||
|
map.m_len = map_len;
|

||||||
|
/*
|
||||||
|
* For non-extent based files this loop may iterate several times since
|
||||||
|
* we do not determine full hole size.
|
||||||
|
*/
|
||||||
|
while (map.m_len > 0) {
|
||||||
|
/* NULL handle and flags == 0: presumably a lookup-only call -- TODO confirm */
ret = ext4_map_blocks(NULL, inode, &map, 0);
|
||||||
|
/* A negative return is an error code; pass it to the caller as is. */
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
/* Is there an extent covering m_lblk? If so, just return it. */
|
||||||
|
if (ret > 0) {
|
||||||
|
int status;
|

||||||
|
/* Copy the mapping from ext4_map_blocks() into @result. */
ext4_es_store_pblock(result, map.m_pblk);
|
||||||
|
result->es_lblk = map.m_lblk;
|
||||||
|
result->es_len = map.m_len;
|
||||||
|
/* Report the extent as unwritten or written based on m_flags. */
if (map.m_flags & EXT4_MAP_UNWRITTEN)
|
||||||
|
status = EXTENT_STATUS_UNWRITTEN;
|
||||||
|
else
|
||||||
|
status = EXTENT_STATUS_WRITTEN;
|
||||||
|
ext4_es_store_status(result, status);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
/*
 * Nothing is mapped at m_lblk. Look for a delayed extent within
 * [m_lblk, m_lblk + m_len - 1]; m_len is read back from @map here,
 * so this relies on ext4_map_blocks() having left the size of the
 * unmapped range there -- NOTE(review): confirm against
 * ext4_map_blocks().
 */
ext4_es_find_delayed_extent_range(inode, map.m_lblk,
|
||||||
|
map.m_lblk + map.m_len - 1,
|
||||||
|
&es);
|
||||||
|
/* Is delalloc data before next block in extent tree? */
|
||||||
|
if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
|
||||||
|
ext4_lblk_t offset = 0;
|

||||||
|
/*
 * If the delayed extent begins before @lblk, trim the leading
 * part so that the reported extent starts exactly at @lblk.
 */
if (es.es_lblk < lblk)
|
||||||
|
offset = lblk - es.es_lblk;
|
||||||
|
result->es_lblk = es.es_lblk + offset;
|
||||||
|
ext4_es_store_pblock(result,
|
||||||
|
ext4_es_pblock(&es) + offset);
|
||||||
|
result->es_len = es.es_len - offset;
|
||||||
|
/* Keep the status bits of the extent that was found. */
ext4_es_store_status(result, ext4_es_status(&es));
|

||||||
|
return 1;
|
||||||
|
}
|
||||||
|
/*
 * There's a hole at m_lblk: advance past it, charge the skipped
 * blocks against the remaining budget in map_len and retry with
 * whatever budget is left.
 */
|
||||||
|
map.m_lblk += map.m_len;
|
||||||
|
map_len -= map.m_len;
|
||||||
|
map.m_len = map_len;
|
||||||
|
/* Called once per skipped hole, before the next lookup. */
cond_resched();
|
||||||
|
}
|
||||||
|
/* Budget exhausted without finding anything: report "no extent". */
result->es_len = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue