writeback: fix periodic superblock dirty inode flushing
The current -mm tree has a bucketful of bug fixes in the periodic writeback path. However, we still hit a glitch where dirty pages on a given inode aren't completely flushed to the disk, and the system will accumulate a large amount of dirty pages beyond what dirty_expire_interval is designed for. The problem is that __sync_single_inode() will move an inode to the sb->s_dirty list even when there are more pending dirty pages on that inode. If there is another inode with a small number of dirty pages, we hit a case where the loop iteration in wb_kupdate() terminates prematurely because wbc.nr_to_write > 0, thus leaving behind the inode that has a large amount of dirty pages; it has to wait for another dirty_writeback_interval before we flush it again. We effectively only write out MAX_WRITEBACK_PAGES every dirty_writeback_interval. If the rate of dirtying is sufficiently high, the system will start to accumulate a large number of dirty pages. So fix it by having another list, sb->s_more_io, on which to park the inode while we iterate through sb->s_io, and to allow each dirty inode which resides on that sb to have an equal chance of flushing some amount of dirty pages. Signed-off-by: Ken Chen <kenchen@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
670e4def6e
commit
0e0f4fc22e
|
@ -165,25 +165,11 @@ static void redirty_tail(struct inode *inode)
|
|||
}
|
||||
|
||||
/*
|
||||
* Redirty an inode, but mark it as the very next-to-be-written inode on its
|
||||
* superblock's dirty-inode list.
|
||||
* We need to preserve s_dirty's reverse-time-orderedness, so we cheat by
|
||||
* setting this inode's dirtied_when to the same value as that of the inode
|
||||
* which is presently head-of-list, if present head-of-list is newer than this
|
||||
* inode. (head-of-list is the least-recently-dirtied inode: the oldest one).
|
||||
* requeue inode for re-scanning after sb->s_io list is exhausted.
|
||||
*/
|
||||
static void redirty_head(struct inode *inode)
|
||||
static void requeue_io(struct inode *inode)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
|
||||
if (!list_empty(&sb->s_dirty)) {
|
||||
struct inode *head_inode;
|
||||
|
||||
head_inode = list_entry(sb->s_dirty.prev, struct inode, i_list);
|
||||
if (time_after(inode->dirtied_when, head_inode->dirtied_when))
|
||||
inode->dirtied_when = head_inode->dirtied_when;
|
||||
}
|
||||
list_move_tail(&inode->i_list, &sb->s_dirty);
|
||||
list_move(&inode->i_list, &inode->i_sb->s_more_io);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -255,7 +241,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
|
|||
* uncongested.
|
||||
*/
|
||||
inode->i_state |= I_DIRTY_PAGES;
|
||||
redirty_head(inode);
|
||||
requeue_io(inode);
|
||||
} else {
|
||||
/*
|
||||
* Otherwise fully redirty the inode so that
|
||||
|
@ -315,7 +301,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
|
|||
* on s_io. We'll have another go at writing back this inode
|
||||
* when the s_dirty inodes get moved back onto s_io.
|
||||
*/
|
||||
redirty_head(inode);
|
||||
requeue_io(inode);
|
||||
|
||||
/*
|
||||
* Even if we don't actually write the inode itself here,
|
||||
|
@ -410,14 +396,14 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
|
|||
wbc->encountered_congestion = 1;
|
||||
if (!sb_is_blkdev_sb(sb))
|
||||
break; /* Skip a congested fs */
|
||||
redirty_head(inode);
|
||||
requeue_io(inode);
|
||||
continue; /* Skip a congested blockdev */
|
||||
}
|
||||
|
||||
if (wbc->bdi && bdi != wbc->bdi) {
|
||||
if (!sb_is_blkdev_sb(sb))
|
||||
break; /* fs has the wrong queue */
|
||||
redirty_head(inode);
|
||||
requeue_io(inode);
|
||||
continue; /* blockdev has wrong queue */
|
||||
}
|
||||
|
||||
|
@ -427,8 +413,10 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
|
|||
|
||||
/* Was this inode dirtied too recently? */
|
||||
if (wbc->older_than_this && time_after(inode->dirtied_when,
|
||||
*wbc->older_than_this))
|
||||
*wbc->older_than_this)) {
|
||||
list_splice_init(&sb->s_io, sb->s_dirty.prev);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Is another pdflush already flushing this queue? */
|
||||
if (current_is_pdflush() && !writeback_acquire(bdi))
|
||||
|
@ -458,6 +446,10 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
|
|||
if (wbc->nr_to_write <= 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (list_empty(&sb->s_io))
|
||||
list_splice_init(&sb->s_more_io, &sb->s_io);
|
||||
|
||||
return; /* Leave any unwritten inodes on s_io */
|
||||
}
|
||||
|
||||
|
|
|
@ -67,6 +67,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
|
|||
}
|
||||
INIT_LIST_HEAD(&s->s_dirty);
|
||||
INIT_LIST_HEAD(&s->s_io);
|
||||
INIT_LIST_HEAD(&s->s_more_io);
|
||||
INIT_LIST_HEAD(&s->s_files);
|
||||
INIT_LIST_HEAD(&s->s_instances);
|
||||
INIT_HLIST_HEAD(&s->s_anon);
|
||||
|
|
|
@ -1002,6 +1002,7 @@ struct super_block {
|
|||
struct list_head s_inodes; /* all inodes */
|
||||
struct list_head s_dirty; /* dirty inodes */
|
||||
struct list_head s_io; /* parked for writeback */
|
||||
struct list_head s_more_io; /* parked for more writeback */
|
||||
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
|
||||
struct list_head s_files;
|
||||
|
||||
|
|
Loading…
Reference in New Issue