Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull VFS fixes and cleanups from Al Viro.

Most of this is the series to remove sync_supers() and the
->write_supers VFS callback from Artem Bityutskiy.  One commit to do the
actual removal work, a whole series of commits to fix up stale comments
etc all over the tree.

There's also a regression fix for an incorrect use of mnt_drop_write()
in do_dentry_open().

* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  missed mnt_drop_write() in do_dentry_open()
  UBIFS: nuke pdflush from comments
  gfs2: nuke pdflush from comments
  drbd: nuke pdflush from comments
  nilfs2: nuke write_super from comments
  hfs: nuke write_super from comments
  vfs: nuke pdflush from comments
  jbd/jbd2: nuke write_super from comments
  btrfs: nuke pdflush from comments
  btrfs: nuke write_super from comments
  ext4: nuke pdflush from comments
  ext4: nuke write_super from comments
  ext3: nuke write_super from comments
  Documentation: fix the VM knobs descritpion WRT pdflush
  Documentation: get rid of write_super
  vfs: kill write_super and sync_supers
This commit is contained in:
Linus Torvalds 2012-08-04 08:32:46 -07:00
commit e7882d6c40
31 changed files with 38 additions and 194 deletions

View File

@ -224,8 +224,8 @@ all your transactions.
</para>
<para>
Then at umount time , in your put_super() (2.4) or write_super() (2.5)
you can then call journal_destroy() to clean up your in-core journal object.
Then at umount time , in your put_super() you can then call journal_destroy()
to clean up your in-core journal object.
</para>
<para>

View File

@ -114,7 +114,6 @@ prototypes:
int (*drop_inode) (struct inode *);
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
void (*write_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
int (*freeze_fs) (struct super_block *);
int (*unfreeze_fs) (struct super_block *);
@ -136,7 +135,6 @@ write_inode:
drop_inode: !!!inode->i_lock!!!
evict_inode:
put_super: write
write_super: read
sync_fs: read
freeze_fs: write
unfreeze_fs: write

View File

@ -94,9 +94,8 @@ protected.
---
[mandatory]
BKL is also moved from around sb operations. ->write_super() Is now called
without BKL held. BKL should have been shifted into individual fs sb_op
functions. If you don't need it, remove it.
BKL is also moved from around sb operations. BKL should have been shifted into
individual fs sb_op functions. If you don't need it, remove it.
---
[informational]

View File

@ -216,7 +216,6 @@ struct super_operations {
void (*drop_inode) (struct inode *);
void (*delete_inode) (struct inode *);
void (*put_super) (struct super_block *);
void (*write_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
int (*freeze_fs) (struct super_block *);
int (*unfreeze_fs) (struct super_block *);
@ -273,9 +272,6 @@ or bottom half).
put_super: called when the VFS wishes to free the superblock
(i.e. unmount). This is called with the superblock lock held
write_super: called when the VFS superblock needs to be written to
disc. This method is optional
sync_fs: called when VFS is writing out all dirty data associated with
a superblock. The second parameter indicates whether the method
should wait until the write out has been completed. Optional.

View File

@ -262,9 +262,9 @@ MINIMUM_BATTERY_MINUTES=10
#
# Allowed dirty background ratio, in percent. Once DIRTY_RATIO has been
# exceeded, the kernel will wake pdflush which will then reduce the amount
# of dirty memory to dirty_background_ratio. Set this nice and low, so once
# some writeout has commenced, we do a lot of it.
# exceeded, the kernel will wake flusher threads which will then reduce the
# amount of dirty memory to dirty_background_ratio. Set this nice and low,
# so once some writeout has commenced, we do a lot of it.
#
#DIRTY_BACKGROUND_RATIO=5
@ -384,9 +384,9 @@ CPU_MAXFREQ=${CPU_MAXFREQ:-'slowest'}
#
# Allowed dirty background ratio, in percent. Once DIRTY_RATIO has been
# exceeded, the kernel will wake pdflush which will then reduce the amount
# of dirty memory to dirty_background_ratio. Set this nice and low, so once
# some writeout has commenced, we do a lot of it.
# exceeded, the kernel will wake flusher threads which will then reduce the
# amount of dirty memory to dirty_background_ratio. Set this nice and low,
# so once some writeout has commenced, we do a lot of it.
#
DIRTY_BACKGROUND_RATIO=${DIRTY_BACKGROUND_RATIO:-'5'}

View File

@ -76,8 +76,8 @@ huge pages although processes will also directly compact memory as required.
dirty_background_bytes
Contains the amount of dirty memory at which the pdflush background writeback
daemon will start writeback.
Contains the amount of dirty memory at which the background kernel
flusher threads will start writeback.
Note: dirty_background_bytes is the counterpart of dirty_background_ratio. Only
one of them may be specified at a time. When one sysctl is written it is
@ -89,7 +89,7 @@ other appears as 0 when read.
dirty_background_ratio
Contains, as a percentage of total system memory, the number of pages at which
the pdflush background writeback daemon will start writing out dirty data.
the background kernel flusher threads will start writing out dirty data.
==============================================================
@ -112,9 +112,9 @@ retained.
dirty_expire_centisecs
This tunable is used to define when dirty data is old enough to be eligible
for writeout by the pdflush daemons. It is expressed in 100'ths of a second.
Data which has been dirty in-memory for longer than this interval will be
written out next time a pdflush daemon wakes up.
for writeout by the kernel flusher threads. It is expressed in 100'ths
of a second. Data which has been dirty in-memory for longer than this
interval will be written out next time a flusher thread wakes up.
==============================================================
@ -128,7 +128,7 @@ data.
dirty_writeback_centisecs
The pdflush writeback daemons will periodically wake up and write `old' data
The kernel flusher threads will periodically wake up and write `old' data
out to disk. This tunable expresses the interval between those wakeups, in
100'ths of a second.

View File

@ -3537,9 +3537,9 @@ static void drbd_cleanup(void)
}
/**
* drbd_congested() - Callback for pdflush
* drbd_congested() - Callback for the flusher thread
* @congested_data: User data
* @bdi_bits: Bits pdflush is currently interested in
* @bdi_bits: Bits the BDI flusher thread is currently interested in
*
* Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
*/

View File

@ -1312,7 +1312,7 @@ EXPORT_SYMBOL(bio_copy_kern);
* Note that this code is very hard to test under normal circumstances because
* direct-io pins the pages with get_user_pages(). This makes
* is_page_cache_freeable return false, and the VM will not clean the pages.
* But other code (eg, pdflush) could clean the pages if they are mapped
* But other code (eg, flusher threads) could clean the pages if they are mapped
* pagecache.
*
* Simply disabling the call to bio_set_pages_dirty() is a good way to test the

View File

@ -324,7 +324,8 @@ static noinline int add_async_extent(struct async_cow *cow,
* If this code finds it can't get good compression, it puts an
* entry onto the work queue to write the uncompressed bytes. This
* makes sure that both compressed inodes and uncompressed inodes
* are written in the same order that pdflush sent them down.
* are written in the same order that the flusher thread sent them
* down.
*/
static noinline int compress_file_range(struct inode *inode,
struct page *locked_page,

View File

@ -596,7 +596,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
/*
* pages in the range can be dirty, clean or writeback. We
* start IO on any dirty ones so the wait doesn't stall waiting
* for pdflush to find them
* for the flusher thread to find them
*/
if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
filemap_fdatawrite_range(inode->i_mapping, start, end);

View File

@ -100,10 +100,6 @@ static void __save_error_info(struct btrfs_fs_info *fs_info)
fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
}
/* NOTE:
* We move write_super stuff at umount in order to avoid deadlock
* for umount hold all lock.
*/
static void save_error_info(struct btrfs_fs_info *fs_info)
{
__save_error_info(fs_info);

View File

@ -1744,10 +1744,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->fs_devices = root->fs_info->fs_devices;
/*
* we don't want write_supers to jump in here with our device
* half setup
*/
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
list_add(&device->dev_alloc_list,

View File

@ -3459,14 +3459,6 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
* inode out, but prune_icache isn't a user-visible syncing function.
* Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
* we start and wait on commits.
*
* Is this efficient/effective? Well, we're being nice to the system
* by cleaning up our inodes proactively so they can be reaped
* without I/O. But we are potentially leaving up to five seconds'
* worth of inodes floating about which prune_icache wants us to
* write out. One way to fix that would be to get prune_icache()
* to do a write_super() to free up some memory. It has the desired
* effect.
*/
int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
{

View File

@ -64,11 +64,6 @@ static int ext3_freeze(struct super_block *sb);
/*
* Wrappers for journal_start/end.
*
* The only special thing we need to do here is to make sure that all
* journal_end calls result in the superblock being marked dirty, so
* that sync() will call the filesystem's write_super callback if
* appropriate.
*/
handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
{
@ -90,12 +85,6 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
return journal_start(journal, nblocks);
}
/*
* The only special thing we need to do here is to make sure that all
* journal_stop calls result in the superblock being marked dirty, so
* that sync() will call the filesystem's write_super callback if
* appropriate.
*/
int __ext3_journal_stop(const char *where, handle_t *handle)
{
struct super_block *sb;

View File

@ -1970,7 +1970,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
* This function can get called via...
* - ext4_da_writepages after taking page lock (have journal handle)
* - journal_submit_inode_data_buffers (no journal handle)
* - shrink_page_list via pdflush (no journal handle)
* - shrink_page_list via the kswapd/direct reclaim (no journal handle)
* - grab_page_cache when doing write_begin (have journal handle)
*
* We don't do any block allocation in this function. If we have page with
@ -4589,14 +4589,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
* inode out, but prune_icache isn't a user-visible syncing function.
* Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
* we start and wait on commits.
*
* Is this efficient/effective? Well, we're being nice to the system
* by cleaning up our inodes proactively so they can be reaped
* without I/O. But we are potentially leaving up to five seconds'
* worth of inodes floating about which prune_icache wants us to
* write out. One way to fix that would be to get prune_icache()
* to do a write_super() to free up some memory. It has the desired
* effect.
*/
int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
{

View File

@ -326,11 +326,6 @@ static void ext4_put_nojournal(handle_t *handle)
/*
* Wrappers for jbd2_journal_start/end.
*
* The only special thing we need to do here is to make sure that all
* journal_end calls result in the superblock being marked dirty, so
* that sync() will call the filesystem's write_super callback if
* appropriate.
*/
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
{
@ -356,12 +351,6 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
return jbd2_journal_start(journal, nblocks);
}
/*
* The only special thing we need to do here is to make sure that all
* jbd2_journal_stop calls result in the superblock being marked dirty, so
* that sync() will call the filesystem's write_super callback if
* appropriate.
*/
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
{
struct super_block *sb;

View File

@ -52,7 +52,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
/*
* If it's a fully non-blocking write attempt and we cannot
* lock the buffer then redirty the page. Note that this can
* potentially cause a busy-wait loop from pdflush and kswapd
* potentially cause a busy-wait loop from flusher thread and kswapd
* activity, but those code paths have their own higher-level
* throttling.
*/

View File

@ -236,10 +236,10 @@ out:
* hfs_mdb_commit()
*
* Description:
* This updates the MDB on disk (look also at hfs_write_super()).
* This updates the MDB on disk.
* It does not check, if the superblock has been modified, or
* if the filesystem has been mounted read-only. It is mainly
* called by hfs_write_super() and hfs_btree_extend().
* called by hfs_sync_fs() and flush_mdb().
* Input Variable(s):
* struct hfs_mdb *mdb: Pointer to the hfs MDB
* int backup;

View File

@ -534,8 +534,8 @@ int journal_start_commit(journal_t *journal, tid_t *ptid)
ret = 1;
} else if (journal->j_committing_transaction) {
/*
* If ext3_write_super() recently started a commit, then we
* have to wait for completion of that transaction
* If commit has been started, then we have to wait for
* completion of that transaction.
*/
if (ptid)
*ptid = journal->j_committing_transaction->t_tid;

View File

@ -612,8 +612,8 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
ret = 1;
} else if (journal->j_committing_transaction) {
/*
* If ext3_write_super() recently started a commit, then we
* have to wait for completion of that transaction
* If commit has been started, then we have to wait for
* completion of that transaction.
*/
if (ptid)
*ptid = journal->j_committing_transaction->t_tid;

View File

@ -676,17 +676,13 @@ static const struct super_operations nilfs_sops = {
.alloc_inode = nilfs_alloc_inode,
.destroy_inode = nilfs_destroy_inode,
.dirty_inode = nilfs_dirty_inode,
/* .write_inode = nilfs_write_inode, */
/* .drop_inode = nilfs_drop_inode, */
.evict_inode = nilfs_evict_inode,
.put_super = nilfs_put_super,
/* .write_super = nilfs_write_super, */
.sync_fs = nilfs_sync_fs,
.freeze_fs = nilfs_freeze,
.unfreeze_fs = nilfs_unfreeze,
.statfs = nilfs_statfs,
.remount_fs = nilfs_remount,
/* .umount_begin */
.show_options = nilfs_show_options
};

View File

@ -107,8 +107,6 @@ struct the_nilfs {
* used for
* - loading the latest checkpoint exclusively.
* - allocating a new full segment.
* - protecting s_dirt in the super_block struct
* (see nilfs_write_super) and the following fields.
*/
struct buffer_head *ns_sbh[2];
struct nilfs_super_block *ns_sbp[2];

View File

@ -717,7 +717,7 @@ cleanup_all:
* here, so just reset the state.
*/
file_reset_write(f);
mnt_drop_write(f->f_path.mnt);
__mnt_drop_write(f->f_path.mnt);
}
}
cleanup_file:

View File

@ -536,46 +536,6 @@ void drop_super(struct super_block *sb)
EXPORT_SYMBOL(drop_super);
/**
* sync_supers - helper for periodic superblock writeback
*
* Call the write_super method if present on all dirty superblocks in
* the system. This is for the periodic writeback used by most older
* filesystems. For data integrity superblock writeback use
* sync_filesystems() instead.
*
* Note: check the dirty flag before waiting, so we don't
* hold up the sync while mounting a device. (The newly
* mounted device won't need syncing.)
*/
void sync_supers(void)
{
struct super_block *sb, *p = NULL;
spin_lock(&sb_lock);
list_for_each_entry(sb, &super_blocks, s_list) {
if (hlist_unhashed(&sb->s_instances))
continue;
if (sb->s_op->write_super && sb->s_dirt) {
sb->s_count++;
spin_unlock(&sb_lock);
down_read(&sb->s_umount);
if (sb->s_root && sb->s_dirt && (sb->s_flags & MS_BORN))
sb->s_op->write_super(sb);
up_read(&sb->s_umount);
spin_lock(&sb_lock);
if (p)
__put_super(p);
p = sb;
}
}
if (p)
__put_super(p);
spin_unlock(&sb_lock);
}
/**
* iterate_supers - call function for all active superblocks
* @f: function to call

View File

@ -37,11 +37,11 @@
*
* A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
* implement. However, this is not true for 'ubifs_writepage()', which may be
* called with @i_mutex unlocked. For example, when pdflush is doing background
* write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal"
* work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the
* "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()'
* we are only guaranteed that the page is locked.
* called with @i_mutex unlocked. For example, when flusher thread is doing
* background write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex.
* At "normal" work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g.
* in the "sys_write -> alloc_pages -> direct reclaim path". So, in
* 'ubifs_writepage()' we are only guaranteed that the page is locked.
*
* Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
* read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->

View File

@ -303,7 +303,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
mutex_lock(&ui->ui_mutex);
/*
* Due to races between write-back forced by budgeting
* (see 'sync_some_inodes()') and pdflush write-back, the inode may
* (see 'sync_some_inodes()') and background write-back, the inode may
* have already been synchronized, do not do this again. This might
* also happen if it was synchronized in an VFS operation, e.g.
* 'ubifs_link()'.

View File

@ -124,7 +124,6 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
void bdi_start_background_writeback(struct backing_dev_info *bdi);
int bdi_writeback_thread(void *data);
int bdi_has_dirty_io(struct backing_dev_info *bdi);
void bdi_arm_supers_timer(void);
void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);

View File

@ -1491,7 +1491,6 @@ struct sb_writers {
struct super_block {
struct list_head s_list; /* Keep this first */
dev_t s_dev; /* search index; _not_ kdev_t */
unsigned char s_dirt;
unsigned char s_blocksize_bits;
unsigned long s_blocksize;
loff_t s_maxbytes; /* Max file size */
@ -1861,7 +1860,6 @@ struct super_operations {
int (*drop_inode) (struct inode *);
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
void (*write_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
int (*freeze_fs) (struct super_block *);
int (*unfreeze_fs) (struct super_block *);
@ -2397,7 +2395,6 @@ extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
int datasync);
extern int vfs_fsync(struct file *file, int datasync);
extern int generic_write_sync(struct file *file, loff_t pos, loff_t count);
extern void sync_supers(void);
extern void emergency_sync(void);
extern void emergency_remount(void);
#ifdef CONFIG_BLOCK

View File

@ -104,7 +104,6 @@ static inline void wait_on_inode(struct inode *inode)
wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);
}
/*
* mm/page-writeback.c
*/

View File

@ -39,12 +39,6 @@ DEFINE_SPINLOCK(bdi_lock);
LIST_HEAD(bdi_list);
LIST_HEAD(bdi_pending_list);
static struct task_struct *sync_supers_tsk;
static struct timer_list sync_supers_timer;
static int bdi_sync_supers(void *);
static void sync_supers_timer_fn(unsigned long);
void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
{
if (wb1 < wb2) {
@ -250,12 +244,6 @@ static int __init default_bdi_init(void)
{
int err;
sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
BUG_ON(IS_ERR(sync_supers_tsk));
setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
bdi_arm_supers_timer();
err = bdi_init(&default_backing_dev_info);
if (!err)
bdi_register(&default_backing_dev_info, NULL, "default");
@ -270,46 +258,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
return wb_has_dirty_io(&bdi->wb);
}
/*
* kupdated() used to do this. We cannot do it from the bdi_forker_thread()
* or we risk deadlocking on ->s_umount. The longer term solution would be
* to implement sync_supers_bdi() or similar and simply do it from the
* bdi writeback thread individually.
*/
static int bdi_sync_supers(void *unused)
{
set_user_nice(current, 0);
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
schedule();
/*
* Do this periodically, like kupdated() did before.
*/
sync_supers();
}
return 0;
}
void bdi_arm_supers_timer(void)
{
unsigned long next;
if (!dirty_writeback_interval)
return;
next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
mod_timer(&sync_supers_timer, round_jiffies_up(next));
}
static void sync_supers_timer_fn(unsigned long unused)
{
wake_up_process(sync_supers_tsk);
bdi_arm_supers_timer();
}
static void wakeup_timer_fn(unsigned long data)
{
struct backing_dev_info *bdi = (struct backing_dev_info *)data;

View File

@ -1532,7 +1532,6 @@ int dirty_writeback_centisecs_handler(ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec(table, write, buffer, length, ppos);
bdi_arm_supers_timer();
return 0;
}