proc: Fix proc_sys_prune_dcache to hold a sb reference
Andrei Vagin writes: FYI: This bug has been reproduced on 4.11.7 > BUG: Dentry ffff895a3dd01240{i=4e7c09a,n=lo} still in use (1) [unmount of proc proc] > ------------[ cut here ]------------ > WARNING: CPU: 1 PID: 13588 at fs/dcache.c:1445 umount_check+0x6e/0x80 > CPU: 1 PID: 13588 Comm: kworker/1:1 Not tainted 4.11.7-200.fc25.x86_64 #1 > Hardware name: CompuLab sbc-flt1/fitlet, BIOS SBCFLT_0.08.04 06/27/2015 > Workqueue: events proc_cleanup_work > Call Trace: > dump_stack+0x63/0x86 > __warn+0xcb/0xf0 > warn_slowpath_null+0x1d/0x20 > umount_check+0x6e/0x80 > d_walk+0xc6/0x270 > ? dentry_free+0x80/0x80 > do_one_tree+0x26/0x40 > shrink_dcache_for_umount+0x2d/0x90 > generic_shutdown_super+0x1f/0xf0 > kill_anon_super+0x12/0x20 > proc_kill_sb+0x40/0x50 > deactivate_locked_super+0x43/0x70 > deactivate_super+0x5a/0x60 > cleanup_mnt+0x3f/0x90 > mntput_no_expire+0x13b/0x190 > kern_unmount+0x3e/0x50 > pid_ns_release_proc+0x15/0x20 > proc_cleanup_work+0x15/0x20 > process_one_work+0x197/0x450 > worker_thread+0x4e/0x4a0 > kthread+0x109/0x140 > ? process_one_work+0x450/0x450 > ? kthread_park+0x90/0x90 > ret_from_fork+0x2c/0x40 > ---[ end trace e1c109611e5d0b41 ]--- > VFS: Busy inodes after unmount of proc. Self-destruct in 5 seconds. Have a nice day... > BUG: unable to handle kernel NULL pointer dereference at (null) > IP: _raw_spin_lock+0xc/0x30 > PGD 0 Fix this by taking a reference to the super block in proc_sys_prune_dcache. The superblock reference is the core of the fix; however, the sysctl_inodes list is converted to a hlist so that hlist_del_init_rcu may be used. This allows proc_sys_prune_dcache to remove inodes from the sysctl_inodes list, while not causing problems for proc_sys_evict_inode if it later chooses to remove the inode from the sysctl_inodes list. Removing inodes from the sysctl_inodes list allows proc_sys_prune_dcache to have a progress guarantee, while still being able to drop all locks. 
The fact that head->unregistering is set in start_unregistering ensures that no more inodes will be added to the sysctl_inodes list. Previously the code did a dance where it delayed calling iput until the next entry in the list was being considered to ensure the inode remained on the sysctl_inodes list until the next entry was walked to. The structure of the loop in this patch does not need that, so it is much easier to understand and maintain. Cc: stable@vger.kernel.org Reported-by: Andrei Vagin <avagin@gmail.com> Tested-by: Andrei Vagin <avagin@openvz.org> Fixes:ace0c791e6
("proc/sysctl: Don't grab i_lock under sysctl_lock.") Fixes:d6cffbbe9a
("proc/sysctl: prune stale dentries during unregistering") Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
This commit is contained in:
parent
296990deb3
commit
2fd1d2c4ce
|
@ -67,7 +67,7 @@ struct proc_inode {
|
|||
struct proc_dir_entry *pde;
|
||||
struct ctl_table_header *sysctl;
|
||||
struct ctl_table *sysctl_entry;
|
||||
struct list_head sysctl_inodes;
|
||||
struct hlist_node sysctl_inodes;
|
||||
const struct proc_ns_operations *ns_ops;
|
||||
struct inode vfs_inode;
|
||||
};
|
||||
|
|
|
@ -191,7 +191,7 @@ static void init_header(struct ctl_table_header *head,
|
|||
head->set = set;
|
||||
head->parent = NULL;
|
||||
head->node = node;
|
||||
INIT_LIST_HEAD(&head->inodes);
|
||||
INIT_HLIST_HEAD(&head->inodes);
|
||||
if (node) {
|
||||
struct ctl_table *entry;
|
||||
for (entry = table; entry->procname; entry++, node++)
|
||||
|
@ -261,25 +261,42 @@ static void unuse_table(struct ctl_table_header *p)
|
|||
complete(p->unregistering);
|
||||
}
|
||||
|
||||
/* called under sysctl_lock */
|
||||
static void proc_sys_prune_dcache(struct ctl_table_header *head)
|
||||
{
|
||||
struct inode *inode, *prev = NULL;
|
||||
struct inode *inode;
|
||||
struct proc_inode *ei;
|
||||
struct hlist_node *node;
|
||||
struct super_block *sb;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
|
||||
inode = igrab(&ei->vfs_inode);
|
||||
if (inode) {
|
||||
rcu_read_unlock();
|
||||
iput(prev);
|
||||
prev = inode;
|
||||
d_prune_aliases(inode);
|
||||
for (;;) {
|
||||
node = hlist_first_rcu(&head->inodes);
|
||||
if (!node)
|
||||
break;
|
||||
ei = hlist_entry(node, struct proc_inode, sysctl_inodes);
|
||||
spin_lock(&sysctl_lock);
|
||||
hlist_del_init_rcu(&ei->sysctl_inodes);
|
||||
spin_unlock(&sysctl_lock);
|
||||
|
||||
inode = &ei->vfs_inode;
|
||||
sb = inode->i_sb;
|
||||
if (!atomic_inc_not_zero(&sb->s_active))
|
||||
continue;
|
||||
inode = igrab(inode);
|
||||
rcu_read_unlock();
|
||||
if (unlikely(!inode)) {
|
||||
deactivate_super(sb);
|
||||
rcu_read_lock();
|
||||
continue;
|
||||
}
|
||||
|
||||
d_prune_aliases(inode);
|
||||
iput(inode);
|
||||
deactivate_super(sb);
|
||||
|
||||
rcu_read_lock();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
iput(prev);
|
||||
}
|
||||
|
||||
/* called under sysctl_lock, will reacquire if has to wait */
|
||||
|
@ -461,7 +478,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
|
|||
}
|
||||
ei->sysctl = head;
|
||||
ei->sysctl_entry = table;
|
||||
list_add_rcu(&ei->sysctl_inodes, &head->inodes);
|
||||
hlist_add_head_rcu(&ei->sysctl_inodes, &head->inodes);
|
||||
head->count++;
|
||||
spin_unlock(&sysctl_lock);
|
||||
|
||||
|
@ -489,7 +506,7 @@ out:
|
|||
void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
|
||||
{
|
||||
spin_lock(&sysctl_lock);
|
||||
list_del_rcu(&PROC_I(inode)->sysctl_inodes);
|
||||
hlist_del_init_rcu(&PROC_I(inode)->sysctl_inodes);
|
||||
if (!--head->count)
|
||||
kfree_rcu(head, rcu);
|
||||
spin_unlock(&sysctl_lock);
|
||||
|
|
|
@ -143,7 +143,7 @@ struct ctl_table_header
|
|||
struct ctl_table_set *set;
|
||||
struct ctl_dir *parent;
|
||||
struct ctl_node *node;
|
||||
struct list_head inodes; /* head for proc_inode->sysctl_inodes */
|
||||
struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */
|
||||
};
|
||||
|
||||
struct ctl_dir {
|
||||
|
|
Loading…
Reference in New Issue