linux-sg2042/fs/autofs4/inode.c

354 lines
8.1 KiB
C
Raw Normal View History

/* -*- c -*- --------------------------------------------------------------- *
*
* linux/fs/autofs/inode.c
*
* Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
* Copyright 2005-2006 Ian Kent <raven@themaw.net>
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License, version 2, or at your
* option, any later version, incorporated herein by reference.
*
* ------------------------------------------------------------------------- */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/pagemap.h>
#include <linux/parser.h>
#include <linux/bitops.h>
#include <linux/magic.h>
#include "autofs_i.h"
#include <linux/module.h>
struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi)
{
struct autofs_info *ino = kzalloc(sizeof(*ino), GFP_KERNEL);
if (ino) {
autofs4: use look aside list for lookups A while ago a patch to resolve a deadlock during directory creation was merged. This delayed the hashing of lookup dentrys until the ->mkdir() (or ->symlink()) operation completed to ensure we always went through ->lookup() instead of also having processes go through ->revalidate() so our VFS locking remained consistent. Now we are seeing a couple of side affects of that change in situations with heavy mount activity. Two cases have been identified: 1) When a mount request is triggered, due to the delayed hashing, the directory created by user space for the mount point doesn't have the DCACHE_AUTOFS_PENDING flag set. In the case of an autofs multi-mount where a tree of mount point directories are created this can lead to the path walk continuing rather than the dentry being sent to the wait queue to wait for request completion. This is because, if the pending flag isn't set, the criteria for deciding this is a mount in progress fails to hold, namely that the dentry is not a mount point and has no subdirectories. 2) A mount request dentry is initially created negative and unhashed. It remains this way until the ->mkdir() callback completes. Since it is unhashed a fresh dentry is used when the user space mount request creates the mount point directory. This leaves the original dentry negative and unhashed. But revalidate has no way to tell the VFS that the dentry has changed, other than to force another ->lookup() by returning false, which is at best wastefull and at worst not possible. This results in an -ENOENT return from the original path walk when in fact the mount succeeded. To resolve this we need to ensure that the same dentry is used in all calls to ->lookup() during the course of a mount request. This patch achieves that by adding the initial dentry to a look aside list and removes it at ->mkdir() or ->symlink() completion (or when the dentry is released), since these are the only create operations autofs4 supports. Signed-off-by: Ian Kent <raven@themaw.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 12:30:12 +08:00
INIT_LIST_HEAD(&ino->active);
INIT_LIST_HEAD(&ino->expiring);
ino->last_used = jiffies;
ino->sbi = sbi;
autofs4: use look aside list for lookups A while ago a patch to resolve a deadlock during directory creation was merged. This delayed the hashing of lookup dentrys until the ->mkdir() (or ->symlink()) operation completed to ensure we always went through ->lookup() instead of also having processes go through ->revalidate() so our VFS locking remained consistent. Now we are seeing a couple of side affects of that change in situations with heavy mount activity. Two cases have been identified: 1) When a mount request is triggered, due to the delayed hashing, the directory created by user space for the mount point doesn't have the DCACHE_AUTOFS_PENDING flag set. In the case of an autofs multi-mount where a tree of mount point directories are created this can lead to the path walk continuing rather than the dentry being sent to the wait queue to wait for request completion. This is because, if the pending flag isn't set, the criteria for deciding this is a mount in progress fails to hold, namely that the dentry is not a mount point and has no subdirectories. 2) A mount request dentry is initially created negative and unhashed. It remains this way until the ->mkdir() callback completes. Since it is unhashed a fresh dentry is used when the user space mount request creates the mount point directory. This leaves the original dentry negative and unhashed. But revalidate has no way to tell the VFS that the dentry has changed, other than to force another ->lookup() by returning false, which is at best wastefull and at worst not possible. This results in an -ENOENT return from the original path walk when in fact the mount succeeded. To resolve this we need to ensure that the same dentry is used in all calls to ->lookup() during the course of a mount request. This patch achieves that by adding the initial dentry to a look aside list and removes it at ->mkdir() or ->symlink() completion (or when the dentry is released), since these are the only create operations autofs4 supports. Signed-off-by: Ian Kent <raven@themaw.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 12:30:12 +08:00
}
return ino;
}
void autofs4_clean_ino(struct autofs_info *ino)
{
ino->uid = 0;
ino->gid = 0;
ino->last_used = jiffies;
}
void autofs4_free_ino(struct autofs_info *ino)
{
kfree(ino);
}
void autofs4_kill_sb(struct super_block *sb)
{
struct autofs_sb_info *sbi = autofs4_sbi(sb);
/*
* In the event of a failure in get_sb_nodev the superblock
* info is not present so nothing else has been setup, so
[PATCH] autofs: fix error code path in autofs_fill_sb() When kernel is compiled with old version of autofs (CONFIG_AUTOFS_FS), and new (observed at least with 5.x.x) automount deamon is started, kernel correctly reports incompatible version of kernel and userland daemon, but then screws things up instead of correct handling of the error: autofs: kernel does not match daemon version ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- automount/4199 is trying to release lock (&type->s_umount_key) at: [<c0163b9e>] get_sb_nodev+0x76/0xa4 but there are no more locks to release! other info that might help us debug this: no locks held by automount/4199. stack backtrace: [<c0103b15>] dump_trace+0x68/0x1b2 [<c0103c77>] show_trace_log_lvl+0x18/0x2c [<c01041db>] show_trace+0xf/0x11 [<c010424d>] dump_stack+0x12/0x14 [<c012e02c>] print_unlock_inbalance_bug+0xe7/0xf3 [<c012fd4f>] lock_release+0x8d/0x164 [<c012b452>] up_write+0x14/0x27 [<c0163b9e>] get_sb_nodev+0x76/0xa4 [<c0163689>] vfs_kern_mount+0x83/0xf6 [<c016373e>] do_kern_mount+0x2d/0x3e [<c017513f>] do_mount+0x607/0x67a [<c0175224>] sys_mount+0x72/0xa4 [<c0102b96>] sysenter_past_esp+0x5f/0x99 DWARF2 unwinder stuck at sysenter_past_esp+0x5f/0x99 Leftover inexact backtrace: ======================= and then deadlock comes. The problem: autofs_fill_super() returns EINVAL to get_sb_nodev(), but before that, it calls kill_anon_super() to destroy the superblock which won't be needed. This is however way too soon to call kill_anon_super(), because get_sb_nodev() has to perform its own cleanup of the superblock first (deactivate_super(), etc.). The correct time to call kill_anon_super() is in the autofs_kill_sb() callback, which is called by deactivate_super() at proper time, when the superblock is ready to be killed. I can see the same faulty codepath also in autofs4. This patch solves issues in both filesystems in a same way - it postpones the kill_anon_super() until the proper time is signalized by deactivate_super() calling the kill_sb() callback. [raven@themaw.net: update comment] Signed-off-by: Jiri Kosina <jkosina@suse.cz> Acked-by: Ian Kent <raven@themaw.net> Cc: <stable@kernel.org> Signed-off-by: Ian Kent <raven@themaw.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-07 12:39:38 +08:00
* just call kill_anon_super when we are called from
* deactivate_super.
*/
if (!sbi)
[PATCH] autofs: fix error code path in autofs_fill_sb() When kernel is compiled with old version of autofs (CONFIG_AUTOFS_FS), and new (observed at least with 5.x.x) automount deamon is started, kernel correctly reports incompatible version of kernel and userland daemon, but then screws things up instead of correct handling of the error: autofs: kernel does not match daemon version ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- automount/4199 is trying to release lock (&type->s_umount_key) at: [<c0163b9e>] get_sb_nodev+0x76/0xa4 but there are no more locks to release! other info that might help us debug this: no locks held by automount/4199. stack backtrace: [<c0103b15>] dump_trace+0x68/0x1b2 [<c0103c77>] show_trace_log_lvl+0x18/0x2c [<c01041db>] show_trace+0xf/0x11 [<c010424d>] dump_stack+0x12/0x14 [<c012e02c>] print_unlock_inbalance_bug+0xe7/0xf3 [<c012fd4f>] lock_release+0x8d/0x164 [<c012b452>] up_write+0x14/0x27 [<c0163b9e>] get_sb_nodev+0x76/0xa4 [<c0163689>] vfs_kern_mount+0x83/0xf6 [<c016373e>] do_kern_mount+0x2d/0x3e [<c017513f>] do_mount+0x607/0x67a [<c0175224>] sys_mount+0x72/0xa4 [<c0102b96>] sysenter_past_esp+0x5f/0x99 DWARF2 unwinder stuck at sysenter_past_esp+0x5f/0x99 Leftover inexact backtrace: ======================= and then deadlock comes. The problem: autofs_fill_super() returns EINVAL to get_sb_nodev(), but before that, it calls kill_anon_super() to destroy the superblock which won't be needed. This is however way too soon to call kill_anon_super(), because get_sb_nodev() has to perform its own cleanup of the superblock first (deactivate_super(), etc.). The correct time to call kill_anon_super() is in the autofs_kill_sb() callback, which is called by deactivate_super() at proper time, when the superblock is ready to be killed. I can see the same faulty codepath also in autofs4. This patch solves issues in both filesystems in a same way - it postpones the kill_anon_super() until the proper time is signalized by deactivate_super() calling the kill_sb() callback. [raven@themaw.net: update comment] Signed-off-by: Jiri Kosina <jkosina@suse.cz> Acked-by: Ian Kent <raven@themaw.net> Cc: <stable@kernel.org> Signed-off-by: Ian Kent <raven@themaw.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-07 12:39:38 +08:00
goto out_kill_sb;
/* Free wait queues, close pipe */
autofs4_catatonic_mode(sbi);
sb->s_fs_info = NULL;
kfree(sbi);
[PATCH] autofs: fix error code path in autofs_fill_sb() When kernel is compiled with old version of autofs (CONFIG_AUTOFS_FS), and new (observed at least with 5.x.x) automount deamon is started, kernel correctly reports incompatible version of kernel and userland daemon, but then screws things up instead of correct handling of the error: autofs: kernel does not match daemon version ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- automount/4199 is trying to release lock (&type->s_umount_key) at: [<c0163b9e>] get_sb_nodev+0x76/0xa4 but there are no more locks to release! other info that might help us debug this: no locks held by automount/4199. stack backtrace: [<c0103b15>] dump_trace+0x68/0x1b2 [<c0103c77>] show_trace_log_lvl+0x18/0x2c [<c01041db>] show_trace+0xf/0x11 [<c010424d>] dump_stack+0x12/0x14 [<c012e02c>] print_unlock_inbalance_bug+0xe7/0xf3 [<c012fd4f>] lock_release+0x8d/0x164 [<c012b452>] up_write+0x14/0x27 [<c0163b9e>] get_sb_nodev+0x76/0xa4 [<c0163689>] vfs_kern_mount+0x83/0xf6 [<c016373e>] do_kern_mount+0x2d/0x3e [<c017513f>] do_mount+0x607/0x67a [<c0175224>] sys_mount+0x72/0xa4 [<c0102b96>] sysenter_past_esp+0x5f/0x99 DWARF2 unwinder stuck at sysenter_past_esp+0x5f/0x99 Leftover inexact backtrace: ======================= and then deadlock comes. The problem: autofs_fill_super() returns EINVAL to get_sb_nodev(), but before that, it calls kill_anon_super() to destroy the superblock which won't be needed. This is however way too soon to call kill_anon_super(), because get_sb_nodev() has to perform its own cleanup of the superblock first (deactivate_super(), etc.). The correct time to call kill_anon_super() is in the autofs_kill_sb() callback, which is called by deactivate_super() at proper time, when the superblock is ready to be killed. I can see the same faulty codepath also in autofs4. This patch solves issues in both filesystems in a same way - it postpones the kill_anon_super() until the proper time is signalized by deactivate_super() calling the kill_sb() callback. [raven@themaw.net: update comment] Signed-off-by: Jiri Kosina <jkosina@suse.cz> Acked-by: Ian Kent <raven@themaw.net> Cc: <stable@kernel.org> Signed-off-by: Ian Kent <raven@themaw.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-07 12:39:38 +08:00
out_kill_sb:
DPRINTK("shutting down");
kill_litter_super(sb);
}
static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
{
struct autofs_sb_info *sbi = autofs4_sbi(mnt->mnt_sb);
struct inode *root_inode = mnt->mnt_sb->s_root->d_inode;
if (!sbi)
return 0;
seq_printf(m, ",fd=%d", sbi->pipefd);
if (root_inode->i_uid != 0)
seq_printf(m, ",uid=%u", root_inode->i_uid);
if (root_inode->i_gid != 0)
seq_printf(m, ",gid=%u", root_inode->i_gid);
seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
seq_printf(m, ",minproto=%d", sbi->min_proto);
seq_printf(m, ",maxproto=%d", sbi->max_proto);
if (autofs_type_offset(sbi->type))
seq_printf(m, ",offset");
else if (autofs_type_direct(sbi->type))
seq_printf(m, ",direct");
else
seq_printf(m, ",indirect");
return 0;
}
static void autofs4_evict_inode(struct inode *inode)
{
end_writeback(inode);
kfree(inode->i_private);
}
static const struct super_operations autofs4_sops = {
.statfs = simple_statfs,
.show_options = autofs4_show_options,
.evict_inode = autofs4_evict_inode,
};
enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
Opt_indirect, Opt_direct, Opt_offset};
static const match_table_t tokens = {
{Opt_fd, "fd=%u"},
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_pgrp, "pgrp=%u"},
{Opt_minproto, "minproto=%u"},
{Opt_maxproto, "maxproto=%u"},
{Opt_indirect, "indirect"},
{Opt_direct, "direct"},
{Opt_offset, "offset"},
{Opt_err, NULL}
};
static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
*uid = current_uid();
*gid = current_gid();
*pgrp = task_pgrp_nr(current);
*minproto = AUTOFS_MIN_PROTO_VERSION;
*maxproto = AUTOFS_MAX_PROTO_VERSION;
*pipefd = -1;
if (!options)
return 1;
while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
continue;
token = match_token(p, tokens, args);
switch (token) {
case Opt_fd:
if (match_int(args, pipefd))
return 1;
break;
case Opt_uid:
if (match_int(args, &option))
return 1;
*uid = option;
break;
case Opt_gid:
if (match_int(args, &option))
return 1;
*gid = option;
break;
case Opt_pgrp:
if (match_int(args, &option))
return 1;
*pgrp = option;
break;
case Opt_minproto:
if (match_int(args, &option))
return 1;
*minproto = option;
break;
case Opt_maxproto:
if (match_int(args, &option))
return 1;
*maxproto = option;
break;
case Opt_indirect:
set_autofs_type_indirect(type);
break;
case Opt_direct:
set_autofs_type_direct(type);
break;
case Opt_offset:
set_autofs_type_offset(type);
break;
default:
return 1;
}
}
return (*pipefd < 0);
}
int autofs4_fill_super(struct super_block *s, void *data, int silent)
{
struct inode * root_inode;
struct dentry * root;
struct file * pipe;
int pipefd;
struct autofs_sb_info *sbi;
struct autofs_info *ino;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
goto fail_unlock;
DPRINTK("starting up, sbi = %p",sbi);
s->s_fs_info = sbi;
sbi->magic = AUTOFS_SBI_MAGIC;
sbi->pipefd = -1;
sbi->pipe = NULL;
sbi->catatonic = 1;
sbi->exp_timeout = 0;
sbi->oz_pgrp = task_pgrp_nr(current);
sbi->sb = s;
sbi->version = 0;
sbi->sub_version = 0;
set_autofs_type_indirect(&sbi->type);
sbi->min_proto = 0;
sbi->max_proto = 0;
mutex_init(&sbi->wq_mutex);
spin_lock_init(&sbi->fs_lock);
sbi->queues = NULL;
spin_lock_init(&sbi->lookup_lock);
autofs4: use look aside list for lookups A while ago a patch to resolve a deadlock during directory creation was merged. This delayed the hashing of lookup dentrys until the ->mkdir() (or ->symlink()) operation completed to ensure we always went through ->lookup() instead of also having processes go through ->revalidate() so our VFS locking remained consistent. Now we are seeing a couple of side affects of that change in situations with heavy mount activity. Two cases have been identified: 1) When a mount request is triggered, due to the delayed hashing, the directory created by user space for the mount point doesn't have the DCACHE_AUTOFS_PENDING flag set. In the case of an autofs multi-mount where a tree of mount point directories are created this can lead to the path walk continuing rather than the dentry being sent to the wait queue to wait for request completion. This is because, if the pending flag isn't set, the criteria for deciding this is a mount in progress fails to hold, namely that the dentry is not a mount point and has no subdirectories. 2) A mount request dentry is initially created negative and unhashed. It remains this way until the ->mkdir() callback completes. Since it is unhashed a fresh dentry is used when the user space mount request creates the mount point directory. This leaves the original dentry negative and unhashed. But revalidate has no way to tell the VFS that the dentry has changed, other than to force another ->lookup() by returning false, which is at best wastefull and at worst not possible. This results in an -ENOENT return from the original path walk when in fact the mount succeeded. To resolve this we need to ensure that the same dentry is used in all calls to ->lookup() during the course of a mount request. This patch achieves that by adding the initial dentry to a look aside list and removes it at ->mkdir() or ->symlink() completion (or when the dentry is released), since these are the only create operations autofs4 supports. Signed-off-by: Ian Kent <raven@themaw.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 12:30:12 +08:00
INIT_LIST_HEAD(&sbi->active_list);
INIT_LIST_HEAD(&sbi->expiring_list);
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = AUTOFS_SUPER_MAGIC;
s->s_op = &autofs4_sops;
s->s_d_op = &autofs4_dentry_operations;
s->s_time_gran = 1;
/*
* Get the root inode and dentry, but defer checking for errors.
*/
ino = autofs4_new_ino(sbi);
if (!ino)
goto fail_free;
root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
if (!root_inode)
goto fail_ino;
root = d_alloc_root(root_inode);
if (!root)
goto fail_iput;
pipe = NULL;
root->d_fsdata = ino;
/* Can this call block? */
if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid,
&sbi->oz_pgrp, &sbi->type, &sbi->min_proto,
&sbi->max_proto)) {
printk("autofs: called with bogus options\n");
goto fail_dput;
}
if (autofs_type_trigger(sbi->type))
__managed_dentry_set_managed(root);
root_inode->i_fop = &autofs4_root_operations;
root_inode->i_op = &autofs4_dir_inode_operations;
/* Couldn't this be tested earlier? */
if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION ||
sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) {
printk("autofs: kernel does not match daemon version "
"daemon (%d, %d) kernel (%d, %d)\n",
sbi->min_proto, sbi->max_proto,
AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION);
goto fail_dput;
}
/* Establish highest kernel protocol version */
if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION)
sbi->version = AUTOFS_MAX_PROTO_VERSION;
else
sbi->version = sbi->max_proto;
sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp);
pipe = fget(pipefd);
if (!pipe) {
printk("autofs: could not open pipe file descriptor\n");
goto fail_dput;
}
if (!pipe->f_op || !pipe->f_op->write)
goto fail_fput;
sbi->pipe = pipe;
sbi->pipefd = pipefd;
sbi->catatonic = 0;
/*
* Success! Install the root dentry now to indicate completion.
*/
s->s_root = root;
return 0;
/*
* Failure ... clean up.
*/
fail_fput:
printk("autofs: pipe file descriptor does not contain proper ops\n");
fput(pipe);
/* fall through */
fail_dput:
dput(root);
goto fail_free;
fail_iput:
printk("autofs: get root dentry failed\n");
iput(root_inode);
fail_ino:
kfree(ino);
fail_free:
kfree(sbi);
s->s_fs_info = NULL;
fail_unlock:
return -EINVAL;
}
struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
{
struct inode *inode = new_inode(sb);
if (inode == NULL)
return NULL;
inode->i_mode = mode;
if (sb->s_root) {
inode->i_uid = sb->s_root->d_inode->i_uid;
inode->i_gid = sb->s_root->d_inode->i_gid;
}
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_ino = get_next_ino();
if (S_ISDIR(mode)) {
set_nlink(inode, 2);
inode->i_op = &autofs4_dir_inode_operations;
inode->i_fop = &autofs4_dir_operations;
} else if (S_ISLNK(mode)) {
inode->i_op = &autofs4_symlink_inode_operations;
}
return inode;
}