OpenCloudOS-Kernel/net/sunrpc/rpc_pipe.c

1222 lines
28 KiB
C
Raw Normal View History

/*
* net/sunrpc/rpc_pipe.c
*
* Userland/kernel interface for rpcauth_gss.
* Code shamelessly plagiarized from fs/nfsd/nfsctl.c
* and fs/sysfs/inode.c
*
* Copyright (c) 2002, Trond Myklebust <trond.myklebust@fys.uio.no>
*
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/fsnotify.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <asm/ioctls.h>
#include <linux/poll.h>
#include <linux/wait.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/clnt.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/cache.h>
#include <linux/nsproxy.h>
#include <linux/notifier.h>
#include "netns.h"
#include "sunrpc.h"
#define RPCDBG_FACILITY RPCDBG_DEBUG
#define NET_NAME(net) ((net == &init_net) ? " (init_net)" : "")
static struct file_system_type rpc_pipe_fs_type;
static struct kmem_cache *rpc_inode_cachep __read_mostly;
#define RPC_UPCALL_TIMEOUT (30*HZ)
static BLOCKING_NOTIFIER_HEAD(rpc_pipefs_notifier_list);
int rpc_pipefs_notifier_register(struct notifier_block *nb)
{
return blocking_notifier_chain_cond_register(&rpc_pipefs_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_register);
void rpc_pipefs_notifier_unregister(struct notifier_block *nb)
{
blocking_notifier_chain_unregister(&rpc_pipefs_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(rpc_pipefs_notifier_unregister);
static void rpc_purge_list(wait_queue_head_t *waitq, struct list_head *head,
void (*destroy_msg)(struct rpc_pipe_msg *), int err)
{
struct rpc_pipe_msg *msg;
if (list_empty(head))
return;
do {
msg = list_entry(head->next, struct rpc_pipe_msg, list);
list_del_init(&msg->list);
msg->errno = err;
destroy_msg(msg);
} while (!list_empty(head));
if (waitq)
wake_up(waitq);
}
static void
2006-11-22 22:55:48 +08:00
rpc_timeout_upcall_queue(struct work_struct *work)
{
LIST_HEAD(free_list);
struct rpc_pipe *pipe =
container_of(work, struct rpc_pipe, queue_timeout.work);
void (*destroy_msg)(struct rpc_pipe_msg *);
struct dentry *dentry;
spin_lock(&pipe->lock);
destroy_msg = pipe->ops->destroy_msg;
if (pipe->nreaders == 0) {
list_splice_init(&pipe->pipe, &free_list);
pipe->pipelen = 0;
}
dentry = dget(pipe->dentry);
spin_unlock(&pipe->lock);
rpc_purge_list(dentry ? &RPC_I(dentry->d_inode)->waitq : NULL,
&free_list, destroy_msg, -ETIMEDOUT);
dput(dentry);
}
ssize_t rpc_pipe_generic_upcall(struct file *filp, struct rpc_pipe_msg *msg,
char __user *dst, size_t buflen)
{
char *data = (char *)msg->data + msg->copied;
size_t mlen = min(msg->len - msg->copied, buflen);
unsigned long left;
left = copy_to_user(dst, data, mlen);
if (left == mlen) {
msg->errno = -EFAULT;
return -EFAULT;
}
mlen -= left;
msg->copied += mlen;
msg->errno = 0;
return mlen;
}
EXPORT_SYMBOL_GPL(rpc_pipe_generic_upcall);
/**
* rpc_queue_upcall - queue an upcall message to userspace
* @pipe: upcall pipe on which to queue given message
* @msg: message to queue
*
* Call with an @inode created by rpc_mkpipe() to queue an upcall.
* A userspace process may then later read the upcall by performing a
* read on an open file for this inode. It is up to the caller to
* initialize the fields of @msg (other than @msg->list) appropriately.
*/
int
rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg)
{
int res = -EPIPE;
struct dentry *dentry;
spin_lock(&pipe->lock);
if (pipe->nreaders) {
list_add_tail(&msg->list, &pipe->pipe);
pipe->pipelen += msg->len;
res = 0;
} else if (pipe->flags & RPC_PIPE_WAIT_FOR_OPEN) {
if (list_empty(&pipe->pipe))
queue_delayed_work(rpciod_workqueue,
&pipe->queue_timeout,
RPC_UPCALL_TIMEOUT);
list_add_tail(&msg->list, &pipe->pipe);
pipe->pipelen += msg->len;
res = 0;
}
dentry = dget(pipe->dentry);
spin_unlock(&pipe->lock);
if (dentry) {
wake_up(&RPC_I(dentry->d_inode)->waitq);
dput(dentry);
}
return res;
}
EXPORT_SYMBOL_GPL(rpc_queue_upcall);
static inline void
rpc_inode_setowner(struct inode *inode, void *private)
{
RPC_I(inode)->private = private;
}
static void
rpc_close_pipes(struct inode *inode)
{
struct rpc_pipe *pipe = RPC_I(inode)->pipe;
int need_release;
LIST_HEAD(free_list);
mutex_lock(&inode->i_mutex);
spin_lock(&pipe->lock);
need_release = pipe->nreaders != 0 || pipe->nwriters != 0;
pipe->nreaders = 0;
list_splice_init(&pipe->in_upcall, &free_list);
list_splice_init(&pipe->pipe, &free_list);
pipe->pipelen = 0;
pipe->dentry = NULL;
spin_unlock(&pipe->lock);
rpc_purge_list(&RPC_I(inode)->waitq, &free_list, pipe->ops->destroy_msg, -EPIPE);
pipe->nwriters = 0;
if (need_release && pipe->ops->release_pipe)
pipe->ops->release_pipe(inode);
cancel_delayed_work_sync(&pipe->queue_timeout);
rpc_inode_setowner(inode, NULL);
RPC_I(inode)->pipe = NULL;
mutex_unlock(&inode->i_mutex);
}
static struct inode *
rpc_alloc_inode(struct super_block *sb)
{
struct rpc_inode *rpci;
rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL);
if (!rpci)
return NULL;
return &rpci->vfs_inode;
}
static void
2011-01-07 14:49:49 +08:00
rpc_i_callback(struct rcu_head *head)
{
2011-01-07 14:49:49 +08:00
struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
}
2011-01-07 14:49:49 +08:00
static void
rpc_destroy_inode(struct inode *inode)
{
call_rcu(&inode->i_rcu, rpc_i_callback);
}
static int
rpc_pipe_open(struct inode *inode, struct file *filp)
{
struct rpc_pipe *pipe;
int first_open;
int res = -ENXIO;
mutex_lock(&inode->i_mutex);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
first_open = pipe->nreaders == 0 && pipe->nwriters == 0;
if (first_open && pipe->ops->open_pipe) {
res = pipe->ops->open_pipe(inode);
if (res)
goto out;
}
if (filp->f_mode & FMODE_READ)
pipe->nreaders++;
if (filp->f_mode & FMODE_WRITE)
pipe->nwriters++;
res = 0;
out:
mutex_unlock(&inode->i_mutex);
return res;
}
static int
rpc_pipe_release(struct inode *inode, struct file *filp)
{
struct rpc_pipe *pipe;
struct rpc_pipe_msg *msg;
int last_close;
mutex_lock(&inode->i_mutex);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
msg = filp->private_data;
if (msg != NULL) {
spin_lock(&pipe->lock);
msg->errno = -EAGAIN;
list_del_init(&msg->list);
spin_unlock(&pipe->lock);
pipe->ops->destroy_msg(msg);
}
if (filp->f_mode & FMODE_WRITE)
pipe->nwriters --;
if (filp->f_mode & FMODE_READ) {
pipe->nreaders --;
if (pipe->nreaders == 0) {
LIST_HEAD(free_list);
spin_lock(&pipe->lock);
list_splice_init(&pipe->pipe, &free_list);
pipe->pipelen = 0;
spin_unlock(&pipe->lock);
rpc_purge_list(&RPC_I(inode)->waitq, &free_list,
pipe->ops->destroy_msg, -EAGAIN);
}
}
last_close = pipe->nwriters == 0 && pipe->nreaders == 0;
if (last_close && pipe->ops->release_pipe)
pipe->ops->release_pipe(inode);
out:
mutex_unlock(&inode->i_mutex);
return 0;
}
static ssize_t
rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
{
struct inode *inode = file_inode(filp);
struct rpc_pipe *pipe;
struct rpc_pipe_msg *msg;
int res = 0;
mutex_lock(&inode->i_mutex);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL) {
res = -EPIPE;
goto out_unlock;
}
msg = filp->private_data;
if (msg == NULL) {
spin_lock(&pipe->lock);
if (!list_empty(&pipe->pipe)) {
msg = list_entry(pipe->pipe.next,
struct rpc_pipe_msg,
list);
list_move(&msg->list, &pipe->in_upcall);
pipe->pipelen -= msg->len;
filp->private_data = msg;
msg->copied = 0;
}
spin_unlock(&pipe->lock);
if (msg == NULL)
goto out_unlock;
}
/* NOTE: it is up to the callback to update msg->copied */
res = pipe->ops->upcall(filp, msg, buf, len);
if (res < 0 || msg->len == msg->copied) {
filp->private_data = NULL;
spin_lock(&pipe->lock);
list_del_init(&msg->list);
spin_unlock(&pipe->lock);
pipe->ops->destroy_msg(msg);
}
out_unlock:
mutex_unlock(&inode->i_mutex);
return res;
}
static ssize_t
rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset)
{
struct inode *inode = file_inode(filp);
int res;
mutex_lock(&inode->i_mutex);
res = -EPIPE;
if (RPC_I(inode)->pipe != NULL)
res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len);
mutex_unlock(&inode->i_mutex);
return res;
}
static unsigned int
rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
{
struct inode *inode = file_inode(filp);
struct rpc_inode *rpci = RPC_I(inode);
unsigned int mask = POLLOUT | POLLWRNORM;
poll_wait(filp, &rpci->waitq, wait);
mutex_lock(&inode->i_mutex);
if (rpci->pipe == NULL)
mask |= POLLERR | POLLHUP;
else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
mask |= POLLIN | POLLRDNORM;
mutex_unlock(&inode->i_mutex);
return mask;
}
static long
rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct rpc_pipe *pipe;
int len;
switch (cmd) {
case FIONREAD:
mutex_lock(&inode->i_mutex);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL) {
mutex_unlock(&inode->i_mutex);
return -EPIPE;
}
spin_lock(&pipe->lock);
len = pipe->pipelen;
if (filp->private_data) {
struct rpc_pipe_msg *msg;
msg = filp->private_data;
len += msg->len - msg->copied;
}
spin_unlock(&pipe->lock);
mutex_unlock(&inode->i_mutex);
return put_user(len, (int __user *)arg);
default:
return -EINVAL;
}
}
static const struct file_operations rpc_pipe_fops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.read = rpc_pipe_read,
.write = rpc_pipe_write,
.poll = rpc_pipe_poll,
.unlocked_ioctl = rpc_pipe_ioctl,
.open = rpc_pipe_open,
.release = rpc_pipe_release,
};
static int
rpc_show_info(struct seq_file *m, void *v)
{
struct rpc_clnt *clnt = m->private;
rcu_read_lock();
seq_printf(m, "RPC server: %s\n",
rcu_dereference(clnt->cl_xprt)->servername);
seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
clnt->cl_prog, clnt->cl_vers);
seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT));
rcu_read_unlock();
return 0;
}
static int
rpc_info_open(struct inode *inode, struct file *file)
{
struct rpc_clnt *clnt = NULL;
int ret = single_open(file, rpc_show_info, NULL);
if (!ret) {
struct seq_file *m = file->private_data;
spin_lock(&file->f_path.dentry->d_lock);
if (!d_unhashed(file->f_path.dentry))
clnt = RPC_I(inode)->private;
if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) {
spin_unlock(&file->f_path.dentry->d_lock);
m->private = clnt;
} else {
spin_unlock(&file->f_path.dentry->d_lock);
single_release(inode, file);
ret = -EINVAL;
}
}
return ret;
}
static int
rpc_info_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
struct rpc_clnt *clnt = (struct rpc_clnt *)m->private;
if (clnt)
rpc_release_client(clnt);
return single_release(inode, file);
}
static const struct file_operations rpc_info_operations = {
.owner = THIS_MODULE,
.open = rpc_info_open,
.read = seq_read,
.llseek = seq_lseek,
.release = rpc_info_release,
};
/*
* Description of fs contents.
*/
struct rpc_filelist {
const char *name;
const struct file_operations *i_fop;
umode_t mode;
};
static int rpc_delete_dentry(const struct dentry *dentry)
{
return 1;
}
static const struct dentry_operations rpc_dentry_operations = {
.d_delete = rpc_delete_dentry,
};
static struct inode *
rpc_get_inode(struct super_block *sb, umode_t mode)
{
struct inode *inode = new_inode(sb);
if (!inode)
return NULL;
inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
default:
break;
}
return inode;
}
static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
umode_t mode,
const struct file_operations *i_fop,
void *private)
{
struct inode *inode;
kernel panic when mount NFSv4 On Tue, 2010-12-14 at 16:58 +0800, Mi Jinlong wrote: > Hi, > > When testing NFSv4 at RHEL6 with kernel 2.6.32, I got a kernel panic > at NFS client's __rpc_create_common function. > > The panic place is: > rpc_mkpipe > __rpc_lookup_create() <=== find pipefile *idmap* > __rpc_mkpipe() <=== pipefile is *idmap* > __rpc_create_common() > ****** BUG_ON(!d_unhashed(dentry)); ****** *panic* > > It means that the dentry's d_flags have be set DCACHE_UNHASHED, > but it should not be set here. > > Is someone known this bug? or give me some idea? > > A reproduce program is append, but it can't reproduce the bug every time. > the export is: "/nfsroot *(rw,no_root_squash,fsid=0,insecure)" > > And the panic message is append. > > ============================================================================ > #!/bin/sh > > LOOPTOTAL=768 > LOOPCOUNT=0 > ret=0 > > while [ $LOOPCOUNT -ne $LOOPTOTAL ] > do > ((LOOPCOUNT += 1)) > service nfs restart > /usr/sbin/rpc.idmapd > mount -t nfs4 127.0.0.1:/ /mnt|| return 1; > ls -l /var/lib/nfs/rpc_pipefs/nfs/*/ > umount /mnt > echo $LOOPCOUNT > done > > =============================================================================== > Code: af 60 01 00 00 89 fa 89 f0 e8 64 cf 89 f0 e8 5c 7c 64 cf 31 c0 8b 5c 24 10 8b > 74 24 14 8b 7c 24 18 8b 6c 24 1c 83 c4 20 c3 <0f> 0b eb fc 8b 46 28 c7 44 24 08 20 > de ee f0 c7 44 24 04 56 ea > EIP:[<f0ee92ea>] __rpc_create_common+0x8a/0xc0 [sunrpc] SS:ESP 0068:eccb5d28 > ---[ end trace 8f5606cd08928ed2]--- > Kernel panic - not syncing: Fatal exception > Pid:7131, comm: mount.nfs4 Tainted: G D -------------------2.6.32 #1 > Call Trace: > [<c080ad18>] ? panic+0x42/0xed > [<c080e42c>] ? oops_end+0xbc/0xd0 > [<c040b090>] ? do_invalid_op+0x0/0x90 > [<c040b10f>] ? do_invalid_op+0x7f/0x90 > [<f0ee92ea>] ? __rpc_create_common+0x8a/0xc0[sunrpc] > [<f0edc433>] ? rpc_free_task+0x33/0x70[sunrpc] > [<f0ed6508>] ? prc_call_sync+0x48/0x60[sunrpc] > [<f0ed656e>] ? rpc_ping+0x4e/0x60[sunrpc] > [<f0ed6eaf>] ? rpc_create+0x38f/0x4f0[sunrpc] > [<c080d80b>] ? error_code+0x73/0x78 > [<f0ee92ea>] ? __rpc_create_common+0x8a/0xc0[sunrpc] > [<c0532bda>] ? d_lookup+0x2a/0x40 > [<f0ee94b1>] ? rpc_mkpipe+0x111/0x1b0[sunrpc] > [<f10a59f4>] ? nfs_create_rpc_client+0xb4/0xf0[nfs] > [<f10d6c6d>] ? nfs_fscache_get_client_cookie+0x1d/0x50[nfs] > [<f10d3fcb>] ? nfs_idmap_new+0x7b/0x140[nfs] > [<c05e76aa>] ? strlcpy+0x3a/0x60 > [<f10a60ca>] ? nfs4_set_client+0xea/0x2b0[nfs] > [<f10a6d0c>] ? nfs4_create_server+0xac/0x1b0[nfs] > [<c04f1400>] ? krealloc+0x40/0x50 > [<f10b0e8b>] ? nfs4_remote_get_sb+0x6b/0x250[nfs] > [<c04f14ec>] ? kstrdup+0x3c/0x60 > [<c0520739>] ? vfs_kern_mount+0x69/0x170 > [<f10b1a3c>] ? nfs_do_root_mount+0x6c/0xa0[nfs] > [<f10b1b47>] ? nfs4_try_mount+0x37/0xa0[nfs] > [<f10afe6d>] ? nfs4_validate_text_mount_data+-x7d/0xf0[nfs] > [<f10b1c42>] ? nfs4_get_sb+0x92/0x2f0 > [<c0520739>] ? vfs_kern_mount+0x69/0x170 > [<c05366d2>] ? get_fs_type+0x32/0xb0 > [<c052089f>] ? do_kern_mount+0x3f/0xe0 > [<c053954f>] ? do_mount+0x2ef/0x740 > [<c0537740>] ? copy_mount_options+0xb0/0x120 > [<c0539a0e>] ? sys_mount+0x6e/0xa0 Hi, Does the following patch fix the problem? Cheers Trond -------------------------- SUNRPC: Fix a BUG in __rpc_create_common From: Trond Myklebust <Trond.Myklebust@netapp.com> Mi Jinlong reports: When testing NFSv4 at RHEL6 with kernel 2.6.32, I got a kernel panic at NFS client's __rpc_create_common function. The panic place is: rpc_mkpipe __rpc_lookup_create() <=== find pipefile *idmap* __rpc_mkpipe() <=== pipefile is *idmap* __rpc_create_common() ****** BUG_ON(!d_unhashed(dentry)); ****** *panic* The test is wrong: we can find ourselves with a hashed negative dentry here if the idmapper tried to look up the file before we got round to creating it. Just replace the BUG_ON() with a d_drop(dentry). Reported-by: Mi Jinlong <mijinlong@cn.fujitsu.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
2010-12-21 05:19:26 +08:00
d_drop(dentry);
inode = rpc_get_inode(dir->i_sb, mode);
if (!inode)
goto out_err;
inode->i_ino = iunique(dir->i_sb, 100);
if (i_fop)
inode->i_fop = i_fop;
if (private)
rpc_inode_setowner(inode, private);
d_add(dentry, inode);
return 0;
out_err:
printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
__FILE__, __func__, dentry->d_name.name);
dput(dentry);
return -ENOMEM;
}
static int __rpc_create(struct inode *dir, struct dentry *dentry,
umode_t mode,
const struct file_operations *i_fop,
void *private)
{
int err;
err = __rpc_create_common(dir, dentry, S_IFREG | mode, i_fop, private);
if (err)
return err;
fsnotify_create(dir, dentry);
return 0;
}
static int __rpc_mkdir(struct inode *dir, struct dentry *dentry,
umode_t mode,
const struct file_operations *i_fop,
void *private)
{
int err;
err = __rpc_create_common(dir, dentry, S_IFDIR | mode, i_fop, private);
if (err)
return err;
inc_nlink(dir);
fsnotify_mkdir(dir, dentry);
return 0;
}
static void
init_pipe(struct rpc_pipe *pipe)
{
pipe->nreaders = 0;
pipe->nwriters = 0;
INIT_LIST_HEAD(&pipe->in_upcall);
INIT_LIST_HEAD(&pipe->in_downcall);
INIT_LIST_HEAD(&pipe->pipe);
pipe->pipelen = 0;
INIT_DELAYED_WORK(&pipe->queue_timeout,
rpc_timeout_upcall_queue);
pipe->ops = NULL;
spin_lock_init(&pipe->lock);
pipe->dentry = NULL;
}
void rpc_destroy_pipe_data(struct rpc_pipe *pipe)
{
kfree(pipe);
}
EXPORT_SYMBOL_GPL(rpc_destroy_pipe_data);
struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags)
{
struct rpc_pipe *pipe;
pipe = kzalloc(sizeof(struct rpc_pipe), GFP_KERNEL);
if (!pipe)
return ERR_PTR(-ENOMEM);
init_pipe(pipe);
pipe->ops = ops;
pipe->flags = flags;
return pipe;
}
EXPORT_SYMBOL_GPL(rpc_mkpipe_data);
static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry,
umode_t mode,
const struct file_operations *i_fop,
void *private,
struct rpc_pipe *pipe)
{
struct rpc_inode *rpci;
int err;
err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private);
if (err)
return err;
rpci = RPC_I(dentry->d_inode);
rpci->private = private;
rpci->pipe = pipe;
fsnotify_create(dir, dentry);
return 0;
}
static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
{
int ret;
dget(dentry);
ret = simple_rmdir(dir, dentry);
d_delete(dentry);
dput(dentry);
return ret;
}
int rpc_rmdir(struct dentry *dentry)
{
struct dentry *parent;
struct inode *dir;
int error;
parent = dget_parent(dentry);
dir = parent->d_inode;
mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
error = __rpc_rmdir(dir, dentry);
mutex_unlock(&dir->i_mutex);
dput(parent);
return error;
}
EXPORT_SYMBOL_GPL(rpc_rmdir);
static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
{
int ret;
dget(dentry);
ret = simple_unlink(dir, dentry);
d_delete(dentry);
dput(dentry);
return ret;
}
static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
rpc_close_pipes(inode);
return __rpc_unlink(dir, dentry);
}
static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
struct qstr *name)
{
struct dentry *dentry;
dentry = d_lookup(parent, name);
if (!dentry) {
dentry = d_alloc(parent, name);
if (!dentry)
return ERR_PTR(-ENOMEM);
}
if (dentry->d_inode == NULL) {
d_set_d_op(dentry, &rpc_dentry_operations);
return dentry;
}
dput(dentry);
return ERR_PTR(-EEXIST);
}
/*
* FIXME: This probably has races.
*/
static void __rpc_depopulate(struct dentry *parent,
const struct rpc_filelist *files,
int start, int eof)
{
struct inode *dir = parent->d_inode;
struct dentry *dentry;
struct qstr name;
int i;
for (i = start; i < eof; i++) {
name.name = files[i].name;
name.len = strlen(files[i].name);
name.hash = full_name_hash(name.name, name.len);
dentry = d_lookup(parent, &name);
if (dentry == NULL)
continue;
if (dentry->d_inode == NULL)
goto next;
switch (dentry->d_inode->i_mode & S_IFMT) {
default:
BUG();
case S_IFREG:
__rpc_unlink(dir, dentry);
break;
case S_IFDIR:
__rpc_rmdir(dir, dentry);
}
next:
dput(dentry);
}
}
static void rpc_depopulate(struct dentry *parent,
const struct rpc_filelist *files,
int start, int eof)
{
struct inode *dir = parent->d_inode;
mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
__rpc_depopulate(parent, files, start, eof);
mutex_unlock(&dir->i_mutex);
}
static int rpc_populate(struct dentry *parent,
const struct rpc_filelist *files,
int start, int eof,
void *private)
{
struct inode *dir = parent->d_inode;
struct dentry *dentry;
int i, err;
mutex_lock(&dir->i_mutex);
for (i = start; i < eof; i++) {
struct qstr q;
q.name = files[i].name;
q.len = strlen(files[i].name);
q.hash = full_name_hash(q.name, q.len);
dentry = __rpc_lookup_create_exclusive(parent, &q);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_bad;
switch (files[i].mode & S_IFMT) {
default:
BUG();
case S_IFREG:
err = __rpc_create(dir, dentry,
files[i].mode,
files[i].i_fop,
private);
break;
case S_IFDIR:
err = __rpc_mkdir(dir, dentry,
files[i].mode,
NULL,
private);
}
if (err != 0)
goto out_bad;
}
mutex_unlock(&dir->i_mutex);
return 0;
out_bad:
__rpc_depopulate(parent, files, start, eof);
mutex_unlock(&dir->i_mutex);
printk(KERN_WARNING "%s: %s failed to populate directory %s\n",
__FILE__, __func__, parent->d_name.name);
return err;
}
static struct dentry *rpc_mkdir_populate(struct dentry *parent,
struct qstr *name, umode_t mode, void *private,
int (*populate)(struct dentry *, void *), void *args_populate)
{
struct dentry *dentry;
struct inode *dir = parent->d_inode;
int error;
mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
if (IS_ERR(dentry))
goto out;
error = __rpc_mkdir(dir, dentry, mode, NULL, private);
if (error != 0)
goto out_err;
if (populate != NULL) {
error = populate(dentry, args_populate);
if (error)
goto err_rmdir;
}
out:
mutex_unlock(&dir->i_mutex);
return dentry;
err_rmdir:
__rpc_rmdir(dir, dentry);
out_err:
dentry = ERR_PTR(error);
goto out;
}
static int rpc_rmdir_depopulate(struct dentry *dentry,
void (*depopulate)(struct dentry *))
{
struct dentry *parent;
struct inode *dir;
int error;
parent = dget_parent(dentry);
dir = parent->d_inode;
mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
if (depopulate != NULL)
depopulate(dentry);
error = __rpc_rmdir(dir, dentry);
mutex_unlock(&dir->i_mutex);
dput(parent);
return error;
}
/**
* rpc_mkpipe - make an rpc_pipefs file for kernel<->userspace communication
* @parent: dentry of directory to create new "pipe" in
* @name: name of pipe
* @private: private data to associate with the pipe, for the caller's use
* @pipe: &rpc_pipe containing input parameters
*
* Data is made available for userspace to read by calls to
* rpc_queue_upcall(). The actual reads will result in calls to
* @ops->upcall, which will be called with the file pointer,
* message, and userspace buffer to copy to.
*
* Writes can come at any time, and do not necessarily have to be
* responses to upcalls. They will result in calls to @msg->downcall.
*
* The @private argument passed here will be available to all these methods
* from the file pointer, via RPC_I(file_inode(file))->private.
*/
struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
void *private, struct rpc_pipe *pipe)
{
struct dentry *dentry;
struct inode *dir = parent->d_inode;
umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR;
struct qstr q;
int err;
if (pipe->ops->upcall == NULL)
umode &= ~S_IRUGO;
if (pipe->ops->downcall == NULL)
umode &= ~S_IWUGO;
q.name = name;
q.len = strlen(name);
q.hash = full_name_hash(q.name, q.len),
mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, &q);
if (IS_ERR(dentry))
goto out;
err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops,
private, pipe);
if (err)
goto out_err;
out:
mutex_unlock(&dir->i_mutex);
return dentry;
out_err:
dentry = ERR_PTR(err);
printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n",
__FILE__, __func__, parent->d_name.name, name,
err);
goto out;
}
EXPORT_SYMBOL_GPL(rpc_mkpipe_dentry);
/**
* rpc_unlink - remove a pipe
* @dentry: dentry for the pipe, as returned from rpc_mkpipe
*
* After this call, lookups will no longer find the pipe, and any
* attempts to read or write using preexisting opens of the pipe will
* return -EPIPE.
*/
int
rpc_unlink(struct dentry *dentry)
{
struct dentry *parent;
struct inode *dir;
int error = 0;
parent = dget_parent(dentry);
dir = parent->d_inode;
mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
error = __rpc_rmpipe(dir, dentry);
mutex_unlock(&dir->i_mutex);
dput(parent);
return error;
}
EXPORT_SYMBOL_GPL(rpc_unlink);
enum {
RPCAUTH_info,
RPCAUTH_EOF
};
static const struct rpc_filelist authfiles[] = {
[RPCAUTH_info] = {
.name = "info",
.i_fop = &rpc_info_operations,
.mode = S_IFREG | S_IRUSR,
},
};
static int rpc_clntdir_populate(struct dentry *dentry, void *private)
{
return rpc_populate(dentry,
authfiles, RPCAUTH_info, RPCAUTH_EOF,
private);
}
static void rpc_clntdir_depopulate(struct dentry *dentry)
{
rpc_depopulate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF);
}
/**
* rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs
* @dentry: dentry from the rpc_pipefs root to the new directory
* @name: &struct qstr for the name
* @rpc_client: rpc client to associate with this directory
*
* This creates a directory at the given @path associated with
* @rpc_clnt, which will contain a file named "info" with some basic
* information about the client, together with any "pipes" that may
* later be created using rpc_mkpipe().
*/
struct dentry *rpc_create_client_dir(struct dentry *dentry,
struct qstr *name,
struct rpc_clnt *rpc_client)
{
return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
rpc_clntdir_populate, rpc_client);
}
/**
* rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir()
* @dentry: dentry for the pipe
*/
int rpc_remove_client_dir(struct dentry *dentry)
{
return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate);
}
static const struct rpc_filelist cache_pipefs_files[3] = {
[0] = {
.name = "channel",
.i_fop = &cache_file_operations_pipefs,
.mode = S_IFREG|S_IRUSR|S_IWUSR,
},
[1] = {
.name = "content",
.i_fop = &content_file_operations_pipefs,
.mode = S_IFREG|S_IRUSR,
},
[2] = {
.name = "flush",
.i_fop = &cache_flush_operations_pipefs,
.mode = S_IFREG|S_IRUSR|S_IWUSR,
},
};
static int rpc_cachedir_populate(struct dentry *dentry, void *private)
{
return rpc_populate(dentry,
cache_pipefs_files, 0, 3,
private);
}
static void rpc_cachedir_depopulate(struct dentry *dentry)
{
rpc_depopulate(dentry, cache_pipefs_files, 0, 3);
}
struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name,
umode_t umode, struct cache_detail *cd)
{
return rpc_mkdir_populate(parent, name, umode, NULL,
rpc_cachedir_populate, cd);
}
void rpc_remove_cache_dir(struct dentry *dentry)
{
rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate);
}
/*
* populate the filesystem
*/
static const struct super_operations s_ops = {
.alloc_inode = rpc_alloc_inode,
.destroy_inode = rpc_destroy_inode,
.statfs = simple_statfs,
};
#define RPCAUTH_GSSMAGIC 0x67596969
/*
* We have a single directory with 1 node in it.
*/
enum {
RPCAUTH_lockd,
RPCAUTH_mount,
RPCAUTH_nfs,
RPCAUTH_portmap,
RPCAUTH_statd,
RPCAUTH_nfsd4_cb,
RPCAUTH_cache,
RPCAUTH_nfsd,
RPCAUTH_RootEOF
};
static const struct rpc_filelist files[] = {
[RPCAUTH_lockd] = {
.name = "lockd",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
[RPCAUTH_mount] = {
.name = "mount",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
[RPCAUTH_nfs] = {
.name = "nfs",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
[RPCAUTH_portmap] = {
.name = "portmap",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
[RPCAUTH_statd] = {
.name = "statd",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
[RPCAUTH_nfsd4_cb] = {
.name = "nfsd4_cb",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
[RPCAUTH_cache] = {
.name = "cache",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
[RPCAUTH_nfsd] = {
.name = "nfsd",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
};
/*
* This call can be used only in RPC pipefs mount notification hooks.
*/
struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
const unsigned char *dir_name)
{
struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name));
dir.hash = full_name_hash(dir.name, dir.len);
return d_lookup(sb->s_root, &dir);
}
EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);
void rpc_pipefs_init_net(struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
mutex_init(&sn->pipefs_sb_lock);
}
/*
* This call will be used for per network namespace operations calls.
* Note: Function will be returned with pipefs_sb_lock taken if superblock was
* found. This lock have to be released by rpc_put_sb_net() when all operations
* will be completed.
*/
struct super_block *rpc_get_sb_net(const struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
mutex_lock(&sn->pipefs_sb_lock);
if (sn->pipefs_sb)
return sn->pipefs_sb;
mutex_unlock(&sn->pipefs_sb_lock);
return NULL;
}
EXPORT_SYMBOL_GPL(rpc_get_sb_net);
void rpc_put_sb_net(const struct net *net)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
WARN_ON(sn->pipefs_sb == NULL);
mutex_unlock(&sn->pipefs_sb_lock);
}
EXPORT_SYMBOL_GPL(rpc_put_sb_net);
static int
rpc_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
struct dentry *root;
struct net *net = data;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int err;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = RPCAUTH_GSSMAGIC;
sb->s_op = &s_ops;
sb->s_time_gran = 1;
inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
sb->s_root = root = d_make_root(inode);
if (!root)
return -ENOMEM;
if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
return -ENOMEM;
dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n",
net, NET_NAME(net));
sn->pipefs_sb = sb;
err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_MOUNT,
sb);
if (err)
goto err_depopulate;
sb->s_fs_info = get_net(net);
return 0;
err_depopulate:
blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_UMOUNT,
sb);
sn->pipefs_sb = NULL;
__rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
return err;
}
static struct dentry *
rpc_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return mount_ns(fs_type, flags, current->nsproxy->net_ns, rpc_fill_super);
}
static void rpc_kill_sb(struct super_block *sb)
{
struct net *net = sb->s_fs_info;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
mutex_lock(&sn->pipefs_sb_lock);
if (sn->pipefs_sb != sb) {
mutex_unlock(&sn->pipefs_sb_lock);
goto out;
}
sn->pipefs_sb = NULL;
mutex_unlock(&sn->pipefs_sb_lock);
dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n",
net, NET_NAME(net));
blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_UMOUNT,
sb);
put_net(net);
out:
kill_litter_super(sb);
}
static struct file_system_type rpc_pipe_fs_type = {
.owner = THIS_MODULE,
.name = "rpc_pipefs",
.mount = rpc_mount,
.kill_sb = rpc_kill_sb,
};
fs: Limit sys_mount to only request filesystem modules. Modify the request_module to prefix the file system type with "fs-" and add aliases to all of the filesystems that can be built as modules to match. A common practice is to build all of the kernel code and leave code that is not commonly needed as modules, with the result that many users are exposed to any bug anywhere in the kernel. Looking for filesystems with a fs- prefix limits the pool of possible modules that can be loaded by mount to just filesystems trivially making things safer with no real cost. Using aliases means user space can control the policy of which filesystem modules are auto-loaded by editing /etc/modprobe.d/*.conf with blacklist and alias directives. Allowing simple, safe, well understood work-arounds to known problematic software. This also addresses a rare but unfortunate problem where the filesystem name is not the same as it's module name and module auto-loading would not work. While writing this patch I saw a handful of such cases. The most significant being autofs that lives in the module autofs4. This is relevant to user namespaces because we can reach the request module in get_fs_type() without having any special permissions, and people get uncomfortable when a user specified string (in this case the filesystem type) goes all of the way to request_module. After having looked at this issue I don't think there is any particular reason to perform any filtering or permission checks beyond making it clear in the module request that we want a filesystem module. The common pattern in the kernel is to call request_module() without regards to the users permissions. In general all a filesystem module does once loaded is call register_filesystem() and go to sleep. Which means there is not much attack surface exposed by loading a filesytem module unless the filesystem is mounted. In a user namespace filesystems are not mounted unless .fs_flags = FS_USERNS_MOUNT, which most filesystems do not set today. Acked-by: Serge Hallyn <serge.hallyn@canonical.com> Acked-by: Kees Cook <keescook@chromium.org> Reported-by: Kees Cook <keescook@google.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
2013-03-03 11:39:14 +08:00
MODULE_ALIAS_FS("rpc_pipefs");
static void
init_once(void *foo)
{
struct rpc_inode *rpci = (struct rpc_inode *) foo;
inode_init_once(&rpci->vfs_inode);
rpci->private = NULL;
rpci->pipe = NULL;
init_waitqueue_head(&rpci->waitq);
}
int register_rpc_pipefs(void)
{
int err;
rpc_inode_cachep = kmem_cache_create("rpc_inode_cache",
sizeof(struct rpc_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD),
init_once);
if (!rpc_inode_cachep)
return -ENOMEM;
err = rpc_clients_notifier_register();
if (err)
goto err_notifier;
err = register_filesystem(&rpc_pipe_fs_type);
if (err)
goto err_register;
return 0;
err_register:
rpc_clients_notifier_unregister();
err_notifier:
kmem_cache_destroy(rpc_inode_cachep);
return err;
}
void unregister_rpc_pipefs(void)
{
rpc_clients_notifier_unregister();
kmem_cache_destroy(rpc_inode_cachep);
unregister_filesystem(&rpc_pipe_fs_type);
}