Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull assorted fixes - mostly vfs - from Al Viro:
 "Assorted fixes, with an unexpected detour into vfio refcounting logic
  (fell out when digging into an analog of the eventpoll race in there)."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  task_work: add a scheduling point in task_work_run()
  fs: fix fs/namei.c kernel-doc warnings
  eventpoll: use-after-possible-free in epoll_create1()
  vfio: grab vfio_device reference *before* exposing the sucker via fd_install()
  vfio: get rid of vfio_device_put()/vfio_group_get_device* races
  vfio: get rid of open-coding kref_put_mutex
  introduce kref_put_mutex()
  vfio: don't dereference after kfree...
  mqueue: lift mnt_want_write() outside ->i_mutex, clean up a bit
This commit is contained in:
Linus Torvalds 2012-08-22 09:56:06 -07:00
commit 467e9e51d0
5 changed files with 56 additions and 46 deletions

View File

@ -264,6 +264,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
return group; return group;
} }
/* called with vfio.group_lock held */
static void vfio_group_release(struct kref *kref) static void vfio_group_release(struct kref *kref)
{ {
struct vfio_group *group = container_of(kref, struct vfio_group, kref); struct vfio_group *group = container_of(kref, struct vfio_group, kref);
@ -287,13 +288,7 @@ static void vfio_group_release(struct kref *kref)
static void vfio_group_put(struct vfio_group *group) static void vfio_group_put(struct vfio_group *group)
{ {
mutex_lock(&vfio.group_lock); kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
/*
* Release needs to unlock to unregister the notifier, so only
* unlock if not released.
*/
if (!kref_put(&group->kref, vfio_group_release))
mutex_unlock(&vfio.group_lock);
} }
/* Assume group_lock or group reference is held */ /* Assume group_lock or group reference is held */
@ -401,7 +396,6 @@ static void vfio_device_release(struct kref *kref)
struct vfio_device, kref); struct vfio_device, kref);
struct vfio_group *group = device->group; struct vfio_group *group = device->group;
mutex_lock(&group->device_lock);
list_del(&device->group_next); list_del(&device->group_next);
mutex_unlock(&group->device_lock); mutex_unlock(&group->device_lock);
@ -416,8 +410,9 @@ static void vfio_device_release(struct kref *kref)
/* Device reference always implies a group reference */ /* Device reference always implies a group reference */
static void vfio_device_put(struct vfio_device *device) static void vfio_device_put(struct vfio_device *device)
{ {
kref_put(&device->kref, vfio_device_release); struct vfio_group *group = device->group;
vfio_group_put(device->group); kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
vfio_group_put(group);
} }
static void vfio_device_get(struct vfio_device *device) static void vfio_device_get(struct vfio_device *device)
@ -1116,10 +1111,10 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
*/ */
filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
fd_install(ret, filep);
vfio_device_get(device); vfio_device_get(device);
atomic_inc(&group->container_users); atomic_inc(&group->container_users);
fd_install(ret, filep);
break; break;
} }
mutex_unlock(&group->device_lock); mutex_unlock(&group->device_lock);

View File

@ -1654,8 +1654,8 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
error = PTR_ERR(file); error = PTR_ERR(file);
goto out_free_fd; goto out_free_fd;
} }
fd_install(fd, file);
ep->file = file; ep->file = file;
fd_install(fd, file);
return fd; return fd;
out_free_fd: out_free_fd:

View File

@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask)
/** /**
* sb_permission - Check superblock-level permissions * sb_permission - Check superblock-level permissions
* @sb: Superblock of inode to check permission on * @sb: Superblock of inode to check permission on
* @inode: Inode to check permission on
* @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
* *
* Separate out file-system wide checks from inode-specific permission checks. * Separate out file-system wide checks from inode-specific permission checks.
@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1;
/** /**
* may_follow_link - Check symlink following for unsafe situations * may_follow_link - Check symlink following for unsafe situations
* @link: The path of the symlink * @link: The path of the symlink
* @nd: nameidata pathwalk data
* *
* In the case of the sysctl_protected_symlinks sysctl being enabled, * In the case of the sysctl_protected_symlinks sysctl being enabled,
* CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is

View File

@ -18,6 +18,7 @@
#include <linux/bug.h> #include <linux/bug.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/mutex.h>
struct kref { struct kref {
atomic_t refcount; atomic_t refcount;
@ -93,4 +94,21 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)
{ {
return kref_sub(kref, 1, release); return kref_sub(kref, 1, release);
} }
/**
 * kref_put_mutex - drop a reference, taking @lock if it may be the last one
 * @kref: reference count to decrement
 * @release: callback invoked when the count reaches zero; must not be NULL
 * @lock: mutex acquired before the potentially-final decrement
 *
 * Returns 1 if @release was called, 0 otherwise.
 *
 * On the path that returns 1, @lock was acquired here and is NOT dropped by
 * this function, so @release runs with @lock held.
 * NOTE(review): @release is presumably expected to unlock @lock itself --
 * confirm against each caller's release callback.
 */
static inline int kref_put_mutex(struct kref *kref,
void (*release)(struct kref *kref),
struct mutex *lock)
{
WARN_ON(release == NULL);
/*
 * Fast path: decrement without the lock unless the count is currently 1.
 * Only when the lockless decrement is refused do we fall into the
 * locked slow path below.
 */
if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) {
mutex_lock(lock);
/* Count did not reach zero after all: drop the lock, nothing released. */
if (unlikely(!atomic_dec_and_test(&kref->refcount))) {
mutex_unlock(lock);
return 0;
}
/* Count hit zero: invoke the callback with @lock still held. */
release(kref);
return 1;
}
return 0;
}
#endif /* _KREF_H_ */ #endif /* _KREF_H_ */

View File

@ -726,7 +726,6 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir,
struct mq_attr *attr) struct mq_attr *attr)
{ {
const struct cred *cred = current_cred(); const struct cred *cred = current_cred();
struct file *result;
int ret; int ret;
if (attr) { if (attr) {
@ -748,21 +747,11 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir,
} }
mode &= ~current_umask(); mode &= ~current_umask();
ret = mnt_want_write(path->mnt);
if (ret)
return ERR_PTR(ret);
ret = vfs_create(dir, path->dentry, mode, true); ret = vfs_create(dir, path->dentry, mode, true);
path->dentry->d_fsdata = NULL; path->dentry->d_fsdata = NULL;
if (!ret) if (ret)
result = dentry_open(path, oflag, cred); return ERR_PTR(ret);
else return dentry_open(path, oflag, cred);
result = ERR_PTR(ret);
/*
* dentry_open() took a persistent mnt_want_write(),
* so we can now drop this one.
*/
mnt_drop_write(path->mnt);
return result;
} }
/* Opens existing queue */ /* Opens existing queue */
@ -788,7 +777,9 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
struct mq_attr attr; struct mq_attr attr;
int fd, error; int fd, error;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
struct dentry *root = ipc_ns->mq_mnt->mnt_root; struct vfsmount *mnt = ipc_ns->mq_mnt;
struct dentry *root = mnt->mnt_root;
int ro;
if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr))) if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
return -EFAULT; return -EFAULT;
@ -802,6 +793,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
if (fd < 0) if (fd < 0)
goto out_putname; goto out_putname;
ro = mnt_want_write(mnt); /* we'll drop it in any case */
error = 0; error = 0;
mutex_lock(&root->d_inode->i_mutex); mutex_lock(&root->d_inode->i_mutex);
path.dentry = lookup_one_len(name, root, strlen(name)); path.dentry = lookup_one_len(name, root, strlen(name));
@ -809,7 +801,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
error = PTR_ERR(path.dentry); error = PTR_ERR(path.dentry);
goto out_putfd; goto out_putfd;
} }
path.mnt = mntget(ipc_ns->mq_mnt); path.mnt = mntget(mnt);
if (oflag & O_CREAT) { if (oflag & O_CREAT) {
if (path.dentry->d_inode) { /* entry already exists */ if (path.dentry->d_inode) { /* entry already exists */
@ -820,6 +812,10 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
} }
filp = do_open(&path, oflag); filp = do_open(&path, oflag);
} else { } else {
if (ro) {
error = ro;
goto out;
}
filp = do_create(ipc_ns, root->d_inode, filp = do_create(ipc_ns, root->d_inode,
&path, oflag, mode, &path, oflag, mode,
u_attr ? &attr : NULL); u_attr ? &attr : NULL);
@ -845,6 +841,7 @@ out_putfd:
fd = error; fd = error;
} }
mutex_unlock(&root->d_inode->i_mutex); mutex_unlock(&root->d_inode->i_mutex);
mnt_drop_write(mnt);
out_putname: out_putname:
putname(name); putname(name);
return fd; return fd;
@ -857,40 +854,38 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
struct dentry *dentry; struct dentry *dentry;
struct inode *inode = NULL; struct inode *inode = NULL;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
struct vfsmount *mnt = ipc_ns->mq_mnt;
name = getname(u_name); name = getname(u_name);
if (IS_ERR(name)) if (IS_ERR(name))
return PTR_ERR(name); return PTR_ERR(name);
mutex_lock_nested(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex, err = mnt_want_write(mnt);
I_MUTEX_PARENT); if (err)
dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name)); goto out_name;
mutex_lock_nested(&mnt->mnt_root->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_one_len(name, mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) { if (IS_ERR(dentry)) {
err = PTR_ERR(dentry); err = PTR_ERR(dentry);
goto out_unlock; goto out_unlock;
} }
if (!dentry->d_inode) {
err = -ENOENT;
goto out_err;
}
inode = dentry->d_inode; inode = dentry->d_inode;
if (inode) if (!inode) {
err = -ENOENT;
} else {
ihold(inode); ihold(inode);
err = mnt_want_write(ipc_ns->mq_mnt); err = vfs_unlink(dentry->d_parent->d_inode, dentry);
if (err) }
goto out_err;
err = vfs_unlink(dentry->d_parent->d_inode, dentry);
mnt_drop_write(ipc_ns->mq_mnt);
out_err:
dput(dentry); dput(dentry);
out_unlock: out_unlock:
mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex); mutex_unlock(&mnt->mnt_root->d_inode->i_mutex);
putname(name);
if (inode) if (inode)
iput(inode); iput(inode);
mnt_drop_write(mnt);
out_name:
putname(name);
return err; return err;
} }