af_unix: Fix splice-bind deadlock
On 2015/11/06, Dmitry Vyukov reported a deadlock involving the splice system call and AF_UNIX sockets, http://lists.openwall.net/netdev/2015/11/06/24 The situation was analyzed as (a while ago) A: socketpair() B: splice() from a pipe to /mnt/regular_file does sb_start_write() on /mnt C: try to freeze /mnt wait for B to finish with /mnt A: bind() try to bind our socket to /mnt/new_socket_name lock our socket, see it not bound yet decide that it needs to create something in /mnt try to do sb_start_write() on /mnt, block (it's waiting for C). D: splice() from the same pipe to our socket lock the pipe, see that socket is connected try to lock the socket, block waiting for A B: get around to actually feeding a chunk from pipe to file, try to lock the pipe. Deadlock. on 2015/11/10 by Al Viro, http://lists.openwall.net/netdev/2015/11/10/4 The patch fixes this by removing the kern_path_create related code from unix_mknod and executing it as part of unix_bind prior acquiring the readlock of the socket in question. This means that A (as used above) will sb_start_write on /mnt before it acquires the readlock, hence, it won't indirectly block B which first did a sb_start_write and then waited for a thread trying to acquire the readlock. Consequently, A being blocked by C waiting for B won't cause a deadlock anymore (effectively, both A and B acquire two locks in opposite order in the situation described above). Dmitry Vyukov(<dvyukov@google.com>) tested the original patch. Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
b5bdacf3bb
commit
c845acb324
|
@ -953,32 +953,20 @@ fail:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
|
||||
static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
|
||||
struct path *res)
|
||||
{
|
||||
struct dentry *dentry;
|
||||
struct path path;
|
||||
int err = 0;
|
||||
/*
|
||||
* Get the parent directory, calculate the hash for last
|
||||
* component.
|
||||
*/
|
||||
dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
|
||||
err = PTR_ERR(dentry);
|
||||
if (IS_ERR(dentry))
|
||||
return err;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* All right, let's create it.
|
||||
*/
|
||||
err = security_path_mknod(&path, dentry, mode, 0);
|
||||
err = security_path_mknod(path, dentry, mode, 0);
|
||||
if (!err) {
|
||||
err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
|
||||
err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
|
||||
if (!err) {
|
||||
res->mnt = mntget(path.mnt);
|
||||
res->mnt = mntget(path->mnt);
|
||||
res->dentry = dget(dentry);
|
||||
}
|
||||
}
|
||||
done_path_create(&path, dentry);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -989,10 +977,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
|||
struct unix_sock *u = unix_sk(sk);
|
||||
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
|
||||
char *sun_path = sunaddr->sun_path;
|
||||
int err;
|
||||
int err, name_err;
|
||||
unsigned int hash;
|
||||
struct unix_address *addr;
|
||||
struct hlist_head *list;
|
||||
struct path path;
|
||||
struct dentry *dentry;
|
||||
|
||||
err = -EINVAL;
|
||||
if (sunaddr->sun_family != AF_UNIX)
|
||||
|
@ -1008,14 +998,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
|||
goto out;
|
||||
addr_len = err;
|
||||
|
||||
name_err = 0;
|
||||
dentry = NULL;
|
||||
if (sun_path[0]) {
|
||||
/* Get the parent directory, calculate the hash for last
|
||||
* component.
|
||||
*/
|
||||
dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
|
||||
|
||||
if (IS_ERR(dentry)) {
|
||||
/* delay report until after 'already bound' check */
|
||||
name_err = PTR_ERR(dentry);
|
||||
dentry = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
err = mutex_lock_interruptible(&u->readlock);
|
||||
if (err)
|
||||
goto out;
|
||||
goto out_path;
|
||||
|
||||
err = -EINVAL;
|
||||
if (u->addr)
|
||||
goto out_up;
|
||||
|
||||
if (name_err) {
|
||||
err = name_err == -EEXIST ? -EADDRINUSE : name_err;
|
||||
goto out_up;
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
|
||||
if (!addr)
|
||||
|
@ -1026,11 +1036,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
|||
addr->hash = hash ^ sk->sk_type;
|
||||
atomic_set(&addr->refcnt, 1);
|
||||
|
||||
if (sun_path[0]) {
|
||||
struct path path;
|
||||
if (dentry) {
|
||||
struct path u_path;
|
||||
umode_t mode = S_IFSOCK |
|
||||
(SOCK_INODE(sock)->i_mode & ~current_umask());
|
||||
err = unix_mknod(sun_path, mode, &path);
|
||||
err = unix_mknod(dentry, &path, mode, &u_path);
|
||||
if (err) {
|
||||
if (err == -EEXIST)
|
||||
err = -EADDRINUSE;
|
||||
|
@ -1038,9 +1048,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
|
|||
goto out_up;
|
||||
}
|
||||
addr->hash = UNIX_HASH_SIZE;
|
||||
hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
|
||||
hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
|
||||
spin_lock(&unix_table_lock);
|
||||
u->path = path;
|
||||
u->path = u_path;
|
||||
list = &unix_socket_table[hash];
|
||||
} else {
|
||||
spin_lock(&unix_table_lock);
|
||||
|
@ -1063,6 +1073,10 @@ out_unlock:
|
|||
spin_unlock(&unix_table_lock);
|
||||
out_up:
|
||||
mutex_unlock(&u->readlock);
|
||||
out_path:
|
||||
if (dentry)
|
||||
done_path_create(&path, dentry);
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue