[PATCH] splice: fix problems with sys_tee()
Several issues noticed/fixed: - We cannot reliably block in link_pipe() while holding both input and output mutexes. So do preparatory checks before locking down both mutexes and doing the link. - The ipipe->nrbufs vs i check was bad, because we could have dropped the ipipe lock in-between. This causes us to potentially look at unknown buffers if we were racing with someone else reading this pipe. Signed-off-by: Jens Axboe <axboe@suse.de>
This commit is contained in:
parent
b3cf257623
commit
aadd06e5c5
240
fs/splice.c
240
fs/splice.c
|
@ -1306,6 +1306,85 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make sure there's data to read. Wait for input if we can, otherwise
|
||||||
|
* return an appropriate error.
|
||||||
|
*/
|
||||||
|
static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check ->nrbufs without the inode lock first. This function
|
||||||
|
* is speculative anyways, so missing one is ok.
|
||||||
|
*/
|
||||||
|
if (pipe->nrbufs)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
mutex_lock(&pipe->inode->i_mutex);
|
||||||
|
|
||||||
|
while (!pipe->nrbufs) {
|
||||||
|
if (signal_pending(current)) {
|
||||||
|
ret = -ERESTARTSYS;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!pipe->writers)
|
||||||
|
break;
|
||||||
|
if (!pipe->waiting_writers) {
|
||||||
|
if (flags & SPLICE_F_NONBLOCK) {
|
||||||
|
ret = -EAGAIN;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pipe_wait(pipe);
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_unlock(&pipe->inode->i_mutex);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make sure there's writeable room. Wait for room if we can, otherwise
|
||||||
|
* return an appropriate error.
|
||||||
|
*/
|
||||||
|
static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check ->nrbufs without the inode lock first. This function
|
||||||
|
* is speculative anyways, so missing one is ok.
|
||||||
|
*/
|
||||||
|
if (pipe->nrbufs < PIPE_BUFFERS)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
mutex_lock(&pipe->inode->i_mutex);
|
||||||
|
|
||||||
|
while (pipe->nrbufs >= PIPE_BUFFERS) {
|
||||||
|
if (!pipe->readers) {
|
||||||
|
send_sig(SIGPIPE, current, 0);
|
||||||
|
ret = -EPIPE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (flags & SPLICE_F_NONBLOCK) {
|
||||||
|
ret = -EAGAIN;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (signal_pending(current)) {
|
||||||
|
ret = -ERESTARTSYS;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
pipe->waiting_writers++;
|
||||||
|
pipe_wait(pipe);
|
||||||
|
pipe->waiting_writers--;
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_unlock(&pipe->inode->i_mutex);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Link contents of ipipe to opipe.
|
* Link contents of ipipe to opipe.
|
||||||
*/
|
*/
|
||||||
|
@ -1314,9 +1393,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
|
||||||
size_t len, unsigned int flags)
|
size_t len, unsigned int flags)
|
||||||
{
|
{
|
||||||
struct pipe_buffer *ibuf, *obuf;
|
struct pipe_buffer *ibuf, *obuf;
|
||||||
int ret, do_wakeup, i, ipipe_first;
|
int ret = 0, i = 0, nbuf;
|
||||||
|
|
||||||
ret = do_wakeup = ipipe_first = 0;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Potential ABBA deadlock, work around it by ordering lock
|
* Potential ABBA deadlock, work around it by ordering lock
|
||||||
|
@ -1324,126 +1401,62 @@ static int link_pipe(struct pipe_inode_info *ipipe,
|
||||||
* could deadlock (one doing tee from A -> B, the other from B -> A).
|
* could deadlock (one doing tee from A -> B, the other from B -> A).
|
||||||
*/
|
*/
|
||||||
if (ipipe->inode < opipe->inode) {
|
if (ipipe->inode < opipe->inode) {
|
||||||
ipipe_first = 1;
|
mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
|
||||||
mutex_lock(&ipipe->inode->i_mutex);
|
mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
|
||||||
mutex_lock(&opipe->inode->i_mutex);
|
|
||||||
} else {
|
} else {
|
||||||
mutex_lock(&opipe->inode->i_mutex);
|
mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
|
||||||
mutex_lock(&ipipe->inode->i_mutex);
|
mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0;; i++) {
|
do {
|
||||||
if (!opipe->readers) {
|
if (!opipe->readers) {
|
||||||
send_sig(SIGPIPE, current, 0);
|
send_sig(SIGPIPE, current, 0);
|
||||||
if (!ret)
|
if (!ret)
|
||||||
ret = -EPIPE;
|
ret = -EPIPE;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ipipe->nrbufs - i) {
|
|
||||||
ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If we have room, fill this buffer
|
|
||||||
*/
|
|
||||||
if (opipe->nrbufs < PIPE_BUFFERS) {
|
|
||||||
int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Get a reference to this pipe buffer,
|
|
||||||
* so we can copy the contents over.
|
|
||||||
*/
|
|
||||||
ibuf->ops->get(ipipe, ibuf);
|
|
||||||
|
|
||||||
obuf = opipe->bufs + nbuf;
|
|
||||||
*obuf = *ibuf;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Don't inherit the gift flag, we need to
|
|
||||||
* prevent multiple steals of this page.
|
|
||||||
*/
|
|
||||||
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
|
|
||||||
|
|
||||||
if (obuf->len > len)
|
|
||||||
obuf->len = len;
|
|
||||||
|
|
||||||
opipe->nrbufs++;
|
|
||||||
do_wakeup = 1;
|
|
||||||
ret += obuf->len;
|
|
||||||
len -= obuf->len;
|
|
||||||
|
|
||||||
if (!len)
|
|
||||||
break;
|
|
||||||
if (opipe->nrbufs < PIPE_BUFFERS)
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We have input available, but no output room.
|
|
||||||
* If we already copied data, return that. If we
|
|
||||||
* need to drop the opipe lock, it must be ordered
|
|
||||||
* last to avoid deadlocks.
|
|
||||||
*/
|
|
||||||
if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
|
|
||||||
if (!ret)
|
|
||||||
ret = -EAGAIN;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (signal_pending(current)) {
|
|
||||||
if (!ret)
|
|
||||||
ret = -ERESTARTSYS;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (do_wakeup) {
|
|
||||||
smp_mb();
|
|
||||||
if (waitqueue_active(&opipe->wait))
|
|
||||||
wake_up_interruptible(&opipe->wait);
|
|
||||||
kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
|
|
||||||
do_wakeup = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
opipe->waiting_writers++;
|
|
||||||
pipe_wait(opipe);
|
|
||||||
opipe->waiting_writers--;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* No input buffers, do the usual checks for available
|
* If we have iterated all input buffers or ran out of
|
||||||
* writers and blocking and wait if necessary
|
* output room, break.
|
||||||
*/
|
*/
|
||||||
if (!ipipe->writers)
|
if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
|
||||||
break;
|
break;
|
||||||
if (!ipipe->waiting_writers) {
|
|
||||||
if (ret)
|
ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
|
||||||
break;
|
nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* pipe_wait() drops the ipipe mutex. To avoid deadlocks
|
* Get a reference to this pipe buffer,
|
||||||
* with another process, we can only safely do that if
|
* so we can copy the contents over.
|
||||||
* the ipipe lock is ordered last.
|
|
||||||
*/
|
*/
|
||||||
if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
|
ibuf->ops->get(ipipe, ibuf);
|
||||||
if (!ret)
|
|
||||||
ret = -EAGAIN;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (signal_pending(current)) {
|
|
||||||
if (!ret)
|
|
||||||
ret = -ERESTARTSYS;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (waitqueue_active(&ipipe->wait))
|
obuf = opipe->bufs + nbuf;
|
||||||
wake_up_interruptible_sync(&ipipe->wait);
|
*obuf = *ibuf;
|
||||||
kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
|
|
||||||
|
|
||||||
pipe_wait(ipipe);
|
/*
|
||||||
}
|
* Don't inherit the gift flag, we need to
|
||||||
|
* prevent multiple steals of this page.
|
||||||
|
*/
|
||||||
|
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
|
||||||
|
|
||||||
|
if (obuf->len > len)
|
||||||
|
obuf->len = len;
|
||||||
|
|
||||||
|
opipe->nrbufs++;
|
||||||
|
ret += obuf->len;
|
||||||
|
len -= obuf->len;
|
||||||
|
i++;
|
||||||
|
} while (len);
|
||||||
|
|
||||||
mutex_unlock(&ipipe->inode->i_mutex);
|
mutex_unlock(&ipipe->inode->i_mutex);
|
||||||
mutex_unlock(&opipe->inode->i_mutex);
|
mutex_unlock(&opipe->inode->i_mutex);
|
||||||
|
|
||||||
if (do_wakeup) {
|
/*
|
||||||
|
* If we put data in the output pipe, wakeup any potential readers.
|
||||||
|
*/
|
||||||
|
if (ret > 0) {
|
||||||
smp_mb();
|
smp_mb();
|
||||||
if (waitqueue_active(&opipe->wait))
|
if (waitqueue_active(&opipe->wait))
|
||||||
wake_up_interruptible(&opipe->wait);
|
wake_up_interruptible(&opipe->wait);
|
||||||
|
@ -1464,14 +1477,29 @@ static long do_tee(struct file *in, struct file *out, size_t len,
|
||||||
{
|
{
|
||||||
struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
|
struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
|
||||||
struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
|
struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
|
||||||
|
int ret = -EINVAL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Link ipipe to the two output pipes, consuming as we go along.
|
* Duplicate the contents of ipipe to opipe without actually
|
||||||
|
* copying the data.
|
||||||
*/
|
*/
|
||||||
if (ipipe && opipe)
|
if (ipipe && opipe && ipipe != opipe) {
|
||||||
return link_pipe(ipipe, opipe, len, flags);
|
/*
|
||||||
|
* Keep going, unless we encounter an error. The ipipe/opipe
|
||||||
|
* ordering doesn't really matter.
|
||||||
|
*/
|
||||||
|
ret = link_ipipe_prep(ipipe, flags);
|
||||||
|
if (!ret) {
|
||||||
|
ret = link_opipe_prep(opipe, flags);
|
||||||
|
if (!ret) {
|
||||||
|
ret = link_pipe(ipipe, opipe, len, flags);
|
||||||
|
if (!ret && (flags & SPLICE_F_NONBLOCK))
|
||||||
|
ret = -EAGAIN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return -EINVAL;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
|
asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
|
||||||
|
|
Loading…
Reference in New Issue