Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull final vfs updates from Al Viro:
 "Assorted cleanups and fixes all over the place"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  sg_write()/bsg_write() is not fit to be called under KERNEL_DS
  ufs: fix function declaration for ufs_truncate_blocks
  fs: exec: apply CLOEXEC before changing dumpable task flags
  seq_file: reset iterator to first record for zero offset
  vfs: fix isize/pos/len checks for reflink & dedupe
  [iov_iter] fix iterate_all_kinds() on empty iterators
  move aio compat to fs/aio.c
  reorganize do_make_slave()
  clone_private_mount() doesn't need to touch namespace_sem
  remove a bogus claim about namespace_sem being held by callers of mnt_alloc_id()
This commit is contained in:
Linus Torvalds 2016-12-23 10:52:43 -08:00
commit a307d0a007
15 changed files with 198 additions and 168 deletions

View File

@ -655,6 +655,9 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
dprintk("%s: write %Zd bytes\n", bd->name, count);
if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
return -EINVAL;
bsg_set_block(bd, file);
bytes_written = 0;

View File

@ -581,6 +581,9 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
sg_io_hdr_t *hp;
unsigned char cmnd[SG_MAX_CDB_SIZE];
if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
return -EINVAL;
if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp)))
return -ENXIO;
SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp,

View File

@ -1367,6 +1367,39 @@ out:
return ret;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
{
struct kioctx *ioctx = NULL;
unsigned long ctx;
long ret;
ret = get_user(ctx, ctx32p);
if (unlikely(ret))
goto out;
ret = -EINVAL;
if (unlikely(ctx || nr_events == 0)) {
pr_debug("EINVAL: ctx %lu nr_events %u\n",
ctx, nr_events);
goto out;
}
ioctx = ioctx_alloc(nr_events);
ret = PTR_ERR(ioctx);
if (!IS_ERR(ioctx)) {
/* truncating is ok because it's a user address */
ret = put_user((u32)ioctx->user_id, ctx32p);
if (ret)
kill_ioctx(current->mm, ioctx, NULL);
percpu_ref_put(&ioctx->users);
}
out:
return ret;
}
#endif
/* sys_io_destroy:
* Destroy the aio_context specified. May cancel any outstanding
* AIOs and block on completion. Will fail with -ENOSYS if not
@ -1591,7 +1624,7 @@ out_put_req:
return ret;
}
long do_io_submit(aio_context_t ctx_id, long nr,
static long do_io_submit(aio_context_t ctx_id, long nr,
struct iocb __user *__user *iocbpp, bool compat)
{
struct kioctx *ctx;
@ -1662,6 +1695,44 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
return do_io_submit(ctx_id, nr, iocbpp, 0);
}
#ifdef CONFIG_COMPAT
static inline long
copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
{
compat_uptr_t uptr;
int i;
for (i = 0; i < nr; ++i) {
if (get_user(uptr, ptr32 + i))
return -EFAULT;
if (put_user(compat_ptr(uptr), ptr64 + i))
return -EFAULT;
}
return 0;
}
#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *))
COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
int, nr, u32 __user *, iocb)
{
struct iocb __user * __user *iocb64;
long ret;
if (unlikely(nr < 0))
return -EINVAL;
if (nr > MAX_AIO_SUBMITS)
nr = MAX_AIO_SUBMITS;
iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
ret = copy_iocb(nr, iocb, iocb64);
if (!ret)
ret = do_io_submit(ctx_id, nr, iocb64, 1);
return ret;
}
#endif
/* lookup_kiocb
* Finds a given iocb for cancellation.
*/
@ -1761,3 +1832,25 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
}
return ret;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
compat_long_t, min_nr,
compat_long_t, nr,
struct io_event __user *, events,
struct compat_timespec __user *, timeout)
{
struct timespec t;
struct timespec __user *ut = NULL;
if (timeout) {
if (compat_get_timespec(&t, timeout))
return -EFAULT;
ut = compat_alloc_user_space(sizeof(*ut));
if (copy_to_user(ut, &t, sizeof(t)))
return -EFAULT;
}
return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
}
#endif

View File

@ -487,45 +487,6 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
return compat_sys_fcntl64(fd, cmd, arg);
}
COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p)
{
long ret;
aio_context_t ctx64;
mm_segment_t oldfs = get_fs();
if (unlikely(get_user(ctx64, ctx32p)))
return -EFAULT;
set_fs(KERNEL_DS);
/* The __user pointer cast is valid because of the set_fs() */
ret = sys_io_setup(nr_reqs, (aio_context_t __user *) &ctx64);
set_fs(oldfs);
/* truncating is ok because it's a user address */
if (!ret)
ret = put_user((u32) ctx64, ctx32p);
return ret;
}
COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
compat_long_t, min_nr,
compat_long_t, nr,
struct io_event __user *, events,
struct compat_timespec __user *, timeout)
{
struct timespec t;
struct timespec __user *ut = NULL;
if (timeout) {
if (compat_get_timespec(&t, timeout))
return -EFAULT;
ut = compat_alloc_user_space(sizeof(*ut));
if (copy_to_user(ut, &t, sizeof(t)) )
return -EFAULT;
}
return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
}
/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
@ -602,42 +563,6 @@ out:
return ret;
}
static inline long
copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
{
compat_uptr_t uptr;
int i;
for (i = 0; i < nr; ++i) {
if (get_user(uptr, ptr32 + i))
return -EFAULT;
if (put_user(compat_ptr(uptr), ptr64 + i))
return -EFAULT;
}
return 0;
}
#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *))
COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
int, nr, u32 __user *, iocb)
{
struct iocb __user * __user *iocb64;
long ret;
if (unlikely(nr < 0))
return -EINVAL;
if (nr > MAX_AIO_SUBMITS)
nr = MAX_AIO_SUBMITS;
iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
ret = copy_iocb(nr, iocb, iocb64);
if (!ret)
ret = do_io_submit(ctx_id, nr, iocb64, 1);
return ret;
}
struct compat_ncp_mount_data {
compat_int_t version;
compat_uint_t ncp_fd;

View File

@ -1268,6 +1268,13 @@ int flush_old_exec(struct linux_binprm * bprm)
flush_thread();
current->personality &= ~bprm->per_clear;
/*
* We have to apply CLOEXEC before we change whether the process is
* dumpable (in setup_new_exec) to avoid a race with a process in userspace
* trying to access the should-be-closed file descriptors of a process
* undergoing exec(2).
*/
do_close_on_exec(current->files);
return 0;
out:
@ -1330,7 +1337,6 @@ void setup_new_exec(struct linux_binprm * bprm)
group */
current->self_exec_id++;
flush_signal_handlers(current, 0);
do_close_on_exec(current->files);
}
EXPORT_SYMBOL(setup_new_exec);

View File

@ -96,10 +96,6 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry)
return &mountpoint_hashtable[tmp & mp_hash_mask];
}
/*
* allocation is serialized by namespace_sem, but we need the spinlock to
* serialize with freeing.
*/
static int mnt_alloc_id(struct mount *mnt)
{
int res;
@ -1034,6 +1030,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
if (IS_MNT_SLAVE(old))
list_add(&mnt->mnt_slave, &old->mnt_slave);
mnt->mnt_master = old->mnt_master;
} else {
CLEAR_MNT_SHARED(mnt);
}
if (flag & CL_MAKE_SHARED)
set_mnt_shared(mnt);
@ -1828,9 +1826,7 @@ struct vfsmount *clone_private_mount(const struct path *path)
if (IS_MNT_UNBINDABLE(old_mnt))
return ERR_PTR(-EINVAL);
down_read(&namespace_sem);
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
up_read(&namespace_sem);
if (IS_ERR(new_mnt))
return ERR_CAST(new_mnt);

View File

@ -4834,7 +4834,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
&len, is_dedupe);
if (ret || len == 0)
if (ret <= 0)
goto out_unlock;
/* Lock out changes to the allocation maps and remap. */

View File

@ -67,39 +67,15 @@ int get_dominating_id(struct mount *mnt, const struct path *root)
static int do_make_slave(struct mount *mnt)
{
struct mount *peer_mnt = mnt, *master = mnt->mnt_master;
struct mount *slave_mnt;
struct mount *master, *slave_mnt;
/*
* slave 'mnt' to a peer mount that has the
* same root dentry. If none is available then
* slave it to anything that is available.
*/
while ((peer_mnt = next_peer(peer_mnt)) != mnt &&
peer_mnt->mnt.mnt_root != mnt->mnt.mnt_root) ;
if (peer_mnt == mnt) {
peer_mnt = next_peer(mnt);
if (peer_mnt == mnt)
peer_mnt = NULL;
}
if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) &&
list_empty(&mnt->mnt_share))
if (list_empty(&mnt->mnt_share)) {
if (IS_MNT_SHARED(mnt)) {
mnt_release_group_id(mnt);
list_del_init(&mnt->mnt_share);
mnt->mnt_group_id = 0;
if (peer_mnt)
master = peer_mnt;
if (master) {
list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
slave_mnt->mnt_master = master;
list_move(&mnt->mnt_slave, &master->mnt_slave_list);
list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
INIT_LIST_HEAD(&mnt->mnt_slave_list);
} else {
CLEAR_MNT_SHARED(mnt);
}
master = mnt->mnt_master;
if (!master) {
struct list_head *p = &mnt->mnt_slave_list;
while (!list_empty(p)) {
slave_mnt = list_first_entry(p,
@ -107,9 +83,31 @@ static int do_make_slave(struct mount *mnt)
list_del_init(&slave_mnt->mnt_slave);
slave_mnt->mnt_master = NULL;
}
return 0;
}
mnt->mnt_master = master;
} else {
struct mount *m;
/*
* slave 'mnt' to a peer mount that has the
* same root dentry. If none is available then
* slave it to anything that is available.
*/
for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
master = m;
break;
}
}
list_del_init(&mnt->mnt_share);
mnt->mnt_group_id = 0;
CLEAR_MNT_SHARED(mnt);
}
list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
slave_mnt->mnt_master = master;
list_move(&mnt->mnt_slave, &master->mnt_slave_list);
list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
INIT_LIST_HEAD(&mnt->mnt_slave_list);
mnt->mnt_master = master;
return 0;
}

View File

@ -1669,6 +1669,9 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
* Check that the two inodes are eligible for cloning, the ranges make
* sense, and then flush all dirty data. Caller must ensure that the
* inodes have been locked against any other modifications.
*
* Returns: 0 for "nothing to clone", 1 for "something to clone", or
* the usual negative error code.
*/
int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
struct inode *inode_out, loff_t pos_out,
@ -1695,17 +1698,15 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
/* Are we going all the way to the end? */
isize = i_size_read(inode_in);
if (isize == 0) {
*len = 0;
if (isize == 0)
return 0;
}
/* Zero length dedupe exits immediately; reflink goes to EOF. */
if (*len == 0) {
if (is_dedupe) {
*len = 0;
if (is_dedupe || pos_in == isize)
return 0;
}
if (pos_in > isize)
return -EINVAL;
*len = isize - pos_in;
}
@ -1769,7 +1770,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
return -EBADE;
}
return 0;
return 1;
}
EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
@ -1955,6 +1956,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
goto out;
ret = 0;
if (off + len > i_size_read(src))
return -EINVAL;
/* pre-format output fields to sane values */
for (i = 0; i < count; i++) {
same->info[i].bytes_deduped = 0ULL;

View File

@ -190,6 +190,13 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
*/
m->version = file->f_version;
/*
* if request is to read from zero offset, reset iterator to first
* record as it might have been already advanced by previous requests
*/
if (*ppos == 0)
m->index = 0;
/* Don't assume *ppos is where we left it */
if (unlikely(*ppos != m->read_pos)) {
while ((err = traverse(m, *ppos)) == -EAGAIN)

View File

@ -1191,7 +1191,7 @@ out:
return err;
}
void ufs_truncate_blocks(struct inode *inode)
static void ufs_truncate_blocks(struct inode *inode)
{
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))

View File

@ -1161,7 +1161,7 @@ xfs_reflink_remap_range(
ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
&len, is_dedupe);
if (ret || len == 0)
if (ret <= 0)
goto out_unlock;
trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);

View File

@ -14,14 +14,9 @@ typedef int (kiocb_cancel_fn)(struct kiocb *);
/* prototypes */
#ifdef CONFIG_AIO
extern void exit_aio(struct mm_struct *mm);
extern long do_io_submit(aio_context_t ctx_id, long nr,
struct iocb __user *__user *iocbpp, bool compat);
void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
#else
static inline void exit_aio(struct mm_struct *mm) { }
static inline long do_io_submit(aio_context_t ctx_id, long nr,
struct iocb __user * __user *iocbpp,
bool compat) { return 0; }
static inline void kiocb_set_cancel_fn(struct kiocb *req,
kiocb_cancel_fn *cancel) { }
#endif /* CONFIG_AIO */

View File

@ -150,6 +150,9 @@ cond_syscall(sys_io_destroy);
cond_syscall(sys_io_submit);
cond_syscall(sys_io_cancel);
cond_syscall(sys_io_getevents);
cond_syscall(compat_sys_io_setup);
cond_syscall(compat_sys_io_submit);
cond_syscall(compat_sys_io_getevents);
cond_syscall(sys_sysfs);
cond_syscall(sys_syslog);
cond_syscall(sys_process_vm_readv);

View File

@ -73,6 +73,7 @@
}
#define iterate_all_kinds(i, n, v, I, B, K) { \
if (likely(n)) { \
size_t skip = i->iov_offset; \
if (unlikely(i->type & ITER_BVEC)) { \
struct bio_vec v; \
@ -87,6 +88,7 @@
struct iovec v; \
iterate_iovec(i, n, v, iov, skip, (I)) \
} \
} \
}
#define iterate_and_advance(i, n, v, I, B, K) { \
@ -576,7 +578,7 @@ bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
WARN_ON(1);
return false;
}
if (unlikely(i->count < bytes)) \
if (unlikely(i->count < bytes))
return false;
iterate_all_kinds(i, bytes, v, ({
@ -620,7 +622,7 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
WARN_ON(1);
return false;
}
if (unlikely(i->count < bytes)) \
if (unlikely(i->count < bytes))
return false;
iterate_all_kinds(i, bytes, v, ({
if (__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
@ -837,11 +839,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
unsigned long res = 0;
size_t size = i->count;
if (!size)
return 0;
if (unlikely(i->type & ITER_PIPE)) {
if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
return size | i->iov_offset;
return size;
}
@ -858,8 +857,6 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
unsigned long res = 0;
size_t size = i->count;
if (!size)
return 0;
if (unlikely(i->type & ITER_PIPE)) {
WARN_ON(1);
@ -908,6 +905,9 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
size_t capacity;
int idx;
if (!maxsize)
return 0;
if (!sanity(i))
return -EFAULT;
@ -926,9 +926,6 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
if (maxsize > i->count)
maxsize = i->count;
if (!maxsize)
return 0;
if (unlikely(i->type & ITER_PIPE))
return pipe_get_pages(i, pages, maxsize, maxpages, start);
iterate_all_kinds(i, maxsize, v, ({
@ -975,6 +972,9 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
int idx;
int npages;
if (!maxsize)
return 0;
if (!sanity(i))
return -EFAULT;
@ -1006,9 +1006,6 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
if (maxsize > i->count)
maxsize = i->count;
if (!maxsize)
return 0;
if (unlikely(i->type & ITER_PIPE))
return pipe_get_pages_alloc(i, pages, maxsize, start);
iterate_all_kinds(i, maxsize, v, ({