new iov_iter flavour - ITER_UBUF
Equivalent of single-segment iovec. Initialized by iov_iter_ubuf(), checked for by iter_is_ubuf(), otherwise behaves like ITER_IOVEC ones. We are going to expose things like ->write_iter() et al. to those in subsequent commits. New predicate (user_backed_iter()) that is true for ITER_IOVEC and ITER_UBUF; places like direct-IO handling should use that for checking whether pages we modify after getting them from iov_iter_get_pages() need to be dirtied. DO NOT assume that replacing iter_is_iovec() with user_backed_iter() will solve all problems - there's code that uses iter_is_iovec() to decide how to poke around in iov_iter guts and for that the predicate replacement obviously won't suffice. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
fa9db655d0
commit
fcb14cb1bd
|
@ -75,7 +75,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
|
|||
|
||||
if (iov_iter_rw(iter) == READ) {
|
||||
bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
|
||||
if (iter_is_iovec(iter))
|
||||
if (user_backed_iter(iter))
|
||||
should_dirty = true;
|
||||
} else {
|
||||
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
|
||||
|
@ -204,7 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
|||
}
|
||||
|
||||
dio->size = 0;
|
||||
if (is_read && iter_is_iovec(iter))
|
||||
if (is_read && user_backed_iter(iter))
|
||||
dio->flags |= DIO_SHOULD_DIRTY;
|
||||
|
||||
blk_start_plug(&plug);
|
||||
|
@ -335,7 +335,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
|
|||
dio->size = bio->bi_iter.bi_size;
|
||||
|
||||
if (is_read) {
|
||||
if (iter_is_iovec(iter)) {
|
||||
if (user_backed_iter(iter)) {
|
||||
dio->flags |= DIO_SHOULD_DIRTY;
|
||||
bio_set_pages_dirty(bio);
|
||||
}
|
||||
|
|
|
@ -1262,7 +1262,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
|
|||
size_t count = iov_iter_count(iter);
|
||||
loff_t pos = iocb->ki_pos;
|
||||
bool write = iov_iter_rw(iter) == WRITE;
|
||||
bool should_dirty = !write && iter_is_iovec(iter);
|
||||
bool should_dirty = !write && user_backed_iter(iter);
|
||||
|
||||
if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
|
||||
return -EROFS;
|
||||
|
|
|
@ -4004,7 +4004,7 @@ static ssize_t __cifs_readv(
|
|||
if (!is_sync_kiocb(iocb))
|
||||
ctx->iocb = iocb;
|
||||
|
||||
if (iter_is_iovec(to))
|
||||
if (user_backed_iter(to))
|
||||
ctx->should_dirty = true;
|
||||
|
||||
if (direct) {
|
||||
|
|
|
@ -1251,7 +1251,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
|
|||
spin_lock_init(&dio->bio_lock);
|
||||
dio->refcount = 1;
|
||||
|
||||
dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ;
|
||||
dio->should_dirty = user_backed_iter(iter) && iov_iter_rw(iter) == READ;
|
||||
sdio.iter = iter;
|
||||
sdio.final_block_in_request = end >> blkbits;
|
||||
|
||||
|
|
|
@ -1356,7 +1356,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
|
|||
if (!fud)
|
||||
return -EPERM;
|
||||
|
||||
if (!iter_is_iovec(to))
|
||||
if (!user_backed_iter(to))
|
||||
return -EINVAL;
|
||||
|
||||
fuse_copy_init(&cs, 1, to);
|
||||
|
@ -1949,7 +1949,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
|
|||
if (!fud)
|
||||
return -EPERM;
|
||||
|
||||
if (!iter_is_iovec(from))
|
||||
if (!user_backed_iter(from))
|
||||
return -EINVAL;
|
||||
|
||||
fuse_copy_init(&cs, 0, from);
|
||||
|
|
|
@ -1465,7 +1465,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
|
|||
inode_unlock(inode);
|
||||
}
|
||||
|
||||
io->should_dirty = !write && iter_is_iovec(iter);
|
||||
io->should_dirty = !write && user_backed_iter(iter);
|
||||
while (count) {
|
||||
ssize_t nres;
|
||||
fl_owner_t owner = current->files;
|
||||
|
|
|
@ -780,7 +780,7 @@ static inline bool should_fault_in_pages(struct iov_iter *i,
|
|||
|
||||
if (!count)
|
||||
return false;
|
||||
if (!iter_is_iovec(i))
|
||||
if (!user_backed_iter(i))
|
||||
return false;
|
||||
|
||||
size = PAGE_SIZE;
|
||||
|
|
|
@ -533,7 +533,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
|
|||
iomi.flags |= IOMAP_NOWAIT;
|
||||
}
|
||||
|
||||
if (iter_is_iovec(iter))
|
||||
if (user_backed_iter(iter))
|
||||
dio->flags |= IOMAP_DIO_DIRTY;
|
||||
} else {
|
||||
iomi.flags |= IOMAP_WRITE;
|
||||
|
|
|
@ -478,7 +478,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
|
|||
if (!is_sync_kiocb(iocb))
|
||||
dreq->iocb = iocb;
|
||||
|
||||
if (iter_is_iovec(iter))
|
||||
if (user_backed_iter(iter))
|
||||
dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
|
||||
|
||||
if (!swap)
|
||||
|
|
|
@ -26,6 +26,7 @@ enum iter_type {
|
|||
ITER_PIPE,
|
||||
ITER_XARRAY,
|
||||
ITER_DISCARD,
|
||||
ITER_UBUF,
|
||||
};
|
||||
|
||||
struct iov_iter_state {
|
||||
|
@ -38,6 +39,7 @@ struct iov_iter {
|
|||
u8 iter_type;
|
||||
bool nofault;
|
||||
bool data_source;
|
||||
bool user_backed;
|
||||
size_t iov_offset;
|
||||
size_t count;
|
||||
union {
|
||||
|
@ -46,6 +48,7 @@ struct iov_iter {
|
|||
const struct bio_vec *bvec;
|
||||
struct xarray *xarray;
|
||||
struct pipe_inode_info *pipe;
|
||||
void __user *ubuf;
|
||||
};
|
||||
union {
|
||||
unsigned long nr_segs;
|
||||
|
@ -70,6 +73,11 @@ static inline void iov_iter_save_state(struct iov_iter *iter,
|
|||
state->nr_segs = iter->nr_segs;
|
||||
}
|
||||
|
||||
/* Returns true when the type reported by iov_iter_type() for @i is ITER_UBUF. */
static inline bool iter_is_ubuf(const struct iov_iter *i)
|
||||
{
|
||||
return iov_iter_type(i) == ITER_UBUF;
|
||||
}
|
||||
|
||||
static inline bool iter_is_iovec(const struct iov_iter *i)
|
||||
{
|
||||
return iov_iter_type(i) == ITER_IOVEC;
|
||||
|
@ -105,6 +113,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
|
|||
return i->data_source ? WRITE : READ;
|
||||
}
|
||||
|
||||
/*
 * Returns the iterator's user_backed flag.  In this change the flag is set
 * to true by iov_iter_init() (ITER_IOVEC) and by iov_iter_ubuf() (ITER_UBUF).
 */
static inline bool user_backed_iter(const struct iov_iter *i)
|
||||
{
|
||||
return i->user_backed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Total number of bytes covered by an iovec.
|
||||
*
|
||||
|
@ -322,4 +335,17 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec,
|
|||
int import_single_range(int type, void __user *buf, size_t len,
|
||||
struct iovec *iov, struct iov_iter *i);
|
||||
|
||||
/*
 * Initialize @i as an ITER_UBUF iterator over the user buffer @buf.
 *
 * @i:         iterator to (re)initialize; its previous contents are overwritten
 * @direction: stored in ->data_source; a warning is raised if it has bits
 *             set outside READ | WRITE
 * @buf:       user-space buffer, stored in ->ubuf
 * @count:     stored in ->count
 *
 * The iterator is marked user_backed.  Members not named in the initializer
 * below (e.g. ->iov_offset, ->nofault) are zero-initialized by the compound
 * literal.
 */
static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
|
||||
void __user *buf, size_t count)
|
||||
{
|
||||
WARN_ON(direction & ~(READ | WRITE));
|
||||
*i = (struct iov_iter) {
|
||||
.iter_type = ITER_UBUF,
|
||||
.user_backed = true,
|
||||
.data_source = direction,
|
||||
.ubuf = buf,
|
||||
.count = count
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -16,6 +16,16 @@
|
|||
|
||||
#define PIPE_PARANOIA /* for now */
|
||||
|
||||
/*
 * covers ubuf and kbuf alike
 *
 * Single-segment step: len starts out as n and base is set to point
 * i->iov_offset bytes into __p.  The value of STEP is then subtracted from
 * len (so STEP presumably yields the amount left unprocessed -- confirm
 * against the callers), i->iov_offset is advanced by the resulting len, and
 * that len is written back to n.  off is declared locally as 0 so STEP may
 * refer to it.
 */
|
||||
#define iterate_buf(i, n, base, len, off, __p, STEP) { \
|
||||
size_t __maybe_unused off = 0; \
|
||||
len = n; \
|
||||
base = __p + i->iov_offset; \
|
||||
len -= (STEP); \
|
||||
i->iov_offset += len; \
|
||||
n = len; \
|
||||
}
|
||||
|
||||
/* covers iovec and kvec alike */
|
||||
#define iterate_iovec(i, n, base, len, off, __p, STEP) { \
|
||||
size_t off = 0; \
|
||||
|
@ -110,7 +120,12 @@ __out: \
|
|||
if (unlikely(i->count < n)) \
|
||||
n = i->count; \
|
||||
if (likely(n)) { \
|
||||
if (likely(iter_is_iovec(i))) { \
|
||||
if (likely(iter_is_ubuf(i))) { \
|
||||
void __user *base; \
|
||||
size_t len; \
|
||||
iterate_buf(i, n, base, len, off, \
|
||||
i->ubuf, (I)) \
|
||||
} else if (likely(iter_is_iovec(i))) { \
|
||||
const struct iovec *iov = i->iov; \
|
||||
void __user *base; \
|
||||
size_t len; \
|
||||
|
@ -275,7 +290,11 @@ out:
|
|||
*/
|
||||
size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
|
||||
{
|
||||
if (iter_is_iovec(i)) {
|
||||
if (iter_is_ubuf(i)) {
|
||||
size_t n = min(size, iov_iter_count(i));
|
||||
n -= fault_in_readable(i->ubuf + i->iov_offset, n);
|
||||
return size - n;
|
||||
} else if (iter_is_iovec(i)) {
|
||||
size_t count = min(size, iov_iter_count(i));
|
||||
const struct iovec *p;
|
||||
size_t skip;
|
||||
|
@ -314,7 +333,11 @@ EXPORT_SYMBOL(fault_in_iov_iter_readable);
|
|||
*/
|
||||
size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
|
||||
{
|
||||
if (iter_is_iovec(i)) {
|
||||
if (iter_is_ubuf(i)) {
|
||||
size_t n = min(size, iov_iter_count(i));
|
||||
n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n);
|
||||
return size - n;
|
||||
} else if (iter_is_iovec(i)) {
|
||||
size_t count = min(size, iov_iter_count(i));
|
||||
const struct iovec *p;
|
||||
size_t skip;
|
||||
|
@ -345,6 +368,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
|
|||
*i = (struct iov_iter) {
|
||||
.iter_type = ITER_IOVEC,
|
||||
.nofault = false,
|
||||
.user_backed = true,
|
||||
.data_source = direction,
|
||||
.iov = iov,
|
||||
.nr_segs = nr_segs,
|
||||
|
@ -494,7 +518,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
|
|||
{
|
||||
if (unlikely(iov_iter_is_pipe(i)))
|
||||
return copy_pipe_to_iter(addr, bytes, i);
|
||||
if (iter_is_iovec(i))
|
||||
if (user_backed_iter(i))
|
||||
might_fault();
|
||||
iterate_and_advance(i, bytes, base, len, off,
|
||||
copyout(base, addr + off, len),
|
||||
|
@ -583,7 +607,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
|
|||
{
|
||||
if (unlikely(iov_iter_is_pipe(i)))
|
||||
return copy_mc_pipe_to_iter(addr, bytes, i);
|
||||
if (iter_is_iovec(i))
|
||||
if (user_backed_iter(i))
|
||||
might_fault();
|
||||
__iterate_and_advance(i, bytes, base, len, off,
|
||||
copyout_mc(base, addr + off, len),
|
||||
|
@ -601,7 +625,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
|
|||
WARN_ON(1);
|
||||
return 0;
|
||||
}
|
||||
if (iter_is_iovec(i))
|
||||
if (user_backed_iter(i))
|
||||
might_fault();
|
||||
iterate_and_advance(i, bytes, base, len, off,
|
||||
copyin(addr + off, base, len),
|
||||
|
@ -894,16 +918,16 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
|
|||
{
|
||||
if (unlikely(i->count < size))
|
||||
size = i->count;
|
||||
if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
|
||||
if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) {
|
||||
i->iov_offset += size;
|
||||
i->count -= size;
|
||||
} else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
|
||||
/* iovec and kvec have identical layouts */
|
||||
iov_iter_iovec_advance(i, size);
|
||||
} else if (iov_iter_is_bvec(i)) {
|
||||
iov_iter_bvec_advance(i, size);
|
||||
} else if (iov_iter_is_pipe(i)) {
|
||||
pipe_advance(i, size);
|
||||
} else if (unlikely(iov_iter_is_xarray(i))) {
|
||||
i->iov_offset += size;
|
||||
i->count -= size;
|
||||
} else if (iov_iter_is_discard(i)) {
|
||||
i->count -= size;
|
||||
}
|
||||
|
@ -950,7 +974,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
|
|||
return;
|
||||
}
|
||||
unroll -= i->iov_offset;
|
||||
if (iov_iter_is_xarray(i)) {
|
||||
if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) {
|
||||
BUG(); /* We should never go beyond the start of the specified
|
||||
* range since we might then be straying into pages that
|
||||
* aren't pinned.
|
||||
|
@ -1158,6 +1182,14 @@ static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
|
|||
bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
|
||||
unsigned len_mask)
|
||||
{
|
||||
if (likely(iter_is_ubuf(i))) {
|
||||
if (i->count & len_mask)
|
||||
return false;
|
||||
if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
|
||||
return iov_iter_aligned_iovec(i, addr_mask, len_mask);
|
||||
|
||||
|
@ -1233,6 +1265,13 @@ static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
|
|||
|
||||
unsigned long iov_iter_alignment(const struct iov_iter *i)
|
||||
{
|
||||
if (likely(iter_is_ubuf(i))) {
|
||||
size_t size = i->count;
|
||||
if (size)
|
||||
return ((unsigned long)i->ubuf + i->iov_offset) | size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* iovec and kvec have identical layouts */
|
||||
if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
|
||||
return iov_iter_alignment_iovec(i);
|
||||
|
@ -1263,6 +1302,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
|
|||
size_t size = i->count;
|
||||
unsigned k;
|
||||
|
||||
if (iter_is_ubuf(i))
|
||||
return 0;
|
||||
|
||||
if (WARN_ON(!iter_is_iovec(i)))
|
||||
return ~0U;
|
||||
|
||||
|
@ -1385,12 +1427,15 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
|
|||
return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
|
||||
}
|
||||
|
||||
/* must be done on non-empty ITER_IOVEC one */
|
||||
/* must be done on non-empty ITER_UBUF or ITER_IOVEC one */
|
||||
static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size)
|
||||
{
|
||||
size_t skip;
|
||||
long k;
|
||||
|
||||
if (iter_is_ubuf(i))
|
||||
return (unsigned long)i->ubuf + i->iov_offset;
|
||||
|
||||
for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
|
||||
size_t len = i->iov[k].iov_len - skip;
|
||||
|
||||
|
@ -1432,7 +1477,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
|
|||
if (maxsize > MAX_RW_COUNT)
|
||||
maxsize = MAX_RW_COUNT;
|
||||
|
||||
if (likely(iter_is_iovec(i))) {
|
||||
if (likely(user_backed_iter(i))) {
|
||||
unsigned int gup_flags = 0;
|
||||
unsigned long addr;
|
||||
|
||||
|
@ -1559,7 +1604,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
|
|||
if (maxsize > MAX_RW_COUNT)
|
||||
maxsize = MAX_RW_COUNT;
|
||||
|
||||
if (likely(iter_is_iovec(i))) {
|
||||
if (likely(user_backed_iter(i))) {
|
||||
unsigned int gup_flags = 0;
|
||||
unsigned long addr;
|
||||
|
||||
|
@ -1715,6 +1760,11 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
|
|||
{
|
||||
if (unlikely(!i->count))
|
||||
return 0;
|
||||
if (likely(iter_is_ubuf(i))) {
|
||||
unsigned offs = offset_in_page(i->ubuf + i->iov_offset);
|
||||
int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE);
|
||||
return min(npages, maxpages);
|
||||
}
|
||||
/* iovec and kvec have identical layouts */
|
||||
if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
|
||||
return iov_npages(i, maxpages);
|
||||
|
@ -1749,17 +1799,16 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
|
|||
WARN_ON(1);
|
||||
return NULL;
|
||||
}
|
||||
if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
|
||||
return NULL;
|
||||
if (iov_iter_is_bvec(new))
|
||||
return new->bvec = kmemdup(new->bvec,
|
||||
new->nr_segs * sizeof(struct bio_vec),
|
||||
flags);
|
||||
else
|
||||
else if (iov_iter_is_kvec(new) || iter_is_iovec(new))
|
||||
/* iovec and kvec have identical layout */
|
||||
return new->iov = kmemdup(new->iov,
|
||||
new->nr_segs * sizeof(struct iovec),
|
||||
flags);
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL(dup_iter);
|
||||
|
||||
|
@ -1953,10 +2002,12 @@ EXPORT_SYMBOL(import_single_range);
|
|||
void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
|
||||
{
|
||||
if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) &&
|
||||
!iov_iter_is_kvec(i))
|
||||
!iov_iter_is_kvec(i) && !iter_is_ubuf(i))
|
||||
return;
|
||||
i->iov_offset = state->iov_offset;
|
||||
i->count = state->count;
|
||||
if (iter_is_ubuf(i))
|
||||
return;
|
||||
/*
|
||||
* For the *vec iters, nr_segs + iov is constant - if we increment
|
||||
* the vec, then we also decrement the nr_segs count. Hence we don't
|
||||
|
|
|
@ -2602,7 +2602,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
|||
ret = copy_page_to_iter(page, offset, nr, to);
|
||||
put_page(page);
|
||||
|
||||
} else if (iter_is_iovec(to)) {
|
||||
} else if (user_backed_iter(to)) {
|
||||
/*
|
||||
* Copy to user tends to be so well optimized, but
|
||||
* clear_user() not so much, that it is noticeably
|
||||
|
|
Loading…
Reference in New Issue