io_uring: modularize io_sqe_buffer_register
Split io_sqe_buffer_register into two routines:

- io_sqe_buffer_register() registers a single buffer
- io_sqe_buffers_register iterates over all user specified buffers

Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 3a81fd0204
commit 0a96bbe499

 fs/io_uring.c | 162
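Before the diff, a rough userspace sketch of the shape this split gives the registration path: a helper that handles exactly one buffer, plus a wrapper that validates the arguments, iterates over the user-supplied iovecs, and unwinds through the unregister routine on failure. This is an illustrative analogue only — the buf_table/mapped_buf types, the malloc-based bookkeeping, and the -1 error returns are invented for the example and are not the kernel code shown below.

/*
 * Userspace sketch (not kernel code) of the structure introduced by this
 * commit: buffer_register() sets up one buffer, buffers_register() loops
 * over all of them and tears everything down if any registration fails.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>

struct mapped_buf {
	void   *base;
	size_t  len;
};

struct buf_table {
	struct mapped_buf *bufs;
	unsigned           nr_bufs;
};

/* analogue of io_sqe_buffers_unregister(): drop everything registered so far */
static void buffers_unregister(struct buf_table *t)
{
	free(t->bufs);
	t->bufs = NULL;
	t->nr_bufs = 0;
}

/* analogue of io_sqe_buffer_register(): set up a single buffer */
static int buffer_register(const struct iovec *iov, struct mapped_buf *buf)
{
	if (!iov->iov_base || !iov->iov_len)
		return -1;	/* the kernel returns -EFAULT here */
	buf->base = iov->iov_base;
	buf->len  = iov->iov_len;
	return 0;
}

/* analogue of io_sqe_buffers_register(): validate, iterate, unwind on error */
static int buffers_register(struct buf_table *t, const struct iovec *iovs,
			    unsigned nr_args)
{
	unsigned i;
	int ret = 0;

	if (t->bufs)
		return -1;	/* -EBUSY in the kernel */
	if (!nr_args)
		return -1;	/* -EINVAL */
	t->bufs = calloc(nr_args, sizeof(*t->bufs));
	if (!t->bufs)
		return -1;	/* -ENOMEM */

	for (i = 0; i < nr_args; i++) {
		ret = buffer_register(&iovs[i], &t->bufs[i]);
		if (ret)
			break;
		t->nr_bufs++;
	}
	if (ret)
		buffers_unregister(t);
	return ret;
}

int main(void)
{
	char a[64], b[128];
	struct iovec iovs[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
	struct buf_table table = { 0 };

	printf("register: %d, nr_bufs: %u\n",
	       buffers_register(&table, iovs, 2), table.nr_bufs);
	buffers_unregister(&table);
	return 0;
}

As in the kernel patch, the point of this shape is that the error path only has to undo whole-buffer registrations, so the iterating wrapper can simply break out of the loop and call the unregister routine.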
@@ -8370,7 +8370,7 @@ static unsigned long ring_pages(unsigned sq_entries, unsigned cq_entries)
 	return pages;
 }
 
-static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
+static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
 {
 	int i, j;
 
@@ -8488,85 +8488,45 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
 	return ret;
 }
 
-static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
-				  unsigned nr_args)
+static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
+				  struct io_mapped_ubuf *imu,
+				  struct page **last_hpage)
 {
 	struct vm_area_struct **vmas = NULL;
 	struct page **pages = NULL;
-	struct page *last_hpage = NULL;
-	int i, j, got_pages = 0;
-	int ret = -EINVAL;
-
-	if (ctx->user_bufs)
-		return -EBUSY;
-	if (!nr_args || nr_args > UIO_MAXIOV)
-		return -EINVAL;
-
-	ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf),
-					GFP_KERNEL);
-	if (!ctx->user_bufs)
-		return -ENOMEM;
-
-	for (i = 0; i < nr_args; i++) {
-		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
-		unsigned long off, start, end, ubuf;
-		int pret, nr_pages;
-		struct iovec iov;
-		size_t size;
-
-		ret = io_copy_iov(ctx, &iov, arg, i);
-		if (ret)
-			goto err;
-
-		/*
-		 * Don't impose further limits on the size and buffer
-		 * constraints here, we'll -EINVAL later when IO is
-		 * submitted if they are wrong.
-		 */
-		ret = -EFAULT;
-		if (!iov.iov_base || !iov.iov_len)
-			goto err;
-
-		/* arbitrary limit, but we need something */
-		if (iov.iov_len > SZ_1G)
-			goto err;
-
-		ubuf = (unsigned long) iov.iov_base;
-		end = (ubuf + iov.iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		start = ubuf >> PAGE_SHIFT;
-		nr_pages = end - start;
-
-		ret = 0;
-		if (!pages || nr_pages > got_pages) {
-			kvfree(vmas);
-			kvfree(pages);
-			pages = kvmalloc_array(nr_pages, sizeof(struct page *),
-						GFP_KERNEL);
-			vmas = kvmalloc_array(nr_pages,
-					sizeof(struct vm_area_struct *),
-					GFP_KERNEL);
-			if (!pages || !vmas) {
-				ret = -ENOMEM;
-				goto err;
-			}
-			got_pages = nr_pages;
-		}
-
-		imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec),
-						GFP_KERNEL);
-		ret = -ENOMEM;
-		if (!imu->bvec)
-			goto err;
-
-		ret = 0;
-		mmap_read_lock(current->mm);
-		pret = pin_user_pages(ubuf, nr_pages,
-				      FOLL_WRITE | FOLL_LONGTERM,
-				      pages, vmas);
-		if (pret == nr_pages) {
-			/* don't support file backed memory */
-			for (j = 0; j < nr_pages; j++) {
-				struct vm_area_struct *vma = vmas[j];
-
-				if (vma->vm_file &&
-				    !is_file_hugepages(vma->vm_file)) {
+	unsigned long off, start, end, ubuf;
+	size_t size;
+	int ret, pret, nr_pages, i;
+
+	ubuf = (unsigned long) iov->iov_base;
+	end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	start = ubuf >> PAGE_SHIFT;
+	nr_pages = end - start;
+
+	ret = -ENOMEM;
+	pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto done;
+
+	vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *),
+			      GFP_KERNEL);
+	if (!vmas)
+		goto done;
+
+	imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec),
+				   GFP_KERNEL);
+	if (!imu->bvec)
+		goto done;
+
+	ret = 0;
+	mmap_read_lock(current->mm);
+	pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
+			      pages, vmas);
+	if (pret == nr_pages) {
+		/* don't support file backed memory */
+		for (i = 0; i < nr_pages; i++) {
+			struct vm_area_struct *vma = vmas[i];
+
+			if (vma->vm_file &&
+			    !is_file_hugepages(vma->vm_file)) {
@@ -8586,42 +8546,86 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
 		if (pret > 0)
 			unpin_user_pages(pages, pret);
 		kvfree(imu->bvec);
-		goto err;
+		goto done;
 	}
 
-	ret = io_buffer_account_pin(ctx, pages, pret, imu, &last_hpage);
+	ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage);
 	if (ret) {
 		unpin_user_pages(pages, pret);
 		kvfree(imu->bvec);
-		goto err;
+		goto done;
 	}
 
 	off = ubuf & ~PAGE_MASK;
-	size = iov.iov_len;
-	for (j = 0; j < nr_pages; j++) {
+	size = iov->iov_len;
+	for (i = 0; i < nr_pages; i++) {
 		size_t vec_len;
 
 		vec_len = min_t(size_t, size, PAGE_SIZE - off);
-		imu->bvec[j].bv_page = pages[j];
-		imu->bvec[j].bv_len = vec_len;
-		imu->bvec[j].bv_offset = off;
+		imu->bvec[i].bv_page = pages[i];
+		imu->bvec[i].bv_len = vec_len;
+		imu->bvec[i].bv_offset = off;
 		off = 0;
 		size -= vec_len;
 	}
 	/* store original address for later verification */
 	imu->ubuf = ubuf;
-	imu->len = iov.iov_len;
+	imu->len = iov->iov_len;
 	imu->nr_bvecs = nr_pages;
+	ret = 0;
+done:
+	kvfree(pages);
+	kvfree(vmas);
+	return ret;
+}
 
+static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
+				   unsigned int nr_args)
+{
+	int i, ret;
+	struct iovec iov;
+	struct page *last_hpage = NULL;
+
+	if (ctx->user_bufs)
+		return -EBUSY;
+	if (!nr_args || nr_args > UIO_MAXIOV)
+		return -EINVAL;
+
+	ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf),
+					GFP_KERNEL);
+	if (!ctx->user_bufs)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_args; i++) {
+		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
+
+		ret = io_copy_iov(ctx, &iov, arg, i);
+		if (ret)
+			break;
+
+		/*
+		 * Don't impose further limits on the size and buffer
+		 * constraints here, we'll -EINVAL later when IO is
+		 * submitted if they are wrong.
+		 */
+		ret = -EFAULT;
+		if (!iov.iov_base || !iov.iov_len)
+			break;
+
+		/* arbitrary limit, but we need something */
+		if (iov.iov_len > SZ_1G)
+			break;
+
+		ret = io_sqe_buffer_register(ctx, &iov, imu, &last_hpage);
+		if (ret)
+			break;
+
 		ctx->nr_user_bufs++;
 	}
-	kvfree(pages);
-	kvfree(vmas);
-	return 0;
-err:
-	kvfree(pages);
-	kvfree(vmas);
-	io_sqe_buffer_unregister(ctx);
+
+	if (ret)
+		io_sqe_buffers_unregister(ctx);
+
 	return ret;
 }
 
@@ -8675,7 +8679,7 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
 static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
 	io_finish_async(ctx);
-	io_sqe_buffer_unregister(ctx);
+	io_sqe_buffers_unregister(ctx);
 
 	if (ctx->sqo_task) {
 		put_task_struct(ctx->sqo_task);
@@ -10057,13 +10061,13 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 
 	switch (opcode) {
 	case IORING_REGISTER_BUFFERS:
-		ret = io_sqe_buffer_register(ctx, arg, nr_args);
+		ret = io_sqe_buffers_register(ctx, arg, nr_args);
 		break;
 	case IORING_UNREGISTER_BUFFERS:
 		ret = -EINVAL;
 		if (arg || nr_args)
 			break;
-		ret = io_sqe_buffer_unregister(ctx);
+		ret = io_sqe_buffers_unregister(ctx);
 		break;
 	case IORING_REGISTER_FILES:
 		ret = io_sqe_files_register(ctx, arg, nr_args);