ublk: add read()/write() support for ublk char device
Support pread()/pwrite() on the ublk char device for reading/writing a request's io buffer, so the data copy between the io request buffer and the userspace buffer can be moved from the ublk driver into the ublk server. UBLK_F_NEED_GET_DATA then becomes unnecessary: the ublk server can allocate the buffer itself, without one extra round of uring command communication just to have userspace provide the buffer.

The io buffer is located via iocb->ki_pos, which encodes the buffer offset, io tag and queue id. Since iocb->ki_pos is a u64, it is big enough to hold any reasonable queue depth, number of queues and max io buffer size.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20230519065030.351216-7-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
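To make the ki_pos encoding concrete, here is a minimal standalone userspace sketch (not part of the patch): it builds the pseudo file offset a ublk server would pass to pread()/pwrite() for a given queue/tag pair, then decodes it the same way the new ublk_pos_to_*() helpers below do. The constant values are copied from the ublk_cmd.h hunk; ublk_io_buf_pos() is an illustrative name, not a real API.

/* Sketch only: layout constants copied from the ublk_cmd.h hunk below. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define UBLKSRV_IO_BUF_OFFSET   0x80000000ULL
#define UBLK_IO_BUF_BITS        25      /* bits 0..24: offset inside the io buffer */
#define UBLK_TAG_OFF            UBLK_IO_BUF_BITS
#define UBLK_TAG_BITS           16      /* bits 25..40: io tag */
#define UBLK_QID_OFF            (UBLK_TAG_OFF + UBLK_TAG_BITS)
#define UBLK_QID_BITS           12      /* bits 41..52: queue id */

/* Encode (q_id, tag, buf_off) into the offset used with pread()/pwrite(). */
static uint64_t ublk_io_buf_pos(uint16_t q_id, uint16_t tag, uint32_t buf_off)
{
        return UBLKSRV_IO_BUF_OFFSET +
                ((uint64_t)q_id << UBLK_QID_OFF) +
                ((uint64_t)tag << UBLK_TAG_OFF) + buf_off;
}

int main(void)
{
        uint64_t pos = ublk_io_buf_pos(3, 42, 4096);

        /* Decode exactly the way the driver's ublk_pos_to_*() helpers do. */
        assert(((pos - UBLKSRV_IO_BUF_OFFSET) >> UBLK_QID_OFF) == 3);
        assert((((pos - UBLKSRV_IO_BUF_OFFSET) >> UBLK_TAG_OFF) &
                ((1ULL << UBLK_TAG_BITS) - 1)) == 42);
        assert(((pos - UBLKSRV_IO_BUF_OFFSET) &
                ((1ULL << UBLK_IO_BUF_BITS) - 1)) == 4096);
        printf("pos=0x%llx\n", (unsigned long long)pos);
        return 0;
}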
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -207,6 +207,23 @@ static unsigned int ublks_added;	/* protected by ublk_ctl_mutex */
 
 static struct miscdevice ublk_misc;
 
+static inline unsigned ublk_pos_to_hwq(loff_t pos)
+{
+	return ((pos - UBLKSRV_IO_BUF_OFFSET) >> UBLK_QID_OFF) &
+		UBLK_QID_BITS_MASK;
+}
+
+static inline unsigned ublk_pos_to_buf_off(loff_t pos)
+{
+	return (pos - UBLKSRV_IO_BUF_OFFSET) & UBLK_IO_BUF_BITS_MASK;
+}
+
+static inline unsigned ublk_pos_to_tag(loff_t pos)
+{
+	return ((pos - UBLKSRV_IO_BUF_OFFSET) >> UBLK_TAG_OFF) &
+		UBLK_TAG_BITS_MASK;
+}
+
 static void ublk_dev_param_basic_apply(struct ublk_device *ub)
 {
 	struct request_queue *q = ub->ub_disk->queue;
@@ -1429,6 +1446,36 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
 	return -EIOCBQUEUED;
 }
 
+static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
+		struct ublk_queue *ubq, int tag, size_t offset)
+{
+	struct request *req;
+
+	if (!ublk_need_req_ref(ubq))
+		return NULL;
+
+	req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
+	if (!req)
+		return NULL;
+
+	if (!ublk_get_req_ref(ubq, req))
+		return NULL;
+
+	if (unlikely(!blk_mq_request_started(req) || req->tag != tag))
+		goto fail_put;
+
+	if (!ublk_rq_has_data(req))
+		goto fail_put;
+
+	if (offset > blk_rq_bytes(req))
+		goto fail_put;
+
+	return req;
+fail_put:
+	ublk_put_req_ref(ubq, req);
+	return NULL;
+}
+
 static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
 	/*
@@ -1446,11 +1493,112 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 	return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
 }
 
+static inline bool ublk_check_ubuf_dir(const struct request *req,
+		int ubuf_dir)
+{
+	/* copy ubuf to request pages */
+	if (req_op(req) == REQ_OP_READ && ubuf_dir == ITER_SOURCE)
+		return true;
+
+	/* copy request pages to ubuf */
+	if (req_op(req) == REQ_OP_WRITE && ubuf_dir == ITER_DEST)
+		return true;
+
+	return false;
+}
+
+static struct request *ublk_check_and_get_req(struct kiocb *iocb,
+		struct iov_iter *iter, size_t *off, int dir)
+{
+	struct ublk_device *ub = iocb->ki_filp->private_data;
+	struct ublk_queue *ubq;
+	struct request *req;
+	size_t buf_off;
+	u16 tag, q_id;
+
+	if (!ub)
+		return ERR_PTR(-EACCES);
+
+	if (!user_backed_iter(iter))
+		return ERR_PTR(-EACCES);
+
+	if (ub->dev_info.state == UBLK_S_DEV_DEAD)
+		return ERR_PTR(-EACCES);
+
+	tag = ublk_pos_to_tag(iocb->ki_pos);
+	q_id = ublk_pos_to_hwq(iocb->ki_pos);
+	buf_off = ublk_pos_to_buf_off(iocb->ki_pos);
+
+	if (q_id >= ub->dev_info.nr_hw_queues)
+		return ERR_PTR(-EINVAL);
+
+	ubq = ublk_get_queue(ub, q_id);
+	if (!ubq)
+		return ERR_PTR(-EINVAL);
+
+	if (tag >= ubq->q_depth)
+		return ERR_PTR(-EINVAL);
+
+	req = __ublk_check_and_get_req(ub, ubq, tag, buf_off);
+	if (!req)
+		return ERR_PTR(-EINVAL);
+
+	if (!req->mq_hctx || !req->mq_hctx->driver_data)
+		goto fail;
+
+	if (!ublk_check_ubuf_dir(req, dir))
+		goto fail;
+
+	*off = buf_off;
+	return req;
+fail:
+	ublk_put_req_ref(ubq, req);
+	return ERR_PTR(-EACCES);
+}
+
+static ssize_t ublk_ch_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct ublk_queue *ubq;
+	struct request *req;
+	size_t buf_off;
+	size_t ret;
+
+	req = ublk_check_and_get_req(iocb, to, &buf_off, ITER_DEST);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	ret = ublk_copy_user_pages(req, buf_off, to, ITER_DEST);
+	ubq = req->mq_hctx->driver_data;
+	ublk_put_req_ref(ubq, req);
+
+	return ret;
+}
+
+static ssize_t ublk_ch_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct ublk_queue *ubq;
+	struct request *req;
+	size_t buf_off;
+	size_t ret;
+
+	req = ublk_check_and_get_req(iocb, from, &buf_off, ITER_SOURCE);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	ret = ublk_copy_user_pages(req, buf_off, from, ITER_SOURCE);
+	ubq = req->mq_hctx->driver_data;
+	ublk_put_req_ref(ubq, req);
+
+	return ret;
+}
+
 static const struct file_operations ublk_ch_fops = {
 	.owner = THIS_MODULE,
 	.open = ublk_ch_open,
 	.release = ublk_ch_release,
 	.llseek = no_llseek,
+	.read_iter = ublk_ch_read_iter,
+	.write_iter = ublk_ch_write_iter,
 	.uring_cmd = ublk_ch_uring_cmd,
 	.mmap = ublk_ch_mmap,
 };
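With .read_iter/.write_iter wired up, the server-side flow looks roughly like the hedged sketch below (not part of the patch): ublkc_fd is assumed to be an open fd on the /dev/ublkcN char device, and the shift constants repeat the UAPI layout from the first sketch.

/* Sketch: moving the data copy into the ublk server via pread()/pwrite().
 * The encoding repeats UBLKSRV_IO_BUF_OFFSET / UBLK_QID_OFF / UBLK_TAG_OFF
 * from the UAPI hunk; these helper names are illustrative. */
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>

static off_t ublk_io_buf_pos(uint16_t q_id, uint16_t tag)
{
        return (off_t)(0x80000000ULL + ((uint64_t)q_id << 41) +
                       ((uint64_t)tag << 25));
}

/* Block-layer WRITE: pull the payload from the request pages into a
 * server-allocated buffer; this is what replaces UBLK_F_NEED_GET_DATA. */
static ssize_t fetch_write_payload(int ublkc_fd, uint16_t q_id, uint16_t tag,
                                   void *buf, size_t len)
{
        return pread(ublkc_fd, buf, len, ublk_io_buf_pos(q_id, tag));
}

/* Block-layer READ: push the server's result back into the request pages
 * before completing the io via UBLK_IO_COMMIT_AND_FETCH_REQ. */
static ssize_t store_read_result(int ublkc_fd, uint16_t q_id, uint16_t tag,
                                 const void *buf, size_t len)
{
        return pwrite(ublkc_fd, buf, len, ublk_io_buf_pos(q_id, tag));
}

Note the direction check in ublk_check_ubuf_dir(): pread() is only allowed against a WRITE request and pwrite() only against a READ request; a mismatch, or a request that is no longer live, fails in ublk_check_and_get_req().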
@@ -2362,6 +2510,9 @@ static int __init ublk_init(void)
 {
 	int ret;
 
+	BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
+			UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
+
 	init_waitqueue_head(&ublk_idr_wq);
 
 	ret = misc_register(&ublk_misc);
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -93,9 +93,29 @@
 #define UBLKSRV_CMD_BUF_OFFSET	0
 #define UBLKSRV_IO_BUF_OFFSET	0x80000000
 
-/* tag bit is 12bit, so at most 4096 IOs for each queue */
+/* tag bit is 16bit, so far limit at most 4096 IOs for each queue */
 #define UBLK_MAX_QUEUE_DEPTH	4096
 
+/* single IO buffer max size is 32MB */
+#define UBLK_IO_BUF_OFF		0
+#define UBLK_IO_BUF_BITS	25
+#define UBLK_IO_BUF_BITS_MASK	((1ULL << UBLK_IO_BUF_BITS) - 1)
+
+/* so at most 64K IOs for each queue */
+#define UBLK_TAG_OFF		UBLK_IO_BUF_BITS
+#define UBLK_TAG_BITS		16
+#define UBLK_TAG_BITS_MASK	((1ULL << UBLK_TAG_BITS) - 1)
+
+/* max 4096 queues */
+#define UBLK_QID_OFF		(UBLK_TAG_OFF + UBLK_TAG_BITS)
+#define UBLK_QID_BITS		12
+#define UBLK_QID_BITS_MASK	((1ULL << UBLK_QID_BITS) - 1)
+
+#define UBLK_MAX_NR_QUEUES	(1U << UBLK_QID_BITS)
+
+#define UBLKSRV_IO_BUF_TOTAL_BITS	(UBLK_QID_OFF + UBLK_QID_BITS)
+#define UBLKSRV_IO_BUF_TOTAL_SIZE	(1ULL << UBLKSRV_IO_BUF_TOTAL_BITS)
+
 /*
  * zero copy requires 4k block size, and can remap ublk driver's io
  * request into ublksrv's vm space
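Finally, the arithmetic the header comments claim can be checked at compile time. A standalone sketch assuming only the macro values from the hunk above; the asserts mirror the header comments and the BUILD_BUG_ON() added to ublk_init().

/* Sketch: compile-time verification of the ki_pos bit layout. */
#define UBLK_IO_BUF_BITS	25
#define UBLK_TAG_BITS		16
#define UBLK_QID_BITS		12
#define UBLKSRV_IO_BUF_OFFSET	0x80000000ULL
#define UBLKSRV_IO_BUF_TOTAL_BITS \
	(UBLK_IO_BUF_BITS + UBLK_TAG_BITS + UBLK_QID_BITS)

_Static_assert((1ULL << UBLK_IO_BUF_BITS) == 32ULL << 20,
	       "single IO buffer max size is 32MB");
_Static_assert((1UL << UBLK_TAG_BITS) == 65536,
	       "at most 64K IOs for each queue");
_Static_assert((1U << UBLK_QID_BITS) == 4096,
	       "max 4096 queues");
_Static_assert(UBLKSRV_IO_BUF_TOTAL_BITS == 53,
	       "25 + 16 + 12 = 53 bits, so a u64 ki_pos has room to spare");
/* The same property the new BUILD_BUG_ON() in ublk_init() enforces: the
 * io buffer window starting at UBLKSRV_IO_BUF_OFFSET must not wrap a u64. */
_Static_assert(UBLKSRV_IO_BUF_OFFSET + (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS)
	       > UBLKSRV_IO_BUF_OFFSET,
	       "buffer offset window must not overflow");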