2005-04-17 06:20:36 +08:00
|
|
|
/*
|
2013-05-12 22:14:07 +08:00
|
|
|
* loop.h
|
2005-04-17 06:20:36 +08:00
|
|
|
*
|
|
|
|
* Written by Theodore Ts'o, 3/29/93.
|
|
|
|
*
|
|
|
|
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
|
|
|
|
* permitted under the GNU General Public License.
|
|
|
|
*/
|
2012-10-13 17:46:48 +08:00
|
|
|
#ifndef _LINUX_LOOP_H
|
|
|
|
#define _LINUX_LOOP_H
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#include <linux/bio.h>
|
|
|
|
#include <linux/blkdev.h>
|
2015-01-03 06:20:25 +08:00
|
|
|
#include <linux/blk-mq.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/spinlock.h>
|
2006-03-23 19:00:38 +08:00
|
|
|
#include <linux/mutex.h>
|
2012-10-13 17:46:48 +08:00
|
|
|
#include <uapi/linux/loop.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Possible states of device */
|
|
|
|
enum {
|
|
|
|
Lo_unbound,
|
|
|
|
Lo_bound,
|
|
|
|
Lo_rundown,
|
2021-06-05 22:09:50 +08:00
|
|
|
Lo_deleting,
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct loop_func_table;
|
|
|
|
|
|
|
|
struct loop_device {
|
|
|
|
int lo_number;
|
2015-05-06 12:26:23 +08:00
|
|
|
atomic_t lo_refcnt;
|
2005-04-17 06:20:36 +08:00
|
|
|
loff_t lo_offset;
|
|
|
|
loff_t lo_sizelimit;
|
|
|
|
int lo_flags;
|
|
|
|
char lo_file_name[LO_NAME_SIZE];
|
|
|
|
|
|
|
|
struct file * lo_backing_file;
|
|
|
|
struct block_device *lo_device;
|
|
|
|
|
2005-10-21 15:22:34 +08:00
|
|
|
gfp_t old_gfp_mask;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
spinlock_t lo_lock;
|
|
|
|
int lo_state;
|
2021-06-29 10:38:15 +08:00
|
|
|
spinlock_t lo_work_lock;
|
|
|
|
struct workqueue_struct *workqueue;
|
|
|
|
struct work_struct rootcg_work;
|
|
|
|
struct list_head rootcg_cmd_list;
|
|
|
|
struct list_head idle_worker_list;
|
|
|
|
struct rb_root worker_tree;
|
|
|
|
struct timer_list timer;
|
2015-08-17 10:31:49 +08:00
|
|
|
bool use_dio;
|
2018-05-05 00:58:09 +08:00
|
|
|
bool sysfs_inited;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-07-24 09:44:00 +08:00
|
|
|
struct request_queue *lo_queue;
|
block: loop: improve performance via blk-mq
The conversion is a bit straightforward, and use work queue to
dispatch requests of loop block, and one big change is that requests
is submitted to backend file/device concurrently with work queue,
so throughput may get improved much. Given write requests over same
file are often run exclusively, so don't handle them concurrently for
avoiding extra context switch cost, possible lock contention and work
schedule cost. Also with blk-mq, there is opportunity to get loop I/O
merged before submitting to backend file/device.
In the following test:
- base: v3.19-rc2-2041231
- loop over file in ext4 file system on SSD disk
- bs: 4k, libaio, io depth: 64, O_DIRECT, num of jobs: 1
- throughput: IOPS
------------------------------------------------------
| | base | base with loop-mq | delta |
------------------------------------------------------
| randread | 1740 | 25318 | +1355%|
------------------------------------------------------
| read | 42196 | 51771 | +22.6%|
-----------------------------------------------------
| randwrite | 35709 | 34624 | -3% |
-----------------------------------------------------
| write | 39137 | 40326 | +3% |
-----------------------------------------------------
So loop-mq can improve throughput for both read and randread, meantime,
performance of write and randwrite isn't hurted basically.
Another benefit is that loop driver code gets simplified
much after blk-mq conversion, and the patch can be thought as
cleanup too.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-12-31 21:22:57 +08:00
|
|
|
struct blk_mq_tag_set tag_set;
|
2007-05-08 15:28:20 +08:00
|
|
|
struct gendisk *lo_disk;
|
2021-01-26 22:46:30 +08:00
|
|
|
struct mutex lo_mutex;
|
2021-09-02 08:07:35 +08:00
|
|
|
bool idr_visible;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
block: loop: improve performance via blk-mq
The conversion is a bit straightforward, and use work queue to
dispatch requests of loop block, and one big change is that requests
is submitted to backend file/device concurrently with work queue,
so throughput may get improved much. Given write requests over same
file are often run exclusively, so don't handle them concurrently for
avoiding extra context switch cost, possible lock contention and work
schedule cost. Also with blk-mq, there is opportunity to get loop I/O
merged before submitting to backend file/device.
In the following test:
- base: v3.19-rc2-2041231
- loop over file in ext4 file system on SSD disk
- bs: 4k, libaio, io depth: 64, O_DIRECT, num of jobs: 1
- throughput: IOPS
------------------------------------------------------
| | base | base with loop-mq | delta |
------------------------------------------------------
| randread | 1740 | 25318 | +1355%|
------------------------------------------------------
| read | 42196 | 51771 | +22.6%|
-----------------------------------------------------
| randwrite | 35709 | 34624 | -3% |
-----------------------------------------------------
| write | 39137 | 40326 | +3% |
-----------------------------------------------------
So loop-mq can improve throughput for both read and randread, meantime,
performance of write and randwrite isn't hurted basically.
Another benefit is that loop driver code gets simplified
much after blk-mq conversion, and the patch can be thought as
cleanup too.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-12-31 21:22:57 +08:00
|
|
|
struct loop_cmd {
|
2021-06-29 10:38:15 +08:00
|
|
|
struct list_head list_entry;
|
2017-09-21 05:24:34 +08:00
|
|
|
bool use_aio; /* use AIO interface to handle I/O */
|
|
|
|
atomic_t ref; /* only for aio */
|
2017-04-20 22:03:02 +08:00
|
|
|
long ret;
|
block: loop: support DIO & AIO
There are at least 3 advantages to use direct I/O and AIO on
read/write loop's backing file:
1) double cache can be avoided, then memory usage gets
decreased a lot
2) not like user space direct I/O, there isn't cost of
pinning pages
3) avoid context switch for obtaining good throughput
- in buffered file read, random I/O top throughput is often obtained
only if they are submitted concurrently from lots of tasks; but for
sequential I/O, most of times they can be hit from page cache, so
concurrent submissions often introduce unnecessary context switch
and can't improve throughput much. There was such discussion[1]
to use non-blocking I/O to improve the problem for application.
- with direct I/O and AIO, concurrent submissions can be
avoided and random read throughput can't be affected meantime
xfstests(-g auto, ext4) is basically passed when running with
direct I/O(aio), one exception is generic/232, but it failed in
loop buffered I/O(4.2-rc6-next-20150814) too.
Follows the fio test result for performance purpose:
4 jobs fio test inside ext4 file system over loop block
1) How to run
- KVM: 4 VCPUs, 2G RAM
- linux kernel: 4.2-rc6-next-20150814(base) with the patchset
- the loop block is over one image on SSD.
- linux psync, 4 jobs, size 1500M, ext4 over loop block
- test result: IOPS from fio output
2) Throughput(IOPS) becomes a bit better with direct I/O(aio)
-------------------------------------------------------------
test cases |randread |read |randwrite |write |
-------------------------------------------------------------
base |8015 |113811 |67442 |106978
-------------------------------------------------------------
base+loop aio |8136 |125040 |67811 |111376
-------------------------------------------------------------
- somehow, it should be caused by more page cache avaiable for
application or one extra page copy is avoided in case of direct I/O
3) context switch
- context switch decreased by ~50% with loop direct I/O(aio)
compared with loop buffered I/O(4.2-rc6-next-20150814)
4) memory usage from /proc/meminfo
-------------------------------------------------------------
| Buffers | Cached
-------------------------------------------------------------
base | > 760MB | ~950MB
-------------------------------------------------------------
base+loop direct I/O(aio) | < 5MB | ~1.6GB
-------------------------------------------------------------
- so there are much more page caches available for application with
direct I/O
[1] https://lwn.net/Articles/612483/
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
2015-08-17 10:31:51 +08:00
|
|
|
struct kiocb iocb;
|
2017-09-01 13:09:46 +08:00
|
|
|
struct bio_vec *bvec;
|
2021-06-29 10:38:21 +08:00
|
|
|
struct cgroup_subsys_state *blkcg_css;
|
|
|
|
struct cgroup_subsys_state *memcg_css;
|
block: loop: improve performance via blk-mq
The conversion is a bit straightforward, and use work queue to
dispatch requests of loop block, and one big change is that requests
is submitted to backend file/device concurrently with work queue,
so throughput may get improved much. Given write requests over same
file are often run exclusively, so don't handle them concurrently for
avoiding extra context switch cost, possible lock contention and work
schedule cost. Also with blk-mq, there is opportunity to get loop I/O
merged before submitting to backend file/device.
In the following test:
- base: v3.19-rc2-2041231
- loop over file in ext4 file system on SSD disk
- bs: 4k, libaio, io depth: 64, O_DIRECT, num of jobs: 1
- throughput: IOPS
------------------------------------------------------
| | base | base with loop-mq | delta |
------------------------------------------------------
| randread | 1740 | 25318 | +1355%|
------------------------------------------------------
| read | 42196 | 51771 | +22.6%|
-----------------------------------------------------
| randwrite | 35709 | 34624 | -3% |
-----------------------------------------------------
| write | 39137 | 40326 | +3% |
-----------------------------------------------------
So loop-mq can improve throughput for both read and randread, meantime,
performance of write and randwrite isn't hurted basically.
Another benefit is that loop driver code gets simplified
much after blk-mq conversion, and the patch can be thought as
cleanup too.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
2014-12-31 21:22:57 +08:00
|
|
|
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|