blk-mq: improve layout of blk_mq_hw_ctx
Various cache line optimizations: - Move delay_work towards the end. It's huge, and we don't use it a lot (only SCSI). - Move the atomic state into the same cacheline as the dispatch list and lock. - Rearrange a few members to pack it better. - Shrink the max-order for dispatch accounting from 10 to 7. This means that ->dispatched[] and ->run now take up their own cacheline. This shrinks struct blk_mq_hw_ctx down to 8 cachelines. Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
parent
27489a3c82
commit
8d354f133e
|
@ -22,11 +22,10 @@ struct blk_mq_hw_ctx {
|
|||
struct {
|
||||
spinlock_t lock;
|
||||
struct list_head dispatch;
|
||||
unsigned long state; /* BLK_MQ_S_* flags */
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
unsigned long state; /* BLK_MQ_S_* flags */
|
||||
struct work_struct run_work;
|
||||
struct delayed_work delay_work;
|
||||
cpumask_var_t cpumask;
|
||||
int next_cpu;
|
||||
int next_cpu_batch;
|
||||
|
@ -40,8 +39,8 @@ struct blk_mq_hw_ctx {
|
|||
|
||||
struct blk_mq_ctxmap ctx_map;
|
||||
|
||||
unsigned int nr_ctx;
|
||||
struct blk_mq_ctx **ctxs;
|
||||
unsigned int nr_ctx;
|
||||
|
||||
atomic_t wait_index;
|
||||
|
||||
|
@ -49,7 +48,7 @@ struct blk_mq_hw_ctx {
|
|||
|
||||
unsigned long queued;
|
||||
unsigned long run;
|
||||
#define BLK_MQ_MAX_DISPATCH_ORDER 10
|
||||
#define BLK_MQ_MAX_DISPATCH_ORDER 7
|
||||
unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
|
||||
|
||||
unsigned int numa_node;
|
||||
|
@ -57,6 +56,8 @@ struct blk_mq_hw_ctx {
|
|||
|
||||
atomic_t nr_active;
|
||||
|
||||
struct delayed_work delay_work;
|
||||
|
||||
struct blk_mq_cpu_notifier cpu_notifier;
|
||||
struct kobject kobj;
|
||||
|
||||
|
|
Loading…
Reference in New Issue