diff --git a/block/bio.c b/block/bio.c index 265bff6b549a..1319dd2aa0b8 100644 --- a/block/bio.c +++ b/block/bio.c @@ -25,6 +25,11 @@ #include "blk.h" #include "blk-rq-qos.h" +struct bio_alloc_cache { + struct bio_list free_list; + unsigned int nr; +}; + static struct biovec_slab { int nr_vecs; char *name; @@ -246,12 +251,40 @@ static void bio_free(struct bio *bio) void bio_init(struct bio *bio, struct bio_vec *table, unsigned short max_vecs) { - memset(bio, 0, sizeof(*bio)); + bio->bi_next = NULL; + bio->bi_bdev = NULL; + bio->bi_opf = 0; + bio->bi_flags = 0; + bio->bi_ioprio = 0; + bio->bi_write_hint = 0; + bio->bi_status = 0; + bio->bi_iter.bi_sector = 0; + bio->bi_iter.bi_size = 0; + bio->bi_iter.bi_idx = 0; + bio->bi_iter.bi_bvec_done = 0; + bio->bi_end_io = NULL; + bio->bi_private = NULL; +#ifdef CONFIG_BLK_CGROUP + bio->bi_blkg = NULL; + bio->bi_issue.value = 0; +#ifdef CONFIG_BLK_CGROUP_IOCOST + bio->bi_iocost_cost = 0; +#endif +#endif +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + bio->bi_crypt_context = NULL; +#endif +#ifdef CONFIG_BLK_DEV_INTEGRITY + bio->bi_integrity = NULL; +#endif + bio->bi_vcnt = 0; + atomic_set(&bio->__bi_remaining, 1); atomic_set(&bio->__bi_cnt, 1); - bio->bi_io_vec = table; bio->bi_max_vecs = max_vecs; + bio->bi_io_vec = table; + bio->bi_pool = NULL; } EXPORT_SYMBOL(bio_init); @@ -586,6 +619,53 @@ void guard_bio_eod(struct bio *bio) bio_truncate(bio, maxsector << 9); } +#define ALLOC_CACHE_MAX 512 +#define ALLOC_CACHE_SLACK 64 + +static void bio_alloc_cache_prune(struct bio_alloc_cache *cache, + unsigned int nr) +{ + unsigned int i = 0; + struct bio *bio; + + while ((bio = bio_list_pop(&cache->free_list)) != NULL) { + cache->nr--; + bio_free(bio); + if (++i == nr) + break; + } +} + +static int bio_cpu_dead(unsigned int cpu, struct hlist_node *node) +{ + struct bio_set *bs; + + bs = hlist_entry_safe(node, struct bio_set, cpuhp_dead); + if (bs->cache) { + struct bio_alloc_cache *cache = per_cpu_ptr(bs->cache, cpu); + + bio_alloc_cache_prune(cache, -1U); + } + return 0; +} + +static void bio_alloc_cache_destroy(struct bio_set *bs) +{ + int cpu; + + if (!bs->cache) + return; + + cpuhp_state_remove_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead); + for_each_possible_cpu(cpu) { + struct bio_alloc_cache *cache; + + cache = per_cpu_ptr(bs->cache, cpu); + bio_alloc_cache_prune(cache, -1U); + } + free_percpu(bs->cache); +} + /** * bio_put - release a reference to a bio * @bio: bio to release reference to @@ -596,16 +676,23 @@ void guard_bio_eod(struct bio *bio) **/ void bio_put(struct bio *bio) { - if (!bio_flagged(bio, BIO_REFFED)) - bio_free(bio); - else { + if (unlikely(bio_flagged(bio, BIO_REFFED))) { BIO_BUG_ON(!atomic_read(&bio->__bi_cnt)); + if (!atomic_dec_and_test(&bio->__bi_cnt)) + return; + } - /* - * last put frees it - */ - if (atomic_dec_and_test(&bio->__bi_cnt)) - bio_free(bio); + if (bio_flagged(bio, BIO_PERCPU_CACHE)) { + struct bio_alloc_cache *cache; + + bio_uninit(bio); + cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu()); + bio_list_add_head(&cache->free_list, bio); + if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK) + bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK); + put_cpu(); + } else { + bio_free(bio); } } EXPORT_SYMBOL(bio_put); @@ -1496,6 +1583,7 @@ int biovec_init_pool(mempool_t *pool, int pool_entries) */ void bioset_exit(struct bio_set *bs) { + bio_alloc_cache_destroy(bs); if (bs->rescue_workqueue) destroy_workqueue(bs->rescue_workqueue); bs->rescue_workqueue = NULL; @@ -1557,12 +1645,18 @@ int bioset_init(struct bio_set *bs, biovec_init_pool(&bs->bvec_pool, pool_size)) goto bad; - if (!(flags & BIOSET_NEED_RESCUER)) - return 0; - - bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); - if (!bs->rescue_workqueue) - goto bad; + if (flags & BIOSET_NEED_RESCUER) { + bs->rescue_workqueue = alloc_workqueue("bioset", + WQ_MEM_RECLAIM, 0); + if (!bs->rescue_workqueue) + goto bad; + } + if (flags & BIOSET_PERCPU_CACHE) { + bs->cache = alloc_percpu(struct bio_alloc_cache); + if (!bs->cache) + goto bad; + cpuhp_state_add_instance_nocalls(CPUHP_BIO_DEAD, &bs->cpuhp_dead); + } return 0; bad: @@ -1589,6 +1683,46 @@ int bioset_init_from_src(struct bio_set *bs, struct bio_set *src) } EXPORT_SYMBOL(bioset_init_from_src); +/** + * bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb + * @kiocb: kiocb describing the IO + * @nr_iovecs: number of iovecs to pre-allocate + * @bs: bio_set to allocate from + * + * Description: + * Like @bio_alloc_bioset, but pass in the kiocb. The kiocb is only + * used to check if we should dip into the per-cpu bio_set allocation + * cache. The allocation uses GFP_KERNEL internally. On return, the + * bio is marked BIO_PERCPU_CACHEABLE, and the final put of the bio + * MUST be done from process context, not hard/soft IRQ. + * + */ +struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, + struct bio_set *bs) +{ + struct bio_alloc_cache *cache; + struct bio *bio; + + if (!(kiocb->ki_flags & IOCB_ALLOC_CACHE) || nr_vecs > BIO_INLINE_VECS) + return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs); + + cache = per_cpu_ptr(bs->cache, get_cpu()); + bio = bio_list_pop(&cache->free_list); + if (bio) { + cache->nr--; + put_cpu(); + bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs); + bio->bi_pool = bs; + bio_set_flag(bio, BIO_PERCPU_CACHE); + return bio; + } + put_cpu(); + bio = bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs); + bio_set_flag(bio, BIO_PERCPU_CACHE); + return bio; +} +EXPORT_SYMBOL_GPL(bio_alloc_kiocb); + static int __init init_bio(void) { int i; @@ -1603,6 +1737,9 @@ static int __init init_bio(void) SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); } + cpuhp_setup_state_multi(CPUHP_BIO_DEAD, "block/bio:dead", NULL, + bio_cpu_dead); + if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS)) panic("bio: can't allocate bios\n"); diff --git a/block/blk-core.c b/block/blk-core.c index b5098739f72a..5454db2fa263 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -821,7 +821,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) } if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) - bio->bi_opf &= ~REQ_HIPRI; + bio_clear_hipri(bio); switch (bio_op(bio)) { case REQ_OP_DISCARD: diff --git a/block/blk-merge.c b/block/blk-merge.c index eeba8422ae82..7a5c81c02c80 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -285,7 +285,7 @@ split: * iopoll in direct IO routine. Given performance gain of iopoll for * big IO can be trival, disable iopoll when split needed. */ - bio->bi_opf &= ~REQ_HIPRI; + bio_clear_hipri(bio); return bio_split(bio, sectors, GFP_NOIO, bs); } diff --git a/block/blk.h b/block/blk.h index 346d184c82b0..8c96b0c90c48 100644 --- a/block/blk.h +++ b/block/blk.h @@ -366,4 +366,11 @@ extern struct device_attribute dev_attr_events; extern struct device_attribute dev_attr_events_async; extern struct device_attribute dev_attr_events_poll_msecs; +static inline void bio_clear_hipri(struct bio *bio) +{ + /* can't support alloc cache if we turn off polling */ + bio_clear_flag(bio, BIO_PERCPU_CACHE); + bio->bi_opf &= ~REQ_HIPRI; +} + #endif /* BLK_INTERNAL_H */ diff --git a/fs/block_dev.c b/fs/block_dev.c index 1f21ac984253..45df6cbccf12 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -386,7 +386,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, (bdev_logical_block_size(bdev) - 1)) return -EINVAL; - bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool); + bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); dio = container_of(bio, struct blkdev_dio, bio); dio->is_sync = is_sync = is_sync_kiocb(iocb); @@ -514,7 +514,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) static __init int blkdev_init(void) { - return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS); + return bioset_init(&blkdev_dio_pool, 4, + offsetof(struct blkdev_dio, bio), + BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE); } module_init(blkdev_init); diff --git a/fs/io_uring.c b/fs/io_uring.c index 7cc458e0b636..73928d957691 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2835,7 +2835,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) !kiocb->ki_filp->f_op->iopoll) return -EOPNOTSUPP; - kiocb->ki_flags |= IOCB_HIPRI; + kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; kiocb->ki_complete = io_complete_rw_iopoll; req->iopoll_completed = 0; } else { diff --git a/include/linux/bio.h b/include/linux/bio.h index 7b5f65a81f2b..3d67d0fbc868 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -400,6 +400,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors, enum { BIOSET_NEED_BVECS = BIT(0), BIOSET_NEED_RESCUER = BIT(1), + BIOSET_PERCPU_CACHE = BIT(2), }; extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags); extern void bioset_exit(struct bio_set *); @@ -408,6 +409,8 @@ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); struct bio *bio_alloc_bioset(gfp_t gfp, unsigned short nr_iovecs, struct bio_set *bs); +struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs, + struct bio_set *bs); struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned short nr_iovecs); extern void bio_put(struct bio *); @@ -657,6 +660,11 @@ struct bio_set { struct kmem_cache *bio_slab; unsigned int front_pad; + /* + * per-cpu bio alloc cache + */ + struct bio_alloc_cache __percpu *cache; + mempool_t bio_pool; mempool_t bvec_pool; #if defined(CONFIG_BLK_DEV_INTEGRITY) @@ -673,6 +681,11 @@ struct bio_set { struct bio_list rescue_list; struct work_struct rescue_work; struct workqueue_struct *rescue_workqueue; + + /* + * Hot un-plug notifier for the per-cpu cache, if used + */ + struct hlist_node cpuhp_dead; }; static inline bool bioset_initialized(struct bio_set *bs) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1335efa8a1db..9e392daa1d7f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -297,6 +297,7 @@ enum { BIO_TRACKED, /* set if bio goes through the rq_qos path */ BIO_REMAPPED, BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ + BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */ BIO_FLAG_LAST }; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 6ac543d33b66..95f88edc8f09 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -46,6 +46,7 @@ enum cpuhp_state { CPUHP_ARM_OMAP_WAKE_DEAD, CPUHP_IRQ_POLL_DEAD, CPUHP_BLOCK_SOFTIRQ_DEAD, + CPUHP_BIO_DEAD, CPUHP_ACPI_CPUDRV_DEAD, CPUHP_S390_PFAULT_DEAD, CPUHP_BLK_MQ_DEAD, diff --git a/include/linux/fs.h b/include/linux/fs.h index 7eae53fff7c5..ae29027d2ac8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -319,6 +319,8 @@ enum rw_hint { /* iocb->ki_waitq is valid */ #define IOCB_WAITQ (1 << 19) #define IOCB_NOIO (1 << 20) +/* can use bio alloc cache */ +#define IOCB_ALLOC_CACHE (1 << 21) struct kiocb { struct file *ki_filp;