2019-02-18 16:36:08 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2011-01-21 01:50:14 +08:00
|
|
|
/*
|
|
|
|
* NVM Express device driver
|
2014-03-24 22:11:22 +08:00
|
|
|
* Copyright (c) 2011-2014, Intel Corporation.
|
2011-01-21 01:50:14 +08:00
|
|
|
*/
|
|
|
|
|
2020-07-10 02:43:33 +08:00
|
|
|
#include <linux/acpi.h>
|
2015-12-08 06:30:31 +08:00
|
|
|
#include <linux/aer.h>
|
2018-04-28 03:42:52 +08:00
|
|
|
#include <linux/async.h>
|
2011-01-21 01:50:14 +08:00
|
|
|
#include <linux/blkdev.h>
|
2014-11-04 23:20:14 +08:00
|
|
|
#include <linux/blk-mq.h>
|
2016-09-14 22:18:57 +08:00
|
|
|
#include <linux/blk-mq-pci.h>
|
2021-09-20 20:33:27 +08:00
|
|
|
#include <linux/blk-integrity.h>
|
2017-04-21 04:37:55 +08:00
|
|
|
#include <linux/dmi.h>
|
2011-01-21 01:50:14 +08:00
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/io.h>
|
2022-02-16 12:31:36 +08:00
|
|
|
#include <linux/memremap.h>
|
2011-01-21 01:50:14 +08:00
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/module.h>
|
2015-11-26 19:21:29 +08:00
|
|
|
#include <linux/mutex.h>
|
2017-09-16 01:05:38 +08:00
|
|
|
#include <linux/once.h>
|
2011-01-21 01:50:14 +08:00
|
|
|
#include <linux/pci.h>
|
2019-05-23 23:27:35 +08:00
|
|
|
#include <linux/suspend.h>
|
NVMe: Metadata format support
Adds support for NVMe metadata formats and exposes block devices for
all namespaces regardless of their format. Namespace formats that are
unusable will have disk capacity set to 0, but a handle to the block
device is created to simplify device management. A namespace is not
usable when the format requires host interleave block and metadata in
single buffer, has no provisioned storage, or has better data but failed
to register with blk integrity.
The namespace has to be scanned in two phases to support separate
metadata formats. The first establishes the sector size and capacity
prior to invoking add_disk. If metadata is required, the capacity will
be temporarily set to 0 until it can be revalidated and registered with
the integrity extensions after add_disk completes.
The driver relies on the integrity extensions to provide the metadata
buffer. NVMe requires this be a single physically contiguous region,
so only one integrity segment is allowed per command. If the metadata
is used for T10 PI, the driver provides mappings to save and restore
the reftag physical block translation. The driver provides no-op
functions for generate and verify if metadata is not used for protection
information. This way the setup is always provided by the block layer.
If a request does not supply a required metadata buffer, the command
is failed with bad address. This could only happen if a user manually
disables verify/generate on such a disk. The only exception to where
this is okay is if the controller is capable of stripping/generating
the metadata, which is possible on some types of formats.
The metadata scatter gather list now occupies the spot in the nvme_iod
that used to be used to link retryable IOD's, but we don't do that
anymore, so the field was unused.
Signed-off-by: Keith Busch <keith.busch@intel.com>
2015-02-20 04:39:03 +08:00
|
|
|
#include <linux/t10-pi.h>
|
2011-01-21 01:50:14 +08:00
|
|
|
#include <linux/types.h>
|
2015-08-28 15:27:14 +08:00
|
|
|
#include <linux/io-64-nonatomic-lo-hi.h>
|
2021-01-15 14:30:46 +08:00
|
|
|
#include <linux/io-64-nonatomic-hi-lo.h>
|
2017-02-04 03:50:32 +08:00
|
|
|
#include <linux/sed-opal.h>
|
2018-10-05 05:27:43 +08:00
|
|
|
#include <linux/pci-p2pdma.h>
|
2012-02-07 10:45:33 +08:00
|
|
|
|
2018-12-19 00:59:53 +08:00
|
|
|
#include "trace.h"
|
2015-10-03 21:46:41 +08:00
|
|
|
#include "nvme.h"
|
|
|
|
|
2019-08-07 15:51:20 +08:00
|
|
|
/* Bytes occupied by a queue's submission entries: q_depth slots of (1 << sqes) bytes. */
#define SQ_SIZE(q)	((q)->q_depth << (q)->sqes)
/* Bytes occupied by a queue's completion entries: q_depth nvme_completion records. */
#define CQ_SIZE(q)	((q)->q_depth * sizeof(struct nvme_completion))

/* How many SGL descriptors fit in a single page. */
#define SGES_PER_PAGE	(PAGE_SIZE / sizeof(struct nvme_sgl_desc))
|
2014-05-14 01:42:02 +08:00
|
|
|
|
2018-06-21 23:49:37 +08:00
|
|
|
/*
 * Per-command transfer limits.
 *
 * These can be higher, but we need to ensure that any command doesn't
 * require an sg allocation that needs more than a page of data.
 */
#define NVME_MAX_KB_SZ	4096
#define NVME_MAX_SEGS	127
|
|
|
|
|
2011-02-06 20:28:06 +08:00
|
|
|
static int use_threaded_interrupts;
|
2022-03-22 10:35:12 +08:00
|
|
|
module_param(use_threaded_interrupts, int, 0444);
|
2011-02-06 20:28:06 +08:00
|
|
|
|
2015-07-21 00:14:09 +08:00
|
|
|
static bool use_cmb_sqes = true;
|
2018-06-06 22:13:09 +08:00
|
|
|
module_param(use_cmb_sqes, bool, 0444);
|
2015-07-21 00:14:09 +08:00
|
|
|
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
|
|
|
|
|
2017-05-12 23:02:58 +08:00
|
|
|
static unsigned int max_host_mem_size_mb = 128;
|
|
|
|
module_param(max_host_mem_size_mb, uint, 0444);
|
|
|
|
MODULE_PARM_DESC(max_host_mem_size_mb,
|
|
|
|
"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
|
2011-03-03 07:37:18 +08:00
|
|
|
|
2017-10-17 09:24:20 +08:00
|
|
|
static unsigned int sgl_threshold = SZ_32K;
|
|
|
|
module_param(sgl_threshold, uint, 0644);
|
|
|
|
MODULE_PARM_DESC(sgl_threshold,
|
|
|
|
"Use SGLs when average request segment size is larger or equal to "
|
|
|
|
"this size. Use 0 to disable SGLs.");
|
|
|
|
|
2021-06-17 05:19:34 +08:00
|
|
|
/* Inclusive bounds enforced on the io_queue_depth module parameter. */
#define NVME_PCI_MIN_QUEUE_SIZE	2
#define NVME_PCI_MAX_QUEUE_SIZE	4095
|
2017-07-10 16:46:59 +08:00
|
|
|
static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
|
|
|
|
static const struct kernel_param_ops io_queue_depth_ops = {
|
|
|
|
.set = io_queue_depth_set,
|
2020-06-17 16:05:13 +08:00
|
|
|
.get = param_get_uint,
|
2017-07-10 16:46:59 +08:00
|
|
|
};
|
|
|
|
|
2020-06-17 16:05:13 +08:00
|
|
|
static unsigned int io_queue_depth = 1024;
|
2017-07-10 16:46:59 +08:00
|
|
|
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
|
2021-06-17 05:19:34 +08:00
|
|
|
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2 and < 4096");
|
2017-07-10 16:46:59 +08:00
|
|
|
|
2020-05-09 14:22:08 +08:00
|
|
|
static int io_queue_count_set(const char *val, const struct kernel_param *kp)
|
|
|
|
{
|
|
|
|
unsigned int n;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = kstrtouint(val, 10, &n);
|
|
|
|
if (ret != 0 || n > num_possible_cpus())
|
|
|
|
return -EINVAL;
|
|
|
|
return param_set_uint(val, kp);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct kernel_param_ops io_queue_count_ops = {
|
|
|
|
.set = io_queue_count_set,
|
|
|
|
.get = param_get_uint,
|
|
|
|
};
|
|
|
|
|
2019-12-07 00:51:54 +08:00
|
|
|
static unsigned int write_queues;
|
2020-05-09 14:22:08 +08:00
|
|
|
module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644);
|
2018-10-31 22:36:31 +08:00
|
|
|
MODULE_PARM_DESC(write_queues,
|
|
|
|
"Number of queues to use for writes. If not set, reads and writes "
|
|
|
|
"will share a queue set.");
|
|
|
|
|
2019-12-07 00:51:54 +08:00
|
|
|
static unsigned int poll_queues;
|
2020-05-09 14:22:08 +08:00
|
|
|
module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644);
|
2018-11-06 03:44:33 +08:00
|
|
|
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
|
|
|
|
|
2020-07-10 02:43:33 +08:00
|
|
|
static bool noacpi;
|
|
|
|
module_param(noacpi, bool, 0444);
|
|
|
|
MODULE_PARM_DESC(noacpi, "disable acpi bios quirks");
|
|
|
|
|
2015-11-26 17:06:56 +08:00
|
|
|
struct nvme_dev;
|
|
|
|
struct nvme_queue;
|
2015-02-04 02:21:42 +08:00
|
|
|
|
2016-01-13 05:41:18 +08:00
|
|
|
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
|
2019-01-05 06:04:33 +08:00
|
|
|
static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);
|
2013-12-11 04:10:37 +08:00
|
|
|
|
2015-11-26 17:06:56 +08:00
|
|
|
/*
|
|
|
|
* Represents an NVM Express device. Each nvme_dev is a PCI function.
|
|
|
|
*/
|
|
|
|
struct nvme_dev {
|
2018-01-14 18:39:01 +08:00
|
|
|
struct nvme_queue *queues;
|
2015-11-26 17:06:56 +08:00
|
|
|
struct blk_mq_tag_set tagset;
|
|
|
|
struct blk_mq_tag_set admin_tagset;
|
|
|
|
u32 __iomem *dbs;
|
|
|
|
struct device *dev;
|
|
|
|
struct dma_pool *prp_page_pool;
|
|
|
|
struct dma_pool *prp_small_pool;
|
|
|
|
unsigned online_queues;
|
|
|
|
unsigned max_qid;
|
2018-12-03 00:46:16 +08:00
|
|
|
unsigned io_queues[HCTX_MAX_TYPES];
|
2018-04-12 23:16:10 +08:00
|
|
|
unsigned int num_vecs;
|
nvme-pci: Use u32 for nvme_dev.q_depth and nvme_queue.q_depth
Recently nvme_dev.q_depth was changed from an int to u16 type.
This falls over for the queue depth calculation in nvme_pci_enable(),
where NVME_CAP_MQES(dev->ctrl.cap) + 1 may overflow as a u16, as
NVME_CAP_MQES() is a 16b number also. That happens for me, and this is the
result:
root@ubuntu:/home/john# [148.272996] Unable to handle kernel NULL pointer
dereference at virtual address 0000000000000010
Mem abort info:
ESR = 0x96000004
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000004
CM = 0, WnR = 0
user pgtable: 4k pages, 48-bit VAs, pgdp=00000a27bf3c9000
[0000000000000010] pgd=0000000000000000, p4d=0000000000000000
Internal error: Oops: 96000004 [#1] PREEMPT SMP
Modules linked in: nvme nvme_core
CPU: 56 PID: 256 Comm: kworker/u195:0 Not tainted
5.8.0-next-20200812 #27
Hardware name: Huawei D06 /D06, BIOS Hisilicon D06 UEFI RC0 -
V1.16.01 03/15/2019
Workqueue: nvme-reset-wq nvme_reset_work [nvme]
pstate: 80c00009 (Nzcv daif +PAN +UAO BTYPE=--)
pc : __sg_alloc_table_from_pages+0xec/0x238
lr : __sg_alloc_table_from_pages+0xc8/0x238
sp : ffff800013ccbad0
x29: ffff800013ccbad0 x28: ffff0a27b3d380a8
x27: 0000000000000000 x26: 0000000000002dc2
x25: 0000000000000dc0 x24: 0000000000000000
x23: 0000000000000000 x22: ffff800013ccbbe8
x21: 0000000000000010 x20: 0000000000000000
x19: 00000000fffff000 x18: ffffffffffffffff
x17: 00000000000000c0 x16: fffffe289eaf6380
x15: ffff800011b59948 x14: ffff002bc8fe98f8
x13: ff00000000000000 x12: ffff8000114ca000
x11: 0000000000000000 x10: ffffffffffffffff
x9 : ffffffffffffffc0 x8 : ffff0a27b5f9b6a0
x7 : 0000000000000000 x6 : 0000000000000001
x5 : ffff0a27b5f9b680 x4 : 0000000000000000
x3 : ffff0a27b5f9b680 x2 : 0000000000000000
x1 : 0000000000000001 x0 : 0000000000000000
Call trace:
__sg_alloc_table_from_pages+0xec/0x238
sg_alloc_table_from_pages+0x18/0x28
iommu_dma_alloc+0x474/0x678
dma_alloc_attrs+0xd8/0xf0
nvme_alloc_queue+0x114/0x160 [nvme]
nvme_reset_work+0xb34/0x14b4 [nvme]
process_one_work+0x1e8/0x360
worker_thread+0x44/0x478
kthread+0x150/0x158
ret_from_fork+0x10/0x34
Code: f94002c3 6b01017f 540007c2 11000486 (f8645aa5)
---[ end trace 89bb2b72d59bf925 ]---
Fix by making onto a u32.
Also use u32 for nvme_dev.q_depth, as we assign this value from
nvme_dev.q_depth, and nvme_dev.q_depth will possibly hold 65536 - this
avoids the same crash as above.
Fixes: 61f3b8963097 ("nvme-pci: use unsigned for io queue depth")
Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2020-08-14 23:34:25 +08:00
|
|
|
u32 q_depth;
|
2019-08-07 15:51:20 +08:00
|
|
|
int io_sqes;
|
2015-11-26 17:06:56 +08:00
|
|
|
u32 db_stride;
|
|
|
|
void __iomem *bar;
|
2017-05-24 16:39:55 +08:00
|
|
|
unsigned long bar_mapped_size;
|
2015-11-26 19:35:49 +08:00
|
|
|
struct work_struct remove_work;
|
2015-11-26 19:21:29 +08:00
|
|
|
struct mutex shutdown_lock;
|
2015-11-26 17:06:56 +08:00
|
|
|
bool subsystem;
|
|
|
|
u64 cmb_size;
|
2018-10-05 05:27:43 +08:00
|
|
|
bool cmb_use_sqes;
|
2015-11-26 17:06:56 +08:00
|
|
|
u32 cmbsz;
|
2016-10-06 10:01:12 +08:00
|
|
|
u32 cmbloc;
|
2015-11-26 17:06:56 +08:00
|
|
|
struct nvme_ctrl ctrl;
|
2019-05-23 23:27:35 +08:00
|
|
|
u32 last_ps;
|
2021-07-28 00:40:43 +08:00
|
|
|
bool hmb;
|
2017-05-12 23:02:58 +08:00
|
|
|
|
2018-06-21 23:49:37 +08:00
|
|
|
mempool_t *iod_mempool;
|
|
|
|
|
2017-05-12 23:02:58 +08:00
|
|
|
/* shadow doorbell buffer support: */
|
2017-04-10 23:51:07 +08:00
|
|
|
u32 *dbbuf_dbs;
|
|
|
|
dma_addr_t dbbuf_dbs_dma_addr;
|
|
|
|
u32 *dbbuf_eis;
|
|
|
|
dma_addr_t dbbuf_eis_dma_addr;
|
2017-05-12 23:02:58 +08:00
|
|
|
|
|
|
|
/* host memory buffer support: */
|
|
|
|
u64 host_mem_size;
|
|
|
|
u32 nr_host_mem_descs;
|
2017-08-28 16:47:18 +08:00
|
|
|
dma_addr_t host_mem_descs_dma;
|
2017-05-12 23:02:58 +08:00
|
|
|
struct nvme_host_mem_buf_desc *host_mem_descs;
|
|
|
|
void **host_mem_desc_bufs;
|
2020-05-02 15:29:41 +08:00
|
|
|
unsigned int nr_allocated_queues;
|
|
|
|
unsigned int nr_write_queues;
|
|
|
|
unsigned int nr_poll_queues;
|
2021-07-15 05:02:37 +08:00
|
|
|
|
|
|
|
bool attrs_added;
|
2013-12-11 04:10:40 +08:00
|
|
|
};
|
2011-03-03 07:37:18 +08:00
|
|
|
|
2017-07-10 16:46:59 +08:00
|
|
|
static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
|
|
|
|
{
|
2021-06-17 05:19:34 +08:00
|
|
|
return param_set_uint_minmax(val, kp, NVME_PCI_MIN_QUEUE_SIZE,
|
|
|
|
NVME_PCI_MAX_QUEUE_SIZE);
|
2017-07-10 16:46:59 +08:00
|
|
|
}
|
|
|
|
|
2017-04-10 23:51:07 +08:00
|
|
|
/* Index of queue @qid's submission doorbell slot: even slot 2 * qid, scaled by @stride. */
static inline unsigned int sq_idx(unsigned int qid, u32 stride)
{
	unsigned int slot = qid * 2;

	return slot * stride;
}
|
|
|
|
|
|
|
|
/* Index of queue @qid's completion doorbell slot: odd slot 2 * qid + 1, scaled by @stride. */
static inline unsigned int cq_idx(unsigned int qid, u32 stride)
{
	unsigned int slot = qid * 2 + 1;

	return slot * stride;
}
|
|
|
|
|
2015-11-26 17:06:56 +08:00
|
|
|
static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
|
|
|
|
{
|
|
|
|
return container_of(ctrl, struct nvme_dev, ctrl);
|
|
|
|
}
|
|
|
|
|
2011-01-21 01:50:14 +08:00
|
|
|
/*
|
|
|
|
* An NVM Express queue. Each device has at least two (one for admin
|
|
|
|
* commands and one for I/O commands).
|
|
|
|
*/
|
|
|
|
struct nvme_queue {
|
2011-02-10 22:56:01 +08:00
|
|
|
struct nvme_dev *dev;
|
2018-05-18 00:31:51 +08:00
|
|
|
spinlock_t sq_lock;
|
2019-08-07 15:51:20 +08:00
|
|
|
void *sq_cmds;
|
2018-12-03 00:46:23 +08:00
|
|
|
/* only used for poll queues: */
|
|
|
|
spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
|
2020-04-28 22:21:56 +08:00
|
|
|
struct nvme_completion *cqes;
|
2011-01-21 01:50:14 +08:00
|
|
|
dma_addr_t sq_dma_addr;
|
|
|
|
dma_addr_t cq_dma_addr;
|
|
|
|
u32 __iomem *q_db;
|
nvme-pci: Use u32 for nvme_dev.q_depth and nvme_queue.q_depth
Recently nvme_dev.q_depth was changed from an int to u16 type.
This falls over for the queue depth calculation in nvme_pci_enable(),
where NVME_CAP_MQES(dev->ctrl.cap) + 1 may overflow as a u16, as
NVME_CAP_MQES() is a 16b number also. That happens for me, and this is the
result:
root@ubuntu:/home/john# [148.272996] Unable to handle kernel NULL pointer
dereference at virtual address 0000000000000010
Mem abort info:
ESR = 0x96000004
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000004
CM = 0, WnR = 0
user pgtable: 4k pages, 48-bit VAs, pgdp=00000a27bf3c9000
[0000000000000010] pgd=0000000000000000, p4d=0000000000000000
Internal error: Oops: 96000004 [#1] PREEMPT SMP
Modules linked in: nvme nvme_core
CPU: 56 PID: 256 Comm: kworker/u195:0 Not tainted
5.8.0-next-20200812 #27
Hardware name: Huawei D06 /D06, BIOS Hisilicon D06 UEFI RC0 -
V1.16.01 03/15/2019
Workqueue: nvme-reset-wq nvme_reset_work [nvme]
pstate: 80c00009 (Nzcv daif +PAN +UAO BTYPE=--)
pc : __sg_alloc_table_from_pages+0xec/0x238
lr : __sg_alloc_table_from_pages+0xc8/0x238
sp : ffff800013ccbad0
x29: ffff800013ccbad0 x28: ffff0a27b3d380a8
x27: 0000000000000000 x26: 0000000000002dc2
x25: 0000000000000dc0 x24: 0000000000000000
x23: 0000000000000000 x22: ffff800013ccbbe8
x21: 0000000000000010 x20: 0000000000000000
x19: 00000000fffff000 x18: ffffffffffffffff
x17: 00000000000000c0 x16: fffffe289eaf6380
x15: ffff800011b59948 x14: ffff002bc8fe98f8
x13: ff00000000000000 x12: ffff8000114ca000
x11: 0000000000000000 x10: ffffffffffffffff
x9 : ffffffffffffffc0 x8 : ffff0a27b5f9b6a0
x7 : 0000000000000000 x6 : 0000000000000001
x5 : ffff0a27b5f9b680 x4 : 0000000000000000
x3 : ffff0a27b5f9b680 x2 : 0000000000000000
x1 : 0000000000000001 x0 : 0000000000000000
Call trace:
__sg_alloc_table_from_pages+0xec/0x238
sg_alloc_table_from_pages+0x18/0x28
iommu_dma_alloc+0x474/0x678
dma_alloc_attrs+0xd8/0xf0
nvme_alloc_queue+0x114/0x160 [nvme]
nvme_reset_work+0xb34/0x14b4 [nvme]
process_one_work+0x1e8/0x360
worker_thread+0x44/0x478
kthread+0x150/0x158
ret_from_fork+0x10/0x34
Code: f94002c3 6b01017f 540007c2 11000486 (f8645aa5)
---[ end trace 89bb2b72d59bf925 ]---
Fix by making onto a u32.
Also use u32 for nvme_dev.q_depth, as we assign this value from
nvme_dev.q_depth, and nvme_dev.q_depth will possibly hold 65536 - this
avoids the same crash as above.
Fixes: 61f3b8963097 ("nvme-pci: use unsigned for io queue depth")
Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2020-08-14 23:34:25 +08:00
|
|
|
u32 q_depth;
|
2019-03-09 01:43:06 +08:00
|
|
|
u16 cq_vector;
|
2011-01-21 01:50:14 +08:00
|
|
|
u16 sq_tail;
|
2020-10-31 01:28:54 +08:00
|
|
|
u16 last_sq_tail;
|
2011-01-21 01:50:14 +08:00
|
|
|
u16 cq_head;
|
2013-12-11 04:10:38 +08:00
|
|
|
u16 qid;
|
2013-06-24 23:47:34 +08:00
|
|
|
u8 cq_phase;
|
2019-08-07 15:51:20 +08:00
|
|
|
u8 sqes;
|
2018-12-03 00:46:17 +08:00
|
|
|
unsigned long flags;
|
|
|
|
#define NVMEQ_ENABLED 0
|
2018-12-03 00:46:18 +08:00
|
|
|
#define NVMEQ_SQ_CMB 1
|
2018-12-03 00:46:22 +08:00
|
|
|
#define NVMEQ_DELETE_ERROR 2
|
2019-03-09 01:43:06 +08:00
|
|
|
#define NVMEQ_POLLED 3
|
2017-04-10 23:51:07 +08:00
|
|
|
u32 *dbbuf_sq_db;
|
|
|
|
u32 *dbbuf_cq_db;
|
|
|
|
u32 *dbbuf_sq_ei;
|
|
|
|
u32 *dbbuf_cq_ei;
|
2018-12-03 00:46:22 +08:00
|
|
|
struct completion delete_done;
|
2011-01-21 01:50:14 +08:00
|
|
|
};
|
|
|
|
|
2015-10-16 13:58:32 +08:00
|
|
|
/*
|
2019-03-03 23:04:01 +08:00
|
|
|
* The nvme_iod describes the data in an I/O.
|
|
|
|
*
|
|
|
|
* The sg pointer contains the list of PRP/SGL chunk allocations in addition
|
|
|
|
* to the actual struct scatterlist.
|
2015-10-16 13:58:32 +08:00
|
|
|
*/
|
|
|
|
struct nvme_iod {
|
2016-11-10 23:32:33 +08:00
|
|
|
struct nvme_request req;
|
2021-03-18 04:37:02 +08:00
|
|
|
struct nvme_command cmd;
|
2015-11-28 22:43:10 +08:00
|
|
|
struct nvme_queue *nvmeq;
|
2017-10-17 09:24:20 +08:00
|
|
|
bool use_sgl;
|
2015-11-28 22:43:10 +08:00
|
|
|
int aborted;
|
2015-10-16 13:58:32 +08:00
|
|
|
int npages; /* In the PRP list. 0 means small pool in use */
|
|
|
|
int nents; /* Used in scatterlist */
|
|
|
|
dma_addr_t first_dma;
|
2019-03-05 20:49:34 +08:00
|
|
|
unsigned int dma_len; /* length of single DMA segment mapping */
|
2019-03-03 23:19:18 +08:00
|
|
|
dma_addr_t meta_dma;
|
2015-11-28 22:43:10 +08:00
|
|
|
struct scatterlist *sg;
|
2011-01-21 01:50:14 +08:00
|
|
|
};
|
|
|
|
|
2020-05-02 15:29:41 +08:00
|
|
|
static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
|
2018-10-31 22:36:31 +08:00
|
|
|
{
|
2020-05-02 15:29:41 +08:00
|
|
|
return dev->nr_allocated_queues * 8 * dev->db_stride;
|
2017-04-10 23:51:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
|
|
|
|
{
|
2020-05-02 15:29:41 +08:00
|
|
|
unsigned int mem_size = nvme_dbbuf_size(dev);
|
2017-04-10 23:51:07 +08:00
|
|
|
|
2021-10-15 00:45:42 +08:00
|
|
|
if (dev->dbbuf_dbs) {
|
|
|
|
/*
|
|
|
|
* Clear the dbbuf memory so the driver doesn't observe stale
|
|
|
|
* values from the previous instantiation.
|
|
|
|
*/
|
|
|
|
memset(dev->dbbuf_dbs, 0, mem_size);
|
|
|
|
memset(dev->dbbuf_eis, 0, mem_size);
|
2017-04-10 23:51:07 +08:00
|
|
|
return 0;
|
2021-10-15 00:45:42 +08:00
|
|
|
}
|
2017-04-10 23:51:07 +08:00
|
|
|
|
|
|
|
dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
|
|
|
|
&dev->dbbuf_dbs_dma_addr,
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!dev->dbbuf_dbs)
|
|
|
|
return -ENOMEM;
|
|
|
|
dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size,
|
|
|
|
&dev->dbbuf_eis_dma_addr,
|
|
|
|
GFP_KERNEL);
|
|
|
|
if (!dev->dbbuf_eis) {
|
|
|
|
dma_free_coherent(dev->dev, mem_size,
|
|
|
|
dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
|
|
|
|
dev->dbbuf_dbs = NULL;
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
|
|
|
|
{
|
2020-05-02 15:29:41 +08:00
|
|
|
unsigned int mem_size = nvme_dbbuf_size(dev);
|
2017-04-10 23:51:07 +08:00
|
|
|
|
|
|
|
if (dev->dbbuf_dbs) {
|
|
|
|
dma_free_coherent(dev->dev, mem_size,
|
|
|
|
dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
|
|
|
|
dev->dbbuf_dbs = NULL;
|
|
|
|
}
|
|
|
|
if (dev->dbbuf_eis) {
|
|
|
|
dma_free_coherent(dev->dev, mem_size,
|
|
|
|
dev->dbbuf_eis, dev->dbbuf_eis_dma_addr);
|
|
|
|
dev->dbbuf_eis = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_dbbuf_init(struct nvme_dev *dev,
|
|
|
|
struct nvme_queue *nvmeq, int qid)
|
|
|
|
{
|
|
|
|
if (!dev->dbbuf_dbs || !qid)
|
|
|
|
return;
|
|
|
|
|
|
|
|
nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)];
|
|
|
|
nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)];
|
|
|
|
nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)];
|
|
|
|
nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)];
|
|
|
|
}
|
|
|
|
|
2020-11-05 22:28:47 +08:00
|
|
|
static void nvme_dbbuf_free(struct nvme_queue *nvmeq)
|
|
|
|
{
|
|
|
|
if (!nvmeq->qid)
|
|
|
|
return;
|
|
|
|
|
|
|
|
nvmeq->dbbuf_sq_db = NULL;
|
|
|
|
nvmeq->dbbuf_cq_db = NULL;
|
|
|
|
nvmeq->dbbuf_sq_ei = NULL;
|
|
|
|
nvmeq->dbbuf_cq_ei = NULL;
|
|
|
|
}
|
|
|
|
|
2017-04-10 23:51:07 +08:00
|
|
|
static void nvme_dbbuf_set(struct nvme_dev *dev)
|
|
|
|
{
|
2021-06-17 06:15:53 +08:00
|
|
|
struct nvme_command c = { };
|
2020-11-05 22:28:47 +08:00
|
|
|
unsigned int i;
|
2017-04-10 23:51:07 +08:00
|
|
|
|
|
|
|
if (!dev->dbbuf_dbs)
|
|
|
|
return;
|
|
|
|
|
|
|
|
c.dbbuf.opcode = nvme_admin_dbbuf;
|
|
|
|
c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr);
|
|
|
|
c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);
|
|
|
|
|
|
|
|
if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
|
2017-05-20 21:14:43 +08:00
|
|
|
dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
|
2017-04-10 23:51:07 +08:00
|
|
|
/* Free memory and continue on */
|
|
|
|
nvme_dbbuf_dma_free(dev);
|
2020-11-05 22:28:47 +08:00
|
|
|
|
|
|
|
for (i = 1; i <= dev->online_queues; i++)
|
|
|
|
nvme_dbbuf_free(&dev->queues[i]);
|
2017-04-10 23:51:07 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Wrap-safe 16-bit test: returns nonzero when @event_idx falls in the
 * half-open range [@old, @new_idx) that the doorbell just moved across.
 */
static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
{
	u16 past_event = new_idx - event_idx - 1;
	u16 advanced = new_idx - old;

	return past_event < advanced;
}
|
|
|
|
|
|
|
|
/* Update dbbuf and return true if an MMIO is required */
|
|
|
|
static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
|
|
|
|
volatile u32 *dbbuf_ei)
|
|
|
|
{
|
|
|
|
if (dbbuf_db) {
|
|
|
|
u16 old_value;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ensure that the queue is written before updating
|
|
|
|
* the doorbell in memory
|
|
|
|
*/
|
|
|
|
wmb();
|
|
|
|
|
|
|
|
old_value = *dbbuf_db;
|
|
|
|
*dbbuf_db = value;
|
|
|
|
|
2018-08-16 06:51:57 +08:00
|
|
|
/*
|
|
|
|
* Ensure that the doorbell is updated before reading the event
|
|
|
|
* index from memory. The controller needs to provide similar
|
|
|
|
* ordering to ensure the envent index is updated before reading
|
|
|
|
* the doorbell.
|
|
|
|
*/
|
|
|
|
mb();
|
|
|
|
|
2017-04-10 23:51:07 +08:00
|
|
|
if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
2011-01-21 01:50:14 +08:00
|
|
|
}
|
|
|
|
|
2015-01-23 03:07:58 +08:00
|
|
|
/*
|
|
|
|
* Will slightly overestimate the number of pages needed. This is OK
|
|
|
|
* as it only leads to a small amount of wasted memory for the lifetime of
|
|
|
|
* the I/O.
|
|
|
|
*/
|
2020-07-20 21:23:37 +08:00
|
|
|
static int nvme_pci_npages_prp(void)
|
2015-01-23 03:07:58 +08:00
|
|
|
{
|
2020-07-20 21:23:37 +08:00
|
|
|
unsigned nprps = DIV_ROUND_UP(NVME_MAX_KB_SZ + NVME_CTRL_PAGE_SIZE,
|
2020-07-17 08:51:37 +08:00
|
|
|
NVME_CTRL_PAGE_SIZE);
|
2015-01-23 03:07:58 +08:00
|
|
|
return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
|
|
|
|
}
|
|
|
|
|
2017-10-17 09:24:20 +08:00
|
|
|
/*
|
|
|
|
* Calculates the number of pages needed for the SGL segments. For example a 4k
|
|
|
|
* page can accommodate 256 SGL descriptors.
|
|
|
|
*/
|
2020-07-20 21:23:37 +08:00
|
|
|
static int nvme_pci_npages_sgl(void)
|
2015-01-23 03:07:58 +08:00
|
|
|
{
|
2020-07-20 21:23:37 +08:00
|
|
|
return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc),
|
|
|
|
PAGE_SIZE);
|
2015-11-28 22:43:10 +08:00
|
|
|
}
|
2015-01-23 03:07:58 +08:00
|
|
|
|
2020-07-20 21:23:37 +08:00
|
|
|
static size_t nvme_pci_iod_alloc_size(void)
|
2015-11-28 22:43:10 +08:00
|
|
|
{
|
2020-07-20 21:23:37 +08:00
|
|
|
size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
|
2017-10-17 09:24:20 +08:00
|
|
|
|
2020-07-20 21:23:37 +08:00
|
|
|
return sizeof(__le64 *) * npages +
|
|
|
|
sizeof(struct scatterlist) * NVME_MAX_SEGS;
|
2015-11-28 22:43:10 +08:00
|
|
|
}
|
2015-01-23 03:07:58 +08:00
|
|
|
|
2014-11-04 23:20:14 +08:00
|
|
|
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
|
|
|
|
unsigned int hctx_idx)
|
2011-02-07 07:30:16 +08:00
|
|
|
{
|
2014-11-04 23:20:14 +08:00
|
|
|
struct nvme_dev *dev = data;
|
2018-01-14 18:39:01 +08:00
|
|
|
struct nvme_queue *nvmeq = &dev->queues[0];
|
2014-11-04 23:20:14 +08:00
|
|
|
|
2015-06-01 23:29:54 +08:00
|
|
|
WARN_ON(hctx_idx != 0);
|
|
|
|
WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);
|
|
|
|
|
2014-11-04 23:20:14 +08:00
|
|
|
hctx->driver_data = nvmeq;
|
|
|
|
return 0;
|
2011-02-07 07:30:16 +08:00
|
|
|
}
|
|
|
|
|
2014-11-04 23:20:14 +08:00
|
|
|
static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
|
|
|
|
unsigned int hctx_idx)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2014-11-04 23:20:14 +08:00
|
|
|
struct nvme_dev *dev = data;
|
2018-01-14 18:39:01 +08:00
|
|
|
struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1];
|
2014-11-04 23:20:14 +08:00
|
|
|
|
2015-06-01 23:29:54 +08:00
|
|
|
WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags);
|
2014-11-04 23:20:14 +08:00
|
|
|
hctx->driver_data = nvmeq;
|
|
|
|
return 0;
|
2011-01-21 01:50:14 +08:00
|
|
|
}
|
|
|
|
|
2022-03-15 22:53:59 +08:00
|
|
|
static int nvme_pci_init_request(struct blk_mq_tag_set *set,
|
|
|
|
struct request *req, unsigned int hctx_idx,
|
|
|
|
unsigned int numa_node)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2017-05-02 00:19:08 +08:00
|
|
|
struct nvme_dev *dev = set->driver_data;
|
2015-11-28 22:43:10 +08:00
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
2017-06-13 15:15:18 +08:00
|
|
|
int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
|
2018-01-14 18:39:01 +08:00
|
|
|
struct nvme_queue *nvmeq = &dev->queues[queue_idx];
|
2014-11-04 23:20:14 +08:00
|
|
|
|
|
|
|
BUG_ON(!nvmeq);
|
2015-11-28 22:43:10 +08:00
|
|
|
iod->nvmeq = nvmeq;
|
2018-06-30 06:50:00 +08:00
|
|
|
|
|
|
|
nvme_req(req)->ctrl = &dev->ctrl;
|
2021-03-18 04:37:03 +08:00
|
|
|
nvme_req(req)->cmd = &iod->cmd;
|
2014-11-04 23:20:14 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-10-31 22:36:31 +08:00
|
|
|
static int queue_irq_offset(struct nvme_dev *dev)
|
|
|
|
{
|
|
|
|
/* if we have more than 1 vec, admin queue offsets us by 1 */
|
|
|
|
if (dev->num_vecs > 1)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-09-14 22:18:57 +08:00
|
|
|
static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
|
|
|
|
{
|
|
|
|
struct nvme_dev *dev = set->driver_data;
|
2018-10-31 22:36:31 +08:00
|
|
|
int i, qoff, offset;
|
|
|
|
|
|
|
|
offset = queue_irq_offset(dev);
|
|
|
|
for (i = 0, qoff = 0; i < set->nr_maps; i++) {
|
|
|
|
struct blk_mq_queue_map *map = &set->map[i];
|
|
|
|
|
|
|
|
map->nr_queues = dev->io_queues[i];
|
|
|
|
if (!map->nr_queues) {
|
2018-12-03 00:46:16 +08:00
|
|
|
BUG_ON(i == HCTX_TYPE_DEFAULT);
|
2018-12-17 19:16:27 +08:00
|
|
|
continue;
|
2018-10-31 22:36:31 +08:00
|
|
|
}
|
|
|
|
|
2018-11-06 03:44:33 +08:00
|
|
|
/*
|
|
|
|
* The poll queue(s) doesn't have an IRQ (and hence IRQ
|
|
|
|
* affinity), so use the regular blk-mq cpu mapping
|
|
|
|
*/
|
2018-10-31 22:36:31 +08:00
|
|
|
map->queue_offset = qoff;
|
2019-05-22 00:56:43 +08:00
|
|
|
if (i != HCTX_TYPE_POLL && offset)
|
2018-11-06 03:44:33 +08:00
|
|
|
blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
|
|
|
|
else
|
|
|
|
blk_mq_map_queues(map);
|
2018-10-31 22:36:31 +08:00
|
|
|
qoff += map->nr_queues;
|
|
|
|
offset += map->nr_queues;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2016-09-14 22:18:57 +08:00
|
|
|
}
|
|
|
|
|
2020-10-31 01:28:54 +08:00
|
|
|
/*
|
|
|
|
* Write sq tail if we are asked to, or if the next command would wrap.
|
|
|
|
*/
|
|
|
|
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
|
2018-11-30 01:02:29 +08:00
|
|
|
{
|
2020-10-31 01:28:54 +08:00
|
|
|
if (!write_sq) {
|
|
|
|
u16 next_tail = nvmeq->sq_tail + 1;
|
|
|
|
|
|
|
|
if (next_tail == nvmeq->q_depth)
|
|
|
|
next_tail = 0;
|
|
|
|
if (next_tail != nvmeq->last_sq_tail)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-11-30 01:02:29 +08:00
|
|
|
if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
|
|
|
|
nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
|
|
|
|
writel(nvmeq->sq_tail, nvmeq->q_db);
|
2020-10-31 01:28:54 +08:00
|
|
|
nvmeq->last_sq_tail = nvmeq->sq_tail;
|
2018-11-30 01:02:29 +08:00
|
|
|
}
|
|
|
|
|
2021-10-30 04:32:44 +08:00
|
|
|
static inline void nvme_sq_copy_cmd(struct nvme_queue *nvmeq,
|
|
|
|
struct nvme_command *cmd)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2019-08-07 15:51:20 +08:00
|
|
|
memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes),
|
2021-10-30 04:32:44 +08:00
|
|
|
absolute_pointer(cmd), sizeof(*cmd));
|
2018-05-26 19:45:55 +08:00
|
|
|
if (++nvmeq->sq_tail == nvmeq->q_depth)
|
|
|
|
nvmeq->sq_tail = 0;
|
2018-11-30 01:02:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
|
|
|
|
{
|
|
|
|
struct nvme_queue *nvmeq = hctx->driver_data;
|
|
|
|
|
|
|
|
spin_lock(&nvmeq->sq_lock);
|
2020-10-31 01:28:54 +08:00
|
|
|
if (nvmeq->sq_tail != nvmeq->last_sq_tail)
|
|
|
|
nvme_write_sq_db(nvmeq, true);
|
2018-05-26 19:45:55 +08:00
|
|
|
spin_unlock(&nvmeq->sq_lock);
|
2011-01-21 01:50:14 +08:00
|
|
|
}
|
|
|
|
|
2017-10-17 09:24:20 +08:00
|
|
|
static void **nvme_pci_iod_list(struct request *req)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2015-11-28 22:43:10 +08:00
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
2017-10-17 09:24:20 +08:00
|
|
|
return (void **)(iod->sg + blk_rq_nr_phys_segments(req));
|
2011-01-21 01:50:14 +08:00
|
|
|
}
|
|
|
|
|
2017-12-20 15:30:50 +08:00
|
|
|
static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
|
|
|
|
{
|
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
2018-01-18 05:04:37 +08:00
|
|
|
int nseg = blk_rq_nr_phys_segments(req);
|
2017-12-20 15:30:50 +08:00
|
|
|
unsigned int avg_seg_size;
|
|
|
|
|
2018-01-18 05:04:37 +08:00
|
|
|
avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg);
|
2017-12-20 15:30:50 +08:00
|
|
|
|
2021-06-10 09:28:25 +08:00
|
|
|
if (!nvme_ctrl_sgl_supported(&dev->ctrl))
|
2017-12-20 15:30:50 +08:00
|
|
|
return false;
|
|
|
|
if (!iod->nvmeq->qid)
|
|
|
|
return false;
|
|
|
|
if (!sgl_threshold || avg_seg_size < sgl_threshold)
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-01-20 16:33:52 +08:00
|
|
|
static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2020-07-17 08:51:37 +08:00
|
|
|
const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
|
2021-01-20 16:33:52 +08:00
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
|
|
|
dma_addr_t dma_addr = iod->first_dma;
|
2011-12-21 02:34:52 +08:00
|
|
|
int i;
|
|
|
|
|
2021-01-20 16:33:52 +08:00
|
|
|
for (i = 0; i < iod->npages; i++) {
|
|
|
|
__le64 *prp_list = nvme_pci_iod_list(req)[i];
|
|
|
|
dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);
|
|
|
|
|
|
|
|
dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
|
|
|
|
dma_addr = next_dma_addr;
|
2019-03-03 23:15:19 +08:00
|
|
|
}
|
2021-01-20 16:33:52 +08:00
|
|
|
}
|
2019-03-05 20:49:34 +08:00
|
|
|
|
2021-01-20 16:33:52 +08:00
|
|
|
static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
|
|
|
|
{
|
|
|
|
const int last_sg = SGES_PER_PAGE - 1;
|
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
|
|
|
dma_addr_t dma_addr = iod->first_dma;
|
|
|
|
int i;
|
2019-03-05 20:49:34 +08:00
|
|
|
|
2021-01-20 16:33:52 +08:00
|
|
|
for (i = 0; i < iod->npages; i++) {
|
|
|
|
struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i];
|
|
|
|
dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr);
|
2019-03-05 20:49:34 +08:00
|
|
|
|
2021-01-20 16:33:52 +08:00
|
|
|
dma_pool_free(dev->prp_page_pool, sg_list, dma_addr);
|
|
|
|
dma_addr = next_dma_addr;
|
|
|
|
}
|
|
|
|
}
|
2017-10-17 09:24:20 +08:00
|
|
|
|
2021-01-20 16:33:52 +08:00
|
|
|
static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req)
|
|
|
|
{
|
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
2017-10-17 09:24:20 +08:00
|
|
|
|
2021-01-20 16:33:52 +08:00
|
|
|
if (is_pci_p2pdma_page(sg_page(iod->sg)))
|
|
|
|
pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
|
|
|
|
rq_dma_dir(req));
|
|
|
|
else
|
|
|
|
dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
|
|
|
|
}
|
2017-10-17 09:24:20 +08:00
|
|
|
|
2021-01-20 16:33:52 +08:00
|
|
|
static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
|
|
|
|
{
|
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
2017-10-17 09:24:20 +08:00
|
|
|
|
2021-01-20 16:33:52 +08:00
|
|
|
if (iod->dma_len) {
|
|
|
|
dma_unmap_page(dev->dev, iod->first_dma, iod->dma_len,
|
|
|
|
rq_dma_dir(req));
|
|
|
|
return;
|
2011-12-21 02:34:52 +08:00
|
|
|
}
|
2015-01-23 03:07:58 +08:00
|
|
|
|
2021-01-20 16:33:52 +08:00
|
|
|
WARN_ON_ONCE(!iod->nents);
|
|
|
|
|
|
|
|
nvme_unmap_sg(dev, req);
|
|
|
|
if (iod->npages == 0)
|
|
|
|
dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
|
|
|
|
iod->first_dma);
|
|
|
|
else if (iod->use_sgl)
|
|
|
|
nvme_free_sgls(dev, req);
|
|
|
|
else
|
|
|
|
nvme_free_prps(dev, req);
|
2019-03-05 20:46:58 +08:00
|
|
|
mempool_free(iod->sg, dev->iod_mempool);
|
2014-08-29 23:06:12 +08:00
|
|
|
}
|
|
|
|
|
2017-09-16 01:05:38 +08:00
|
|
|
static void nvme_print_sgl(struct scatterlist *sgl, int nents)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct scatterlist *sg;
|
|
|
|
|
|
|
|
for_each_sg(sgl, sg, nents, i) {
|
|
|
|
dma_addr_t phys = sg_phys(sg);
|
|
|
|
pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
|
|
|
|
"dma_address:%pad dma_length:%d\n",
|
|
|
|
i, &phys, sg->offset, sg->length, &sg_dma_address(sg),
|
|
|
|
sg_dma_len(sg));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-17 09:24:20 +08:00
|
|
|
static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
|
|
|
|
struct request *req, struct nvme_rw_command *cmnd)
|
2011-01-26 23:02:29 +08:00
|
|
|
{
|
2015-11-28 22:43:10 +08:00
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
2011-02-10 23:30:34 +08:00
|
|
|
struct dma_pool *pool;
|
2017-01-13 19:29:12 +08:00
|
|
|
int length = blk_rq_payload_bytes(req);
|
2011-12-21 02:34:52 +08:00
|
|
|
struct scatterlist *sg = iod->sg;
|
2011-01-26 23:02:29 +08:00
|
|
|
int dma_len = sg_dma_len(sg);
|
|
|
|
u64 dma_addr = sg_dma_address(sg);
|
2020-07-17 08:51:37 +08:00
|
|
|
int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1);
|
2011-02-10 21:51:24 +08:00
|
|
|
__le64 *prp_list;
|
2017-10-17 09:24:20 +08:00
|
|
|
void **list = nvme_pci_iod_list(req);
|
2011-02-10 21:51:24 +08:00
|
|
|
dma_addr_t prp_dma;
|
2011-12-21 02:34:52 +08:00
|
|
|
int nprps, i;
|
2011-01-26 23:02:29 +08:00
|
|
|
|
2020-07-17 08:51:37 +08:00
|
|
|
length -= (NVME_CTRL_PAGE_SIZE - offset);
|
2017-08-27 21:56:37 +08:00
|
|
|
if (length <= 0) {
|
|
|
|
iod->first_dma = 0;
|
2017-10-17 09:24:20 +08:00
|
|
|
goto done;
|
2017-08-27 21:56:37 +08:00
|
|
|
}
|
2011-01-26 23:02:29 +08:00
|
|
|
|
2020-07-17 08:51:37 +08:00
|
|
|
dma_len -= (NVME_CTRL_PAGE_SIZE - offset);
|
2011-01-26 23:02:29 +08:00
|
|
|
if (dma_len) {
|
2020-07-17 08:51:37 +08:00
|
|
|
dma_addr += (NVME_CTRL_PAGE_SIZE - offset);
|
2011-01-26 23:02:29 +08:00
|
|
|
} else {
|
|
|
|
sg = sg_next(sg);
|
|
|
|
dma_addr = sg_dma_address(sg);
|
|
|
|
dma_len = sg_dma_len(sg);
|
|
|
|
}
|
|
|
|
|
2020-07-17 08:51:37 +08:00
|
|
|
if (length <= NVME_CTRL_PAGE_SIZE) {
|
2014-04-04 06:45:23 +08:00
|
|
|
iod->first_dma = dma_addr;
|
2017-10-17 09:24:20 +08:00
|
|
|
goto done;
|
2011-02-10 21:51:24 +08:00
|
|
|
}
|
|
|
|
|
2020-07-17 08:51:37 +08:00
|
|
|
nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
|
2011-02-10 23:30:34 +08:00
|
|
|
if (nprps <= (256 / 8)) {
|
|
|
|
pool = dev->prp_small_pool;
|
2011-12-21 02:34:52 +08:00
|
|
|
iod->npages = 0;
|
2011-02-10 23:30:34 +08:00
|
|
|
} else {
|
|
|
|
pool = dev->prp_page_pool;
|
2011-12-21 02:34:52 +08:00
|
|
|
iod->npages = 1;
|
2011-02-10 23:30:34 +08:00
|
|
|
}
|
|
|
|
|
2015-10-16 13:58:37 +08:00
|
|
|
prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
|
2011-05-13 01:51:41 +08:00
|
|
|
if (!prp_list) {
|
2014-04-04 06:45:23 +08:00
|
|
|
iod->first_dma = dma_addr;
|
2011-12-21 02:34:52 +08:00
|
|
|
iod->npages = -1;
|
2017-07-13 03:59:07 +08:00
|
|
|
return BLK_STS_RESOURCE;
|
2011-05-13 01:51:41 +08:00
|
|
|
}
|
2011-12-21 02:34:52 +08:00
|
|
|
list[0] = prp_list;
|
|
|
|
iod->first_dma = prp_dma;
|
2011-02-10 21:51:24 +08:00
|
|
|
i = 0;
|
|
|
|
for (;;) {
|
2020-07-17 08:51:37 +08:00
|
|
|
if (i == NVME_CTRL_PAGE_SIZE >> 3) {
|
2011-02-10 21:51:24 +08:00
|
|
|
__le64 *old_prp_list = prp_list;
|
2015-10-16 13:58:37 +08:00
|
|
|
prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
|
2011-12-21 02:34:52 +08:00
|
|
|
if (!prp_list)
|
2021-01-20 16:35:01 +08:00
|
|
|
goto free_prps;
|
2011-12-21 02:34:52 +08:00
|
|
|
list[iod->npages++] = prp_list;
|
2011-03-17 04:43:40 +08:00
|
|
|
prp_list[0] = old_prp_list[i - 1];
|
|
|
|
old_prp_list[i - 1] = cpu_to_le64(prp_dma);
|
|
|
|
i = 1;
|
2011-02-10 21:51:24 +08:00
|
|
|
}
|
|
|
|
prp_list[i++] = cpu_to_le64(dma_addr);
|
2020-07-17 08:51:37 +08:00
|
|
|
dma_len -= NVME_CTRL_PAGE_SIZE;
|
|
|
|
dma_addr += NVME_CTRL_PAGE_SIZE;
|
|
|
|
length -= NVME_CTRL_PAGE_SIZE;
|
2011-02-10 21:51:24 +08:00
|
|
|
if (length <= 0)
|
|
|
|
break;
|
|
|
|
if (dma_len > 0)
|
|
|
|
continue;
|
2017-07-13 03:59:07 +08:00
|
|
|
if (unlikely(dma_len < 0))
|
|
|
|
goto bad_sgl;
|
2011-02-10 21:51:24 +08:00
|
|
|
sg = sg_next(sg);
|
|
|
|
dma_addr = sg_dma_address(sg);
|
|
|
|
dma_len = sg_dma_len(sg);
|
2011-01-26 23:02:29 +08:00
|
|
|
}
|
2017-10-17 09:24:20 +08:00
|
|
|
done:
|
|
|
|
cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
|
|
|
|
cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
|
2017-07-13 03:59:07 +08:00
|
|
|
return BLK_STS_OK;
|
2021-01-20 16:35:01 +08:00
|
|
|
free_prps:
|
|
|
|
nvme_free_prps(dev, req);
|
|
|
|
return BLK_STS_RESOURCE;
|
|
|
|
bad_sgl:
|
2017-09-16 01:05:38 +08:00
|
|
|
WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
|
|
|
|
"Invalid SGL for payload:%d nents:%d\n",
|
|
|
|
blk_rq_payload_bytes(req), iod->nents);
|
2017-07-13 03:59:07 +08:00
|
|
|
return BLK_STS_IOERR;
|
2011-01-26 23:02:29 +08:00
|
|
|
}
|
|
|
|
|
2017-10-17 09:24:20 +08:00
|
|
|
static void nvme_pci_sgl_set_data(struct nvme_sgl_desc *sge,
|
|
|
|
struct scatterlist *sg)
|
|
|
|
{
|
|
|
|
sge->addr = cpu_to_le64(sg_dma_address(sg));
|
|
|
|
sge->length = cpu_to_le32(sg_dma_len(sg));
|
|
|
|
sge->type = NVME_SGL_FMT_DATA_DESC << 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge,
|
|
|
|
dma_addr_t dma_addr, int entries)
|
|
|
|
{
|
|
|
|
sge->addr = cpu_to_le64(dma_addr);
|
|
|
|
if (entries < SGES_PER_PAGE) {
|
|
|
|
sge->length = cpu_to_le32(entries * sizeof(*sge));
|
|
|
|
sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
|
|
|
|
} else {
|
|
|
|
sge->length = cpu_to_le32(PAGE_SIZE);
|
|
|
|
sge->type = NVME_SGL_FMT_SEG_DESC << 4;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
|
2018-01-18 05:04:38 +08:00
|
|
|
struct request *req, struct nvme_rw_command *cmd, int entries)
|
2017-10-17 09:24:20 +08:00
|
|
|
{
|
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
|
|
|
struct dma_pool *pool;
|
|
|
|
struct nvme_sgl_desc *sg_list;
|
|
|
|
struct scatterlist *sg = iod->sg;
|
|
|
|
dma_addr_t sgl_dma;
|
2018-01-18 05:04:38 +08:00
|
|
|
int i = 0;
|
2017-10-17 09:24:20 +08:00
|
|
|
|
|
|
|
/* setting the transfer type as SGL */
|
|
|
|
cmd->flags = NVME_CMD_SGL_METABUF;
|
|
|
|
|
2018-01-18 05:04:38 +08:00
|
|
|
if (entries == 1) {
|
2017-10-17 09:24:20 +08:00
|
|
|
nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg);
|
|
|
|
return BLK_STS_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (entries <= (256 / sizeof(struct nvme_sgl_desc))) {
|
|
|
|
pool = dev->prp_small_pool;
|
|
|
|
iod->npages = 0;
|
|
|
|
} else {
|
|
|
|
pool = dev->prp_page_pool;
|
|
|
|
iod->npages = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
|
|
|
|
if (!sg_list) {
|
|
|
|
iod->npages = -1;
|
|
|
|
return BLK_STS_RESOURCE;
|
|
|
|
}
|
|
|
|
|
|
|
|
nvme_pci_iod_list(req)[0] = sg_list;
|
|
|
|
iod->first_dma = sgl_dma;
|
|
|
|
|
|
|
|
nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries);
|
|
|
|
|
|
|
|
do {
|
|
|
|
if (i == SGES_PER_PAGE) {
|
|
|
|
struct nvme_sgl_desc *old_sg_desc = sg_list;
|
|
|
|
struct nvme_sgl_desc *link = &old_sg_desc[i - 1];
|
|
|
|
|
|
|
|
sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
|
|
|
|
if (!sg_list)
|
2021-01-20 16:35:01 +08:00
|
|
|
goto free_sgls;
|
2017-10-17 09:24:20 +08:00
|
|
|
|
|
|
|
i = 0;
|
|
|
|
nvme_pci_iod_list(req)[iod->npages++] = sg_list;
|
|
|
|
sg_list[i++] = *link;
|
|
|
|
nvme_pci_sgl_set_seg(link, sgl_dma, entries);
|
|
|
|
}
|
|
|
|
|
|
|
|
nvme_pci_sgl_set_data(&sg_list[i++], sg);
|
|
|
|
sg = sg_next(sg);
|
2018-01-18 05:04:38 +08:00
|
|
|
} while (--entries > 0);
|
2017-10-17 09:24:20 +08:00
|
|
|
|
|
|
|
return BLK_STS_OK;
|
2021-01-20 16:35:01 +08:00
|
|
|
free_sgls:
|
|
|
|
nvme_free_sgls(dev, req);
|
|
|
|
return BLK_STS_RESOURCE;
|
2017-10-17 09:24:20 +08:00
|
|
|
}
|
|
|
|
|
2019-03-05 20:49:34 +08:00
|
|
|
static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
|
|
|
|
struct request *req, struct nvme_rw_command *cmnd,
|
|
|
|
struct bio_vec *bv)
|
|
|
|
{
|
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
2020-07-17 08:51:37 +08:00
|
|
|
unsigned int offset = bv->bv_offset & (NVME_CTRL_PAGE_SIZE - 1);
|
|
|
|
unsigned int first_prp_len = NVME_CTRL_PAGE_SIZE - offset;
|
2019-03-05 20:49:34 +08:00
|
|
|
|
|
|
|
iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
|
|
|
|
if (dma_mapping_error(dev->dev, iod->first_dma))
|
|
|
|
return BLK_STS_RESOURCE;
|
|
|
|
iod->dma_len = bv->bv_len;
|
|
|
|
|
|
|
|
cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma);
|
|
|
|
if (bv->bv_len > first_prp_len)
|
|
|
|
cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len);
|
2020-07-03 10:49:24 +08:00
|
|
|
return BLK_STS_OK;
|
2019-03-05 20:49:34 +08:00
|
|
|
}
|
|
|
|
|
2019-03-05 20:54:18 +08:00
|
|
|
static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
|
|
|
|
struct request *req, struct nvme_rw_command *cmnd,
|
|
|
|
struct bio_vec *bv)
|
|
|
|
{
|
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
|
|
|
|
|
|
|
iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
|
|
|
|
if (dma_mapping_error(dev->dev, iod->first_dma))
|
|
|
|
return BLK_STS_RESOURCE;
|
|
|
|
iod->dma_len = bv->bv_len;
|
|
|
|
|
2019-05-01 00:53:29 +08:00
|
|
|
cmnd->flags = NVME_CMD_SGL_METABUF;
|
2019-03-05 20:54:18 +08:00
|
|
|
cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma);
|
|
|
|
cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len);
|
|
|
|
cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4;
|
2020-07-03 10:49:24 +08:00
|
|
|
return BLK_STS_OK;
|
2019-03-05 20:54:18 +08:00
|
|
|
}
|
|
|
|
|
2017-06-03 15:38:05 +08:00
|
|
|
static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
|
2017-01-13 19:29:12 +08:00
|
|
|
struct nvme_command *cmnd)
|
2015-05-22 17:12:46 +08:00
|
|
|
{
|
2015-11-28 22:43:10 +08:00
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
2019-03-05 20:59:02 +08:00
|
|
|
blk_status_t ret = BLK_STS_RESOURCE;
|
2018-01-18 05:04:38 +08:00
|
|
|
int nr_mapped;
|
2015-05-22 17:12:46 +08:00
|
|
|
|
2019-03-05 20:49:34 +08:00
|
|
|
if (blk_rq_nr_phys_segments(req) == 1) {
|
|
|
|
struct bio_vec bv = req_bvec(req);
|
|
|
|
|
|
|
|
if (!is_pci_p2pdma_page(bv.bv_page)) {
|
2020-07-17 08:51:37 +08:00
|
|
|
if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
|
2019-03-05 20:49:34 +08:00
|
|
|
return nvme_setup_prp_simple(dev, req,
|
|
|
|
&cmnd->rw, &bv);
|
2019-03-05 20:54:18 +08:00
|
|
|
|
2021-04-10 02:12:55 +08:00
|
|
|
if (iod->nvmeq->qid && sgl_threshold &&
|
2021-06-10 09:28:25 +08:00
|
|
|
nvme_ctrl_sgl_supported(&dev->ctrl))
|
2019-03-05 20:54:18 +08:00
|
|
|
return nvme_setup_sgl_simple(dev, req,
|
|
|
|
&cmnd->rw, &bv);
|
2019-03-05 20:49:34 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
iod->dma_len = 0;
|
2019-03-05 20:46:58 +08:00
|
|
|
iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
|
|
|
|
if (!iod->sg)
|
|
|
|
return BLK_STS_RESOURCE;
|
2016-12-09 06:20:32 +08:00
|
|
|
sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
|
2019-03-05 20:59:02 +08:00
|
|
|
iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
|
2015-10-16 13:58:38 +08:00
|
|
|
if (!iod->nents)
|
2021-01-20 16:35:01 +08:00
|
|
|
goto out_free_sg;
|
2015-05-22 17:12:46 +08:00
|
|
|
|
2018-10-05 05:27:44 +08:00
|
|
|
if (is_pci_p2pdma_page(sg_page(iod->sg)))
|
2019-08-13 01:30:42 +08:00
|
|
|
nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg,
|
|
|
|
iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN);
|
2018-10-05 05:27:44 +08:00
|
|
|
else
|
|
|
|
nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
|
2019-03-05 20:59:02 +08:00
|
|
|
rq_dma_dir(req), DMA_ATTR_NO_WARN);
|
2018-01-18 05:04:38 +08:00
|
|
|
if (!nr_mapped)
|
2021-01-20 16:35:01 +08:00
|
|
|
goto out_free_sg;
|
2015-05-22 17:12:46 +08:00
|
|
|
|
2019-03-05 20:59:02 +08:00
|
|
|
iod->use_sgl = nvme_pci_use_sgls(dev, req);
|
2017-12-20 15:30:50 +08:00
|
|
|
if (iod->use_sgl)
|
2018-01-18 05:04:38 +08:00
|
|
|
ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
|
2017-10-17 09:24:20 +08:00
|
|
|
else
|
|
|
|
ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
|
2017-07-13 03:59:07 +08:00
|
|
|
if (ret != BLK_STS_OK)
|
2021-01-20 16:35:01 +08:00
|
|
|
goto out_unmap_sg;
|
|
|
|
return BLK_STS_OK;
|
|
|
|
|
|
|
|
out_unmap_sg:
|
|
|
|
nvme_unmap_sg(dev, req);
|
|
|
|
out_free_sg:
|
|
|
|
mempool_free(iod->sg, dev->iod_mempool);
|
2019-03-04 00:46:28 +08:00
|
|
|
return ret;
|
|
|
|
}
|
2018-10-18 02:34:15 +08:00
|
|
|
|
2019-03-04 00:46:28 +08:00
|
|
|
static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
|
|
|
|
struct nvme_command *cmnd)
|
|
|
|
{
|
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
2011-02-23 03:18:30 +08:00
|
|
|
|
2019-03-04 00:46:28 +08:00
|
|
|
iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
|
|
|
|
rq_dma_dir(req), 0);
|
|
|
|
if (dma_mapping_error(dev->dev, iod->meta_dma))
|
|
|
|
return BLK_STS_IOERR;
|
|
|
|
cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
|
2020-07-03 10:49:24 +08:00
|
|
|
return BLK_STS_OK;
|
2011-02-23 03:18:30 +08:00
|
|
|
}
|
|
|
|
|
2021-10-30 04:34:11 +08:00
|
|
|
static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
|
2014-04-04 06:45:23 +08:00
|
|
|
{
|
2019-03-03 23:04:01 +08:00
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
2017-06-13 00:36:32 +08:00
|
|
|
blk_status_t ret;
|
NVMe: Metadata format support
Adds support for NVMe metadata formats and exposes block devices for
all namespaces regardless of their format. Namespace formats that are
unusable will have disk capacity set to 0, but a handle to the block
device is created to simplify device management. A namespace is not
usable when the format requires host interleave block and metadata in
single buffer, has no provisioned storage, or has better data but failed
to register with blk integrity.
The namespace has to be scanned in two phases to support separate
metadata formats. The first establishes the sector size and capacity
prior to invoking add_disk. If metadata is required, the capacity will
be temporarilly set to 0 until it can be revalidated and registered with
the integrity extenstions after add_disk completes.
The driver relies on the integrity extensions to provide the metadata
buffer. NVMe requires this be a single physically contiguous region,
so only one integrity segment is allowed per command. If the metadata
is used for T10 PI, the driver provides mappings to save and restore
the reftag physical block translation. The driver provides no-op
functions for generate and verify if metadata is not used for protection
information. This way the setup is always provided by the block layer.
If a request does not supply a required metadata buffer, the command
is failed with bad address. This could only happen if a user manually
disables verify/generate on such a disk. The only exception to where
this is okay is if the controller is capable of stripping/generating
the metadata, which is possible on some types of formats.
The metadata scatter gather list now occupies the spot in the nvme_iod
that used to be used to link retryable IOD's, but we don't do that
anymore, so the field was unused.
Signed-off-by: Keith Busch <keith.busch@intel.com>
2015-02-20 04:39:03 +08:00
|
|
|
|
2019-03-03 23:04:01 +08:00
|
|
|
iod->aborted = 0;
|
|
|
|
iod->npages = -1;
|
|
|
|
iod->nents = 0;
|
|
|
|
|
2021-10-30 04:34:11 +08:00
|
|
|
ret = nvme_setup_cmd(req->q->queuedata, req);
|
2017-06-03 15:38:05 +08:00
|
|
|
if (ret)
|
2015-11-28 22:43:10 +08:00
|
|
|
return ret;
|
2014-11-04 23:20:14 +08:00
|
|
|
|
2017-06-03 15:38:05 +08:00
|
|
|
if (blk_rq_nr_phys_segments(req)) {
|
2021-10-30 04:34:11 +08:00
|
|
|
ret = nvme_map_data(dev, req, &iod->cmd);
|
2017-06-03 15:38:05 +08:00
|
|
|
if (ret)
|
2019-03-03 23:04:01 +08:00
|
|
|
goto out_free_cmd;
|
2017-06-03 15:38:05 +08:00
|
|
|
}
|
2014-11-04 23:20:14 +08:00
|
|
|
|
2019-03-04 00:46:28 +08:00
|
|
|
if (blk_integrity_rq(req)) {
|
2021-10-30 04:34:11 +08:00
|
|
|
ret = nvme_map_metadata(dev, req, &iod->cmd);
|
2019-03-04 00:46:28 +08:00
|
|
|
if (ret)
|
|
|
|
goto out_unmap_data;
|
|
|
|
}
|
|
|
|
|
2015-11-26 19:59:50 +08:00
|
|
|
blk_mq_start_request(req);
|
2017-06-03 15:38:05 +08:00
|
|
|
return BLK_STS_OK;
|
2019-03-04 00:46:28 +08:00
|
|
|
out_unmap_data:
|
|
|
|
nvme_unmap_data(dev, req);
|
2016-12-09 06:20:32 +08:00
|
|
|
out_free_cmd:
|
|
|
|
nvme_cleanup_cmd(req);
|
2015-10-16 13:58:38 +08:00
|
|
|
return ret;
|
2011-01-21 01:50:14 +08:00
|
|
|
}
|
NVMe: Metadata format support
Adds support for NVMe metadata formats and exposes block devices for
all namespaces regardless of their format. Namespace formats that are
unusable will have disk capacity set to 0, but a handle to the block
device is created to simplify device management. A namespace is not
usable when the format requires host interleave block and metadata in
single buffer, has no provisioned storage, or has metadata but failed
to register with blk integrity.
The namespace has to be scanned in two phases to support separate
metadata formats. The first establishes the sector size and capacity
prior to invoking add_disk. If metadata is required, the capacity will
be temporarily set to 0 until it can be revalidated and registered with
the integrity extensions after add_disk completes.
The driver relies on the integrity extensions to provide the metadata
buffer. NVMe requires this be a single physically contiguous region,
so only one integrity segment is allowed per command. If the metadata
is used for T10 PI, the driver provides mappings to save and restore
the reftag physical block translation. The driver provides no-op
functions for generate and verify if metadata is not used for protection
information. This way the setup is always provided by the block layer.
If a request does not supply a required metadata buffer, the command
is failed with bad address. This could only happen if a user manually
disables verify/generate on such a disk. The only exception to where
this is okay is if the controller is capable of stripping/generating
the metadata, which is possible on some types of formats.
The metadata scatter gather list now occupies the spot in the nvme_iod
that used to be used to link retryable IOD's, but we don't do that
anymore, so the field was unused.
Signed-off-by: Keith Busch <keith.busch@intel.com>
2015-02-20 04:39:03 +08:00
|
|
|
|
2021-10-30 04:34:11 +08:00
|
|
|
/*
|
|
|
|
* NOTE: ns is NULL when called on the admin queue.
|
|
|
|
*/
|
|
|
|
static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
|
|
|
|
const struct blk_mq_queue_data *bd)
|
|
|
|
{
|
|
|
|
struct nvme_queue *nvmeq = hctx->driver_data;
|
|
|
|
struct nvme_dev *dev = nvmeq->dev;
|
|
|
|
struct request *req = bd->rq;
|
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
|
|
|
blk_status_t ret;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We should not need to do this, but we're still using this to
|
|
|
|
* ensure we can drain requests on a dying queue.
|
|
|
|
*/
|
|
|
|
if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
|
|
|
|
return BLK_STS_IOERR;
|
|
|
|
|
|
|
|
if (unlikely(!nvme_check_ready(&dev->ctrl, req, true)))
|
|
|
|
return nvme_fail_nonready_command(&dev->ctrl, req);
|
|
|
|
|
|
|
|
ret = nvme_prep_rq(dev, req);
|
|
|
|
if (unlikely(ret))
|
|
|
|
return ret;
|
|
|
|
spin_lock(&nvmeq->sq_lock);
|
|
|
|
nvme_sq_copy_cmd(nvmeq, &iod->cmd);
|
|
|
|
nvme_write_sq_db(nvmeq, bd->last);
|
|
|
|
spin_unlock(&nvmeq->sq_lock);
|
|
|
|
return BLK_STS_OK;
|
|
|
|
}
|
|
|
|
|
2021-11-18 23:37:30 +08:00
|
|
|
static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct request **rqlist)
|
|
|
|
{
|
|
|
|
spin_lock(&nvmeq->sq_lock);
|
|
|
|
while (!rq_list_empty(*rqlist)) {
|
|
|
|
struct request *req = rq_list_pop(rqlist);
|
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
|
|
|
|
|
|
|
nvme_sq_copy_cmd(nvmeq, &iod->cmd);
|
|
|
|
}
|
|
|
|
nvme_write_sq_db(nvmeq, true);
|
|
|
|
spin_unlock(&nvmeq->sq_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool nvme_prep_rq_batch(struct nvme_queue *nvmeq, struct request *req)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* We should not need to do this, but we're still using this to
|
|
|
|
* ensure we can drain requests on a dying queue.
|
|
|
|
*/
|
|
|
|
if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
|
|
|
|
return false;
|
|
|
|
if (unlikely(!nvme_check_ready(&nvmeq->dev->ctrl, req, true)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
req->mq_hctx->tags->rqs[req->tag] = req;
|
|
|
|
return nvme_prep_rq(nvmeq->dev, req) == BLK_STS_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_queue_rqs(struct request **rqlist)
|
|
|
|
{
|
2022-01-06 01:05:18 +08:00
|
|
|
struct request *req, *next, *prev = NULL;
|
2021-11-18 23:37:30 +08:00
|
|
|
struct request *requeue_list = NULL;
|
|
|
|
|
2022-01-06 01:05:18 +08:00
|
|
|
rq_list_for_each_safe(rqlist, req, next) {
|
2021-11-18 23:37:30 +08:00
|
|
|
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
|
|
|
|
|
|
|
|
if (!nvme_prep_rq_batch(nvmeq, req)) {
|
|
|
|
/* detach 'req' and add to remainder list */
|
2022-01-06 01:05:18 +08:00
|
|
|
rq_list_move(rqlist, &requeue_list, req, prev);
|
|
|
|
|
|
|
|
req = prev;
|
|
|
|
if (!req)
|
|
|
|
continue;
|
2021-11-18 23:37:30 +08:00
|
|
|
}
|
|
|
|
|
2022-01-06 01:05:18 +08:00
|
|
|
if (!next || req->mq_hctx != next->mq_hctx) {
|
2021-11-18 23:37:30 +08:00
|
|
|
/* detach rest of list, and submit */
|
2022-01-06 01:05:18 +08:00
|
|
|
req->rq_next = NULL;
|
2021-11-18 23:37:30 +08:00
|
|
|
nvme_submit_cmds(nvmeq, rqlist);
|
2022-01-06 01:05:18 +08:00
|
|
|
*rqlist = next;
|
|
|
|
prev = NULL;
|
|
|
|
} else
|
|
|
|
prev = req;
|
|
|
|
}
|
2021-11-18 23:37:30 +08:00
|
|
|
|
|
|
|
*rqlist = requeue_list;
|
|
|
|
}
|
|
|
|
|
2021-10-08 19:59:37 +08:00
|
|
|
static __always_inline void nvme_pci_unmap_rq(struct request *req)
|
2015-11-26 20:03:13 +08:00
|
|
|
{
|
2015-11-28 22:43:10 +08:00
|
|
|
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
|
2019-03-04 00:46:28 +08:00
|
|
|
struct nvme_dev *dev = iod->nvmeq->dev;
|
2014-11-04 23:20:14 +08:00
|
|
|
|
2019-03-04 00:46:28 +08:00
|
|
|
if (blk_integrity_rq(req))
|
|
|
|
dma_unmap_page(dev->dev, iod->meta_dma,
|
|
|
|
rq_integrity_vec(req)->bv_len, rq_data_dir(req));
|
2019-03-03 23:52:21 +08:00
|
|
|
if (blk_rq_nr_phys_segments(req))
|
2019-03-04 00:46:28 +08:00
|
|
|
nvme_unmap_data(dev, req);
|
2021-10-08 19:59:37 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Complete a single request: release its DMA mappings first, then hand it
 * to nvme_complete_rq().
 */
static void nvme_pci_complete_rq(struct request *req)
{
	nvme_pci_unmap_rq(req);

	nvme_complete_rq(req);
}
|
|
|
|
|
2021-10-08 19:59:37 +08:00
|
|
|
static void nvme_pci_complete_batch(struct io_comp_batch *iob)
|
|
|
|
{
|
|
|
|
nvme_complete_batch(iob, nvme_pci_unmap_rq);
|
|
|
|
}
|
|
|
|
|
2016-03-22 23:02:06 +08:00
|
|
|
/* We read the CQE phase first to check if the rest of the entry is valid */
|
2018-05-18 22:37:04 +08:00
|
|
|
static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
|
2016-03-22 23:02:06 +08:00
|
|
|
{
|
2020-04-28 22:21:56 +08:00
|
|
|
struct nvme_completion *hcqe = &nvmeq->cqes[nvmeq->cq_head];
|
|
|
|
|
|
|
|
return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == nvmeq->cq_phase;
|
2016-03-22 23:02:06 +08:00
|
|
|
}
|
|
|
|
|
2017-06-18 22:28:07 +08:00
|
|
|
static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2017-06-18 22:28:07 +08:00
|
|
|
u16 head = nvmeq->cq_head;
|
2015-11-28 22:42:28 +08:00
|
|
|
|
2018-06-06 22:13:05 +08:00
|
|
|
if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
|
|
|
|
nvmeq->dbbuf_cq_ei))
|
|
|
|
writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
|
2017-06-18 22:28:07 +08:00
|
|
|
}
|
2015-11-26 19:59:50 +08:00
|
|
|
|
2020-01-31 02:40:24 +08:00
|
|
|
static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
|
|
|
|
{
|
|
|
|
if (!nvmeq->qid)
|
|
|
|
return nvmeq->dev->admin_tagset.tags[0];
|
|
|
|
return nvmeq->dev->tagset.tags[nvmeq->qid - 1];
|
|
|
|
}
|
|
|
|
|
2021-10-08 19:59:37 +08:00
|
|
|
/*
 * Process the completion queue entry at index @idx of @nvmeq.
 *
 * Async event completions are passed to nvme_complete_async_event().  For
 * everything else the request is looked up by command id; an unknown id is
 * logged and ignored.  A request that nvme_try_complete_req() does not
 * complete is added to the batch @iob or, if that is not possible,
 * completed on the spot.
 */
static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
				   struct io_comp_batch *iob, u16 idx)
{
	struct nvme_completion *cqe = &nvmeq->cqes[idx];
	__u16 command_id = READ_ONCE(cqe->command_id);
	struct request *rq;

	/*
	 * AEN requests are special as they don't time out and can
	 * survive any kind of queue freeze and often don't respond to
	 * aborts.  We don't even bother to allocate a struct request
	 * for them but rather special case them here.
	 */
	if (unlikely(nvme_is_aen_req(nvmeq->qid, command_id))) {
		nvme_complete_async_event(&nvmeq->dev->ctrl,
				cqe->status, &cqe->result);
		return;
	}

	rq = nvme_find_rq(nvme_queue_tagset(nvmeq), command_id);
	if (unlikely(!rq)) {
		dev_warn(nvmeq->dev->ctrl.device,
			"invalid id %d completed on queue %d\n",
			command_id, le16_to_cpu(cqe->sq_id));
		return;
	}

	trace_nvme_sq(rq, cqe->sq_head, nvmeq->sq_tail);

	if (nvme_try_complete_req(rq, cqe->status, cqe->result))
		return;
	if (blk_mq_add_to_batch(rq, iob, nvme_req(rq)->status,
				nvme_pci_complete_batch))
		return;

	nvme_pci_complete_rq(rq);
}
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2018-05-18 00:31:50 +08:00
|
|
|
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
|
|
|
|
{
|
2021-06-17 14:02:17 +08:00
|
|
|
u32 tmp = nvmeq->cq_head + 1;
|
2020-05-08 04:07:04 +08:00
|
|
|
|
|
|
|
if (tmp == nvmeq->q_depth) {
|
2018-05-18 00:31:50 +08:00
|
|
|
nvmeq->cq_head = 0;
|
2020-02-29 02:45:19 +08:00
|
|
|
nvmeq->cq_phase ^= 1;
|
2020-05-08 04:07:04 +08:00
|
|
|
} else {
|
|
|
|
nvmeq->cq_head = tmp;
|
2011-01-21 01:50:14 +08:00
|
|
|
}
|
2015-11-04 11:37:26 +08:00
|
|
|
}
|
|
|
|
|
2021-10-08 19:59:37 +08:00
|
|
|
static inline int nvme_poll_cq(struct nvme_queue *nvmeq,
|
|
|
|
struct io_comp_batch *iob)
|
2015-11-04 11:37:26 +08:00
|
|
|
{
|
2018-11-26 23:21:49 +08:00
|
|
|
int found = 0;
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2018-11-26 23:21:49 +08:00
|
|
|
while (nvme_cqe_pending(nvmeq)) {
|
2020-03-03 00:45:04 +08:00
|
|
|
found++;
|
2020-05-09 04:04:06 +08:00
|
|
|
/*
|
|
|
|
* load-load control dependency between phase and the rest of
|
|
|
|
* the cqe requires a full read memory barrier
|
|
|
|
*/
|
|
|
|
dma_rmb();
|
2021-10-08 19:59:37 +08:00
|
|
|
nvme_handle_cqe(nvmeq, iob, nvmeq->cq_head);
|
2018-05-18 00:31:50 +08:00
|
|
|
nvme_update_cq_head(nvmeq);
|
2017-06-18 22:28:09 +08:00
|
|
|
}
|
2017-06-18 22:28:07 +08:00
|
|
|
|
2020-03-03 00:56:53 +08:00
|
|
|
if (found)
|
2017-06-18 22:28:09 +08:00
|
|
|
nvme_ring_cq_doorbell(nvmeq);
|
2018-05-18 00:31:50 +08:00
|
|
|
return found;
|
2011-01-21 01:50:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static irqreturn_t nvme_irq(int irq, void *data)
|
2011-02-06 20:28:06 +08:00
|
|
|
{
|
|
|
|
struct nvme_queue *nvmeq = data;
|
nvme: wire up completion batching for the IRQ path
Trivial to do now, just need our own io_comp_batch on the stack and pass
that in to the usual command completion handling.
I pondered making this dependent on how many entries we had to process,
but even for a single entry there's no discernable difference in
performance or latency. Running a sync workload over io_uring:
t/io_uring -b512 -d1 -s1 -c1 -p0 -F1 -B1 -n2 /dev/nvme1n1 /dev/nvme2n1
yields the below performance before the patch:
IOPS=254820, BW=124MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251174, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=250806, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)
and the following after:
IOPS=255972, BW=124MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251920, BW=123MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251794, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)
which definitely isn't slower, about the same if you factor in a bit of
variance. For peak performance workloads, benchmarking shows a 2%
improvement.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2021-10-18 22:45:39 +08:00
|
|
|
DEFINE_IO_COMP_BATCH(iob);
|
2018-05-18 00:31:50 +08:00
|
|
|
|
nvme: wire up completion batching for the IRQ path
Trivial to do now, just need our own io_comp_batch on the stack and pass
that in to the usual command completion handling.
I pondered making this dependent on how many entries we had to process,
but even for a single entry there's no discernable difference in
performance or latency. Running a sync workload over io_uring:
t/io_uring -b512 -d1 -s1 -c1 -p0 -F1 -B1 -n2 /dev/nvme1n1 /dev/nvme2n1
yields the below performance before the patch:
IOPS=254820, BW=124MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251174, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=250806, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)
and the following after:
IOPS=255972, BW=124MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251920, BW=123MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251794, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)
which definitely isn't slower, about the same if you factor in a bit of
variance. For peak performance workloads, benchmarking shows a 2%
improvement.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2021-10-18 22:45:39 +08:00
|
|
|
if (nvme_poll_cq(nvmeq, &iob)) {
|
|
|
|
if (!rq_list_empty(iob.req_list))
|
|
|
|
nvme_pci_complete_batch(&iob);
|
2021-02-24 04:47:41 +08:00
|
|
|
return IRQ_HANDLED;
|
nvme: wire up completion batching for the IRQ path
Trivial to do now, just need our own io_comp_batch on the stack and pass
that in to the usual command completion handling.
I pondered making this dependent on how many entries we had to process,
but even for a single entry there's no discernable difference in
performance or latency. Running a sync workload over io_uring:
t/io_uring -b512 -d1 -s1 -c1 -p0 -F1 -B1 -n2 /dev/nvme1n1 /dev/nvme2n1
yields the below performance before the patch:
IOPS=254820, BW=124MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251174, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=250806, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)
and the following after:
IOPS=255972, BW=124MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251920, BW=123MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251794, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)
which definitely isn't slower, about the same if you factor in a bit of
variance. For peak performance workloads, benchmarking shows a 2%
improvement.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2021-10-18 22:45:39 +08:00
|
|
|
}
|
2021-02-24 04:47:41 +08:00
|
|
|
return IRQ_NONE;
|
2011-02-06 20:28:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static irqreturn_t nvme_irq_check(int irq, void *data)
|
|
|
|
{
|
|
|
|
struct nvme_queue *nvmeq = data;
|
2020-07-03 10:49:21 +08:00
|
|
|
|
2018-05-18 22:37:04 +08:00
|
|
|
if (nvme_cqe_pending(nvmeq))
|
2016-03-22 23:02:06 +08:00
|
|
|
return IRQ_WAKE_THREAD;
|
|
|
|
return IRQ_NONE;
|
2011-02-06 20:28:06 +08:00
|
|
|
}
|
|
|
|
|
2018-12-03 00:46:20 +08:00
|
|
|
/*
|
2020-03-05 01:17:01 +08:00
|
|
|
* Poll for completions for any interrupt driven queue
|
2018-12-03 00:46:20 +08:00
|
|
|
* Can be called from any context.
|
|
|
|
*/
|
2020-03-05 01:17:01 +08:00
|
|
|
static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
|
2015-11-04 11:37:26 +08:00
|
|
|
{
|
2018-12-03 00:46:23 +08:00
|
|
|
struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);
|
2015-11-04 11:37:26 +08:00
|
|
|
|
2020-03-05 01:17:01 +08:00
|
|
|
WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags));
|
2017-06-18 22:28:10 +08:00
|
|
|
|
2020-03-05 01:17:01 +08:00
|
|
|
disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
|
2021-10-08 19:59:37 +08:00
|
|
|
nvme_poll_cq(nvmeq, NULL);
|
2020-03-05 01:17:01 +08:00
|
|
|
enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
|
2015-11-04 11:37:26 +08:00
|
|
|
}
|
|
|
|
|
2021-10-12 23:24:29 +08:00
|
|
|
static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
|
2018-11-15 00:38:28 +08:00
|
|
|
{
|
|
|
|
struct nvme_queue *nvmeq = hctx->driver_data;
|
|
|
|
bool found;
|
|
|
|
|
|
|
|
if (!nvme_cqe_pending(nvmeq))
|
|
|
|
return 0;
|
|
|
|
|
2018-12-03 00:46:23 +08:00
|
|
|
spin_lock(&nvmeq->cq_poll_lock);
|
2021-10-08 19:59:37 +08:00
|
|
|
found = nvme_poll_cq(nvmeq, iob);
|
2018-12-03 00:46:23 +08:00
|
|
|
spin_unlock(&nvmeq->cq_poll_lock);
|
2018-11-15 00:38:28 +08:00
|
|
|
|
|
|
|
return found;
|
|
|
|
}
|
|
|
|
|
2017-11-08 06:13:12 +08:00
|
|
|
static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2016-04-26 19:52:00 +08:00
|
|
|
struct nvme_dev *dev = to_nvme_dev(ctrl);
|
2018-01-14 18:39:01 +08:00
|
|
|
struct nvme_queue *nvmeq = &dev->queues[0];
|
2021-06-17 06:15:53 +08:00
|
|
|
struct nvme_command c = { };
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2014-11-04 23:20:14 +08:00
|
|
|
c.common.opcode = nvme_admin_async_event;
|
2017-11-08 06:13:12 +08:00
|
|
|
c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
|
2021-10-30 04:32:44 +08:00
|
|
|
|
|
|
|
spin_lock(&nvmeq->sq_lock);
|
|
|
|
nvme_sq_copy_cmd(nvmeq, &c);
|
|
|
|
nvme_write_sq_db(nvmeq, true);
|
|
|
|
spin_unlock(&nvmeq->sq_lock);
|
2015-05-22 17:12:38 +08:00
|
|
|
}
|
|
|
|
|
2011-01-21 01:50:14 +08:00
|
|
|
/*
 * Synchronously issue a delete-queue admin command.
 *
 * @opcode selects which kind of queue is deleted (callers in this file pass
 * nvme_admin_delete_sq or nvme_admin_delete_cq) and @id is the queue
 * identifier.  Returns the result of nvme_submit_sync_cmd().
 */
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command del_cmd = { };

	del_cmd.delete_queue.qid = cpu_to_le16(id);
	del_cmd.delete_queue.opcode = opcode;

	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &del_cmd, NULL, 0);
}
|
|
|
|
|
|
|
|
/*
 * Synchronously issue a Create I/O Completion Queue admin command for @nvmeq.
 *
 * The queue is created as physically contiguous; interrupts are enabled for
 * it unless the queue is flagged NVMEQ_POLLED.  @vector is the interrupt
 * vector reported to the controller.  Returns the result of
 * nvme_submit_sync_cmd().
 */
static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
		struct nvme_queue *nvmeq, s16 vector)
{
	const bool irq_driven = !test_bit(NVMEQ_POLLED, &nvmeq->flags);
	int cq_flags = NVME_QUEUE_PHYS_CONTIG;
	struct nvme_command cmd = { };

	if (irq_driven)
		cq_flags |= NVME_CQ_IRQ_ENABLED;

	/*
	 * Note: we (ab)use the fact that the prp fields survive if no data
	 * is attached to the request.
	 */
	cmd.create_cq.opcode = nvme_admin_create_cq;
	cmd.create_cq.cqid = cpu_to_le16(qid);
	cmd.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
	/* qsize is encoded as "number of entries - 1" */
	cmd.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	cmd.create_cq.irq_vector = cpu_to_le16(vector);
	cmd.create_cq.cq_flags = cpu_to_le16(cq_flags);

	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &cmd, NULL, 0);
}
|
|
|
|
|
|
|
|
/*
 * Synchronously issue a Create I/O Submission Queue admin command for @nvmeq.
 *
 * The submission queue is bound to the completion queue with the same id
 * (@qid).  Returns the result of nvme_submit_sync_cmd().
 */
static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
						struct nvme_queue *nvmeq)
{
	int sq_flags = NVME_QUEUE_PHYS_CONTIG;
	struct nvme_command cmd = { };

	/*
	 * Some drives have a bug that auto-enables WRRU if MEDIUM isn't
	 * set. Since URGENT priority is zeroes, it makes all queues
	 * URGENT.
	 */
	if (dev->ctrl.quirks & NVME_QUIRK_MEDIUM_PRIO_SQ)
		sq_flags |= NVME_SQ_PRIO_MEDIUM;

	/*
	 * Note: we (ab)use the fact that the prp fields survive if no data
	 * is attached to the request.
	 */
	cmd.create_sq.opcode = nvme_admin_create_sq;
	cmd.create_sq.sqid = cpu_to_le16(qid);
	cmd.create_sq.cqid = cpu_to_le16(qid);
	cmd.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
	/* qsize is encoded as "number of entries - 1" */
	cmd.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	cmd.create_sq.sq_flags = cpu_to_le16(sq_flags);

	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &cmd, NULL, 0);
}
|
|
|
|
|
|
|
|
/* Delete the completion queue @cqid via adapter_delete_queue(). */
static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
{
	const u8 opcode = nvme_admin_delete_cq;

	return adapter_delete_queue(dev, opcode, cqid);
}
|
|
|
|
|
|
|
|
/* Delete the submission queue @sqid via adapter_delete_queue(). */
static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
{
	const u8 opcode = nvme_admin_delete_sq;

	return adapter_delete_queue(dev, opcode, sqid);
}
|
|
|
|
|
2017-06-03 15:38:04 +08:00
|
|
|
/*
 * Completion callback for the Abort command issued by nvme_timeout().
 *
 * Logs the status of the Abort command, gives back the abort credit that
 * nvme_timeout() took from ctrl.abort_limit, and frees the abort request.
 * @error is not used; the NVMe status is taken from the request itself.
 */
static void abort_endio(struct request *req, blk_status_t error)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct nvme_queue *nvmeq = iod->nvmeq;

	/* Terminate the message with a newline like every other log line here. */
	dev_warn(nvmeq->dev->ctrl.device,
		 "Abort status: 0x%x\n", nvme_req(req)->status);
	atomic_inc(&nvmeq->dev->ctrl.abort_limit);
	blk_mq_free_request(req);
}
|
|
|
|
|
2017-06-08 02:32:50 +08:00
|
|
|
/*
 * Decide, from a CSTS register snapshot, whether the controller should be
 * reset.
 *
 * Returns false while a reset/reinit is already in flight (RESETTING or
 * CONNECTING).  Otherwise returns true only if CSTS reports a fatal status
 * (CFS) or, on a device with subsystem support, that an NVM subsystem reset
 * occurred (NSSRO).
 */
static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
{
	/* If true, indicates loss of adapter communication, possibly by a
	 * NVMe Subsystem reset.
	 */
	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
	enum nvme_ctrl_state state = dev->ctrl.state;

	/* If there is a reset/reinit ongoing, we shouldn't reset again. */
	if (state == NVME_CTRL_RESETTING || state == NVME_CTRL_CONNECTING)
		return false;

	/* We shouldn't reset unless the controller is on fatal error state
	 * _or_ if we lost the communication with it.
	 */
	return (csts & NVME_CSTS_CFS) || nssro;
}
|
|
|
|
|
|
|
|
/*
 * Log that the controller is down and about to be reset.
 *
 * Besides @csts, the PCI_STATUS config register is read to help see what
 * died; if that read fails, the error code is logged instead.
 */
static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
{
	u16 pci_status;
	int err;

	err = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
				   &pci_status);
	if (err != PCIBIOS_SUCCESSFUL) {
		dev_warn(dev->ctrl.device,
			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
			 csts, err);
		return;
	}

	dev_warn(dev->ctrl.device,
		 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
		 csts, pci_status);
}
|
|
|
|
|
2015-10-22 20:03:35 +08:00
|
|
|
/*
 * blk-mq ->timeout() callback for both admin and I/O requests.
 *
 * Escalation order, as implemented below:
 *  1. PCI channel offline: just re-arm the timer.
 *  2. Controller reports failure (see nvme_should_reset()): disable + reset.
 *  3. Poll the queue once in case the completion was merely missed.
 *  4. Controller is starting up or being deleted: disable it for good.
 *     Controller is resetting: re-arm the timer.
 *  5. Admin queue request, or a request that was already aborted once:
 *     disable + reset.
 *  6. Otherwise send an Abort admin command for the request (subject to the
 *     controller's abort_limit) and re-arm the timer.
 */
static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct nvme_queue *nvmeq = iod->nvmeq;
	struct nvme_dev *dev = nvmeq->dev;
	struct nvme_ctrl *ctrl = &dev->ctrl;
	struct nvme_command abort_cmd = { };
	struct request *abort_req;
	u32 csts = readl(dev->bar + NVME_REG_CSTS);

	/* If PCI error recovery process is happening, we cannot reset or
	 * the recovery mechanism will surely fail.
	 */
	mb();
	if (pci_channel_offline(to_pci_dev(dev->dev)))
		return BLK_EH_RESET_TIMER;

	/*
	 * Reset immediately if the controller is failed
	 */
	if (nvme_should_reset(dev, csts)) {
		nvme_warn_reset(dev, csts);
		goto disable_and_reset;
	}

	/*
	 * Did we miss an interrupt?
	 */
	if (!test_bit(NVMEQ_POLLED, &nvmeq->flags))
		nvme_poll_irqdisable(nvmeq);
	else
		nvme_poll(req->mq_hctx, NULL);

	if (blk_mq_request_completed(req)) {
		dev_warn(ctrl->device,
			 "I/O %d QID %d timeout, completion polled\n",
			 req->tag, nvmeq->qid);
		return BLK_EH_DONE;
	}

	/*
	 * Shutdown immediately if controller times out while starting. The
	 * reset work will see the pci device disabled when it gets the forced
	 * cancellation error. All outstanding requests are completed on
	 * shutdown, so we return BLK_EH_DONE.
	 */
	switch (ctrl->state) {
	case NVME_CTRL_CONNECTING:
		nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING);
		fallthrough;
	case NVME_CTRL_DELETING:
		dev_warn_ratelimited(ctrl->device,
			 "I/O %d QID %d timeout, disable controller\n",
			 req->tag, nvmeq->qid);
		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
		nvme_dev_disable(dev, true);
		return BLK_EH_DONE;
	case NVME_CTRL_RESETTING:
		return BLK_EH_RESET_TIMER;
	default:
		break;
	}

	/*
	 * Shutdown the controller immediately and schedule a reset if the
	 * command was already aborted once before and still hasn't been
	 * returned to the driver, or if this is the admin queue.
	 */
	if (!nvmeq->qid || iod->aborted) {
		dev_warn(ctrl->device,
			 "I/O %d QID %d timeout, reset controller\n",
			 req->tag, nvmeq->qid);
		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
		goto disable_and_reset;
	}

	/* Take an abort credit; back off if none is left. */
	if (atomic_dec_return(&ctrl->abort_limit) < 0)
		goto return_abort_credit;
	iod->aborted = 1;

	abort_cmd.abort.opcode = nvme_admin_abort_cmd;
	abort_cmd.abort.cid = nvme_cid(req);
	abort_cmd.abort.sqid = cpu_to_le16(nvmeq->qid);

	dev_warn(ctrl->device,
		 "I/O %d QID %d timeout, aborting\n",
		 req->tag, nvmeq->qid);

	abort_req = blk_mq_alloc_request(ctrl->admin_q,
					 nvme_req_op(&abort_cmd),
					 BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(abort_req))
		goto return_abort_credit;
	nvme_init_request(abort_req, &abort_cmd);

	abort_req->end_io_data = NULL;
	abort_req->rq_flags |= RQF_QUIET;
	blk_execute_rq_nowait(abort_req, false, abort_endio);

	/*
	 * The aborted req will be completed on receiving the abort req.
	 * We enable the timer again. If hit twice, it'll cause a device reset,
	 * as the device then is in a faulty state.
	 */
	return BLK_EH_RESET_TIMER;

return_abort_credit:
	atomic_inc(&ctrl->abort_limit);
	return BLK_EH_RESET_TIMER;

disable_and_reset:
	nvme_dev_disable(dev, false);
	nvme_reset_ctrl(ctrl);
	return BLK_EH_DONE;
}
|
|
|
|
|
2014-11-04 23:20:14 +08:00
|
|
|
static void nvme_free_queue(struct nvme_queue *nvmeq)
|
|
|
|
{
|
2019-08-07 15:51:19 +08:00
|
|
|
dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq),
|
2012-08-04 01:55:56 +08:00
|
|
|
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
|
2018-12-03 00:46:18 +08:00
|
|
|
if (!nvmeq->sq_cmds)
|
|
|
|
return;
|
2018-10-05 05:27:43 +08:00
|
|
|
|
2018-12-03 00:46:18 +08:00
|
|
|
if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) {
|
2019-03-09 01:43:11 +08:00
|
|
|
pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev),
|
2019-08-07 15:51:19 +08:00
|
|
|
nvmeq->sq_cmds, SQ_SIZE(nvmeq));
|
2018-12-03 00:46:18 +08:00
|
|
|
} else {
|
2019-08-07 15:51:19 +08:00
|
|
|
dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq),
|
2018-12-03 00:46:18 +08:00
|
|
|
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
|
2018-10-05 05:27:43 +08:00
|
|
|
}
|
2012-08-04 01:55:56 +08:00
|
|
|
}
|
|
|
|
|
2013-12-17 02:50:00 +08:00
|
|
|
static void nvme_free_queues(struct nvme_dev *dev, int lowest)
|
2013-07-16 05:02:20 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2017-04-24 15:58:29 +08:00
|
|
|
for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
|
|
|
|
dev->ctrl.queue_count--;
|
2018-01-14 18:39:01 +08:00
|
|
|
nvme_free_queue(&dev->queues[i]);
|
2015-01-15 12:01:58 +08:00
|
|
|
}
|
2013-07-16 05:02:20 +08:00
|
|
|
}
|
|
|
|
|
2013-12-11 04:10:40 +08:00
|
|
|
/**
|
|
|
|
* nvme_suspend_queue - put queue into suspended state
|
2018-10-09 05:28:43 +08:00
|
|
|
* @nvmeq: queue to suspend
|
2013-12-11 04:10:40 +08:00
|
|
|
*/
|
|
|
|
static int nvme_suspend_queue(struct nvme_queue *nvmeq)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2018-12-03 00:46:17 +08:00
|
|
|
if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags))
|
2014-12-23 03:59:04 +08:00
|
|
|
return 1;
|
2012-08-08 03:56:23 +08:00
|
|
|
|
2018-12-03 00:46:17 +08:00
|
|
|
/* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */
|
2018-05-18 00:31:49 +08:00
|
|
|
mb();
|
2012-08-08 03:56:23 +08:00
|
|
|
|
2018-12-03 00:46:17 +08:00
|
|
|
nvmeq->dev->online_queues--;
|
2015-11-26 17:06:56 +08:00
|
|
|
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
|
2021-10-14 16:17:06 +08:00
|
|
|
nvme_stop_admin_queue(&nvmeq->dev->ctrl);
|
2019-03-09 01:43:06 +08:00
|
|
|
if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags))
|
|
|
|
pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq);
|
2013-12-11 04:10:40 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2019-01-05 06:04:33 +08:00
|
|
|
static void nvme_suspend_io_queues(struct nvme_dev *dev)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = dev->ctrl.queue_count - 1; i > 0; i--)
|
|
|
|
nvme_suspend_queue(&dev->queues[i]);
|
|
|
|
}
|
|
|
|
|
2016-01-13 05:41:18 +08:00
|
|
|
/*
 * Shut down (@shutdown == true) or disable the controller, then poll queue 0
 * once with its interrupt masked to pick up anything that completed in the
 * meantime.
 */
static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
	if (!shutdown)
		nvme_disable_ctrl(&dev->ctrl);
	else
		nvme_shutdown_ctrl(&dev->ctrl);

	nvme_poll_irqdisable(&dev->queues[0]);
}
|
|
|
|
|
2020-02-13 00:41:05 +08:00
|
|
|
/*
|
|
|
|
* Called only on a device that has been disabled and after all other threads
|
2020-05-28 00:13:52 +08:00
|
|
|
* that can check this device's completion queues have synced, except
|
|
|
|
* nvme_poll(). This is the last chance for the driver to see a natural
|
|
|
|
* completion before nvme_cancel_request() terminates all incomplete requests.
|
2020-02-13 00:41:05 +08:00
|
|
|
*/
|
|
|
|
static void nvme_reap_pending_cqes(struct nvme_dev *dev)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2020-05-28 00:13:52 +08:00
|
|
|
for (i = dev->ctrl.queue_count - 1; i > 0; i--) {
|
|
|
|
spin_lock(&dev->queues[i].cq_poll_lock);
|
2021-10-08 19:59:37 +08:00
|
|
|
nvme_poll_cq(&dev->queues[i], NULL);
|
2020-05-28 00:13:52 +08:00
|
|
|
spin_unlock(&dev->queues[i].cq_poll_lock);
|
|
|
|
}
|
2020-02-13 00:41:05 +08:00
|
|
|
}
|
|
|
|
|
2015-07-21 00:14:09 +08:00
|
|
|
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
|
|
|
|
int entry_size)
|
|
|
|
{
|
|
|
|
int q_depth = dev->q_depth;
|
2015-11-28 22:03:49 +08:00
|
|
|
unsigned q_size_aligned = roundup(q_depth * entry_size,
|
2020-07-17 08:51:37 +08:00
|
|
|
NVME_CTRL_PAGE_SIZE);
|
2015-07-21 00:14:09 +08:00
|
|
|
|
|
|
|
if (q_size_aligned * nr_io_queues > dev->cmb_size) {
|
2015-07-22 05:08:13 +08:00
|
|
|
u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);
|
2020-07-03 10:49:21 +08:00
|
|
|
|
2020-07-17 08:51:37 +08:00
|
|
|
mem_per_q = round_down(mem_per_q, NVME_CTRL_PAGE_SIZE);
|
2015-07-22 05:08:13 +08:00
|
|
|
q_depth = div_u64(mem_per_q, entry_size);
|
2015-07-21 00:14:09 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Ensure the reduced q_depth is above some threshold where it
|
|
|
|
* would be better to map queues in system memory with the
|
|
|
|
* original depth
|
|
|
|
*/
|
|
|
|
if (q_depth < 64)
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
return q_depth;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
|
2019-08-07 15:51:19 +08:00
|
|
|
int qid)
|
2015-07-21 00:14:09 +08:00
|
|
|
{
|
2018-10-05 05:27:43 +08:00
|
|
|
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
|
|
|
|
|
|
|
if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
|
2019-08-07 15:51:19 +08:00
|
|
|
nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq));
|
2019-07-09 01:05:11 +08:00
|
|
|
if (nvmeq->sq_cmds) {
|
|
|
|
nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
|
|
|
|
nvmeq->sq_cmds);
|
|
|
|
if (nvmeq->sq_dma_addr) {
|
|
|
|
set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-08-07 15:51:19 +08:00
|
|
|
pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(nvmeq));
|
2018-12-03 00:46:18 +08:00
|
|
|
}
|
2018-10-05 05:27:43 +08:00
|
|
|
}
|
2015-07-21 00:14:09 +08:00
|
|
|
|
2019-08-07 15:51:19 +08:00
|
|
|
nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq),
|
2018-12-03 00:46:18 +08:00
|
|
|
&nvmeq->sq_dma_addr, GFP_KERNEL);
|
2018-02-13 20:44:44 +08:00
|
|
|
if (!nvmeq->sq_cmds)
|
|
|
|
return -ENOMEM;
|
2015-07-21 00:14:09 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-12 23:16:09 +08:00
|
|
|
static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2018-01-14 18:39:01 +08:00
|
|
|
struct nvme_queue *nvmeq = &dev->queues[qid];
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2018-01-24 00:16:19 +08:00
|
|
|
if (dev->ctrl.queue_count > qid)
|
|
|
|
return 0;
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2019-08-07 15:51:20 +08:00
|
|
|
nvmeq->sqes = qid ? dev->io_sqes : NVME_ADM_SQES;
|
2019-08-07 15:51:19 +08:00
|
|
|
nvmeq->q_depth = depth;
|
|
|
|
nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq),
|
cross-tree: phase out dma_zalloc_coherent()
We already need to zero out memory for dma_alloc_coherent(), as such
using dma_zalloc_coherent() is superflous. Phase it out.
This change was generated with the following Coccinelle SmPL patch:
@ replace_dma_zalloc_coherent @
expression dev, size, data, handle, flags;
@@
-dma_zalloc_coherent(dev, size, handle, flags)
+dma_alloc_coherent(dev, size, handle, flags)
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
[hch: re-ran the script on the latest tree]
Signed-off-by: Christoph Hellwig <hch@lst.de>
2019-01-04 16:23:09 +08:00
|
|
|
&nvmeq->cq_dma_addr, GFP_KERNEL);
|
2011-01-21 01:50:14 +08:00
|
|
|
if (!nvmeq->cqes)
|
|
|
|
goto free_nvmeq;
|
|
|
|
|
2019-08-07 15:51:19 +08:00
|
|
|
if (nvme_alloc_sq_cmds(dev, nvmeq, qid))
|
2011-01-21 01:50:14 +08:00
|
|
|
goto free_cqdma;
|
|
|
|
|
2011-02-10 22:56:01 +08:00
|
|
|
nvmeq->dev = dev;
|
2018-05-18 00:31:51 +08:00
|
|
|
spin_lock_init(&nvmeq->sq_lock);
|
2018-12-03 00:46:23 +08:00
|
|
|
spin_lock_init(&nvmeq->cq_poll_lock);
|
2011-01-21 01:50:14 +08:00
|
|
|
nvmeq->cq_head = 0;
|
2011-01-21 02:24:06 +08:00
|
|
|
nvmeq->cq_phase = 1;
|
2013-09-10 11:25:37 +08:00
|
|
|
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
|
2013-12-11 04:10:38 +08:00
|
|
|
nvmeq->qid = qid;
|
2017-04-24 15:58:29 +08:00
|
|
|
dev->ctrl.queue_count++;
|
2015-05-28 02:26:23 +08:00
|
|
|
|
2018-01-14 18:39:01 +08:00
|
|
|
return 0;
|
2011-01-21 01:50:14 +08:00
|
|
|
|
|
|
|
free_cqdma:
|
2019-08-07 15:51:19 +08:00
|
|
|
dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes,
|
|
|
|
nvmeq->cq_dma_addr);
|
2011-01-21 01:50:14 +08:00
|
|
|
free_nvmeq:
|
2018-01-14 18:39:01 +08:00
|
|
|
return -ENOMEM;
|
2011-01-21 01:50:14 +08:00
|
|
|
}
|
|
|
|
|
2016-09-14 22:18:57 +08:00
|
|
|
static int queue_request_irq(struct nvme_queue *nvmeq)
|
2011-01-20 22:10:15 +08:00
|
|
|
{
|
2017-04-13 15:06:43 +08:00
|
|
|
struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);
|
|
|
|
int nr = nvmeq->dev->ctrl.instance;
|
|
|
|
|
|
|
|
if (use_threaded_interrupts) {
|
|
|
|
return pci_request_irq(pdev, nvmeq->cq_vector, nvme_irq_check,
|
|
|
|
nvme_irq, nvmeq, "nvme%dq%d", nr, nvmeq->qid);
|
|
|
|
} else {
|
|
|
|
return pci_request_irq(pdev, nvmeq->cq_vector, nvme_irq,
|
|
|
|
NULL, nvmeq, "nvme%dq%d", nr, nvmeq->qid);
|
|
|
|
}
|
2011-01-20 22:10:15 +08:00
|
|
|
}
|
|
|
|
|
2013-07-16 05:02:20 +08:00
|
|
|
/*
 * Reset the runtime state of @nvmeq before it is (re)used: tail/head
 * indices, phase, doorbell pointer, a zeroed completion queue and the
 * doorbell buffer state.  The device's online queue count is incremented.
 */
static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
	struct nvme_dev *dev = nvmeq->dev;

	/* Submission side */
	nvmeq->sq_tail = 0;
	nvmeq->last_sq_tail = 0;

	/* Completion side */
	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;

	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq));
	nvme_dbbuf_init(dev, nvmeq, qid);
	dev->online_queues++;

	/* ensure the first interrupt sees the initialization */
	wmb();
}
|
|
|
|
|
2021-07-08 05:14:31 +08:00
|
|
|
/*
|
|
|
|
* Try getting shutdown_lock while setting up IO queues.
|
|
|
|
*/
|
|
|
|
static int nvme_setup_io_queues_trylock(struct nvme_dev *dev)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Give up if the lock is being held by nvme_dev_disable.
|
|
|
|
*/
|
|
|
|
if (!mutex_trylock(&dev->shutdown_lock))
|
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Controller is in wrong state, fail early.
|
|
|
|
*/
|
|
|
|
if (dev->ctrl.state != NVME_CTRL_CONNECTING) {
|
|
|
|
mutex_unlock(&dev->shutdown_lock);
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-11-06 03:44:33 +08:00
|
|
|
/*
 * Create queue @qid on the controller and bring it online.
 *
 * Steps: create the completion queue, create the submission queue, then,
 * under shutdown_lock, initialise the host-side state, request the
 * interrupt (unless @polled) and mark the queue NVMEQ_ENABLED.
 *
 * Error handling as implemented here:
 *  - CQ creation failure: returned as is.
 *  - SQ creation returning a negative value: returned as is, the CQ is not
 *    deleted.  NOTE(review): presumably a negative value means the admin
 *    command could not be executed at all - confirm against
 *    nvme_submit_sync_cmd().
 *  - SQ creation returning a positive value: the CQ is deleted.
 *  - shutdown_lock not obtainable / wrong controller state: returned as is,
 *    neither queue is deleted here.
 *  - IRQ request failure: both SQ and CQ are deleted.
 */
static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
{
	struct nvme_dev *dev = nvmeq->dev;
	u16 vector = 0;
	int ret;

	clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags);

	/*
	 * A queue's vector matches the queue identifier unless the controller
	 * has only one vector available.
	 */
	if (polled)
		set_bit(NVMEQ_POLLED, &nvmeq->flags);
	else if (dev->num_vecs != 1)
		vector = qid;

	ret = adapter_alloc_cq(dev, qid, nvmeq, vector);
	if (ret)
		return ret;

	ret = adapter_alloc_sq(dev, qid, nvmeq);
	if (ret < 0)
		return ret;
	if (ret > 0)
		goto release_cq;

	nvmeq->cq_vector = vector;

	ret = nvme_setup_io_queues_trylock(dev);
	if (ret)
		return ret;

	/* shutdown_lock is held from here on. */
	nvme_init_queue(nvmeq, qid);
	if (!polled) {
		ret = queue_request_irq(nvmeq);
		if (ret < 0)
			goto release_sq;
	}

	set_bit(NVMEQ_ENABLED, &nvmeq->flags);
	mutex_unlock(&dev->shutdown_lock);
	return ret;

release_sq:
	/* Undo the online_queues++ done by nvme_init_queue(). */
	dev->online_queues--;
	mutex_unlock(&dev->shutdown_lock);
	adapter_delete_sq(dev, qid);
release_cq:
	adapter_delete_cq(dev, qid);
	return ret;
}
|
|
|
|
|
2017-03-31 04:39:16 +08:00
|
|
|
static const struct blk_mq_ops nvme_mq_admin_ops = {
|
2015-05-22 17:12:46 +08:00
|
|
|
.queue_rq = nvme_queue_rq,
|
2017-03-30 19:41:32 +08:00
|
|
|
.complete = nvme_pci_complete_rq,
|
2014-11-04 23:20:14 +08:00
|
|
|
.init_hctx = nvme_admin_init_hctx,
|
2022-03-15 22:53:59 +08:00
|
|
|
.init_request = nvme_pci_init_request,
|
2014-11-04 23:20:14 +08:00
|
|
|
.timeout = nvme_timeout,
|
|
|
|
};
|
|
|
|
|
2017-03-31 04:39:16 +08:00
|
|
|
static const struct blk_mq_ops nvme_mq_ops = {
|
2018-12-03 00:46:27 +08:00
|
|
|
.queue_rq = nvme_queue_rq,
|
2021-11-18 23:37:30 +08:00
|
|
|
.queue_rqs = nvme_queue_rqs,
|
2018-12-03 00:46:27 +08:00
|
|
|
.complete = nvme_pci_complete_rq,
|
|
|
|
.commit_rqs = nvme_commit_rqs,
|
|
|
|
.init_hctx = nvme_init_hctx,
|
2022-03-15 22:53:59 +08:00
|
|
|
.init_request = nvme_pci_init_request,
|
2018-12-03 00:46:27 +08:00
|
|
|
.map_queues = nvme_pci_map_queues,
|
|
|
|
.timeout = nvme_timeout,
|
|
|
|
.poll = nvme_poll,
|
2018-11-15 00:38:28 +08:00
|
|
|
};
|
|
|
|
|
2015-01-08 09:55:49 +08:00
|
|
|
static void nvme_dev_remove_admin(struct nvme_dev *dev)
|
|
|
|
{
|
2015-11-26 17:06:56 +08:00
|
|
|
if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
|
2016-02-25 00:15:56 +08:00
|
|
|
/*
|
|
|
|
* If the controller was reset during removal, it's possible
|
|
|
|
* user requests may be waiting on a stopped queue. Start the
|
|
|
|
* queue to flush these to completion.
|
|
|
|
*/
|
2021-10-14 16:17:06 +08:00
|
|
|
nvme_start_admin_queue(&dev->ctrl);
|
2015-11-26 17:06:56 +08:00
|
|
|
blk_cleanup_queue(dev->ctrl.admin_q);
|
2015-01-08 09:55:49 +08:00
|
|
|
blk_mq_free_tag_set(&dev->admin_tagset);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-04 23:20:14 +08:00
|
|
|
static int nvme_alloc_admin_tags(struct nvme_dev *dev)
|
|
|
|
{
|
2015-11-26 17:06:56 +08:00
|
|
|
if (!dev->ctrl.admin_q) {
|
2014-11-04 23:20:14 +08:00
|
|
|
dev->admin_tagset.ops = &nvme_mq_admin_ops;
|
|
|
|
dev->admin_tagset.nr_hw_queues = 1;
|
2016-01-05 00:10:55 +08:00
|
|
|
|
2017-11-08 06:13:10 +08:00
|
|
|
dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
|
2020-11-10 08:33:45 +08:00
|
|
|
dev->admin_tagset.timeout = NVME_ADMIN_TIMEOUT;
|
2020-06-16 17:34:23 +08:00
|
|
|
dev->admin_tagset.numa_node = dev->ctrl.numa_node;
|
2019-03-05 20:46:58 +08:00
|
|
|
dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
|
2017-01-14 05:43:58 +08:00
|
|
|
dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
|
2014-11-04 23:20:14 +08:00
|
|
|
dev->admin_tagset.driver_data = dev;
|
|
|
|
|
|
|
|
if (blk_mq_alloc_tag_set(&dev->admin_tagset))
|
|
|
|
return -ENOMEM;
|
2017-07-10 14:22:29 +08:00
|
|
|
dev->ctrl.admin_tagset = &dev->admin_tagset;
|
2014-11-04 23:20:14 +08:00
|
|
|
|
2015-11-26 17:06:56 +08:00
|
|
|
dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
|
|
|
|
if (IS_ERR(dev->ctrl.admin_q)) {
|
2014-11-04 23:20:14 +08:00
|
|
|
blk_mq_free_tag_set(&dev->admin_tagset);
|
2022-04-22 22:40:32 +08:00
|
|
|
dev->ctrl.admin_q = NULL;
|
2014-11-04 23:20:14 +08:00
|
|
|
return -ENOMEM;
|
|
|
|
}
|
2015-11-26 17:06:56 +08:00
|
|
|
if (!blk_get_queue(dev->ctrl.admin_q)) {
|
2015-01-08 09:55:49 +08:00
|
|
|
nvme_dev_remove_admin(dev);
|
2015-11-26 17:06:56 +08:00
|
|
|
dev->ctrl.admin_q = NULL;
|
2015-01-08 09:55:49 +08:00
|
|
|
return -ENODEV;
|
|
|
|
}
|
2015-01-08 09:55:50 +08:00
|
|
|
} else
|
2021-10-14 16:17:06 +08:00
|
|
|
nvme_start_admin_queue(&dev->ctrl);
|
2014-11-04 23:20:14 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-05-24 16:39:55 +08:00
|
|
|
static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
|
|
|
|
{
|
|
|
|
return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
|
|
|
|
{
|
|
|
|
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
|
|
|
|
|
|
|
if (size <= dev->bar_mapped_size)
|
|
|
|
return 0;
|
|
|
|
if (size > pci_resource_len(pdev, 0))
|
|
|
|
return -ENOMEM;
|
|
|
|
if (dev->bar)
|
|
|
|
iounmap(dev->bar);
|
|
|
|
dev->bar = ioremap(pci_resource_start(pdev, 0), size);
|
|
|
|
if (!dev->bar) {
|
|
|
|
dev->bar_mapped_size = 0;
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
dev->bar_mapped_size = size;
|
|
|
|
dev->dbs = dev->bar + NVME_REG_DBS;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-05-01 05:27:17 +08:00
|
|
|
static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2013-05-04 18:43:16 +08:00
|
|
|
int result;
|
2011-01-21 01:50:14 +08:00
|
|
|
u32 aqa;
|
|
|
|
struct nvme_queue *nvmeq;
|
|
|
|
|
2017-05-24 16:39:55 +08:00
|
|
|
result = nvme_remap_bar(dev, db_bar_size(dev, 0));
|
|
|
|
if (result < 0)
|
|
|
|
return result;
|
|
|
|
|
2016-10-19 23:51:05 +08:00
|
|
|
dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
|
2017-06-28 03:16:38 +08:00
|
|
|
NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
|
2015-08-11 05:20:40 +08:00
|
|
|
|
2015-11-20 15:58:10 +08:00
|
|
|
if (dev->subsystem &&
|
|
|
|
(readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
|
|
|
|
writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
|
2015-08-11 05:20:40 +08:00
|
|
|
|
2019-07-23 08:06:54 +08:00
|
|
|
result = nvme_disable_ctrl(&dev->ctrl);
|
2013-05-04 18:43:16 +08:00
|
|
|
if (result < 0)
|
|
|
|
return result;
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2018-04-12 23:16:09 +08:00
|
|
|
result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
|
2018-01-14 18:39:01 +08:00
|
|
|
if (result)
|
|
|
|
return result;
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2020-06-16 17:34:22 +08:00
|
|
|
dev->ctrl.numa_node = dev_to_node(dev->dev);
|
|
|
|
|
2018-01-14 18:39:01 +08:00
|
|
|
nvmeq = &dev->queues[0];
|
2011-01-21 01:50:14 +08:00
|
|
|
aqa = nvmeq->q_depth - 1;
|
|
|
|
aqa |= aqa << 16;
|
|
|
|
|
2015-11-20 15:58:10 +08:00
|
|
|
writel(aqa, dev->bar + NVME_REG_AQA);
|
|
|
|
lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
|
|
|
|
lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2019-07-23 08:06:53 +08:00
|
|
|
result = nvme_enable_ctrl(&dev->ctrl);
|
2013-05-02 03:07:51 +08:00
|
|
|
if (result)
|
2016-11-16 04:56:26 +08:00
|
|
|
return result;
|
2014-11-04 23:20:14 +08:00
|
|
|
|
2014-12-23 03:59:04 +08:00
|
|
|
nvmeq->cq_vector = 0;
|
2017-09-15 01:54:39 +08:00
|
|
|
nvme_init_queue(nvmeq, 0);
|
2016-09-14 22:18:57 +08:00
|
|
|
result = queue_request_irq(nvmeq);
|
2015-07-01 01:22:52 +08:00
|
|
|
if (result) {
|
2019-03-09 01:43:06 +08:00
|
|
|
dev->online_queues--;
|
2016-11-16 04:56:26 +08:00
|
|
|
return result;
|
2015-07-01 01:22:52 +08:00
|
|
|
}
|
2013-05-02 03:07:51 +08:00
|
|
|
|
2018-12-03 00:46:17 +08:00
|
|
|
set_bit(NVMEQ_ENABLED, &nvmeq->flags);
|
2011-01-21 01:50:14 +08:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2015-11-26 18:46:39 +08:00
|
|
|
static int nvme_create_io_queues(struct nvme_dev *dev)
|
2014-03-25 00:46:25 +08:00
|
|
|
{
|
2018-11-06 03:44:33 +08:00
|
|
|
unsigned i, max, rw_queues;
|
2015-11-26 18:46:39 +08:00
|
|
|
int ret = 0;
|
2014-03-25 00:46:25 +08:00
|
|
|
|
2017-04-24 15:58:29 +08:00
|
|
|
for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
|
2018-04-12 23:16:09 +08:00
|
|
|
if (nvme_alloc_queue(dev, i, dev->q_depth)) {
|
2015-11-26 18:46:39 +08:00
|
|
|
ret = -ENOMEM;
|
2014-03-25 00:46:25 +08:00
|
|
|
break;
|
2015-11-26 18:46:39 +08:00
|
|
|
}
|
|
|
|
}
|
2014-03-25 00:46:25 +08:00
|
|
|
|
2017-04-24 15:58:29 +08:00
|
|
|
max = min(dev->max_qid, dev->ctrl.queue_count - 1);
|
2018-12-03 00:46:16 +08:00
|
|
|
if (max != 1 && dev->io_queues[HCTX_TYPE_POLL]) {
|
|
|
|
rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] +
|
|
|
|
dev->io_queues[HCTX_TYPE_READ];
|
2018-11-06 03:44:33 +08:00
|
|
|
} else {
|
|
|
|
rw_queues = max;
|
|
|
|
}
|
|
|
|
|
2015-12-18 08:08:15 +08:00
|
|
|
for (i = dev->online_queues; i <= max; i++) {
|
2018-11-06 03:44:33 +08:00
|
|
|
bool polled = i > rw_queues;
|
|
|
|
|
|
|
|
ret = nvme_create_queue(&dev->queues[i], i, polled);
|
2016-11-16 04:56:26 +08:00
|
|
|
if (ret)
|
2014-03-25 00:46:25 +08:00
|
|
|
break;
|
2014-04-11 23:58:45 +08:00
|
|
|
}
|
2015-11-26 18:46:39 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Ignore failing Create SQ/CQ commands, we can continue with less
|
2018-01-14 15:14:27 +08:00
|
|
|
* than the desired amount of queues, and even a controller without
|
|
|
|
* I/O queues can still be used to issue admin commands. This might
|
2015-11-26 18:46:39 +08:00
|
|
|
* be useful to upgrade a buggy firmware for example.
|
|
|
|
*/
|
|
|
|
return ret >= 0 ? 0 : ret;
|
2011-01-21 01:50:14 +08:00
|
|
|
}
|
|
|
|
|
2017-12-20 21:50:00 +08:00
|
|
|
static u64 nvme_cmb_size_unit(struct nvme_dev *dev)
|
2015-07-21 00:14:09 +08:00
|
|
|
{
|
2017-12-20 21:50:00 +08:00
|
|
|
u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK;
|
|
|
|
|
|
|
|
return 1ULL << (12 + 4 * szu);
|
|
|
|
}
|
|
|
|
|
|
|
|
static u32 nvme_cmb_size(struct nvme_dev *dev)
|
|
|
|
{
|
|
|
|
return (dev->cmbsz >> NVME_CMBSZ_SZ_SHIFT) & NVME_CMBSZ_SZ_MASK;
|
|
|
|
}
|
|
|
|
|
2017-12-20 21:25:11 +08:00
|
|
|
static void nvme_map_cmb(struct nvme_dev *dev)
|
2015-07-21 00:14:09 +08:00
|
|
|
{
|
2017-12-20 21:50:00 +08:00
|
|
|
u64 size, offset;
|
2015-07-21 00:14:09 +08:00
|
|
|
resource_size_t bar_size;
|
|
|
|
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
2017-10-01 15:37:35 +08:00
|
|
|
int bar;
|
2015-07-21 00:14:09 +08:00
|
|
|
|
2018-11-01 03:15:29 +08:00
|
|
|
if (dev->cmb_size)
|
|
|
|
return;
|
|
|
|
|
2021-01-15 14:30:46 +08:00
|
|
|
if (NVME_CAP_CMBS(dev->ctrl.cap))
|
|
|
|
writel(NVME_CMBMSC_CRE, dev->bar + NVME_REG_CMBMSC);
|
|
|
|
|
2015-11-20 15:58:10 +08:00
|
|
|
dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
|
2017-12-20 21:25:11 +08:00
|
|
|
if (!dev->cmbsz)
|
|
|
|
return;
|
2016-10-06 10:01:12 +08:00
|
|
|
dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
|
2015-07-21 00:14:09 +08:00
|
|
|
|
2017-12-20 21:50:00 +08:00
|
|
|
size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev);
|
|
|
|
offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc);
|
2017-10-01 15:37:35 +08:00
|
|
|
bar = NVME_CMB_BIR(dev->cmbloc);
|
|
|
|
bar_size = pci_resource_len(pdev, bar);
|
2015-07-21 00:14:09 +08:00
|
|
|
|
|
|
|
if (offset > bar_size)
|
2017-12-20 21:25:11 +08:00
|
|
|
return;
|
2015-07-21 00:14:09 +08:00
|
|
|
|
2021-01-15 14:30:46 +08:00
|
|
|
/*
|
|
|
|
* Tell the controller about the host side address mapping the CMB,
|
|
|
|
* and enable CMB decoding for the NVMe 1.4+ scheme:
|
|
|
|
*/
|
|
|
|
if (NVME_CAP_CMBS(dev->ctrl.cap)) {
|
|
|
|
hi_lo_writeq(NVME_CMBMSC_CRE | NVME_CMBMSC_CMSE |
|
|
|
|
(pci_bus_address(pdev, bar) + offset),
|
|
|
|
dev->bar + NVME_REG_CMBMSC);
|
|
|
|
}
|
|
|
|
|
2015-07-21 00:14:09 +08:00
|
|
|
/*
|
|
|
|
* Controllers may support a CMB size larger than their BAR,
|
|
|
|
* for example, due to being behind a bridge. Reduce the CMB to
|
|
|
|
* the reported size of the BAR
|
|
|
|
*/
|
|
|
|
if (size > bar_size - offset)
|
|
|
|
size = bar_size - offset;
|
|
|
|
|
2018-10-05 05:27:43 +08:00
|
|
|
if (pci_p2pdma_add_resource(pdev, bar, size, offset)) {
|
|
|
|
dev_warn(dev->ctrl.device,
|
|
|
|
"failed to register the CMB\n");
|
2017-12-20 21:25:11 +08:00
|
|
|
return;
|
2018-10-05 05:27:43 +08:00
|
|
|
}
|
|
|
|
|
2015-07-21 00:14:09 +08:00
|
|
|
dev->cmb_size = size;
|
2018-10-05 05:27:43 +08:00
|
|
|
dev->cmb_use_sqes = use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS);
|
|
|
|
|
|
|
|
if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
|
|
|
|
(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
|
|
|
|
pci_p2pmem_publish(pdev, true);
|
2015-07-21 00:14:09 +08:00
|
|
|
}
|
|
|
|
|
2017-05-12 23:02:58 +08:00
|
|
|
/*
 * Issue Set Features (Host Memory Buffer) with @bits in dword 11, describing
 * the descriptor list currently recorded in @dev.
 *
 * On success dev->hmb records whether NVME_HOST_MEM_ENABLE was part of
 * @bits.  Returns the result of nvme_submit_sync_cmd(); a non-zero result
 * is also logged.
 */
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
{
	u32 nr_ctrl_pages = dev->host_mem_size >> NVME_CTRL_PAGE_SHIFT;
	u64 descs_dma = dev->host_mem_descs_dma;
	struct nvme_command c = { };
	int ret;

	c.features.opcode = nvme_admin_set_features;
	c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
	c.features.dword11 = cpu_to_le32(bits);
	/* buffer size in controller pages */
	c.features.dword12 = cpu_to_le32(nr_ctrl_pages);
	/* low and high half of the descriptor list address */
	c.features.dword13 = cpu_to_le32(lower_32_bits(descs_dma));
	c.features.dword14 = cpu_to_le32(upper_32_bits(descs_dma));
	/* number of descriptor list entries */
	c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs);

	ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
	if (!ret) {
		dev->hmb = bits & NVME_HOST_MEM_ENABLE;
		return 0;
	}

	dev_warn(dev->ctrl.device,
		 "failed to set host mem (err %d, flags %#x).\n",
		 ret, bits);
	return ret;
}
|
|
|
|
|
|
|
|
static void nvme_free_host_mem(struct nvme_dev *dev)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < dev->nr_host_mem_descs; i++) {
|
|
|
|
struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
|
2020-07-17 08:51:37 +08:00
|
|
|
size_t size = le32_to_cpu(desc->size) * NVME_CTRL_PAGE_SIZE;
|
2017-05-12 23:02:58 +08:00
|
|
|
|
2018-12-30 01:23:43 +08:00
|
|
|
dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i],
|
|
|
|
le64_to_cpu(desc->addr),
|
|
|
|
DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
|
2017-05-12 23:02:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
kfree(dev->host_mem_desc_bufs);
|
|
|
|
dev->host_mem_desc_bufs = NULL;
|
2017-08-28 16:47:18 +08:00
|
|
|
dma_free_coherent(dev->dev,
|
|
|
|
dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
|
|
|
|
dev->host_mem_descs, dev->host_mem_descs_dma);
|
2017-05-12 23:02:58 +08:00
|
|
|
dev->host_mem_descs = NULL;
|
2017-11-25 02:03:00 +08:00
|
|
|
dev->nr_host_mem_descs = 0;
|
2017-05-12 23:02:58 +08:00
|
|
|
}
|
|
|
|
|
2017-09-12 00:08:43 +08:00
|
|
|
static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
|
|
|
|
u32 chunk_size)
|
2013-07-16 05:02:24 +08:00
|
|
|
{
|
2017-05-12 23:02:58 +08:00
|
|
|
struct nvme_host_mem_buf_desc *descs;
|
2017-09-12 00:08:43 +08:00
|
|
|
u32 max_entries, len;
|
2017-08-28 16:47:18 +08:00
|
|
|
dma_addr_t descs_dma;
|
2017-07-06 17:26:52 +08:00
|
|
|
int i = 0;
|
2017-05-12 23:02:58 +08:00
|
|
|
void **bufs;
|
2017-12-05 04:23:54 +08:00
|
|
|
u64 size, tmp;
|
2017-05-12 23:02:58 +08:00
|
|
|
|
|
|
|
tmp = (preferred + chunk_size - 1);
|
|
|
|
do_div(tmp, chunk_size);
|
|
|
|
max_entries = tmp;
|
2017-09-12 00:09:28 +08:00
|
|
|
|
|
|
|
if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
|
|
|
|
max_entries = dev->ctrl.hmmaxd;
|
|
|
|
|
cross-tree: phase out dma_zalloc_coherent()
We already need to zero out memory for dma_alloc_coherent(), as such
using dma_zalloc_coherent() is superflous. Phase it out.
This change was generated with the following Coccinelle SmPL patch:
@ replace_dma_zalloc_coherent @
expression dev, size, data, handle, flags;
@@
-dma_zalloc_coherent(dev, size, handle, flags)
+dma_alloc_coherent(dev, size, handle, flags)
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
[hch: re-ran the script on the latest tree]
Signed-off-by: Christoph Hellwig <hch@lst.de>
2019-01-04 16:23:09 +08:00
|
|
|
descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs),
|
|
|
|
&descs_dma, GFP_KERNEL);
|
2017-05-12 23:02:58 +08:00
|
|
|
if (!descs)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL);
|
|
|
|
if (!bufs)
|
|
|
|
goto out_free_descs;
|
|
|
|
|
2017-11-17 00:34:24 +08:00
|
|
|
for (size = 0; size < preferred && i < max_entries; size += len) {
|
2017-05-12 23:02:58 +08:00
|
|
|
dma_addr_t dma_addr;
|
|
|
|
|
2017-07-25 23:39:07 +08:00
|
|
|
len = min_t(u64, chunk_size, preferred - size);
|
2017-05-12 23:02:58 +08:00
|
|
|
bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
|
|
|
|
DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
|
|
|
|
if (!bufs[i])
|
|
|
|
break;
|
|
|
|
|
|
|
|
descs[i].addr = cpu_to_le64(dma_addr);
|
2020-07-17 08:51:37 +08:00
|
|
|
descs[i].size = cpu_to_le32(len / NVME_CTRL_PAGE_SIZE);
|
2017-05-12 23:02:58 +08:00
|
|
|
i++;
|
|
|
|
}
|
|
|
|
|
2017-09-12 00:08:43 +08:00
|
|
|
if (!size)
|
2017-05-12 23:02:58 +08:00
|
|
|
goto out_free_bufs;
|
|
|
|
|
|
|
|
dev->nr_host_mem_descs = i;
|
|
|
|
dev->host_mem_size = size;
|
|
|
|
dev->host_mem_descs = descs;
|
2017-08-28 16:47:18 +08:00
|
|
|
dev->host_mem_descs_dma = descs_dma;
|
2017-05-12 23:02:58 +08:00
|
|
|
dev->host_mem_desc_bufs = bufs;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_free_bufs:
|
|
|
|
while (--i >= 0) {
|
2020-07-17 08:51:37 +08:00
|
|
|
size_t size = le32_to_cpu(descs[i].size) * NVME_CTRL_PAGE_SIZE;
|
2017-05-12 23:02:58 +08:00
|
|
|
|
2018-12-30 01:23:43 +08:00
|
|
|
dma_free_attrs(dev->dev, size, bufs[i],
|
|
|
|
le64_to_cpu(descs[i].addr),
|
|
|
|
DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
|
2017-05-12 23:02:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
kfree(bufs);
|
|
|
|
out_free_descs:
|
2017-08-28 16:47:18 +08:00
|
|
|
dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
|
|
|
|
descs_dma);
|
2017-05-12 23:02:58 +08:00
|
|
|
out:
|
|
|
|
dev->host_mem_descs = NULL;
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2017-09-12 00:08:43 +08:00
|
|
|
/*
 * Allocate a host memory buffer of up to @preferred bytes that is at least
 * @min bytes large (any size is accepted when @min is 0).
 *
 * Allocation is attempted with progressively halved chunk sizes, starting at
 * the smaller of @preferred and PAGE_SIZE * MAX_ORDER_NR_PAGES and stopping
 * once the chunk size would drop below the lower bound derived from
 * ctrl.hmminds (never less than two pages).
 *
 * Returns 0 on success, with the buffer recorded in @dev, or -ENOMEM.
 */
static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{
	u64 min_chunk = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES);
	/*
	 * ctrl.hmminds is scaled by 4096 to get bytes.  Do the multiply and
	 * the comparison in 64 bits: in 32-bit arithmetic a large value wraps
	 * around and yields a lower bound far below what the controller
	 * asked for.
	 */
	u64 hmminds = max_t(u64, (u64)dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
	u64 chunk_size;

	/* start big and work our way down */
	for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) {
		if (__nvme_alloc_host_mem(dev, preferred, chunk_size))
			continue;

		if (!min || dev->host_mem_size >= min)
			return 0;

		/* got less than the required minimum, retry with smaller chunks */
		nvme_free_host_mem(dev);
	}

	return -ENOMEM;
}
|
|
|
|
|
2017-09-06 18:19:57 +08:00
|
|
|
static int nvme_setup_host_mem(struct nvme_dev *dev)
|
2017-05-12 23:02:58 +08:00
|
|
|
{
|
|
|
|
u64 max = (u64)max_host_mem_size_mb * SZ_1M;
|
|
|
|
u64 preferred = (u64)dev->ctrl.hmpre * 4096;
|
|
|
|
u64 min = (u64)dev->ctrl.hmmin * 4096;
|
|
|
|
u32 enable_bits = NVME_HOST_MEM_ENABLE;
|
2017-12-05 04:23:54 +08:00
|
|
|
int ret;
|
2017-05-12 23:02:58 +08:00
|
|
|
|
|
|
|
preferred = min(preferred, max);
|
|
|
|
if (min > max) {
|
|
|
|
dev_warn(dev->ctrl.device,
|
|
|
|
"min host memory (%lld MiB) above limit (%d MiB).\n",
|
|
|
|
min >> ilog2(SZ_1M), max_host_mem_size_mb);
|
|
|
|
nvme_free_host_mem(dev);
|
2017-09-06 18:19:57 +08:00
|
|
|
return 0;
|
2017-05-12 23:02:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we already have a buffer allocated check if we can reuse it.
|
|
|
|
*/
|
|
|
|
if (dev->host_mem_descs) {
|
|
|
|
if (dev->host_mem_size >= min)
|
|
|
|
enable_bits |= NVME_HOST_MEM_RETURN;
|
|
|
|
else
|
|
|
|
nvme_free_host_mem(dev);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!dev->host_mem_descs) {
|
2017-09-12 00:08:43 +08:00
|
|
|
if (nvme_alloc_host_mem(dev, min, preferred)) {
|
|
|
|
dev_warn(dev->ctrl.device,
|
|
|
|
"failed to allocate host memory buffer.\n");
|
2017-09-06 18:19:57 +08:00
|
|
|
return 0; /* controller must work without HMB */
|
2017-09-12 00:08:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
dev_info(dev->ctrl.device,
|
|
|
|
"allocated %lld MiB host memory buffer.\n",
|
|
|
|
dev->host_mem_size >> ilog2(SZ_1M));
|
2017-05-12 23:02:58 +08:00
|
|
|
}
|
|
|
|
|
2017-09-06 18:19:57 +08:00
|
|
|
ret = nvme_set_host_mem(dev, enable_bits);
|
|
|
|
if (ret)
|
2017-05-12 23:02:58 +08:00
|
|
|
nvme_free_host_mem(dev);
|
2017-09-06 18:19:57 +08:00
|
|
|
return ret;
|
2013-07-16 05:02:24 +08:00
|
|
|
}
|
|
|
|
|
2021-07-15 05:02:37 +08:00
|
|
|
static ssize_t cmb_show(struct device *dev, struct device_attribute *attr,
|
|
|
|
char *buf)
|
|
|
|
{
|
|
|
|
struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
|
|
|
|
|
|
|
|
return sysfs_emit(buf, "cmbloc : x%08x\ncmbsz : x%08x\n",
|
|
|
|
ndev->cmbloc, ndev->cmbsz);
|
|
|
|
}
|
|
|
|
static DEVICE_ATTR_RO(cmb);
|
|
|
|
|
2021-07-16 15:22:49 +08:00
|
|
|
static ssize_t cmbloc_show(struct device *dev, struct device_attribute *attr,
|
|
|
|
char *buf)
|
|
|
|
{
|
|
|
|
struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
|
|
|
|
|
|
|
|
return sysfs_emit(buf, "%u\n", ndev->cmbloc);
|
|
|
|
}
|
|
|
|
static DEVICE_ATTR_RO(cmbloc);
|
|
|
|
|
|
|
|
static ssize_t cmbsz_show(struct device *dev, struct device_attribute *attr,
|
|
|
|
char *buf)
|
|
|
|
{
|
|
|
|
struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
|
|
|
|
|
|
|
|
return sysfs_emit(buf, "%u\n", ndev->cmbsz);
|
|
|
|
}
|
|
|
|
static DEVICE_ATTR_RO(cmbsz);
|
|
|
|
|
2021-07-28 00:40:43 +08:00
|
|
|
static ssize_t hmb_show(struct device *dev, struct device_attribute *attr,
|
|
|
|
char *buf)
|
|
|
|
{
|
|
|
|
struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
|
|
|
|
|
|
|
|
return sysfs_emit(buf, "%d\n", ndev->hmb);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * sysfs "hmb" write side: parse a boolean and switch the host memory buffer
 * on or off accordingly.
 *
 * Writing the current state is a no-op.  Enabling goes through
 * nvme_setup_host_mem(); disabling tells the controller first and frees the
 * buffer only if that command returned 0.
 *
 * Returns @count, -EINVAL for unparsable input, or a negative error from the
 * helpers.  Positive results from the helpers are not reported as errors.
 */
static ssize_t hmb_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count)
{
	struct nvme_dev *nvme = to_nvme_dev(dev_get_drvdata(dev));
	bool enable;
	int ret;

	if (strtobool(buf, &enable) < 0)
		return -EINVAL;

	if (enable == nvme->hmb)
		return count;

	if (!enable) {
		ret = nvme_set_host_mem(nvme, 0);
		if (!ret)
			nvme_free_host_mem(nvme);
	} else {
		ret = nvme_setup_host_mem(nvme);
	}

	if (ret < 0)
		return ret;

	return count;
}
static DEVICE_ATTR_RW(hmb);
|
|
|
|
|
2021-07-15 05:02:37 +08:00
|
|
|
static umode_t nvme_pci_attrs_are_visible(struct kobject *kobj,
|
|
|
|
struct attribute *a, int n)
|
|
|
|
{
|
|
|
|
struct nvme_ctrl *ctrl =
|
|
|
|
dev_get_drvdata(container_of(kobj, struct device, kobj));
|
|
|
|
struct nvme_dev *dev = to_nvme_dev(ctrl);
|
|
|
|
|
2021-07-16 15:22:49 +08:00
|
|
|
if (a == &dev_attr_cmb.attr ||
|
|
|
|
a == &dev_attr_cmbloc.attr ||
|
|
|
|
a == &dev_attr_cmbsz.attr) {
|
|
|
|
if (!dev->cmbsz)
|
|
|
|
return 0;
|
|
|
|
}
|
2021-07-28 00:40:43 +08:00
|
|
|
if (a == &dev_attr_hmb.attr && !ctrl->hmpre)
|
|
|
|
return 0;
|
|
|
|
|
2021-07-15 05:02:37 +08:00
|
|
|
return a->mode;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct attribute *nvme_pci_attrs[] = {
|
|
|
|
&dev_attr_cmb.attr,
|
2021-07-16 15:22:49 +08:00
|
|
|
&dev_attr_cmbloc.attr,
|
|
|
|
&dev_attr_cmbsz.attr,
|
2021-07-28 00:40:43 +08:00
|
|
|
&dev_attr_hmb.attr,
|
2021-07-15 05:02:37 +08:00
|
|
|
NULL,
|
|
|
|
};
|
|
|
|
|
|
|
|
static const struct attribute_group nvme_pci_attr_group = {
|
|
|
|
.attrs = nvme_pci_attrs,
|
|
|
|
.is_visible = nvme_pci_attrs_are_visible,
|
|
|
|
};
|
|
|
|
|
2019-02-17 01:13:10 +08:00
|
|
|
/*
|
|
|
|
* nirqs is the number of interrupts available for write and read
|
|
|
|
* queues. The core already reserved an interrupt for the admin queue.
|
|
|
|
*/
|
|
|
|
static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
|
2018-10-31 22:36:31 +08:00
|
|
|
{
|
2019-02-17 01:13:10 +08:00
|
|
|
struct nvme_dev *dev = affd->priv;
|
2020-05-02 15:29:41 +08:00
|
|
|
unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues;
|
2018-10-31 22:36:31 +08:00
|
|
|
|
|
|
|
/*
|
2020-07-03 10:49:20 +08:00
|
|
|
* If there is no interrupt available for queues, ensure that
|
2019-02-17 01:13:10 +08:00
|
|
|
* the default queue is set to 1. The affinity set size is
|
|
|
|
* also set to one, but the irq core ignores it for this case.
|
|
|
|
*
|
|
|
|
* If only one interrupt is available or 'write_queue' == 0, combine
|
|
|
|
* write and read queues.
|
|
|
|
*
|
|
|
|
* If 'write_queues' > 0, ensure it leaves room for at least one read
|
|
|
|
* queue.
|
2018-10-31 22:36:31 +08:00
|
|
|
*/
|
2019-02-17 01:13:10 +08:00
|
|
|
if (!nrirqs) {
|
|
|
|
nrirqs = 1;
|
|
|
|
nr_read_queues = 0;
|
2020-05-02 15:29:41 +08:00
|
|
|
} else if (nrirqs == 1 || !nr_write_queues) {
|
2019-02-17 01:13:10 +08:00
|
|
|
nr_read_queues = 0;
|
2020-05-02 15:29:41 +08:00
|
|
|
} else if (nr_write_queues >= nrirqs) {
|
2019-02-17 01:13:10 +08:00
|
|
|
nr_read_queues = 1;
|
2018-10-31 22:36:31 +08:00
|
|
|
} else {
|
2020-05-02 15:29:41 +08:00
|
|
|
nr_read_queues = nrirqs - nr_write_queues;
|
2018-10-31 22:36:31 +08:00
|
|
|
}
|
2019-02-17 01:13:10 +08:00
|
|
|
|
|
|
|
dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
|
|
|
|
affd->set_size[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
|
|
|
|
dev->io_queues[HCTX_TYPE_READ] = nr_read_queues;
|
|
|
|
affd->set_size[HCTX_TYPE_READ] = nr_read_queues;
|
|
|
|
affd->nr_sets = nr_read_queues ? 2 : 1;
|
2018-10-31 22:36:31 +08:00
|
|
|
}
|
|
|
|
|
2018-12-10 02:21:45 +08:00
|
|
|
static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
|
2018-10-31 22:36:31 +08:00
|
|
|
{
|
|
|
|
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
|
|
|
struct irq_affinity affd = {
|
genirq/affinity: Store interrupt sets size in struct irq_affinity
The interrupt affinity spreading mechanism supports to spread out
affinities for one or more interrupt sets. A interrupt set contains one
or more interrupts. Each set is mapped to a specific functionality of a
device, e.g. general I/O queues and read I/O queus of multiqueue block
devices.
The number of interrupts per set is defined by the driver. It depends on
the total number of available interrupts for the device, which is
determined by the PCI capabilites and the availability of underlying CPU
resources, and the number of queues which the device provides and the
driver wants to instantiate.
The driver passes initial configuration for the interrupt allocation via
a pointer to struct irq_affinity.
Right now the allocation mechanism is complex as it requires to have a
loop in the driver to determine the maximum number of interrupts which
are provided by the PCI capabilities and the underlying CPU resources.
This loop would have to be replicated in every driver which wants to
utilize this mechanism. That's unwanted code duplication and error
prone.
In order to move this into generic facilities it is required to have a
mechanism, which allows the recalculation of the interrupt sets and
their size, in the core code. As the core code does not have any
knowledge about the underlying device, a driver specific callback will
be added to struct affinity_desc, which will be invoked by the core
code. The callback will get the number of available interupts as an
argument, so the driver can calculate the corresponding number and size
of interrupt sets.
To support this, two modifications for the handling of struct irq_affinity
are required:
1) The (optional) interrupt sets size information is contained in a
separate array of integers and struct irq_affinity contains a
pointer to it.
This is cumbersome and as the maximum number of interrupt sets is small,
there is no reason to have separate storage. Moving the size array into
struct affinity_desc avoids indirections and makes the code simpler.
2) At the moment the struct irq_affinity pointer which is handed in from
the driver and passed through to several core functions is marked
'const'.
With the upcoming callback to recalculate the number and size of
interrupt sets, it's necessary to remove the 'const'
qualifier. Otherwise the callback would not be able to update the data.
Implement #1 and store the interrupt sets size in 'struct irq_affinity'.
No functional change.
[ tglx: Fixed the memcpy() size so it won't copy beyond the size of the
source. Fixed the kernel doc comments for struct irq_affinity and
de-'This patch'-ed the changelog ]
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bjorn Helgaas <helgaas@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: linux-nvme@lists.infradead.org
Cc: linux-pci@vger.kernel.org
Cc: Keith Busch <keith.busch@intel.com>
Cc: Sumit Saxena <sumit.saxena@broadcom.com>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Shivasharan Srikanteshwara <shivasharan.srikanteshwara@broadcom.com>
Link: https://lkml.kernel.org/r/20190216172228.423723127@linutronix.de
2019-02-17 01:13:08 +08:00
|
|
|
.pre_vectors = 1,
|
2019-02-17 01:13:10 +08:00
|
|
|
.calc_sets = nvme_calc_irq_sets,
|
|
|
|
.priv = dev,
|
2018-10-31 22:36:31 +08:00
|
|
|
};
|
2020-09-24 15:01:22 +08:00
|
|
|
unsigned int irq_queues, poll_queues;
|
2018-12-10 02:21:45 +08:00
|
|
|
|
|
|
|
/*
|
2020-09-24 15:01:22 +08:00
|
|
|
* Poll queues don't need interrupts, but we need at least one I/O queue
|
|
|
|
* left over for non-polled I/O.
|
2018-12-10 02:21:45 +08:00
|
|
|
*/
|
2020-09-24 15:01:22 +08:00
|
|
|
poll_queues = min(dev->nr_poll_queues, nr_io_queues - 1);
|
|
|
|
dev->io_queues[HCTX_TYPE_POLL] = poll_queues;
|
2018-10-31 22:36:31 +08:00
|
|
|
|
2020-09-24 15:01:22 +08:00
|
|
|
/*
|
|
|
|
* Initialize for the single interrupt case, will be updated in
|
|
|
|
* nvme_calc_irq_sets().
|
|
|
|
*/
|
2019-02-17 01:13:10 +08:00
|
|
|
dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
|
|
|
|
dev->io_queues[HCTX_TYPE_READ] = 0;
|
2018-10-31 22:36:31 +08:00
|
|
|
|
2019-08-07 15:51:21 +08:00
|
|
|
/*
|
2020-09-24 15:01:22 +08:00
|
|
|
* We need interrupts for the admin queue and each non-polled I/O queue,
|
|
|
|
* but some Apple controllers require all queues to use the first
|
|
|
|
* vector.
|
2019-08-07 15:51:21 +08:00
|
|
|
*/
|
2020-09-24 15:01:22 +08:00
|
|
|
irq_queues = 1;
|
|
|
|
if (!(dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR))
|
|
|
|
irq_queues += (nr_io_queues - poll_queues);
|
2019-02-17 01:13:10 +08:00
|
|
|
return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues,
|
|
|
|
PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
|
2018-10-31 22:36:31 +08:00
|
|
|
}
|
|
|
|
|
2019-01-05 06:04:33 +08:00
|
|
|
static void nvme_disable_io_queues(struct nvme_dev *dev)
|
|
|
|
{
|
|
|
|
if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq))
|
|
|
|
__nvme_disable_io_queues(dev, nvme_admin_delete_cq);
|
|
|
|
}
|
|
|
|
|
2020-05-02 15:29:41 +08:00
|
|
|
static unsigned int nvme_max_io_queues(struct nvme_dev *dev)
|
|
|
|
{
|
2020-11-12 16:23:02 +08:00
|
|
|
/*
|
|
|
|
* If tags are shared with admin queue (Apple bug), then
|
|
|
|
* make sure we only use one IO queue.
|
|
|
|
*/
|
|
|
|
if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
|
|
|
|
return 1;
|
2020-05-02 15:29:41 +08:00
|
|
|
return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues;
|
|
|
|
}
|
|
|
|
|
2012-12-22 07:13:49 +08:00
|
|
|
/*
 * Negotiate the I/O queue count with the controller, size the doorbell BAR
 * mapping accordingly, (re)allocate interrupt vectors and create the I/O
 * queues.
 *
 * Returns 0 on success, including the cases where nr_io_queues is 0 after
 * nvme_set_queue_count() or fewer than two queues end up online; otherwise a
 * negative errno.
 */
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
	struct nvme_queue *adminq = &dev->queues[0];
	struct pci_dev *pdev = to_pci_dev(dev->dev);
	unsigned int nr_io_queues;
	unsigned long size;
	int result;

	/*
	 * Sample the module parameters once at reset time so that we have
	 * stable values to work with.
	 */
	dev->nr_write_queues = write_queues;
	dev->nr_poll_queues = poll_queues;

	/* Queue 0 is the admin queue, so it is not counted as an I/O queue. */
	nr_io_queues = dev->nr_allocated_queues - 1;
	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
	if (result < 0)
		return result;

	if (nr_io_queues == 0)
		return 0;

	/*
	 * Free the admin queue's IRQ as soon as the NVMEQ_ENABLED bit
	 * transitions from set to unset. If there were a window between
	 * clearing the bit and actually freeing the IRQ, a
	 * pci_free_irq_vectors() call landing in that window would crash.
	 * Take the lock to avoid racing with pci_free_irq_vectors() in the
	 * nvme_dev_disable() path.
	 *
	 * NOTE(review): the helper presumably acquires dev->shutdown_lock on
	 * success, since that is what the exit paths below release - confirm.
	 */
	result = nvme_setup_io_queues_trylock(dev);
	if (result)
		return result;
	if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags))
		pci_free_irq(pdev, 0, adminq);

	/*
	 * A positive nvme_cmb_qdepth() result becomes the new queue depth;
	 * any other result turns off use of the CMB for submission queues.
	 */
	if (dev->cmb_use_sqes) {
		result = nvme_cmb_qdepth(dev, nr_io_queues,
				sizeof(struct nvme_command));
		if (result > 0)
			dev->q_depth = result;
		else
			dev->cmb_use_sqes = false;
	}

	/*
	 * Remap the BAR for the doorbells of nr_io_queues queues, dropping
	 * one queue per failed attempt until the mapping succeeds or no I/O
	 * queue is left.
	 */
	do {
		size = db_bar_size(dev, nr_io_queues);
		result = nvme_remap_bar(dev, size);
		if (!result)
			break;
		if (!--nr_io_queues) {
			result = -ENOMEM;
			goto out_unlock;
		}
	} while (1);
	/* dev->dbs may have moved with the remap; refresh the admin doorbell. */
	adminq->q_db = dev->dbs;

 retry:
	/* Deregister the admin queue's interrupt */
	if (test_and_clear_bit(NVMEQ_ENABLED, &adminq->flags))
		pci_free_irq(pdev, 0, adminq);

	/*
	 * If we enable msix early due to not intx, disable it again before
	 * setting up the full range we need.
	 */
	pci_free_irq_vectors(pdev);

	result = nvme_setup_irqs(dev, nr_io_queues);
	if (result <= 0) {
		result = -EIO;
		goto out_unlock;
	}

	/*
	 * result is the number of vectors obtained. One of them serves the
	 * admin queue, but at least one interrupt-driven I/O queue is always
	 * assumed; poll queues are added on top because they need no vector.
	 */
	dev->num_vecs = result;
	result = max(result - 1, 1);
	dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];

	/*
	 * Should investigate if there's a performance win from allocating
	 * more queues than interrupt vectors; it might allow the submission
	 * path to scale better, even if the receive path is limited by the
	 * number of interrupts.
	 */
	result = queue_request_irq(adminq);
	if (result)
		goto out_unlock;
	set_bit(NVMEQ_ENABLED, &adminq->flags);
	/* The lock is dropped before the I/O queues are created. */
	mutex_unlock(&dev->shutdown_lock);

	result = nvme_create_io_queues(dev);
	if (result || dev->online_queues < 2)
		return result;

	/*
	 * Fewer queues came online than planned: tear them down, retake the
	 * lock and redo the interrupt setup for the smaller count.
	 */
	if (dev->online_queues - 1 < dev->max_qid) {
		nr_io_queues = dev->online_queues - 1;
		nvme_disable_io_queues(dev);
		result = nvme_setup_io_queues_trylock(dev);
		if (result)
			return result;
		nvme_suspend_io_queues(dev);
		goto retry;
	}
	dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
					dev->io_queues[HCTX_TYPE_DEFAULT],
					dev->io_queues[HCTX_TYPE_READ],
					dev->io_queues[HCTX_TYPE_POLL]);
	return 0;
out_unlock:
	mutex_unlock(&dev->shutdown_lock);
	return result;
}
|
|
|
|
|
2017-06-03 15:38:04 +08:00
|
|
|
/*
 * Completion callback for a queue deletion command: release the request and
 * signal the waiter on the queue's delete_done completion.
 */
static void nvme_del_queue_end(struct request *req, blk_status_t error)
{
	/* Fetch the queue before the request is freed. */
	struct nvme_queue *queue = req->end_io_data;

	blk_mq_free_request(req);
	complete(&queue->delete_done);
}
|
|
|
|
|
2017-06-03 15:38:04 +08:00
|
|
|
/*
 * Completion callback for a completion queue deletion: record a failure on
 * the queue, then finish exactly like any other queue deletion.
 */
static void nvme_del_cq_end(struct request *req, blk_status_t error)
{
	struct nvme_queue *cq = req->end_io_data;

	if (error)
		set_bit(NVMEQ_DELETE_ERROR, &cq->flags);

	nvme_del_queue_end(req, error);
}
|
|
|
|
|
2016-01-13 05:41:17 +08:00
|
|
|
/*
 * Submit an asynchronous admin command deleting @nvmeq; @opcode selects
 * between submission queue and completion queue deletion.
 *
 * Returns 0 once the command has been handed off (completion is reported
 * through nvmeq->delete_done), or a negative errno if no request could be
 * allocated without waiting.
 */
static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
{
	struct request_queue *admin_q = nvmeq->dev->ctrl.admin_q;
	struct nvme_command cmd = { };
	struct request *rq;

	cmd.delete_queue.opcode = opcode;
	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);

	rq = blk_mq_alloc_request(admin_q, nvme_req_op(&cmd), BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(rq))
		return PTR_ERR(rq);
	nvme_init_request(rq, &cmd);

	rq->end_io_data = nvmeq;

	init_completion(&nvmeq->delete_done);
	rq->rq_flags |= RQF_QUIET;

	/* CQ deletions get the callback that also records errors. */
	if (opcode == nvme_admin_delete_cq)
		blk_execute_rq_nowait(rq, false, nvme_del_cq_end);
	else
		blk_execute_rq_nowait(rq, false, nvme_del_queue_end);
	return 0;
}
|
|
|
|
|
2019-01-05 06:04:33 +08:00
|
|
|
/*
 * Issue @opcode (a queue deletion command) for every online I/O queue, from
 * the highest queue index down to 1, and wait for the completions.
 *
 * Returns true when every submitted deletion completed, false as soon as a
 * wait times out.
 */
static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
{
	/* nr_queues: queues still to submit; sent: submitted but not yet reaped */
	int nr_queues = dev->online_queues - 1, sent = 0;
	unsigned long timeout;

 retry:
	/* Each submission round starts with a fresh admin timeout budget. */
	timeout = NVME_ADMIN_TIMEOUT;
	while (nr_queues > 0) {
		/* Stop submitting on the first failure and reap what is in flight. */
		if (nvme_delete_queue(&dev->queues[nr_queues], opcode))
			break;
		nr_queues--;
		sent++;
	}
	while (sent) {
		/* Outstanding deletions occupy indices nr_queues + 1 .. nr_queues + sent. */
		struct nvme_queue *nvmeq = &dev->queues[nr_queues + sent];

		/* The remaining time is carried over between waits in one round. */
		timeout = wait_for_completion_io_timeout(&nvmeq->delete_done,
				timeout);
		if (timeout == 0)
			return false;

		sent--;
		/* A completion was reaped; try to submit the queues still pending. */
		if (nr_queues)
			goto retry;
	}
	return true;
}
|
|
|
|
|
2019-09-03 23:22:24 +08:00
|
|
|
static void nvme_dev_add(struct nvme_dev *dev)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2018-01-06 08:01:58 +08:00
|
|
|
int ret;
|
|
|
|
|
2015-11-28 22:39:07 +08:00
|
|
|
if (!dev->ctrl.tagset) {
|
2018-12-03 00:46:27 +08:00
|
|
|
dev->tagset.ops = &nvme_mq_ops;
|
2015-06-09 00:08:15 +08:00
|
|
|
dev->tagset.nr_hw_queues = dev->online_queues - 1;
|
2019-07-23 11:23:13 +08:00
|
|
|
dev->tagset.nr_maps = 2; /* default + read */
|
2018-12-14 21:06:59 +08:00
|
|
|
if (dev->io_queues[HCTX_TYPE_POLL])
|
|
|
|
dev->tagset.nr_maps++;
|
2015-06-09 00:08:15 +08:00
|
|
|
dev->tagset.timeout = NVME_IO_TIMEOUT;
|
2020-06-16 17:34:23 +08:00
|
|
|
dev->tagset.numa_node = dev->ctrl.numa_node;
|
2020-06-17 16:05:13 +08:00
|
|
|
dev->tagset.queue_depth = min_t(unsigned int, dev->q_depth,
|
|
|
|
BLK_MQ_MAX_DEPTH) - 1;
|
2019-03-05 20:46:58 +08:00
|
|
|
dev->tagset.cmd_size = sizeof(struct nvme_iod);
|
2015-06-09 00:08:15 +08:00
|
|
|
dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
|
|
|
|
dev->tagset.driver_data = dev;
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2019-08-07 15:51:22 +08:00
|
|
|
/*
|
|
|
|
* Some Apple controllers requires tags to be unique
|
|
|
|
* across admin and IO queue, so reserve the first 32
|
|
|
|
* tags of the IO queue.
|
|
|
|
*/
|
|
|
|
if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
|
|
|
|
dev->tagset.reserved_tags = NVME_AQ_DEPTH;
|
|
|
|
|
2018-01-06 08:01:58 +08:00
|
|
|
ret = blk_mq_alloc_tag_set(&dev->tagset);
|
|
|
|
if (ret) {
|
|
|
|
dev_warn(dev->ctrl.device,
|
|
|
|
"IO queues tagset allocation failed %d\n", ret);
|
2019-09-03 23:22:24 +08:00
|
|
|
return;
|
2018-01-06 08:01:58 +08:00
|
|
|
}
|
2015-11-28 22:39:07 +08:00
|
|
|
dev->ctrl.tagset = &dev->tagset;
|
2015-12-18 08:08:15 +08:00
|
|
|
} else {
|
|
|
|
blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
|
|
|
|
|
|
|
|
/* Free previously allocated queues that are no longer usable */
|
|
|
|
nvme_free_queues(dev, dev->online_queues);
|
2015-06-09 00:08:15 +08:00
|
|
|
}
|
2015-12-18 08:08:15 +08:00
|
|
|
|
2019-05-02 19:31:33 +08:00
|
|
|
nvme_dbbuf_set(dev);
|
2011-01-21 01:50:14 +08:00
|
|
|
}
|
|
|
|
|
2016-02-25 00:15:52 +08:00
|
|
|
static int nvme_pci_enable(struct nvme_dev *dev)
|
2013-07-16 05:02:19 +08:00
|
|
|
{
|
2016-02-25 00:15:52 +08:00
|
|
|
int result = -ENOMEM;
|
2015-05-22 17:12:39 +08:00
|
|
|
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
2021-02-10 08:39:42 +08:00
|
|
|
int dma_address_bits = 64;
|
2013-07-16 05:02:19 +08:00
|
|
|
|
|
|
|
if (pci_enable_device_mem(pdev))
|
|
|
|
return result;
|
|
|
|
|
|
|
|
pci_set_master(pdev);
|
|
|
|
|
2021-02-10 08:39:42 +08:00
|
|
|
if (dev->ctrl.quirks & NVME_QUIRK_DMA_ADDRESS_BITS_48)
|
|
|
|
dma_address_bits = 48;
|
|
|
|
if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(dma_address_bits)))
|
2013-06-27 06:49:11 +08:00
|
|
|
goto disable;
|
2013-07-16 05:02:19 +08:00
|
|
|
|
2015-11-20 15:58:10 +08:00
|
|
|
if (readl(dev->bar + NVME_REG_CSTS) == -1) {
|
2013-12-11 04:10:39 +08:00
|
|
|
result = -ENODEV;
|
2016-02-25 00:15:52 +08:00
|
|
|
goto disable;
|
2013-12-11 04:10:39 +08:00
|
|
|
}
|
2014-11-15 00:49:26 +08:00
|
|
|
|
|
|
|
/*
|
2016-04-09 06:09:10 +08:00
|
|
|
* Some devices and/or platforms don't advertise or work with INTx
|
|
|
|
* interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll
|
|
|
|
* adjust this later.
|
2014-11-15 00:49:26 +08:00
|
|
|
*/
|
2016-09-14 22:18:57 +08:00
|
|
|
result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
|
|
|
|
if (result < 0)
|
|
|
|
return result;
|
2014-11-15 00:49:26 +08:00
|
|
|
|
2017-06-28 03:16:38 +08:00
|
|
|
dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
|
2015-11-20 15:58:10 +08:00
|
|
|
|
nvme-pci: Use u32 for nvme_dev.q_depth and nvme_queue.q_depth
Recently nvme_dev.q_depth was changed from an int to u16 type.
This falls over for the queue depth calculation in nvme_pci_enable(),
where NVME_CAP_MQES(dev->ctrl.cap) + 1 may overflow as a u16, as
NVME_CAP_MQES() is a 16b number also. That happens for me, and this is the
result:
root@ubuntu:/home/john# [148.272996] Unable to handle kernel NULL pointer
dereference at virtual address 0000000000000010
Mem abort info:
ESR = 0x96000004
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000004
CM = 0, WnR = 0
user pgtable: 4k pages, 48-bit VAs, pgdp=00000a27bf3c9000
[0000000000000010] pgd=0000000000000000, p4d=0000000000000000
Internal error: Oops: 96000004 [#1] PREEMPT SMP
Modules linked in: nvme nvme_core
CPU: 56 PID: 256 Comm: kworker/u195:0 Not tainted
5.8.0-next-20200812 #27
Hardware name: Huawei D06 /D06, BIOS Hisilicon D06 UEFI RC0 -
V1.16.01 03/15/2019
Workqueue: nvme-reset-wq nvme_reset_work [nvme]
pstate: 80c00009 (Nzcv daif +PAN +UAO BTYPE=--)
pc : __sg_alloc_table_from_pages+0xec/0x238
lr : __sg_alloc_table_from_pages+0xc8/0x238
sp : ffff800013ccbad0
x29: ffff800013ccbad0 x28: ffff0a27b3d380a8
x27: 0000000000000000 x26: 0000000000002dc2
x25: 0000000000000dc0 x24: 0000000000000000
x23: 0000000000000000 x22: ffff800013ccbbe8
x21: 0000000000000010 x20: 0000000000000000
x19: 00000000fffff000 x18: ffffffffffffffff
x17: 00000000000000c0 x16: fffffe289eaf6380
x15: ffff800011b59948 x14: ffff002bc8fe98f8
x13: ff00000000000000 x12: ffff8000114ca000
x11: 0000000000000000 x10: ffffffffffffffff
x9 : ffffffffffffffc0 x8 : ffff0a27b5f9b6a0
x7 : 0000000000000000 x6 : 0000000000000001
x5 : ffff0a27b5f9b680 x4 : 0000000000000000
x3 : ffff0a27b5f9b680 x2 : 0000000000000000
x1 : 0000000000000001 x0 : 0000000000000000
Call trace:
__sg_alloc_table_from_pages+0xec/0x238
sg_alloc_table_from_pages+0x18/0x28
iommu_dma_alloc+0x474/0x678
dma_alloc_attrs+0xd8/0xf0
nvme_alloc_queue+0x114/0x160 [nvme]
nvme_reset_work+0xb34/0x14b4 [nvme]
process_one_work+0x1e8/0x360
worker_thread+0x44/0x478
kthread+0x150/0x158
ret_from_fork+0x10/0x34
Code: f94002c3 6b01017f 540007c2 11000486 (f8645aa5)
---[ end trace 89bb2b72d59bf925 ]---
Fix by making onto a u32.
Also use u32 for nvme_dev.q_depth, as we assign this value from
nvme_dev.q_depth, and nvme_dev.q_depth will possibly hold 65536 - this
avoids the same crash as above.
Fixes: 61f3b8963097 ("nvme-pci: use unsigned for io queue depth")
Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2020-08-14 23:34:25 +08:00
|
|
|
dev->q_depth = min_t(u32, NVME_CAP_MQES(dev->ctrl.cap) + 1,
|
2017-07-10 16:46:59 +08:00
|
|
|
io_queue_depth);
|
2019-08-23 01:51:17 +08:00
|
|
|
dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */
|
2017-06-28 03:16:38 +08:00
|
|
|
dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
|
2015-11-20 15:58:10 +08:00
|
|
|
dev->dbs = dev->bar + 4096;
|
2015-12-02 04:23:22 +08:00
|
|
|
|
2019-08-07 15:51:21 +08:00
|
|
|
/*
|
|
|
|
* Some Apple controllers require a non-standard SQE size.
|
|
|
|
* Interestingly they also seem to ignore the CC:IOSQES register
|
|
|
|
* so we don't bother updating it here.
|
|
|
|
*/
|
|
|
|
if (dev->ctrl.quirks & NVME_QUIRK_128_BYTES_SQES)
|
|
|
|
dev->io_sqes = 7;
|
|
|
|
else
|
|
|
|
dev->io_sqes = NVME_NVM_IOSQES;
|
2015-12-02 04:23:22 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Temporary fix for the Apple controller found in the MacBook8,1 and
|
|
|
|
* some MacBook7,1 to avoid controller resets and data loss.
|
|
|
|
*/
|
|
|
|
if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
|
|
|
|
dev->q_depth = 2;
|
2017-05-20 21:14:43 +08:00
|
|
|
dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
|
|
|
|
"set queue depth=%u to work around controller resets\n",
|
2015-12-02 04:23:22 +08:00
|
|
|
dev->q_depth);
|
2017-06-28 10:27:57 +08:00
|
|
|
} else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
|
|
|
|
(pdev->device == 0xa821 || pdev->device == 0xa822) &&
|
2017-06-28 03:16:38 +08:00
|
|
|
NVME_CAP_MQES(dev->ctrl.cap) == 0) {
|
2017-06-28 10:27:57 +08:00
|
|
|
dev->q_depth = 64;
|
|
|
|
dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
|
|
|
|
"set queue depth=%u\n", dev->q_depth);
|
2015-12-02 04:23:22 +08:00
|
|
|
}
|
|
|
|
|
2019-08-07 15:51:22 +08:00
|
|
|
/*
|
|
|
|
* Controllers with the shared tags quirk need the IO queue to be
|
|
|
|
* big enough so that we get 32 tags for the admin queue
|
|
|
|
*/
|
|
|
|
if ((dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) &&
|
|
|
|
(dev->q_depth < (NVME_AQ_DEPTH + 2))) {
|
|
|
|
dev->q_depth = NVME_AQ_DEPTH + 2;
|
|
|
|
dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n",
|
|
|
|
dev->q_depth);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-12-20 21:25:11 +08:00
|
|
|
nvme_map_cmb(dev);
|
2016-10-06 10:01:12 +08:00
|
|
|
|
2015-12-08 06:30:31 +08:00
|
|
|
pci_enable_pcie_error_reporting(pdev);
|
|
|
|
pci_save_state(pdev);
|
2013-07-16 05:02:19 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
disable:
|
|
|
|
pci_disable_device(pdev);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_dev_unmap(struct nvme_dev *dev)
|
2016-02-25 00:15:52 +08:00
|
|
|
{
|
|
|
|
if (dev->bar)
|
|
|
|
iounmap(dev->bar);
|
2016-06-07 15:44:02 +08:00
|
|
|
pci_release_mem_regions(to_pci_dev(dev->dev));
|
2016-02-25 00:15:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_pci_disable(struct nvme_dev *dev)
|
2013-07-16 05:02:19 +08:00
|
|
|
{
|
2015-05-22 17:12:39 +08:00
|
|
|
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
|
|
|
|
2016-09-14 22:18:57 +08:00
|
|
|
pci_free_irq_vectors(pdev);
|
2013-07-16 05:02:19 +08:00
|
|
|
|
2015-12-08 06:30:31 +08:00
|
|
|
if (pci_is_enabled(pdev)) {
|
|
|
|
pci_disable_pcie_error_reporting(pdev);
|
2015-05-22 17:12:39 +08:00
|
|
|
pci_disable_device(pdev);
|
2013-12-11 04:10:40 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-13 05:41:18 +08:00
|
|
|
/*
 * Quiesce and tear down the controller's queues and PCI resources, then
 * cancel every request that is still outstanding. The whole sequence runs
 * under dev->shutdown_lock.
 *
 * @shutdown: true for a final shutdown; the queues are then restarted at the
 *            end so entered requests can run to their failed completion.
 */
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
	/* Assume the controller is dead unless the register read says otherwise. */
	bool dead = true, freeze = false;
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	mutex_lock(&dev->shutdown_lock);
	if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) {
		u32 csts = readl(dev->bar + NVME_REG_CSTS);

		if (dev->ctrl.state == NVME_CTRL_LIVE ||
		    dev->ctrl.state == NVME_CTRL_RESETTING) {
			freeze = true;
			nvme_start_freeze(&dev->ctrl);
		}
		/*
		 * Dead means: fatal status reported, controller not ready, or
		 * the PCI channel is not in its normal state.
		 */
		dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
			pdev->error_state != pci_channel_io_normal);
	}

	/*
	 * Give the controller a chance to complete all entered requests if
	 * doing a safe shutdown.
	 */
	if (!dead && shutdown && freeze)
		nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);

	nvme_stop_queues(&dev->ctrl);

	/* Only a responsive controller is asked to delete its queues. */
	if (!dead && dev->ctrl.queue_count > 0) {
		nvme_disable_io_queues(dev);
		nvme_disable_admin_queue(dev, shutdown);
	}
	nvme_suspend_io_queues(dev);
	nvme_suspend_queue(&dev->queues[0]);
	nvme_pci_disable(dev);
	nvme_reap_pending_cqes(dev);

	/* Cancel whatever is still in flight, then wait for those completions. */
	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
	blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
	blk_mq_tagset_wait_completed_request(&dev->tagset);
	blk_mq_tagset_wait_completed_request(&dev->admin_tagset);

	/*
	 * The driver will not be starting up queues again if shutting down so
	 * must flush all entered requests to their failed completion to avoid
	 * deadlocking blk-mq hot-cpu notifier.
	 */
	if (shutdown) {
		nvme_start_queues(&dev->ctrl);
		if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q))
			nvme_start_admin_queue(&dev->ctrl);
	}
	mutex_unlock(&dev->shutdown_lock);
}
|
|
|
|
|
2019-09-05 00:06:11 +08:00
|
|
|
/*
 * Disable the device once nvme_wait_reset() succeeds.
 *
 * Returns 0 after the device was disabled, or -EBUSY when nvme_wait_reset()
 * fails (the device is then left untouched).
 */
static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
{
	if (nvme_wait_reset(&dev->ctrl)) {
		nvme_dev_disable(dev, shutdown);
		return 0;
	}

	return -EBUSY;
}
|
|
|
|
|
2011-02-10 22:56:01 +08:00
|
|
|
static int nvme_setup_prp_pools(struct nvme_dev *dev)
|
|
|
|
{
|
2015-05-22 17:12:39 +08:00
|
|
|
dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
|
2020-08-19 01:51:59 +08:00
|
|
|
NVME_CTRL_PAGE_SIZE,
|
|
|
|
NVME_CTRL_PAGE_SIZE, 0);
|
2011-02-10 22:56:01 +08:00
|
|
|
if (!dev->prp_page_pool)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2011-02-10 23:30:34 +08:00
|
|
|
/* Optimisation for I/Os between 4k and 128k */
|
2015-05-22 17:12:39 +08:00
|
|
|
dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
|
2011-02-10 23:30:34 +08:00
|
|
|
256, 256, 0);
|
|
|
|
if (!dev->prp_small_pool) {
|
|
|
|
dma_pool_destroy(dev->prp_page_pool);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
2011-02-10 22:56:01 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Destroy both PRP list DMA pools (the page-sized and the 256-byte one). */
static void nvme_release_prp_pools(struct nvme_dev *dev)
{
	dma_pool_destroy(dev->prp_page_pool);
	dma_pool_destroy(dev->prp_small_pool);
}
|
|
|
|
|
2019-09-05 21:52:33 +08:00
|
|
|
/*
 * Free the I/O tag set if its tags were allocated, and clear the
 * controller's tag set pointer in either case.
 */
static void nvme_free_tagset(struct nvme_dev *dev)
{
	if (dev->tagset.tags)
		blk_mq_free_tag_set(&dev->tagset);
	dev->ctrl.tagset = NULL;
}
|
|
|
|
|
2015-11-26 17:54:19 +08:00
|
|
|
/*
 * Release everything owned by the nvme_dev that embeds @ctrl: doorbell
 * buffer DMA memory, the I/O tag set, the admin queue reference, the OPAL
 * device, the iod mempool, the device reference, the queue array and finally
 * the nvme_dev itself.
 */
static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{
	struct nvme_dev *dev = to_nvme_dev(ctrl);

	nvme_dbbuf_dma_free(dev);
	nvme_free_tagset(dev);
	if (dev->ctrl.admin_q)
		blk_put_queue(dev->ctrl.admin_q);
	free_opal_dev(dev->ctrl.opal_dev);
	mempool_destroy(dev->iod_mempool);
	put_device(dev->dev);
	kfree(dev->queues);
	/* dev must be freed last; everything above reads through it. */
	kfree(dev);
}
|
|
|
|
|
2019-06-09 04:16:32 +08:00
|
|
|
static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
|
2016-02-25 00:15:55 +08:00
|
|
|
{
|
2019-09-05 00:06:11 +08:00
|
|
|
/*
|
|
|
|
* Set state to deleting now to avoid blocking nvme_wait_reset(), which
|
|
|
|
* may be holding this pci_dev's device lock.
|
|
|
|
*/
|
|
|
|
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
|
2017-10-18 19:25:42 +08:00
|
|
|
nvme_get_ctrl(&dev->ctrl);
|
2016-02-25 00:15:56 +08:00
|
|
|
nvme_dev_disable(dev, false);
|
2018-06-20 13:42:22 +08:00
|
|
|
nvme_kill_queues(&dev->ctrl);
|
2017-11-09 19:32:07 +08:00
|
|
|
if (!queue_work(nvme_wq, &dev->remove_work))
|
2016-02-25 00:15:55 +08:00
|
|
|
nvme_put_ctrl(&dev->ctrl);
|
|
|
|
}
|
|
|
|
|
2015-11-26 19:42:26 +08:00
|
|
|
/*
 * Reset work handler: takes a controller that is in the RESETTING state
 * through PCI enable, admin queue setup, controller initialisation and I/O
 * queue setup, and finally marks it LIVE and starts it.
 *
 * On any failure the controller is handed to nvme_remove_dead_ctrl().
 */
static void nvme_reset_work(struct work_struct *work)
{
	struct nvme_dev *dev =
		container_of(work, struct nvme_dev, ctrl.reset_work);
	/* Sampled up front; decides below whether OPAL is unlocked from suspend. */
	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
	int result;

	if (dev->ctrl.state != NVME_CTRL_RESETTING) {
		dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
			 dev->ctrl.state);
		result = -ENODEV;
		goto out;
	}

	/*
	 * If we're called to reset a live controller first shut it down before
	 * moving on.
	 */
	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
		nvme_dev_disable(dev, false);
	nvme_sync_queues(&dev->ctrl);

	/* PCI enable and admin queue bring-up run under the shutdown lock. */
	mutex_lock(&dev->shutdown_lock);
	result = nvme_pci_enable(dev);
	if (result)
		goto out_unlock;

	result = nvme_pci_configure_admin_queue(dev);
	if (result)
		goto out_unlock;

	result = nvme_alloc_admin_tags(dev);
	if (result)
		goto out_unlock;

	/*
	 * Limit the max command size to prevent iod->sg allocations going
	 * over a single page.
	 */
	dev->ctrl.max_hw_sectors = min_t(u32,
		NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9);
	dev->ctrl.max_segments = NVME_MAX_SEGS;

	/*
	 * Don't limit the IOMMU merged segment size.
	 */
	dma_set_max_seg_size(dev->dev, 0xffffffff);
	dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);

	mutex_unlock(&dev->shutdown_lock);

	/*
	 * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
	 * initializing procedure here.
	 */
	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
		dev_warn(dev->ctrl.device,
			"failed to mark controller CONNECTING\n");
		result = -EBUSY;
		goto out;
	}

	/*
	 * We do not support an SGL for metadata (yet), so we are limited to a
	 * single integrity segment for the separate metadata pointer.
	 */
	dev->ctrl.max_integrity_segments = 1;

	result = nvme_init_ctrl_finish(&dev->ctrl);
	if (result)
		goto out;

	/*
	 * With security commands supported: create the OPAL device on first
	 * use, or unlock it when coming back from suspend. Without support:
	 * drop any OPAL device that may exist.
	 */
	if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
		if (!dev->ctrl.opal_dev)
			dev->ctrl.opal_dev =
				init_opal_dev(&dev->ctrl, &nvme_sec_submit);
		else if (was_suspend)
			opal_unlock_from_suspend(dev->ctrl.opal_dev);
	} else {
		free_opal_dev(dev->ctrl.opal_dev);
		dev->ctrl.opal_dev = NULL;
	}

	/* A doorbell buffer allocation failure is only warned about. */
	if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) {
		result = nvme_dbbuf_dma_alloc(dev);
		if (result)
			dev_warn(dev->dev,
				 "unable to allocate dma for dbbuf\n");
	}

	/* Only a negative return from the host memory setup is fatal. */
	if (dev->ctrl.hmpre) {
		result = nvme_setup_host_mem(dev);
		if (result < 0)
			goto out;
	}

	result = nvme_setup_io_queues(dev);
	if (result)
		goto out;

	/*
	 * Keep the controller around but remove all namespaces if we don't have
	 * any working I/O queue.
	 */
	if (dev->online_queues < 2) {
		dev_warn(dev->ctrl.device, "IO queues not created\n");
		nvme_kill_queues(&dev->ctrl);
		nvme_remove_namespaces(&dev->ctrl);
		nvme_free_tagset(dev);
	} else {
		nvme_start_queues(&dev->ctrl);
		nvme_wait_freeze(&dev->ctrl);
		nvme_dev_add(dev);
		nvme_unfreeze(&dev->ctrl);
	}

	/*
	 * If only admin queue live, keep it to do further investigation or
	 * recovery.
	 */
	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
		dev_warn(dev->ctrl.device,
			"failed to mark controller live state\n");
		result = -ENODEV;
		goto out;
	}

	/* Create the sysfs attribute group once; a failure here is not fatal. */
	if (!dev->attrs_added && !sysfs_create_group(&dev->ctrl.device->kobj,
			&nvme_pci_attr_group))
		dev->attrs_added = true;

	nvme_start_ctrl(&dev->ctrl);
	return;

 out_unlock:
	mutex_unlock(&dev->shutdown_lock);
 out:
	if (result)
		dev_warn(dev->ctrl.device,
			 "Removing after probe failure status: %d\n", result);
	nvme_remove_dead_ctrl(dev);
}
|
|
|
|
|
2015-11-26 19:35:49 +08:00
|
|
|
/*
 * Removal work handler: unbind the driver from the PCI device if driver data
 * is still set on it, then drop a controller reference.
 */
static void nvme_remove_dead_ctrl_work(struct work_struct *work)
{
	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	if (pci_get_drvdata(pdev))
		device_release_driver(&pdev->dev);
	/* NOTE(review): presumably pairs with the nvme_get_ctrl() taken before this work is queued - confirm. */
	nvme_put_ctrl(&dev->ctrl);
}
|
|
|
|
|
2015-11-26 17:06:56 +08:00
|
|
|
/*
 * Read the 32-bit controller register at byte offset @off from the mapped
 * BAR into *@val. Always returns 0.
 */
static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
	struct nvme_dev *dev = to_nvme_dev(ctrl);

	*val = readl(dev->bar + off);
	return 0;
}
|
|
|
|
|
2015-11-28 22:03:49 +08:00
|
|
|
/*
 * Write @val to the 32-bit controller register at byte offset @off in the
 * mapped BAR. Always returns 0.
 */
static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
{
	struct nvme_dev *dev = to_nvme_dev(ctrl);

	writel(val, dev->bar + off);
	return 0;
}
|
2015-06-06 00:30:08 +08:00
|
|
|
|
2015-11-28 22:37:52 +08:00
|
|
|
/*
 * nvme_ctrl_ops.reg_read64 implementation: read a 64-bit register from the
 * mapped BAR at byte offset @off into *@val.  Always returns 0.
 *
 * The read is deliberately done with lo_hi_readq() (two 32-bit accesses)
 * rather than a single 64-bit access: some PCIe NVMe controllers do not
 * implement 64-bit accesses correctly.
 */
static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
{
	struct nvme_dev *ndev = to_nvme_dev(ctrl);

	*val = lo_hi_readq(ndev->bar + off);

	return 0;
}
|
|
|
|
|
2018-03-09 05:50:32 +08:00
|
|
|
static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
|
|
|
|
{
|
|
|
|
struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);
|
|
|
|
|
2020-03-09 23:04:12 +08:00
|
|
|
return snprintf(buf, size, "%s\n", dev_name(&pdev->dev));
|
2018-03-09 05:50:32 +08:00
|
|
|
}
|
|
|
|
|
2015-11-26 17:06:56 +08:00
|
|
|
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
|
2016-06-13 22:45:24 +08:00
|
|
|
.name = "pcie",
|
2016-02-11 02:03:29 +08:00
|
|
|
.module = THIS_MODULE,
|
2018-10-05 05:27:44 +08:00
|
|
|
.flags = NVME_F_METADATA_SUPPORTED |
|
|
|
|
NVME_F_PCI_P2PDMA,
|
2015-11-26 17:06:56 +08:00
|
|
|
.reg_read32 = nvme_pci_reg_read32,
|
2015-11-28 22:03:49 +08:00
|
|
|
.reg_write32 = nvme_pci_reg_write32,
|
2015-11-28 22:37:52 +08:00
|
|
|
.reg_read64 = nvme_pci_reg_read64,
|
2015-11-26 17:54:19 +08:00
|
|
|
.free_ctrl = nvme_pci_free_ctrl,
|
2016-04-26 19:52:00 +08:00
|
|
|
.submit_async_event = nvme_pci_submit_async_event,
|
2018-03-09 05:50:32 +08:00
|
|
|
.get_address = nvme_pci_get_address,
|
2015-11-26 17:06:56 +08:00
|
|
|
};
|
2015-06-06 00:30:08 +08:00
|
|
|
|
2016-02-25 00:15:52 +08:00
|
|
|
static int nvme_dev_map(struct nvme_dev *dev)
|
|
|
|
{
|
|
|
|
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
|
|
|
|
2016-06-07 15:44:02 +08:00
|
|
|
if (pci_request_mem_regions(pdev, "nvme"))
|
2016-02-25 00:15:52 +08:00
|
|
|
return -ENODEV;
|
|
|
|
|
2017-05-24 16:39:55 +08:00
|
|
|
if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
|
2016-02-25 00:15:52 +08:00
|
|
|
goto release;
|
|
|
|
|
2016-12-19 22:18:24 +08:00
|
|
|
return 0;
|
2016-02-25 00:15:52 +08:00
|
|
|
release:
|
2016-12-19 22:18:24 +08:00
|
|
|
pci_release_mem_regions(pdev);
|
|
|
|
return -ENODEV;
|
2016-02-25 00:15:52 +08:00
|
|
|
}
|
|
|
|
|
2017-11-09 14:12:03 +08:00
|
|
|
static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
|
2017-04-21 04:37:55 +08:00
|
|
|
{
|
|
|
|
if (pdev->vendor == 0x144d && pdev->device == 0xa802) {
|
|
|
|
/*
|
|
|
|
* Several Samsung devices seem to drop off the PCIe bus
|
|
|
|
* randomly when APST is on and uses the deepest sleep state.
|
|
|
|
* This has been observed on a Samsung "SM951 NVMe SAMSUNG
|
|
|
|
* 256GB", a "PM951 NVMe SAMSUNG 512GB", and a "Samsung SSD
|
|
|
|
* 950 PRO 256GB", but it seems to be restricted to two Dell
|
|
|
|
* laptops.
|
|
|
|
*/
|
|
|
|
if (dmi_match(DMI_SYS_VENDOR, "Dell Inc.") &&
|
|
|
|
(dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") ||
|
|
|
|
dmi_match(DMI_PRODUCT_NAME, "Precision 5510")))
|
|
|
|
return NVME_QUIRK_NO_DEEPEST_PS;
|
2017-11-09 14:12:03 +08:00
|
|
|
} else if (pdev->vendor == 0x144d && pdev->device == 0xa804) {
|
|
|
|
/*
|
|
|
|
* Samsung SSD 960 EVO drops off the PCIe bus after system
|
2018-03-12 02:51:56 +08:00
|
|
|
* suspend on a Ryzen board, ASUS PRIME B350M-A, as well as
|
|
|
|
* within few minutes after bootup on a Coffee Lake board -
|
|
|
|
* ASUS PRIME Z370-A
|
2017-11-09 14:12:03 +08:00
|
|
|
*/
|
|
|
|
if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") &&
|
2018-03-12 02:51:56 +08:00
|
|
|
(dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") ||
|
|
|
|
dmi_match(DMI_BOARD_NAME, "PRIME Z370-A")))
|
2017-11-09 14:12:03 +08:00
|
|
|
return NVME_QUIRK_NO_APST;
|
2020-02-07 04:17:25 +08:00
|
|
|
} else if ((pdev->vendor == 0x144d && (pdev->device == 0xa801 ||
|
|
|
|
pdev->device == 0xa808 || pdev->device == 0xa809)) ||
|
|
|
|
(pdev->vendor == 0x1e0f && pdev->device == 0x0001)) {
|
|
|
|
/*
|
|
|
|
* Forcing to use host managed nvme power settings for
|
|
|
|
* lowest idle power with quick resume latency on
|
|
|
|
* Samsung and Toshiba SSDs based on suspend behavior
|
|
|
|
* on Coffee Lake board for LENOVO C640
|
|
|
|
*/
|
|
|
|
if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) &&
|
|
|
|
dmi_match(DMI_BOARD_NAME, "LNVNB161216"))
|
|
|
|
return NVME_QUIRK_SIMPLE_SUSPEND;
|
2017-04-21 04:37:55 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-28 03:42:52 +08:00
|
|
|
/*
 * Asynchronous part of probing, scheduled from nvme_probe() with the
 * nvme_dev as @data: wait for the controller's reset work and then its scan
 * work to finish, and finally call nvme_put_ctrl() on the controller.
 */
static void nvme_async_probe(void *data, async_cookie_t cookie)
{
	struct nvme_dev *ndev = data;
	struct nvme_ctrl *ctrl = &ndev->ctrl;

	flush_work(&ctrl->reset_work);
	flush_work(&ctrl->scan_work);
	nvme_put_ctrl(ctrl);
}
|
|
|
|
|
2012-12-22 07:13:49 +08:00
|
|
|
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
2014-11-04 23:20:14 +08:00
|
|
|
int node, result = -ENOMEM;
|
2011-01-21 01:50:14 +08:00
|
|
|
struct nvme_dev *dev;
|
2017-04-21 04:37:55 +08:00
|
|
|
unsigned long quirks = id->driver_data;
|
2018-06-21 23:49:37 +08:00
|
|
|
size_t alloc_size;
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2014-11-04 23:20:14 +08:00
|
|
|
node = dev_to_node(&pdev->dev);
|
|
|
|
if (node == NUMA_NO_NODE)
|
2016-06-20 08:33:17 +08:00
|
|
|
set_dev_node(&pdev->dev, first_memory_node);
|
2014-11-04 23:20:14 +08:00
|
|
|
|
|
|
|
dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
|
2011-01-21 01:50:14 +08:00
|
|
|
if (!dev)
|
|
|
|
return -ENOMEM;
|
2018-01-14 18:39:01 +08:00
|
|
|
|
2020-05-02 15:29:41 +08:00
|
|
|
dev->nr_write_queues = write_queues;
|
|
|
|
dev->nr_poll_queues = poll_queues;
|
|
|
|
dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1;
|
|
|
|
dev->queues = kcalloc_node(dev->nr_allocated_queues,
|
|
|
|
sizeof(struct nvme_queue), GFP_KERNEL, node);
|
2011-01-21 01:50:14 +08:00
|
|
|
if (!dev->queues)
|
|
|
|
goto free;
|
|
|
|
|
2015-05-22 17:12:39 +08:00
|
|
|
dev->dev = get_device(&pdev->dev);
|
2013-12-11 04:10:36 +08:00
|
|
|
pci_set_drvdata(pdev, dev);
|
2015-11-26 17:06:56 +08:00
|
|
|
|
2016-02-25 00:15:52 +08:00
|
|
|
result = nvme_dev_map(dev);
|
|
|
|
if (result)
|
2017-07-16 16:39:03 +08:00
|
|
|
goto put_pci;
|
2016-02-25 00:15:52 +08:00
|
|
|
|
2017-06-15 21:41:08 +08:00
|
|
|
INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
|
2015-11-26 19:35:49 +08:00
|
|
|
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
|
2015-11-26 19:21:29 +08:00
|
|
|
mutex_init(&dev->shutdown_lock);
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2011-02-10 22:56:01 +08:00
|
|
|
result = nvme_setup_prp_pools(dev);
|
|
|
|
if (result)
|
2017-07-16 16:39:03 +08:00
|
|
|
goto unmap;
|
2015-06-06 00:30:08 +08:00
|
|
|
|
2017-11-09 14:12:03 +08:00
|
|
|
quirks |= check_vendor_combination_bug(pdev);
|
2017-04-21 04:37:55 +08:00
|
|
|
|
2021-06-10 02:40:17 +08:00
|
|
|
if (!noacpi && acpi_storage_d3(&pdev->dev)) {
|
2020-07-10 02:43:33 +08:00
|
|
|
/*
|
|
|
|
* Some systems use a bios work around to ask for D3 on
|
|
|
|
* platforms that support kernel managed suspend.
|
|
|
|
*/
|
|
|
|
dev_info(&pdev->dev,
|
|
|
|
"platform quirk: setting simple suspend\n");
|
|
|
|
quirks |= NVME_QUIRK_SIMPLE_SUSPEND;
|
|
|
|
}
|
|
|
|
|
2018-06-21 23:49:37 +08:00
|
|
|
/*
|
|
|
|
* Double check that our mempool alloc size will cover the biggest
|
|
|
|
* command we support.
|
|
|
|
*/
|
2020-07-20 21:23:37 +08:00
|
|
|
alloc_size = nvme_pci_iod_alloc_size();
|
2018-06-21 23:49:37 +08:00
|
|
|
WARN_ON_ONCE(alloc_size > PAGE_SIZE);
|
|
|
|
|
|
|
|
dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
|
|
|
|
mempool_kfree,
|
|
|
|
(void *) alloc_size,
|
|
|
|
GFP_KERNEL, node);
|
|
|
|
if (!dev->iod_mempool) {
|
|
|
|
result = -ENOMEM;
|
|
|
|
goto release_pools;
|
|
|
|
}
|
|
|
|
|
2018-07-12 06:44:44 +08:00
|
|
|
result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
|
|
|
|
quirks);
|
|
|
|
if (result)
|
|
|
|
goto release_mempool;
|
|
|
|
|
2016-02-10 23:51:15 +08:00
|
|
|
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
|
|
|
|
|
2019-07-30 06:34:52 +08:00
|
|
|
nvme_reset_ctrl(&dev->ctrl);
|
2018-04-28 03:42:52 +08:00
|
|
|
async_schedule(nvme_async_probe, dev);
|
2017-12-31 20:01:19 +08:00
|
|
|
|
2011-01-21 01:50:14 +08:00
|
|
|
return 0;
|
|
|
|
|
2018-07-12 06:44:44 +08:00
|
|
|
release_mempool:
|
|
|
|
mempool_destroy(dev->iod_mempool);
|
2013-07-16 05:02:19 +08:00
|
|
|
release_pools:
|
2011-02-10 22:56:01 +08:00
|
|
|
nvme_release_prp_pools(dev);
|
2017-07-16 16:39:03 +08:00
|
|
|
unmap:
|
|
|
|
nvme_dev_unmap(dev);
|
2014-08-20 09:15:59 +08:00
|
|
|
put_pci:
|
2015-05-22 17:12:39 +08:00
|
|
|
put_device(dev->dev);
|
2011-01-21 01:50:14 +08:00
|
|
|
free:
|
|
|
|
kfree(dev->queues);
|
|
|
|
kfree(dev);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2017-06-01 19:10:38 +08:00
|
|
|
static void nvme_reset_prepare(struct pci_dev *pdev)
|
2014-05-03 00:40:43 +08:00
|
|
|
{
|
2014-06-24 06:03:21 +08:00
|
|
|
struct nvme_dev *dev = pci_get_drvdata(pdev);
|
2019-09-05 00:06:11 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't need to check the return value from waiting for the reset
|
|
|
|
* state as pci_dev device lock is held, making it impossible to race
|
|
|
|
* with ->remove().
|
|
|
|
*/
|
|
|
|
nvme_disable_prepare_reset(dev, false);
|
|
|
|
nvme_sync_queues(&dev->ctrl);
|
2017-06-01 19:10:38 +08:00
|
|
|
}
|
2014-05-03 00:40:43 +08:00
|
|
|
|
2017-06-01 19:10:38 +08:00
|
|
|
static void nvme_reset_done(struct pci_dev *pdev)
|
|
|
|
{
|
2017-07-09 06:51:57 +08:00
|
|
|
struct nvme_dev *dev = pci_get_drvdata(pdev);
|
2019-09-05 00:06:11 +08:00
|
|
|
|
|
|
|
if (!nvme_try_sched_reset(&dev->ctrl))
|
|
|
|
flush_work(&dev->ctrl.reset_work);
|
2014-05-03 00:40:43 +08:00
|
|
|
}
|
|
|
|
|
2014-01-28 00:29:40 +08:00
|
|
|
static void nvme_shutdown(struct pci_dev *pdev)
|
|
|
|
{
|
|
|
|
struct nvme_dev *dev = pci_get_drvdata(pdev);
|
2020-07-03 10:49:21 +08:00
|
|
|
|
2019-09-05 00:06:11 +08:00
|
|
|
nvme_disable_prepare_reset(dev, true);
|
2014-01-28 00:29:40 +08:00
|
|
|
}
|
|
|
|
|
2021-07-15 05:02:37 +08:00
|
|
|
static void nvme_remove_attrs(struct nvme_dev *dev)
|
|
|
|
{
|
|
|
|
if (dev->attrs_added)
|
|
|
|
sysfs_remove_group(&dev->ctrl.device->kobj,
|
|
|
|
&nvme_pci_attr_group);
|
|
|
|
}
|
|
|
|
|
2016-02-25 00:15:55 +08:00
|
|
|
/*
|
|
|
|
* The driver's remove may be called on a device in a partially initialized
|
|
|
|
* state. This function must not have any dependencies on the device state in
|
|
|
|
* order to proceed.
|
|
|
|
*/
|
2012-12-22 07:13:49 +08:00
|
|
|
static void nvme_remove(struct pci_dev *pdev)
|
2011-01-21 01:50:14 +08:00
|
|
|
{
|
|
|
|
struct nvme_dev *dev = pci_get_drvdata(pdev);
|
2013-12-11 04:10:36 +08:00
|
|
|
|
2016-04-26 19:51:57 +08:00
|
|
|
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
|
2013-12-11 04:10:36 +08:00
|
|
|
pci_set_drvdata(pdev, NULL);
|
2016-05-12 22:37:14 +08:00
|
|
|
|
2017-02-11 07:15:49 +08:00
|
|
|
if (!pci_device_is_present(pdev)) {
|
2016-05-12 22:37:14 +08:00
|
|
|
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
|
2018-06-06 22:13:08 +08:00
|
|
|
nvme_dev_disable(dev, true);
|
2017-02-11 07:15:49 +08:00
|
|
|
}
|
2016-05-12 22:37:14 +08:00
|
|
|
|
2017-06-15 21:41:08 +08:00
|
|
|
flush_work(&dev->ctrl.reset_work);
|
2017-07-02 15:56:43 +08:00
|
|
|
nvme_stop_ctrl(&dev->ctrl);
|
|
|
|
nvme_remove_namespaces(&dev->ctrl);
|
2016-01-13 05:41:18 +08:00
|
|
|
nvme_dev_disable(dev, true);
|
2021-07-15 05:02:37 +08:00
|
|
|
nvme_remove_attrs(dev);
|
2017-05-12 23:02:58 +08:00
|
|
|
nvme_free_host_mem(dev);
|
2014-11-04 23:20:14 +08:00
|
|
|
nvme_dev_remove_admin(dev);
|
2013-12-17 02:50:00 +08:00
|
|
|
nvme_free_queues(dev, 0);
|
2013-12-11 04:10:36 +08:00
|
|
|
nvme_release_prp_pools(dev);
|
2016-02-25 00:15:52 +08:00
|
|
|
nvme_dev_unmap(dev);
|
2020-03-24 23:29:42 +08:00
|
|
|
nvme_uninit_ctrl(&dev->ctrl);
|
2011-01-21 01:50:14 +08:00
|
|
|
}
|
|
|
|
|
2014-02-13 10:19:14 +08:00
|
|
|
#ifdef CONFIG_PM_SLEEP
|
2019-05-23 23:27:35 +08:00
|
|
|
/*
 * Issue Get Features for NVME_FEAT_POWER_MGMT and store the result in *@ps.
 * Returns whatever nvme_get_features() returns.
 */
static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps)
{
	int status;

	status = nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps);

	return status;
}
|
|
|
|
|
|
|
|
/*
 * Issue Set Features for NVME_FEAT_POWER_MGMT with @ps as the feature value.
 * Returns whatever nvme_set_features() returns.
 */
static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps)
{
	int status;

	status = nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0,
				   NULL);

	return status;
}
|
|
|
|
|
|
|
|
static int nvme_resume(struct device *dev)
|
|
|
|
{
|
|
|
|
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
|
|
|
|
struct nvme_ctrl *ctrl = &ndev->ctrl;
|
|
|
|
|
nvme-pci: Allow PCI bus-level PM to be used if ASPM is disabled
One of the modifications made by commit d916b1be94b6 ("nvme-pci: use
host managed power state for suspend") was adding a pci_save_state()
call to nvme_suspend() so as to instruct the PCI bus type to leave
devices handled by the nvme driver in D0 during suspend-to-idle.
That was done with the assumption that ASPM would transition the
device's PCIe link into a low-power state when the device became
inactive. However, if ASPM is disabled for the device, its PCIe
link will stay in L0 and in that case commit d916b1be94b6 is likely
to cause the energy used by the system while suspended to increase.
Namely, if the device in question works in accordance with the PCIe
specification, putting it into D3hot causes its PCIe link to go to
L1 or L2/L3 Ready, which is lower-power than L0. Since the energy
used by the system while suspended depends on the state of its PCIe
link (as a general rule, the lower-power the state of the link, the
less energy the system will use), putting the device into D3hot
during suspend-to-idle should be more energy-efficient that leaving
it in D0 with disabled ASPM.
For this reason, avoid leaving NVMe devices with disabled ASPM in D0
during suspend-to-idle. Instead, shut them down entirely and let
the PCI bus type put them into D3.
Fixes: d916b1be94b6 ("nvme-pci: use host managed power state for suspend")
Link: https://lore.kernel.org/linux-pm/2763495.NmdaWeg79L@kreacher/T/#t
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
2019-08-09 05:58:38 +08:00
|
|
|
if (ndev->last_ps == U32_MAX ||
|
2019-05-23 23:27:35 +08:00
|
|
|
nvme_set_power_state(ctrl, ndev->last_ps) != 0)
|
2021-07-28 00:40:44 +08:00
|
|
|
goto reset;
|
|
|
|
if (ctrl->hmpre && nvme_setup_host_mem(ndev))
|
|
|
|
goto reset;
|
|
|
|
|
2019-05-23 23:27:35 +08:00
|
|
|
return 0;
|
2021-07-28 00:40:44 +08:00
|
|
|
reset:
|
|
|
|
return nvme_try_sched_reset(ctrl);
|
2019-05-23 23:27:35 +08:00
|
|
|
}
|
|
|
|
|
2013-07-16 05:02:23 +08:00
|
|
|
static int nvme_suspend(struct device *dev)
|
|
|
|
{
|
|
|
|
struct pci_dev *pdev = to_pci_dev(dev);
|
|
|
|
struct nvme_dev *ndev = pci_get_drvdata(pdev);
|
2019-05-23 23:27:35 +08:00
|
|
|
struct nvme_ctrl *ctrl = &ndev->ctrl;
|
|
|
|
int ret = -EBUSY;
|
|
|
|
|
nvme-pci: Allow PCI bus-level PM to be used if ASPM is disabled
One of the modifications made by commit d916b1be94b6 ("nvme-pci: use
host managed power state for suspend") was adding a pci_save_state()
call to nvme_suspend() so as to instruct the PCI bus type to leave
devices handled by the nvme driver in D0 during suspend-to-idle.
That was done with the assumption that ASPM would transition the
device's PCIe link into a low-power state when the device became
inactive. However, if ASPM is disabled for the device, its PCIe
link will stay in L0 and in that case commit d916b1be94b6 is likely
to cause the energy used by the system while suspended to increase.
Namely, if the device in question works in accordance with the PCIe
specification, putting it into D3hot causes its PCIe link to go to
L1 or L2/L3 Ready, which is lower-power than L0. Since the energy
used by the system while suspended depends on the state of its PCIe
link (as a general rule, the lower-power the state of the link, the
less energy the system will use), putting the device into D3hot
during suspend-to-idle should be more energy-efficient that leaving
it in D0 with disabled ASPM.
For this reason, avoid leaving NVMe devices with disabled ASPM in D0
during suspend-to-idle. Instead, shut them down entirely and let
the PCI bus type put them into D3.
Fixes: d916b1be94b6 ("nvme-pci: use host managed power state for suspend")
Link: https://lore.kernel.org/linux-pm/2763495.NmdaWeg79L@kreacher/T/#t
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
2019-08-09 05:58:38 +08:00
|
|
|
ndev->last_ps = U32_MAX;
|
|
|
|
|
2019-05-23 23:27:35 +08:00
|
|
|
/*
|
|
|
|
* The platform does not remove power for a kernel managed suspend so
|
|
|
|
* use host managed nvme power settings for lowest idle power if
|
|
|
|
* possible. This should have quicker resume latency than a full device
|
|
|
|
* shutdown. But if the firmware is involved after the suspend or the
|
|
|
|
* device does not support any non-default power states, shut down the
|
|
|
|
* device fully.
|
nvme-pci: Allow PCI bus-level PM to be used if ASPM is disabled
One of the modifications made by commit d916b1be94b6 ("nvme-pci: use
host managed power state for suspend") was adding a pci_save_state()
call to nvme_suspend() so as to instruct the PCI bus type to leave
devices handled by the nvme driver in D0 during suspend-to-idle.
That was done with the assumption that ASPM would transition the
device's PCIe link into a low-power state when the device became
inactive. However, if ASPM is disabled for the device, its PCIe
link will stay in L0 and in that case commit d916b1be94b6 is likely
to cause the energy used by the system while suspended to increase.
Namely, if the device in question works in accordance with the PCIe
specification, putting it into D3hot causes its PCIe link to go to
L1 or L2/L3 Ready, which is lower-power than L0. Since the energy
used by the system while suspended depends on the state of its PCIe
link (as a general rule, the lower-power the state of the link, the
less energy the system will use), putting the device into D3hot
during suspend-to-idle should be more energy-efficient that leaving
it in D0 with disabled ASPM.
For this reason, avoid leaving NVMe devices with disabled ASPM in D0
during suspend-to-idle. Instead, shut them down entirely and let
the PCI bus type put them into D3.
Fixes: d916b1be94b6 ("nvme-pci: use host managed power state for suspend")
Link: https://lore.kernel.org/linux-pm/2763495.NmdaWeg79L@kreacher/T/#t
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
2019-08-09 05:58:38 +08:00
|
|
|
*
|
|
|
|
* If ASPM is not enabled for the device, shut down the device and allow
|
|
|
|
* the PCI bus layer to put it into D3 in order to take the PCIe link
|
|
|
|
* down, so as to allow the platform to achieve its minimum low-power
|
|
|
|
* state (which may not be possible if the link is up).
|
2019-05-23 23:27:35 +08:00
|
|
|
*/
|
nvme-pci: Allow PCI bus-level PM to be used if ASPM is disabled
One of the modifications made by commit d916b1be94b6 ("nvme-pci: use
host managed power state for suspend") was adding a pci_save_state()
call to nvme_suspend() so as to instruct the PCI bus type to leave
devices handled by the nvme driver in D0 during suspend-to-idle.
That was done with the assumption that ASPM would transition the
device's PCIe link into a low-power state when the device became
inactive. However, if ASPM is disabled for the device, its PCIe
link will stay in L0 and in that case commit d916b1be94b6 is likely
to cause the energy used by the system while suspended to increase.
Namely, if the device in question works in accordance with the PCIe
specification, putting it into D3hot causes its PCIe link to go to
L1 or L2/L3 Ready, which is lower-power than L0. Since the energy
used by the system while suspended depends on the state of its PCIe
link (as a general rule, the lower-power the state of the link, the
less energy the system will use), putting the device into D3hot
during suspend-to-idle should be more energy-efficient that leaving
it in D0 with disabled ASPM.
For this reason, avoid leaving NVMe devices with disabled ASPM in D0
during suspend-to-idle. Instead, shut them down entirely and let
the PCI bus type put them into D3.
Fixes: d916b1be94b6 ("nvme-pci: use host managed power state for suspend")
Link: https://lore.kernel.org/linux-pm/2763495.NmdaWeg79L@kreacher/T/#t
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
2019-08-09 05:58:38 +08:00
|
|
|
if (pm_suspend_via_firmware() || !ctrl->npss ||
|
2019-08-17 04:16:19 +08:00
|
|
|
!pcie_aspm_enabled(pdev) ||
|
2019-09-05 00:06:11 +08:00
|
|
|
(ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
|
|
|
|
return nvme_disable_prepare_reset(ndev, true);
|
2019-05-23 23:27:35 +08:00
|
|
|
|
|
|
|
nvme_start_freeze(ctrl);
|
|
|
|
nvme_wait_freeze(ctrl);
|
|
|
|
nvme_sync_queues(ctrl);
|
|
|
|
|
2019-09-03 23:22:24 +08:00
|
|
|
if (ctrl->state != NVME_CTRL_LIVE)
|
2019-05-23 23:27:35 +08:00
|
|
|
goto unfreeze;
|
|
|
|
|
2021-07-28 00:40:44 +08:00
|
|
|
/*
|
|
|
|
* Host memory access may not be successful in a system suspend state,
|
|
|
|
* but the specification allows the controller to access memory in a
|
|
|
|
* non-operational power state.
|
|
|
|
*/
|
|
|
|
if (ndev->hmb) {
|
|
|
|
ret = nvme_set_host_mem(ndev, 0);
|
|
|
|
if (ret < 0)
|
|
|
|
goto unfreeze;
|
|
|
|
}
|
|
|
|
|
2019-05-23 23:27:35 +08:00
|
|
|
ret = nvme_get_power_state(ctrl, &ndev->last_ps);
|
|
|
|
if (ret < 0)
|
|
|
|
goto unfreeze;
|
|
|
|
|
2019-09-19 02:15:55 +08:00
|
|
|
/*
|
|
|
|
* A saved state prevents pci pm from generically controlling the
|
|
|
|
* device's power. If we're using protocol specific settings, we don't
|
|
|
|
* want pci interfering.
|
|
|
|
*/
|
|
|
|
pci_save_state(pdev);
|
|
|
|
|
2019-05-23 23:27:35 +08:00
|
|
|
ret = nvme_set_power_state(ctrl, ctrl->npss);
|
|
|
|
if (ret < 0)
|
|
|
|
goto unfreeze;
|
|
|
|
|
|
|
|
if (ret) {
|
2019-09-19 02:15:55 +08:00
|
|
|
/* discard the saved state */
|
|
|
|
pci_load_saved_state(pdev, NULL);
|
|
|
|
|
2019-05-23 23:27:35 +08:00
|
|
|
/*
|
|
|
|
* Clearing npss forces a controller reset on resume. The
|
2019-10-24 23:24:00 +08:00
|
|
|
* correct value will be rediscovered then.
|
2019-05-23 23:27:35 +08:00
|
|
|
*/
|
2019-09-05 00:06:11 +08:00
|
|
|
ret = nvme_disable_prepare_reset(ndev, true);
|
2019-05-23 23:27:35 +08:00
|
|
|
ctrl->npss = 0;
|
|
|
|
}
|
|
|
|
unfreeze:
|
|
|
|
nvme_unfreeze(ctrl);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nvme_simple_suspend(struct device *dev)
|
|
|
|
{
|
|
|
|
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
|
2020-07-03 10:49:21 +08:00
|
|
|
|
2019-09-05 00:06:11 +08:00
|
|
|
return nvme_disable_prepare_reset(ndev, true);
|
2013-07-16 05:02:23 +08:00
|
|
|
}
|
|
|
|
|
2019-05-23 23:27:35 +08:00
|
|
|
static int nvme_simple_resume(struct device *dev)
|
2013-07-16 05:02:23 +08:00
|
|
|
{
|
|
|
|
struct pci_dev *pdev = to_pci_dev(dev);
|
|
|
|
struct nvme_dev *ndev = pci_get_drvdata(pdev);
|
|
|
|
|
2019-09-05 00:06:11 +08:00
|
|
|
return nvme_try_sched_reset(&ndev->ctrl);
|
2013-07-16 05:02:23 +08:00
|
|
|
}
|
|
|
|
|
2019-06-26 10:09:02 +08:00
|
|
|
static const struct dev_pm_ops nvme_dev_pm_ops = {
|
2019-05-23 23:27:35 +08:00
|
|
|
.suspend = nvme_suspend,
|
|
|
|
.resume = nvme_resume,
|
|
|
|
.freeze = nvme_simple_suspend,
|
|
|
|
.thaw = nvme_simple_resume,
|
|
|
|
.poweroff = nvme_simple_suspend,
|
|
|
|
.restore = nvme_simple_resume,
|
|
|
|
};
|
|
|
|
#endif /* CONFIG_PM_SLEEP */
|
2011-01-21 01:50:14 +08:00
|
|
|
|
2015-12-08 06:30:31 +08:00
|
|
|
static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
|
|
|
|
pci_channel_state_t state)
|
|
|
|
{
|
|
|
|
struct nvme_dev *dev = pci_get_drvdata(pdev);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A frozen channel requires a reset. When detected, this method will
|
|
|
|
* shutdown the controller to quiesce. The controller will be restarted
|
|
|
|
* after the slot reset through driver's slot_reset callback.
|
|
|
|
*/
|
|
|
|
switch (state) {
|
|
|
|
case pci_channel_io_normal:
|
|
|
|
return PCI_ERS_RESULT_CAN_RECOVER;
|
|
|
|
case pci_channel_io_frozen:
|
2016-04-05 05:07:41 +08:00
|
|
|
dev_warn(dev->ctrl.device,
|
|
|
|
"frozen state error detected, reset controller\n");
|
2016-01-13 05:41:18 +08:00
|
|
|
nvme_dev_disable(dev, false);
|
2015-12-08 06:30:31 +08:00
|
|
|
return PCI_ERS_RESULT_NEED_RESET;
|
|
|
|
case pci_channel_io_perm_failure:
|
2016-04-05 05:07:41 +08:00
|
|
|
dev_warn(dev->ctrl.device,
|
|
|
|
"failure state error detected, request disconnect\n");
|
2015-12-08 06:30:31 +08:00
|
|
|
return PCI_ERS_RESULT_DISCONNECT;
|
|
|
|
}
|
|
|
|
return PCI_ERS_RESULT_NEED_RESET;
|
|
|
|
}
|
|
|
|
|
|
|
|
static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
|
|
|
|
{
|
|
|
|
struct nvme_dev *dev = pci_get_drvdata(pdev);
|
|
|
|
|
2016-02-10 23:51:15 +08:00
|
|
|
dev_info(dev->ctrl.device, "restart after slot reset\n");
|
2015-12-08 06:30:31 +08:00
|
|
|
pci_restore_state(pdev);
|
2017-06-15 21:41:08 +08:00
|
|
|
nvme_reset_ctrl(&dev->ctrl);
|
2015-12-08 06:30:31 +08:00
|
|
|
return PCI_ERS_RESULT_RECOVERED;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nvme_error_resume(struct pci_dev *pdev)
|
|
|
|
{
|
2018-05-25 06:16:04 +08:00
|
|
|
struct nvme_dev *dev = pci_get_drvdata(pdev);
|
|
|
|
|
|
|
|
flush_work(&dev->ctrl.reset_work);
|
2015-12-08 06:30:31 +08:00
|
|
|
}
|
|
|
|
|
2012-09-08 00:33:17 +08:00
|
|
|
static const struct pci_error_handlers nvme_err_handler = {
|
2011-01-21 01:50:14 +08:00
|
|
|
.error_detected = nvme_error_detected,
|
|
|
|
.slot_reset = nvme_slot_reset,
|
|
|
|
.resume = nvme_error_resume,
|
2017-06-01 19:10:38 +08:00
|
|
|
.reset_prepare = nvme_reset_prepare,
|
|
|
|
.reset_done = nvme_reset_done,
|
2011-01-21 01:50:14 +08:00
|
|
|
};
|
|
|
|
|
2014-03-24 22:11:22 +08:00
|
|
|
static const struct pci_device_id nvme_id_table[] = {
|
2020-07-03 05:31:22 +08:00
|
|
|
{ PCI_VDEVICE(INTEL, 0x0953), /* Intel 750/P3500/P3600/P3700 */
|
2016-03-05 04:15:17 +08:00
|
|
|
.driver_data = NVME_QUIRK_STRIPE_SIZE |
|
2017-04-06 01:21:13 +08:00
|
|
|
NVME_QUIRK_DEALLOCATE_ZEROES, },
|
2020-07-03 05:31:22 +08:00
|
|
|
{ PCI_VDEVICE(INTEL, 0x0a53), /* Intel P3520 */
|
2016-05-03 05:14:24 +08:00
|
|
|
.driver_data = NVME_QUIRK_STRIPE_SIZE |
|
2017-04-06 01:21:13 +08:00
|
|
|
NVME_QUIRK_DEALLOCATE_ZEROES, },
|
2020-07-03 05:31:22 +08:00
|
|
|
{ PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */
|
2016-05-03 05:14:24 +08:00
|
|
|
.driver_data = NVME_QUIRK_STRIPE_SIZE |
|
2021-06-22 07:07:01 +08:00
|
|
|
NVME_QUIRK_DEALLOCATE_ZEROES |
|
|
|
|
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
2020-07-03 05:31:22 +08:00
|
|
|
{ PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */
|
2017-07-11 02:39:59 +08:00
|
|
|
.driver_data = NVME_QUIRK_STRIPE_SIZE |
|
|
|
|
NVME_QUIRK_DEALLOCATE_ZEROES, },
|
2017-05-25 06:06:31 +08:00
|
|
|
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
|
2018-05-09 00:25:15 +08:00
|
|
|
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
|
2019-11-14 23:40:01 +08:00
|
|
|
NVME_QUIRK_MEDIUM_PRIO_SQ |
|
2020-09-11 05:18:50 +08:00
|
|
|
NVME_QUIRK_NO_TEMP_THRESH_CHANGE |
|
|
|
|
NVME_QUIRK_DISABLE_WRITE_ZEROES, },
|
2019-01-09 01:20:51 +08:00
|
|
|
{ PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */
|
|
|
|
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
2015-10-23 05:45:06 +08:00
|
|
|
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
|
2019-03-14 01:55:05 +08:00
|
|
|
.driver_data = NVME_QUIRK_IDENTIFY_CNS |
|
2022-04-12 13:07:56 +08:00
|
|
|
NVME_QUIRK_DISABLE_WRITE_ZEROES |
|
|
|
|
NVME_QUIRK_BOGUS_NID, },
|
|
|
|
{ PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */
|
|
|
|
.driver_data = NVME_QUIRK_BOGUS_NID, },
|
2020-07-28 19:09:03 +08:00
|
|
|
{ PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */
|
|
|
|
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
|
2018-04-13 03:25:25 +08:00
|
|
|
{ PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */
|
2021-02-16 20:25:43 +08:00
|
|
|
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
|
|
|
|
NVME_QUIRK_NO_NS_DESC_LIST, },
|
2016-06-15 05:22:41 +08:00
|
|
|
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
|
|
|
|
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
|
2017-11-22 00:44:37 +08:00
|
|
|
{ PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */
|
|
|
|
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
|
2016-09-09 00:12:11 +08:00
|
|
|
{ PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */
|
|
|
|
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
|
2017-06-28 10:27:57 +08:00
|
|
|
{ PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */
|
|
|
|
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
|
|
|
|
{ PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */
|
2020-12-05 00:16:57 +08:00
|
|
|
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
|
nvme-pci: add the DISABLE_WRITE_ZEROES quirk for a Samsung PM1725a
This adds a quirk for Samsung PM1725a drive which fixes timeouts and
I/O errors due to the fact that the controller does not properly
handle the Write Zeroes command, dmesg log:
nvme nvme0: I/O 528 QID 10 timeout, aborting
nvme nvme0: I/O 529 QID 10 timeout, aborting
nvme nvme0: I/O 530 QID 10 timeout, aborting
nvme nvme0: I/O 531 QID 10 timeout, aborting
nvme nvme0: I/O 532 QID 10 timeout, aborting
nvme nvme0: I/O 533 QID 10 timeout, aborting
nvme nvme0: I/O 534 QID 10 timeout, aborting
nvme nvme0: I/O 535 QID 10 timeout, aborting
nvme nvme0: Abort status: 0x0
nvme nvme0: Abort status: 0x0
nvme nvme0: Abort status: 0x0
nvme nvme0: Abort status: 0x0
nvme nvme0: Abort status: 0x0
nvme nvme0: Abort status: 0x0
nvme nvme0: Abort status: 0x0
nvme nvme0: Abort status: 0x0
nvme nvme0: I/O 528 QID 10 timeout, reset controller
nvme nvme0: controller is down; will reset: CSTS=0x3, PCI_STATUS=0x10
nvme nvme0: Device not ready; aborting reset, CSTS=0x3
nvme nvme0: Device not ready; aborting reset, CSTS=0x3
nvme nvme0: Removing after probe failure status: -19
nvme0n1: detected capacity change from 6251233968 to 0
blk_update_request: I/O error, dev nvme0n1, sector 32776 op 0x1:(WRITE) flags 0x3000 phys_seg 6 prio class 0
blk_update_request: I/O error, dev nvme0n1, sector 113319936 op 0x9:(WRITE_ZEROES) flags 0x800 phys_seg 0 prio class 0
Buffer I/O error on dev nvme0n1p2, logical block 1, lost async page write
blk_update_request: I/O error, dev nvme0n1, sector 113319680 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
Buffer I/O error on dev nvme0n1p2, logical block 2, lost async page write
blk_update_request: I/O error, dev nvme0n1, sector 113319424 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
Buffer I/O error on dev nvme0n1p2, logical block 3, lost async page write
blk_update_request: I/O error, dev nvme0n1, sector 113319168 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
Buffer I/O error on dev nvme0n1p2, logical block 4, lost async page write
blk_update_request: I/O error, dev nvme0n1, sector 113318912 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
Buffer I/O error on dev nvme0n1p2, logical block 5, lost async page write
blk_update_request: I/O error, dev nvme0n1, sector 113318656 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
Buffer I/O error on dev nvme0n1p2, logical block 6, lost async page write
blk_update_request: I/O error, dev nvme0n1, sector 113318400 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
blk_update_request: I/O error, dev nvme0n1, sector 113318144 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
blk_update_request: I/O error, dev nvme0n1, sector 113317888 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
Signed-off-by: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2021-03-10 20:06:41 +08:00
|
|
|
NVME_QUIRK_DISABLE_WRITE_ZEROES|
|
2020-12-05 00:16:57 +08:00
|
|
|
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
2021-02-02 05:08:22 +08:00
|
|
|
{ PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */
|
|
|
|
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
2021-02-24 06:10:46 +08:00
|
|
|
{ PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
|
|
|
|
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
|
|
|
|
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
2019-07-15 15:11:49 +08:00
|
|
|
{ PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
|
|
|
|
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
2019-09-24 02:22:56 +08:00
|
|
|
{ PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */
|
|
|
|
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
|
|
|
|
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
2020-07-24 01:29:10 +08:00
|
|
|
{ PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */
|
|
|
|
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
|
2020-10-13 16:34:45 +08:00
|
|
|
{ PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */
|
|
|
|
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
|
nvme-pci: add the DISABLE_WRITE_ZEROES quirk for a SPCC device
This adds a quirk for SPCC 256GB NVMe 1.3 drive which fixes timeouts and
I/O errors due to the fact that the controller does not properly
handle the Write Zeroes command:
[ 2745.659527] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G E 5.10.6-BET #1
[ 2745.659528] Hardware name: System manufacturer System Product Name/PRIME X570-P, BIOS 3001 12/04/2020
[ 2776.138874] nvme nvme1: I/O 414 QID 3 timeout, aborting
[ 2776.138886] nvme nvme1: I/O 415 QID 3 timeout, aborting
[ 2776.138891] nvme nvme1: I/O 416 QID 3 timeout, aborting
[ 2776.138895] nvme nvme1: I/O 417 QID 3 timeout, aborting
[ 2776.138912] nvme nvme1: Abort status: 0x0
[ 2776.138921] nvme nvme1: I/O 428 QID 3 timeout, aborting
[ 2776.138922] nvme nvme1: Abort status: 0x0
[ 2776.138925] nvme nvme1: Abort status: 0x0
[ 2776.138974] nvme nvme1: Abort status: 0x0
[ 2776.138977] nvme nvme1: Abort status: 0x0
[ 2806.346792] nvme nvme1: I/O 414 QID 3 timeout, reset controller
[ 2806.363566] nvme nvme1: 15/0/0 default/read/poll queues
[ 2836.554298] nvme nvme1: I/O 415 QID 3 timeout, disable controller
[ 2836.672064] blk_update_request: I/O error, dev nvme1n1, sector 16350 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672072] blk_update_request: I/O error, dev nvme1n1, sector 16093 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672074] blk_update_request: I/O error, dev nvme1n1, sector 15836 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672076] blk_update_request: I/O error, dev nvme1n1, sector 15579 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672078] blk_update_request: I/O error, dev nvme1n1, sector 15322 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672080] blk_update_request: I/O error, dev nvme1n1, sector 15065 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672082] blk_update_request: I/O error, dev nvme1n1, sector 14808 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672083] blk_update_request: I/O error, dev nvme1n1, sector 14551 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672085] blk_update_request: I/O error, dev nvme1n1, sector 14294 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672087] blk_update_request: I/O error, dev nvme1n1, sector 14037 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672121] nvme nvme1: failed to mark controller live state
[ 2836.672123] nvme nvme1: Removing after probe failure status: -19
[ 2836.689016] Aborting journal on device dm-0-8.
[ 2836.689024] Buffer I/O error on dev dm-0, logical block 25198592, lost sync page write
[ 2836.689027] JBD2: Error -5 detected when updating journal superblock for dm-0-8.
Reported-by: Bradley Chapman <chapman6235@comcast.net>
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Tested-by: Bradley Chapman <chapman6235@comcast.net>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
2021-01-26 13:19:16 +08:00
|
|
|
{ PCI_DEVICE(0x1d97, 0x2263), /* SPCC */
|
|
|
|
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
|
2021-02-21 13:12:16 +08:00
|
|
|
{ PCI_DEVICE(0x2646, 0x2262), /* KINGSTON SKC2000 NVMe SSD */
|
|
|
|
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
|
2021-01-29 13:24:42 +08:00
|
|
|
{ PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */
|
|
|
|
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
|
2022-04-11 14:05:27 +08:00
|
|
|
{ PCI_DEVICE(0x1e4B, 0x1002), /* MAXIO MAP1002 */
|
|
|
|
.driver_data = NVME_QUIRK_BOGUS_NID, },
|
|
|
|
{ PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */
|
|
|
|
.driver_data = NVME_QUIRK_BOGUS_NID, },
|
2021-02-10 08:39:42 +08:00
|
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
|
|
|
|
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
|
|
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
|
|
|
|
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
|
|
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x8061),
|
|
|
|
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
|
|
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0xcd00),
|
|
|
|
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
|
|
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0xcd01),
|
|
|
|
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
|
|
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0xcd02),
|
|
|
|
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
|
2020-02-12 18:32:18 +08:00
|
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
|
|
|
|
.driver_data = NVME_QUIRK_SINGLE_VECTOR },
|
2017-02-23 06:17:29 +08:00
|
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
|
2019-08-07 15:51:21 +08:00
|
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005),
|
|
|
|
.driver_data = NVME_QUIRK_SINGLE_VECTOR |
|
2019-08-07 15:51:22 +08:00
|
|
|
NVME_QUIRK_128_BYTES_SQES |
|
2021-09-27 23:43:06 +08:00
|
|
|
NVME_QUIRK_SHARED_TAGS |
|
|
|
|
NVME_QUIRK_SKIP_CID_GEN },
|
2022-03-16 15:54:49 +08:00
|
|
|
{ PCI_DEVICE(0x144d, 0xa808), /* Samsung X5 */
|
|
|
|
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY|
|
|
|
|
NVME_QUIRK_NO_DEEPEST_PS |
|
|
|
|
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
|
2020-08-18 16:35:30 +08:00
|
|
|
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
|
2011-01-21 01:50:14 +08:00
|
|
|
{ 0, }
|
|
|
|
};
|
|
|
|
MODULE_DEVICE_TABLE(pci, nvme_id_table);
|
|
|
|
|
|
|
|
static struct pci_driver nvme_driver = {
|
|
|
|
.name = "nvme",
|
|
|
|
.id_table = nvme_id_table,
|
|
|
|
.probe = nvme_probe,
|
2012-12-22 07:13:49 +08:00
|
|
|
.remove = nvme_remove,
|
2014-01-28 00:29:40 +08:00
|
|
|
.shutdown = nvme_shutdown,
|
2019-05-23 23:27:35 +08:00
|
|
|
#ifdef CONFIG_PM_SLEEP
|
2013-07-16 05:02:23 +08:00
|
|
|
.driver = {
|
|
|
|
.pm = &nvme_dev_pm_ops,
|
|
|
|
},
|
2019-05-23 23:27:35 +08:00
|
|
|
#endif
|
2018-04-25 05:47:27 +08:00
|
|
|
.sriov_configure = pci_sriov_configure_simple,
|
2011-01-21 01:50:14 +08:00
|
|
|
.err_handler = &nvme_err_handler,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Module entry point.
 *
 * Verifies a few layout assumptions at build time and then registers the
 * PCI driver.  Returns whatever pci_register_driver() returns.
 */
static int __init nvme_init(void)
{
	int ret;

	/* The queue management commands must each be exactly 64 bytes. */
	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);

	/* The driver relies on at least two IRQ affinity sets. */
	BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);

	ret = pci_register_driver(&nvme_driver);
	return ret;
}
|
|
|
|
|
|
|
|
/*
 * Module exit point: unregister the PCI driver first, then flush nvme_wq.
 * NOTE(review): the flush presumably drains work still queued by the
 * driver after unregistration - confirm.
 */
static void __exit nvme_exit(void)
{
	pci_unregister_driver(&nvme_driver);

	flush_workqueue(nvme_wq);
}
|
|
|
|
|
|
|
|
/* Module metadata. */
MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");

/* Module load/unload entry points. */
module_init(nvme_init);
module_exit(nvme_exit);
|