From 12d9f07022dcde261ad16e9a11f45096dc68b03c Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Fri, 4 May 2018 16:01:57 +0800 Subject: [PATCH 1/3] nvme: fix use-after-free in nvme_free_ns_head Currently only nvme_ctrl will take a reference counter of nvme_subsystem, nvme_ns_head also needs it. Otherwise nvme_free_ns_head will access the nvme_subsystem.ns_ida which has been freed by __nvme_release_subsystem after all the reference of nvme_subsystem have been released by nvme_free_ctrl. This could cause memory corruption. BUG: KASAN: use-after-free in radix_tree_next_chunk+0x9f/0x4b0 Read of size 8 at addr ffff88036494d2e8 by task fio/1815 CPU: 1 PID: 1815 Comm: fio Kdump: loaded Tainted: G W 4.17.0-rc1+ #18 Hardware name: LENOVO 10MLS0E339/3106, BIOS M1AKT22A 06/27/2017 Call Trace: dump_stack+0x91/0xeb print_address_description+0x6b/0x290 kasan_report+0x261/0x360 radix_tree_next_chunk+0x9f/0x4b0 ida_remove+0x8b/0x180 ida_simple_remove+0x26/0x40 nvme_free_ns_head+0x58/0xc0 __blkdev_put+0x30a/0x3a0 blkdev_close+0x44/0x50 __fput+0x184/0x380 task_work_run+0xaf/0xe0 do_exit+0x501/0x1440 do_group_exit+0x89/0x140 __x64_sys_exit_group+0x28/0x30 do_syscall_64+0x72/0x230 Signed-off-by: Jianchao Wang Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a3771c5729f5..2cbc378bc0d6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -99,6 +99,7 @@ static struct class *nvme_subsys_class; static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); +static void nvme_put_subsystem(struct nvme_subsystem *subsys); int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { @@ -350,6 +351,7 @@ static void nvme_free_ns_head(struct kref *ref) ida_simple_remove(&head->subsys->ns_ida, head->instance); list_del_init(&head->entry); cleanup_srcu_struct(&head->srcu); + nvme_put_subsystem(head->subsys); kfree(head); } @@ -2861,6 +2863,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, goto out_cleanup_srcu; list_add_tail(&head->entry, &ctrl->subsys->nsheads); + + kref_get(&ctrl->subsys->ref); + return head; out_cleanup_srcu: cleanup_srcu_struct(&head->srcu); From 4e50d9ebaeaa3c6761d2b513ef7039510c8cf213 Mon Sep 17 00:00:00 2001 From: Charles Machalow Date: Thu, 10 May 2018 16:01:38 -0700 Subject: [PATCH 2/3] nvme: Fix sync controller reset return If a controller reset is requested while the device has no namespaces, we were incorrectly returning ENETRESET. This patch adds the check for ADMIN_ONLY controller state to indicate a successful reset. Fixes: 8000d1fdb0 ("nvme-rdma: fix sysfs invoked reset_ctrl error flow ") Cc: Signed-off-by: Charles Machalow [changelog] Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2cbc378bc0d6..99b857e5a7a9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -118,7 +118,8 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) ret = nvme_reset_ctrl(ctrl); if (!ret) { flush_work(&ctrl->reset_work); - if (ctrl->state != NVME_CTRL_LIVE) + if (ctrl->state != NVME_CTRL_LIVE && + ctrl->state != NVME_CTRL_ADMIN_ONLY) ret = -ENETRESET; } From 9abd68ef454c824bfd18629033367b4382b5f390 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 May 2018 10:25:15 -0600 Subject: [PATCH 3/3] nvme: add quirk to force medium priority for SQ creation Some P3100 drives have a bug where they think WRRU (weighted round robin) is always enabled, even though the host doesn't set it. Since they think it's enabled, they also look at the submission queue creation priority. We used to set that to MEDIUM by default, but that was removed in commit 81c1cd98351b. This causes various issues on that drive. Add a quirk to still set MEDIUM priority for that controller. Fixes: 81c1cd98351b ("nvme/pci: Don't set reserved SQ create flags") Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 12 +++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7ded7a51c430..17d2f7cf3fed 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -84,6 +84,11 @@ enum nvme_quirks { * Supports the LighNVM command set if indicated in vs[1]. */ NVME_QUIRK_LIGHTNVM = (1 << 6), + + /* + * Set MEDIUM priority on SQ creation + */ + NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fbc71fac6f1e..17a0190bd88f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1093,9 +1093,18 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq) { + struct nvme_ctrl *ctrl = &dev->ctrl; struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG; + /* + * Some drives have a bug that auto-enables WRRU if MEDIUM isn't + * set. Since URGENT priority is zeroes, it makes all queues + * URGENT. + */ + if (ctrl->quirks & NVME_QUIRK_MEDIUM_PRIO_SQ) + flags |= NVME_SQ_PRIO_MEDIUM; + /* * Note: we (ab)use the fact that the prp fields survive if no data * is attached to the request. @@ -2701,7 +2710,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ - .driver_data = NVME_QUIRK_NO_DEEPEST_PS }, + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_MEDIUM_PRIO_SQ }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */