From 566d3c57eb526f32951af15866086e236ce1fc8a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 8 Jun 2022 10:13:02 +0900 Subject: [PATCH 1/8] scsi: scsi_debug: Fix zone transition to full condition When a write command to a sequential write required or sequential write preferred zone result in the zone write pointer reaching the end of the zone, the zone condition must be set to full AND the number of implicitly or explicitly open zones updated to have a correct accounting for zone resources. However, the function zbc_inc_wp() only sets the zone condition to full without updating the open zone counters, resulting in a zone state machine breakage. Introduce the helper function zbc_set_zone_full() and use it in zbc_inc_wp() to correctly transition zones to the full condition. Link: https://lore.kernel.org/r/20220608011302.92061-1-damien.lemoal@opensource.wdc.com Fixes: f0d1cf9378bd ("scsi: scsi_debug: Add ZBC zone commands") Reviewed-by: Niklas Cassel Acked-by: Douglas Gilbert Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 1f423f723d06..b8a76b89f85a 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -2826,6 +2826,24 @@ static void zbc_open_zone(struct sdebug_dev_info *devip, } } +static inline void zbc_set_zone_full(struct sdebug_dev_info *devip, + struct sdeb_zone_state *zsp) +{ + switch (zsp->z_cond) { + case ZC2_IMPLICIT_OPEN: + devip->nr_imp_open--; + break; + case ZC3_EXPLICIT_OPEN: + devip->nr_exp_open--; + break; + default: + WARN_ONCE(true, "Invalid zone %llu condition %x\n", + zsp->z_start, zsp->z_cond); + break; + } + zsp->z_cond = ZC5_FULL; +} + static void zbc_inc_wp(struct sdebug_dev_info *devip, unsigned long long lba, unsigned int num) { @@ -2838,7 +2856,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip, if (zsp->z_type == ZBC_ZTYPE_SWR) { zsp->z_wp += num; if (zsp->z_wp >= zend) - zsp->z_cond = ZC5_FULL; + zbc_set_zone_full(devip, zsp); return; } @@ -2857,7 +2875,7 @@ static void zbc_inc_wp(struct sdebug_dev_info *devip, n = num; } if (zsp->z_wp >= zend) - zsp->z_cond = ZC5_FULL; + zbc_set_zone_full(devip, zsp); num -= n; lba += n; From f6eed15f3ea76596ccc689331e1cc850b999133b Mon Sep 17 00:00:00 2001 From: Sergey Gorenko Date: Mon, 13 Jun 2022 15:38:54 +0300 Subject: [PATCH 2/8] scsi: iscsi: Exclude zero from the endpoint ID range The kernel returns an endpoint ID as r.ep_connect_ret.handle in the iscsi_uevent. The iscsid validates a received endpoint ID and treats zero as an error. The commit referenced in the fixes line changed the endpoint ID range, and zero is always assigned to the first endpoint ID. So, the first attempt to create a new iSER connection always fails. Link: https://lore.kernel.org/r/20220613123854.55073-1-sergeygo@nvidia.com Fixes: 3c6ae371b8a1 ("scsi: iscsi: Release endpoint ID when its freed") Reviewed-by: Max Gurtovoy Reviewed-by: Mike Christie Reviewed-by: Lee Duncan Signed-off-by: Sergey Gorenko Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 2c0dd64159b0..5d21f07456c6 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -212,7 +212,12 @@ iscsi_create_endpoint(int dd_size) return NULL; mutex_lock(&iscsi_ep_idr_mutex); - id = idr_alloc(&iscsi_ep_idr, ep, 0, -1, GFP_NOIO); + + /* + * First endpoint id should be 1 to comply with user space + * applications (iscsid). + */ + id = idr_alloc(&iscsi_ep_idr, ep, 1, -1, GFP_NOIO); if (id < 0) { mutex_unlock(&iscsi_ep_idr_mutex); printk(KERN_ERR "Could not allocate endpoint ID. Error %d.\n", From da8badd7d3583f447eac2ab65a332f2d773deca1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 13 Jun 2022 14:44:40 -0700 Subject: [PATCH 3/8] scsi: ufs: Simplify ufshcd_clear_cmd() Remove the local variable 'err'. This patch does not change any functionality. Link: https://lore.kernel.org/r/20220613214442.212466-2-bvanassche@acm.org Reviewed-by: Stanley Chu Reviewed-by: Adrian Hunter Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 01fb4bad86be..2d479e31c588 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2866,7 +2866,6 @@ static int ufshcd_compose_dev_cmd(struct ufs_hba *hba, static int ufshcd_clear_cmd(struct ufs_hba *hba, int tag) { - int err = 0; unsigned long flags; u32 mask = 1 << tag; @@ -2879,11 +2878,8 @@ ufshcd_clear_cmd(struct ufs_hba *hba, int tag) * wait for h/w to clear corresponding bit in door-bell. * max. wait is 1 sec. */ - err = ufshcd_wait_for_register(hba, - REG_UTP_TRANSFER_REQ_DOOR_BELL, - mask, ~mask, 1000, 1000); - - return err; + return ufshcd_wait_for_register(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL, + mask, ~mask, 1000, 1000); } static int From d1a7644648b7cdacaf8d1013a4285001911e9bc8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 13 Jun 2022 14:44:41 -0700 Subject: [PATCH 4/8] scsi: ufs: Support clearing multiple commands at once Modify ufshcd_clear_cmd() such that it supports clearing multiple commands at once instead of one command at a time. This change will be used in a later patch to reduce the time spent in the reset handler. Link: https://lore.kernel.org/r/20220613214442.212466-3-bvanassche@acm.org Reviewed-by: Stanley Chu Reviewed-by: Adrian Hunter Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 42 ++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 2d479e31c588..8789147760ae 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -748,17 +748,28 @@ static enum utp_ocs ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp) } /** - * ufshcd_utrl_clear - Clear a bit in UTRLCLR register + * ufshcd_utrl_clear() - Clear requests from the controller request list. * @hba: per adapter instance - * @pos: position of the bit to be cleared + * @mask: mask with one bit set for each request to be cleared */ -static inline void ufshcd_utrl_clear(struct ufs_hba *hba, u32 pos) +static inline void ufshcd_utrl_clear(struct ufs_hba *hba, u32 mask) { if (hba->quirks & UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR) - ufshcd_writel(hba, (1 << pos), REG_UTP_TRANSFER_REQ_LIST_CLEAR); - else - ufshcd_writel(hba, ~(1 << pos), - REG_UTP_TRANSFER_REQ_LIST_CLEAR); + mask = ~mask; + /* + * From the UFSHCI specification: "UTP Transfer Request List CLear + * Register (UTRLCLR): This field is bit significant. Each bit + * corresponds to a slot in the UTP Transfer Request List, where bit 0 + * corresponds to request slot 0. A bit in this field is set to ‘0’ + * by host software to indicate to the host controller that a transfer + * request slot is cleared. The host controller + * shall free up any resources associated to the request slot + * immediately, and shall set the associated bit in UTRLDBR to ‘0’. The + * host software indicates no change to request slots by setting the + * associated bits in this field to ‘1’. Bits in this field shall only + * be set ‘1’ or ‘0’ by host software when UTRLRSR is set to ‘1’." + */ + ufshcd_writel(hba, ~mask, REG_UTP_TRANSFER_REQ_LIST_CLEAR); } /** @@ -2863,15 +2874,18 @@ static int ufshcd_compose_dev_cmd(struct ufs_hba *hba, return ufshcd_compose_devman_upiu(hba, lrbp); } -static int -ufshcd_clear_cmd(struct ufs_hba *hba, int tag) +/* + * Clear all the requests from the controller for which a bit has been set in + * @mask and wait until the controller confirms that these requests have been + * cleared. + */ +static int ufshcd_clear_cmds(struct ufs_hba *hba, u32 mask) { unsigned long flags; - u32 mask = 1 << tag; /* clear outstanding transaction before retry */ spin_lock_irqsave(hba->host->host_lock, flags); - ufshcd_utrl_clear(hba, tag); + ufshcd_utrl_clear(hba, mask); spin_unlock_irqrestore(hba->host->host_lock, flags); /* @@ -2959,7 +2973,7 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, err = -ETIMEDOUT; dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n", __func__, lrbp->task_tag); - if (!ufshcd_clear_cmd(hba, lrbp->task_tag)) + if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag)) /* successfully cleared the command, retry if needed */ err = -EAGAIN; /* @@ -6982,7 +6996,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) /* clear the commands that were pending for corresponding LUN */ for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) { if (hba->lrb[pos].lun == lun) { - err = ufshcd_clear_cmd(hba, pos); + err = ufshcd_clear_cmds(hba, 1U << pos); if (err) break; __ufshcd_transfer_req_compl(hba, 1U << pos); @@ -7084,7 +7098,7 @@ static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag) goto out; } - err = ufshcd_clear_cmd(hba, tag); + err = ufshcd_clear_cmds(hba, 1U << tag); if (err) dev_err(hba->dev, "%s: Failed clearing cmd at tag %d, err %d\n", __func__, tag, err); From 2acd76e7b8596e307fcec8fc6bc5fe5ab174749a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 13 Jun 2022 14:44:42 -0700 Subject: [PATCH 5/8] scsi: ufs: Fix a race between the interrupt handler and the reset handler Prevent that both the interrupt handler and the reset handler try to complete a request at the same time. This patch is the result of an analysis of the following crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000120 CPU: 0 PID: 0 Comm: swapper/0 Tainted: G OE 5.10.107-android13-4-00051-g1e48e8970cca-ab8664745 #1 pc : ufshcd_release_scsi_cmd+0x30/0x46c lr : __ufshcd_transfer_req_compl+0x4fc/0x9c0 Call trace: ufshcd_release_scsi_cmd+0x30/0x46c __ufshcd_transfer_req_compl+0x4fc/0x9c0 ufshcd_poll+0xf0/0x208 ufshcd_sl_intr+0xb8/0xf0 ufshcd_intr+0x168/0x2f4 __handle_irq_event_percpu+0xa0/0x30c handle_irq_event+0x84/0x178 handle_fasteoi_irq+0x150/0x2e8 __handle_domain_irq+0x114/0x1e4 gic_handle_irq.31846+0x58/0x300 el1_irq+0xe4/0x1c0 cpuidle_enter_state+0x3ac/0x8c4 do_idle+0x2fc/0x55c cpu_startup_entry+0x84/0x90 kernel_init+0x0/0x310 start_kernel+0x0/0x608 start_kernel+0x4ec/0x608 Link: https://lore.kernel.org/r/20220613214442.212466-4-bvanassche@acm.org Reviewed-by: Stanley Chu Reviewed-by: Adrian Hunter Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 8789147760ae..ce86d1b790c0 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6968,14 +6968,14 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba, } /** - * ufshcd_eh_device_reset_handler - device reset handler registered to - * scsi layer. + * ufshcd_eh_device_reset_handler() - Reset a single logical unit. * @cmd: SCSI command pointer * * Returns SUCCESS/FAILED */ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) { + unsigned long flags, pending_reqs = 0, not_cleared = 0; struct Scsi_Host *host; struct ufs_hba *hba; u32 pos; @@ -6994,14 +6994,24 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) } /* clear the commands that were pending for corresponding LUN */ - for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) { - if (hba->lrb[pos].lun == lun) { - err = ufshcd_clear_cmds(hba, 1U << pos); - if (err) - break; - __ufshcd_transfer_req_compl(hba, 1U << pos); - } + spin_lock_irqsave(&hba->outstanding_lock, flags); + for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) + if (hba->lrb[pos].lun == lun) + __set_bit(pos, &pending_reqs); + hba->outstanding_reqs &= ~pending_reqs; + spin_unlock_irqrestore(&hba->outstanding_lock, flags); + + if (ufshcd_clear_cmds(hba, pending_reqs) < 0) { + spin_lock_irqsave(&hba->outstanding_lock, flags); + not_cleared = pending_reqs & + ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); + hba->outstanding_reqs |= not_cleared; + spin_unlock_irqrestore(&hba->outstanding_lock, flags); + + dev_err(hba->dev, "%s: failed to clear requests %#lx\n", + __func__, not_cleared); } + __ufshcd_transfer_req_compl(hba, pending_reqs & ~not_cleared); out: hba->req_abort_count = 0; From 1d3e0980782fbafaf93285779fd3905e4f866802 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Tue, 14 Jun 2022 00:05:55 -0700 Subject: [PATCH 6/8] scsi: storvsc: Correct reporting of Hyper-V I/O size limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current code is based on the idea that the max number of SGL entries also determines the max size of an I/O request. While this idea was true in older versions of the storvsc driver when SGL entry length was limited to 4 Kbytes, commit 3d9c3dcc58e9 ("scsi: storvsc: Enable scatterlist entry lengths > 4Kbytes") removed that limitation. It's now theoretically possible for the block layer to send requests that exceed the maximum size supported by Hyper-V. This problem doesn't currently happen in practice because the block layer defaults to a 512 Kbyte maximum, while Hyper-V in Azure supports 2 Mbyte I/O sizes. But some future configuration of Hyper-V could have a smaller max I/O size, and the block layer could exceed that max. Fix this by correctly setting max_sectors as well as sg_tablesize to reflect the maximum I/O size that Hyper-V reports. While allowing I/O sizes larger than the block layer default of 512 Kbytes doesn’t provide any noticeable performance benefit in the tests we ran, it's still appropriate to report the correct underlying Hyper-V capabilities to the Linux block layer. Also tweak the virt_boundary_mask to reflect that the required alignment derives from Hyper-V communication using a 4 Kbyte page size, and not on the guest page size, which might be bigger (eg. ARM64). Link: https://lore.kernel.org/r/1655190355-28722-1-git-send-email-ssengar@linux.microsoft.com Fixes: 3d9c3dcc58e9 ("scsi: storvsc: Enable scatter list entry lengths > 4Kbytes") Reviewed-by: Michael Kelley Signed-off-by: Saurabh Sengar Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index ca3530982e52..fe000da11332 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1844,7 +1844,7 @@ static struct scsi_host_template scsi_driver = { .cmd_per_lun = 2048, .this_id = -1, /* Ensure there are no gaps in presented sgls */ - .virt_boundary_mask = PAGE_SIZE-1, + .virt_boundary_mask = HV_HYP_PAGE_SIZE - 1, .no_write_same = 1, .track_queue_depth = 1, .change_queue_depth = storvsc_change_queue_depth, @@ -1895,6 +1895,7 @@ static int storvsc_probe(struct hv_device *device, int target = 0; struct storvsc_device *stor_device; int max_sub_channels = 0; + u32 max_xfer_bytes; /* * We support sub-channels for storage on SCSI and FC controllers. @@ -1968,12 +1969,28 @@ static int storvsc_probe(struct hv_device *device, } /* max cmd length */ host->max_cmd_len = STORVSC_MAX_CMD_LEN; - /* - * set the table size based on the info we got - * from the host. + * Any reasonable Hyper-V configuration should provide + * max_transfer_bytes value aligning to HV_HYP_PAGE_SIZE, + * protecting it from any weird value. */ - host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT); + max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE); + /* max_hw_sectors_kb */ + host->max_sectors = max_xfer_bytes >> 9; + /* + * There are 2 requirements for Hyper-V storvsc sgl segments, + * based on which the below calculation for max segments is + * done: + * + * 1. Except for the first and last sgl segment, all sgl segments + * should be align to HV_HYP_PAGE_SIZE, that also means the + * maximum number of segments in a sgl can be calculated by + * dividing the total max transfer length by HV_HYP_PAGE_SIZE. + * + * 2. Except for the first and last, each entry in the SGL must + * have an offset that is a multiple of HV_HYP_PAGE_SIZE. + */ + host->sg_tablesize = (max_xfer_bytes >> HV_HYP_PAGE_SHIFT) + 1; /* * For non-IDE disks, the host supports multiple channels. * Set the number of HW queues we are supporting. From 72ea7fe0db73d65c7d977208842d8ade9b823de9 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Thu, 16 Jun 2022 12:11:26 -0700 Subject: [PATCH 7/8] scsi: ibmvfc: Allocate/free queue resource only during probe/remove Currently, the sub-queues and event pool resources are allocated/freed for every CRQ connection event such as reset and LPM. This exposes the driver to a couple issues. First the inefficiency of freeing and reallocating memory that can simply be resued after being sanitized. Further, a system under memory pressue runs the risk of allocation failures that could result in a crippled driver. Finally, there is a race window where command submission/compeletion can try to pull/return elements from/to an event pool that is being deleted or already has been deleted due to the lack of host state around freeing/allocating resources. The following is an example of list corruption following a live partition migration (LPM): Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: vfat fat isofs cdrom ext4 mbcache jbd2 nft_counter nft_compat nf_tables nfnetlink rpadlpar_io rpaphp xsk_diag nfsv3 nfs_acl nfs lockd grace fscache netfs rfkill bonding tls sunrpc pseries_rng drm drm_panel_orientation_quirks xfs libcrc32c dm_service_time sd_mod t10_pi sg ibmvfc scsi_transport_fc ibmveth vmx_crypto dm_multipath dm_mirror dm_region_hash dm_log dm_mod ipmi_devintf ipmi_msghandler fuse CPU: 0 PID: 2108 Comm: ibmvfc_0 Kdump: loaded Not tainted 5.14.0-70.9.1.el9_0.ppc64le #1 NIP: c0000000007c4bb0 LR: c0000000007c4bac CTR: 00000000005b9a10 REGS: c00000025c10b760 TRAP: 0700 Not tainted (5.14.0-70.9.1.el9_0.ppc64le) MSR: 800000000282b033 CR: 2800028f XER: 0000000f CFAR: c0000000001f55bc IRQMASK: 0 GPR00: c0000000007c4bac c00000025c10ba00 c000000002a47c00 000000000000004e GPR04: c0000031e3006f88 c0000031e308bd00 c00000025c10b768 0000000000000027 GPR08: 0000000000000000 c0000031e3009dc0 00000031e0eb0000 0000000000000000 GPR12: c0000031e2ffffa8 c000000002dd0000 c000000000187108 c00000020fcee2c0 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 c008000002f81300 GPR24: 5deadbeef0000100 5deadbeef0000122 c000000263ba6910 c00000024cc88000 GPR28: 000000000000003c c0000002430a0000 c0000002430ac300 000000000000c300 NIP [c0000000007c4bb0] __list_del_entry_valid+0x90/0x100 LR [c0000000007c4bac] __list_del_entry_valid+0x8c/0x100 Call Trace: [c00000025c10ba00] [c0000000007c4bac] __list_del_entry_valid+0x8c/0x100 (unreliable) [c00000025c10ba60] [c008000002f42284] ibmvfc_free_queue+0xec/0x210 [ibmvfc] [c00000025c10bb10] [c008000002f4246c] ibmvfc_deregister_scsi_channel+0xc4/0x160 [ibmvfc] [c00000025c10bba0] [c008000002f42580] ibmvfc_release_sub_crqs+0x78/0x130 [ibmvfc] [c00000025c10bc20] [c008000002f4f6cc] ibmvfc_do_work+0x5c4/0xc70 [ibmvfc] [c00000025c10bce0] [c008000002f4fdec] ibmvfc_work+0x74/0x1e8 [ibmvfc] [c00000025c10bda0] [c0000000001872b8] kthread+0x1b8/0x1c0 [c00000025c10be10] [c00000000000cd64] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 40820034 38600001 38210060 4e800020 7c0802a6 7c641b78 3c62fe7a 7d254b78 3863b590 f8010070 4ba309cd 60000000 <0fe00000> 7c0802a6 3c62fe7a 3863b640 ---[ end trace 11a2b65a92f8b66c ]--- ibmvfc 30000003: Send warning. Receive queue closed, will retry. Add registration/deregistration helpers that are called instead during connection resets to sanitize and reconfigure the queues. Link: https://lore.kernel.org/r/20220616191126.1281259-3-tyreld@linux.ibm.com Fixes: 3034ebe26389 ("scsi: ibmvfc: Add alloc/dealloc routines for SCSI Sub-CRQ Channels") Cc: stable@vger.kernel.org Reviewed-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 79 ++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index d0eab5700dc5..4cd03fe73183 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -160,8 +160,8 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *); static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *); static void ibmvfc_tgt_move_login(struct ibmvfc_target *); -static void ibmvfc_release_sub_crqs(struct ibmvfc_host *); -static void ibmvfc_init_sub_crqs(struct ibmvfc_host *); +static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *); +static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *); static const char *unknown_error = "unknown error"; @@ -917,7 +917,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost) struct vio_dev *vdev = to_vio_dev(vhost->dev); unsigned long flags; - ibmvfc_release_sub_crqs(vhost); + ibmvfc_dereg_sub_crqs(vhost); /* Re-enable the CRQ */ do { @@ -936,7 +936,7 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost) spin_unlock(vhost->crq.q_lock); spin_unlock_irqrestore(vhost->host->host_lock, flags); - ibmvfc_init_sub_crqs(vhost); + ibmvfc_reg_sub_crqs(vhost); return rc; } @@ -955,7 +955,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) struct vio_dev *vdev = to_vio_dev(vhost->dev); struct ibmvfc_queue *crq = &vhost->crq; - ibmvfc_release_sub_crqs(vhost); + ibmvfc_dereg_sub_crqs(vhost); /* Close the CRQ */ do { @@ -988,7 +988,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost) spin_unlock(vhost->crq.q_lock); spin_unlock_irqrestore(vhost->host->host_lock, flags); - ibmvfc_init_sub_crqs(vhost); + ibmvfc_reg_sub_crqs(vhost); return rc; } @@ -5757,9 +5757,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, ENTER; - if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT)) - return -ENOMEM; - rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE, &scrq->cookie, &scrq->hw_irq); @@ -5800,7 +5797,6 @@ irq_failed: rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie); } while (rtas_busy_delay(rc)); reg_failed: - ibmvfc_free_queue(vhost, scrq); LEAVE; return rc; } @@ -5826,12 +5822,50 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index) if (rc) dev_err(dev, "Failed to free sub-crq[%d]: rc=%ld\n", index, rc); - ibmvfc_free_queue(vhost, scrq); + /* Clean out the queue */ + memset(scrq->msgs.crq, 0, PAGE_SIZE); + scrq->cur = 0; + + LEAVE; +} + +static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *vhost) +{ + int i, j; + + ENTER; + if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs) + return; + + for (i = 0; i < nr_scsi_hw_queues; i++) { + if (ibmvfc_register_scsi_channel(vhost, i)) { + for (j = i; j > 0; j--) + ibmvfc_deregister_scsi_channel(vhost, j - 1); + vhost->do_enquiry = 0; + return; + } + } + + LEAVE; +} + +static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *vhost) +{ + int i; + + ENTER; + if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs) + return; + + for (i = 0; i < nr_scsi_hw_queues; i++) + ibmvfc_deregister_scsi_channel(vhost, i); + LEAVE; } static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) { + struct ibmvfc_queue *scrq; int i, j; ENTER; @@ -5847,30 +5881,41 @@ static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost) } for (i = 0; i < nr_scsi_hw_queues; i++) { - if (ibmvfc_register_scsi_channel(vhost, i)) { - for (j = i; j > 0; j--) - ibmvfc_deregister_scsi_channel(vhost, j - 1); + scrq = &vhost->scsi_scrqs.scrqs[i]; + if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT)) { + for (j = i; j > 0; j--) { + scrq = &vhost->scsi_scrqs.scrqs[j - 1]; + ibmvfc_free_queue(vhost, scrq); + } kfree(vhost->scsi_scrqs.scrqs); vhost->scsi_scrqs.scrqs = NULL; vhost->scsi_scrqs.active_queues = 0; vhost->do_enquiry = 0; - break; + vhost->mq_enabled = 0; + return; } } + ibmvfc_reg_sub_crqs(vhost); + LEAVE; } static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost) { + struct ibmvfc_queue *scrq; int i; ENTER; if (!vhost->scsi_scrqs.scrqs) return; - for (i = 0; i < nr_scsi_hw_queues; i++) - ibmvfc_deregister_scsi_channel(vhost, i); + ibmvfc_dereg_sub_crqs(vhost); + + for (i = 0; i < nr_scsi_hw_queues; i++) { + scrq = &vhost->scsi_scrqs.scrqs[i]; + ibmvfc_free_queue(vhost, scrq); + } kfree(vhost->scsi_scrqs.scrqs); vhost->scsi_scrqs.scrqs = NULL; From aeaadcde1a60138bceb65de3cdaeec78170b4459 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Thu, 16 Jun 2022 12:11:25 -0700 Subject: [PATCH 8/8] scsi: ibmvfc: Store vhost pointer during subcrq allocation Currently the back pointer from a queue to the vhost adapter isn't set until after subcrq interrupt registration. The value is available when a queue is first allocated and can/should be also set for primary and async queues as well as subcrqs. This fixes a crash observed during kexec/kdump on Power 9 with legacy XICS interrupt controller where a pending subcrq interrupt from the previous kernel can be replayed immediately upon IRQ registration resulting in dereference of a garbage backpointer in ibmvfc_interrupt_scsi(). Kernel attempted to read user page (58) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000058 Faulting instruction address: 0xc008000003216a08 Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [c008000003216a08] ibmvfc_interrupt_scsi+0x40/0xb0 [ibmvfc] LR [c0000000082079e8] __handle_irq_event_percpu+0x98/0x270 Call Trace: [c000000047fa3d80] [c0000000123e6180] 0xc0000000123e6180 (unreliable) [c000000047fa3df0] [c0000000082079e8] __handle_irq_event_percpu+0x98/0x270 [c000000047fa3ea0] [c000000008207d18] handle_irq_event+0x98/0x188 [c000000047fa3ef0] [c00000000820f564] handle_fasteoi_irq+0xc4/0x310 [c000000047fa3f40] [c000000008205c60] generic_handle_irq+0x50/0x80 [c000000047fa3f60] [c000000008015c40] __do_irq+0x70/0x1a0 [c000000047fa3f90] [c000000008016d7c] __do_IRQ+0x9c/0x130 [c000000014622f60] [0000000020000000] 0x20000000 [c000000014622ff0] [c000000008016e50] do_IRQ+0x40/0xa0 [c000000014623020] [c000000008017044] replay_soft_interrupts+0x194/0x2f0 [c000000014623210] [c0000000080172a8] arch_local_irq_restore+0x108/0x170 [c000000014623240] [c000000008eb1008] _raw_spin_unlock_irqrestore+0x58/0xb0 [c000000014623270] [c00000000820b12c] __setup_irq+0x49c/0x9f0 [c000000014623310] [c00000000820b7c0] request_threaded_irq+0x140/0x230 [c000000014623380] [c008000003212a50] ibmvfc_register_scsi_channel+0x1e8/0x2f0 [ibmvfc] [c000000014623450] [c008000003213d1c] ibmvfc_init_sub_crqs+0xc4/0x1f0 [ibmvfc] [c0000000146234d0] [c0080000032145a8] ibmvfc_reset_crq+0x150/0x210 [ibmvfc] [c000000014623550] [c0080000032147c8] ibmvfc_init_crq+0x160/0x280 [ibmvfc] [c0000000146235f0] [c00800000321a9cc] ibmvfc_probe+0x2a4/0x530 [ibmvfc] Link: https://lore.kernel.org/r/20220616191126.1281259-2-tyreld@linux.ibm.com Fixes: 3034ebe26389 ("scsi: ibmvfc: Add alloc/dealloc routines for SCSI Sub-CRQ Channels") Cc: stable@vger.kernel.org Reviewed-by: Brian King Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++- drivers/scsi/ibmvscsi/ibmvfc.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 4cd03fe73183..00684e11976b 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -5682,6 +5682,8 @@ static int ibmvfc_alloc_queue(struct ibmvfc_host *vhost, queue->cur = 0; queue->fmt = fmt; queue->size = PAGE_SIZE / fmt_size; + + queue->vhost = vhost; return 0; } @@ -5787,7 +5789,6 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost, } scrq->hwq_id = index; - scrq->vhost = vhost; LEAVE; return 0; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 3718406e0988..c39a245f43d0 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -789,6 +789,7 @@ struct ibmvfc_queue { spinlock_t _lock; spinlock_t *q_lock; + struct ibmvfc_host *vhost; struct ibmvfc_event_pool evt_pool; struct list_head sent; struct list_head free; @@ -797,7 +798,6 @@ struct ibmvfc_queue { union ibmvfc_iu cancel_rsp; /* Sub-CRQ fields */ - struct ibmvfc_host *vhost; unsigned long cookie; unsigned long vios_cookie; unsigned long hw_irq;