From 7d21fcfb409500dc9b114567f0ef8d30b3190dee Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Fri, 11 Nov 2022 10:44:49 +0900 Subject: [PATCH 1/5] scsi: mpi3mr: Suppress command reply debug prints After it receives command reply, mpi3mr driver checks command result. If the result is not zero, it prints out command information. This debug information is confusing since they are printed even when the non-zero result is expected. "Power-on or device reset occurred" is printed for Test Unit Ready command at drive detection. Inquiry failure for unsupported VPD page header is also printed. They are harmless but look like failures. To avoid the confusion, print the command reply debug information only when the module parameter logging_level has value MPI3_DEBUG_SCSI_ERROR= 64, in same manner as mpt3sas driver. Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20221111014449.1649968-1-shinichiro.kawasaki@wdc.com Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_os.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index f77ee4051b00..3306de7170f6 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -3265,7 +3265,8 @@ void mpi3mr_process_op_reply_desc(struct mpi3mr_ioc *mrioc, } if (scmd->result != (DID_OK << 16) && (scmd->cmnd[0] != ATA_12) && - (scmd->cmnd[0] != ATA_16)) { + (scmd->cmnd[0] != ATA_16) && + mrioc->logging_level & MPI3_DEBUG_SCSI_ERROR) { ioc_info(mrioc, "%s :scmd->result 0x%x\n", __func__, scmd->result); scsi_print_command(scmd); From bc68e428d4963af0201e92159629ab96948f0893 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 15 Nov 2022 09:50:42 +0800 Subject: [PATCH 2/5] scsi: target: tcm_loop: Fix possible name leak in tcm_loop_setup_hba_bus() If device_register() fails in tcm_loop_setup_hba_bus(), the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(). The 'tl_hba' will be freed in tcm_loop_release_adapter(), so it don't need goto error label in this case. Fixes: 3703b2c5d041 ("[SCSI] tcm_loop: Add multi-fabric Linux/SCSI LLD fabric module") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221115015042.3652261-1-yangyingliang@huawei.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/loopback/tcm_loop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 4407b56aa6d1..139031ccb700 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -397,6 +397,7 @@ static int tcm_loop_setup_hba_bus(struct tcm_loop_hba *tl_hba, int tcm_loop_host ret = device_register(&tl_hba->dev); if (ret) { pr_err("device_register() failed for tl_hba->dev: %d\n", ret); + put_device(&tl_hba->dev); return -ENODEV; } @@ -1073,7 +1074,7 @@ check_len: */ ret = tcm_loop_setup_hba_bus(tl_hba, tcm_loop_hba_no_cnt); if (ret) - goto out; + return ERR_PTR(ret); sh = tl_hba->sh; tcm_loop_hba_no_cnt++; From e208a1d795a08d1ac0398c79ad9c58106531bcc5 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 17 Nov 2022 08:44:21 +0000 Subject: [PATCH 3/5] scsi: scsi_debug: Fix possible UAF in sdebug_add_host_helper() If device_register() fails in sdebug_add_host_helper(), it will goto clean and sdbg_host will be freed, but sdbg_host->host_list will not be removed from sdebug_host_list, then list traversal may cause UAF. Fix it. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221117084421.58918-1-yuancan@huawei.com Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 629853662b82..bebda917b138 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -7323,8 +7323,12 @@ static int sdebug_add_host_helper(int per_host_idx) dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts); error = device_register(&sdbg_host->dev); - if (error) + if (error) { + spin_lock(&sdebug_host_list_lock); + list_del(&sdbg_host->host_list); + spin_unlock(&sdebug_host_list_lock); goto clean; + } ++sdebug_num_hosts; return 0; From 0954256e970ecf371b03a6c9af2cf91b9c4085ff Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Wed, 16 Nov 2022 11:50:37 +0100 Subject: [PATCH 4/5] scsi: zfcp: Fix double free of FSF request when qdio send fails We used to use the wrong type of integer in 'zfcp_fsf_req_send()' to cache the FSF request ID when sending a new FSF request. This is used in case the sending fails and we need to remove the request from our internal hash table again (so we don't keep an invalid reference and use it when we free the request again). In 'zfcp_fsf_req_send()' we used to cache the ID as 'int' (signed and 32 bit wide), but the rest of the zfcp code (and the firmware specification) handles the ID as 'unsigned long'/'u64' (unsigned and 64 bit wide [s390x ELF ABI]). For one this has the obvious problem that when the ID grows past 32 bit (this can happen reasonably fast) it is truncated to 32 bit when storing it in the cache variable and so doesn't match the original ID anymore. The second less obvious problem is that even when the original ID has not yet grown past 32 bit, as soon as the 32nd bit is set in the original ID (0x80000000 = 2'147'483'648) we will have a mismatch when we cast it back to 'unsigned long'. As the cached variable is of a signed type, the compiler will choose a sign-extending instruction to load the 32 bit variable into a 64 bit register (e.g.: 'lgf %r11,188(%r15)'). So once we pass the cached variable into 'zfcp_reqlist_find_rm()' to remove the request again all the leading zeros will be flipped to ones to extend the sign and won't match the original ID anymore (this has been observed in practice). If we can't successfully remove the request from the hash table again after 'zfcp_qdio_send()' fails (this happens regularly when zfcp cannot notify the adapter about new work because the adapter is already gone during e.g. a ChpID toggle) we will end up with a double free. We unconditionally free the request in the calling function when 'zfcp_fsf_req_send()' fails, but because the request is still in the hash table we end up with a stale memory reference, and once the zfcp adapter is either reset during recovery or shutdown we end up freeing the same memory twice. The resulting stack traces vary depending on the kernel and have no direct correlation to the place where the bug occurs. Here are three examples that have been seen in practice: list_del corruption. next->prev should be 00000001b9d13800, but was 00000000dead4ead. (next=00000001bd131a00) ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:62! monitor event: 0040 ilc:2 [#1] PREEMPT SMP Modules linked in: ... CPU: 9 PID: 1617 Comm: zfcperp0.0.1740 Kdump: loaded Hardware name: ... Krnl PSW : 0704d00180000000 00000003cbeea1f8 (__list_del_entry_valid+0x98/0x140) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 00000000916d12f1 0000000080000000 000000000000006d 00000003cb665cd6 0000000000000001 0000000000000000 0000000000000000 00000000d28d21e8 00000000d3844000 00000380099efd28 00000001bd131a00 00000001b9d13800 00000000d3290100 0000000000000000 00000003cbeea1f4 00000380099efc70 Krnl Code: 00000003cbeea1e8: c020004f68a7 larl %r2,00000003cc8d7336 00000003cbeea1ee: c0e50027fd65 brasl %r14,00000003cc3e9cb8 #00000003cbeea1f4: af000000 mc 0,0 >00000003cbeea1f8: c02000920440 larl %r2,00000003cd12aa78 00000003cbeea1fe: c0e500289c25 brasl %r14,00000003cc3fda48 00000003cbeea204: b9040043 lgr %r4,%r3 00000003cbeea208: b9040051 lgr %r5,%r1 00000003cbeea20c: b9040032 lgr %r3,%r2 Call Trace: [<00000003cbeea1f8>] __list_del_entry_valid+0x98/0x140 ([<00000003cbeea1f4>] __list_del_entry_valid+0x94/0x140) [<000003ff7ff502fe>] zfcp_fsf_req_dismiss_all+0xde/0x150 [zfcp] [<000003ff7ff49cd0>] zfcp_erp_strategy_do_action+0x160/0x280 [zfcp] [<000003ff7ff4a22e>] zfcp_erp_strategy+0x21e/0xca0 [zfcp] [<000003ff7ff4ad34>] zfcp_erp_thread+0x84/0x1a0 [zfcp] [<00000003cb5eece8>] kthread+0x138/0x150 [<00000003cb557f3c>] __ret_from_fork+0x3c/0x60 [<00000003cc4172ea>] ret_from_fork+0xa/0x40 INFO: lockdep is turned off. Last Breaking-Event-Address: [<00000003cc3e9d04>] _printk+0x4c/0x58 Kernel panic - not syncing: Fatal exception: panic_on_oops or: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6803 Fault in home space mode while using kernel ASCE. AS:0000000063b10007 R3:0000000000000024 Oops: 0038 ilc:3 [#1] SMP Modules linked in: ... CPU: 10 PID: 0 Comm: swapper/10 Kdump: loaded Hardware name: ... Krnl PSW : 0404d00180000000 000003ff7febaf8e (zfcp_fsf_reqid_check+0x86/0x158 [zfcp]) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 5a6f1cfa89c49ac3 00000000aff2c4c8 6b6b6b6b6b6b6b6b 00000000000002a8 0000000000000000 0000000000000055 0000000000000000 00000000a8515800 0700000000000000 00000000a6e14500 00000000aff2c000 000000008003c44c 000000008093c700 0000000000000010 00000380009ebba8 00000380009ebb48 Krnl Code: 000003ff7febaf7e: a7f4003d brc 15,000003ff7febaff8 000003ff7febaf82: e32020000004 lg %r2,0(%r2) #000003ff7febaf88: ec2100388064 cgrj %r2,%r1,8,000003ff7febaff8 >000003ff7febaf8e: e3b020100020 cg %r11,16(%r2) 000003ff7febaf94: a774fff7 brc 7,000003ff7febaf82 000003ff7febaf98: ec280030007c cgij %r2,0,8,000003ff7febaff8 000003ff7febaf9e: e31020080004 lg %r1,8(%r2) 000003ff7febafa4: e33020000004 lg %r3,0(%r2) Call Trace: [<000003ff7febaf8e>] zfcp_fsf_reqid_check+0x86/0x158 [zfcp] [<000003ff7febbdbc>] zfcp_qdio_int_resp+0x6c/0x170 [zfcp] [<000003ff7febbf90>] zfcp_qdio_irq_tasklet+0xd0/0x108 [zfcp] [<0000000061d90a04>] tasklet_action_common.constprop.0+0xdc/0x128 [<000000006292f300>] __do_softirq+0x130/0x3c0 [<0000000061d906c6>] irq_exit_rcu+0xfe/0x118 [<000000006291e818>] do_io_irq+0xc8/0x168 [<000000006292d516>] io_int_handler+0xd6/0x110 [<000000006292d596>] psw_idle_exit+0x0/0xa ([<0000000061d3be50>] arch_cpu_idle+0x40/0xd0) [<000000006292ceea>] default_idle_call+0x52/0xf8 [<0000000061de4fa4>] do_idle+0xd4/0x168 [<0000000061de51fe>] cpu_startup_entry+0x36/0x40 [<0000000061d4faac>] smp_start_secondary+0x12c/0x138 [<000000006292d88e>] restart_int_handler+0x6e/0x90 Last Breaking-Event-Address: [<000003ff7febaf94>] zfcp_fsf_reqid_check+0x8c/0x158 [zfcp] Kernel panic - not syncing: Fatal exception in interrupt or: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 523b05d3ae76a000 TEID: 523b05d3ae76a803 Fault in home space mode while using kernel ASCE. AS:0000000077c40007 R3:0000000000000024 Oops: 0038 ilc:3 [#1] SMP Modules linked in: ... CPU: 3 PID: 453 Comm: kworker/3:1H Kdump: loaded Hardware name: ... Workqueue: kblockd blk_mq_run_work_fn Krnl PSW : 0404d00180000000 0000000076fc0312 (__kmalloc+0xd2/0x398) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: ffffffffffffffff 523b05d3ae76abf6 0000000000000000 0000000000092a20 0000000000000002 00000007e49b5cc0 00000007eda8f000 0000000000092a20 00000007eda8f000 00000003b02856b9 00000000000000a8 523b05d3ae76abf6 00000007dd662000 00000007eda8f000 0000000076fc02b2 000003e0037637a0 Krnl Code: 0000000076fc0302: c004000000d4 brcl 0,76fc04aa 0000000076fc0308: b904001b lgr %r1,%r11 #0000000076fc030c: e3106020001a algf %r1,32(%r6) >0000000076fc0312: e31010000082 xg %r1,0(%r1) 0000000076fc0318: b9040001 lgr %r0,%r1 0000000076fc031c: e30061700082 xg %r0,368(%r6) 0000000076fc0322: ec59000100d9 aghik %r5,%r9,1 0000000076fc0328: e34003b80004 lg %r4,952 Call Trace: [<0000000076fc0312>] __kmalloc+0xd2/0x398 [<0000000076f318f2>] mempool_alloc+0x72/0x1f8 [<000003ff8027c5f8>] zfcp_fsf_req_create.isra.7+0x40/0x268 [zfcp] [<000003ff8027f1bc>] zfcp_fsf_fcp_cmnd+0xac/0x3f0 [zfcp] [<000003ff80280f1a>] zfcp_scsi_queuecommand+0x122/0x1d0 [zfcp] [<000003ff800b4218>] scsi_queue_rq+0x778/0xa10 [scsi_mod] [<00000000771782a0>] __blk_mq_try_issue_directly+0x130/0x208 [<000000007717a124>] blk_mq_request_issue_directly+0x4c/0xa8 [<000003ff801302e2>] dm_mq_queue_rq+0x2ea/0x468 [dm_mod] [<0000000077178c12>] blk_mq_dispatch_rq_list+0x33a/0x818 [<000000007717f064>] __blk_mq_do_dispatch_sched+0x284/0x2f0 [<000000007717f44c>] __blk_mq_sched_dispatch_requests+0x1c4/0x218 [<000000007717fa7a>] blk_mq_sched_dispatch_requests+0x52/0x90 [<0000000077176d74>] __blk_mq_run_hw_queue+0x9c/0xc0 [<0000000076da6d74>] process_one_work+0x274/0x4d0 [<0000000076da7018>] worker_thread+0x48/0x560 [<0000000076daef18>] kthread+0x140/0x160 [<000000007751d144>] ret_from_fork+0x28/0x30 Last Breaking-Event-Address: [<0000000076fc0474>] __kmalloc+0x234/0x398 Kernel panic - not syncing: Fatal exception: panic_on_oops To fix this, simply change the type of the cache variable to 'unsigned long', like the rest of zfcp and also the argument for 'zfcp_reqlist_find_rm()'. This prevents truncation and wrong sign extension and so can successfully remove the request from the hash table. Fixes: e60a6d69f1f8 ("[SCSI] zfcp: Remove function zfcp_reqlist_find_safe") Cc: #v2.6.34+ Signed-off-by: Benjamin Block Link: https://lore.kernel.org/r/979f6e6019d15f91ba56182f1aaf68d61bf37fc6.1668595505.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fsf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 19223b075568..ab3ea529cca7 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -884,7 +884,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req); struct zfcp_adapter *adapter = req->adapter; struct zfcp_qdio *qdio = adapter->qdio; - int req_id = req->req_id; + unsigned long req_id = req->req_id; zfcp_reqlist_add(adapter->req_list, req); From f014165faa7b953b81dcbf18835936e5f8d01f2a Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Thu, 10 Nov 2022 03:37:29 +0000 Subject: [PATCH 5/5] scsi: iscsi: Fix possible memory leak when device_register() failed If device_register() returns error, the name allocated by the dev_set_name() need be freed. As described in the comment of device_register(), we should use put_device() to give up the reference in the error path. Fix this by calling put_device(), the name will be freed in the kobject_cleanup(), and this patch modified resources will be released by calling the corresponding callback function in the device_release(). Signed-off-by: Zhou Guanghui Link: https://lore.kernel.org/r/20221110033729.1555-1-zhouguanghui1@huawei.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 31 +++++++++++++++-------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index cd3db9684e52..f473c002fa4d 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -231,7 +231,7 @@ iscsi_create_endpoint(int dd_size) dev_set_name(&ep->dev, "ep-%d", id); err = device_register(&ep->dev); if (err) - goto free_id; + goto put_dev; err = sysfs_create_group(&ep->dev.kobj, &iscsi_endpoint_group); if (err) @@ -245,10 +245,12 @@ unregister_dev: device_unregister(&ep->dev); return NULL; -free_id: +put_dev: mutex_lock(&iscsi_ep_idr_mutex); idr_remove(&iscsi_ep_idr, id); mutex_unlock(&iscsi_ep_idr_mutex); + put_device(&ep->dev); + return NULL; free_ep: kfree(ep); return NULL; @@ -766,7 +768,7 @@ iscsi_create_iface(struct Scsi_Host *shost, struct iscsi_transport *transport, err = device_register(&iface->dev); if (err) - goto free_iface; + goto put_dev; err = sysfs_create_group(&iface->dev.kobj, &iscsi_iface_group); if (err) @@ -780,9 +782,8 @@ unreg_iface: device_unregister(&iface->dev); return NULL; -free_iface: - put_device(iface->dev.parent); - kfree(iface); +put_dev: + put_device(&iface->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_iface); @@ -1251,15 +1252,15 @@ iscsi_create_flashnode_sess(struct Scsi_Host *shost, int index, err = device_register(&fnode_sess->dev); if (err) - goto free_fnode_sess; + goto put_dev; if (dd_size) fnode_sess->dd_data = &fnode_sess[1]; return fnode_sess; -free_fnode_sess: - kfree(fnode_sess); +put_dev: + put_device(&fnode_sess->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_flashnode_sess); @@ -1299,15 +1300,15 @@ iscsi_create_flashnode_conn(struct Scsi_Host *shost, err = device_register(&fnode_conn->dev); if (err) - goto free_fnode_conn; + goto put_dev; if (dd_size) fnode_conn->dd_data = &fnode_conn[1]; return fnode_conn; -free_fnode_conn: - kfree(fnode_conn); +put_dev: + put_device(&fnode_conn->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_flashnode_conn); @@ -4815,7 +4816,7 @@ iscsi_register_transport(struct iscsi_transport *tt) dev_set_name(&priv->dev, "%s", tt->name); err = device_register(&priv->dev); if (err) - goto free_priv; + goto put_dev; err = sysfs_create_group(&priv->dev.kobj, &iscsi_transport_group); if (err) @@ -4850,8 +4851,8 @@ iscsi_register_transport(struct iscsi_transport *tt) unregister_dev: device_unregister(&priv->dev); return NULL; -free_priv: - kfree(priv); +put_dev: + put_device(&priv->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_register_transport);