From 13c3aa76320072d995aadbe659d1b1b39899c3dc Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 30 Mar 2023 19:09:30 +0800 Subject: [PATCH] scsi: libsas: Abort all in-flight requests when device is gone commit 0e4b1791d9b192ac263a03707d876132eb0f8dab upstream. When a disk is removed with in-flight I/O, the application needs to wait for 30 seconds (depending on the timeout configuration) to hear back from the kernel. Xingui tried to fix this issue by aborting the ATA link for SATA devices[1]; however, this approach left the issue unresolved for SAS devices. Try to fix this issue by aborting all in-flight requests when the device is gone. This is implemented by iterating over the tagset. [1] https://lore.kernel.org/lkml/234e04db-7539-07e4-a6b8-c6b05f78193d@opensource.wdc.com/T/ Cc: Xingui Yang Cc: John Garry Cc: Damien Le Moal Cc: Hannes Reinecke Signed-off-by: Jason Yan Link: https://lore.kernel.org/r/20230330110930.175539-1-yanaijie@huawei.com Reviewed-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Jianping Liu --- drivers/scsi/libsas/sas_discover.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 10975f3f7ff6..faec434e1b84 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -367,6 +367,33 @@ static void sas_destruct_ports(struct asd_sas_port *port) } } +static bool sas_abort_cmd(struct request *req, void *data, bool reserved) +{ + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); + struct domain_device *dev = data; + + if (dev == cmd_to_domain_dev(cmd)) + blk_abort_request(req); + return true; +} + +static void sas_abort_device_scsi_cmds(struct domain_device *dev) +{ + struct sas_ha_struct *sas_ha = dev->port->ha; + struct Scsi_Host *shost = sas_ha->core.shost; + + if (dev_is_expander(dev->dev_type)) + return; + + /* + * For a removed device with active IOs, user space applications have + * to spend a very long 
time waiting for the timeout. This is not + * necessary because a removed device will not return the IOs. + * Abort the in-flight IOs here so that EH can kick in quickly. + */ + blk_mq_tagset_busy_iter(&shost->tag_set, sas_abort_cmd, dev); +} + void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev) { if (!test_bit(SAS_DEV_DESTROY, &dev->state) && @@ -379,6 +406,8 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev) } if (!test_and_set_bit(SAS_DEV_DESTROY, &dev->state)) { + if (test_bit(SAS_DEV_GONE, &dev->state)) + sas_abort_device_scsi_cmds(dev); sas_rphy_unlink(dev->rphy); list_move_tail(&dev->disco_list_node, &port->destroy_list); }