From 3748be12fc6ca8889b63cace216286a7c6bac3b2 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Fri, 17 Mar 2023 15:04:19 +0800 Subject: [PATCH 01/12] Revert "scsi: hisi_sas: Disable SATA disk phy for severe I_T nexus reset failure" driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6NX2M CVE: NA ---------------------------------------------------------------------- In that commit, if the softreset fails upon certain conditions, just disable the PHY associated with the disk. The user needs to restore the PHY. SATA disks do not support simultaneous connection of multiple hosts. Therefore, when multiple controllers are connected to a SATA disk at the same time, the controller which is connected later failed to issue an ATA softreset to the SATA disk. As a result, the PHY associated with the disk is disabled and cannot be automatically recovered. Now that, we will not focus on the execution result of softreset. No matter whether the execution is successful or not, we will directly carry out I_T_nexus_reset. Fixes: c723ada86707 ("scsi: hisi_sas: Disable SATA disk phy for severe I_T nexus reset failure") Signed-off-by: Yihang Li Signed-off-by: xiabing Signed-off-by: chenyi --- drivers/scsi/hisi_sas/hisi_sas_main.c | 29 +++++---------------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index b155ac800979..fb9b4fbac9ca 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1818,33 +1818,14 @@ static int hisi_sas_I_T_nexus_reset(struct domain_device *device) } hisi_sas_dereg_device(hisi_hba, device); - rc = hisi_sas_debug_I_T_nexus_reset(device); - if (rc == TMF_RESP_FUNC_COMPLETE && dev_is_sata(device)) { - struct sas_phy *local_phy; - + if (dev_is_sata(device)) { rc = hisi_sas_softreset_ata_disk(device); - switch (rc) { - case -ECOMM: - rc = -ENODEV; - break; - case TMF_RESP_FUNC_FAILED: - case -EMSGSIZE: - case -EIO: - local_phy = sas_get_local_phy(device); - rc = sas_phy_enable(local_phy, 0); - if (!rc) { - local_phy->enabled = 0; - dev_err(dev, "Disabled local phy of ATA disk %016llx due to softreset fail (%d)\n", - SAS_ADDR(device->sas_addr), rc); - rc = -ENODEV; - } - sas_put_local_phy(local_phy); - break; - default: - break; - } + if (rc == TMF_RESP_FUNC_FAILED) + dev_err(dev, "ata disk %016llx reset (%d)\n", + SAS_ADDR(device->sas_addr), rc); } + rc = hisi_sas_debug_I_T_nexus_reset(device); if ((rc == TMF_RESP_FUNC_COMPLETE) || (rc == -ENODEV)) hisi_sas_release_task(hisi_hba, device); From 84d208aac03cef8e1df57f272ee4011aadce949f Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Thu, 8 Jun 2023 11:04:48 +0800 Subject: [PATCH 02/12] scsi: hisi_sas: Add slave_destroy interface for v3 hw driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I7BNF8 CVE: NA ---------------------------------------------------------------------- A WARNING is triggered when executing link reset of remote PHY and rmmod SAS driver simultaneously. Following is the WARNING log: WARNING: CPU: 61 PID: 21818 at drivers/base/core.c:1347 __device_links_no_driver+0xb4/0xc0 Call trace: __device_links_no_driver+0xb4/0xc0 device_links_driver_cleanup+0xb0/0xfc __device_release_driver+0x198/0x23c device_release_driver+0x38/0x50 bus_remove_device+0x130/0x140 device_del+0x184/0x434 __scsi_remove_device+0x118/0x150 scsi_remove_target+0x1bc/0x240 sas_rphy_remove+0x90/0x94 sas_rphy_delete+0x24/0x3c sas_destruct_devices+0x64/0xa0 [libsas] sas_revalidate_domain+0xe4/0x150 [libsas] process_one_work+0x1e0/0x46c worker_thread+0x15c/0x464 kthread+0x160/0x170 ret_from_fork+0x10/0x20 ---[ end trace 71e059eb58f85d4a ]--- During SAS phy up, link->status is set to DL_STATE_AVAILABLE in device_links_driver_bound, then this setting influences __device_links_no_driver() before driver rmmod and caused WARNING. So we add the slave_destroy interface, to make sure link is removed after flush workque. Fixes: 16fd4a7c59170 ("scsi: hisi_sas: Add device link between SCSI devices and hisi_hba") Signed-off-by: Qi Liu Signed-off-by: John Garry Signed-off-by: xiabing Signed-off-by: chenyi --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index e914c0c13bb5..91d9e796d759 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2909,7 +2909,8 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) return 0; if (!device_link_add(&sdev->sdev_gendev, dev, - DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE)) { + DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME | + DL_FLAG_RPM_ACTIVE)) { if (pm_runtime_enabled(dev)) { dev_info(dev, "add device link failed, disable runtime PM for the host\n"); pm_runtime_disable(dev); @@ -2919,6 +2920,15 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) return 0; } +static void slave_destroy_v3_hw(struct scsi_device *sdev) +{ + struct Scsi_Host *shost = dev_to_shost(&sdev->sdev_gendev); + struct hisi_hba *hisi_hba = shost_priv(shost); + struct device *dev = hisi_hba->dev; + + device_link_remove(&sdev->sdev_gendev, dev); +} + static struct attribute *host_v3_hw_attrs[] = { &dev_attr_phy_event_threshold.attr, &dev_attr_intr_conv_v3_hw.attr, @@ -3335,6 +3345,7 @@ static const struct scsi_host_template sht_v3_hw = { .eh_device_reset_handler = sas_eh_device_reset_handler, .eh_target_reset_handler = sas_eh_target_reset_handler, .slave_alloc = hisi_sas_slave_alloc, + .slave_destroy = slave_destroy_v3_hw, .target_destroy = sas_target_destroy, .ioctl = sas_ioctl, #ifdef CONFIG_COMPAT From 713ec2834fb5ce917a2460ab6ee4269a25e04c12 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Thu, 8 Jun 2023 11:04:51 +0800 Subject: [PATCH 03/12] scsi: hisi_sas: Check usage count only when the runtime PM status is RPM_SUSPENDING driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I7BNF8 CVE: NA ---------------------------------------------------------------------- Users can suspend the machine with 'echo disk > /sys/power/state', but the suspend will fail because the SAS controller cannot be suspended: [root@localhost ~]# echo freeze > /sys/power/state -bash: echo: write error: Device or resource busy [15104.142955] PM: suspend entry (s2idle) ... [15104.283465] hisi_sas_v3_hw 0000:32:04.0: entering suspend state [15104.283480] hisi_sas_v3_hw 0000:30:04.0: entering suspend state [15104.283500] hisi_sas_v3_hw 0000:32:04.0: PM suspend: host status cannot be suspended [15104.283508] hisi_sas_v3_hw 0000:30:04.0: PM suspend: host status cannot be suspended [15104.283516] hisi_sas_v3_hw 0000:32:04.0: PM: pci_pm_suspend(): suspend_v3_hw+0x0/0x210 [hisi_sas_v3_hw] returns -16 [15104.283527] hisi_sas_v3_hw 0000:32:04.0: PM: dpm_run_callback(): pci_pm_suspend+0x0/0x1c0 returns -16 [15104.283524] hisi_sas_v3_hw 0000:30:04.0: PM: pci_pm_suspend(): suspend_v3_hw+0x0/0x210 [hisi_sas_v3_hw] returns -16 [15104.283533] hisi_sas_v3_hw 0000:32:04.0: PM: failed to suspend async: error -16 [15104.283536] hisi_sas_v3_hw 0000:30:04.0: PM: dpm_run_callback(): pci_pm_suspend+0x0/0x1c0 returns -16 [15104.283542] hisi_sas_v3_hw 0000:30:04.0: PM: failed to suspend async: error -16 The problem is that when the ->runtime_suspend() callback suspend_v3_hw() is executing, the current runtime PM status is RPM_ACTIVE and the usage count of the controller is not 0, so return immediately. To fix it, Check the device usage count only when the runtime PM status is RPM_SUSPENDING. Signed-off-by: Yihang Li Signed-off-by: xiabing Signed-off-by: chenyi --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 91d9e796d759..cd65bd289d3f 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -5174,7 +5174,8 @@ static int _suspend_v3_hw(struct device *device) interrupt_disable_v3_hw(hisi_hba); #ifdef CONFIG_PM - if (atomic_read(&device->power.usage_count)) { + if ((device->power.runtime_status == RPM_SUSPENDING) && + atomic_read(&device->power.usage_count)) { dev_err(dev, "PM suspend: host status cannot be suspended\n"); rc = -EBUSY; goto err_out; From 0d73b0b9d861b6a879c5b55b9c3e65d813df0470 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 13 Sep 2023 10:15:26 +0800 Subject: [PATCH 04/12] scsi: hisi_sas: Directly call register snapshot instead of using workqueue Upstream commit:2ff07b5c6fe9173e7a7de3b23f300d71ad4d8fde Currently, register information dump is performed via workqueue, regardless of the trigger mode (automatic or manual). There is a delay in dumping register through workqueue, the exact register information at trigger time cannot be obtained. Call register snapshot directly instead of through a workqueue. Signed-off-by: Yihang Li Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1694571327-78697-3-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen Signed-off-by: chenyi --- drivers/scsi/hisi_sas/hisi_sas.h | 1 - drivers/scsi/hisi_sas/hisi_sas_main.c | 7 +++++-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 14 +++----------- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 9e73e9cbbcfc..3d511c44c02d 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -451,7 +451,6 @@ struct hisi_hba { const struct hisi_sas_hw *hw; /* Low level hw interface */ unsigned long sata_dev_bitmap[BITS_TO_LONGS(HISI_SAS_MAX_DEVICES)]; struct work_struct rst_work; - struct work_struct debugfs_work; u32 phy_state; u32 intr_coal_ticks; /* Time of interrupt coalesce in us */ u32 intr_coal_count; /* Interrupt count to coalesce */ diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index fb9b4fbac9ca..f5779c2ccaf5 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1942,8 +1942,11 @@ static bool hisi_sas_internal_abort_timeout(struct sas_task *task, struct hisi_hba *hisi_hba = dev_to_hisi_hba(device); struct hisi_sas_internal_abort_data *timeout = data; - if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) - queue_work(hisi_hba->wq, &hisi_hba->debugfs_work); + if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) { + down(&hisi_hba->sem); + hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); + up(&hisi_hba->sem); + } if (task->task_state_flags & SAS_TASK_STATE_DONE) { pr_err("Internal abort: timeout %016llx\n", diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index cd65bd289d3f..8882cddeb8f5 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -558,7 +558,6 @@ static int experimental_iopoll_q_cnt; module_param(experimental_iopoll_q_cnt, int, 0444); MODULE_PARM_DESC(experimental_iopoll_q_cnt, "number of queues to be used as poll mode, def=0"); -static void debugfs_work_handler_v3_hw(struct work_struct *work); static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba); static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off) @@ -3399,7 +3398,6 @@ hisi_sas_shost_alloc_pci(struct pci_dev *pdev) hisi_hba = shost_priv(shost); INIT_WORK(&hisi_hba->rst_work, hisi_sas_rst_work_handler); - INIT_WORK(&hisi_hba->debugfs_work, debugfs_work_handler_v3_hw); hisi_hba->hw = &hisi_sas_v3_hw; hisi_hba->pci_dev = pdev; hisi_hba->dev = dev; @@ -3921,7 +3919,9 @@ static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, if (buf[0] != '1') return -EFAULT; - queue_work(hisi_hba->wq, &hisi_hba->debugfs_work); + down(&hisi_hba->sem); + debugfs_snapshot_regs_v3_hw(hisi_hba); + up(&hisi_hba->sem); return count; } @@ -4672,14 +4672,6 @@ static void debugfs_fifo_init_v3_hw(struct hisi_hba *hisi_hba) } } -static void debugfs_work_handler_v3_hw(struct work_struct *work) -{ - struct hisi_hba *hisi_hba = - container_of(work, struct hisi_hba, debugfs_work); - - debugfs_snapshot_regs_v3_hw(hisi_hba); -} - static void debugfs_release_v3_hw(struct hisi_hba *hisi_hba, int dump_index) { struct device *dev = hisi_hba->dev; From 251bffee94aff73c4e5c5cb1e1b0b2e61bfffb44 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 13 Sep 2023 10:15:27 +0800 Subject: [PATCH 05/12] scsi: hisi_sas: Allocate DFX memory during dump trigger Upstream commit:63f0733d07ce60252e885602b39571ade0441015 Currently, if CONFIG_SCSI_HISI_SAS_DEBUGFS_DEFAULT_ENABLE is enabled, the memory space used by DFX is allocated during device initialization, which occupies a large number of memory resources. The memory usage before and after the driver is loaded is as follows: Memory usage before the driver is loaded: $ free -m total used free shared buff/cache available Mem: 867352 2578 864037 11 735 861681 Swap: 4095 0 4095 Memory usage after the driver which include 4 HBAs is loaded: $ insmod hisi_sas_v3_hw.ko $ free -m total used free shared buff/cache available Mem: 867352 4760 861848 11 743 859495 Swap: 4095 0 4095 The driver with 4 HBAs connected will allocate about 110 MB of memory without enabling debugfs. Therefore, to avoid wasting memory resources, DFX memory is allocated during dump triggering. The dump may fail due to memory allocation failure. After this change, each dump costs about 10 MB of memory, and each dump lasts about 100 ms. Signed-off-by: Yihang Li Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1694571327-78697-4-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen Signed-off-by: chenyi --- drivers/scsi/hisi_sas/hisi_sas.h | 2 +- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 93 +++++++++++++------------- 2 files changed, 46 insertions(+), 49 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 3d511c44c02d..1e4550156b73 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -343,7 +343,7 @@ struct hisi_sas_hw { u8 reg_index, u8 reg_count, u8 *write_data); void (*wait_cmds_complete_timeout)(struct hisi_hba *hisi_hba, int delay_ms, int timeout_ms); - void (*debugfs_snapshot_regs)(struct hisi_hba *hisi_hba); + int (*debugfs_snapshot_regs)(struct hisi_hba *hisi_hba); int complete_hdr_size; const struct scsi_host_template *sht; }; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 8882cddeb8f5..a16b1ff3bacd 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -558,7 +558,7 @@ static int experimental_iopoll_q_cnt; module_param(experimental_iopoll_q_cnt, int, 0444); MODULE_PARM_DESC(experimental_iopoll_q_cnt, "number of queues to be used as poll mode, def=0"); -static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba); +static int debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba); static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off) { @@ -3869,37 +3869,6 @@ static void debugfs_create_files_v3_hw(struct hisi_hba *hisi_hba) &debugfs_ras_v3_hw_fops); } -static void debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba) -{ - int debugfs_dump_index = hisi_hba->debugfs_dump_index; - struct device *dev = hisi_hba->dev; - u64 timestamp = local_clock(); - - if (debugfs_dump_index >= hisi_sas_debugfs_dump_count) { - dev_warn(dev, "dump count exceeded!\n"); - return; - } - - do_div(timestamp, NSEC_PER_MSEC); - hisi_hba->debugfs_timestamp[debugfs_dump_index] = timestamp; - - debugfs_snapshot_prepare_v3_hw(hisi_hba); - - debugfs_snapshot_global_reg_v3_hw(hisi_hba); - debugfs_snapshot_port_reg_v3_hw(hisi_hba); - debugfs_snapshot_axi_reg_v3_hw(hisi_hba); - debugfs_snapshot_ras_reg_v3_hw(hisi_hba); - debugfs_snapshot_cq_reg_v3_hw(hisi_hba); - debugfs_snapshot_dq_reg_v3_hw(hisi_hba); - debugfs_snapshot_itct_reg_v3_hw(hisi_hba); - debugfs_snapshot_iost_reg_v3_hw(hisi_hba); - - debugfs_create_files_v3_hw(hisi_hba); - - debugfs_snapshot_restore_v3_hw(hisi_hba); - hisi_hba->debugfs_dump_index++; -} - static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) @@ -3907,9 +3876,6 @@ static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, struct hisi_hba *hisi_hba = file->f_inode->i_private; char buf[8]; - if (hisi_hba->debugfs_dump_index >= hisi_sas_debugfs_dump_count) - return -EFAULT; - if (count > 8) return -EFAULT; @@ -3920,7 +3886,10 @@ static ssize_t debugfs_trigger_dump_v3_hw_write(struct file *file, return -EFAULT; down(&hisi_hba->sem); - debugfs_snapshot_regs_v3_hw(hisi_hba); + if (debugfs_snapshot_regs_v3_hw(hisi_hba)) { + up(&hisi_hba->sem); + return -EFAULT; + } up(&hisi_hba->sem); return count; @@ -4706,7 +4675,7 @@ static int debugfs_alloc_v3_hw(struct hisi_hba *hisi_hba, int dump_index) { const struct hisi_sas_hw *hw = hisi_hba->hw; struct device *dev = hisi_hba->dev; - int p, c, d, r, i; + int p, c, d, r; size_t sz; for (r = 0; r < DEBUGFS_REGS_NUM; r++) { @@ -4786,11 +4755,48 @@ static int debugfs_alloc_v3_hw(struct hisi_hba *hisi_hba, int dump_index) return 0; fail: - for (i = 0; i < hisi_sas_debugfs_dump_count; i++) - debugfs_release_v3_hw(hisi_hba, i); + debugfs_release_v3_hw(hisi_hba, dump_index); return -ENOMEM; } +static int debugfs_snapshot_regs_v3_hw(struct hisi_hba *hisi_hba) +{ + int debugfs_dump_index = hisi_hba->debugfs_dump_index; + struct device *dev = hisi_hba->dev; + u64 timestamp = local_clock(); + + if (debugfs_dump_index >= hisi_sas_debugfs_dump_count) { + dev_warn(dev, "dump count exceeded!\n"); + return -EINVAL; + } + + if (debugfs_alloc_v3_hw(hisi_hba, debugfs_dump_index)) { + dev_warn(dev, "failed to alloc memory\n"); + return -ENOMEM; + } + + do_div(timestamp, NSEC_PER_MSEC); + hisi_hba->debugfs_timestamp[debugfs_dump_index] = timestamp; + + debugfs_snapshot_prepare_v3_hw(hisi_hba); + + debugfs_snapshot_global_reg_v3_hw(hisi_hba); + debugfs_snapshot_port_reg_v3_hw(hisi_hba); + debugfs_snapshot_axi_reg_v3_hw(hisi_hba); + debugfs_snapshot_ras_reg_v3_hw(hisi_hba); + debugfs_snapshot_cq_reg_v3_hw(hisi_hba); + debugfs_snapshot_dq_reg_v3_hw(hisi_hba); + debugfs_snapshot_itct_reg_v3_hw(hisi_hba); + debugfs_snapshot_iost_reg_v3_hw(hisi_hba); + + debugfs_create_files_v3_hw(hisi_hba); + + debugfs_snapshot_restore_v3_hw(hisi_hba); + hisi_hba->debugfs_dump_index++; + + return 0; +} + static void debugfs_phy_down_cnt_init_v3_hw(struct hisi_hba *hisi_hba) { struct dentry *dir = debugfs_create_dir("phy_down_cnt", @@ -4877,7 +4883,6 @@ static void debugfs_exit_v3_hw(struct hisi_hba *hisi_hba) static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; - int i; hisi_hba->debugfs_dir = debugfs_create_dir(dev_name(dev), hisi_sas_debugfs_dir); @@ -4894,14 +4899,6 @@ static void debugfs_init_v3_hw(struct hisi_hba *hisi_hba) debugfs_phy_down_cnt_init_v3_hw(hisi_hba); debugfs_fifo_init_v3_hw(hisi_hba); - - for (i = 0; i < hisi_sas_debugfs_dump_count; i++) { - if (debugfs_alloc_v3_hw(hisi_hba, i)) { - debugfs_exit_v3_hw(hisi_hba); - dev_dbg(dev, "failed to init debugfs!\n"); - break; - } - } } static int From 33e74bc74c1d0e72adbf0bcbbabf881795d05bda Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Mon, 4 Dec 2023 17:49:06 +0800 Subject: [PATCH 06/12] scsi: hisi_sas: Remove redundant checks for automatic debugfs dump driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA ---------------------------------------------------------------------- In commit 63f0733d07ce ("scsi: hisi_sas: Allocate DFX memory during dump trigger"), the memory allocation time of the DFX is changed from device initialization to dump occurs, so .debugfs_itct is not a valid address and do not need to check. The parameter hisi_sas_debugfs_enable is enough to check whether automatic debugfs dump is triggered, so remove redunant checks. Fixes: 63f0733d07ce ("scsi: hisi_sas: Allocate DFX memory during dump trigger") Signed-off-by: Yihang Li Signed-off-by: xiabing Signed-off-by: chenyi --- drivers/scsi/hisi_sas/hisi_sas_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index f5779c2ccaf5..d6685e4bebe1 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1573,7 +1573,7 @@ static int hisi_sas_controller_prereset(struct hisi_hba *hisi_hba) return -EPERM; } - if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) + if (hisi_sas_debugfs_enable) hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); return 0; @@ -1942,7 +1942,7 @@ static bool hisi_sas_internal_abort_timeout(struct sas_task *task, struct hisi_hba *hisi_hba = dev_to_hisi_hba(device); struct hisi_sas_internal_abort_data *timeout = data; - if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) { + if (hisi_sas_debugfs_enable) { down(&hisi_hba->sem); hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); up(&hisi_hba->sem); From ef4be5c3d5423b6924b3444d402e7c13a11e46bf Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Mon, 4 Dec 2023 17:49:07 +0800 Subject: [PATCH 07/12] scsi: hisi_sas: Handle the NCQ error returned by D2H frame driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA ---------------------------------------------------------------------- We find that some disks use D2H frame instead of SDB frame to return NCQ error. Currently, only the I/O corresponding to the D2H frame is processed in this scenario, which does not meet the processing requirements of the NCQ error scenario. So we set dev_status to HISI_SAS_DEV_NCQ_ERR and abort all I/Os of the disk in this scenario. Signed-off-by: Xingui Yang Reviewed-by: Xiang Chen Signed-off-by: xiabing Signed-off-by: chenyi --- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index a16b1ff3bacd..d0dfe988271f 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2244,7 +2244,15 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP: if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) && (sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) { - ts->stat = SAS_PROTO_RESPONSE; + if (task->ata_task.use_ncq) { + struct domain_device *device = task->dev; + struct hisi_sas_device *sas_dev = + device->lldd_dev; + sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR; + slot->abort = 1; + } else { + ts->stat = SAS_PROTO_RESPONSE; + } } else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) { ts->residual = trans_tx_fail_type; ts->stat = SAS_DATA_UNDERRUN; From 101482ffcd19cccf5106a7654782d338de21d766 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Mon, 4 Dec 2023 17:49:13 +0800 Subject: [PATCH 08/12] scsi: hisi_sas: Fix the deadlock issue that occurs during automatic dump driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8IL6H CVE: NA ---------------------------------------------------------------------- If we issue a disable PHY command, the device attached with it will go offline, if a 2 bit ECC error occurs at the same time, a hung task may be found: [ 4613.652388] INFO: task kworker/u256:0:165233 blocked for more than 120 seconds. [ 4613.666297] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 4613.674809] task:kworker/u256:0 state:D stack: 0 pid:165233 ppid: 2 flags:0x00000208 [ 4613.683959] Workqueue: 0000:74:02.0_disco_q sas_revalidate_domain [libsas] [ 4613.691518] Call trace: [ 4613.694678] __switch_to+0xf8/0x17c [ 4613.698872] __schedule+0x660/0xee0 [ 4613.703063] schedule+0xac/0x240 [ 4613.706994] schedule_timeout+0x500/0x610 [ 4613.711705] __down+0x128/0x36c [ 4613.715548] down+0x240/0x2d0 [ 4613.719221] hisi_sas_internal_abort_timeout+0x1bc/0x260 [hisi_sas_main] [ 4613.726618] sas_execute_internal_abort+0x144/0x310 [libsas] [ 4613.732976] sas_execute_internal_abort_dev+0x44/0x60 [libsas] [ 4613.739504] hisi_sas_internal_task_abort_dev.isra.0+0xbc/0x1b0 [hisi_sas_main] [ 4613.747499] hisi_sas_dev_gone+0x174/0x250 [hisi_sas_main] [ 4613.753682] sas_notify_lldd_dev_gone+0xec/0x2e0 [libsas] [ 4613.759781] sas_unregister_common_dev+0x4c/0x7a0 [libsas] [ 4613.765962] sas_destruct_devices+0xb8/0x120 [libsas] [ 4613.771709] sas_do_revalidate_domain.constprop.0+0x1b8/0x31c [libsas] [ 4613.778930] sas_revalidate_domain+0x60/0xa4 [libsas] [ 4613.784716] process_one_work+0x248/0x950 [ 4613.789424] worker_thread+0x318/0x934 [ 4613.793878] kthread+0x190/0x200 [ 4613.797810] ret_from_fork+0x10/0x18 [ 4613.802121] INFO: task kworker/u256:4:316722 blocked for more than 120 seconds. [ 4613.816026] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 4613.824538] task:kworker/u256:4 state:D stack: 0 pid:316722 ppid: 2 flags:0x00000208 [ 4613.833670] Workqueue: 0000:74:02.0 hisi_sas_rst_work_handler [hisi_sas_main] [ 4613.841491] Call trace: [ 4613.844647] __switch_to+0xf8/0x17c [ 4613.848852] __schedule+0x660/0xee0 [ 4613.853052] schedule+0xac/0x240 [ 4613.856984] schedule_timeout+0x500/0x610 [ 4613.861695] __down+0x128/0x36c [ 4613.865542] down+0x240/0x2d0 [ 4613.869216] hisi_sas_controller_prereset+0x58/0x1fc [hisi_sas_main] [ 4613.876324] hisi_sas_rst_work_handler+0x40/0x8c [hisi_sas_main] [ 4613.883019] process_one_work+0x248/0x950 [ 4613.887732] worker_thread+0x318/0x934 [ 4613.892204] kthread+0x190/0x200 [ 4613.896118] ret_from_fork+0x10/0x18 [ 4613.900423] INFO: task kworker/u256:1:348985 blocked for more than 121 seconds. [ 4613.914341] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 4613.922852] task:kworker/u256:1 state:D stack: 0 pid:348985 ppid: 2 flags:0x00000208 [ 4613.931984] Workqueue: 0000:74:02.0_event_q sas_port_event_worker [libsas] [ 4613.939549] Call trace: [ 4613.942702] __switch_to+0xf8/0x17c [ 4613.946892] __schedule+0x660/0xee0 [ 4613.951083] schedule+0xac/0x240 [ 4613.955015] schedule_timeout+0x500/0x610 [ 4613.959725] wait_for_common+0x200/0x610 [ 4613.964349] wait_for_completion+0x3c/0x5c [ 4613.969146] flush_workqueue+0x198/0x790 [ 4613.973776] sas_porte_broadcast_rcvd+0x1e8/0x320 [libsas] [ 4613.979960] sas_port_event_worker+0x54/0xa0 [libsas] [ 4613.985708] process_one_work+0x248/0x950 [ 4613.990420] worker_thread+0x318/0x934 [ 4613.994868] kthread+0x190/0x200 [ 4613.998800] ret_from_fork+0x10/0x18 This is because when the device goes offline, we obtain the hisi_hba semaphore and send the ABORT_DEV command to the device. However, the internal abort timed out due to the 2 bit ECC error and triggers automatic dump. In addition, because the hisi_hba semaphore has been obtained, the dump cannot be executed and the controller cannot be reset. Therefore, the deadlocks occur on the following circular dependencies: hisi_sas_dev_gone() -> down() -> hisi_sas_internal_task_abort_dev() -> ... -> hisi_sas_internal_abort_timeout() -> down(). The deadlock is triggered only when the timeout occurs during device goes offline. To fix this issue, use .rst_ha_timeout to distinguish the scenario where a device goes offline from other scenarios. Fixes: 2ff07b5c6fe9 ("scsi: hisi_sas: Directly call register snapshot instead of using workqueue") Signed-off-by: Yihang Li Reviewed-by: Xiang Chen Signed-off-by: xiabing Signed-off-by: chenyi --- drivers/scsi/hisi_sas/hisi_sas_main.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index d6685e4bebe1..8fd4bb1d8ba3 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1943,9 +1943,18 @@ static bool hisi_sas_internal_abort_timeout(struct sas_task *task, struct hisi_sas_internal_abort_data *timeout = data; if (hisi_sas_debugfs_enable) { - down(&hisi_hba->sem); + /* + * If timeout occurs in device gone scenario, to avoid + * circular dependency like: + * hisi_sas_dev_gone() -> down() -> ... -> + * hisi_sas_internal_abort_timeout() -> down(). + */ + if (!timeout->rst_ha_timeout) + down(&hisi_hba->sem); + hisi_hba->hw->debugfs_snapshot_regs(hisi_hba); - up(&hisi_hba->sem); + if (!timeout->rst_ha_timeout) + up(&hisi_hba->sem); } if (task->task_state_flags & SAS_TASK_STATE_DONE) { From 18f56f8104c0da983badf3b35aa3cfd463dee0ce Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Thu, 28 Mar 2024 17:06:26 +0800 Subject: [PATCH 09/12] scsi: libsas: Allocation SMP request is aligned to ARCH_DMA_MINALIGN This series [1] reducing the kmalloc() minimum alignment on arm64 to 8 (from 128). In libsas, this will cause SMP requests to be 8-byte-aligned through kmalloc() allocation. However, for the hisi_sas hardware, all commands address must be 16-byte-aligned. Otherwise, the commands fail to be executed. ARCH_DMA_MINALIGN represents the minimum (static) alignment for safe DMA operations, so use ARCH_DMA_MINALIGN as the alignment for SMP request. Link: https://lkml.kernel.org/r/20230612153201.554742-1-catalin.marinas@arm.com [1] Signed-off-by: Yihang Li Reviewed-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Jason Yan Signed-off-by: chenyi --- drivers/scsi/libsas/sas_expander.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 5c261005b74e..f6e6db8b8aba 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -135,7 +135,7 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size, static inline void *alloc_smp_req(int size) { - u8 *p = kzalloc(size, GFP_KERNEL); + u8 *p = kzalloc(ALIGN(size, ARCH_DMA_MINALIGN), GFP_KERNEL); if (p) p[0] = SMP_REQUEST; return p; From 6a038864099a9b73fcd90ac244b1e2a95d003cd7 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 10 Apr 2024 09:56:30 +0800 Subject: [PATCH 10/12] scsi: hisi_sas: Check whether debugfs is enabled before removing or releasing it mainline inclusion from mainline-v6.9-rc1 commit 69097a631c034451a75ca7cb6025460ba3a08f80 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=69097a631c034451a75ca7cb6025460ba3a08f80 ---------------------------------------------------------------------- hisi_sas debugfs remove should be executed only when debugfs is enabled. Check whether debugfs is enabled and then remove it only if enabled. Signed-off-by: Yihang Li Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1705904747-62186-4-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia Signed-off-by: chenyi --- drivers/scsi/hisi_sas/hisi_sas_main.c | 3 ++- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 8fd4bb1d8ba3..9c9b45aa5791 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2607,7 +2607,8 @@ static __exit void hisi_sas_exit(void) { sas_release_transport(hisi_sas_stt); - debugfs_remove(hisi_sas_debugfs_dir); + if (hisi_sas_debugfs_enable) + debugfs_remove(hisi_sas_debugfs_dir); } module_init(hisi_sas_init); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index d0dfe988271f..de0176b56911 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -5040,7 +5040,8 @@ err_out_unregister_ha: err_out_remove_host: scsi_remove_host(shost); err_out_undo_debugfs: - debugfs_exit_v3_hw(hisi_hba); + if (hisi_sas_debugfs_enable) + debugfs_exit_v3_hw(hisi_hba); err_out_free_host: hisi_sas_free(hisi_hba); scsi_host_put(shost); @@ -5080,7 +5081,9 @@ static void hisi_sas_v3_remove(struct pci_dev *pdev) hisi_sas_v3_destroy_irqs(pdev, hisi_hba); hisi_sas_free(hisi_hba); - debugfs_exit_v3_hw(hisi_hba); + if (hisi_sas_debugfs_enable) + debugfs_exit_v3_hw(hisi_hba); + scsi_host_put(shost); } From 391f2b4c725ac0ce2a45a0b22398ab2e797019fc Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Wed, 10 Apr 2024 09:56:31 +0800 Subject: [PATCH 11/12] scsi: hisi_sas: Remove hisi_hba->timer for v3 hw mainline inclusion from mainline-v6.9-rc1 commit f9242f166770b681d9f71341d96adc01c4da00ef category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f9242f166770b681d9f71341d96adc01c4da00ef ---------------------------------------------------------------------- hisi_hba->timer is not used for v3 hw but there are two places that some operations related to hisi_hba->timer are called by v3 hw: - Deleting the timer in function hisi_sas_v3_hw() which is only for v3 hw; - Deleting the timer in function hisi_sas_controller_reset_prepare() which is common for v1/v2/v3 hw. We can remove the timer in the first case, but for the second scenario we need to remove it only for v3 hw, so check hw->sht which is NULL only for v3 hw before deleting hisi_hba->timer. Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1705904747-62186-5-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen Signed-off-by: Bing Xia Signed-off-by: chenyi --- drivers/scsi/hisi_sas/hisi_sas_main.c | 7 ++++++- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 9c9b45aa5791..03f38bb34104 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1507,7 +1507,12 @@ void hisi_sas_controller_reset_prepare(struct hisi_hba *hisi_hba) scsi_block_requests(shost); hisi_hba->hw->wait_cmds_complete_timeout(hisi_hba, 100, 5000); - del_timer_sync(&hisi_hba->timer); + /* + * hisi_hba->timer is only used for v1/v2 hw, and check hw->sht + * which is also only used for v1/v2 hw to skip it for v3 hw + */ + if (hisi_hba->hw->sht) + del_timer_sync(&hisi_hba->timer); set_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags); } diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index de0176b56911..3ce4b947a93d 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -5073,7 +5073,6 @@ static void hisi_sas_v3_remove(struct pci_dev *pdev) struct Scsi_Host *shost = sha->shost; pm_runtime_get_noresume(dev); - del_timer_sync(&hisi_hba->timer); sas_unregister_ha(sha); flush_workqueue(hisi_hba->wq); From 6b2bfc75b636378054c6554dc3c61e6005d43b40 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 10 Apr 2024 09:56:38 +0800 Subject: [PATCH 12/12] scsi: hisi_sas: Modify the deadline for ata_wait_after_reset() driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I96KNQ CVE: NA ---------------------------------------------------------------------- We found that the second parameter of function ata_wait_after_reset() is incorrectly used. We call smp_ata_check_ready_type() to poll the device type until the 30s timeout, so the correct deadline should be (jiffies + 30000). Fixes: 3c2673a09cf1 ("scsi: hisi_sas: Fix SATA devices missing issue during I_T nexus reset") Signed-off-by: xiabing Signed-off-by: Yihang Li Reviewed-by: Xiang Chen Signed-off-by: Bing Xia Signed-off-by: chenyi --- drivers/scsi/hisi_sas/hisi_sas_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 03f38bb34104..8814ae45947c 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1796,8 +1796,10 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device) if (dev_is_sata(device)) { struct ata_link *link = &device->sata_dev.ap->link; + unsigned long deadline = ata_deadline(jiffies, + HISI_SAS_WAIT_PHYUP_TIMEOUT / HZ * 1000); - rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT, + rc = ata_wait_after_reset(link, deadline, smp_ata_check_ready_type); } else { msleep(2000);