From 74fd1b8c4419ab9942ccf4e378adf3bd4a86dd69 Mon Sep 17 00:00:00 2001 From: Cyril Hrubis Date: Mon, 2 Oct 2023 13:55:51 +0200 Subject: [PATCH 001/299] sched/rt: Disallow writing invalid values to sched_rt_period_us commit 079be8fc630943d9fc70a97807feb73d169ee3fc upstream. The validation of the value written to sched_rt_period_us was broken because: - the sysclt_sched_rt_period is declared as unsigned int - parsed by proc_do_intvec() - the range is asserted after the value parsed by proc_do_intvec() Because of this negative values written to the file were written into a unsigned integer that were later on interpreted as large positive integers which did passed the check: if (sysclt_sched_rt_period <= 0) return EINVAL; This commit fixes the parsing by setting explicit range for both perid_us and runtime_us into the sched_rt_sysctls table and processes the values with proc_dointvec_minmax() instead. Alternatively if we wanted to use full range of unsigned int for the period value we would have to split the proc_handler and use proc_douintvec() for it however even the Documentation/scheduller/sched-rt-group.rst describes the range as 1 to INT_MAX. As far as I can tell the only problem this causes is that the sysctl file allows writing negative values which when read back may confuse userspace. There is also a LTP test being submitted for these sysctl files at: http://patchwork.ozlabs.org/project/ltp/patch/20230901144433.2526-1-chrubis@suse.cz/ Signed-off-by: Cyril Hrubis Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231002115553.3007-2-chrubis@suse.cz Cc: Mahmoud Adam Signed-off-by: Petr Vorel Signed-off-by: Greg Kroah-Hartman --- kernel/sched/rt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 904dd8534597..4ac36eb4cdee 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -37,6 +37,8 @@ static struct ctl_table sched_rt_sysctls[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = sched_rt_handler, + .extra1 = SYSCTL_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sched_rt_runtime_us", @@ -44,6 +46,8 @@ static struct ctl_table sched_rt_sysctls[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = sched_rt_handler, + .extra1 = SYSCTL_NEG_ONE, + .extra2 = SYSCTL_INT_MAX, }, { .procname = "sched_rr_timeslice_ms", @@ -2989,9 +2993,6 @@ static int sched_rt_global_constraints(void) #ifdef CONFIG_SYSCTL static int sched_rt_global_validate(void) { - if (sysctl_sched_rt_period <= 0) - return -EINVAL; - if ((sysctl_sched_rt_runtime != RUNTIME_INF) && ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) || ((u64)sysctl_sched_rt_runtime * @@ -3022,7 +3023,7 @@ static int sched_rt_handler(struct ctl_table *table, int write, void *buffer, old_period = sysctl_sched_rt_period; old_runtime = sysctl_sched_rt_runtime; - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) { ret = sched_rt_global_validate(); From 6ea2f3b9b9f6061b21efad834b2bbbe4429bc6df Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Jan 2024 11:40:37 +0300 Subject: [PATCH 002/299] PCI: dwc: Fix a 64bit bug in dw_pcie_ep_raise_msix_irq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b5d1b4b46f856da1473c7ba9a5cdfcb55c9b2478 upstream. The "msg_addr" variable is u64. However, the "aligned_offset" is an unsigned int. This means that when the code does: msg_addr &= ~aligned_offset; it will unintentionally zero out the high 32 bits. Use ALIGN_DOWN() to do the alignment instead. Fixes: 2217fffcd63f ("PCI: dwc: endpoint: Fix dw_pcie_ep_raise_msix_irq() alignment support") Link: https://lore.kernel.org/r/af59c7ad-ab93-40f7-ad4a-7ac0b14d37f5@moroto.mountain Signed-off-by: Dan Carpenter Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel Reviewed-by: Ilpo Järvinen Reviewed-by: Manivannan Sadhasivam Cc: Signed-off-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-designware-ep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 8d79dd0e1d60..9d1f259fe357 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -6,6 +6,7 @@ * Author: Kishon Vijay Abraham I */ +#include #include #include @@ -598,7 +599,7 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, } aligned_offset = msg_addr & (epc->mem->window.page_size - 1); - msg_addr &= ~aligned_offset; + msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size); ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, epc->mem->window.page_size); if (ret) From 5babeec518c24d42d834228c3f115e66897667ae Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 12 Jan 2024 19:37:29 +0100 Subject: [PATCH 003/299] riscv/efistub: Ensure GP-relative addressing is not used commit afb2a4fb84555ef9e61061f6ea63ed7087b295d5 upstream. The cflags for the RISC-V efistub were missing -mno-relax, thus were under the risk that the compiler could use GP-relative addressing. That happened for _edata with binutils-2.41 and kernel 6.1, causing the relocation to fail due to an invalid kernel_size in handle_kernel_image. It was not yet observed with newer versions, but that may just be luck. Cc: Signed-off-by: Jan Kiszka Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/libstub/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index ef4c12f0877b..a0f1569b790d 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \ -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \ -DEFI_HAVE_STRCMP -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) -cflags-$(CONFIG_RISCV) += -fpic +cflags-$(CONFIG_RISCV) += -fpic -mno-relax cflags-$(CONFIG_LOONGARCH) += -fpie cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt From 43ee59fa01c8a1e8ce46b9248c678ba6f739681a Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Sun, 29 Oct 2023 18:07:04 +0100 Subject: [PATCH 004/299] dmaengine: apple-admac: Keep upper bits of REG_BUS_WIDTH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 306f5df81fcc89b462fbeb9dbe26d9a8ad7c7582 ] For RX channels, REG_BUS_WIDTH seems to default to a value of 0xf00, and macOS preserves the upper bits when setting the configuration in the lower ones. If we reset the upper bits to 0, this causes framing errors on suspend/resume (the data stream "tears" and channels get swapped around). Keeping the upper bits untouched, like the macOS driver does, fixes this issue. Signed-off-by: Hector Martin Reviewed-by: Martin Povišer Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20231029170704.82238-1-povik+lin@cutebit.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/apple-admac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c index 3af795635c5c..356298e4dd22 100644 --- a/drivers/dma/apple-admac.c +++ b/drivers/dma/apple-admac.c @@ -57,6 +57,8 @@ #define REG_BUS_WIDTH(ch) (0x8040 + (ch) * 0x200) +#define BUS_WIDTH_WORD_SIZE GENMASK(3, 0) +#define BUS_WIDTH_FRAME_SIZE GENMASK(7, 4) #define BUS_WIDTH_8BIT 0x00 #define BUS_WIDTH_16BIT 0x01 #define BUS_WIDTH_32BIT 0x02 @@ -740,7 +742,8 @@ static int admac_device_config(struct dma_chan *chan, struct admac_data *ad = adchan->host; bool is_tx = admac_chan_direction(adchan->no) == DMA_MEM_TO_DEV; int wordsize = 0; - u32 bus_width = 0; + u32 bus_width = readl_relaxed(ad->base + REG_BUS_WIDTH(adchan->no)) & + ~(BUS_WIDTH_WORD_SIZE | BUS_WIDTH_FRAME_SIZE); switch (is_tx ? config->dst_addr_width : config->src_addr_width) { case DMA_SLAVE_BUSWIDTH_1_BYTE: From ce10905116e6d204384e9ad0ae95f6b89124df8f Mon Sep 17 00:00:00 2001 From: David Strahan Date: Tue, 19 Dec 2023 13:36:50 -0600 Subject: [PATCH 005/299] scsi: smartpqi: Add new controller PCI IDs [ Upstream commit c6d5aa44eaf6d119f9ceb3bfc7d22405ac04232a ] All PCI ID entries in Hex. Add PCI IDs for Cisco controllers: VID / DID / SVID / SDID ---- ---- ---- ---- Cisco 24G TriMode M1 RAID 4GB FBWC 32D 9005 / 028f / 1137 / 02f8 Cisco 24G TriMode M1 RAID 4GB FBWC 16D 9005 / 028f / 1137 / 02f9 Cisco 24G TriMode M1 HBA 16D 9005 / 028f / 1137 / 02fa Add PCI IDs for CloudNine controllers: VID / DID / SVID / SDID ---- ---- ---- ---- SmartRAID P7604N-16i 9005 / 028f / 1f51 / 100e SmartRAID P7604N-8i 9005 / 028f / 1f51 / 100f SmartRAID P7504N-16i 9005 / 028f / 1f51 / 1010 SmartRAID P7504N-8i 9005 / 028f / 1f51 / 1011 SmartRAID P7504N-8i 9005 / 028f / 1f51 / 1043 SmartHBA P6500-8i 9005 / 028f / 1f51 / 1044 SmartRAID P7504-8i 9005 / 028f / 1f51 / 1045 Reviewed-by: Murthy Bhat Reviewed-by: Mahesh Rajashekhara Reviewed-by: Scott Teel Reviewed-by: Scott Benesh Reviewed-by: Mike McGowen Reviewed-by: Kevin Barnett Signed-off-by: David Strahan Signed-off-by: Don Brace Link: https://lore.kernel.org/r/20231219193653.277553-2-don.brace@microchip.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi_init.c | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 9a58df9312fa..d56201120087 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -10142,6 +10142,18 @@ static const struct pci_device_id pqi_pci_id_table[] = { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x1014, 0x0718) }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1137, 0x02f8) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1137, 0x02f9) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1137, 0x02fa) + }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x1e93, 0x1000) @@ -10198,6 +10210,34 @@ static const struct pci_device_id pqi_pci_id_table[] = { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, 0x1f51, 0x100a) }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x100e) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x100f) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1010) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1011) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1043) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1044) + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + 0x1f51, 0x1045) + }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, PCI_ANY_ID, PCI_ANY_ID) From df75b8ef712237061f599a4c7ecf2dcca898824b Mon Sep 17 00:00:00 2001 From: Mahesh Rajashekhara Date: Tue, 19 Dec 2023 13:36:51 -0600 Subject: [PATCH 006/299] scsi: smartpqi: Fix logical volume rescan race condition [ Upstream commit fb4cece17b4583f55b34a8538e27a4adc833c9d4 ] Correct rescan flag race condition. Multiple conditions are being evaluated before notifying OS to do a rescan. Driver will skip rescanning the device if any one of the following conditions are met: - Devices that have not yet been added to the OS or devices that have been removed. - Devices which are already marked for removal or in the phase of removal. Under very rare conditions, after logical volume size expansion, the OS still sees the size of the logical volume which was before expansion. The rescan flag in the driver is used to signal the need for a logical volume rescan. A race condition can occur in the driver, and it leads to one thread overwriting the flag inadvertently. As a result, driver is not notifying the OS SML to rescan the logical volume. Move device->rescan update into new function pqi_mark_volumes_for_rescan() and protect with a spin lock. Move check for device->rescan into new function pqi_volume_rescan_needed() and protect function call with a spin_lock. Reviewed-by: Scott Teel Reviewed-by: Scott Benesh Reviewed-by: Mike McGowen Reviewed-by: Kevin Barnett Co-developed-by: Murthy Bhat Signed-off-by: Murthy Bhat Signed-off-by: Mahesh Rajashekhara Signed-off-by: Don Brace Link: https://lore.kernel.org/r/20231219193653.277553-3-don.brace@microchip.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi.h | 1 - drivers/scsi/smartpqi/smartpqi_init.c | 43 ++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h index 041940183516..cdedc271857a 100644 --- a/drivers/scsi/smartpqi/smartpqi.h +++ b/drivers/scsi/smartpqi/smartpqi.h @@ -1347,7 +1347,6 @@ struct pqi_ctrl_info { bool controller_online; bool block_requests; bool scan_blocked; - u8 logical_volume_rescan_needed : 1; u8 inbound_spanning_supported : 1; u8 outbound_spanning_supported : 1; u8 pqi_mode_enabled : 1; diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index d56201120087..081bb2c09806 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -2093,8 +2093,6 @@ static void pqi_scsi_update_device(struct pqi_ctrl_info *ctrl_info, if (existing_device->devtype == TYPE_DISK) { existing_device->raid_level = new_device->raid_level; existing_device->volume_status = new_device->volume_status; - if (ctrl_info->logical_volume_rescan_needed) - existing_device->rescan = true; memset(existing_device->next_bypass_group, 0, sizeof(existing_device->next_bypass_group)); if (!pqi_raid_maps_equal(existing_device->raid_map, new_device->raid_map)) { kfree(existing_device->raid_map); @@ -2164,6 +2162,20 @@ static inline void pqi_init_device_tmf_work(struct pqi_scsi_dev *device) INIT_WORK(&tmf_work->work_struct, pqi_tmf_worker); } +static inline bool pqi_volume_rescan_needed(struct pqi_scsi_dev *device) +{ + if (pqi_device_in_remove(device)) + return false; + + if (device->sdev == NULL) + return false; + + if (!scsi_device_online(device->sdev)) + return false; + + return device->rescan; +} + static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *new_device_list[], unsigned int num_new_devices) { @@ -2284,9 +2296,13 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, if (device->sdev && device->queue_depth != device->advertised_queue_depth) { device->advertised_queue_depth = device->queue_depth; scsi_change_queue_depth(device->sdev, device->advertised_queue_depth); - if (device->rescan) { - scsi_rescan_device(device->sdev); + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + if (pqi_volume_rescan_needed(device)) { device->rescan = false; + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); + scsi_rescan_device(device->sdev); + } else { + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); } } } @@ -2308,8 +2324,6 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info, } } - ctrl_info->logical_volume_rescan_needed = false; - } static inline bool pqi_is_supported_device(struct pqi_scsi_dev *device) @@ -3702,6 +3716,21 @@ static bool pqi_ofa_process_event(struct pqi_ctrl_info *ctrl_info, return ack_event; } +static void pqi_mark_volumes_for_rescan(struct pqi_ctrl_info *ctrl_info) +{ + unsigned long flags; + struct pqi_scsi_dev *device; + + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + list_for_each_entry(device, &ctrl_info->scsi_device_list, scsi_device_list_entry) { + if (pqi_is_logical_device(device) && device->devtype == TYPE_DISK) + device->rescan = true; + } + + spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); +} + static void pqi_disable_raid_bypass(struct pqi_ctrl_info *ctrl_info) { unsigned long flags; @@ -3742,7 +3771,7 @@ static void pqi_event_worker(struct work_struct *work) ack_event = true; rescan_needed = true; if (event->event_type == PQI_EVENT_TYPE_LOGICAL_DEVICE) - ctrl_info->logical_volume_rescan_needed = true; + pqi_mark_volumes_for_rescan(ctrl_info); else if (event->event_type == PQI_EVENT_TYPE_AIO_STATE_CHANGE) pqi_disable_raid_bypass(ctrl_info); } From a613c646660aa3083330fb0d57d1b1bf5c63b21e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=C3=BCllner?= Date: Thu, 23 Nov 2023 19:58:20 +0100 Subject: [PATCH 007/299] tools: selftests: riscv: Fix compile warnings in vector tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e1baf5e68ed128c1e22ba43e5190526d85de323c ] GCC prints a couple of format string warnings when compiling the vector tests. Let's follow the recommendation in Documentation/printk-formats.txt to fix these warnings. Signed-off-by: Christoph Müllner Reviewed-by: Alexandre Ghiti Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20231123185821.2272504-5-christoph.muellner@vrull.eu Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- tools/testing/selftests/riscv/vector/v_initval_nolibc.c | 2 +- tools/testing/selftests/riscv/vector/vstate_prctl.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c index 66764edb0d52..1dd94197da30 100644 --- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c +++ b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c @@ -27,7 +27,7 @@ int main(void) datap = malloc(MAX_VSIZE); if (!datap) { - ksft_test_result_fail("fail to allocate memory for size = %lu\n", MAX_VSIZE); + ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE); exit(-1); } diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index b348b475be57..8ad94e08ff4d 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -68,7 +68,7 @@ int test_and_compare_child(long provided, long expected, int inherit) } rc = launch_test(inherit); if (rc != expected) { - ksft_test_result_fail("Test failed, check %d != %d\n", rc, + ksft_test_result_fail("Test failed, check %d != %ld\n", rc, expected); return -2; } @@ -87,7 +87,7 @@ int main(void) pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; rc = riscv_hwprobe(&pair, 1, 0, NULL, 0); if (rc < 0) { - ksft_test_result_fail("hwprobe() failed with %d\n", rc); + ksft_test_result_fail("hwprobe() failed with %ld\n", rc); return -1; } From 12d43aec0e75731dd800e5cbfa0714bf0b493443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=C3=BCllner?= Date: Thu, 23 Nov 2023 19:58:21 +0100 Subject: [PATCH 008/299] tools: selftests: riscv: Fix compile warnings in mm tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 12c16919652b5873f524c8b361336ecfa5ce5e6b ] When building the mm tests with a riscv32 compiler, we see a range of shift-count-overflow errors from shifting 1UL by more than 32 bits in do_mmaps(). Since, the relevant code is only called from code that is gated by `__riscv_xlen == 64`, we can just apply the same gating to do_mmaps(). Signed-off-by: Christoph Müllner Reviewed-by: Alexandre Ghiti Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20231123185821.2272504-6-christoph.muellner@vrull.eu Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- tools/testing/selftests/riscv/mm/mmap_test.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h index 9b8434f62f57..2e0db9c5be6c 100644 --- a/tools/testing/selftests/riscv/mm/mmap_test.h +++ b/tools/testing/selftests/riscv/mm/mmap_test.h @@ -18,6 +18,8 @@ struct addresses { int *on_56_addr; }; +// Only works on 64 bit +#if __riscv_xlen == 64 static inline void do_mmaps(struct addresses *mmap_addresses) { /* @@ -50,6 +52,7 @@ static inline void do_mmaps(struct addresses *mmap_addresses) mmap_addresses->on_56_addr = mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0); } +#endif /* __riscv_xlen == 64 */ static inline int memory_layout(void) { From 36bc5040c863b44af06094b22f1e50059227b9cb Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Thu, 11 Jan 2024 15:59:41 +0300 Subject: [PATCH 009/299] scsi: target: core: Add TMF to tmr_list handling [ Upstream commit 83ab68168a3d990d5ff39ab030ad5754cbbccb25 ] An abort that is responded to by iSCSI itself is added to tmr_list but does not go to target core. A LUN_RESET that goes through tmr_list takes a refcounter on the abort and waits for completion. However, the abort will be never complete because it was not started in target core. Unable to locate ITT: 0x05000000 on CID: 0 Unable to locate RefTaskTag: 0x05000000 on CID: 0. wait_for_tasks: Stopping tmf LUN_RESET with tag 0x0 ref_task_tag 0x0 i_state 34 t_state ISTATE_PROCESSING refcnt 2 transport_state active,stop,fabric_stop wait for tasks: tmf LUN_RESET with tag 0x0 ref_task_tag 0x0 i_state 34 t_state ISTATE_PROCESSING refcnt 2 transport_state active,stop,fabric_stop ... INFO: task kworker/0:2:49 blocked for more than 491 seconds. task:kworker/0:2 state:D stack: 0 pid: 49 ppid: 2 flags:0x00000800 Workqueue: events target_tmr_work [target_core_mod] Call Trace: __switch_to+0x2c4/0x470 _schedule+0x314/0x1730 schedule+0x64/0x130 schedule_timeout+0x168/0x430 wait_for_completion+0x140/0x270 target_put_cmd_and_wait+0x64/0xb0 [target_core_mod] core_tmr_lun_reset+0x30/0xa0 [target_core_mod] target_tmr_work+0xc8/0x1b0 [target_core_mod] process_one_work+0x2d4/0x5d0 worker_thread+0x78/0x6c0 To fix this, only add abort to tmr_list if it will be handled by target core. Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20240111125941.8688-1-d.bogdanov@yadro.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_device.c | 5 ----- drivers/target/target_core_transport.c | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index b6523d4b9259..86590a7e29f6 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -147,7 +147,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd) struct se_session *se_sess = se_cmd->se_sess; struct se_node_acl *nacl = se_sess->se_node_acl; struct se_tmr_req *se_tmr = se_cmd->se_tmr_req; - unsigned long flags; rcu_read_lock(); deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun); @@ -178,10 +177,6 @@ out_unlock: se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev); - spin_lock_irqsave(&se_tmr->tmr_dev->se_tmr_lock, flags); - list_add_tail(&se_tmr->tmr_list, &se_tmr->tmr_dev->dev_tmr_list); - spin_unlock_irqrestore(&se_tmr->tmr_dev->se_tmr_lock, flags); - return 0; } EXPORT_SYMBOL(transport_lookup_tmr_lun); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0686882bcbda..fb93d74c5d0b 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3627,6 +3627,10 @@ int transport_generic_handle_tmr( unsigned long flags; bool aborted = false; + spin_lock_irqsave(&cmd->se_dev->se_tmr_lock, flags); + list_add_tail(&cmd->se_tmr_req->tmr_list, &cmd->se_dev->dev_tmr_list); + spin_unlock_irqrestore(&cmd->se_dev->se_tmr_lock, flags); + spin_lock_irqsave(&cmd->t_state_lock, flags); if (cmd->transport_state & CMD_T_ABORTED) { aborted = true; From 8d76726eeb11b6a15762a76f5ec915afb297f0f8 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 17 Jan 2024 05:55:39 +0000 Subject: [PATCH 010/299] cifs: open_cached_dir should not rely on primary channel [ Upstream commit 936eba9cfb5cfbf6a2c762cd163605f2b784e03e ] open_cached_dir today selects ses->server a.k.a primary channel to send requests. When multichannel is used, the primary channel maybe down. So it does not make sense to rely only on that channel. This fix makes this function pick a channel with the standard helper function cifs_pick_channel. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cached_dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index d64a306a414b..971892620504 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -151,7 +151,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, return -EOPNOTSUPP; ses = tcon->ses; - server = ses->server; + server = cifs_pick_channel(ses); cfids = tcon->cfids; if (!server->ops->new_lease_key) From 6e400d6b960ab9e4b77a5c5f542ae90615ffd4f2 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 19 Jan 2024 18:10:44 +0530 Subject: [PATCH 011/299] dmaengine: shdma: increase size of 'dev_id' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 404290240827c3bb5c4e195174a8854eef2f89ac ] We seem to have hit warnings of 'output may be truncated' which is fixed by increasing the size of 'dev_id' drivers/dma/sh/shdmac.c: In function ‘sh_dmae_probe’: drivers/dma/sh/shdmac.c:541:34: error: ‘%d’ directive output may be truncated writing between 1 and 10 bytes into a region of size 9 [-Werror=format-truncation=] 541 | "sh-dmae%d.%d", pdev->id, id); | ^~ In function ‘sh_dmae_chan_probe’, inlined from ‘sh_dmae_probe’ at drivers/dma/sh/shdmac.c:845:9: drivers/dma/sh/shdmac.c:541:26: note: directive argument in the range [0, 2147483647] 541 | "sh-dmae%d.%d", pdev->id, id); | ^~~~~~~~~~~~~~ drivers/dma/sh/shdmac.c:541:26: note: directive argument in the range [0, 19] drivers/dma/sh/shdmac.c:540:17: note: ‘snprintf’ output between 11 and 21 bytes into a destination of size 16 540 | snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id), | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 541 | "sh-dmae%d.%d", pdev->id, id); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/sh/shdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h index 9c121a4b33ad..f97d80343aea 100644 --- a/drivers/dma/sh/shdma.h +++ b/drivers/dma/sh/shdma.h @@ -25,7 +25,7 @@ struct sh_dmae_chan { const struct sh_dmae_slave_config *config; /* Slave DMA configuration */ int xmit_shift; /* log_2(bytes_per_xfer) */ void __iomem *base; - char dev_id[16]; /* unique name per DMAC of channel */ + char dev_id[32]; /* unique name per DMAC of channel */ int pm_error; dma_addr_t slave_addr; }; From 9f11992462ad0c0b5de0c06f3398d467f12bac3b Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 19 Jan 2024 18:10:44 +0530 Subject: [PATCH 012/299] dmaengine: fsl-qdma: increase size of 'irq_name' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6386f6c995b3ab91c72cfb76e4465553c555a8da ] We seem to have hit warnings of 'output may be truncated' which is fixed by increasing the size of 'irq_name' drivers/dma/fsl-qdma.c: In function ‘fsl_qdma_irq_init’: drivers/dma/fsl-qdma.c:824:46: error: ‘%d’ directive writing between 1 and 11 bytes into a region of size 10 [-Werror=format-overflow=] 824 | sprintf(irq_name, "qdma-queue%d", i); | ^~ drivers/dma/fsl-qdma.c:824:35: note: directive argument in the range [-2147483641, 2147483646] 824 | sprintf(irq_name, "qdma-queue%d", i); | ^~~~~~~~~~~~~~ drivers/dma/fsl-qdma.c:824:17: note: ‘sprintf’ output between 12 and 22 bytes into a destination of size 20 824 | sprintf(irq_name, "qdma-queue%d", i); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/fsl-qdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index e4c293b76e05..781a3180baf2 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -805,7 +805,7 @@ fsl_qdma_irq_init(struct platform_device *pdev, int i; int cpu; int ret; - char irq_name[20]; + char irq_name[32]; fsl_qdma->error_irq = platform_get_irq_byname(pdev, "qdma-error"); From 22dced37d9c7c9975dc2f07e5a8ff107fe04bb1f Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 19 Jan 2024 18:10:44 +0530 Subject: [PATCH 013/299] dmaengine: dw-edma: increase size of 'name' in debugfs code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cb95a4fa50bbc1262bfb7fea482388a50b12948f ] We seem to have hit warnings of 'output may be truncated' which is fixed by increasing the size of 'name' drivers/dma/dw-edma/dw-hdma-v0-debugfs.c: In function ‘dw_hdma_v0_debugfs_on’: drivers/dma/dw-edma/dw-hdma-v0-debugfs.c:125:50: error: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size 8 [-Werror=format-truncation=] 125 | snprintf(name, sizeof(name), "%s:%d", CHANNEL_STR, i); | ^~ drivers/dma/dw-edma/dw-hdma-v0-debugfs.c: In function ‘dw_hdma_v0_debugfs_on’: drivers/dma/dw-edma/dw-hdma-v0-debugfs.c:142:50: error: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size 8 [-Werror=format-truncation=] 142 | snprintf(name, sizeof(name), "%s:%d", CHANNEL_STR, i); | ^~ drivers/dma/dw-edma/dw-edma-v0-debugfs.c: In function ‘dw_edma_debugfs_regs_wr’: drivers/dma/dw-edma/dw-edma-v0-debugfs.c:193:50: error: ‘%d’ directive output may be truncated writing between 1 and 11 bytes into a region of size 8 [-Werror=format-truncation=] 193 | snprintf(name, sizeof(name), "%s:%d", CHANNEL_STR, i); | ^~ Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dw-edma/dw-edma-v0-debugfs.c | 4 ++-- drivers/dma/dw-edma/dw-hdma-v0-debugfs.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c index 0745d9e7d259..406f169b09a7 100644 --- a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c +++ b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c @@ -176,7 +176,7 @@ dw_edma_debugfs_regs_wr(struct dw_edma *dw, struct dentry *dent) }; struct dentry *regs_dent, *ch_dent; int nr_entries, i; - char name[16]; + char name[32]; regs_dent = debugfs_create_dir(WRITE_STR, dent); @@ -239,7 +239,7 @@ static noinline_for_stack void dw_edma_debugfs_regs_rd(struct dw_edma *dw, }; struct dentry *regs_dent, *ch_dent; int nr_entries, i; - char name[16]; + char name[32]; regs_dent = debugfs_create_dir(READ_STR, dent); diff --git a/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c b/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c index 520c81978b08..dcdc57fe976c 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-debugfs.c @@ -116,7 +116,7 @@ static void dw_hdma_debugfs_regs_ch(struct dw_edma *dw, enum dw_edma_dir dir, static void dw_hdma_debugfs_regs_wr(struct dw_edma *dw, struct dentry *dent) { struct dentry *regs_dent, *ch_dent; - char name[16]; + char name[32]; int i; regs_dent = debugfs_create_dir(WRITE_STR, dent); @@ -133,7 +133,7 @@ static void dw_hdma_debugfs_regs_wr(struct dw_edma *dw, struct dentry *dent) static void dw_hdma_debugfs_regs_rd(struct dw_edma *dw, struct dentry *dent) { struct dentry *regs_dent, *ch_dent; - char name[16]; + char name[32]; int i; regs_dent = debugfs_create_dir(READ_STR, dent); From 29df20cae2ce9a791885bc61f760560006d2be87 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Tue, 16 Jan 2024 14:22:57 +0000 Subject: [PATCH 014/299] wifi: cfg80211: fix missing interfaces when dumping [ Upstream commit a6e4f85d3820d00694ed10f581f4c650445dbcda ] The nl80211_dump_interface() supports resumption in case nl80211_send_iface() doesn't have the resources to complete its work. The logic would store the progress as iteration offsets for rdev and wdev loops. However the logic did not properly handle resumption for non-last rdev. Assuming a system with 2 rdevs, with 2 wdevs each, this could happen: dump(cb=[0, 0]): if_start=cb[1] (=0) send rdev0.wdev0 -> ok send rdev0.wdev1 -> yield cb[1] = 1 dump(cb=[0, 1]): if_start=cb[1] (=1) send rdev0.wdev1 -> ok // since if_start=1 the rdev0.wdev0 got skipped // through if_idx < if_start send rdev1.wdev1 -> ok The if_start needs to be reset back to 0 upon wdev loop end. The problem is actually hard to hit on a desktop, and even on most routers. The prerequisites for this manifesting was: - more than 1 wiphy - a few handful of interfaces - dump without rdev or wdev filter I was seeing this with 4 wiphys 9 interfaces each. It'd miss 6 interfaces from the last wiphy reported to userspace. Signed-off-by: Michal Kazior Link: https://msgid.link/20240116142340.89678-1-kazikcz@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0b0dfecedc50..c8bfacd5c8f3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4012,6 +4012,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx++; } + if_start = 0; wp_idx++; } out: From 54b79d8786964e2f840e8a2ec4a9f9a50f3d4954 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 4 Jan 2024 19:10:59 +0100 Subject: [PATCH 015/299] wifi: mac80211: fix race condition on enabling fast-xmit [ Upstream commit bcbc84af1183c8cf3d1ca9b78540c2185cd85e7f ] fast-xmit must only be enabled after the sta has been uploaded to the driver, otherwise it could end up passing the not-yet-uploaded sta via drv_tx calls to the driver, leading to potential crashes because of uninitialized drv_priv data. Add a missing sta->uploaded check and re-check fast xmit after inserting a sta. Signed-off-by: Felix Fietkau Link: https://msgid.link/20240104181059.84032-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/sta_info.c | 2 ++ net/mac80211/tx.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index e112300caaf7..c61eb867bb4a 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -914,6 +914,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); + ieee80211_check_fast_xmit(sta); + return 0; out_remove: if (sta->sta.valid_links) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5ab9594ae119..5c6c5254d987 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3034,7 +3034,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) sdata->vif.type == NL80211_IFTYPE_STATION) goto out; - if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded) goto out; if (test_sta_flag(sta, WLAN_STA_PS_STA) || From bc3c2e58d73b28b9a8789fca84778ee165a72d13 Mon Sep 17 00:00:00 2001 From: Fullway Wang Date: Thu, 18 Jan 2024 11:49:40 +0800 Subject: [PATCH 016/299] fbdev: savage: Error out if pixclock equals zero [ Upstream commit 04e5eac8f3ab2ff52fa191c187a46d4fdbc1e288 ] The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of pixclock, it may cause divide-by-zero error. Although pixclock is checked in savagefb_decode_var(), but it is not checked properly in savagefb_probe(). Fix this by checking whether pixclock is zero in the function savagefb_check_var() before info->var.pixclock is used as the divisor. This is similar to CVE-2022-3061 in i740fb which was fixed by commit 15cf0b8. Signed-off-by: Fullway Wang Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/savage/savagefb_driver.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/savage/savagefb_driver.c b/drivers/video/fbdev/savage/savagefb_driver.c index b5f84bd4804b..4ba5cd55e5a5 100644 --- a/drivers/video/fbdev/savage/savagefb_driver.c +++ b/drivers/video/fbdev/savage/savagefb_driver.c @@ -869,6 +869,9 @@ static int savagefb_check_var(struct fb_var_screeninfo *var, DBG("savagefb_check_var"); + if (!var->pixclock) + return -EINVAL; + var->transp.offset = 0; var->transp.length = 0; switch (var->bits_per_pixel) { From 99f1abc34a6dde248d2219d64aa493c76bbdd9eb Mon Sep 17 00:00:00 2001 From: Fullway Wang Date: Thu, 18 Jan 2024 14:24:43 +0800 Subject: [PATCH 017/299] fbdev: sis: Error out if pixclock equals zero [ Upstream commit e421946be7d9bf545147bea8419ef8239cb7ca52 ] The userspace program could pass any values to the driver through ioctl() interface. If the driver doesn't check the value of pixclock, it may cause divide-by-zero error. In sisfb_check_var(), var->pixclock is used as a divisor to caculate drate before it is checked against zero. Fix this by checking it at the beginning. This is similar to CVE-2022-3061 in i740fb which was fixed by commit 15cf0b8. Signed-off-by: Fullway Wang Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/sis/sis_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c index 0f5374f6ef05..6d524a65af18 100644 --- a/drivers/video/fbdev/sis/sis_main.c +++ b/drivers/video/fbdev/sis/sis_main.c @@ -1475,6 +1475,8 @@ sisfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) vtotal = var->upper_margin + var->lower_margin + var->vsync_len; + if (!var->pixclock) + return -EINVAL; pixclock = var->pixclock; if((var->vmode & FB_VMODE_MASK) == FB_VMODE_NONINTERLACED) { From 763c59714cf4230cb822d26459a7d9b59b2ba70f Mon Sep 17 00:00:00 2001 From: Liming Sun Date: Thu, 11 Jan 2024 12:31:06 -0500 Subject: [PATCH 018/299] platform/mellanox: mlxbf-tmfifo: Drop Tx network packet when Tx TmFIFO is full MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8cbc756b802605dee3dd40019bd75960772bacf5 ] Starting from Linux 5.16 kernel, Tx timeout mechanism was added in the virtio_net driver which prints the "Tx timeout" warning message when a packet stays in Tx queue for too long. Below is an example of the reported message: "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since last trans: 3079892256". This issue could happen when external host driver which drains the FIFO is restared, stopped or upgraded. To avoid such confusing "Tx timeout" messages, this commit adds logic to drop the outstanding Tx packet if it's not able to transmit in two seconds due to Tx FIFO full, which can be considered as congestion or out-of-resource drop. This commit also handles the special case that the packet is half- transmitted into the Tx FIFO. In such case, the packet is discarded with remaining length stored in vring->rem_padding. So paddings with zeros can be sent out when Tx space is available to maintain the integrity of the packet format. The padded packet will be dropped on the receiving side. Signed-off-by: Liming Sun Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240111173106.96958-1-limings@nvidia.com Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index ab7d7a1235b8..39828eb84e0b 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -47,6 +47,9 @@ /* Message with data needs at least two words (for header & data). */ #define MLXBF_TMFIFO_DATA_MIN_WORDS 2 +/* Tx timeout in milliseconds. */ +#define TMFIFO_TX_TIMEOUT 2000 + /* ACPI UID for BlueField-3. */ #define TMFIFO_BF3_UID 1 @@ -62,12 +65,14 @@ struct mlxbf_tmfifo; * @drop_desc: dummy desc for packet dropping * @cur_len: processed length of the current descriptor * @rem_len: remaining length of the pending packet + * @rem_padding: remaining bytes to send as paddings * @pkt_len: total length of the pending packet * @next_avail: next avail descriptor id * @num: vring size (number of descriptors) * @align: vring alignment size * @index: vring index * @vdev_id: vring virtio id (VIRTIO_ID_xxx) + * @tx_timeout: expire time of last tx packet * @fifo: pointer to the tmfifo structure */ struct mlxbf_tmfifo_vring { @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring { struct vring_desc drop_desc; int cur_len; int rem_len; + int rem_padding; u32 pkt_len; u16 next_avail; int num; int align; int index; int vdev_id; + unsigned long tx_timeout; struct mlxbf_tmfifo *fifo; }; @@ -819,6 +826,50 @@ mlxbf_tmfifo_desc_done: return true; } +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring) +{ + unsigned long flags; + + /* Only handle Tx timeout for network vdev. */ + if (vring->vdev_id != VIRTIO_ID_NET) + return; + + /* Initialize the timeout or return if not expired. */ + if (!vring->tx_timeout) { + /* Initialize the timeout. */ + vring->tx_timeout = jiffies + + msecs_to_jiffies(TMFIFO_TX_TIMEOUT); + return; + } else if (time_before(jiffies, vring->tx_timeout)) { + /* Return if not timeout yet. */ + return; + } + + /* + * Drop the packet after timeout. The outstanding packet is + * released and the remaining bytes will be sent with padding byte 0x00 + * as a recovery. On the peer(host) side, the padding bytes 0x00 will be + * either dropped directly, or appended into existing outstanding packet + * thus dropped as corrupted network packet. + */ + vring->rem_padding = round_up(vring->rem_len, sizeof(u64)); + mlxbf_tmfifo_release_pkt(vring); + vring->cur_len = 0; + vring->rem_len = 0; + vring->fifo->vring[0] = NULL; + + /* + * Make sure the load/store are in order before + * returning back to virtio. + */ + virtio_mb(false); + + /* Notify upper layer. */ + spin_lock_irqsave(&vring->fifo->spin_lock[0], flags); + vring_interrupt(0, vring->vq); + spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags); +} + /* Rx & Tx processing of a queue. */ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) { @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) return; do { +retry: /* Get available FIFO space. */ if (avail == 0) { if (is_rx) @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) break; } + /* Insert paddings for discarded Tx packet. */ + if (!is_rx) { + vring->tx_timeout = 0; + while (vring->rem_padding >= sizeof(u64)) { + writeq(0, vring->fifo->tx.data); + vring->rem_padding -= sizeof(u64); + if (--avail == 0) + goto retry; + } + } + /* Console output always comes from the Tx buffer. */ if (!is_rx && devid == VIRTIO_ID_CONSOLE) { mlxbf_tmfifo_console_tx(fifo, avail); @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) /* Handle one descriptor. */ more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail); } while (more); + + /* Check Tx timeout. */ + if (avail <= 0 && !is_rx) + mlxbf_tmfifo_check_tx_timeout(vring); } /* Handle Rx or Tx queues. */ From 8298ea0f51a987642c25fae2a8edc61c82382c8d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 22 Jan 2024 14:00:34 +0200 Subject: [PATCH 019/299] spi: intel-pci: Add support for Arrow Lake SPI serial flash [ Upstream commit 8afe3c7fcaf72fca1e7d3dab16a5b7f4201ece17 ] This adds the PCI ID of the Arrow Lake and Meteor Lake-S PCH SPI serial flash controller. This one supports all the necessary commands Linux SPI-NOR stack requires. Signed-off-by: Mika Westerberg Link: https://msgid.link/r/20240122120034.2664812-3-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index b9918dcc3802..07d20ca1164c 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -76,6 +76,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7e23), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0x7f24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x9d24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x9da4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&cnl_info }, From d637b5118274701e8448f35953877daf04df18b4 Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 23 Jan 2024 15:11:49 +0800 Subject: [PATCH 020/299] spi: hisi-sfc-v3xx: Return IRQ_NONE if no interrupts were detected [ Upstream commit de8b6e1c231a95abf95ad097b993d34b31458ec9 ] Return IRQ_NONE from the interrupt handler when no interrupt was detected. Because an empty interrupt will cause a null pointer error: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: complete+0x54/0x100 hisi_sfc_v3xx_isr+0x2c/0x40 [spi_hisi_sfc_v3xx] __handle_irq_event_percpu+0x64/0x1e0 handle_irq_event+0x7c/0x1cc Signed-off-by: Devyn Liu Link: https://msgid.link/r/20240123071149.917678-1-liudingyuan@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-hisi-sfc-v3xx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c index 9d22018f7985..1301d14483d4 100644 --- a/drivers/spi/spi-hisi-sfc-v3xx.c +++ b/drivers/spi/spi-hisi-sfc-v3xx.c @@ -377,6 +377,11 @@ static const struct spi_controller_mem_ops hisi_sfc_v3xx_mem_ops = { static irqreturn_t hisi_sfc_v3xx_isr(int irq, void *data) { struct hisi_sfc_v3xx_host *host = data; + u32 reg; + + reg = readl(host->regbase + HISI_SFC_V3XX_INT_STAT); + if (!reg) + return IRQ_NONE; hisi_sfc_v3xx_disable_int(host); From 0f1bae071de9967602807472921829a54b2e5956 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:26:34 +0100 Subject: [PATCH 021/299] block: Fix WARNING in _copy_from_iter [ Upstream commit 13f3956eb5681a4045a8dfdef48df5dc4d9f58a6 ] Syzkaller reports a warning in _copy_from_iter because an iov_iter is supposedly used in the wrong direction. The reason is that syzcaller managed to generate a request with a transfer direction of SG_DXFER_TO_FROM_DEV. This instructs the kernel to copy user buffers into the kernel, read into the copied buffers and then copy the data back to user space. Thus the iovec is used in both directions. Detect this situation in the block layer and construct a new iterator with the correct direction for the copy-in. Reported-by: syzbot+a532b03fdfee2c137666@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000009b92c10604d7a5e9@google.com/t/ Reported-by: syzbot+63dec323ac56c28e644f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/0000000000003faaa105f6e7c658@google.com/T/ Signed-off-by: Christian A. Ehrhardt Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240121202634.275068-1-lk@c--e.de Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-map.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index 8584babf3ea0..71210cdb3442 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -205,12 +205,19 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, /* * success */ - if ((iov_iter_rw(iter) == WRITE && - (!map_data || !map_data->null_mapped)) || - (map_data && map_data->from_user)) { + if (iov_iter_rw(iter) == WRITE && + (!map_data || !map_data->null_mapped)) { ret = bio_copy_from_iter(bio, iter); if (ret) goto cleanup; + } else if (map_data && map_data->from_user) { + struct iov_iter iter2 = *iter; + + /* This is the copy-in part of SG_DXFER_TO_FROM_DEV. */ + iter2.data_source = ITER_SOURCE; + ret = bio_copy_from_iter(bio, &iter2); + if (ret) + goto cleanup; } else { if (bmd->is_our_pages) zero_fill_bio(bio); From 8fbefa7a755d3481c80d621aec1f8feb447a18c1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 23 Jan 2024 15:47:34 -0800 Subject: [PATCH 022/299] smb: Work around Clang __bdos() type confusion [ Upstream commit 8deb05c84b63b4fdb8549e08942867a68924a5b8 ] Recent versions of Clang gets confused about the possible size of the "user" allocation, and CONFIG_FORTIFY_SOURCE ends up emitting a warning[1]: repro.c:126:4: warning: call to '__write_overflow_field' declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 126 | __write_overflow_field(p_size_field, size); | ^ for this memset(): int len; __le16 *user; ... len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); ... if (len) { ... } else { memset(user, '\0', 2); } While Clang works on this bug[2], switch to using a direct assignment, which avoids memset() entirely which both simplifies the code and silences the false positive warning. (Making "len" size_t also silences the warning, but the direct assignment seems better.) Reported-by: Nathan Chancellor Closes: https://github.com/ClangBuiltLinux/linux/issues/1966 [1] Link: https://github.com/llvm/llvm-project/issues/77813 [2] Cc: Steve French Cc: Paulo Alcantara Cc: Ronnie Sahlberg Cc: Shyam Prasad N Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: llvm@lists.linux.dev Signed-off-by: Kees Cook Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifsencrypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index ef4c2e3c9fa6..6322f0f68a17 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -572,7 +572,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); UniStrupr(user); } else { - memset(user, '\0', 2); + *(u16 *)user = 0; } rc = crypto_shash_update(ses->server->secmech.hmacmd5, From 59e04d39fc2998ad1eb6eef2e7fed84d98143bbe Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:43 +0000 Subject: [PATCH 023/299] cifs: cifs_pick_channel should try selecting active channels [ Upstream commit fc43a8ac396d302ced1e991e4913827cf72c8eb9 ] cifs_pick_channel today just selects a channel based on the policy of least loaded channel. However, it does not take into account if the channel needs reconnect. As a result, we can have failures in send that can be completely avoided. This change doesn't make a channel a candidate for this selection if it needs reconnect. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 4f717ad7c21b..8695c9961f5a 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1026,6 +1026,9 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) if (!server || server->terminate) continue; + if (CIFS_CHAN_NEEDS_RECONNECT(ses, i)) + continue; + /* * strictly speaking, we should pick up req_lock to read * server->in_flight. But it shouldn't matter much here if we From c09de6bb3ada8c3736a082b7fd7649f8ad4545b6 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:45 +0000 Subject: [PATCH 024/299] cifs: translate network errors on send to -ECONNABORTED [ Upstream commit a68106a6928e0a6680f12bcc7338c0dddcfe4d11 ] When the network stack returns various errors, we today bubble up the error to the user (in case of soft mounts). This change translates all network errors except -EINTR and -EAGAIN to -ECONNABORTED. A similar approach is taken when we receive network errors when reading from the socket. The change also forces the cifsd thread to reconnect during it's next activity. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/transport.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 8695c9961f5a..e00278fcfa4f 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -400,10 +400,17 @@ unmask: server->conn_id, server->hostname); } smbd_done: - if (rc < 0 && rc != -EINTR) + /* + * there's hardly any use for the layers above to know the + * actual error code here. All they should do at this point is + * to retry the connection and hope it goes away. + */ + if (rc < 0 && rc != -EINTR && rc != -EAGAIN) { cifs_server_dbg(VFS, "Error %d sending data on socket to server\n", rc); - else if (rc > 0) + rc = -ECONNABORTED; + cifs_signal_cifsd_for_reconnect(server, false); + } else if (rc > 0) rc = 0; out: cifs_in_send_dec(server); From f642fcf3f7e654b90c9dee8b2012f695d0edae28 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Sun, 21 Jan 2024 03:32:46 +0000 Subject: [PATCH 025/299] cifs: helper function to check replayable error codes [ Upstream commit 64cc377b7628b81ffdbdb1c6bacfba895dcac3f8 ] The code to check for replay is not just -EAGAIN. In some cases, the send request or receive response may result in network errors, which we're now mapping to -ECONNABORTED. This change introduces a helper function which checks if the error returned in one of the above two errors. And all checks for replays will now use this helper. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cached_dir.c | 1 + fs/smb/client/cifsglob.h | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 971892620504..5730c65ffb40 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -367,6 +367,7 @@ out: atomic_inc(&tcon->num_remote_opens); } kfree(utf16_path); + return rc; } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 942e6ece56b1..f794b16095e4 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1820,6 +1820,13 @@ static inline bool is_retryable_error(int error) return false; } +static inline bool is_replayable_error(int error) +{ + if (error == -EAGAIN || error == -ECONNABORTED) + return true; + return false; +} + /* cifs_get_writable_file() flags */ #define FIND_WR_ANY 0 From 89f67051613c90be32ef01aefe96fe0adb8da9b1 Mon Sep 17 00:00:00 2001 From: Conrad Kostecki Date: Tue, 23 Jan 2024 19:30:02 +0100 Subject: [PATCH 026/299] ahci: asm1166: correct count of reported ports [ Upstream commit 0077a504e1a4468669fd2e011108db49133db56e ] The ASM1166 SATA host controller always reports wrongly, that it has 32 ports. But in reality, it only has six ports. This seems to be a hardware issue, as all tested ASM1166 SATA host controllers reports such high count of ports. Example output: ahci 0000:09:00.0: AHCI 0001.0301 32 slots 32 ports 6 Gbps 0xffffff3f impl SATA mode. By adjusting the port_map, the count is limited to six ports. New output: ahci 0000:09:00.0: AHCI 0001.0301 32 slots 32 ports 6 Gbps 0x3f impl SATA mode. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=211873 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218346 Signed-off-by: Conrad Kostecki Reviewed-by: Hans de Goede Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 08745e7db820..2b8f0c3c3879 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -657,6 +657,11 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) { + dev_info(&pdev->dev, "ASM1166 has only six ports\n"); + hpriv->saved_port_map = 0x3f; + } + if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { dev_info(&pdev->dev, "JMB361 has only one port\n"); hpriv->saved_port_map = 1; From 673629018ba04906899dcb631beec34d871f709c Mon Sep 17 00:00:00 2001 From: Maksim Kiselev Date: Wed, 24 Jan 2024 10:24:36 +0300 Subject: [PATCH 027/299] aoe: avoid potential deadlock at set_capacity [ Upstream commit e169bd4fb2b36c4b2bee63c35c740c85daeb2e86 ] Move set_capacity() outside of the section procected by (&d->lock). To avoid possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- [1] lock(&bdev->bd_size_lock); local_irq_disable(); [2] lock(&d->lock); [3] lock(&bdev->bd_size_lock); [4] lock(&d->lock); *** DEADLOCK *** Where [1](&bdev->bd_size_lock) hold by zram_add()->set_capacity(). [2]lock(&d->lock) hold by aoeblk_gdalloc(). And aoeblk_gdalloc() is trying to acquire [3](&bdev->bd_size_lock) at set_capacity() call. In this situation an attempt to acquire [4]lock(&d->lock) from aoecmd_cfg_rsp() will lead to deadlock. So the simplest solution is breaking lock dependency [2](&d->lock) -> [3](&bdev->bd_size_lock) by moving set_capacity() outside. Signed-off-by: Maksim Kiselev Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240124072436.3745720-2-bigunclemax@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/aoe/aoeblk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index cf6883756155..37eff1c97451 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -333,6 +333,7 @@ aoeblk_gdalloc(void *vp) struct gendisk *gd; mempool_t *mp; struct blk_mq_tag_set *set; + sector_t ssize; ulong flags; int late = 0; int err; @@ -395,7 +396,7 @@ aoeblk_gdalloc(void *vp) gd->minors = AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; - set_capacity(gd, d->ssize); + ssize = d->ssize; snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); @@ -404,6 +405,8 @@ aoeblk_gdalloc(void *vp) spin_unlock_irqrestore(&d->lock, flags); + set_capacity(gd, ssize); + err = device_add_disk(NULL, gd, aoe_attr_groups); if (err) goto out_disk_cleanup; From ab7318c79570af9b39ee1722f9ec61c44db08544 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 24 Jan 2024 17:41:01 +0000 Subject: [PATCH 028/299] spi: cs42l43: Handle error from devm_pm_runtime_enable [ Upstream commit f9f4b0c6425eb9ffd9bf62b8b8143e786b6ba695 ] As it devm_pm_runtime_enable can fail due to memory allocations, it is best to handle the error. Suggested-by: Andy Shevchenko Signed-off-by: Charles Keepax Link: https://msgid.link/r/20240124174101.2270249-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-cs42l43.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-cs42l43.c b/drivers/spi/spi-cs42l43.c index d239fc5a49cc..c1556b652909 100644 --- a/drivers/spi/spi-cs42l43.c +++ b/drivers/spi/spi-cs42l43.c @@ -244,7 +244,10 @@ static int cs42l43_spi_probe(struct platform_device *pdev) priv->ctlr->use_gpio_descriptors = true; priv->ctlr->auto_runtime_pm = true; - devm_pm_runtime_enable(priv->dev); + ret = devm_pm_runtime_enable(priv->dev); + if (ret) + return ret; + pm_runtime_idle(priv->dev); regmap_write(priv->regmap, CS42L43_TRAN_CONFIG6, CS42L43_FIFO_SIZE - 1); From b0dd4d7ada6fcdfbfb043257ddefd50e10fb4403 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 25 Jan 2024 17:04:01 +0200 Subject: [PATCH 029/299] ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers [ Upstream commit 20730e9b277873deeb6637339edcba64468f3da3 ] With one of the on-board ASM1061 AHCI controllers (1b21:0612) on an ASUSTeK Pro WS WRX80E-SAGE SE WIFI mainboard, a controller hang was observed that was immediately preceded by the following kernel messages: ahci 0000:28:00.0: Using 64-bit DMA addresses ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00000 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00300 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00380 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00400 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00680 flags=0x0000] ahci 0000:28:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0035 address=0x7fffff00700 flags=0x0000] The first message is produced by code in drivers/iommu/dma-iommu.c which is accompanied by the following comment that seems to apply: /* * Try to use all the 32-bit PCI addresses first. The original SAC vs. * DAC reasoning loses relevance with PCIe, but enough hardware and * firmware bugs are still lurking out there that it's safest not to * venture into the 64-bit space until necessary. * * If your device goes wrong after seeing the notice then likely either * its driver is not setting DMA masks accurately, the hardware has * some inherent bug in handling >32-bit addresses, or not all the * expected address bits are wired up between the device and the IOMMU. */ Asking the ASM1061 on a discrete PCIe card to DMA from I/O virtual address 0xffffffff00000000 produces the following I/O page faults: vfio-pci 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0x7ff00000000 flags=0x0010] vfio-pci 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0021 address=0x7ff00000500 flags=0x0010] Note that the upper 21 bits of the logged DMA address are zero. (When asking a different PCIe device in the same PCIe slot to DMA to the same I/O virtual address, we do see all the upper 32 bits of the DMA address as 1, so this is not an issue with the chipset or IOMMU configuration on the test system.) Also, hacking libahci to always set the upper 21 bits of all DMA addresses to 1 produces no discernible effect on the behavior of the ASM1061, and mkfs/mount/scrub/etc work as without this hack. This all strongly suggests that the ASM1061 has a 43 bit DMA address limit, and this commit therefore adds a quirk to deal with this limit. This issue probably applies to (some of) the other supported ASMedia parts as well, but we limit it to the PCI IDs known to refer to ASM1061 parts, as that's the only part we know for sure to be affected by this issue at this point. Link: https://lore.kernel.org/linux-ide/ZaZ2PIpEId-rl6jv@wantstofly.org/ Signed-off-by: Lennert Buytenhek [cassel: drop date from error messages in commit log] Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 29 +++++++++++++++++++++++------ drivers/ata/ahci.h | 1 + 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 2b8f0c3c3879..20761eeea410 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -48,6 +48,7 @@ enum { enum board_ids { /* board IDs by feature in alphabetical order */ board_ahci, + board_ahci_43bit_dma, board_ahci_ign_iferr, board_ahci_low_power, board_ahci_no_debounce_delay, @@ -128,6 +129,13 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, + [board_ahci_43bit_dma] = { + AHCI_HFLAGS (AHCI_HFLAG_43BIT_ONLY), + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, [board_ahci_ign_iferr] = { AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR), .flags = AHCI_FLAG_COMMON, @@ -596,11 +604,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ - /* Asmedia */ + /* ASMedia */ { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci }, /* ASM1061 */ - { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci }, /* ASM1062 */ + { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ + { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ @@ -948,11 +956,20 @@ static int ahci_pci_device_resume(struct device *dev) #endif /* CONFIG_PM */ -static int ahci_configure_dma_masks(struct pci_dev *pdev, int using_dac) +static int ahci_configure_dma_masks(struct pci_dev *pdev, + struct ahci_host_priv *hpriv) { - const int dma_bits = using_dac ? 64 : 32; + int dma_bits; int rc; + if (hpriv->cap & HOST_CAP_64) { + dma_bits = 64; + if (hpriv->flags & AHCI_HFLAG_43BIT_ONLY) + dma_bits = 43; + } else { + dma_bits = 32; + } + /* * If the device fixup already set the dma_mask to some non-standard * value, don't extend it here. This happens on STA2X11, for example. @@ -1925,7 +1942,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ahci_gtf_filter_workaround(host); /* initialize adapter */ - rc = ahci_configure_dma_masks(pdev, hpriv->cap & HOST_CAP_64); + rc = ahci_configure_dma_masks(pdev, hpriv); if (rc) return rc; diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 4bae95b06ae3..df8f8a1a3a34 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -247,6 +247,7 @@ enum { AHCI_HFLAG_SUSPEND_PHYS = BIT(26), /* handle PHYs during suspend/resume */ AHCI_HFLAG_NO_SXS = BIT(28), /* SXS not supported */ + AHCI_HFLAG_43BIT_ONLY = BIT(29), /* 43bit DMA addr limit */ /* ap->flags bits */ From 64783eaa37b74ac08c1f44e14bfaaec24c8dcb25 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 13 Jan 2024 10:03:51 +0100 Subject: [PATCH 030/299] ARM: dts: Fix TPM schema violations [ Upstream commit 8412c47d68436b9f9a260039a4a773daa6824925 ] Since commit 26c9d152ebf3 ("dt-bindings: tpm: Consolidate TCG TIS bindings"), several issues are reported by "make dtbs_check" for ARM devicetrees: The nodename needs to be "tpm@0" rather than "tpmdev@0" and the compatible property needs to contain the chip's name in addition to the generic "tcg,tpm_tis-spi" or "tcg,tpm-tis-i2c": tpmdev@0: $nodename:0: 'tpmdev@0' does not match '^tpm(@[0-9a-f]+)?$' from schema $id: http://devicetree.org/schemas/tpm/tcg,tpm_tis-spi.yaml# tpm@2e: compatible: 'oneOf' conditional failed, one must be fixed: ['tcg,tpm-tis-i2c'] is too short from schema $id: http://devicetree.org/schemas/tpm/tcg,tpm-tis-i2c.yaml# Fix these schema violations. Aspeed Facebook BMCs use an Infineon SLB9670: https://lore.kernel.org/all/ZZSmMJ%2F%2Fl972Qbxu@fedora/ https://lore.kernel.org/all/ZZT4%2Fw2eVzMhtsPx@fedora/ https://lore.kernel.org/all/ZZTS0p1hdAchIbKp@heinlein.vulture-banana.ts.net/ Aspeed Tacoma uses a Nuvoton NPCT75X per commit 39d8a73c53a2 ("ARM: dts: aspeed: tacoma: Add TPM"). phyGATE-Tauri uses an Infineon SLB9670: https://lore.kernel.org/all/ab45c82485fa272f74adf560cbb58ee60cc42689.camel@phytec.de/ A single schema violation remains in am335x-moxa-uc-2100-common.dtsi because it is unknown which chip is used on the board. The devicetree's author has been asked for clarification but has not responded so far: https://lore.kernel.org/all/20231220090910.GA32182@wunner.de/ Signed-off-by: Lukas Wunner Reviewed-by: Patrick Williams Reviewed-by: Tao Ren Reviewed-by: Bruno Thomsen Signed-off-by: Sasha Levin --- arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts | 4 ++-- arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts | 4 ++-- arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts | 2 +- arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi | 4 ++-- arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi | 2 +- arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts | 2 +- arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts index e899de681f47..5be0e8fd2633 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts +++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-bletchley.dts @@ -45,8 +45,8 @@ num-chipselects = <1>; cs-gpios = <&gpio0 ASPEED_GPIO(Z, 0) GPIO_ACTIVE_LOW>; - tpmdev@0 { - compatible = "tcg,tpm_tis-spi"; + tpm@0 { + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; spi-max-frequency = <33000000>; reg = <0>; }; diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts index a677c827e758..5a8169bbda87 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts +++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-wedge400.dts @@ -80,8 +80,8 @@ gpio-miso = <&gpio ASPEED_GPIO(R, 5) GPIO_ACTIVE_HIGH>; num-chipselects = <1>; - tpmdev@0 { - compatible = "tcg,tpm_tis-spi"; + tpm@0 { + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; spi-max-frequency = <33000000>; reg = <0>; }; diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts index 3f6010ef2b86..213023bc5aec 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts +++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-opp-tacoma.dts @@ -456,7 +456,7 @@ status = "okay"; tpm: tpm@2e { - compatible = "tcg,tpm-tis-i2c"; + compatible = "nuvoton,npct75x", "tcg,tpm-tis-i2c"; reg = <0x2e>; }; }; diff --git a/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi b/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi index 31590d3186a2..00e5887c926f 100644 --- a/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi +++ b/arch/arm/boot/dts/aspeed/ast2600-facebook-netbmc-common.dtsi @@ -35,8 +35,8 @@ gpio-mosi = <&gpio0 ASPEED_GPIO(X, 4) GPIO_ACTIVE_HIGH>; gpio-miso = <&gpio0 ASPEED_GPIO(X, 5) GPIO_ACTIVE_HIGH>; - tpmdev@0 { - compatible = "tcg,tpm_tis-spi"; + tpm@0 { + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; spi-max-frequency = <33000000>; reg = <0>; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi index ea627638e40c..7dd1fe5a2fb7 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ull-phytec-tauri.dtsi @@ -121,7 +121,7 @@ tpm_tis: tpm@1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_tpm>; - compatible = "tcg,tpm_tis-spi"; + compatible = "infineon,slb9670", "tcg,tpm_tis-spi"; reg = <1>; spi-max-frequency = <20000000>; interrupt-parent = <&gpio5>; diff --git a/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts b/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts index 3a723843d562..9984b343cdf0 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts +++ b/arch/arm/boot/dts/nxp/imx/imx7d-flex-concentrator.dts @@ -130,7 +130,7 @@ * TCG specification - Section 6.4.1 Clocking: * TPM shall support a SPI clock frequency range of 10-24 MHz. */ - st33htph: tpm-tis@0 { + st33htph: tpm@0 { compatible = "st,st33htpm-spi", "tcg,tpm_tis-spi"; reg = <0>; spi-max-frequency = <24000000>; diff --git a/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi b/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi index b8730aa52ce6..a59331aa58e5 100644 --- a/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi +++ b/arch/arm/boot/dts/ti/omap/am335x-moxa-uc-2100-common.dtsi @@ -217,7 +217,7 @@ pinctrl-names = "default"; pinctrl-0 = <&spi1_pins>; - tpm_spi_tis@0 { + tpm@0 { compatible = "tcg,tpm_tis-spi"; reg = <0>; spi-max-frequency = <500000>; From f778a45784d34355b481b16b8348374fa2e585eb Mon Sep 17 00:00:00 2001 From: Huang Pei Date: Tue, 23 Jan 2024 09:47:57 +0800 Subject: [PATCH 031/299] MIPS: reserve exception vector space ONLY ONCE [ Upstream commit abcabb9e30a1f9a69c76776f8abffc31c377b542 ] "cpu_probe" is called both by BP and APs, but reserving exception vector (like 0x0-0x1000) called by "cpu_probe" need once and calling on APs is too late since memblock is unavailable at that time. So, reserve exception vector ONLY by BP. Suggested-by: Thomas Bogendoerfer Signed-off-by: Huang Pei Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/kernel/traps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 246c6a6b0261..5b778995d448 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2007,7 +2007,13 @@ unsigned long vi_handlers[64]; void reserve_exception_space(phys_addr_t addr, unsigned long size) { - memblock_reserve(addr, size); + /* + * reserve exception space on CPUs other than CPU0 + * is too late, since memblock is unavailable when APs + * up + */ + if (smp_processor_id() == 0) + memblock_reserve(addr, size); } void __init *set_except_vector(int n, void *addr) From 720751b57f0ad6b1e0e9f78a484093d17b3186be Mon Sep 17 00:00:00 2001 From: Phoenix Chen Date: Fri, 26 Jan 2024 17:53:08 +0800 Subject: [PATCH 032/299] platform/x86: touchscreen_dmi: Add info for the TECLAST X16 Plus tablet [ Upstream commit 1abdf288b0ef5606f76b6e191fa6df05330e3d7e ] Add touch screen info for TECLAST X16 Plus tablet. Signed-off-by: Phoenix Chen Link: https://lore.kernel.org/r/20240126095308.5042-1-asbeltogf@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/touchscreen_dmi.c | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 0c6733772698..7aee5e9ff2b8 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -944,6 +944,32 @@ static const struct ts_dmi_data teclast_tbook11_data = { .properties = teclast_tbook11_props, }; +static const struct property_entry teclast_x16_plus_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 8), + PROPERTY_ENTRY_U32("touchscreen-min-y", 14), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1916), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1264), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-teclast-x16-plus.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data teclast_x16_plus_data = { + .embedded_fw = { + .name = "silead/gsl3692-teclast-x16-plus.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 43560, + .sha256 = { 0x9d, 0xb0, 0x3d, 0xf1, 0x00, 0x3c, 0xb5, 0x25, + 0x62, 0x8a, 0xa0, 0x93, 0x4b, 0xe0, 0x4e, 0x75, + 0xd1, 0x27, 0xb1, 0x65, 0x3c, 0xba, 0xa5, 0x0f, + 0xcd, 0xb4, 0xbe, 0x00, 0xbb, 0xf6, 0x43, 0x29 }, + }, + .acpi_name = "MSSL1680:00", + .properties = teclast_x16_plus_props, +}; + static const struct property_entry teclast_x3_plus_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), PROPERTY_ENTRY_U32("touchscreen-size-y", 1500), @@ -1612,6 +1638,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_SKU, "E5A6_A1"), }, }, + { + /* Teclast X16 Plus */ + .driver_data = (void *)&teclast_x16_plus_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), + DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_MATCH(DMI_PRODUCT_SKU, "D3A5_A1"), + }, + }, { /* Teclast X3 Plus */ .driver_data = (void *)&teclast_x3_plus_data, From f32d2a745b02123258026e105a008f474f896d6a Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:37 +0800 Subject: [PATCH 033/299] ext4: avoid dividing by 0 in mb_update_avg_fragment_size() when block bitmap corrupt [ Upstream commit 993bf0f4c393b3667830918f9247438a8f6fdb5b ] Determine if bb_fragments is 0 instead of determining bb_free to eliminate the risk of dividing by zero when the block bitmap is corrupted. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-6-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7a2d42a84807..9eac63e5a251 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -841,7 +841,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) struct ext4_sb_info *sbi = EXT4_SB(sb); int new_order; - if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) + if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0) return; new_order = mb_avg_fragment_size_order(sb, From 0184747b552d6b5a14db3b7fcc3b792ce64dedd1 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:38 +0800 Subject: [PATCH 034/299] ext4: avoid allocating blocks from corrupted group in ext4_mb_try_best_found() [ Upstream commit 4530b3660d396a646aad91a787b6ab37cf604b53 ] Determine if the group block bitmap is corrupted before using ac_b_ex in ext4_mb_try_best_found() to avoid allocating blocks from a group with a corrupted block bitmap in the following concurrency and making the situation worse. ext4_mb_regular_allocator ext4_lock_group(sb, group) ext4_mb_good_group // check if the group bbitmap is corrupted ext4_mb_complex_scan_group // Scan group gets ac_b_ex but doesn't use it ext4_unlock_group(sb, group) ext4_mark_group_bitmap_corrupted(group) // The block bitmap was corrupted during // the group unlock gap. ext4_mb_try_best_found ext4_lock_group(ac->ac_sb, group) ext4_mb_use_best_found mb_mark_used // Allocating blocks in block bitmap corrupted group Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-7-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9eac63e5a251..bc9630f4c09c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2304,6 +2304,9 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac, return; ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { @@ -2311,6 +2314,7 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac, ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); } From d3bbe77a76bc52e9d4d0a120f1509be36e25c916 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:39 +0800 Subject: [PATCH 035/299] ext4: avoid allocating blocks from corrupted group in ext4_mb_find_by_goal() [ Upstream commit 832698373a25950942c04a512daa652c18a9b513 ] Places the logic for checking if the group's block bitmap is corrupt under the protection of the group lock to avoid allocating blocks from the group with a corrupted block bitmap. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-8-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/mballoc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bc9630f4c09c..ea5ac2636632 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2341,12 +2341,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (err) return err; - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) { - ext4_mb_unload_buddy(e4b); - return 0; - } - ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); ex.fe_logical = 0xDEADFA11; /* debug value */ @@ -2379,6 +2377,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); From 2ccbf84ed3fe717caee840cf26142a31a6f2e749 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 23 Dec 2023 15:16:50 +0100 Subject: [PATCH 036/299] Input: goodix - accept ACPI resources with gpio_count == 3 && gpio_int_idx == 0 [ Upstream commit 180a8f12c21f41740fee09ca7f7aa98ff5bb99f8 ] Some devices list 3 Gpio resources in the ACPI resource list for the touchscreen: 1. GpioInt resource pointing to the GPIO used for the interrupt 2. GpioIo resource pointing to the reset GPIO 3. GpioIo resource pointing to the GPIO used for the interrupt Note how the third extra GpioIo resource really is a duplicate of the GpioInt provided info. Ignore this extra GPIO, treating this setup the same as gpio_count == 2 && gpio_int_idx == 0 fixes the touchscreen not working on the Thunderbook Colossus W803 rugged tablet and likely also on the CyberBook_T116K. Reported-by: Maarten van der Schrieck Closes: https://gitlab.com/AdyaAdya/goodix-touchscreen-linux-driver/-/issues/22 Suggested-by: Maarten van der Schrieck Tested-by: Maarten van der Schrieck Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231223141650.10679-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/goodix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index af32fbe57b63..b068ff8afbc9 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -884,7 +884,8 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) } } - if (ts->gpio_count == 2 && ts->gpio_int_idx == 0) { + /* Some devices with gpio_int_idx 0 list a third unused GPIO */ + if ((ts->gpio_count == 2 || ts->gpio_count == 3) && ts->gpio_int_idx == 0) { ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO; gpio_mapping = acpi_goodix_int_first_gpios; } else if (ts->gpio_count == 2 && ts->gpio_int_idx == 1) { From 7b24760f3a3c7ae1a176d343136b6c25174b7b27 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Thu, 18 Jan 2024 11:19:29 +0800 Subject: [PATCH 037/299] dmaengine: ti: edma: Add some null pointer checks to the edma_probe [ Upstream commit 6e2276203ac9ff10fc76917ec9813c660f627369 ] devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240118031929.192192-1-chentao@kylinos.cn Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/ti/edma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 33d6d931b33b..155c409d2b43 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -2404,6 +2404,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, ecc); if (ret) { @@ -2420,6 +2425,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, ecc); if (ret) { From 68da1d65b2e3b9a7c7cd8d2b87583afa7a7a82eb Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Thu, 18 Jan 2024 20:00:21 +0530 Subject: [PATCH 038/299] ASoC: amd: acp: Add check for cpu dai link initialization [ Upstream commit 6cc2aa9a75f2397d42b78d4c159bc06722183c78 ] Add condition check for cpu dai link initialization for amplifier codec path, as same pcm id uses for both headset and speaker path for RENOIR platforms. Signed-off-by: Venkata Prasad Potturu Link: https://msgid.link/r/20240118143023.1903984-3-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/amd/acp/acp-mach-common.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index a06af82b8056..fc4e91535578 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c @@ -1416,8 +1416,13 @@ int acp_sofdsp_dai_links_create(struct snd_soc_card *card) if (drv_data->amp_cpu_id == I2S_SP) { links[i].name = "acp-amp-codec"; links[i].id = AMP_BE_ID; - links[i].cpus = sof_sp_virtual; - links[i].num_cpus = ARRAY_SIZE(sof_sp_virtual); + if (drv_data->platform == RENOIR) { + links[i].cpus = sof_sp; + links[i].num_cpus = ARRAY_SIZE(sof_sp); + } else { + links[i].cpus = sof_sp_virtual; + links[i].num_cpus = ARRAY_SIZE(sof_sp_virtual); + } links[i].platforms = sof_component; links[i].num_platforms = ARRAY_SIZE(sof_component); links[i].dpcm_playback = 1; From e1b38b919d3d5792031c6682b5a7c7b95c6bacf4 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:26 +0100 Subject: [PATCH 039/299] regulator: pwm-regulator: Add validity checks in continuous .get_voltage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c92688cac239794e4a1d976afa5203a4d3a2ac0e ] Continuous regulators can be configured to operate only in a certain duty cycle range (for example from 0..91%). Add a check to error out if the duty cycle translates to an unsupported (or out of range) voltage. Suggested-by: Uwe Kleine-König Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-2-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/pwm-regulator.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 2aff6db748e2..e33d10df7a76 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -158,6 +158,9 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); + if (voltage < min(max_uV_duty, min_uV_duty) || + voltage > max(max_uV_duty, min_uV_duty)) + return -ENOTRECOVERABLE; /* * The dutycycle for min_uV might be greater than the one for max_uV. From a905b2eccfd25e8a8f8288774a207227daf9e0c3 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 22 Jan 2024 21:32:01 +0100 Subject: [PATCH 040/299] HID: logitech-hidpp: add support for Logitech G Pro X Superlight 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit afa6ac2690bb9904ff883c6e942281e1032a484d ] Let logitech-hidpp driver claim Logitech G Pro X Superlight 2. Reported-by: Marcus Rückert Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-logitech-hidpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 7bf12ca0eb4a..4519ee377aa7 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4650,6 +4650,8 @@ static const struct hid_device_id hidpp_devices[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC088) }, { /* Logitech G Pro X Superlight Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC094) }, + { /* Logitech G Pro X Superlight 2 Gaming Mouse over USB */ + HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC09b) }, { /* G935 Gaming Headset */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0x0a87), From 580118d5c6e5ff11513c4335504b0b163e38cc0c Mon Sep 17 00:00:00 2001 From: Rui Salvaterra Date: Mon, 22 Jan 2024 11:45:12 +0000 Subject: [PATCH 041/299] ALSA: hda: Replace numeric device IDs with constant values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3526860f26febbe46960f9b37f5dbd5ccc109ea8 ] We have self-explanatory constants for Intel HDA devices, let's use them instead of magic numbers and code comments. Signed-off-by: Rui Salvaterra Reviewed-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240122114512.55808-2-rsalvaterra@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 55d3a78112e0..159615755686 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1735,8 +1735,8 @@ static int default_bdl_pos_adj(struct azx *chip) /* some exceptions: Atoms seem problematic with value 1 */ if (chip->pci->vendor == PCI_VENDOR_ID_INTEL) { switch (chip->pci->device) { - case 0x0f04: /* Baytrail */ - case 0x2284: /* Braswell */ + case PCI_DEVICE_ID_INTEL_HDA_BYT: + case PCI_DEVICE_ID_INTEL_HDA_BSW: return 32; } } From 034a0061b2dcf72c8276eadddb6417caf57a9d31 Mon Sep 17 00:00:00 2001 From: Rui Salvaterra Date: Mon, 22 Jan 2024 11:45:13 +0000 Subject: [PATCH 042/299] ALSA: hda: Increase default bdl_pos_adj for Apollo Lake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 56beedc88405fd8022edfd1c2e63d1bc6c95efcb ] Apollo Lake seems to also suffer from IRQ timing issues. After being up for ~4 minutes, a Pentium N4200 system ends up falling back to workqueue-based IRQ handling: [ 208.019906] snd_hda_intel 0000:00:0e.0: IRQ timing workaround is activated for card #0. Suggest a bigger bdl_pos_adj. Unfortunately, the Baytrail and Braswell workaround value of 32 samples isn't enough to fix the issue here. Default to 64 samples. Signed-off-by: Rui Salvaterra Reviewed-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240122114512.55808-3-rsalvaterra@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/hda/hda_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 159615755686..a6a9d353fe63 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1738,6 +1738,8 @@ static int default_bdl_pos_adj(struct azx *chip) case PCI_DEVICE_ID_INTEL_HDA_BYT: case PCI_DEVICE_ID_INTEL_HDA_BSW: return 32; + case PCI_DEVICE_ID_INTEL_HDA_APL: + return 64; } } From 83527a13740f57b45f162e3af4c7db4b88521100 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 19 Jan 2024 14:07:14 +0800 Subject: [PATCH 043/299] HID: nvidia-shield: Add missing null pointer checks to LED initialization [ Upstream commit b6eda11c44dc89a681e1c105f0f4660e69b1e183 ] devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. [jkosina@suse.com: tweak changelog a bit] Signed-off-by: Kunwu Chan Reviewed-by: Rahul Rameshbabu Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-nvidia-shield.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c index c463e54decbc..edd0b0f1193b 100644 --- a/drivers/hid/hid-nvidia-shield.c +++ b/drivers/hid/hid-nvidia-shield.c @@ -800,6 +800,8 @@ static inline int thunderstrike_led_create(struct thunderstrike *ts) led->name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike%d:blue:led", ts->id); + if (!led->name) + return -ENOMEM; led->max_brightness = 1; led->flags = LED_CORE_SUSPENDRESUME | LED_RETAIN_AT_SHUTDOWN; led->brightness_get = &thunderstrike_led_get_brightness; @@ -831,6 +833,8 @@ static inline int thunderstrike_psy_create(struct shield_device *shield_dev) shield_dev->battery_dev.desc.name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike_%d", ts->id); + if (!shield_dev->battery_dev.desc.name) + return -ENOMEM; shield_dev->battery_dev.psy = power_supply_register( &hdev->dev, &shield_dev->battery_dev.desc, &psy_cfg); From 307fc03dc43341357adb5ba243fad488c8714f3a Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Fri, 26 Jan 2024 16:26:43 +0800 Subject: [PATCH 044/299] nvmet-tcp: fix nvme tcp ida memory leak [ Upstream commit 47c5dd66c1840524572dcdd956f4af2bdb6fbdff ] The nvmet_tcp_queue_ida should be destroy when the nvmet-tcp module exit. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index a4f802790ca0..8e5d547aa16c 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1927,6 +1927,7 @@ static void __exit nvmet_tcp_exit(void) flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); + ida_destroy(&nvmet_tcp_queue_ida); } module_init(nvmet_tcp_init); From 21857eed5066b977936c20c748b2bb65e91be219 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:23 +0100 Subject: [PATCH 045/299] usb: ucsi_acpi: Quirk to ack a connector change ack cmd [ Upstream commit f3be347ea42dbb0358cd8b2d8dc543a23b70a976 ] The PPM on some Dell laptops seems to expect that the ACK_CC_CI command to clear the connector change notification is in turn followed by another ACK_CC_CI to acknowledge the ACK_CC_CI command itself. This is in violation of the UCSI spec that states: "The only notification that is not acknowledged by the OPM is the command completion notification for the ACK_CC_CI or the PPM_RESET command." Add a quirk to send this ack anyway. Apply the quirk to all Dell systems. On the first command that acks a connector change send a dummy command to determine if it runs into a timeout. Only activate the quirk if it does. This ensure that we do not break Dell systems that do not need the quirk. Signed-off-by: "Christian A. Ehrhardt" Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-4-lk@c--e.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/ucsi/ucsi_acpi.c | 71 ++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index fa222080887d..928eacbeb21a 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -25,6 +25,8 @@ struct ucsi_acpi { unsigned long flags; guid_t guid; u64 cmd; + bool dell_quirk_probed; + bool dell_quirk_active; }; static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func) @@ -126,12 +128,73 @@ static const struct ucsi_operations ucsi_zenbook_ops = { .async_write = ucsi_acpi_async_write }; -static const struct dmi_system_id zenbook_dmi_id[] = { +/* + * Some Dell laptops expect that an ACK command with the + * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate) + * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set. + * If this is not done events are not delivered to OSPM and + * subsequent commands will timeout. + */ +static int +ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset, + const void *val, size_t val_len) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + u64 cmd = *(u64 *)val, ack = 0; + int ret; + + if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI && + cmd & UCSI_ACK_CONNECTOR_CHANGE) + ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; + + ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); + if (ret != 0) + return ret; + if (ack == 0) + return ret; + + if (!ua->dell_quirk_probed) { + ua->dell_quirk_probed = true; + + cmd = UCSI_GET_CAPABILITY; + ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, + sizeof(cmd)); + if (ret == 0) + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, + &ack, sizeof(ack)); + if (ret != -ETIMEDOUT) + return ret; + + ua->dell_quirk_active = true; + dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n"); + dev_err(ua->dev, "Firmware bug: Enabling workaround\n"); + } + + if (!ua->dell_quirk_active) + return ret; + + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack)); +} + +static const struct ucsi_operations ucsi_dell_ops = { + .read = ucsi_acpi_read, + .sync_write = ucsi_dell_sync_write, + .async_write = ucsi_acpi_async_write +}; + +static const struct dmi_system_id ucsi_acpi_quirks[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), }, + .driver_data = (void *)&ucsi_zenbook_ops, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + }, + .driver_data = (void *)&ucsi_dell_ops, }, { } }; @@ -160,6 +223,7 @@ static int ucsi_acpi_probe(struct platform_device *pdev) { struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); const struct ucsi_operations *ops = &ucsi_acpi_ops; + const struct dmi_system_id *id; struct ucsi_acpi *ua; struct resource *res; acpi_status status; @@ -189,8 +253,9 @@ static int ucsi_acpi_probe(struct platform_device *pdev) init_completion(&ua->complete); ua->dev = &pdev->dev; - if (dmi_check_system(zenbook_dmi_id)) - ops = &ucsi_zenbook_ops; + id = dmi_first_match(ucsi_acpi_quirks); + if (id) + ops = id->driver_data; ua->ucsi = ucsi_create(&pdev->dev, ops); if (IS_ERR(ua->ucsi)) From ee277704a44e49e56db391859bf96e7951fb8cb4 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Mon, 29 Jan 2024 15:12:54 +0300 Subject: [PATCH 046/299] ALSA: usb-audio: Check presence of valid altsetting control [ Upstream commit 346f59d1e8ed0eed41c80e1acb657e484c308e6a ] Many devices with a single alternate setting do not have a Valid Alternate Setting Control and validation performed by validate_sample_rate_table_v2v3() doesn't work on them and is not really needed. So check the presense of control before sending altsetting validation requests. MOTU Microbook IIc is suffering the most without this check. It takes up to 40 seconds to bootup due to how slow it switches sampling rates: [ 2659.164824] usb 3-2: New USB device found, idVendor=07fd, idProduct=0004, bcdDevice= 0.60 [ 2659.164827] usb 3-2: New USB device strings: Mfr=1, Product=2, SerialNumber=0 [ 2659.164829] usb 3-2: Product: MicroBook IIc [ 2659.164830] usb 3-2: Manufacturer: MOTU [ 2659.166204] usb 3-2: Found last interface = 3 [ 2679.322298] usb 3-2: No valid sample rate available for 1:1, assuming a firmware bug [ 2679.322306] usb 3-2: 1:1: add audio endpoint 0x3 [ 2679.322321] usb 3-2: Creating new data endpoint #3 [ 2679.322552] usb 3-2: 1:1 Set sample rate 96000, clock 1 [ 2684.362250] usb 3-2: 2:1: cannot get freq (v2/v3): err -110 [ 2694.444700] usb 3-2: No valid sample rate available for 2:1, assuming a firmware bug [ 2694.444707] usb 3-2: 2:1: add audio endpoint 0x84 [ 2694.444721] usb 3-2: Creating new data endpoint #84 [ 2699.482103] usb 3-2: 2:1 Set sample rate 96000, clock 1 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240129121254.3454481-1-alexander@tsoy.me Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/format.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/usb/format.c b/sound/usb/format.c index ab5fed9f55b6..3b45d0ee7693 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -470,9 +470,11 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, int clock) { struct usb_device *dev = chip->dev; + struct usb_host_interface *alts; unsigned int *table; unsigned int nr_rates; int i, err; + u32 bmControls; /* performing the rate verification may lead to unexpected USB bus * behavior afterwards by some unknown reason. Do this only for the @@ -481,6 +483,24 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, if (!(chip->quirk_flags & QUIRK_FLAG_VALIDATE_RATES)) return 0; /* don't perform the validation as default */ + alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); + if (!alts) + return 0; + + if (fp->protocol == UAC_VERSION_3) { + struct uac3_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = le32_to_cpu(as->bmControls); + } else { + struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = as->bmControls; + } + + if (!uac_v2v3_control_is_readable(bmControls, + UAC2_AS_VAL_ALT_SETTINGS)) + return 0; + table = kcalloc(fp->nr_rates, sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; From 04d46a95642e6d407af4783a9b9fe16888eb23af Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 28 Jan 2024 00:32:43 +0800 Subject: [PATCH 047/299] ASoC: sunxi: sun4i-spdif: Add support for Allwinner H616 [ Upstream commit 0adf963b8463faa44653e22e56ce55f747e68868 ] The SPDIF hardware block found in the H616 SoC has the same layout as the one found in the H6 SoC, except that it is missing the receiver side. Since the driver currently only supports the transmit function, support for the H616 is identical to what is currently done for the H6. Signed-off-by: Chen-Yu Tsai Reviewed-by: Andre Przywara Reviewed-by: Jernej Skrabec Link: https://msgid.link/r/20240127163247.384439-4-wens@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sunxi/sun4i-spdif.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/sunxi/sun4i-spdif.c b/sound/soc/sunxi/sun4i-spdif.c index b849bb7cf58e..2347aeb049bc 100644 --- a/sound/soc/sunxi/sun4i-spdif.c +++ b/sound/soc/sunxi/sun4i-spdif.c @@ -578,6 +578,11 @@ static const struct of_device_id sun4i_spdif_of_match[] = { .compatible = "allwinner,sun50i-h6-spdif", .data = &sun50i_h6_spdif_quirks, }, + { + .compatible = "allwinner,sun50i-h616-spdif", + /* Essentially the same as the H6, but without RX */ + .data = &sun50i_h6_spdif_quirks, + }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sun4i_spdif_of_match); From f7d799076a55aed4accd134442a33e4fef7c2757 Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Tue, 30 Jan 2024 20:32:56 +0530 Subject: [PATCH 048/299] regulator (max5970): Fix IRQ handler [ Upstream commit a3fa9838e8140584a6f338e8516f2b05d3bea812 ] The max5970 datasheet gives the impression that IRQ status bits must be cleared by writing a one to set bits, as those are marked with 'R/C', however tests showed that a zero must be written. Fixes an IRQ storm as the interrupt handler actually clears the IRQ status bits. Signed-off-by: Patrick Rudolph Signed-off-by: Naresh Solanki Link: https://msgid.link/r/20240130150257.3643657-1-naresh.solanki@9elements.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/max5970-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/max5970-regulator.c b/drivers/regulator/max5970-regulator.c index b56a174cde3d..5c2d49ae332f 100644 --- a/drivers/regulator/max5970-regulator.c +++ b/drivers/regulator/max5970-regulator.c @@ -265,7 +265,7 @@ static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg, return ret; if (*val) - return regmap_write(map, reg, *val); + return regmap_write(map, reg, 0); return 0; } From 2c889761d47252903bf8203bf9ac2d4fb000b355 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Jan 2024 10:40:53 +0100 Subject: [PATCH 049/299] spi: sh-msiof: avoid integer overflow in constants [ Upstream commit 6500ad28fd5d67d5ca0fee9da73c463090842440 ] cppcheck rightfully warned: drivers/spi/spi-sh-msiof.c:792:28: warning: Signed integer overflow for expression '7<<29'. [integerOverflow] sh_msiof_write(p, SIFCTR, SIFCTR_TFWM_1 | SIFCTR_RFWM_1); Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://msgid.link/r/20240130094053.10672-1-wsa+renesas@sang-engineering.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-sh-msiof.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index cfc3b1ddbd22..6f12e4fb2e2e 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -136,14 +136,14 @@ struct sh_msiof_spi_priv { /* SIFCTR */ #define SIFCTR_TFWM_MASK GENMASK(31, 29) /* Transmit FIFO Watermark */ -#define SIFCTR_TFWM_64 (0 << 29) /* Transfer Request when 64 empty stages */ -#define SIFCTR_TFWM_32 (1 << 29) /* Transfer Request when 32 empty stages */ -#define SIFCTR_TFWM_24 (2 << 29) /* Transfer Request when 24 empty stages */ -#define SIFCTR_TFWM_16 (3 << 29) /* Transfer Request when 16 empty stages */ -#define SIFCTR_TFWM_12 (4 << 29) /* Transfer Request when 12 empty stages */ -#define SIFCTR_TFWM_8 (5 << 29) /* Transfer Request when 8 empty stages */ -#define SIFCTR_TFWM_4 (6 << 29) /* Transfer Request when 4 empty stages */ -#define SIFCTR_TFWM_1 (7 << 29) /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFWM_64 (0UL << 29) /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 (1UL << 29) /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 (2UL << 29) /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 (3UL << 29) /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 (4UL << 29) /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 (5UL << 29) /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 (6UL << 29) /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 (7UL << 29) /* Transfer Request when 1 empty stage */ #define SIFCTR_TFUA_MASK GENMASK(26, 20) /* Transmit FIFO Usable Area */ #define SIFCTR_TFUA_SHIFT 20 #define SIFCTR_TFUA(i) ((i) << SIFCTR_TFUA_SHIFT) From 17a6d7a0a7a9d846f321eaab7e2021fb4b2eb631 Mon Sep 17 00:00:00 2001 From: Brenton Simpson Date: Tue, 30 Jan 2024 13:34:16 -0800 Subject: [PATCH 050/299] Input: xpad - add Lenovo Legion Go controllers [ Upstream commit 80441f76ee67002437db61f3b317ed80cce085d2 ] The Lenovo Legion Go is a handheld gaming system, similar to a Steam Deck. It has a gamepad (including rear paddles), 3 gyroscopes, a trackpad, volume buttons, a power button, and 2 LED ring lights. The Legion Go firmware presents these controls as a USB hub with various devices attached. In its default state, the gamepad is presented as an Xbox controller connected to this hub. (By holding a combination of buttons, it can be changed to use the older DirectInput API.) This patch teaches the existing Xbox controller module `xpad` to bind to the controller in the Legion Go, which enables support for the: - directional pad, - analog sticks (including clicks), - X, Y, A, B, - start and select (or menu and capture), - shoulder buttons, and - rumble. The trackpad, touchscreen, volume controls, and power button are already supported via existing kernel modules. Two of the face buttons, the gyroscopes, rear paddles, and LEDs are not. After this patch lands, the Legion Go will be mostly functional in Linux, out-of-the-box. The various components of the USB hub can be synthesized into a single logical controller (including the additional buttons) in userspace with [Handheld Daemon](https://github.com/hhd-dev/hhd), which makes the Go fully functional. Signed-off-by: Brenton Simpson Link: https://lore.kernel.org/r/20240118183546.418064-1-appsforartists@google.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index e2c1848182de..d0bb3edfd0a0 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -294,6 +294,7 @@ static const struct xpad_device { { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, + { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, @@ -491,6 +492,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x15e4), /* Numark Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ + XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ From 9e8e25f201915bee294e373f752e43895b193165 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 26 Jan 2024 15:24:10 +0000 Subject: [PATCH 051/299] misc: open-dice: Fix spurious lockdep warning [ Upstream commit ac9762a74c7ca7cbfcb4c65f5871373653a046ac ] When probing the open-dice driver with PROVE_LOCKING=y, lockdep complains that the mutex in 'drvdata->lock' has a non-static key: | INFO: trying to register non-static key. | The code is fine but needs lockdep annotation, or maybe | you didn't initialize this object before use? | turning off the locking correctness validator. Fix the problem by initialising the mutex memory with mutex_init() instead of __MUTEX_INITIALIZER(). Cc: Arnd Bergmann Cc: David Brazdil Cc: Greg Kroah-Hartman Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20240126152410.10148-1-will@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/open-dice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c index 8aea2d070a40..d279a4f195e2 100644 --- a/drivers/misc/open-dice.c +++ b/drivers/misc/open-dice.c @@ -140,7 +140,6 @@ static int __init open_dice_probe(struct platform_device *pdev) return -ENOMEM; *drvdata = (struct open_dice_drvdata){ - .lock = __MUTEX_INITIALIZER(drvdata->lock), .rmem = rmem, .misc = (struct miscdevice){ .parent = dev, @@ -150,6 +149,7 @@ static int __init open_dice_probe(struct platform_device *pdev) .mode = 0600, }, }; + mutex_init(&drvdata->lock); /* Index overflow check not needed, misc_register() will fail. */ snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++); From 4c09593b31a55aff49c79bc41f65129b76167851 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 25 Jan 2024 17:29:46 -0500 Subject: [PATCH 052/299] netfilter: conntrack: check SCTP_CID_SHUTDOWN_ACK for vtag setting in sctp_new [ Upstream commit 6e348067ee4bc5905e35faa3a8fafa91c9124bc7 ] The annotation says in sctp_new(): "If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8)". However, it does not check SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the conntrack entry(ct). Because of that, if the ct in Router disappears for some reason in [1] with the packet sequence like below: Client > Server: sctp (1) [INIT] [init tag: 3201533963] Server > Client: sctp (1) [INIT ACK] [init tag: 972498433] Client > Server: sctp (1) [COOKIE ECHO] Server > Client: sctp (1) [COOKIE ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057809] Server > Client: sctp (1) [SACK] [cum ack 3075057809] Server > Client: sctp (1) [HB REQ] (the ct in Router disappears somehow) <-------- [1] Client > Server: sctp (1) [HB ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [ABORT] when processing HB ACK packet in Router it calls sctp_new() to initialize the new ct with vtag[REPLY] set to HB_ACK packet's vtag. Later when sending DATA from Client, all the SACKs from Server will get dropped in Router, as the SACK packet's vtag does not match vtag[REPLY] in the ct. The worst thing is the vtag in this ct will never get fixed by the upcoming packets from Server. This patch fixes it by checking SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the ct in sctp_new() as the annotation says. With this fix, it will leave vtag[REPLY] in ct to 0 in the case above, and the next HB REQ/ACK from Server is able to fix the vtag as its value is 0 in nf_conntrack_sctp_packet(). Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c6bd533983c1..4cc97f971264 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; - } else { + } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", From 5a1bd2143fd70f35a255c02611731b509d1cebac Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 26 Jan 2024 15:14:50 -0500 Subject: [PATCH 053/299] drm/amdkfd: Use correct drm device for cgroup permission check [ Upstream commit 4119734e06a7f30e7e8eb666692a58b85dca0269 ] On GFX 9.4.3, for a given KFD node, fetch the correct drm device from XCP manager when checking for cgroup permissions. Signed-off-by: Mukul Joshi Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 3287a3961395..12ee273e87e1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1482,10 +1482,15 @@ void kfd_dec_compute_active(struct kfd_node *dev); /* Cgroup Support */ /* Check with device cgroup if @kfd device is accessible */ -static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd) +static inline int kfd_devcgroup_check_permission(struct kfd_node *node) { #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) - struct drm_device *ddev = adev_to_drm(kfd->adev); + struct drm_device *ddev; + + if (node->xcp) + ddev = node->xcp->ddev; + else + ddev = adev_to_drm(node->adev); return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, ddev->render->index, From e34e4e6d8c6953488e8613780dd529fe8d630852 Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem Date: Tue, 16 Jan 2024 11:00:00 -0500 Subject: [PATCH 054/299] drm/amd/display: increased min_dcfclk_mhz and min_fclk_mhz [ Upstream commit 2ff33c759a4247c84ec0b7815f1f223e155ba82a ] [why] Originally, PMFW said min FCLK is 300Mhz, but min DCFCLK can be increased to 400Mhz because min FCLK is now 600Mhz so FCLK >= 1.5 * DCFCLK hardware requirement will still be satisfied. Increasing min DCFCLK addresses underflow issues (underflow occurs when phantom pipe is turned on for some Sub-Viewport configs). [how] Increasing DCFCLK by raising the min_dcfclk_mhz Reviewed-by: Chaitanya Dhere Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: Sohaib Nadeem Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index cf3b400c8619..ec09d5a8876b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2452,7 +2452,7 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk struct _vcs_dpi_voltage_scaling_st entry = {0}; struct clk_limit_table_entry max_clk_data = {0}; - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + unsigned int min_dcfclk_mhz = 399, min_fclk_mhz = 599; static const unsigned int num_dcfclk_stas = 5; unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; From 567f1b1da5dad5d8001f13d4a55dcd8cff266d37 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 29 Jan 2024 13:58:13 +0000 Subject: [PATCH 055/299] cifs: make sure that channel scaling is done only once [ Upstream commit ee36a3b345c433a846effcdcfba437c2298eeda5 ] Following a successful cifs_tree_connect, we have the code to scale up/down the number of channels in the session. However, it is not protected by a lock today. As a result, this code can be executed by several processes that select the same channel. The core functions handle this well, as they pick chan_lock. However, we've seen cases where smb2_reconnect throws some warnings. To fix that, this change introduces a flags bitmap inside the cifs_ses structure. A new flag type is used to ensure that only one process enters this section at any time. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifsglob.h | 3 +++ fs/smb/client/smb2pdu.c | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index f794b16095e4..dcc41fe33b70 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1018,6 +1018,8 @@ struct cifs_chan { __u8 signkey[SMB3_SIGN_KEY_SIZE]; }; +#define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) + /* * Session structure. One of these for each uid session with a particular host */ @@ -1050,6 +1052,7 @@ struct cifs_ses { enum securityEnum sectype; /* what security flavor was specified? */ bool sign; /* is signing required? */ bool domainAuto:1; + unsigned int flags; __u16 session_flags; __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE]; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 5d9c87d2e1e0..ce2d28537bc8 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -399,6 +399,15 @@ skip_sess_setup: goto out; } + spin_lock(&ses->ses_lock); + if (ses->flags & CIFS_SES_FLAG_SCALE_CHANNELS) { + spin_unlock(&ses->ses_lock); + mutex_unlock(&ses->session_mutex); + goto skip_add_channels; + } + ses->flags |= CIFS_SES_FLAG_SCALE_CHANNELS; + spin_unlock(&ses->ses_lock); + if (!rc && (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { mutex_unlock(&ses->session_mutex); @@ -428,15 +437,22 @@ skip_sess_setup: if (ses->chan_max > ses->chan_count && ses->iface_count && !SERVER_IS_CHAN(server)) { - if (ses->chan_count == 1) + if (ses->chan_count == 1) { cifs_server_dbg(VFS, "supports multichannel now\n"); + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + } cifs_try_adding_channels(ses); } } else { mutex_unlock(&ses->session_mutex); } + skip_add_channels: + spin_lock(&ses->ses_lock); + ses->flags &= ~CIFS_SES_FLAG_SCALE_CHANNELS; + spin_unlock(&ses->ses_lock); if (smb2_command != SMB2_INTERNAL_CMD) mod_delayed_work(cifsiod_wq, &server->reconnect, 0); From fabab199b1975d58b78e24a1eb54da4f71139a62 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:21 +0000 Subject: [PATCH 056/299] ASoC: wm_adsp: Don't overwrite fwf_name with the default [ Upstream commit daf3f0f99cde93a066240462b7a87cdfeedc04c0 ] There's no need to overwrite fwf_name with a kstrdup() of the cs_dsp part name. It is trivial to select either fwf_name or cs_dsp.part as the string to use when building the filename in wm_adsp_request_firmware_file(). This leaves fwf_name entirely owned by the codec driver. It also avoids problems with freeing the pointer. With the original code fwf_name was either a pointer owned by the codec driver, or a kstrdup() created by wm_adsp. This meant wm_adsp must free it if it set it, but not if the codec driver set it. The code was handling this by using devm_kstrdup(). But there is no absolute requirement that wm_adsp_common_init() must be called from probe(), so this was a pseudo-memory leak - each new call to wm_adsp_common_init() would allocate another block of memory but these would only be freed if the owning codec driver was removed. Signed-off-by: Richard Fitzgerald Link: https://msgid.link/r/20240129162737.497-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm_adsp.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index cb654f1b09f1..72b90a7ee4b6 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -739,19 +739,25 @@ static int wm_adsp_request_firmware_file(struct wm_adsp *dsp, const char *filetype) { struct cs_dsp *cs_dsp = &dsp->cs_dsp; + const char *fwf; char *s, c; int ret = 0; + if (dsp->fwf_name) + fwf = dsp->fwf_name; + else + fwf = dsp->cs_dsp.name; + if (system_name && asoc_component_prefix) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix, filetype); else if (system_name) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, filetype); else - *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, dsp->fwf_name, + *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, fwf, wm_adsp_fw[dsp->fw].file, filetype); if (*filename == NULL) @@ -863,29 +869,18 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, } adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n", - cirrus_dir, dsp->part, dsp->fwf_name, wm_adsp_fw[dsp->fw].file, - system_name, asoc_component_prefix); + cirrus_dir, dsp->part, + dsp->fwf_name ? dsp->fwf_name : dsp->cs_dsp.name, + wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix); return -ENOENT; } static int wm_adsp_common_init(struct wm_adsp *dsp) { - char *p; - INIT_LIST_HEAD(&dsp->compr_list); INIT_LIST_HEAD(&dsp->buffer_list); - if (!dsp->fwf_name) { - p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL); - if (!p) - return -ENOMEM; - - dsp->fwf_name = p; - for (; *p != 0; ++p) - *p = tolower(*p); - } - return 0; } From ffd63f243735aebc092f1fee9fc1f9d8ce20bfb4 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Thu, 1 Feb 2024 14:53:08 +0300 Subject: [PATCH 057/299] ALSA: usb-audio: Ignore clock selector errors for single connection [ Upstream commit eaa1b01fe709d6a236a9cec74813e0400601fd23 ] For devices with multiple clock sources connected to a selector, we need to check what a clock selector control request has returned. This is needed to ensure that a requested clock source is indeed selected and for autoclock feature to work. For devices with single clock source connected, if we get an error there is nothing else we can do about it. We can't skip clock selector setup as it is required by some devices. So lets just ignore error in this case. This should fix various buggy Mackie devices: [ 649.109785] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) [ 649.111946] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) [ 649.113822] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) There is also interesting info from the Windows documentation [1] (this is probably why manufacturers dont't even test this feature): "The USB Audio 2.0 driver doesn't support clock selection. The driver uses the Clock Source Entity, which is selected by default and never issues a Clock Selector Control SET CUR request." Link: https://learn.microsoft.com/en-us/windows-hardware/drivers/audio/usb-2-0-audio-drivers [1] Link: https://bugzilla.kernel.org/show_bug.cgi?id=217314 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218175 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218342 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240201115308.17838-1-alexander@tsoy.me Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/clock.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 33db334e6556..a676ad093d18 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -328,8 +328,16 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); - if (err < 0) + if (err < 0) { + if (pins == 1) { + usb_audio_dbg(chip, + "%s(): selector returned an error, " + "assuming a firmware bug, id %d, ret %d\n", + __func__, clock_id, err); + return ret; + } return err; + } } if (!validate || ret > 0 || !chip->autoclock) From baa6b7eb8c66486bd64608adc63fe03b30d3c0b9 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:01 +0100 Subject: [PATCH 058/299] nvme-fc: do not wait in vain when unloading module [ Upstream commit 70fbfc47a392b98e5f8dba70c6efc6839205c982 ] The module exit path has race between deleting all controllers and freeing 'left over IDs'. To prevent double free a synchronization between nvme_delete_ctrl and ida_destroy has been added by the initial commit. There is some logic around trying to prevent from hanging forever in wait_for_completion, though it does not handling all cases. E.g. blktests is able to reproduce the situation where the module unload hangs forever. If we completely rely on the cleanup code executed from the nvme_delete_ctrl path, all IDs will be freed eventually. This makes calling ida_destroy unnecessary. We only have to ensure that all nvme_delete_ctrl code has been executed before we leave nvme_fc_exit_module. This is done by flushing the nvme_delete_wq workqueue. While at it, remove the unused nvme_fc_wq workqueue too. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/fc.c | 47 ++++++------------------------------------ 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 46cce0ec35e9..cdb1e706f855 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -221,11 +221,6 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); -static struct workqueue_struct *nvme_fc_wq; - -static bool nvme_fc_waiting_to_unload; -static DECLARE_COMPLETION(nvme_fc_unload_proceed); - /* * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. @@ -255,8 +250,6 @@ nvme_fc_free_lport(struct kref *ref) /* remove from transport list */ spin_lock_irqsave(&nvme_fc_lock, flags); list_del(&lport->port_list); - if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list)) - complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); @@ -3893,10 +3886,6 @@ static int __init nvme_fc_init_module(void) { int ret; - nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); - if (!nvme_fc_wq) - return -ENOMEM; - /* * NOTE: * It is expected that in the future the kernel will combine @@ -3914,7 +3903,7 @@ static int __init nvme_fc_init_module(void) ret = class_register(&fc_class); if (ret) { pr_err("couldn't register class fc\n"); - goto out_destroy_wq; + return ret; } /* @@ -3938,8 +3927,6 @@ out_destroy_device: device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&fc_class); -out_destroy_wq: - destroy_workqueue(nvme_fc_wq); return ret; } @@ -3959,45 +3946,23 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport) spin_unlock(&rport->lock); } -static void -nvme_fc_cleanup_for_unload(void) +static void __exit nvme_fc_exit_module(void) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; - - list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { - list_for_each_entry(rport, &lport->endp_list, endp_list) { - nvme_fc_delete_controllers(rport); - } - } -} - -static void __exit nvme_fc_exit_module(void) -{ unsigned long flags; - bool need_cleanup = false; spin_lock_irqsave(&nvme_fc_lock, flags); - nvme_fc_waiting_to_unload = true; - if (!list_empty(&nvme_fc_lport_list)) { - need_cleanup = true; - nvme_fc_cleanup_for_unload(); - } + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) + list_for_each_entry(rport, &lport->endp_list, endp_list) + nvme_fc_delete_controllers(rport); spin_unlock_irqrestore(&nvme_fc_lock, flags); - if (need_cleanup) { - pr_info("%s: waiting for ctlr deletes\n", __func__); - wait_for_completion(&nvme_fc_unload_proceed); - pr_info("%s: ctrl deletes complete\n", __func__); - } + flush_workqueue(nvme_delete_wq); nvmf_unregister_transport(&nvme_fc_transport); - ida_destroy(&nvme_fc_local_port_cnt); - ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(&fc_class, MKDEV(0, 0)); class_unregister(&fc_class); - destroy_workqueue(nvme_fc_wq); } module_init(nvme_fc_init_module); From 95a9ff3307343c1c5ea5f42f9432e42813ac5535 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:02 +0100 Subject: [PATCH 059/299] nvmet-fcloop: swap the list_add_tail arguments [ Upstream commit dcfad4ab4d6733f2861cd241d8532a0004fc835a ] The first argument of list_add_tail function is the new element which should be added to the list which is the second argument. Swap the arguments to allow processing more than one element at a time. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fcloop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c65a73433c05..e6d4226827b5 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -358,7 +358,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); return ret; @@ -391,7 +391,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, if (remoteport) { rport = remoteport->private; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); } @@ -446,7 +446,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, if (!tport->remoteport) { tls_req->status = -ECONNREFUSED; spin_lock(&tport->lock); - list_add_tail(&tport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &tport->ls_list); spin_unlock(&tport->lock); queue_work(nvmet_wq, &tport->ls_work); return ret; From 2baa7272f2d98476fbff68404ddc1d42500efa36 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:03 +0100 Subject: [PATCH 060/299] nvmet-fc: release reference on target port [ Upstream commit c691e6d7e13dab81ac8c7489c83b5dea972522a5 ] In case we return early out of __nvmet_fc_finish_ls_req() we still have to release the reference on the target port. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1ab6601fdd5c..0075d9636b06 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -359,7 +359,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - return; + goto out_puttgtport; } list_del(&lsop->lsreq_list); @@ -372,6 +372,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); +out_puttgtport: nvmet_fc_tgtport_put(tgtport); } From ccd49adde0547bd6b6c857909d1c4a3ad6ab93c0 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:04 +0100 Subject: [PATCH 061/299] nvmet-fc: defer cleanup using RCU properly [ Upstream commit 4049dc96b8de7aeb3addcea039446e464726a525 ] When the target executes a disconnect and the host triggers a reconnect immediately, the reconnect command still finds an existing association. The reconnect crashes later on because nvmet_fc_delete_target_assoc blindly removes resources while the reconnect code wants to use it. To address this, nvmet_fc_find_target_assoc should not be able to lookup an association which is being removed. The association list is already under RCU lifetime management, so let's properly use it and remove the association from the list and wait for a grace period before cleaning up all. This means we also can drop the RCU management on the queues, because this is now handled via the association itself. A second step split the execution context so that the initial disconnect command can complete without running the reconnect code in the same context. As usual, this is done by deferring the ->done to a workqueue. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 83 ++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 0075d9636b06..c9ef642313c8 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -165,7 +165,7 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_hostport *hostport; struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; - struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1]; + struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; struct rcu_head rcu; @@ -802,14 +802,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, if (!queue) return NULL; - if (!nvmet_fc_tgt_a_get(assoc)) - goto out_free_queue; - queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0, assoc->tgtport->fc_target_port.port_num, assoc->a_id, qid); if (!queue->work_q) - goto out_a_put; + goto out_free_queue; queue->qid = qid; queue->sqsize = sqsize; @@ -831,15 +828,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, goto out_fail_iodlist; WARN_ON(assoc->queues[qid]); - rcu_assign_pointer(assoc->queues[qid], queue); + assoc->queues[qid] = queue; return queue; out_fail_iodlist: nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); destroy_workqueue(queue->work_q); -out_a_put: - nvmet_fc_tgt_a_put(assoc); out_free_queue: kfree(queue); return NULL; @@ -852,12 +847,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref) struct nvmet_fc_tgt_queue *queue = container_of(ref, struct nvmet_fc_tgt_queue, ref); - rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL); - nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue); - nvmet_fc_tgt_a_put(queue->assoc); - destroy_workqueue(queue->work_q); kfree_rcu(queue, rcu); @@ -969,7 +960,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (association_id == assoc->association_id) { - queue = rcu_dereference(assoc->queues[qid]); + queue = assoc->queues[qid]; if (queue && (!atomic_read(&queue->connected) || !nvmet_fc_tgt_q_get(queue))) @@ -1172,13 +1163,18 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_ls_iod *oldls; unsigned long flags; + int i; + + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + nvmet_fc_delete_target_queue(assoc->queues[i]); + } /* Send Disconnect now that all i/o has completed */ nvmet_fc_xmt_disconnect_assoc(assoc); nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); - list_del_rcu(&assoc->a_list); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); /* if pending Rcv Disconnect Association LS, send rsp now */ @@ -1208,7 +1204,7 @@ static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) { struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - struct nvmet_fc_tgt_queue *queue; + unsigned long flags; int i, terminating; terminating = atomic_xchg(&assoc->terminating, 1); @@ -1217,29 +1213,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) if (terminating) return; + spin_lock_irqsave(&tgtport->lock, flags); + list_del_rcu(&assoc->a_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + synchronize_rcu(); + + /* ensure all in-flight I/Os have been processed */ for (i = NVMET_NR_QUEUES; i >= 0; i--) { - rcu_read_lock(); - queue = rcu_dereference(assoc->queues[i]); - if (!queue) { - rcu_read_unlock(); - continue; - } - - if (!nvmet_fc_tgt_q_get(queue)) { - rcu_read_unlock(); - continue; - } - rcu_read_unlock(); - nvmet_fc_delete_target_queue(queue); - nvmet_fc_tgt_q_put(queue); + if (assoc->queues[i]) + flush_workqueue(assoc->queues[i]->work_q); } dev_info(tgtport->dev, "{%d:%d} Association deleted\n", tgtport->fc_target_port.port_num, assoc->a_id); - - nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * @@ -1492,9 +1480,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); } @@ -1547,9 +1534,8 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1581,7 +1567,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { - queue = rcu_dereference(assoc->queues[0]); + queue = assoc->queues[0]; if (queue && queue->nvme_sq.ctrl == ctrl) { if (nvmet_fc_tgt_a_get(assoc)) found_ctrl = true; @@ -1593,9 +1579,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); return; } @@ -1625,6 +1610,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + flush_workqueue(nvmet_wq); + /* * should terminate LS's as well. However, LS's will be generated * at the tail end of association termination, so they likely don't @@ -1870,9 +1857,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); - /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(assoc); - /* * The rules for LS response says the response cannot * go back until ABTS's have been sent for all outstanding @@ -1887,8 +1871,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc->rcv_disconn = iod; spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(assoc); - if (oldls) { dev_info(tgtport->dev, "{%d:%d} Multiple Disconnect Association LS's " @@ -1904,6 +1886,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); + return false; } @@ -2902,6 +2887,9 @@ nvmet_fc_remove_port(struct nvmet_port *port) nvmet_fc_portentry_unbind(pe); + /* terminate any outstanding associations */ + __nvmet_fc_free_assocs(pe->tgtport); + kfree(pe); } @@ -2933,6 +2921,9 @@ static int __init nvmet_fc_init_module(void) static void __exit nvmet_fc_exit_module(void) { + /* ensure any shutdown operation, e.g. delete ctrls have finished */ + flush_workqueue(nvmet_wq); + /* sanity check - all lports should be removed */ if (!list_empty(&nvmet_fc_target_list)) pr_warn("%s: targetport list not empty\n", __func__); From f2879398c295bc6016bc6854e94780ca694b3926 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:06 +0100 Subject: [PATCH 062/299] nvmet-fc: hold reference on hostport match [ Upstream commit ca121a0f7515591dba0eb5532bfa7ace4dc153ce ] The hostport data structure is shared between the association, this why we keep track of the users via a refcount. So we should not decrement the refcount on a match and free the hostport several times. Reported by KASAN. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index c9ef642313c8..64c26b703860 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1069,8 +1069,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) /* new allocation not needed */ kfree(newhost); newhost = match; - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); } else { newhost->tgtport = tgtport; newhost->hosthandle = hosthandle; From 399b70e8eadc35fda6c37f560b129cff0bce359d Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:09 +0100 Subject: [PATCH 063/299] nvmet-fc: abort command when there is no binding [ Upstream commit 3146345c2e9c2f661527054e402b0cfad80105a4 ] When the target port has not active port binding, there is no point in trying to process the command as it has to fail anyway. Instead adding checks to all commands abort the command early. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 64c26b703860..b4b2631eb530 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1101,6 +1101,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) int idx; bool needrandom = true; + if (!tgtport->pe) + return NULL; + assoc = kzalloc(sizeof(*assoc), GFP_KERNEL); if (!assoc) return NULL; @@ -2523,8 +2526,9 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.cqe = &fod->rspiubuf.cqe; - if (tgtport->pe) - fod->req.port = tgtport->pe->port; + if (!tgtport->pe) + goto transport_error; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); From eaf0971fdabf2a93c1429dc6bedf3bbe85dffa30 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:10 +0100 Subject: [PATCH 064/299] nvmet-fc: avoid deadlock on delete association path [ Upstream commit 710c69dbaccdac312e32931abcb8499c1525d397 ] When deleting an association the shutdown path is deadlocking because we try to flush the nvmet_wq nested. Avoid this by deadlock by deferring the put work into its own work item. Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index b4b2631eb530..36cae038eb04 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -111,6 +111,8 @@ struct nvmet_fc_tgtport { struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; + + struct work_struct put_work; }; struct nvmet_fc_port_entry { @@ -248,6 +250,13 @@ static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_put_tgtport_work(struct work_struct *work) +{ + struct nvmet_fc_tgtport *tgtport = + container_of(work, struct nvmet_fc_tgtport, put_work); + + nvmet_fc_tgtport_put(tgtport); +} static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); @@ -359,7 +368,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - goto out_puttgtport; + goto out_putwork; } list_del(&lsop->lsreq_list); @@ -372,8 +381,8 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); -out_puttgtport: - nvmet_fc_tgtport_put(tgtport); +out_putwork: + queue_work(nvmet_wq, &tgtport->put_work); } static int @@ -1404,6 +1413,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; + INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work); ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { From fad689fce093d128a88eec2f7ab8643b0223d342 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:11 +0100 Subject: [PATCH 065/299] nvmet-fc: take ref count on tgtport before delete assoc [ Upstream commit fe506a74589326183297d5abdda02d0c76ae5a8b ] We have to ensure that the tgtport is not going away before be have remove all the associations. Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/fc.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 36cae038eb04..8a02ed63b156 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1092,13 +1092,28 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) } static void -nvmet_fc_delete_assoc(struct work_struct *work) +nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); +} + +static void +nvmet_fc_delete_assoc_work(struct work_struct *work) { struct nvmet_fc_tgt_assoc *assoc = container_of(work, struct nvmet_fc_tgt_assoc, del_work); + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_delete_assoc(assoc); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_tgtport_get(assoc->tgtport); + queue_work(nvmet_wq, &assoc->del_work); } static struct nvmet_fc_tgt_assoc * @@ -1132,7 +1147,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); - INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work); atomic_set(&assoc->terminating, 0); while (needrandom) { @@ -1491,7 +1506,7 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); @@ -1545,7 +1560,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1590,7 +1605,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); return; } @@ -1897,7 +1912,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); return false; From 2b1f28ee49dad6d45bb7d8ab4b492570f7aeaa77 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 1 Feb 2024 11:15:28 +0000 Subject: [PATCH 066/299] cifs: do not search for channel if server is terminating [ Upstream commit 88675b22d34e6e815ad4bde09c590ccb2d50c59d ] In order to scale down the channels, the following sequence of operations happen: 1. server struct is marked for terminate 2. the channel is deallocated in the ses->chans array 3. at a later point the cifsd thread actually terminates the server Between 2 and 3, there can be calls to find the channel for a server struct. When that happens, there can be an ugly warning that's logged. But this is expected. So this change does two things: 1. in cifs_ses_get_chan_index, if server->terminate is set, return 2. always make sure server->terminate is set with chan_lock held Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/sess.c | 4 ++++ fs/smb/client/smb2pdu.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index a20a5d0836dc..52f7a411e2bb 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -75,6 +75,10 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, { unsigned int i; + /* if the channel is waiting for termination */ + if (server->terminate) + return CIFS_INVAL_CHAN_INDEX; + for (i = 0; i < ses->chan_count; i++) { if (ses->chans[i].server == server) return i; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index ce2d28537bc8..97fc2f85b429 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -178,6 +178,7 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses, } ses->chans[chan_index].server = NULL; + server->terminate = true; spin_unlock(&ses->chan_lock); /* @@ -188,7 +189,6 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses, */ cifs_put_tcp_session(server, from_reconnect); - server->terminate = true; cifs_signal_cifsd_for_reconnect(server, false); /* mark primary server as needing reconnect */ From 2fdb5551e35b0dcb61d85f70d0963813f53d377f Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 29 Jan 2024 21:04:44 -0300 Subject: [PATCH 067/299] smb: client: increase number of PDUs allowed in a compound request [ Upstream commit 11d4d1dba3315f73d2d1d386f5bf4811a8241d45 ] With the introduction of SMB2_OP_QUERY_WSL_EA, the client may now send 5 commands in a single compound request in order to query xattrs from potential WSL reparse points, which should be fine as we currently allow up to 5 PDUs in a single compound request. However, if encryption is enabled (e.g. 'seal' mount option) or enforced by the server, current MAX_COMPOUND(5) won't be enough as we require an extra PDU for the transform header. Fix this by increasing MAX_COMPOUND to 7 and, while we're at it, add an WARN_ON_ONCE() and return -EIO instead of -ENOMEM in case we attempt to send a compound request that couldn't include the extra transform header. Signed-off-by: Paulo Alcantara Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifsglob.h | 2 +- fs/smb/client/transport.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index dcc41fe33b70..462554917e5a 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -82,7 +82,7 @@ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ -#define MAX_COMPOUND 5 +#define MAX_COMPOUND 7 /* * Default number of credits to keep available for SMB3. diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index e00278fcfa4f..994d70193432 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -435,8 +435,8 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (!(flags & CIFS_TRANSFORM_REQ)) return __smb_send_rqst(server, num_rqst, rqst); - if (num_rqst > MAX_COMPOUND - 1) - return -ENOMEM; + if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) + return -EIO; if (!server->ops->init_transform_rq) { cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); From f5411b7666275d67bf91beb2f84bbf12ccb3dead Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:02 +0800 Subject: [PATCH 068/299] ext4: correct the hole length returned by ext4_map_blocks() [ Upstream commit 6430dea07e85958fa87d0276c0c4388dd51e630b ] In ext4_map_blocks(), if we can't find a range of mapping in the extents cache, we are calling ext4_ext_map_blocks() to search the real path and ext4_ext_determine_hole() to determine the hole range. But if the querying range was partially or completely overlaped by a delalloc extent, we can't find it in the real extent path, so the returned hole length could be incorrect. Fortunately, ext4_ext_put_gap_in_cache() have already handle delalloc extent, but it searches start from the expanded hole_start, doesn't start from the querying range, so the delalloc extent found could not be the one that overlaped the querying range, plus, it also didn't adjust the hole length. Let's just remove ext4_ext_put_gap_in_cache(), handle delalloc and insert adjusted hole extent in ext4_ext_determine_hole(). Signed-off-by: Zhang Yi Suggested-by: Jan Kara Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-4-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/extents.c | 111 +++++++++++++++++++++++++++++----------------- 1 file changed, 70 insertions(+), 41 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4c3e2f38349d..d393df22431a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode, /* - * ext4_ext_determine_hole - determine hole around given block + * ext4_ext_find_hole - find hole around given block according to the given path * @inode: inode we lookup in * @path: path in extent tree to @lblk * @lblk: pointer to logical block around which we want to determine hole @@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode, * The function returns the length of a hole starting at @lblk. We update @lblk * to the beginning of the hole if we managed to find it. */ -static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t *lblk) +static ext4_lblk_t ext4_ext_find_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t *lblk) { int depth = ext_depth(inode); struct ext4_extent *ex; @@ -2270,30 +2270,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, return len; } -/* - * ext4_ext_put_gap_in_cache: - * calculate boundaries of the gap that the requested block fits into - * and cache this gap - */ -static void -ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start, - ext4_lblk_t hole_len) -{ - struct extent_status es; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, - hole_start + hole_len - 1, &es); - if (es.es_len) { - /* There's delayed extent containing lblock? */ - if (es.es_lblk <= hole_start) - return; - hole_len = min(es.es_lblk - hole_start, hole_len); - } - ext_debug(inode, " -> %u:%u\n", hole_start, hole_len); - ext4_es_insert_extent(inode, hole_start, hole_len, ~0, - EXTENT_STATUS_HOLE); -} - /* * ext4_ext_rm_idx: * removes index from the index block. @@ -4062,6 +4038,69 @@ static int get_implied_cluster_alloc(struct super_block *sb, return 0; } +/* + * Determine hole length around the given logical block, first try to + * locate and expand the hole from the given @path, and then adjust it + * if it's partially or completely converted to delayed extents, insert + * it into the extent cache tree if it's indeed a hole, finally return + * the length of the determined extent. + */ +static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t lblk) +{ + ext4_lblk_t hole_start, len; + struct extent_status es; + + hole_start = lblk; + len = ext4_ext_find_hole(inode, path, &hole_start); +again: + ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, + hole_start + len - 1, &es); + if (!es.es_len) + goto insert_hole; + + /* + * There's a delalloc extent in the hole, handle it if the delalloc + * extent is in front of, behind and straddle the queried range. + */ + if (lblk >= es.es_lblk + es.es_len) { + /* + * The delalloc extent is in front of the queried range, + * find again from the queried start block. + */ + len -= lblk - hole_start; + hole_start = lblk; + goto again; + } else if (in_range(lblk, es.es_lblk, es.es_len)) { + /* + * The delalloc extent containing lblk, it must have been + * added after ext4_map_blocks() checked the extent status + * tree, adjust the length to the delalloc extent's after + * lblk. + */ + len = es.es_lblk + es.es_len - lblk; + return len; + } else { + /* + * The delalloc extent is partially or completely behind + * the queried range, update hole length until the + * beginning of the delalloc extent. + */ + len = min(es.es_lblk - hole_start, len); + } + +insert_hole: + /* Put just found gap into cache to speed up subsequent requests */ + ext_debug(inode, " -> %u:%u\n", hole_start, len); + ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE); + + /* Update hole_len to reflect hole size after lblk */ + if (hole_start != lblk) + len -= lblk - hole_start; + + return len; +} /* * Block allocation/map/preallocation routine for extents based files @@ -4179,22 +4218,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * we couldn't try to create block if create flag is zero */ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { - ext4_lblk_t hole_start, hole_len; + ext4_lblk_t len; - hole_start = map->m_lblk; - hole_len = ext4_ext_determine_hole(inode, path, &hole_start); - /* - * put just found gap into cache to speed up - * subsequent requests - */ - ext4_ext_put_gap_in_cache(inode, hole_start, hole_len); + len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk); - /* Update hole_len to reflect hole size after map->m_lblk */ - if (hole_start != map->m_lblk) - hole_len -= map->m_lblk - hole_start; map->m_pblk = 0; - map->m_len = min_t(unsigned int, map->m_len, hole_len); - + map->m_len = min_t(unsigned int, map->m_len, len); goto out; } From 4390f74d09a092e24e83805750906bd80c5eee5c Mon Sep 17 00:00:00 2001 From: Szilard Fabian Date: Fri, 2 Feb 2024 10:28:59 -0800 Subject: [PATCH 069/299] Input: i8042 - add Fujitsu Lifebook U728 to i8042 quirk table [ Upstream commit 4255447ad34c5c3785fcdcf76cfa0271d6e5ed39 ] Another Fujitsu-related patch. In the initial boot stage the integrated keyboard of Fujitsu Lifebook U728 refuses to work and it's not possible to type for example a dm-crypt passphrase without the help of an external keyboard. i8042.nomux kernel parameter resolves this issue but using that a PS/2 mouse is detected. This input device is unused even when the i2c-hid-acpi kernel module is blacklisted making the integrated ELAN touchpad (04F3:3092) not working at all. So this notebook uses a hid-over-i2c touchpad which is managed by the i2c_designware input driver. Since you can't find a PS/2 mouse port on this computer and you can't connect a PS/2 mouse to it even with an official port replicator I think it's safe to not use the PS/2 mouse port at all. Signed-off-by: Szilard Fabian Link: https://lore.kernel.org/r/20240103014717.127307-2-szfabian@bluemarch.art Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/serio/i8042-acpipnpio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index cd45a65e17f2..dfc6c581873b 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -634,6 +634,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOAUX) }, + { + /* Fujitsu Lifebook U728 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U728"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { /* Gigabyte M912 */ .matches = { From 15735a62add3c4dbe50437f78b5719edf8c9c547 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:04:53 +0300 Subject: [PATCH 070/299] fs/ntfs3: Improve alternative boot processing [ Upstream commit c39de951282df9a60ef70664e4378d88006b2670 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/super.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index f763e3256ccc..aa3b2c262ab6 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -865,6 +865,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, u16 fn, ao; u8 cluster_bits; u32 boot_off = 0; + sector_t boot_block = 0; const char *hint = "Primary boot"; /* Save original dev_size. Used with alternative boot. */ @@ -872,11 +873,11 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, sbi->volume.blocks = dev_size >> PAGE_SHIFT; - bh = ntfs_bread(sb, 0); +read_boot: + bh = ntfs_bread(sb, boot_block); if (!bh) - return -EIO; + return boot_block ? -EINVAL : -EIO; -check_boot: err = -EINVAL; /* Corrupted image; do not read OOB */ @@ -1107,26 +1108,24 @@ check_boot: } out: - if (err == -EINVAL && !bh->b_blocknr && dev_size0 > PAGE_SHIFT) { + brelse(bh); + + if (err == -EINVAL && !boot_block && dev_size0 > PAGE_SHIFT) { u32 block_size = min_t(u32, sector_size, PAGE_SIZE); u64 lbo = dev_size0 - sizeof(*boot); - /* - * Try alternative boot (last sector) - */ - brelse(bh); - - sb_set_blocksize(sb, block_size); - bh = ntfs_bread(sb, lbo >> blksize_bits(block_size)); - if (!bh) - return -EINVAL; - + boot_block = lbo >> blksize_bits(block_size); boot_off = lbo & (block_size - 1); - hint = "Alternative boot"; - dev_size = dev_size0; /* restore original size. */ - goto check_boot; + if (boot_block && block_size >= boot_off + sizeof(*boot)) { + /* + * Try alternative boot (last sector) + */ + sb_set_blocksize(sb, block_size); + hint = "Alternative boot"; + dev_size = dev_size0; /* restore original size. */ + goto read_boot; + } } - brelse(bh); return err; } From 06144cdddad607ab7bb1af95052c2d72d4d5a264 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:17:07 +0300 Subject: [PATCH 071/299] fs/ntfs3: Modified fix directory element type detection [ Upstream commit 22457c047ed971f2f2e33be593ddfabd9639a409 ] Unfortunately reparse attribute is used for many purposes (several dozens). It is not possible here to know is this name symlink or not. To get exactly the type of name we should to open inode (read mft). getattr for opened file (fstat) correctly returns symlink. Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/dir.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index ec0566b322d5..22ede4da0450 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -309,11 +309,31 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, return 0; } - /* NTFS: symlinks are "dir + reparse" or "file + reparse" */ - if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) - dt_type = DT_LNK; - else - dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + /* + * NTFS: symlinks are "dir + reparse" or "file + reparse" + * Unfortunately reparse attribute is used for many purposes (several dozens). + * It is not possible here to know is this name symlink or not. + * To get exactly the type of name we should to open inode (read mft). + * getattr for opened file (fstat) correctly returns symlink. + */ + dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + + /* + * It is not reliable to detect the type of name using duplicated information + * stored in parent directory. + * The only correct way to get the type of name - read MFT record and find ATTR_STD. + * The code below is not good idea. + * It does additional locks/reads just to get the type of name. + * Should we use additional mount option to enable branch below? + */ + if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) && + ino != ni->mi.rno) { + struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL); + if (!IS_ERR_OR_NULL(inode)) { + dt_type = fs_umode_to_dtype(inode->i_mode); + iput(inode); + } + } return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } From d316783dfdc1822e742f7c9220e41486938bd1c4 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:24:33 +0300 Subject: [PATCH 072/299] fs/ntfs3: Improve ntfs_dir_count [ Upstream commit 6a799c928b78b14999b7705c4cca0f88e297fe96 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/dir.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 22ede4da0450..726122ecd39b 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -515,11 +515,9 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, struct INDEX_HDR *hdr; const struct ATTR_FILE_NAME *fname; u32 e_size, off, end; - u64 vbo = 0; size_t drs = 0, fles = 0, bit = 0; - loff_t i_size = ni->vfs_inode.i_size; struct indx_node *node = NULL; - u8 index_bits = ni->dir.index_bits; + size_t max_indx = ni->vfs_inode.i_size >> ni->dir.index_bits; if (is_empty) *is_empty = true; @@ -563,7 +561,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, fles += 1; } - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_used_bit(&ni->dir, ni, &bit); @@ -573,8 +571,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, if (bit == MINUS_ONE_T) goto out; - vbo = (u64)bit << index_bits; - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, @@ -584,7 +581,6 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, hdr = &node->index->ihdr; bit += 1; - vbo = (u64)bit << ni->dir.idx2vbn_bits; } out: From d46c2ef091807279b22d94d07dad80b74d755c23 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:26:31 +0300 Subject: [PATCH 073/299] fs/ntfs3: Correct hard links updating when dealing with DOS names [ Upstream commit 1918c10e137eae266b8eb0ab1cc14421dcb0e3e2 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/record.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 53629b1f65e9..7b6423584eae 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -535,8 +535,20 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, return false; if (ni && is_attr_indexed(attr)) { - le16_add_cpu(&ni->mi.mrec->hard_links, -1); - ni->mi.dirty = true; + u16 links = le16_to_cpu(ni->mi.mrec->hard_links); + struct ATTR_FILE_NAME *fname = + attr->type != ATTR_NAME ? + NULL : + resident_data_ex(attr, + SIZEOF_ATTRIBUTE_FILENAME); + if (fname && fname->type == FILE_NAME_DOS) { + /* Do not decrease links count deleting DOS name. */ + } else if (!links) { + /* minor error. Not critical. */ + } else { + ni->mi.mrec->hard_links = cpu_to_le16(links - 1); + ni->mi.dirty = true; + } } used -= asize; From fa2a041a490d791a3e5bceb47dab25bd6042dca3 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:34:24 +0300 Subject: [PATCH 074/299] fs/ntfs3: Print warning while fixing hard links count [ Upstream commit 85ba2a75faee759809a7e43b4c103ac59bac1026 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index d6d021e19aaa..fb0466c11504 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -410,7 +410,6 @@ end_enum: goto out; if (!is_match && name) { - /* Reuse rec as buffer for ascii name. */ err = -ENOENT; goto out; } @@ -425,6 +424,7 @@ end_enum: if (names != le16_to_cpu(rec->hard_links)) { /* Correct minor error on the fly. Do not mark inode as dirty. */ + ntfs_inode_warn(inode, "Correct links count -> %u.", names); rec->hard_links = cpu_to_le16(names); ni->mi.dirty = true; } From e0b64e4ad2eb013fd3299e34e7fe5e19f321e140 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:42:04 +0300 Subject: [PATCH 075/299] fs/ntfs3: Reduce stack usage [ Upstream commit 865e7a7700d930d34895a70f8af2eb4e778a5b0e ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/fslog.c | 214 +++++++++++++++++++++-------------------------- 1 file changed, 96 insertions(+), 118 deletions(-) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 98ccb6650858..7dbb000fc691 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -974,6 +974,16 @@ skip_looking: return e; } +struct restart_info { + u64 last_lsn; + struct RESTART_HDR *r_page; + u32 vbo; + bool chkdsk_was_run; + bool valid_page; + bool initialized; + bool restart; +}; + #define RESTART_SINGLE_PAGE_IO cpu_to_le16(0x0001) #define NTFSLOG_WRAPPED 0x00000001 @@ -987,6 +997,7 @@ struct ntfs_log { struct ntfs_inode *ni; u32 l_size; + u32 orig_file_size; u32 sys_page_size; u32 sys_page_mask; u32 page_size; @@ -1040,6 +1051,8 @@ struct ntfs_log { struct CLIENT_ID client_id; u32 client_undo_commit; + + struct restart_info rst_info, rst_info2; }; static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn) @@ -1105,16 +1118,6 @@ static inline bool verify_client_lsn(struct ntfs_log *log, lsn <= le64_to_cpu(log->ra->current_lsn) && lsn; } -struct restart_info { - u64 last_lsn; - struct RESTART_HDR *r_page; - u32 vbo; - bool chkdsk_was_run; - bool valid_page; - bool initialized; - bool restart; -}; - static int read_log_page(struct ntfs_log *log, u32 vbo, struct RECORD_PAGE_HDR **buffer, bool *usa_error) { @@ -1176,7 +1179,7 @@ out: * restart page header. It will stop the first time we find a * valid page header. */ -static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, +static int log_read_rst(struct ntfs_log *log, bool first, struct restart_info *info) { u32 skip, vbo; @@ -1192,7 +1195,7 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, } /* Loop continuously until we succeed. */ - for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) { + for (; vbo < log->l_size; vbo = 2 * vbo + skip, skip = 0) { bool usa_error; bool brst, bchk; struct RESTART_AREA *ra; @@ -1285,22 +1288,17 @@ check_result: /* * Ilog_init_pg_hdr - Init @log from restart page header. */ -static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size, - u32 page_size, u16 major_ver, u16 minor_ver) +static void log_init_pg_hdr(struct ntfs_log *log, u16 major_ver, u16 minor_ver) { - log->sys_page_size = sys_page_size; - log->sys_page_mask = sys_page_size - 1; - log->page_size = page_size; - log->page_mask = page_size - 1; - log->page_bits = blksize_bits(page_size); + log->sys_page_size = log->page_size; + log->sys_page_mask = log->page_mask; log->clst_per_page = log->page_size >> log->ni->mi.sbi->cluster_bits; if (!log->clst_per_page) log->clst_per_page = 1; - log->first_page = major_ver >= 2 ? - 0x22 * page_size : - ((sys_page_size << 1) + (page_size << 1)); + log->first_page = major_ver >= 2 ? 0x22 * log->page_size : + 4 * log->page_size; log->major_ver = major_ver; log->minor_ver = minor_ver; } @@ -1308,12 +1306,11 @@ static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size, /* * log_create - Init @log in cases when we don't have a restart area to use. */ -static void log_create(struct ntfs_log *log, u32 l_size, const u64 last_lsn, +static void log_create(struct ntfs_log *log, const u64 last_lsn, u32 open_log_count, bool wrapped, bool use_multi_page) { - log->l_size = l_size; /* All file offsets must be quadword aligned. */ - log->file_data_bits = blksize_bits(l_size) - 3; + log->file_data_bits = blksize_bits(log->l_size) - 3; log->seq_num_mask = (8 << log->file_data_bits) - 1; log->seq_num_bits = sizeof(u64) * 8 - log->file_data_bits; log->seq_num = (last_lsn >> log->file_data_bits) + 2; @@ -3720,10 +3717,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) struct ntfs_sb_info *sbi = ni->mi.sbi; struct ntfs_log *log; - struct restart_info rst_info, rst_info2; - u64 rec_lsn, ra_lsn, checkpt_lsn = 0, rlsn = 0; + u64 rec_lsn, checkpt_lsn = 0, rlsn = 0; struct ATTR_NAME_ENTRY *attr_names = NULL; - struct ATTR_NAME_ENTRY *ane; struct RESTART_TABLE *dptbl = NULL; struct RESTART_TABLE *trtbl = NULL; const struct RESTART_TABLE *rt; @@ -3741,9 +3736,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) struct TRANSACTION_ENTRY *tr; struct DIR_PAGE_ENTRY *dp; u32 i, bytes_per_attr_entry; - u32 l_size = ni->vfs_inode.i_size; - u32 orig_file_size = l_size; - u32 page_size, vbo, tail, off, dlen; + u32 vbo, tail, off, dlen; u32 saved_len, rec_len, transact_id; bool use_second_page; struct RESTART_AREA *ra2, *ra = NULL; @@ -3758,52 +3751,50 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) u16 t16; u32 t32; - /* Get the size of page. NOTE: To replay we can use default page. */ -#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2 - page_size = norm_file_page(PAGE_SIZE, &l_size, true); -#else - page_size = norm_file_page(PAGE_SIZE, &l_size, false); -#endif - if (!page_size) - return -EINVAL; - log = kzalloc(sizeof(struct ntfs_log), GFP_NOFS); if (!log) return -ENOMEM; log->ni = ni; - log->l_size = l_size; - log->one_page_buf = kmalloc(page_size, GFP_NOFS); + log->l_size = log->orig_file_size = ni->vfs_inode.i_size; + /* Get the size of page. NOTE: To replay we can use default page. */ +#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2 + log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, true); +#else + log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, false); +#endif + if (!log->page_size) { + err = -EINVAL; + goto out; + } + + log->one_page_buf = kmalloc(log->page_size, GFP_NOFS); if (!log->one_page_buf) { err = -ENOMEM; goto out; } - log->page_size = page_size; - log->page_mask = page_size - 1; - log->page_bits = blksize_bits(page_size); + log->page_mask = log->page_size - 1; + log->page_bits = blksize_bits(log->page_size); /* Look for a restart area on the disk. */ - memset(&rst_info, 0, sizeof(struct restart_info)); - err = log_read_rst(log, l_size, true, &rst_info); + err = log_read_rst(log, true, &log->rst_info); if (err) goto out; /* remember 'initialized' */ - *initialized = rst_info.initialized; + *initialized = log->rst_info.initialized; - if (!rst_info.restart) { - if (rst_info.initialized) { + if (!log->rst_info.restart) { + if (log->rst_info.initialized) { /* No restart area but the file is not initialized. */ err = -EINVAL; goto out; } - log_init_pg_hdr(log, page_size, page_size, 1, 1); - log_create(log, l_size, 0, get_random_u32(), false, false); - - log->ra = ra; + log_init_pg_hdr(log, 1, 1); + log_create(log, 0, get_random_u32(), false, false); ra = log_create_ra(log); if (!ra) { @@ -3820,25 +3811,26 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) * If the restart offset above wasn't zero then we won't * look for a second restart. */ - if (rst_info.vbo) + if (log->rst_info.vbo) goto check_restart_area; - memset(&rst_info2, 0, sizeof(struct restart_info)); - err = log_read_rst(log, l_size, false, &rst_info2); + err = log_read_rst(log, false, &log->rst_info2); if (err) goto out; /* Determine which restart area to use. */ - if (!rst_info2.restart || rst_info2.last_lsn <= rst_info.last_lsn) + if (!log->rst_info2.restart || + log->rst_info2.last_lsn <= log->rst_info.last_lsn) goto use_first_page; use_second_page = true; - if (rst_info.chkdsk_was_run && page_size != rst_info.vbo) { + if (log->rst_info.chkdsk_was_run && + log->page_size != log->rst_info.vbo) { struct RECORD_PAGE_HDR *sp = NULL; bool usa_error; - if (!read_log_page(log, page_size, &sp, &usa_error) && + if (!read_log_page(log, log->page_size, &sp, &usa_error) && sp->rhdr.sign == NTFS_CHKD_SIGNATURE) { use_second_page = false; } @@ -3846,52 +3838,43 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) } if (use_second_page) { - kfree(rst_info.r_page); - memcpy(&rst_info, &rst_info2, sizeof(struct restart_info)); - rst_info2.r_page = NULL; + kfree(log->rst_info.r_page); + memcpy(&log->rst_info, &log->rst_info2, + sizeof(struct restart_info)); + log->rst_info2.r_page = NULL; } use_first_page: - kfree(rst_info2.r_page); + kfree(log->rst_info2.r_page); check_restart_area: /* * If the restart area is at offset 0, we want * to write the second restart area first. */ - log->init_ra = !!rst_info.vbo; + log->init_ra = !!log->rst_info.vbo; /* If we have a valid page then grab a pointer to the restart area. */ - ra2 = rst_info.valid_page ? - Add2Ptr(rst_info.r_page, - le16_to_cpu(rst_info.r_page->ra_off)) : + ra2 = log->rst_info.valid_page ? + Add2Ptr(log->rst_info.r_page, + le16_to_cpu(log->rst_info.r_page->ra_off)) : NULL; - if (rst_info.chkdsk_was_run || + if (log->rst_info.chkdsk_was_run || (ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) { bool wrapped = false; bool use_multi_page = false; u32 open_log_count; /* Do some checks based on whether we have a valid log page. */ - if (!rst_info.valid_page) { - open_log_count = get_random_u32(); - goto init_log_instance; - } - open_log_count = le32_to_cpu(ra2->open_log_count); + open_log_count = log->rst_info.valid_page ? + le32_to_cpu(ra2->open_log_count) : + get_random_u32(); - /* - * If the restart page size isn't changing then we want to - * check how much work we need to do. - */ - if (page_size != le32_to_cpu(rst_info.r_page->sys_page_size)) - goto init_log_instance; + log_init_pg_hdr(log, 1, 1); -init_log_instance: - log_init_pg_hdr(log, page_size, page_size, 1, 1); - - log_create(log, l_size, rst_info.last_lsn, open_log_count, - wrapped, use_multi_page); + log_create(log, log->rst_info.last_lsn, open_log_count, wrapped, + use_multi_page); ra = log_create_ra(log); if (!ra) { @@ -3916,28 +3899,27 @@ init_log_instance: * use the log file. We must use the system page size instead of the * default size if there is not a clean shutdown. */ - t32 = le32_to_cpu(rst_info.r_page->sys_page_size); - if (page_size != t32) { - l_size = orig_file_size; - page_size = - norm_file_page(t32, &l_size, t32 == DefaultLogPageSize); + t32 = le32_to_cpu(log->rst_info.r_page->sys_page_size); + if (log->page_size != t32) { + log->l_size = log->orig_file_size; + log->page_size = norm_file_page(t32, &log->l_size, + t32 == DefaultLogPageSize); } - if (page_size != t32 || - page_size != le32_to_cpu(rst_info.r_page->page_size)) { + if (log->page_size != t32 || + log->page_size != le32_to_cpu(log->rst_info.r_page->page_size)) { err = -EINVAL; goto out; } /* If the file size has shrunk then we won't mount it. */ - if (l_size < le64_to_cpu(ra2->l_size)) { + if (log->l_size < le64_to_cpu(ra2->l_size)) { err = -EINVAL; goto out; } - log_init_pg_hdr(log, page_size, page_size, - le16_to_cpu(rst_info.r_page->major_ver), - le16_to_cpu(rst_info.r_page->minor_ver)); + log_init_pg_hdr(log, le16_to_cpu(log->rst_info.r_page->major_ver), + le16_to_cpu(log->rst_info.r_page->minor_ver)); log->l_size = le64_to_cpu(ra2->l_size); log->seq_num_bits = le32_to_cpu(ra2->seq_num_bits); @@ -3945,7 +3927,7 @@ init_log_instance: log->seq_num_mask = (8 << log->file_data_bits) - 1; log->last_lsn = le64_to_cpu(ra2->current_lsn); log->seq_num = log->last_lsn >> log->file_data_bits; - log->ra_off = le16_to_cpu(rst_info.r_page->ra_off); + log->ra_off = le16_to_cpu(log->rst_info.r_page->ra_off); log->restart_size = log->sys_page_size - log->ra_off; log->record_header_len = le16_to_cpu(ra2->rec_hdr_len); log->ra_size = le16_to_cpu(ra2->ra_len); @@ -4045,7 +4027,7 @@ find_oldest: log->current_avail = current_log_avail(log); /* Remember which restart area to write first. */ - log->init_ra = rst_info.vbo; + log->init_ra = log->rst_info.vbo; process_log: /* 1.0, 1.1, 2.0 log->major_ver/minor_ver - short values. */ @@ -4105,7 +4087,7 @@ process_log: log->client_id.seq_num = cr->seq_num; log->client_id.client_idx = client; - err = read_rst_area(log, &rst, &ra_lsn); + err = read_rst_area(log, &rst, &checkpt_lsn); if (err) goto out; @@ -4114,9 +4096,8 @@ process_log: bytes_per_attr_entry = !rst->major_ver ? 0x2C : 0x28; - checkpt_lsn = le64_to_cpu(rst->check_point_start); - if (!checkpt_lsn) - checkpt_lsn = ra_lsn; + if (rst->check_point_start) + checkpt_lsn = le64_to_cpu(rst->check_point_start); /* Allocate and Read the Transaction Table. */ if (!rst->transact_table_len) @@ -4330,23 +4311,20 @@ check_attr_table: lcb = NULL; check_attribute_names2: - if (!rst->attr_names_len) - goto trace_attribute_table; - - ane = attr_names; - if (!oatbl) - goto trace_attribute_table; - while (ane->off) { - /* TODO: Clear table on exit! */ - oe = Add2Ptr(oatbl, le16_to_cpu(ane->off)); - t16 = le16_to_cpu(ane->name_bytes); - oe->name_len = t16 / sizeof(short); - oe->ptr = ane->name; - oe->is_attr_name = 2; - ane = Add2Ptr(ane, sizeof(struct ATTR_NAME_ENTRY) + t16); + if (rst->attr_names_len && oatbl) { + struct ATTR_NAME_ENTRY *ane = attr_names; + while (ane->off) { + /* TODO: Clear table on exit! */ + oe = Add2Ptr(oatbl, le16_to_cpu(ane->off)); + t16 = le16_to_cpu(ane->name_bytes); + oe->name_len = t16 / sizeof(short); + oe->ptr = ane->name; + oe->is_attr_name = 2; + ane = Add2Ptr(ane, + sizeof(struct ATTR_NAME_ENTRY) + t16); + } } -trace_attribute_table: /* * If the checkpt_lsn is zero, then this is a freshly * formatted disk and we have no work to do. @@ -5189,7 +5167,7 @@ out: kfree(oatbl); kfree(dptbl); kfree(attr_names); - kfree(rst_info.r_page); + kfree(log->rst_info.r_page); kfree(ra); kfree(log->one_page_buf); From 289257127a18ee3fba092834e1d904b5999f3698 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:46:16 +0300 Subject: [PATCH 076/299] fs/ntfs3: Fix multithreaded stress test [ Upstream commit a8b0c9fc3a2dba07f697ef7825e04363ff12f071 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/attrib.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 63f70259edc0..4b78b669a3bd 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -886,7 +886,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, struct runs_tree *run = &ni->file.run; struct ntfs_sb_info *sbi; u8 cluster_bits; - struct ATTRIB *attr = NULL, *attr_b; + struct ATTRIB *attr, *attr_b; struct ATTR_LIST_ENTRY *le, *le_b; struct mft_inode *mi, *mi_b; CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0, alen; @@ -904,12 +904,8 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, *len = 0; up_read(&ni->file.run_lock); - if (*len) { - if (*lcn != SPARSE_LCN || !new) - return 0; /* Fast normal way without allocation. */ - else if (clen > *len) - clen = *len; - } + if (*len && (*lcn != SPARSE_LCN || !new)) + return 0; /* Fast normal way without allocation. */ /* No cluster in cache or we need to allocate cluster in hole. */ sbi = ni->mi.sbi; @@ -918,6 +914,17 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, ni_lock(ni); down_write(&ni->file.run_lock); + /* Repeat the code above (under write lock). */ + if (!run_lookup_entry(run, vcn, lcn, len, NULL)) + *len = 0; + + if (*len) { + if (*lcn != SPARSE_LCN || !new) + goto out; /* normal way without allocation. */ + if (clen > *len) + clen = *len; + } + le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); if (!attr_b) { From 07b918639367e96bff70fc77e46419b0b86a792a Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:47:30 +0300 Subject: [PATCH 077/299] fs/ntfs3: Fix detected field-spanning write (size 8) of single field "le->name" [ Upstream commit d155617006ebc172a80d3eb013c4b867f9a8ada4 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/ntfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 86aecbb01a92..13e96fc63dae 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -523,7 +523,7 @@ struct ATTR_LIST_ENTRY { __le64 vcn; // 0x08: Starting VCN of this attribute. struct MFT_REF ref; // 0x10: MFT record number with attribute. __le16 id; // 0x18: struct ATTRIB ID. - __le16 name[3]; // 0x1A: Just to align. To get real name can use bNameOffset. + __le16 name[]; // 0x1A: Just to align. To get real name can use name_off. }; // sizeof(0x20) From c28efa873a2a67a69e5e2b73ccab081d755cd3cc Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 12:16:49 +0300 Subject: [PATCH 078/299] fs/ntfs3: Add file_modified [ Upstream commit 4dea9cd522424d3002894c20b729c6fbfb6fc22b ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/file.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 1f7a194983c5..6e1c456c9ae7 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -632,11 +632,17 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) &ni->file.run, i_size, &ni->i_valid, true, NULL); ni_unlock(ni); + if (err) + goto out; } else if (new_size > i_size) { inode->i_size = new_size; } } + err = file_modified(file); + if (err) + goto out; + out: if (map_locked) filemap_invalidate_unlock(mapping); @@ -1040,6 +1046,7 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; + int err; struct ntfs_inode *ni = ntfs_i(inode); if (is_encrypted(ni)) { @@ -1067,6 +1074,12 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret <= 0) goto out; + err = file_modified(iocb->ki_filp); + if (err) { + ret = err; + goto out; + } + if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) { /* Should never be here, see ntfs_file_open(). */ ret = -EOPNOTSUPP; From 7e0aff0aab65608c385b90f27667faa4e3acc56e Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 12:17:46 +0300 Subject: [PATCH 079/299] fs/ntfs3: Drop suid and sgid bits as a part of fpunch [ Upstream commit e50f9560b8168a625703a3e7fe1fde9fa53f0837 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 6e1c456c9ae7..5530b4cf1ee5 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -498,10 +498,14 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) ni_lock(ni); err = attr_punch_hole(ni, vbo, len, &frame_size); ni_unlock(ni); + if (!err) + goto ok; + if (err != E_NTFS_NOTALIGNED) goto out; /* Process not aligned punch. */ + err = 0; mask = frame_size - 1; vbo_a = (vbo + mask) & ~mask; end_a = end & ~mask; @@ -524,6 +528,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) ni_lock(ni); err = attr_punch_hole(ni, vbo_a, end_a - vbo_a, NULL); ni_unlock(ni); + if (err) + goto out; } } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { /* @@ -563,6 +569,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) ni_lock(ni); err = attr_insert_range(ni, vbo, len); ni_unlock(ni); + if (err) + goto out; } else { /* Check new size. */ u8 cluster_bits = sbi->cluster_bits; @@ -639,6 +647,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) } } +ok: err = file_modified(file); if (err) goto out; From f73f939792fce29af5b896be1bd99c8cb7eaf0e9 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 12:19:37 +0300 Subject: [PATCH 080/299] fs/ntfs3: Implement super_operations::shutdown [ Upstream commit 6c3684e703837d2116b5cf4beb37aa7145a66b60 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/file.c | 18 ++++++++++++++++++ fs/ntfs3/frecord.c | 3 +++ fs/ntfs3/inode.c | 21 +++++++++++++++++++-- fs/ntfs3/namei.c | 12 ++++++++++++ fs/ntfs3/ntfs_fs.h | 9 ++++++++- fs/ntfs3/super.c | 12 ++++++++++++ fs/ntfs3/xattr.c | 3 +++ 7 files changed, 75 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 5530b4cf1ee5..a5bbf7b5775a 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -260,6 +260,9 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) bool rw = vma->vm_flags & VM_WRITE; int err; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + if (is_encrypted(ni)) { ntfs_inode_warn(inode, "mmap encrypted not supported"); return -EOPNOTSUPP; @@ -677,6 +680,9 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode = inode->i_mode; int err; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + err = setattr_prepare(idmap, dentry, attr); if (err) goto out; @@ -732,6 +738,9 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = file->f_mapping->host; struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + if (is_encrypted(ni)) { ntfs_inode_warn(inode, "encrypted i/o not supported"); return -EOPNOTSUPP; @@ -766,6 +775,9 @@ static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos, struct inode *inode = in->f_mapping->host; struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + if (is_encrypted(ni)) { ntfs_inode_warn(inode, "encrypted i/o not supported"); return -EOPNOTSUPP; @@ -1058,6 +1070,9 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) int err; struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + if (is_encrypted(ni)) { ntfs_inode_warn(inode, "encrypted i/o not supported"); return -EOPNOTSUPP; @@ -1118,6 +1133,9 @@ int ntfs_file_open(struct inode *inode, struct file *file) { struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + if (unlikely((is_compressed(ni) || is_encrypted(ni)) && (file->f_flags & O_DIRECT))) { return -EOPNOTSUPP; diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index dad976a68985..86b43051f39c 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3259,6 +3259,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) if (is_bad_inode(inode) || sb_rdonly(sb)) return 0; + if (unlikely(ntfs3_forced_shutdown(sb))) + return -EIO; + if (!ni_trylock(ni)) { /* 'ni' is under modification, skip for now. */ mark_inode_dirty_sync(inode); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index fb0466c11504..012dbc8ac5ec 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -851,9 +851,13 @@ static int ntfs_resident_writepage(struct folio *folio, struct writeback_control *wbc, void *data) { struct address_space *mapping = data; - struct ntfs_inode *ni = ntfs_i(mapping->host); + struct inode *inode = mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); int ret; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + ni_lock(ni); ret = attr_data_write_resident(ni, &folio->page); ni_unlock(ni); @@ -867,7 +871,12 @@ static int ntfs_resident_writepage(struct folio *folio, static int ntfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - if (is_resident(ntfs_i(mapping->host))) + struct inode *inode = mapping->host; + + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + + if (is_resident(ntfs_i(inode))) return write_cache_pages(mapping, wbc, ntfs_resident_writepage, mapping); return mpage_writepages(mapping, wbc, ntfs_get_block); @@ -887,6 +896,9 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + *pagep = NULL; if (is_resident(ni)) { struct page *page = @@ -1303,6 +1315,11 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, goto out1; } + if (unlikely(ntfs3_forced_shutdown(sb))) { + err = -EIO; + goto out2; + } + /* Mark rw ntfs as dirty. it will be cleared at umount. */ ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index eedacf94edd8..b5687d74b449 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -181,6 +181,9 @@ static int ntfs_unlink(struct inode *dir, struct dentry *dentry) struct ntfs_inode *ni = ntfs_i(dir); int err; + if (unlikely(ntfs3_forced_shutdown(dir->i_sb))) + return -EIO; + ni_lock_dir(ni); err = ntfs_unlink_inode(dir, dentry); @@ -199,6 +202,9 @@ static int ntfs_symlink(struct mnt_idmap *idmap, struct inode *dir, u32 size = strlen(symname); struct inode *inode; + if (unlikely(ntfs3_forced_shutdown(dir->i_sb))) + return -EIO; + inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFLNK | 0777, 0, symname, size, NULL); @@ -227,6 +233,9 @@ static int ntfs_rmdir(struct inode *dir, struct dentry *dentry) struct ntfs_inode *ni = ntfs_i(dir); int err; + if (unlikely(ntfs3_forced_shutdown(dir->i_sb))) + return -EIO; + ni_lock_dir(ni); err = ntfs_unlink_inode(dir, dentry); @@ -264,6 +273,9 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir, 1024); static_assert(PATH_MAX >= 4 * 1024); + if (unlikely(ntfs3_forced_shutdown(sb))) + return -EIO; + if (flags & ~RENAME_NOREPLACE) return -EINVAL; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 29a9b0b29e4f..466ee5bd6226 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -61,6 +61,8 @@ enum utf16_endian; /* sbi->flags */ #define NTFS_FLAGS_NODISCARD 0x00000001 +/* ntfs in shutdown state. */ +#define NTFS_FLAGS_SHUTDOWN 0x00000002 /* Set when LogFile is replaying. */ #define NTFS_FLAGS_LOG_REPLAYING 0x00000008 /* Set when we changed first MFT's which copy must be updated in $MftMirr. */ @@ -226,7 +228,7 @@ struct ntfs_sb_info { u64 maxbytes; // Maximum size for normal files. u64 maxbytes_sparse; // Maximum size for sparse file. - u32 flags; // See NTFS_FLAGS_XXX. + unsigned long flags; // See NTFS_FLAGS_ CLST zone_max; // Maximum MFT zone length in clusters CLST bad_clusters; // The count of marked bad clusters. @@ -999,6 +1001,11 @@ static inline struct ntfs_sb_info *ntfs_sb(struct super_block *sb) return sb->s_fs_info; } +static inline bool ntfs3_forced_shutdown(struct super_block *sb) +{ + return test_bit(NTFS_FLAGS_SHUTDOWN, &ntfs_sb(sb)->flags); +} + /* * ntfs_up_cluster - Align up on cluster boundary. */ diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index aa3b2c262ab6..a313b4ddee16 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -714,6 +714,14 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root) return 0; } +/* + * ntfs_shutdown - super_operations::shutdown + */ +static void ntfs_shutdown(struct super_block *sb) +{ + set_bit(NTFS_FLAGS_SHUTDOWN, &ntfs_sb(sb)->flags); +} + /* * ntfs_sync_fs - super_operations::sync_fs */ @@ -724,6 +732,9 @@ static int ntfs_sync_fs(struct super_block *sb, int wait) struct ntfs_inode *ni; struct inode *inode; + if (unlikely(ntfs3_forced_shutdown(sb))) + return -EIO; + ni = sbi->security.ni; if (ni) { inode = &ni->vfs_inode; @@ -763,6 +774,7 @@ static const struct super_operations ntfs_sops = { .put_super = ntfs_put_super, .statfs = ntfs_statfs, .show_options = ntfs_show_options, + .shutdown = ntfs_shutdown, .sync_fs = ntfs_sync_fs, .write_inode = ntfs3_write_inode, }; diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 4920548192a0..838a79157fb9 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -744,6 +744,9 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, int err; struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + /* Dispatch request. */ if (!strcmp(name, SYSTEM_DOS_ATTRIB)) { /* system.dos_attrib */ From 323b0ab3f235f043c1be616ad495b57169bb4b18 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 12:21:12 +0300 Subject: [PATCH 081/299] fs/ntfs3: ntfs3_forced_shutdown use int instead of bool [ Upstream commit 97ec56d390a3a0077b36cb38627f671c72dddce6 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/fsntfs.c | 3 ++- fs/ntfs3/ntfs_fs.h | 6 +++--- fs/ntfs3/super.c | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index fbfe21dbb425..350461d8cece 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -853,7 +853,8 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) /* * sb can be NULL here. In this case sbi->flags should be 0 too. */ - if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR)) + if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR) || + unlikely(ntfs3_forced_shutdown(sb))) return; blocksize = sb->s_blocksize; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 466ee5bd6226..c71a1eca427a 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -62,7 +62,7 @@ enum utf16_endian; /* sbi->flags */ #define NTFS_FLAGS_NODISCARD 0x00000001 /* ntfs in shutdown state. */ -#define NTFS_FLAGS_SHUTDOWN 0x00000002 +#define NTFS_FLAGS_SHUTDOWN_BIT 0x00000002 /* == 4*/ /* Set when LogFile is replaying. */ #define NTFS_FLAGS_LOG_REPLAYING 0x00000008 /* Set when we changed first MFT's which copy must be updated in $MftMirr. */ @@ -1001,9 +1001,9 @@ static inline struct ntfs_sb_info *ntfs_sb(struct super_block *sb) return sb->s_fs_info; } -static inline bool ntfs3_forced_shutdown(struct super_block *sb) +static inline int ntfs3_forced_shutdown(struct super_block *sb) { - return test_bit(NTFS_FLAGS_SHUTDOWN, &ntfs_sb(sb)->flags); + return test_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags); } /* diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index a313b4ddee16..378e261e23b0 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -719,7 +719,7 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root) */ static void ntfs_shutdown(struct super_block *sb) { - set_bit(NTFS_FLAGS_SHUTDOWN, &ntfs_sb(sb)->flags); + set_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags); } /* From 947c3f3d31ea185ddc8e7f198873f17d36deb24c Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 28 Nov 2023 11:09:34 +0300 Subject: [PATCH 082/299] fs/ntfs3: Add NULL ptr dereference checking at the end of attr_allocate_frame() [ Upstream commit aaab47f204aaf47838241d57bf8662c8840de60a ] It is preferable to exit through the out: label because internal debugging functions are located there. Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/attrib.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 4b78b669a3bd..646e2dad1b75 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1743,8 +1743,10 @@ repack: le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); - if (!attr_b) - return -ENOENT; + if (!attr_b) { + err = -ENOENT; + goto out; + } attr = attr_b; le = le_b; @@ -1825,13 +1827,15 @@ ins_ext: ok: run_truncate_around(run, vcn); out: - if (new_valid > data_size) - new_valid = data_size; + if (attr_b) { + if (new_valid > data_size) + new_valid = data_size; - valid_size = le64_to_cpu(attr_b->nres.valid_size); - if (new_valid != valid_size) { - attr_b->nres.valid_size = cpu_to_le64(valid_size); - mi_b->dirty = true; + valid_size = le64_to_cpu(attr_b->nres.valid_size); + if (new_valid != valid_size) { + attr_b->nres.valid_size = cpu_to_le64(valid_size); + mi_b->dirty = true; + } } return err; From adcc0ab3e79f83f6e5aa5b38ee2224c791509aac Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Thu, 21 Dec 2023 13:59:43 +0300 Subject: [PATCH 083/299] fs/ntfs3: Disable ATTR_LIST_ENTRY size check [ Upstream commit 4cdfb6e7bc9c80142d33bf1d4653a73fa678ba56 ] The use of sizeof(struct ATTR_LIST_ENTRY) has been replaced with le_size(0) due to alignment peculiarities on different platforms. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312071005.g6YrbaIe-lkp@intel.com/ Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/attrlist.c | 8 ++++---- fs/ntfs3/ntfs.h | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 7c01735d1219..48e7da47c6b7 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -127,12 +127,13 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, { size_t off; u16 sz; + const unsigned le_min_size = le_size(0); if (!le) { le = ni->attr_list.le; } else { sz = le16_to_cpu(le->size); - if (sz < sizeof(struct ATTR_LIST_ENTRY)) { + if (sz < le_min_size) { /* Impossible 'cause we should not return such le. */ return NULL; } @@ -141,7 +142,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, /* Check boundary. */ off = PtrOffset(ni->attr_list.le, le); - if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) { + if (off + le_min_size > ni->attr_list.size) { /* The regular end of list. */ return NULL; } @@ -149,8 +150,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, sz = le16_to_cpu(le->size); /* Check le for errors. */ - if (sz < sizeof(struct ATTR_LIST_ENTRY) || - off + sz > ni->attr_list.size || + if (sz < le_min_size || off + sz > ni->attr_list.size || sz < le->name_off + le->name_len * sizeof(short)) { return NULL; } diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 13e96fc63dae..f61f5b3adb03 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -527,8 +527,6 @@ struct ATTR_LIST_ENTRY { }; // sizeof(0x20) -static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20); - static inline u32 le_size(u8 name_len) { return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) + From 86cd46312a30ae5ee61ffc68ec56a96c096d8335 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 16 Jan 2024 10:32:20 +0300 Subject: [PATCH 084/299] fs/ntfs3: Use kvfree to free memory allocated by kvmalloc [ Upstream commit ddb17dc880eeaac37b5a6e984de07b882de7d78d ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/attrlist.c | 4 ++-- fs/ntfs3/bitmap.c | 4 ++-- fs/ntfs3/frecord.c | 4 ++-- fs/ntfs3/super.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 48e7da47c6b7..9f4bd8d26090 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -29,7 +29,7 @@ static inline bool al_is_valid_le(const struct ntfs_inode *ni, void al_destroy(struct ntfs_inode *ni) { run_close(&ni->attr_list.run); - kfree(ni->attr_list.le); + kvfree(ni->attr_list.le); ni->attr_list.le = NULL; ni->attr_list.size = 0; ni->attr_list.dirty = false; @@ -318,7 +318,7 @@ int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, memcpy(ptr, al->le, off); memcpy(Add2Ptr(ptr, off + sz), le, old_size - off); le = Add2Ptr(ptr, off); - kfree(al->le); + kvfree(al->le); al->le = ptr; } else { memmove(Add2Ptr(le, sz), le, old_size - off); diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index 63f14a0232f6..845f9b22deef 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -124,7 +124,7 @@ void wnd_close(struct wnd_bitmap *wnd) { struct rb_node *node, *next; - kfree(wnd->free_bits); + kvfree(wnd->free_bits); wnd->free_bits = NULL; run_close(&wnd->run); @@ -1360,7 +1360,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(short)); memset(new_free + wnd->nwnd, 0, (new_wnd - wnd->nwnd) * sizeof(short)); - kfree(wnd->free_bits); + kvfree(wnd->free_bits); wnd->free_bits = new_free; } diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 86b43051f39c..a918d7283aca 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -778,7 +778,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) run_deallocate(sbi, &ni->attr_list.run, true); run_close(&ni->attr_list.run); ni->attr_list.size = 0; - kfree(ni->attr_list.le); + kvfree(ni->attr_list.le); ni->attr_list.le = NULL; ni->attr_list.dirty = false; @@ -927,7 +927,7 @@ int ni_create_attr_list(struct ntfs_inode *ni) return 0; out: - kfree(ni->attr_list.le); + kvfree(ni->attr_list.le); ni->attr_list.le = NULL; ni->attr_list.size = 0; return err; diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 378e261e23b0..eb5060229740 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -625,7 +625,7 @@ static void ntfs3_free_sbi(struct ntfs_sb_info *sbi) { kfree(sbi->new_rec); kvfree(ntfs_put_shared(sbi->upcase)); - kfree(sbi->def_table); + kvfree(sbi->def_table); kfree(sbi->compress.lznt); #ifdef CONFIG_NTFS3_LZX_XPRESS xpress_free_decompressor(sbi->compress.xpress); From 8525c77e2f7f245468defd83b718ba6ab82d0cd3 Mon Sep 17 00:00:00 2001 From: Ism Hong Date: Tue, 26 Dec 2023 16:51:41 +0800 Subject: [PATCH 085/299] fs/ntfs3: use non-movable memory for ntfs3 MFT buffer cache [ Upstream commit d6d33f03baa43d763fe094ca926eeae7d3421d07 ] Since the buffer cache for ntfs3 metadata is not released until the file system is unmounted, allocating from the movable zone may result in cma allocation failures. This is due to the page still being used by ntfs3, leading to migration failures. To address this, this commit use sb_bread_umovable() instead of sb_bread(). This change prevents allocation from the movable zone, ensuring compatibility with scenarios where the buffer head is not released until unmount. This patch is inspired by commit a8ac900b8163("ext4: use non-movable memory for the ext4 superblock"). The issue is found when playing video files stored in NTFS on the Android TV platform. During this process, the media parser reads the video file, causing ntfs3 to allocate buffer cache from the CMA area. Subsequently, the hardware decoder attempts to allocate memory from the same CMA area. However, the page is still in use by ntfs3, resulting in a migrate failure in alloc_contig_range(). The pinned page and allocating stacktrace reported by page owner shows below: page:ffffffff00b68880 refcount:3 mapcount:0 mapping:ffffff80046aa828 index:0xc0040 pfn:0x20fa4 aops:def_blk_aops ino:0 flags: 0x2020(active|private) page dumped because: migration failure page last allocated via order 0, migratetype Movable, gfp_mask 0x108c48 (GFP_NOFS|__GFP_NOFAIL|__GFP_HARDWALL|__GFP_MOVABLE), page_owner tracks the page as allocated prep_new_page get_page_from_freelist __alloc_pages_nodemask pagecache_get_page __getblk_gfp __bread_gfp ntfs_read_run_nb ntfs_read_bh mi_read ntfs_iget5 dir_search_u ntfs_lookup __lookup_slow lookup_slow walk_component path_lookupat Signed-off-by: Ism Hong Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/ntfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index c71a1eca427a..98906e467517 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -1035,7 +1035,7 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) static inline struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) { - struct buffer_head *bh = sb_bread(sb, block); + struct buffer_head *bh = sb_bread_unmovable(sb, block); if (bh) return bh; From c55deec3ffddf94a0e47761ea0db9e60aea2ade1 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:03:21 +0300 Subject: [PATCH 086/299] fs/ntfs3: Prevent generic message "attempt to access beyond end of device" [ Upstream commit 5ca87d01eba7bdfe9536a157ca33c1455bb8d16c ] It used in test environment. Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/fsntfs.c | 24 ++++++++++++++++++++++++ fs/ntfs3/ntfs_fs.h | 14 +------------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 350461d8cece..c66b0eab6a16 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1007,6 +1007,30 @@ static inline __le32 security_hash(const void *sd, size_t bytes) return cpu_to_le32(hash); } +/* + * simple wrapper for sb_bread_unmovable. + */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct buffer_head *bh; + + if (unlikely(block >= sbi->volume.blocks)) { + /* prevent generic message "attempt to access beyond end of device" */ + ntfs_err(sb, "try to read out of volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; + } + + bh = sb_bread_unmovable(sb, block); + if (bh) + return bh; + + ntfs_err(sb, "failed to read volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; +} + int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer) { struct block_device *bdev = sb->s_bdev; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 98906e467517..1ca40c1d966b 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -586,6 +586,7 @@ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes); int log_replay(struct ntfs_inode *ni, bool *initialized); /* Globals from fsntfs.c */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block); bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes); int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, bool simple); @@ -1032,19 +1033,6 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; } -static inline struct buffer_head *ntfs_bread(struct super_block *sb, - sector_t block) -{ - struct buffer_head *bh = sb_bread_unmovable(sb, block); - - if (bh) - return bh; - - ntfs_err(sb, "failed to read volume at offset 0x%llx", - (u64)block << sb->s_blocksize_bits); - return NULL; -} - static inline struct ntfs_inode *ntfs_i(struct inode *inode) { return container_of(inode, struct ntfs_inode, vfs_inode); From f4cf29c6772ea9024dbfe07989fbf5b90b28d510 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:12:38 +0300 Subject: [PATCH 087/299] fs/ntfs3: Use i_size_read and i_size_write [ Upstream commit 4fd6c08a16d7f1ba10212c9ef7bc73218144b463 ] Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/attrib.c | 4 ++-- fs/ntfs3/dir.c | 2 +- fs/ntfs3/file.c | 11 ++++++----- fs/ntfs3/frecord.c | 10 +++++----- fs/ntfs3/index.c | 8 ++++---- fs/ntfs3/inode.c | 2 +- 6 files changed, 19 insertions(+), 18 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 646e2dad1b75..7aadf5010999 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -2084,7 +2084,7 @@ next_attr: /* Update inode size. */ ni->i_valid = valid_size; - ni->vfs_inode.i_size = data_size; + i_size_write(&ni->vfs_inode, data_size); inode_set_bytes(&ni->vfs_inode, total_size); ni->ni_flags |= NI_FLAG_UPDATE_PARENT; mark_inode_dirty(&ni->vfs_inode); @@ -2499,7 +2499,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) mi_b->dirty = true; done: - ni->vfs_inode.i_size += bytes; + i_size_write(&ni->vfs_inode, ni->vfs_inode.i_size + bytes); ni->ni_flags |= NI_FLAG_UPDATE_PARENT; mark_inode_dirty(&ni->vfs_inode); diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 726122ecd39b..2c73ca469d51 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -517,7 +517,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, u32 e_size, off, end; size_t drs = 0, fles = 0, bit = 0; struct indx_node *node = NULL; - size_t max_indx = ni->vfs_inode.i_size >> ni->dir.index_bits; + size_t max_indx = i_size_read(&ni->vfs_inode) >> ni->dir.index_bits; if (is_empty) *is_empty = true; diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index a5bbf7b5775a..0f6a78aef90f 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -646,7 +646,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) if (err) goto out; } else if (new_size > i_size) { - inode->i_size = new_size; + i_size_write(inode, new_size); } } @@ -696,7 +696,7 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, goto out; } inode_dio_wait(inode); - oldsize = inode->i_size; + oldsize = i_size_read(inode); newsize = attr->ia_size; if (newsize <= oldsize) @@ -708,7 +708,7 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, goto out; ni->ni_flags |= NI_FLAG_UPDATE_PARENT; - inode->i_size = newsize; + i_size_write(inode, newsize); } setattr_copy(idmap, inode, attr); @@ -847,7 +847,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) size_t count = iov_iter_count(from); loff_t pos = iocb->ki_pos; struct inode *inode = file_inode(file); - loff_t i_size = inode->i_size; + loff_t i_size = i_size_read(inode); struct address_space *mapping = inode->i_mapping; struct ntfs_inode *ni = ntfs_i(inode); u64 valid = ni->i_valid; @@ -1177,7 +1177,8 @@ static int ntfs_file_release(struct inode *inode, struct file *file) down_write(&ni->file.run_lock); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, - inode->i_size, &ni->i_valid, false, NULL); + i_size_read(inode), &ni->i_valid, false, + NULL); up_write(&ni->file.run_lock); ni_unlock(ni); diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index a918d7283aca..61c51650266e 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -2099,7 +2099,7 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) gfp_t gfp_mask; struct page *pg; - if (vbo >= ni->vfs_inode.i_size) { + if (vbo >= i_size_read(&ni->vfs_inode)) { SetPageUptodate(page); err = 0; goto out; @@ -2173,7 +2173,7 @@ int ni_decompress_file(struct ntfs_inode *ni) { struct ntfs_sb_info *sbi = ni->mi.sbi; struct inode *inode = &ni->vfs_inode; - loff_t i_size = inode->i_size; + loff_t i_size = i_size_read(inode); struct address_space *mapping = inode->i_mapping; gfp_t gfp_mask = mapping_gfp_mask(mapping); struct page **pages = NULL; @@ -2457,6 +2457,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, struct ATTR_LIST_ENTRY *le = NULL; struct runs_tree *run = &ni->file.run; u64 valid_size = ni->i_valid; + loff_t i_size = i_size_read(&ni->vfs_inode); u64 vbo_disk; size_t unc_size; u32 frame_size, i, npages_disk, ondisk_size; @@ -2548,7 +2549,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, } } - frames = (ni->vfs_inode.i_size - 1) >> frame_bits; + frames = (i_size - 1) >> frame_bits; err = attr_wof_frame_info(ni, attr, run, frame64, frames, frame_bits, &ondisk_size, &vbo_data); @@ -2556,8 +2557,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, goto out2; if (frame64 == frames) { - unc_size = 1 + ((ni->vfs_inode.i_size - 1) & - (frame_size - 1)); + unc_size = 1 + ((i_size - 1) & (frame_size - 1)); ondisk_size = attr_size(attr) - vbo_data; } else { unc_size = frame_size; diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index cf92b2433f7a..daabaad63aaf 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1462,7 +1462,7 @@ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, goto out2; if (in->name == I30_NAME) { - ni->vfs_inode.i_size = data_size; + i_size_write(&ni->vfs_inode, data_size); inode_set_bytes(&ni->vfs_inode, alloc_size); } @@ -1544,7 +1544,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, } if (in->name == I30_NAME) - ni->vfs_inode.i_size = data_size; + i_size_write(&ni->vfs_inode, data_size); *vbn = bit << indx->idx2vbn_bits; @@ -2090,7 +2090,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni, return err; if (in->name == I30_NAME) - ni->vfs_inode.i_size = new_data; + i_size_write(&ni->vfs_inode, new_data); bpb = bitmap_size(bit); if (bpb * 8 == nbits) @@ -2576,7 +2576,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, &indx->alloc_run, 0, NULL, false, NULL); if (in->name == I30_NAME) - ni->vfs_inode.i_size = 0; + i_size_write(&ni->vfs_inode, 0); err = ni_remove_attr(ni, ATTR_ALLOC, in->name, in->name_len, false, NULL); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 012dbc8ac5ec..34f2e16f3f5b 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -983,7 +983,7 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, } if (pos + err > inode->i_size) { - inode->i_size = pos + err; + i_size_write(inode, pos + err); dirty = true; } From eac2e00f3c315d5979b23431cd4e3be4d913cd41 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:13:59 +0300 Subject: [PATCH 088/299] fs/ntfs3: Correct function is_rst_area_valid [ Upstream commit 1b7dd28e14c4728ae1a815605ca33ffb4ce1b309 ] Reported-by: Robert Morris Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/fslog.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 7dbb000fc691..855519713bf7 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -465,7 +465,7 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) { const struct RESTART_AREA *ra; u16 cl, fl, ul; - u32 off, l_size, file_dat_bits, file_size_round; + u32 off, l_size, seq_bits; u16 ro = le16_to_cpu(rhdr->ra_off); u32 sys_page = le32_to_cpu(rhdr->sys_page_size); @@ -511,13 +511,15 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) /* Make sure the sequence number bits match the log file size. */ l_size = le64_to_cpu(ra->l_size); - file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits); - file_size_round = 1u << (file_dat_bits + 3); - if (file_size_round != l_size && - (file_size_round < l_size || (file_size_round / 2) > l_size)) { - return false; + seq_bits = sizeof(u64) * 8 + 3; + while (l_size) { + l_size >>= 1; + seq_bits -= 1; } + if (seq_bits != ra->seq_num_bits) + return false; + /* The log page data offset and record header length must be quad-aligned. */ if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) || !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8)) From 8c77398c72618101d66480b94b34fe9087ee3d08 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:14:31 +0300 Subject: [PATCH 089/299] fs/ntfs3: Fixed overflow check in mi_enum_attr() [ Upstream commit 652cfeb43d6b9aba5c7c4902bed7a7340df131fb ] Reported-by: Robert Morris Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 7b6423584eae..6aa3a9d44df1 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -279,7 +279,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) if (t16 > asize) return NULL; - if (t16 + le32_to_cpu(attr->res.data_size) > asize) + if (le32_to_cpu(attr->res.data_size) > asize - t16) return NULL; t32 = sizeof(short) * attr->name_len; From ded8bf5b36878c291c67f7e5fbf033fad78eb881 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 29 Jan 2024 10:30:09 +0300 Subject: [PATCH 090/299] fs/ntfs3: Update inode->i_size after success write into compressed file [ Upstream commit d68968440b1a75dee05cfac7f368f1aa139e1911 ] Reported-by: Giovanni Santini Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 0f6a78aef90f..dfd5402a42e4 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1054,6 +1054,8 @@ out: iocb->ki_pos += written; if (iocb->ki_pos > ni->i_valid) ni->i_valid = iocb->ki_pos; + if (iocb->ki_pos > i_size) + i_size_write(inode, iocb->ki_pos); return written; } From 52fff5799e3d1b5803ecd2f5f19c13c65f4f7b23 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Sat, 30 Dec 2023 17:00:03 +0800 Subject: [PATCH 091/299] fs/ntfs3: Fix oob in ntfs_listxattr [ Upstream commit 731ab1f9828800df871c5a7ab9ffe965317d3f15 ] The length of name cannot exceed the space occupied by ea. Reported-and-tested-by: syzbot+65e940cfb8f99a97aca7@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/xattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 838a79157fb9..b50010494e6d 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -219,6 +219,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, if (!ea->name_len) break; + if (ea->name_len > ea_size) + break; + if (buffer) { /* Check if we can use field ea->name */ if (off + ea_size > size) From c7a4f932b3a602e94cf4cb4c42b9b1389e548d1d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 15:53:55 +0100 Subject: [PATCH 092/299] wifi: mac80211: set station RX-NSS on reconfig [ Upstream commit dd6c064cfc3fc18d871107c6f5db8837e88572e4 ] When a station is added/reconfigured by userspace, e.g. a TDLS peer or a SoftAP client STA, rx_nss is currently not always set, so that it might be left zero. Set it up properly. Link: https://msgid.link/20240129155354.98f148a3d654.I193a02155f557ea54dc9d0232da66cf96734119a@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/cfg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f7cb50b0dd4e..daf5212e283d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1887,6 +1887,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, sband->band); } + ieee80211_sta_set_rx_nss(link_sta); + return ret; } From 415f8e9854bb0e0582de83257b56ceac6014282b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:48:23 +0100 Subject: [PATCH 093/299] wifi: mac80211: adding missing drv_mgd_complete_tx() call [ Upstream commit c042600c17d8c490279f0ae2baee29475fe8047d ] There's a call to drv_mgd_prepare_tx() and so there should be one to drv_mgd_complete_tx(), but on this path it's not. Add it. Link: https://msgid.link/20240131164824.2f0922a514e1.I5aac89b93bcead88c374187d70cad0599d29d2c8@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d9e716f38b0e..c6044ab4e7fc 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7800,6 +7800,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code, false); + drv_mgd_complete_tx(sdata->local, sdata, &info); return 0; } From c9da889a37fc913e1a4f762ccc361920e88fe888 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:09:07 +0100 Subject: [PATCH 094/299] wifi: mac80211: accept broadcast probe responses on 6 GHz [ Upstream commit 62a6183c13319e4d2227473a04abd104c4f56dcf ] On the 6 GHz band, probe responses are sent as broadcast to optimise medium usage. However, without OCE configuration we weren't accepting them, which is wrong, even if wpa_s is by default enabling OCE. Accept them without the OCE config as well. Link: https://msgid.link/20240129200907.5a89c2821897.I92e9dfa0f9b350bc7f37dd4bb38031d156d78d8a@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/scan.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 68ec2124c3db..a52813f2b08c 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include @@ -222,14 +222,18 @@ ieee80211_bss_info_update(struct ieee80211_local *local, } static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *channel, u32 scan_flags, const u8 *da) { if (!sdata) return false; - /* accept broadcast for OCE */ - if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP && - is_broadcast_ether_addr(da)) + + /* accept broadcast on 6 GHz and for OCE */ + if (is_broadcast_ether_addr(da) && + (channel->band == NL80211_BAND_6GHZ || + scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP)) return true; + if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR) return true; return ether_addr_equal(da, sdata->vif.addr); @@ -278,6 +282,12 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0); } + channel = ieee80211_get_channel_khz(local->hw.wiphy, + ieee80211_rx_status_to_khz(rx_status)); + + if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) + return; + if (ieee80211_is_probe_resp(mgmt->frame_control)) { struct cfg80211_scan_request *scan_req; struct cfg80211_sched_scan_request *sched_scan_req; @@ -295,19 +305,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) /* ignore ProbeResp to foreign address or non-bcast (OCE) * unless scanning with randomised address */ - if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags, + if (!ieee80211_scan_accept_presp(sdata1, channel, + scan_req_flags, mgmt->da) && - !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags, + !ieee80211_scan_accept_presp(sdata2, channel, + sched_scan_req_flags, mgmt->da)) return; } - channel = ieee80211_get_channel_khz(local->hw.wiphy, - ieee80211_rx_status_to_khz(rx_status)); - - if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) - return; - bss = ieee80211_bss_info_update(local, rx_status, mgmt, skb->len, channel); From 3dce50ca9939caef0162ecc5d95713963ea78b91 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Thu, 1 Feb 2024 16:17:30 +0200 Subject: [PATCH 095/299] wifi: iwlwifi: do not announce EPCS support [ Upstream commit a23c0af103e184bb1252dddddda040f6641bea7b ] mac80211 does not have proper support for EPCS currently as that would require changing the ECDA parameters if EPCS (Emergency Preparedness Communications Service) is in use. As such, do not announce support for it in the capabilities. Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240201155157.59d71656addc.Idde91b3018239c49fc6ed231b411d05354fb9fb1@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index e3120ab893f4..878d9416a108 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -668,7 +668,6 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = { .has_eht = true, .eht_cap_elem = { .mac_cap_info[0] = - IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS | IEEE80211_EHT_MAC_CAP0_OM_CONTROL | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2, @@ -792,7 +791,6 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = { .has_eht = true, .eht_cap_elem = { .mac_cap_info[0] = - IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS | IEEE80211_EHT_MAC_CAP0_OM_CONTROL | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2, @@ -1003,8 +1001,7 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans, if (CSR_HW_REV_TYPE(trans->hw_rev) == IWL_CFG_MAC_TYPE_GL && iftype_data->eht_cap.has_eht) { iftype_data->eht_cap.eht_cap_elem.mac_cap_info[0] &= - ~(IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS | - IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 | + ~(IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2); iftype_data->eht_cap.eht_cap_elem.phy_cap_info[3] &= ~(IEEE80211_EHT_PHY_CAP0_PARTIAL_BW_UL_MU_MIMO | From cf3d6813601fe496de7f023435e31bfffa74ae70 Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Fri, 2 Feb 2024 10:07:03 -0800 Subject: [PATCH 096/299] efi: runtime: Fix potential overflow of soft-reserved region size [ Upstream commit de1034b38a346ef6be25fe8792f5d1e0684d5ff4 ] md_size will have been narrowed if we have >= 4GB worth of pages in a soft-reserved region. Signed-off-by: Andrew Bresticker Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/riscv-runtime.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 83f5bb57fa4c..83092d93f36a 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -107,7 +107,7 @@ static int __init arm_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c index 09525fb5c240..01f0f90ea418 100644 --- a/drivers/firmware/efi/riscv-runtime.c +++ b/drivers/firmware/efi/riscv-runtime.c @@ -85,7 +85,7 @@ static int __init riscv_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) From 5c7ed4d957a8d3ffec226e98232cfc6fb19488ce Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Fri, 2 Feb 2024 10:07:04 -0800 Subject: [PATCH 097/299] efi: Don't add memblocks for soft-reserved memory [ Upstream commit 0bcff59ef7a652fcdc6d535554b63278c2406c8f ] Adding memblocks for soft-reserved regions prevents them from later being hotplugged in by dax_kmem. Signed-off-by: Andrew Bresticker Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/efi-init.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index ef0820f1a924..59b0d7197b68 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -134,15 +134,6 @@ static __init int is_usable_memory(efi_memory_desc_t *md) case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: - /* - * Special purpose memory is 'soft reserved', which means it - * is set aside initially, but can be hotplugged back in or - * be assigned to the dax driver after boot. - */ - if (efi_soft_reserve_enabled() && - (md->attribute & EFI_MEMORY_SP)) - return false; - /* * According to the spec, these regions are no longer reserved * after calling ExitBootServices(). However, we can only use @@ -187,6 +178,16 @@ static __init void reserve_regions(void) size = npages << PAGE_SHIFT; if (is_memory(md)) { + /* + * Special purpose memory is 'soft reserved', which + * means it is set aside initially. Don't add a memblock + * for it now so that it can be hotplugged back in or + * be assigned to the dax driver after boot. + */ + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + continue; + early_init_dt_add_memory_arch(paddr, size); if (!is_usable_memory(md)) From b9c3a26bf95c3d65709cbdc2d5a77ef6e1135470 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 2 Feb 2024 17:21:36 +0800 Subject: [PATCH 098/299] hwmon: (coretemp) Enlarge per package core count limit [ Upstream commit 34cf8c657cf0365791cdc658ddbca9cc907726ce ] Currently, coretemp driver supports only 128 cores per package. This loses some core temperature information on systems that have more than 128 cores per package. [ 58.685033] coretemp coretemp.0: Adding Core 128 failed [ 58.692009] coretemp coretemp.0: Adding Core 129 failed ... Enlarge the limitation to 512 because there are platforms with more than 256 cores per package. Signed-off-by: Zhang Rui Link: https://lore.kernel.org/r/20240202092144.71180-4-rui.zhang@intel.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/coretemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 95f4c0b00b2d..b8fc8d1ef20d 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -41,7 +41,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); #define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */ #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */ -#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */ +#define NUM_REAL_CORES 512 /* Number of Real cores per cpu */ #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */ #define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */ #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1) From 51a5ca984866b83a2724ed8c836b246bf72d931a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 20 Dec 2023 17:26:58 +0100 Subject: [PATCH 099/299] scsi: lpfc: Use unsigned type for num_sge [ Upstream commit d6c1b19153f92e95e5e1801d540e98771053afae ] LUNs going into "failed ready running" state observed on >1T and on even numbers of size (2T, 4T, 6T, 8T and 10T). The issue occurs when DIF is enabled at the host. The kernel logs: Cannot setup S/G List for HBAIO segs 1/1 SGL 512 SCSI 256: 3 0 The host lpfc driver is failing to setup scatter/gather list (protection data) for the I/Os. The return type lpfc_bg_setup_sgl()/lpfc_bg_setup_sgl_prot() causes the compiler to remove the most significant bit. Use an unsigned type instead. Signed-off-by: Hannes Reinecke [dwagner: added commit message] Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20231220162658.12392-1-dwagner@suse.de Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_scsi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index d26941b131fd..bf879d81846b 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1918,7 +1918,7 @@ out: * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datasegcnt, struct lpfc_io_buf *lpfc_cmd) @@ -1926,8 +1926,8 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct scatterlist *sgde = NULL; /* s/g data entry */ struct sli4_sge_diseed *diseed = NULL; dma_addr_t physaddr; - int i = 0, num_sge = 0, status; - uint32_t reftag; + int i = 0, status; + uint32_t reftag, num_sge = 0; uint8_t txop, rxop; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint32_t rc; @@ -2099,7 +2099,7 @@ out: * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datacnt, int protcnt, struct lpfc_io_buf *lpfc_cmd) @@ -2123,8 +2123,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, uint32_t rc; #endif uint32_t checking = 1; - uint32_t dma_offset = 0; - int num_sge = 0, j = 2; + uint32_t dma_offset = 0, num_sge = 0; + int j = 2; struct sli4_hybrid_sgl *sgl_xtra = NULL; sgpe = scsi_prot_sglist(sc); From 7ac9e18f5d66087cd22751c5c5bf0090eb0038fe Mon Sep 17 00:00:00 2001 From: Alice Chao Date: Mon, 5 Feb 2024 18:49:04 +0800 Subject: [PATCH 100/299] scsi: ufs: core: Fix shift issue in ufshcd_clear_cmd() [ Upstream commit b513d30d59bb383a6a5d6b533afcab2cee99a8f8 ] When task_tag >= 32 (in MCQ mode) and sizeof(unsigned int) == 4, 1U << task_tag will out of bounds for a u32 mask. Fix this up to prevent SHIFT_ISSUE (bitwise shifts that are out of bounds for their data type). [name:debug_monitors&]Unexpected kernel BRK exception at EL1 [name:traps&]Internal error: BRK handler: 00000000f2005514 [#1] PREEMPT SMP [name:mediatek_cpufreq_hw&]cpufreq stop DVFS log done [name:mrdump&]Kernel Offset: 0x1ba5800000 from 0xffffffc008000000 [name:mrdump&]PHYS_OFFSET: 0x80000000 [name:mrdump&]pstate: 22400005 (nzCv daif +PAN -UAO) [name:mrdump&]pc : [0xffffffdbaf52bb2c] ufshcd_clear_cmd+0x280/0x288 [name:mrdump&]lr : [0xffffffdbaf52a774] ufshcd_wait_for_dev_cmd+0x3e4/0x82c [name:mrdump&]sp : ffffffc0081471b0 Workqueue: ufs_eh_wq_0 ufshcd_err_handler Call trace: dump_backtrace+0xf8/0x144 show_stack+0x18/0x24 dump_stack_lvl+0x78/0x9c dump_stack+0x18/0x44 mrdump_common_die+0x254/0x480 [mrdump] ipanic_die+0x20/0x30 [mrdump] notify_die+0x15c/0x204 die+0x10c/0x5f8 arm64_notify_die+0x74/0x13c do_debug_exception+0x164/0x26c el1_dbg+0x64/0x80 el1h_64_sync_handler+0x3c/0x90 el1h_64_sync+0x68/0x6c ufshcd_clear_cmd+0x280/0x288 ufshcd_wait_for_dev_cmd+0x3e4/0x82c ufshcd_exec_dev_cmd+0x5bc/0x9ac ufshcd_verify_dev_init+0x84/0x1c8 ufshcd_probe_hba+0x724/0x1ce0 ufshcd_host_reset_and_restore+0x260/0x574 ufshcd_reset_and_restore+0x138/0xbd0 ufshcd_err_handler+0x1218/0x2f28 process_one_work+0x5fc/0x1140 worker_thread+0x7d8/0xe20 kthread+0x25c/0x468 ret_from_fork+0x10/0x20 Signed-off-by: Alice Chao Link: https://lore.kernel.org/r/20240205104905.24929-1-alice.chao@mediatek.com Reviewed-by: Stanley Jhu Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufshcd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 44e0437bd19d..f6c83dcff8a8 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2949,7 +2949,7 @@ bool ufshcd_cmd_inflight(struct scsi_cmnd *cmd) */ static int ufshcd_clear_cmd(struct ufs_hba *hba, u32 task_tag) { - u32 mask = 1U << task_tag; + u32 mask; unsigned long flags; int err; @@ -2967,6 +2967,8 @@ static int ufshcd_clear_cmd(struct ufs_hba *hba, u32 task_tag) return 0; } + mask = 1U << task_tag; + /* clear outstanding transaction before retry */ spin_lock_irqsave(hba->host->host_lock, flags); ufshcd_utrl_clear(hba, mask); From 4dbbd8195a68365408f78293c1fe62f2f03fd4d0 Mon Sep 17 00:00:00 2001 From: SEO HOYOUNG Date: Mon, 22 Jan 2024 17:33:24 +0900 Subject: [PATCH 101/299] scsi: ufs: core: Remove the ufshcd_release() in ufshcd_err_handling_prepare() [ Upstream commit 17e94b2585417e04dabc2f13bc03b4665ae687f3 ] If ufshcd_err_handler() is called in a suspend/resume situation, ufs_release() can be called twice and active_reqs end up going negative. This is because ufshcd_err_handling_prepare() and ufshcd_err_handling_unprepare() both call ufshcd_release(). Remove superfluous call to ufshcd_release(). Signed-off-by: SEO HOYOUNG Link: https://lore.kernel.org/r/20240122083324.11797-1-hy50.seo@samsung.com Reviewed-by: Bart Van Assche Reviewed-by: Can Guo Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufshcd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f6c83dcff8a8..ee9119b708f0 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6253,7 +6253,6 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba) ufshcd_hold(hba); if (!ufshcd_is_clkgating_allowed(hba)) ufshcd_setup_clocks(hba, true); - ufshcd_release(hba); pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM; ufshcd_vops_resume(hba, pm_op); } else { From a0a48dd597f5e0ffc53f532dc6a7f63368760308 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 102/299] LoongArch: Select ARCH_ENABLE_THP_MIGRATION instead of redefining it [ Upstream commit b3ff2d9c3a9c64cd0a011cdd407ffc38a6ea8788 ] ARCH_ENABLE_THP_MIGRATION is supposed to be selected by arch Kconfig. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/Kconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e14396a2ddcb..f29a0f2a4f18 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -11,6 +11,7 @@ config LOONGARCH select ARCH_DISABLE_KASAN_INLINE select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE @@ -638,10 +639,6 @@ config ARCH_SPARSEMEM_ENABLE or have huge holes in the physical address space for other reasons. See for more. -config ARCH_ENABLE_THP_MIGRATION - def_bool y - depends on TRANSPARENT_HUGEPAGE - config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG From 3ed93e781a488796e09c317368320e98bade5072 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 103/299] LoongArch: Select HAVE_ARCH_SECCOMP to use the common SECCOMP menu [ Upstream commit 6b79ecd084c99b31c8b4d0beda08893716d5558e ] LoongArch missed the refactoring made by commit 282a181b1a0d ("seccomp: Move config option SECCOMP to arch/Kconfig") because LoongArch was not mainlined at that time. The 'depends on PROC_FS' statement is stale as described in that commit. Select HAVE_ARCH_SECCOMP, and remove the duplicated config entry. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/Kconfig | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index f29a0f2a4f18..9fd8644a9a4c 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -98,6 +98,7 @@ config LOONGARCH select HAVE_ARCH_KFENCE select HAVE_ARCH_KGDB if PERF_EVENTS select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE @@ -604,23 +605,6 @@ config RANDOMIZE_BASE_MAX_OFFSET This is limited by the size of the lower address memory, 256MB. -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - endmenu config ARCH_SELECT_MEMORY_MODEL From 88e189bd16e5889e44a41b3309558ebab78b9280 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 104/299] LoongArch: Change acpi_core_pic[NR_CPUS] to acpi_core_pic[MAX_CORE_PIC] [ Upstream commit 4551b30525cf3d2f026b92401ffe241eb04dfebe ] With default config, the value of NR_CPUS is 64. When HW platform has more then 64 cpus, system will crash on these platforms. MAX_CORE_PIC is the maximum cpu number in MADT table (max physical number) which can exceed the supported maximum cpu number (NR_CPUS, max logical number), but kernel should not crash. Kernel should boot cpus with NR_CPUS, let the remainder cpus stay in BIOS. The potential crash reason is that the array acpi_core_pic[NR_CPUS] can be overflowed when parsing MADT table, and it is obvious that CORE_PIC should be corresponding to physical core rather than logical core, so it is better to define the array as acpi_core_pic[MAX_CORE_PIC]. With the patch, system can boot up 64 vcpus with qemu parameter -smp 128, otherwise system will crash with the following message. [ 0.000000] CPU 0 Unable to handle kernel paging request at virtual address 0000420000004259, era == 90000000037a5f0c, ra == 90000000037a46ec [ 0.000000] Oops[#1]: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.8.0-rc2+ #192 [ 0.000000] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 [ 0.000000] pc 90000000037a5f0c ra 90000000037a46ec tp 9000000003c90000 sp 9000000003c93d60 [ 0.000000] a0 0000000000000019 a1 9000000003d93bc0 a2 0000000000000000 a3 9000000003c93bd8 [ 0.000000] a4 9000000003c93a74 a5 9000000083c93a67 a6 9000000003c938f0 a7 0000000000000005 [ 0.000000] t0 0000420000004201 t1 0000000000000000 t2 0000000000000001 t3 0000000000000001 [ 0.000000] t4 0000000000000003 t5 0000000000000000 t6 0000000000000030 t7 0000000000000063 [ 0.000000] t8 0000000000000014 u0 ffffffffffffffff s9 0000000000000000 s0 9000000003caee98 [ 0.000000] s1 90000000041b0480 s2 9000000003c93da0 s3 9000000003c93d98 s4 9000000003c93d90 [ 0.000000] s5 9000000003caa000 s6 000000000a7fd000 s7 000000000f556b60 s8 000000000e0a4330 [ 0.000000] ra: 90000000037a46ec platform_init+0x214/0x250 [ 0.000000] ERA: 90000000037a5f0c efi_runtime_init+0x30/0x94 [ 0.000000] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) [ 0.000000] PRMD: 00000000 (PPLV0 -PIE -PWE) [ 0.000000] EUEN: 00000000 (-FPE -SXE -ASXE -BTE) [ 0.000000] ECFG: 00070800 (LIE=11 VS=7) [ 0.000000] ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0) [ 0.000000] BADV: 0000420000004259 [ 0.000000] PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) [ 0.000000] Modules linked in: [ 0.000000] Process swapper (pid: 0, threadinfo=(____ptrval____), task=(____ptrval____)) [ 0.000000] Stack : 9000000003c93a14 9000000003800898 90000000041844f8 90000000037a46ec [ 0.000000] 000000000a7fd000 0000000008290000 0000000000000000 0000000000000000 [ 0.000000] 0000000000000000 0000000000000000 00000000019d8000 000000000f556b60 [ 0.000000] 000000000a7fd000 000000000f556b08 9000000003ca7700 9000000003800000 [ 0.000000] 9000000003c93e50 9000000003800898 9000000003800108 90000000037a484c [ 0.000000] 000000000e0a4330 000000000f556b60 000000000a7fd000 000000000f556b08 [ 0.000000] 9000000003ca7700 9000000004184000 0000000000200000 000000000e02b018 [ 0.000000] 000000000a7fd000 90000000037a0790 9000000003800108 0000000000000000 [ 0.000000] 0000000000000000 000000000e0a4330 000000000f556b60 000000000a7fd000 [ 0.000000] 000000000f556b08 000000000eaae298 000000000eaa5040 0000000000200000 [ 0.000000] ... [ 0.000000] Call Trace: [ 0.000000] [<90000000037a5f0c>] efi_runtime_init+0x30/0x94 [ 0.000000] [<90000000037a46ec>] platform_init+0x214/0x250 [ 0.000000] [<90000000037a484c>] setup_arch+0x124/0x45c [ 0.000000] [<90000000037a0790>] start_kernel+0x90/0x670 [ 0.000000] [<900000000378b0d8>] kernel_entry+0xd8/0xdc Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/include/asm/acpi.h | 4 +++- arch/loongarch/kernel/acpi.c | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 8de6c4b83a61..49e29b29996f 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -32,8 +32,10 @@ static inline bool acpi_has_cpu_in_madt(void) return true; } +#define MAX_CORE_PIC 256 + extern struct list_head acpi_wakeup_device_list; -extern struct acpi_madt_core_pic acpi_core_pic[NR_CPUS]; +extern struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC]; extern int __init parse_acpi_topology(void); diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 8e00a754e548..55d6a48c76a8 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -29,11 +29,9 @@ int disabled_cpus; u64 acpi_saved_sp; -#define MAX_CORE_PIC 256 - #define PREFIX "ACPI: " -struct acpi_madt_core_pic acpi_core_pic[NR_CPUS]; +struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC]; void __init __iomem * __acpi_map_table(unsigned long phys, unsigned long size) { From d382f733c8ba207a9d2c1fe130a828e376222466 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 105/299] LoongArch: vDSO: Disable UBSAN instrumentation [ Upstream commit cca5efe77a6a2d02b3da4960f799fa233e460ab1 ] The vDSO executes in userspace, so the kernel's UBSAN should not instrument it. Solves these kind of build errors: loongarch64-linux-ld: arch/loongarch/vdso/vgettimeofday.o: in function `vdso_shift_ns': lib/vdso/gettimeofday.c:23:(.text+0x3f8): undefined reference to `__ubsan_handle_shift_out_of_bounds' Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401310530.lZHCj1Zl-lkp@intel.com/ Cc: Huacai Chen Cc: WANG Xuerui Cc: Vincenzo Frascino Cc: Nathan Chancellor Cc: Masahiro Yamada Cc: Fangrui Song Cc: loongarch@lists.linux.dev Signed-off-by: Kees Cook Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/vdso/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 5c97d1463328..4305d99b3313 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -2,6 +2,7 @@ # Objects to go into the VDSO. KASAN_SANITIZE := n +UBSAN_SANITIZE := n KCOV_INSTRUMENT := n # Include the generic Makefile to check the built vdso. From e11aa132160c02f45e90affe866d7b79cba8cfd6 Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Fri, 26 Jan 2024 13:27:58 +0100 Subject: [PATCH 106/299] accel/ivpu: Force snooping for MMU writes [ Upstream commit c9da9a1f17bf4fa96b115950fd389c917b583c1c ] Set AW_SNOOP_OVERRIDE bit in VPU_37/40XX_HOST_IF_TCU_PTW_OVERRIDES to force snooping for MMU write accesses (setting event queue events). MMU event queue buffer is the only buffer written by MMU and mapped as write-back which break cache coherency. Force write transactions to be snooped solving the problem. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240126122804.2169129-2-jacek.lawrynowicz@linux.intel.com Signed-off-by: Sasha Levin --- drivers/accel/ivpu/ivpu_hw_37xx.c | 2 +- drivers/accel/ivpu/ivpu_hw_40xx.c | 2 +- drivers/accel/ivpu/ivpu_mmu.c | 3 --- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index ddf03498fd4c..c0de7c0c991f 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -562,7 +562,7 @@ static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val); - val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); + val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val); diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 03600a7a5aca..65c6a82bb13f 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -523,7 +523,7 @@ static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES); val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val); - val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val); val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val); REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val); diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index baefaf7bb3cb..d04a28e05248 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -491,7 +491,6 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) mmu->cmdq.cons = 0; memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE); - clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE); mmu->evtq.prod = 0; mmu->evtq.cons = 0; @@ -805,8 +804,6 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT)) return NULL; - clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE); - evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK; REGV_WR32(VPU_37XX_HOST_MMU_EVTQ_CONS_SEC, evtq->cons); From 9895188644eee6567a41d0372f5332f50fd3a53a Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Fri, 26 Jan 2024 13:28:00 +0100 Subject: [PATCH 107/299] accel/ivpu: Disable d3hot_delay on all NPU generations [ Upstream commit a7f31091ddf457352e3dd7ac183fdbd26b4dcd04 ] NPU does not require this delay regardless of the generation. All generations are integrated into the SOC. Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240126122804.2169129-4-jacek.lawrynowicz@linux.intel.com Signed-off-by: Sasha Levin --- drivers/accel/ivpu/ivpu_drv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 7e9359611d69..8fb70e3c7b9c 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -467,9 +467,8 @@ static int ivpu_pci_init(struct ivpu_device *vdev) /* Clear any pending errors */ pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f); - /* VPU 37XX does not require 10m D3hot delay */ - if (ivpu_hw_gen(vdev) == IVPU_HW_37XX) - pdev->d3hot_delay = 0; + /* NPU does not require 10m D3hot delay */ + pdev->d3hot_delay = 0; ret = pcim_enable_device(pdev); if (ret) { From 5175a72c8e99dec0ba205ed8550125c47fd0b051 Mon Sep 17 00:00:00 2001 From: Krystian Pradzynski Date: Fri, 26 Jan 2024 13:28:03 +0100 Subject: [PATCH 108/299] accel/ivpu/40xx: Stop passing SKU boot parameters to FW [ Upstream commit 553099da45397914a995dce6307d6c26523c2567 ] This parameter was never used by the 40xx FW. Signed-off-by: Krystian Pradzynski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240126122804.2169129-7-jacek.lawrynowicz@linux.intel.com Signed-off-by: Sasha Levin --- drivers/accel/ivpu/ivpu_hw_40xx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 65c6a82bb13f..ae6be0d5aacc 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -697,7 +697,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev) { struct ivpu_hw_info *hw = vdev->hw; u32 tile_disable; - u32 tile_enable; u32 fuse; fuse = REGB_RD32(VPU_40XX_BUTTRESS_TILE_FUSE); @@ -718,10 +717,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev) else ivpu_dbg(vdev, MISC, "Fuse: All %d tiles enabled\n", TILE_MAX_NUM); - tile_enable = (~tile_disable) & TILE_MAX_MASK; - - hw->sku = REG_SET_FLD_NUM(SKU, HW_ID, LNL_HW_ID, hw->sku); - hw->sku = REG_SET_FLD_NUM(SKU, TILE, tile_enable, hw->sku); hw->tile_fuse = tile_disable; hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; From 9fe6ad655fe0b8bd7f29243bafc377e1bb8fbf37 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 7 Feb 2024 08:01:17 +0900 Subject: [PATCH 109/299] firewire: core: send bus reset promptly on gap count error [ Upstream commit 7ed4380009e96d9e9c605e12822e987b35b05648 ] If we are bus manager and the bus has inconsistent gap counts, send a bus reset immediately instead of trying to read the root node's config ROM first. Otherwise, we could spend a lot of time trying to read the config ROM but never succeeding. This eliminates a 50+ second delay before the FireWire bus is usable after a newly connected device is powered on in certain circumstances. The delay occurs if a gap count inconsistency occurs, we are not the root node, and we become bus manager. One scenario that causes this is with a TI XIO2213B OHCI, the first time a Sony DSR-25 is powered on after being connected to the FireWire cable. In this configuration, the Linux box will not receive the initial PHY configuration packet sent by the DSR-25 as IRM, resulting in the DSR-25 having a gap count of 44 while the Linux box has a gap count of 63. FireWire devices have a gap count parameter, which is set to 63 on power-up and can be changed with a PHY configuration packet. This determines the duration of the subaction and arbitration gaps. For reliable communication, all nodes on a FireWire bus must have the same gap count. A node may have zero or more of the following roles: root node, bus manager (BM), isochronous resource manager (IRM), and cycle master. Unless a root node was forced with a PHY configuration packet, any node might become root node after a bus reset. Only the root node can become cycle master. If the root node is not cycle master capable, the BM or IRM should force a change of root node. After a bus reset, each node sends a self-ID packet, which contains its current gap count. A single bus reset does not change the gap count, but two bus resets in a row will set the gap count to 63. Because a consistent gap count is required for reliable communication, IEEE 1394a-2000 requires that the bus manager generate a bus reset if it detects that the gap count is inconsistent. When the gap count is inconsistent, build_tree() will notice this after the self identification process. It will set card->gap_count to the invalid value 0. If we become bus master, this will force bm_work() to send a bus reset when it performs gap count optimization. After a bus reset, there is no bus manager. We will almost always try to become bus manager. Once we become bus manager, we will first determine whether the root node is cycle master capable. Then, we will determine if the gap count should be changed. If either the root node or the gap count should be changed, we will generate a bus reset. To determine if the root node is cycle master capable, we read its configuration ROM. bm_work() will wait until we have finished trying to read the configuration ROM. However, an inconsistent gap count can make this take a long time. read_config_rom() will read the first few quadlets from the config ROM. Due to the gap count inconsistency, eventually one of the reads will time out. When read_config_rom() fails, fw_device_init() calls it again until MAX_RETRIES is reached. This takes 50+ seconds. Once we give up trying to read the configuration ROM, bm_work() will wake up, assume that the root node is not cycle master capable, and do a bus reset. Hopefully, this will resolve the gap count inconsistency. This change makes bm_work() check for an inconsistent gap count before waiting for the root node's configuration ROM. If the gap count is inconsistent, bm_work() will immediately do a bus reset. This eliminates the 50+ second delay and rapidly brings the bus to a working state. I considered that if the gap count is inconsistent, a PHY configuration packet might not be successful, so it could be desirable to skip the PHY configuration packet before the bus reset in this case. However, IEEE 1394a-2000 and IEEE 1394-2008 say that the bus manager may transmit a PHY configuration packet before a bus reset when correcting a gap count error. Since the standard endorses this, I decided it's safe to retain the PHY configuration packet transmission. Normally, after a topology change, we will reset the bus a maximum of 5 times to change the root node and perform gap count optimization. However, if there is a gap count inconsistency, we must always generate a bus reset. Otherwise the gap count inconsistency will persist and communication will be unreliable. For that reason, if there is a gap count inconstency, we generate a bus reset even if we already reached the 5 reset limit. Signed-off-by: Adam Goldman Reference: https://sourceforge.net/p/linux1394/mailman/message/58727806/ Signed-off-by: Takashi Sakamoto Signed-off-by: Sasha Levin --- drivers/firewire/core-card.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 6ac5ff20a2fe..8aaa7fcb2630 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -429,7 +429,23 @@ static void bm_work(struct work_struct *work) */ card->bm_generation = generation; - if (root_device == NULL) { + if (card->gap_count == 0) { + /* + * If self IDs have inconsistent gap counts, do a + * bus reset ASAP. The config rom read might never + * complete, so don't wait for it. However, still + * send a PHY configuration packet prior to the + * bus reset. The PHY configuration packet might + * fail, but 1394-2008 8.4.5.2 explicitly permits + * it in this case, so it should be safe to try. + */ + new_root_id = local_id; + /* + * We must always send a bus reset if the gap count + * is inconsistent, so bypass the 5-reset limit. + */ + card->bm_retries = 0; + } else if (root_device == NULL) { /* * Either link_on is false, or we failed to read the * config rom. In either case, pick another root. From 7d7046a6caf2dd99c73de72267c3a7caa319882d Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 13 Oct 2023 13:55:44 +0800 Subject: [PATCH 110/299] libceph: fail sparse-read if the data length doesn't match [ Upstream commit cd7d469c25704d414d71bf3644f163fb74e7996b ] Once this happens that means there have bugs. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- include/linux/ceph/osd_client.h | 3 ++- net/ceph/osd_client.c | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index bf9823956758..f703fb8030de 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -45,6 +45,7 @@ enum ceph_sparse_read_state { CEPH_SPARSE_READ_HDR = 0, CEPH_SPARSE_READ_EXTENTS, CEPH_SPARSE_READ_DATA_LEN, + CEPH_SPARSE_READ_DATA_PRE, CEPH_SPARSE_READ_DATA, }; @@ -64,7 +65,7 @@ struct ceph_sparse_read { u64 sr_req_len; /* orig request length */ u64 sr_pos; /* current pos in buffer */ int sr_index; /* current extent index */ - __le32 sr_datalen; /* length of actual data */ + u32 sr_datalen; /* length of actual data */ u32 sr_count; /* extent count in reply */ int sr_ext_len; /* length of extent array */ struct ceph_sparse_extent *sr_extent; /* extent array */ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 8d9760397b88..3babcd5e65e1 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5856,8 +5856,8 @@ static int osd_sparse_read(struct ceph_connection *con, struct ceph_osd *o = con->private; struct ceph_sparse_read *sr = &o->o_sparse_read; u32 count = sr->sr_count; - u64 eoff, elen; - int ret; + u64 eoff, elen, len = 0; + int i, ret; switch (sr->sr_state) { case CEPH_SPARSE_READ_HDR: @@ -5909,8 +5909,20 @@ next_op: convert_extent_map(sr); ret = sizeof(sr->sr_datalen); *pbuf = (char *)&sr->sr_datalen; - sr->sr_state = CEPH_SPARSE_READ_DATA; + sr->sr_state = CEPH_SPARSE_READ_DATA_PRE; break; + case CEPH_SPARSE_READ_DATA_PRE: + /* Convert sr_datalen to host-endian */ + sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen); + for (i = 0; i < count; i++) + len += sr->sr_extent[i].len; + if (sr->sr_datalen != len) { + pr_warn_ratelimited("data len %u != extent len %llu\n", + sr->sr_datalen, len); + return -EREMOTEIO; + } + sr->sr_state = CEPH_SPARSE_READ_DATA; + fallthrough; case CEPH_SPARSE_READ_DATA: if (sr->sr_index >= count) { sr->sr_state = CEPH_SPARSE_READ_HDR; From 7a3a0b0c7f4720cbac79d991ebb3ab1cd2cd065a Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 16 Jan 2024 19:10:45 +0800 Subject: [PATCH 111/299] drm/amdgpu: skip to program GFXDEC registers for suspend abort [ Upstream commit 93bafa32a6918154aa0caf9f66679a32c2431357 ] In the suspend abort cases, the gfx power rail doesn't turn off so some GFXDEC registers/CSB can't reset to default value and at this moment reinitialize GFXDEC/CSB will result in an unexpected error. So let skip those program sequence for the suspend abort case. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a79d53bdbe13..85efd686e538 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1009,6 +1009,8 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + /* indicate amdgpu suspension status */ + bool suspend_complete; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 2c35036e4ba2..3204c3a42f2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2409,6 +2409,7 @@ static int amdgpu_pmops_suspend(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) @@ -2423,6 +2424,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 2e23d08b45f4..d7d15b618c37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3033,6 +3033,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v9_0_cp_gfx_enable(adev, true); + /* Now only limit the quirk on the APU gfx9 series and already + * confirmed that the APU gfx10/gfx11 needn't such update. + */ + if (adev->flags & AMD_IS_APU && + adev->in_s3 && !adev->suspend_complete) { + DRM_INFO(" Will skip the CSB packet resubmit\n"); + return 0; + } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); From 61c0a633bdc644b064f0c9fe6412df247b2fbcca Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 17 Jan 2024 13:39:37 +0800 Subject: [PATCH 112/299] drm/amdgpu: reset gpu for s3 suspend abort case [ Upstream commit 6ef82ac664bb9568ca3956e0d9c9c478e25077ff ] In the s3 suspend abort case some type of gfx9 power rail not turn off from FCH side and this will put the GPU in an unknown power status, so let's reset the gpu to a known good power state before reinitialize gpu device. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc15.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 3667f9a54841..2a7c606d1d19 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1296,10 +1296,32 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc15_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n"); + soc15_asic_reset(adev); + } return soc15_common_hw_init(adev); } From cb4541cabb531ce230489a2930c5eaaa91d779ce Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Mon, 5 Feb 2024 15:55:48 +0800 Subject: [PATCH 113/299] drm/amdgpu: Fix shared buff copy to user [ Upstream commit 2dcf82a8e8dc930655787797ef8a3692b527c7a9 ] ta if invoke node buffer |-------- ta type ----------| |-------- ta id ----------| |-------- cmd id ----------| |------ shared buf len -----| |------ shared buffer ------| ta if invoke node buffer is as above, copy shared buffer data to correct location Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 468a67b302d4..ca5c86e5f7cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -362,7 +362,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size } } - if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len)) + if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len)) ret = -EFAULT; err_free_shared_buf: From c19453cc16ad5d90f9ed40686203103e57085580 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 8 Dec 2023 13:48:22 +0530 Subject: [PATCH 114/299] drm/amdgpu: Fix HDP flush for VFs on nbio v7.9 [ Upstream commit 534c8a5b9d5d41d30cdcac93cfa1bca5e17be009 ] HDP flush remapping is not done for VFs. Keep the original offsets in VF environment. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index ae45656eb877..0a601336cf69 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -426,6 +426,12 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev) u32 inst_mask; int i; + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = + SOC15_REG_OFFSET( + NBIO, 0, + regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) + << 2; WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE, 0xff & ~(adev->gfx.xcc_mask)); From cacc0a9c34a82d7a48419451710858a81b7c6808 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 2 Feb 2024 12:38:24 -0300 Subject: [PATCH 115/299] smb: client: set correct d_type for reparse points under DFS mounts [ Upstream commit 55c7788c37242702868bfac7861cdf0c358d6c3d ] Send query dir requests with an info level of SMB_FIND_FILE_FULL_DIRECTORY_INFO rather than SMB_FIND_FILE_DIRECTORY_INFO when the client is generating its own inode numbers (e.g. noserverino) so that reparse tags still can be parsed directly from the responses, but server won't send UniqueId (server inode number) Signed-off-by: Paulo Alcantara Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/readdir.c | 15 ++++++++------- fs/smb/client/smb2pdu.c | 6 ++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index d30ea2005eb3..e23cd216bffb 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -299,14 +299,16 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, } static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr, - SEARCH_ID_FULL_DIR_INFO *info, + const void *info, struct cifs_sb_info *cifs_sb) { + const FILE_FULL_DIRECTORY_INFO *di = info; + __dir_info_to_fattr(fattr, info); - /* See MS-FSCC 2.4.19 FileIdFullDirectoryInformation */ + /* See MS-FSCC 2.4.14, 2.4.19 */ if (fattr->cf_cifsattrs & ATTR_REPARSE) - fattr->cf_cifstag = le32_to_cpu(info->EaSize); + fattr->cf_cifstag = le32_to_cpu(di->EaSize); cifs_fill_common_info(fattr, cifs_sb); } @@ -420,7 +422,7 @@ ffirst_retry: } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; } else /* not srvinos - BB fixme add check for backlevel? */ { - cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO; + cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; } search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; @@ -1014,10 +1016,9 @@ static int cifs_filldir(char *find_entry, struct file *file, (FIND_FILE_STANDARD_INFO *)find_entry, cifs_sb); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: case SMB_FIND_FILE_ID_FULL_DIR_INFO: - cifs_fulldir_info_to_fattr(&fattr, - (SEARCH_ID_FULL_DIR_INFO *)find_entry, - cifs_sb); + cifs_fulldir_info_to_fattr(&fattr, find_entry, cifs_sb); break; default: cifs_dir_info_to_fattr(&fattr, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 97fc2f85b429..9d34a55fdb5e 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -5092,6 +5092,9 @@ int SMB2_query_directory_init(const unsigned int xid, case SMB_FIND_FILE_POSIX_INFO: req->FileInformationClass = SMB_FIND_FILE_POSIX_INFO; break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + req->FileInformationClass = FILE_FULL_DIRECTORY_INFORMATION; + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", info_level); @@ -5161,6 +5164,9 @@ smb2_parse_query_directory(struct cifs_tcon *tcon, /* note that posix payload are variable size */ info_buf_size = sizeof(struct smb2_posix_info); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + info_buf_size = sizeof(FILE_FULL_DIRECTORY_INFO); + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", srch_inf->info_level); From 8946924ff324853df6b7c525a7467d964dfd11c3 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 29 Jan 2024 16:52:50 +0800 Subject: [PATCH 116/299] virtio-blk: Ensure no requests in virtqueues before deleting vqs. [ Upstream commit 4ce6e2db00de8103a0687fb0f65fd17124a51aaa ] Ensure no remaining requests in virtqueues before resetting vdev and deleting virtqueues. Otherwise these requests will never be completed. It may cause the system to become unresponsive. Function blk_mq_quiesce_queue() can ensure that requests have become in_flight status, but it cannot guarantee that requests have been processed by the device. Virtqueues should never be deleted before all requests become complete status. Function blk_mq_freeze_queue() ensure that all requests in virtqueues become complete status. And no requests can enter in virtqueues. Signed-off-by: Yi Sun Reviewed-by: Stefan Hajnoczi Link: https://lore.kernel.org/r/20240129085250.1550594-1-yi.sun@unisoc.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/virtio_blk.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 225c86c74d4e..41b2fd7e1b9e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1629,14 +1629,15 @@ static int virtblk_freeze(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + /* Ensure no requests in virtqueues before deleting vqs. */ + blk_mq_freeze_queue(vblk->disk->queue); + /* Ensure we don't receive any more interrupts */ virtio_reset_device(vdev); /* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work); - blk_mq_quiesce_queue(vblk->disk->queue); - vdev->config->del_vqs(vdev); kfree(vblk->vqs); @@ -1654,7 +1655,7 @@ static int virtblk_restore(struct virtio_device *vdev) virtio_device_ready(vdev); - blk_mq_unquiesce_queue(vblk->disk->queue); + blk_mq_unfreeze_queue(vblk->disk->queue); return 0; } #endif From cd743cfead99fbfa176c9b317706da22fb846542 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 6 Feb 2024 15:00:46 +0000 Subject: [PATCH 117/299] cifs: change tcon status when need_reconnect is set on it [ Upstream commit c6e02eefd6ace3da3369c764f15429f5647056af ] When a tcon is marked for need_reconnect, the intention is to have it reconnected. This change adjusts tcon->status in cifs_tree_connect when need_reconnect is set. Also, this change has a minor correction in resetting need_reconnect on success. It makes sure that it is done with tc_lock held. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/connect.c | 5 +++++ fs/smb/client/dfs.c | 7 ++++++- fs/smb/client/file.c | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 19440255944b..c19eae07fa69 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -4226,6 +4226,11 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); + + /* if tcon is marked for needing reconnect, update state */ + if (tcon->need_reconnect) + tcon->status = TID_NEED_TCON; + if (tcon->status == TID_GOOD) { spin_unlock(&tcon->tc_lock); return 0; diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index a8a1d386da65..449c59830039 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -565,6 +565,11 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); + + /* if tcon is marked for needing reconnect, update state */ + if (tcon->need_reconnect) + tcon->status = TID_NEED_TCON; + if (tcon->status == TID_GOOD) { spin_unlock(&tcon->tc_lock); return 0; @@ -625,8 +630,8 @@ out: spin_lock(&tcon->tc_lock); if (tcon->status == TID_IN_TCON) tcon->status = TID_GOOD; - spin_unlock(&tcon->tc_lock); tcon->need_reconnect = false; + spin_unlock(&tcon->tc_lock); } return rc; diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 32a8525415d9..4cbb5487bd8d 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -175,6 +175,9 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) /* only send once per connect */ spin_lock(&tcon->tc_lock); + if (tcon->need_reconnect) + tcon->status = TID_NEED_RECON; + if (tcon->status != TID_NEED_RECON) { spin_unlock(&tcon->tc_lock); return; From dd40cbafb1d2e8d5b77d5dbb33577a095490aa7d Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 6 Feb 2024 15:00:47 +0000 Subject: [PATCH 118/299] cifs: handle cases where multiple sessions share connection [ Upstream commit a39c757bf0596b17482a507f31c3ef0af0d1d2b4 ] Based on our implementation of multichannel, it is entirely possible that a server struct may not be found in any channel of an SMB session. In such cases, we should be prepared to move on and search for the server struct in the next session. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/connect.c | 6 ++++++ fs/smb/client/sess.c | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index c19eae07fa69..a4147e999736 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -229,6 +229,12 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) { /* check if iface is still active */ spin_lock(&ses->chan_lock); + if (cifs_ses_get_chan_index(ses, server) == + CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + continue; + } + if (!cifs_chan_is_iface_active(ses, server)) { spin_unlock(&ses->chan_lock); cifs_chan_update_iface(ses, server); diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 52f7a411e2bb..07726b456a0a 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -88,7 +88,6 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, if (server) cifs_dbg(VFS, "unable to get chan index for server: 0x%llx", server->conn_id); - WARN_ON(1); return CIFS_INVAL_CHAN_INDEX; } From c5e3ec783484ad850341a32ef51af3b32964c5b0 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 6 Feb 2024 23:57:18 -0600 Subject: [PATCH 119/299] smb3: clarify mount warning [ Upstream commit a5cc98eba2592d6e3c5a4351319595ddde2a5901 ] When a user tries to use the "sec=krb5p" mount parameter to encrypt data on connection to a server (when authenticating with Kerberos), we indicate that it is not supported, but do not note the equivalent recommended mount parameter ("sec=krb5,seal") which turns on encryption for that mount (and uses Kerberos for auth). Update the warning message. Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 75f2c8734ff5..6ecbf48d0f0c 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -210,7 +210,7 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c switch (match_token(value, cifs_secflavor_tokens, args)) { case Opt_sec_krb5p: - cifs_errorf(fc, "sec=krb5p is not supported!\n"); + cifs_errorf(fc, "sec=krb5p is not supported. Use sec=krb5,seal instead\n"); return 1; case Opt_sec_krb5i: ctx->sign = true; From 5655754731398d056c1be0b80688fa95ecbb96b9 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 22 Dec 2023 13:47:22 +0100 Subject: [PATCH 120/299] mptcp: add CurrEstab MIB counter support [ Upstream commit d9cd27b8cd191133e287e5de107f971136abe8a2 ] Add a new MIB counter named MPTCP_MIB_CURRESTAB to count current established MPTCP connections, similar to TCP_MIB_CURRESTAB. This is useful to quickly list the number of MPTCP connections without having to iterate over all of them. This patch adds a new helper function mptcp_set_state(): if the state switches from or to ESTABLISHED state, this newly added counter is incremented. This helper is going to be used in the following patch. Similar to MPTCP_INC_STATS(), a new helper called MPTCP_DEC_STATS() is also needed to decrement a MIB counter. Signed-off-by: Geliang Tang Acked-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Stable-dep-of: e4a0fa47e816 ("mptcp: corner case locking for rx path fields initialization") Signed-off-by: Sasha Levin --- net/mptcp/mib.c | 1 + net/mptcp/mib.h | 8 ++++++++ net/mptcp/protocol.c | 18 ++++++++++++++++++ net/mptcp/protocol.h | 1 + 4 files changed, 28 insertions(+) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index a0990c365a2e..c30405e76833 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED), SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), + SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index cae71d947252..dd7fd1f246b5 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -65,6 +65,7 @@ enum linux_mptcp_mib_field { * conflict with another subflow while updating msk rcv wnd */ MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */ + MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */ __MPTCP_MIB_MAX }; @@ -95,4 +96,11 @@ static inline void __MPTCP_INC_STATS(struct net *net, __SNMP_INC_STATS(net->mib.mptcp_statistics, field); } +static inline void MPTCP_DEC_STATS(struct net *net, + enum linux_mptcp_mib_field field) +{ + if (likely(net->mib.mptcp_statistics)) + SNMP_DEC_STATS(net->mib.mptcp_statistics, field); +} + bool mptcp_mib_alloc(struct net *net); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9d4d5dbdbb53..7765514451de 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2877,6 +2877,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) release_sock(ssk); } +void mptcp_set_state(struct sock *sk, int state) +{ + int oldstate = sk->sk_state; + + switch (state) { + case TCP_ESTABLISHED: + if (oldstate != TCP_ESTABLISHED) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + break; + + default: + if (oldstate == TCP_ESTABLISHED) + MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + } + + inet_sk_state_store(sk, state); +} + static const unsigned char new_state[16] = { /* current state: new state: action: */ [0 /* (Invalid) */] = TCP_CLOSE, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 094d3fd47a92..01778ffa86be 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -642,6 +642,7 @@ bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); void __mptcp_unaccepted_force_close(struct sock *sk); void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); +void mptcp_set_state(struct sock *sk, int state); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); From d52b3c2b29510673472744972adc6f31bd4917ca Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 22 Dec 2023 13:47:23 +0100 Subject: [PATCH 121/299] mptcp: use mptcp_set_state [ Upstream commit c693a8516429908da3ea111b0caa3c042ab1e6e9 ] This patch replaces all the 'inet_sk_state_store()' calls under net/mptcp with the new helper mptcp_set_state(). Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/460 Signed-off-by: Geliang Tang Acked-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Stable-dep-of: e4a0fa47e816 ("mptcp: corner case locking for rx path fields initialization") Signed-off-by: Sasha Levin --- net/mptcp/pm_netlink.c | 5 +++++ net/mptcp/protocol.c | 38 +++++++++++++++++++------------------- net/mptcp/subflow.c | 2 +- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 3011bc378462..44c0e96210a4 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1048,6 +1048,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (err) return err; + /* We don't use mptcp_set_state() here because it needs to be called + * under the msk socket lock. For the moment, that will not bring + * anything more than only calling inet_sk_state_store(), because the + * old status is known (TCP_CLOSE). + */ inet_sk_state_store(newsk, TCP_LISTEN); lock_sock(ssk); err = __inet_listen_sk(ssk, backlog); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7765514451de..2f794924ae5d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -445,11 +445,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk) switch (sk->sk_state) { case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_FIN_WAIT2); + mptcp_set_state(sk, TCP_FIN_WAIT2); break; case TCP_CLOSING: case TCP_LAST_ACK: - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); break; } @@ -610,13 +610,13 @@ static bool mptcp_check_data_fin(struct sock *sk) switch (sk->sk_state) { case TCP_ESTABLISHED: - inet_sk_state_store(sk, TCP_CLOSE_WAIT); + mptcp_set_state(sk, TCP_CLOSE_WAIT); break; case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_CLOSING); + mptcp_set_state(sk, TCP_CLOSING); break; case TCP_FIN_WAIT2: - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); break; default: /* Other states not expected */ @@ -791,7 +791,7 @@ static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk) */ ssk_state = inet_sk_state_load(ssk); if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); + mptcp_set_state(sk, ssk_state); WRITE_ONCE(sk->sk_err, -err); /* This barrier is coupled with smp_rmb() in mptcp_poll() */ @@ -2470,7 +2470,7 @@ out: inet_sk_state_load(msk->first) == TCP_CLOSE) { if (sk->sk_state != TCP_ESTABLISHED || msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_close_wake_up(sk); } else { mptcp_start_tout_timer(sk); @@ -2565,7 +2565,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) WRITE_ONCE(sk->sk_err, ECONNRESET); } - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); @@ -2700,7 +2700,7 @@ static void mptcp_do_fastclose(struct sock *sk) struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, MPTCP_CF_FASTCLOSE); @@ -2917,7 +2917,7 @@ static int mptcp_close_state(struct sock *sk) int next = (int)new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; - inet_sk_state_store(sk, ns); + mptcp_set_state(sk, ns); return next & TCP_ACTION_FIN; } @@ -3035,7 +3035,7 @@ bool __mptcp_close(struct sock *sk, long timeout) if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { mptcp_check_listen_stop(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); goto cleanup; } @@ -3078,7 +3078,7 @@ cleanup: * state, let's not keep resources busy for no reasons */ if (subflows_alive == 0) - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); sock_hold(sk); pr_debug("msk=%p state=%d", sk, sk->sk_state); @@ -3144,7 +3144,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) return -EBUSY; mptcp_check_listen_stop(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_stop_rtx_timer(sk); mptcp_stop_tout_timer(sk); @@ -3231,7 +3231,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ - inet_sk_state_store(nsk, TCP_ESTABLISHED); + mptcp_set_state(nsk, TCP_ESTABLISHED); /* The msk maintain a ref to each subflow in the connections list */ WRITE_ONCE(msk->first, ssk); @@ -3686,7 +3686,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(ssk)) return PTR_ERR(ssk); - inet_sk_state_store(sk, TCP_SYN_SENT); + mptcp_set_state(sk, TCP_SYN_SENT); subflow = mptcp_subflow_ctx(ssk); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -3736,7 +3736,7 @@ out: if (unlikely(err)) { /* avoid leaving a dangling token in an unconnected socket */ mptcp_token_destroy(msk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); return err; } @@ -3826,13 +3826,13 @@ static int mptcp_listen(struct socket *sock, int backlog) goto unlock; } - inet_sk_state_store(sk, TCP_LISTEN); + mptcp_set_state(sk, TCP_LISTEN); sock_set_flag(sk, SOCK_RCU_FREE); lock_sock(ssk); err = __inet_listen_sk(ssk, backlog); release_sock(ssk); - inet_sk_state_store(sk, inet_sk_state_load(ssk)); + mptcp_set_state(sk, inet_sk_state_load(ssk)); if (!err) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -3892,7 +3892,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, __mptcp_close_ssk(newsk, msk->first, mptcp_subflow_ctx(msk->first), 0); if (unlikely(list_is_singular(&msk->conn_list))) - inet_sk_state_store(newsk, TCP_CLOSE); + mptcp_set_state(newsk, TCP_CLOSE); } } release_sock(newsk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 8c7e22a9a37b..15f456fb2897 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -427,7 +427,7 @@ void __mptcp_sync_state(struct sock *sk, int state) if (!msk->rcvspace_init) mptcp_rcv_space_init(msk, msk->first); if (sk->sk_state == TCP_SYN_SENT) { - inet_sk_state_store(sk, state); + mptcp_set_state(sk, state); sk->sk_state_change(sk); } } From 9326d0357ab0850cdb4fef377eb0c20a67defa43 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2024 19:03:51 +0100 Subject: [PATCH 122/299] mptcp: fix more tx path fields initialization [ Upstream commit 3f83d8a77eeeb47011b990fd766a421ee64f1d73 ] The 'msk->write_seq' and 'msk->snd_nxt' are always updated under the msk socket lock, except at MPC handshake completiont time. Builds-up on the previous commit to move such init under the relevant lock. There are no known problems caused by the potential race, the primary goal is consistency. Fixes: 6d0060f600ad ("mptcp: Write MPTCP DSS headers to outgoing data packets") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Stable-dep-of: e4a0fa47e816 ("mptcp: corner case locking for rx path fields initialization") Signed-off-by: Sasha Levin --- net/mptcp/protocol.c | 6 ++---- net/mptcp/subflow.c | 13 +++++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2f794924ae5d..55a90a7b7b51 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3525,10 +3525,8 @@ void mptcp_finish_connect(struct sock *ssk) * accessing the field below */ WRITE_ONCE(msk->local_key, subflow->local_key); - WRITE_ONCE(msk->write_seq, subflow->idsn + 1); - WRITE_ONCE(msk->snd_nxt, msk->write_seq); - WRITE_ONCE(msk->snd_una, msk->write_seq); - WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); + WRITE_ONCE(msk->snd_una, subflow->idsn + 1); + WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd); mptcp_pm_new_connection(msk, ssk, 0); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 15f456fb2897..ba739e700922 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -421,12 +421,21 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc void __mptcp_sync_state(struct sock *sk, int state) { + struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); + struct sock *ssk = msk->first; - __mptcp_propagate_sndbuf(sk, msk->first); + subflow = mptcp_subflow_ctx(ssk); + __mptcp_propagate_sndbuf(sk, ssk); if (!msk->rcvspace_init) - mptcp_rcv_space_init(msk, msk->first); + mptcp_rcv_space_init(msk, ssk); + if (sk->sk_state == TCP_SYN_SENT) { + /* subflow->idsn is always available is TCP_SYN_SENT state, + * even for the FASTOPEN scenarios + */ + WRITE_ONCE(msk->write_seq, subflow->idsn + 1); + WRITE_ONCE(msk->snd_nxt, msk->write_seq); mptcp_set_state(sk, state); sk->sk_state_change(sk); } From b45df837fe8710944e18d4a422c558c95aa77d80 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2024 19:03:52 +0100 Subject: [PATCH 123/299] mptcp: corner case locking for rx path fields initialization [ Upstream commit e4a0fa47e816e186f6b4c0055d07eeec42d11871 ] Most MPTCP-level related fields are under the mptcp data lock protection, but are written one-off without such lock at MPC complete time, both for the client and the server Leverage the mptcp_propagate_state() infrastructure to move such initialization under the proper lock client-wise. The server side critical init steps are done by mptcp_subflow_fully_established(): ensure the caller properly held the relevant lock, and avoid acquiring the same lock in the nested scopes. There are no real potential races, as write access to such fields is implicitly serialized by the MPTCP state machine; the primary goal is consistency. Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/mptcp/fastopen.c | 6 ++--- net/mptcp/options.c | 9 +++---- net/mptcp/protocol.c | 9 ++++--- net/mptcp/protocol.h | 9 +++---- net/mptcp/subflow.c | 56 +++++++++++++++++++++++++------------------- 5 files changed, 50 insertions(+), 39 deletions(-) diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index 74698582a285..ad28da655f8b 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf mptcp_data_unlock(sk); } -void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt) +void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) { struct sock *sk = (struct sock *)msk; struct sk_buff *skb; - mptcp_data_lock(sk); skb = skb_peek_tail(&sk->sk_receive_queue); if (skb) { WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq); @@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_ } pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq); - mptcp_data_unlock(sk); } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index d2527d189a79..e3e96a49f922 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. */ - subflow->fully_established = 1; - WRITE_ONCE(msk->fully_established, true); - goto check_notify; + goto set_fully_established; } /* If the first established packet does not contain MP_CAPABLE + data @@ -986,7 +984,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, set_fully_established: if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); - mptcp_subflow_fully_established(subflow, mp_opt); + + mptcp_data_lock((struct sock *)msk); + __mptcp_subflow_fully_established(msk, subflow, mp_opt); + mptcp_data_unlock((struct sock *)msk); check_notify: /* if the subflow is not already linked into the conn_list, we can't diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 55a90a7b7b51..d36927411310 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3195,6 +3195,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); + struct mptcp_subflow_context *subflow; struct mptcp_sock *msk; if (!nsk) @@ -3235,7 +3236,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, /* The msk maintain a ref to each subflow in the connections list */ WRITE_ONCE(msk->first, ssk); - list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list); + subflow = mptcp_subflow_ctx(ssk); + list_add(&subflow->node, &msk->conn_list); sock_hold(ssk); /* new mpc subflow takes ownership of the newly @@ -3250,6 +3252,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, __mptcp_propagate_sndbuf(nsk, ssk); mptcp_rcv_space_init(msk, ssk); + + if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK) + __mptcp_subflow_fully_established(msk, subflow, mp_opt); bh_unlock_sock(nsk); /* note: the newly allocated socket refcount is 2 now */ @@ -3525,8 +3530,6 @@ void mptcp_finish_connect(struct sock *ssk) * accessing the field below */ WRITE_ONCE(msk->local_key, subflow->local_key); - WRITE_ONCE(msk->snd_una, subflow->idsn + 1); - WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd); mptcp_pm_new_connection(msk, ssk, 0); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 01778ffa86be..c9516882cdd4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -623,8 +623,9 @@ int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); -void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt); +void __mptcp_subflow_fully_established(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); void mptcp_check_and_set_pending(struct sock *sk); void __mptcp_push_pending(struct sock *sk, unsigned int flags); @@ -938,8 +939,8 @@ void mptcp_event_pm_listener(const struct sock *ssk, enum mptcp_event_type event); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); -void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt); +void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ba739e700922..43d6ee432814 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -441,20 +441,6 @@ void __mptcp_sync_state(struct sock *sk, int state) } } -static void mptcp_propagate_state(struct sock *sk, struct sock *ssk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_data_lock(sk); - if (!sock_owned_by_user(sk)) { - __mptcp_sync_state(sk, ssk->sk_state); - } else { - msk->pending_state = ssk->sk_state; - __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); - } - mptcp_data_unlock(sk); -} - static void subflow_set_remote_key(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt) @@ -476,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk, atomic64_set(&msk->rcv_wnd_sent, subflow->iasn); } +static void mptcp_propagate_state(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_data_lock(sk); + if (mp_opt) { + /* Options are available only in the non fallback cases + * avoid updating rx path fields otherwise + */ + WRITE_ONCE(msk->snd_una, subflow->idsn + 1); + WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd); + subflow_set_remote_key(msk, subflow, mp_opt); + } + + if (!sock_owned_by_user(sk)) { + __mptcp_sync_state(sk, ssk->sk_state); + } else { + msk->pending_state = ssk->sk_state; + __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); + } + mptcp_data_unlock(sk); +} + static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -510,10 +521,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (mp_opt.deny_join_id0) WRITE_ONCE(msk->pm.remote_deny_join_id0, true); subflow->mp_capable = 1; - subflow_set_remote_key(msk, subflow, &mp_opt); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, &mp_opt); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -556,7 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } } else if (mptcp_check_fallback(sk)) { fallback: - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, NULL); } return; @@ -741,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk) kfree_rcu(ctx, rcu); } -void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt) +void __mptcp_subflow_fully_established(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) { - struct mptcp_sock *msk = mptcp_sk(subflow->conn); - subflow_set_remote_key(msk, subflow, mp_opt); subflow->fully_established = 1; WRITE_ONCE(msk->fully_established, true); if (subflow->is_mptfo) - mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); + __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); } static struct sock *subflow_syn_recv_sock(const struct sock *sk, @@ -844,7 +853,6 @@ create_child: * mpc option */ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { - mptcp_subflow_fully_established(ctx, &mp_opt); mptcp_pm_fully_established(owner, child); ctx->pm_notified = 1; } @@ -1748,7 +1756,7 @@ static void subflow_state_change(struct sock *sk) mptcp_do_fallback(sk); pr_fallback(msk); subflow->conn_finished = 1; - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, NULL); } /* as recvmsg() does not acquire the subflow socket for ssk selection From c3682b63c60fdef04fc503d36d08bb84ee9758ad Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Tue, 5 Dec 2023 00:01:15 -0500 Subject: [PATCH 124/299] drm/amd/display: Add dpia display mode validation logic [ Upstream commit 59f1622a5f05d948a7c665a458a3dd76ba73015e ] [Why] If bandwidth allocation feature is enabled, connection manager wont limit the dp tunnel bandwidth. So, need to do display mode validation for streams on dpia links to avoid oversubscription of dp tunnel bandwidth. [How] - To read non reduced link rate and lane count and update reported link capability. - To calculate the bandwidth required for streams of dpia links per host router and validate against the allocated bandwidth for the host router. Tested-by: Daniel Wheeler Reviewed-by: PeiChen Huang Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Meenakshikumar Somasundaram Signed-off-by: Alex Deucher Stable-dep-of: 0484e05d048b ("drm/amd/display: fixed integer types and null check locations") Signed-off-by: Sasha Levin --- .../drm/amd/display/dc/core/dc_link_exports.c | 2 +- drivers/gpu/drm/amd/display/dc/dc.h | 4 +- drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 6 + drivers/gpu/drm/amd/display/dc/dc_types.h | 2 + .../dc/link/protocols/link_dp_dpia_bw.c | 126 +++++++++++++----- 5 files changed, 102 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c index ed94187c2afa..f365773d5714 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c @@ -497,7 +497,7 @@ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable) link->dc->link_srv->enable_hpd_filter(link, enable); } -bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count) +bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count) { return dc->link_srv->validate_dpia_bandwidth(streams, count); } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3f33740e2f65..5f2eac868b74 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -2116,11 +2116,11 @@ int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( * * @dc: pointer to dc struct * @stream: pointer to all possible streams - * @num_streams: number of valid DPIA streams + * @count: number of valid DPIA streams * * return: TRUE if bw used by DPIAs doesn't exceed available BW else return FALSE */ -bool dc_link_validate(struct dc *dc, const struct dc_stream_state *streams, +bool dc_link_dp_dpia_validate(struct dc *dc, const struct dc_stream_state *streams, const unsigned int count); /* Sink Interfaces - A sink corresponds to a display output device */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index cfaa39c5dd16..83719f5bea49 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -1433,6 +1433,12 @@ struct dp_trace { #ifndef DP_TUNNELING_STATUS #define DP_TUNNELING_STATUS 0xE0025 /* 1.4a */ #endif +#ifndef DP_TUNNELING_MAX_LINK_RATE +#define DP_TUNNELING_MAX_LINK_RATE 0xE0028 /* 1.4a */ +#endif +#ifndef DP_TUNNELING_MAX_LANE_COUNT +#define DP_TUNNELING_MAX_LANE_COUNT 0xE0029 /* 1.4a */ +#endif #ifndef DPTX_BW_ALLOCATION_MODE_CONTROL #define DPTX_BW_ALLOCATION_MODE_CONTROL 0xE0030 /* 1.4a */ #endif diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index accffba5a683..19b7314811ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1121,6 +1121,8 @@ struct dc_dpia_bw_alloc { int bw_granularity; // BW Granularity bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM bool response_ready; // Response ready from the CM side + uint8_t nrd_max_lane_count; // Non-reduced max lane count + uint8_t nrd_max_link_rate; // Non-reduced max link rate }; #define MAX_SINKS_PER_LINK 4 diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index d6e1f969bfd5..a7aa8c9da868 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -59,6 +59,7 @@ static void reset_bw_alloc_struct(struct dc_link *link) link->dpia_bw_alloc_config.estimated_bw = 0; link->dpia_bw_alloc_config.bw_granularity = 0; link->dpia_bw_alloc_config.response_ready = false; + link->dpia_bw_alloc_config.sink_allocated_bw = 0; } #define BW_GRANULARITY_0 4 // 0.25 Gbps @@ -104,6 +105,32 @@ static int get_estimated_bw(struct dc_link *link) return bw_estimated_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); } +static int get_non_reduced_max_link_rate(struct dc_link *link) +{ + uint8_t nrd_max_link_rate = 0; + + core_link_read_dpcd( + link, + DP_TUNNELING_MAX_LINK_RATE, + &nrd_max_link_rate, + sizeof(uint8_t)); + + return nrd_max_link_rate; +} + +static int get_non_reduced_max_lane_count(struct dc_link *link) +{ + uint8_t nrd_max_lane_count = 0; + + core_link_read_dpcd( + link, + DP_TUNNELING_MAX_LANE_COUNT, + &nrd_max_lane_count, + sizeof(uint8_t)); + + return nrd_max_lane_count; +} + /* * Read all New BW alloc configuration ex: estimated_bw, allocated_bw, * granuality, Driver_ID, CM_Group, & populate the BW allocation structs @@ -111,13 +138,20 @@ static int get_estimated_bw(struct dc_link *link) */ static void init_usb4_bw_struct(struct dc_link *link) { - // Init the known values + reset_bw_alloc_struct(link); + + /* init the known values */ link->dpia_bw_alloc_config.bw_granularity = get_bw_granularity(link); link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); + link->dpia_bw_alloc_config.nrd_max_link_rate = get_non_reduced_max_link_rate(link); + link->dpia_bw_alloc_config.nrd_max_lane_count = get_non_reduced_max_lane_count(link); DC_LOG_DEBUG("%s: bw_granularity(%d), estimated_bw(%d)\n", __func__, link->dpia_bw_alloc_config.bw_granularity, link->dpia_bw_alloc_config.estimated_bw); + DC_LOG_DEBUG("%s: nrd_max_link_rate(%d), nrd_max_lane_count(%d)\n", + __func__, link->dpia_bw_alloc_config.nrd_max_link_rate, + link->dpia_bw_alloc_config.nrd_max_lane_count); } static uint8_t get_lowest_dpia_index(struct dc_link *link) @@ -142,39 +176,50 @@ static uint8_t get_lowest_dpia_index(struct dc_link *link) } /* - * Get the Max Available BW or Max Estimated BW for each Host Router + * Get the maximum dp tunnel banwidth of host router * - * @link: pointer to the dc_link struct instance - * @type: ESTIMATD BW or MAX AVAILABLE BW + * @dc: pointer to the dc struct instance + * @hr_index: host router index * - * return: response_ready flag from dc_link struct + * return: host router maximum dp tunnel bandwidth */ -static int get_host_router_total_bw(struct dc_link *link, uint8_t type) +static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_index) { - const struct dc *dc_struct = link->dc; - uint8_t lowest_dpia_index = get_lowest_dpia_index(link); - uint8_t idx = (link->link_index - lowest_dpia_index) / 2, idx_temp = 0; - struct dc_link *link_temp; + uint8_t lowest_dpia_index = get_lowest_dpia_index(dc->links[0]); + uint8_t hr_index_temp = 0; + struct dc_link *link_dpia_primary, *link_dpia_secondary; int total_bw = 0; - int i; - for (i = 0; i < MAX_PIPES * 2; ++i) { + for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) { - if (!dc_struct->links[i] || dc_struct->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) + if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) continue; - link_temp = dc_struct->links[i]; - if (!link_temp || !link_temp->hpd_status) - continue; + hr_index_temp = (dc->links[i]->link_index - lowest_dpia_index) / 2; - idx_temp = (link_temp->link_index - lowest_dpia_index) / 2; + if (hr_index_temp == hr_index) { + link_dpia_primary = dc->links[i]; + link_dpia_secondary = dc->links[i + 1]; - if (idx_temp == idx) { - - if (type == HOST_ROUTER_BW_ESTIMATED) - total_bw += link_temp->dpia_bw_alloc_config.estimated_bw; - else if (type == HOST_ROUTER_BW_ALLOCATED) - total_bw += link_temp->dpia_bw_alloc_config.sink_allocated_bw; + /** + * If BW allocation enabled on both DPIAs, then + * HR BW = Estimated(dpia_primary) + Allocated(dpia_secondary) + * otherwise HR BW = Estimated(bw alloc enabled dpia) + */ + if ((link_dpia_primary->hpd_status && + link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) && + (link_dpia_secondary->hpd_status && + link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) { + total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + + link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw; + } else if (link_dpia_primary->hpd_status && + link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) { + total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw; + } else if (link_dpia_secondary->hpd_status && + link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled) { + total_bw += link_dpia_secondary->dpia_bw_alloc_config.estimated_bw; + } + break; } } @@ -194,7 +239,6 @@ static void dpia_bw_alloc_unplug(struct dc_link *link) if (link) { DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n", __func__, link->link_index); - link->dpia_bw_alloc_config.sink_allocated_bw = 0; reset_bw_alloc_struct(link); } } @@ -397,7 +441,7 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea if (!timeout) ret = 0;// ERROR TIMEOUT waiting for response for allocating bw else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0) - ret = get_host_router_total_bw(link, HOST_ROUTER_BW_ALLOCATED); + ret = link->dpia_bw_alloc_config.sink_allocated_bw; } //2. Cold Unplug else if (!link->hpd_status) @@ -439,29 +483,41 @@ out: bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias) { bool ret = true; - int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }; - uint8_t lowest_dpia_index = 0, dpia_index = 0; - uint8_t i; + int bw_needed_per_hr[MAX_HR_NUM] = { 0, 0 }, host_router_total_dp_bw = 0; + uint8_t lowest_dpia_index, i, hr_index; if (!num_dpias || num_dpias > MAX_DPIA_NUM) return ret; - //Get total Host Router BW & Validate against each Host Router max BW + lowest_dpia_index = get_lowest_dpia_index(link[0]); + + /* get total Host Router BW with granularity for the given modes */ for (i = 0; i < num_dpias; ++i) { + int granularity_Gbps = 0; + int bw_granularity = 0; if (!link[i]->dpia_bw_alloc_config.bw_alloc_enabled) continue; - lowest_dpia_index = get_lowest_dpia_index(link[i]); if (link[i]->link_index < lowest_dpia_index) continue; - dpia_index = (link[i]->link_index - lowest_dpia_index) / 2; - bw_needed_per_hr[dpia_index] += bw_needed_per_dpia[i]; - if (bw_needed_per_hr[dpia_index] > get_host_router_total_bw(link[i], HOST_ROUTER_BW_ALLOCATED)) { + granularity_Gbps = (Kbps_TO_Gbps / link[i]->dpia_bw_alloc_config.bw_granularity); + bw_granularity = (bw_needed_per_dpia[i] / granularity_Gbps) * granularity_Gbps + + ((bw_needed_per_dpia[i] % granularity_Gbps) ? granularity_Gbps : 0); - ret = false; - break; + hr_index = (link[i]->link_index - lowest_dpia_index) / 2; + bw_needed_per_hr[hr_index] += bw_granularity; + } + + /* validate against each Host Router max BW */ + for (hr_index = 0; hr_index < MAX_HR_NUM; ++hr_index) { + if (bw_needed_per_hr[hr_index]) { + host_router_total_dp_bw = get_host_router_total_dp_tunnel_bw(link[0]->dc, hr_index); + if (bw_needed_per_hr[hr_index] > host_router_total_dp_bw) { + ret = false; + break; + } } } From 622c827544ef300899a757f5b1d61554c2c3abcc Mon Sep 17 00:00:00 2001 From: Peichen Huang Date: Thu, 14 Dec 2023 23:16:34 +0800 Subject: [PATCH 125/299] drm/amd/display: Request usb4 bw for mst streams [ Upstream commit 5f3bce13266e6fe2f7a46f94d8bc94d5274e276b ] [WHY] When usb4 bandwidth allocation mode is enabled, driver need to request bandwidth from connection manager. For mst link, the requested bandwidth should be big enough for all remote streams. [HOW] - If mst link, the requested bandwidth should be the sum of all mst streams bandwidth added with dp MTPH overhead. - Allocate/deallcate usb4 bandwidth when setting dpms on/off. - When doing display mode validation, driver also need to consider total bandwidth of all mst streams for mst link. Reviewed-by: Cruise Hung Acked-by: Rodrigo Siqueira Signed-off-by: Peichen Huang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Stable-dep-of: 0484e05d048b ("drm/amd/display: fixed integer types and null check locations") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dc_types.h | 12 ++-- .../gpu/drm/amd/display/dc/link/link_dpms.c | 42 ++++++++++--- .../drm/amd/display/dc/link/link_validation.c | 60 +++++++++++++++---- .../dc/link/protocols/link_dp_dpia_bw.c | 59 +++++++++++++----- .../dc/link/protocols/link_dp_dpia_bw.h | 9 +++ 5 files changed, 144 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 19b7314811ae..cc173ecf78e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1110,23 +1110,25 @@ struct dc_panel_config { } ilr; }; +#define MAX_SINKS_PER_LINK 4 + /* * USB4 DPIA BW ALLOCATION STRUCTS */ struct dc_dpia_bw_alloc { - int sink_verified_bw; // The Verified BW that sink can allocated and use that has been verified already - int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated - int sink_max_bw; // The Max BW that sink can require/support + int remote_sink_req_bw[MAX_SINKS_PER_LINK]; // BW requested by remote sinks + int link_verified_bw; // The Verified BW that link can allocated and use that has been verified already + int link_max_bw; // The Max BW that link can require/support + int allocated_bw; // The Actual Allocated BW for this DPIA int estimated_bw; // The estimated available BW for this DPIA int bw_granularity; // BW Granularity + int dp_overhead; // DP overhead in dp tunneling bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM bool response_ready; // Response ready from the CM side uint8_t nrd_max_lane_count; // Non-reduced max lane count uint8_t nrd_max_link_rate; // Non-reduced max link rate }; -#define MAX_SINKS_PER_LINK 4 - enum dc_hpd_enable_select { HPD_EN_FOR_ALL_EDP = 0, HPD_EN_FOR_PRIMARY_EDP_ONLY, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index b9768cd9b8a0..4901e27f678b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2071,17 +2071,11 @@ static enum dc_status enable_link_dp(struct dc_state *state, } } - /* - * If the link is DP-over-USB4 do the following: - * - Train with fallback when enabling DPIA link. Conventional links are + /* Train with fallback when enabling DPIA link. Conventional links are * trained with fallback during sink detection. - * - Allocate only what the stream needs for bw in Gbps. Inform the CM - * in case stream needs more or less bw from what has been allocated - * earlier at plug time. */ - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) do_fallback = true; - } /* * Temporary w/a to get DP2.0 link rates to work with SST. @@ -2263,6 +2257,32 @@ static enum dc_status enable_link( return status; } +static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, int bw) +{ + return true; +} + +static bool allocate_usb4_bandwidth(struct dc_stream_state *stream) +{ + bool ret; + + int bw = dc_bandwidth_in_kbps_from_timing(&stream->timing, + dc_link_get_highest_encoding_format(stream->sink->link)); + + ret = allocate_usb4_bandwidth_for_stream(stream, bw); + + return ret; +} + +static bool deallocate_usb4_bandwidth(struct dc_stream_state *stream) +{ + bool ret; + + ret = allocate_usb4_bandwidth_for_stream(stream, 0); + + return ret; +} + void link_set_dpms_off(struct pipe_ctx *pipe_ctx) { struct dc *dc = pipe_ctx->stream->ctx->dc; @@ -2299,6 +2319,9 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx) update_psp_stream_config(pipe_ctx, true); dc->hwss.blank_stream(pipe_ctx); + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + deallocate_usb4_bandwidth(pipe_ctx->stream); + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && @@ -2520,6 +2543,9 @@ void link_set_dpms_on( } } + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + allocate_usb4_bandwidth(pipe_ctx->stream); + if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) allocate_mst_payload(pipe_ctx); else if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT && diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index b45fda96eaf6..8fe66c367850 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -346,23 +346,61 @@ enum dc_status link_validate_mode_timing( return DC_OK; } +/* + * This function calculates the bandwidth required for the stream timing + * and aggregates the stream bandwidth for the respective dpia link + * + * @stream: pointer to the dc_stream_state struct instance + * @num_streams: number of streams to be validated + * + * return: true if validation is succeeded + */ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const unsigned int num_streams) { - bool ret = true; - int bw_needed[MAX_DPIA_NUM]; - struct dc_link *link[MAX_DPIA_NUM]; - - if (!num_streams || num_streams > MAX_DPIA_NUM) - return ret; + int bw_needed[MAX_DPIA_NUM] = {0}; + struct dc_link *dpia_link[MAX_DPIA_NUM] = {0}; + int num_dpias = 0; for (uint8_t i = 0; i < num_streams; ++i) { + if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) { + /* new dpia sst stream, check whether it exceeds max dpia */ + if (num_dpias >= MAX_DPIA_NUM) + return false; - link[i] = stream[i].link; - bw_needed[i] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing, - dc_link_get_highest_encoding_format(link[i])); + dpia_link[num_dpias] = stream[i].link; + bw_needed[num_dpias] = dc_bandwidth_in_kbps_from_timing(&stream[i].timing, + dc_link_get_highest_encoding_format(dpia_link[num_dpias])); + num_dpias++; + } else if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + uint8_t j = 0; + /* check whether its a known dpia link */ + for (; j < num_dpias; ++j) { + if (dpia_link[j] == stream[i].link) + break; + } + + if (j == num_dpias) { + /* new dpia mst stream, check whether it exceeds max dpia */ + if (num_dpias >= MAX_DPIA_NUM) + return false; + else { + dpia_link[j] = stream[i].link; + num_dpias++; + } + } + + bw_needed[j] += dc_bandwidth_in_kbps_from_timing(&stream[i].timing, + dc_link_get_highest_encoding_format(dpia_link[j])); + } } - ret = dpia_validate_usb4_bw(link, bw_needed, num_streams); + /* Include dp overheads */ + for (uint8_t i = 0; i < num_dpias; ++i) { + int dp_overhead = 0; - return ret; + dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(dpia_link[i]); + bw_needed[i] += dp_overhead; + } + + return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias); } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index a7aa8c9da868..4ef1a6a1d129 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -54,12 +54,18 @@ static bool get_bw_alloc_proceed_flag(struct dc_link *tmp) static void reset_bw_alloc_struct(struct dc_link *link) { link->dpia_bw_alloc_config.bw_alloc_enabled = false; - link->dpia_bw_alloc_config.sink_verified_bw = 0; - link->dpia_bw_alloc_config.sink_max_bw = 0; + link->dpia_bw_alloc_config.link_verified_bw = 0; + link->dpia_bw_alloc_config.link_max_bw = 0; + link->dpia_bw_alloc_config.allocated_bw = 0; link->dpia_bw_alloc_config.estimated_bw = 0; link->dpia_bw_alloc_config.bw_granularity = 0; + link->dpia_bw_alloc_config.dp_overhead = 0; link->dpia_bw_alloc_config.response_ready = false; - link->dpia_bw_alloc_config.sink_allocated_bw = 0; + link->dpia_bw_alloc_config.nrd_max_lane_count = 0; + link->dpia_bw_alloc_config.nrd_max_link_rate = 0; + for (int i = 0; i < MAX_SINKS_PER_LINK; i++) + link->dpia_bw_alloc_config.remote_sink_req_bw[i] = 0; + DC_LOG_DEBUG("reset usb4 bw alloc of link(%d)\n", link->link_index); } #define BW_GRANULARITY_0 4 // 0.25 Gbps @@ -210,8 +216,8 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) && (link_dpia_secondary->hpd_status && link_dpia_secondary->dpia_bw_alloc_config.bw_alloc_enabled)) { - total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + - link_dpia_secondary->dpia_bw_alloc_config.sink_allocated_bw; + total_bw += link_dpia_primary->dpia_bw_alloc_config.estimated_bw + + link_dpia_secondary->dpia_bw_alloc_config.allocated_bw; } else if (link_dpia_primary->hpd_status && link_dpia_primary->dpia_bw_alloc_config.bw_alloc_enabled) { total_bw = link_dpia_primary->dpia_bw_alloc_config.estimated_bw; @@ -264,7 +270,7 @@ static void set_usb4_req_bw_req(struct dc_link *link, int req_bw) /* Error check whether requested and allocated are equal */ req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - if (req_bw == link->dpia_bw_alloc_config.sink_allocated_bw) { + if (req_bw == link->dpia_bw_alloc_config.allocated_bw) { DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n", __func__, link->link_index); } @@ -387,9 +393,9 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n", __func__, link->link_index); DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n", - __func__, link->dpia_bw_alloc_config.sink_allocated_bw, bw_needed); + __func__, link->dpia_bw_alloc_config.allocated_bw, bw_needed); - link->dpia_bw_alloc_config.sink_allocated_bw = bw_needed; + link->dpia_bw_alloc_config.allocated_bw = bw_needed; link->dpia_bw_alloc_config.response_ready = true; break; @@ -427,8 +433,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea if (link->hpd_status && peak_bw > 0) { // If DP over USB4 then we need to check BW allocation - link->dpia_bw_alloc_config.sink_max_bw = peak_bw; - set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.sink_max_bw); + link->dpia_bw_alloc_config.link_max_bw = peak_bw; + set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.link_max_bw); do { if (timeout > 0) @@ -440,8 +446,8 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea if (!timeout) ret = 0;// ERROR TIMEOUT waiting for response for allocating bw - else if (link->dpia_bw_alloc_config.sink_allocated_bw > 0) - ret = link->dpia_bw_alloc_config.sink_allocated_bw; + else if (link->dpia_bw_alloc_config.allocated_bw > 0) + ret = link->dpia_bw_alloc_config.allocated_bw; } //2. Cold Unplug else if (!link->hpd_status) @@ -450,7 +456,6 @@ int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int pea out: return ret; } - bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) { bool ret = false; @@ -458,7 +463,7 @@ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n", __func__, link->link_index, link->hpd_status, - link->dpia_bw_alloc_config.sink_allocated_bw, req_bw); + link->dpia_bw_alloc_config.allocated_bw, req_bw); if (!get_bw_alloc_proceed_flag(link)) goto out; @@ -523,3 +528,29 @@ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const return ret; } + +int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link) +{ + int dp_overhead = 0, link_mst_overhead = 0; + + if (!get_bw_alloc_proceed_flag((link))) + return dp_overhead; + + /* if its mst link, add MTPH overhead */ + if ((link->type == dc_connection_mst_branch) && + !link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) { + /* For 8b/10b encoding: MTP is 64 time slots long, slot 0 is used for MTPH + * MST overhead is 1/64 of link bandwidth (excluding any overhead) + */ + const struct dc_link_settings *link_cap = + dc_link_get_link_cap(link); + uint32_t link_bw_in_kbps = + link_cap->link_rate * link_cap->lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8; + link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 1 : 0); + } + + /* add all the overheads */ + dp_overhead = link_mst_overhead; + + return dp_overhead; +} diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h index 981bc4eb6120..3b6d8494f9d5 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h @@ -99,4 +99,13 @@ void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t res */ bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed, const unsigned int num_dpias); +/* + * Obtain all the DP overheads in dp tunneling for the dpia link + * + * @link: pointer to the dc_link struct instance + * + * return: DP overheads in DP tunneling + */ +int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link); + #endif /* DC_INC_LINK_DP_DPIA_BW_H_ */ From 71783d1ff65204d69207fd156d4b2eb1d3882375 Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem Date: Wed, 31 Jan 2024 16:40:37 -0500 Subject: [PATCH 126/299] drm/amd/display: fixed integer types and null check locations [ Upstream commit 0484e05d048b66d01d1f3c1d2306010bb57d8738 ] [why]: issues fixed: - comparison with wider integer type in loop condition which can cause infinite loops - pointer dereference before null check Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Josip Pavic Acked-by: Aurabindo Pillai Signed-off-by: Sohaib Nadeem Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/display/dc/bios/bios_parser2.c | 16 ++++++++++------ .../drm/amd/display/dc/link/link_validation.c | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index bbf2a465f400..4c3c4c8de1cf 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1860,19 +1860,21 @@ static enum bp_result get_firmware_info_v3_2( /* Vega12 */ smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2, DATA_TABLES(smu_info)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage); if (!smu_info_v3_2) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage); + info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10; } else if (revision.minor == 3) { /* Vega20 */ smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3, DATA_TABLES(smu_info)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage); if (!smu_info_v3_3) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage); + info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10; } @@ -2435,10 +2437,11 @@ static enum bp_result get_integrated_info_v11( info_v11 = GET_IMAGE(struct atom_integrated_system_info_v1_11, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage); if (info_v11 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v11->gpucapinfo); /* @@ -2650,11 +2653,12 @@ static enum bp_result get_integrated_info_v2_1( info_v2_1 = GET_IMAGE(struct atom_integrated_system_info_v2_1, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage); if (info_v2_1 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v2_1->gpucapinfo); /* @@ -2812,11 +2816,11 @@ static enum bp_result get_integrated_info_v2_2( info_v2_2 = GET_IMAGE(struct atom_integrated_system_info_v2_2, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage); - if (info_v2_2 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v2_2->gpucapinfo); /* diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index 8fe66c367850..5b0bc7f6a188 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -361,7 +361,7 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un struct dc_link *dpia_link[MAX_DPIA_NUM] = {0}; int num_dpias = 0; - for (uint8_t i = 0; i < num_streams; ++i) { + for (unsigned int i = 0; i < num_streams; ++i) { if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) { /* new dpia sst stream, check whether it exceeds max dpia */ if (num_dpias >= MAX_DPIA_NUM) From 3c8f5965a99397368d3762a9814a21a3e442e1a4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 16 Oct 2023 12:41:26 +0530 Subject: [PATCH 127/299] xen: evtchn: Allow shared registration of IRQ handers [ Upstream commit 9e90e58c11b74c2bddac4b2702cf79d36b981278 ] Currently the handling of events is supported either in the kernel or userspace, but not both. In order to support fast delivery of interrupts from the guest to the backend, we need to handle the Queue notify part of Virtio protocol in kernel and the rest in userspace. Update the interrupt handler registration flag to IRQF_SHARED for event channels, which would allow multiple entities to bind their interrupt handler for the same event channel port. Also increment the reference count of irq_info when multiple entities try to bind event channel to irqchip, so the unbinding happens only after all the users are gone. Signed-off-by: Viresh Kumar Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/99b1edfd3147c6b5d22a5139dab5861e767dc34a.1697439990.git.viresh.kumar@linaro.org Signed-off-by: Juergen Gross Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup") Signed-off-by: Sasha Levin --- drivers/xen/events/events_base.c | 3 ++- drivers/xen/evtchn.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index c50419638ac0..cd33a418344a 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1235,7 +1235,8 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, bind_evtchn_to_cpu(evtchn, 0, false); } else { struct irq_info *info = info_for_irq(irq); - WARN_ON(info == NULL || info->type != IRQT_EVTCHN); + if (!WARN_ON(!info || info->type != IRQT_EVTCHN)) + info->refcnt++; } out: diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 9139a7364df5..59717628ca42 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -397,7 +397,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port, if (rc < 0) goto err; - rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0, + rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, IRQF_SHARED, u->name, evtchn); if (rc < 0) goto err; From b79345efd03851121db47f09174722123058cca8 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 27 Sep 2023 08:58:05 +0200 Subject: [PATCH 128/299] xen/events: reduce externally visible helper functions [ Upstream commit 686464514fbebb6c8de4415238319e414c3500a4 ] get_evtchn_to_irq() has only one external user while irq_from_evtchn() provides the same functionality and is exported for a wider user base. Modify the only external user of get_evtchn_to_irq() to use irq_from_evtchn() instead and make get_evtchn_to_irq() static. evtchn_from_irq() and irq_from_virq() have a single external user and can easily be combined to a new helper irq_evtchn_from_virq() allowing to drop irq_from_virq() and to make evtchn_from_irq() static. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup") Signed-off-by: Sasha Levin --- drivers/xen/events/events_2l.c | 8 ++++---- drivers/xen/events/events_base.c | 13 +++++++++---- drivers/xen/events/events_internal.h | 1 - include/xen/events.h | 4 ++-- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index b8f2f971c2f0..e3585330cf98 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -171,11 +171,11 @@ static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl) int i; struct shared_info *s = HYPERVISOR_shared_info; struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + evtchn_port_t evtchn; /* Timer interrupt has highest priority. */ - irq = irq_from_virq(cpu, VIRQ_TIMER); + irq = irq_evtchn_from_virq(cpu, VIRQ_TIMER, &evtchn); if (irq != -1) { - evtchn_port_t evtchn = evtchn_from_irq(irq); word_idx = evtchn / BITS_PER_LONG; bit_idx = evtchn % BITS_PER_LONG; if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) @@ -328,9 +328,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) { if (sync_test_bit(i, BM(sh->evtchn_pending))) { int word_idx = i / BITS_PER_EVTCHN_WORD; - printk(" %d: event %d -> irq %d%s%s%s\n", + printk(" %d: event %d -> irq %u%s%s%s\n", cpu_from_evtchn(i), i, - get_evtchn_to_irq(i), + irq_from_evtchn(i), sync_test_bit(word_idx, BM(&v->evtchn_pending_sel)) ? "" : " l2-clear", !sync_test_bit(i, BM(sh->evtchn_mask)) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index cd33a418344a..57dfb512cdc5 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -248,7 +248,7 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) return 0; } -int get_evtchn_to_irq(evtchn_port_t evtchn) +static int get_evtchn_to_irq(evtchn_port_t evtchn) { if (evtchn >= xen_evtchn_max_channels()) return -1; @@ -415,7 +415,7 @@ static void xen_irq_info_cleanup(struct irq_info *info) /* * Accessors for packed IRQ information. */ -evtchn_port_t evtchn_from_irq(unsigned irq) +static evtchn_port_t evtchn_from_irq(unsigned int irq) { const struct irq_info *info = NULL; @@ -433,9 +433,14 @@ unsigned int irq_from_evtchn(evtchn_port_t evtchn) } EXPORT_SYMBOL_GPL(irq_from_evtchn); -int irq_from_virq(unsigned int cpu, unsigned int virq) +int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq, + evtchn_port_t *evtchn) { - return per_cpu(virq_to_irq, cpu)[virq]; + int irq = per_cpu(virq_to_irq, cpu)[virq]; + + *evtchn = evtchn_from_irq(irq); + + return irq; } static enum ipi_vector ipi_from_irq(unsigned irq) diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 4d3398eff9cd..19ae31695edc 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -33,7 +33,6 @@ struct evtchn_ops { extern const struct evtchn_ops *evtchn_ops; -int get_evtchn_to_irq(evtchn_port_t evtchn); void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl); unsigned int cpu_from_evtchn(evtchn_port_t evtchn); diff --git a/include/xen/events.h b/include/xen/events.h index 23932b0673dc..7488cd51fbf4 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -101,8 +101,8 @@ void xen_poll_irq_timeout(int irq, u64 timeout); /* Determine the IRQ which is bound to an event channel */ unsigned int irq_from_evtchn(evtchn_port_t evtchn); -int irq_from_virq(unsigned int cpu, unsigned int virq); -evtchn_port_t evtchn_from_irq(unsigned irq); +int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq, + evtchn_port_t *evtchn); int xen_set_callback_via(uint64_t via); int xen_evtchn_do_upcall(void); From 666860d56d83a755f8148724e5bb59ee042db611 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 27 Sep 2023 08:24:46 +0200 Subject: [PATCH 129/299] xen/events: remove some simple helpers from events_base.c [ Upstream commit 3bdb0ac350fe5e6301562143e4573971dd01ae0b ] The helper functions type_from_irq() and cpu_from_irq() are just one line functions used only internally. Open code them where needed. At the same time modify and rename get_evtchn_to_irq() to return a struct irq_info instead of the IRQ number. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup") Signed-off-by: Sasha Levin --- drivers/xen/events/events_base.c | 97 +++++++++++++------------------- 1 file changed, 38 insertions(+), 59 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 57dfb512cdc5..d3d750162838 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -248,15 +248,6 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) return 0; } -static int get_evtchn_to_irq(evtchn_port_t evtchn) -{ - if (evtchn >= xen_evtchn_max_channels()) - return -1; - if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) - return -1; - return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); -} - /* Get info for IRQ */ static struct irq_info *info_for_irq(unsigned irq) { @@ -274,6 +265,19 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info) irq_set_chip_data(irq, info); } +static struct irq_info *evtchn_to_info(evtchn_port_t evtchn) +{ + int irq; + + if (evtchn >= xen_evtchn_max_channels()) + return NULL; + if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) + return NULL; + irq = READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); + + return (irq < 0) ? NULL : info_for_irq(irq); +} + /* Per CPU channel accounting */ static void channels_on_cpu_dec(struct irq_info *info) { @@ -429,7 +433,9 @@ static evtchn_port_t evtchn_from_irq(unsigned int irq) unsigned int irq_from_evtchn(evtchn_port_t evtchn) { - return get_evtchn_to_irq(evtchn); + struct irq_info *info = evtchn_to_info(evtchn); + + return info ? info->irq : -1; } EXPORT_SYMBOL_GPL(irq_from_evtchn); @@ -473,25 +479,11 @@ static unsigned pirq_from_irq(unsigned irq) return info->u.pirq.pirq; } -static enum xen_irq_type type_from_irq(unsigned irq) -{ - return info_for_irq(irq)->type; -} - -static unsigned cpu_from_irq(unsigned irq) -{ - return info_for_irq(irq)->cpu; -} - unsigned int cpu_from_evtchn(evtchn_port_t evtchn) { - int irq = get_evtchn_to_irq(evtchn); - unsigned ret = 0; + struct irq_info *info = evtchn_to_info(evtchn); - if (irq != -1) - ret = cpu_from_irq(irq); - - return ret; + return info ? info->cpu : 0; } static void do_mask(struct irq_info *info, u8 reason) @@ -540,13 +532,12 @@ static bool pirq_needs_eoi_flag(unsigned irq) static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, bool force_affinity) { - int irq = get_evtchn_to_irq(evtchn); - struct irq_info *info = info_for_irq(irq); + struct irq_info *info = evtchn_to_info(evtchn); - BUG_ON(irq == -1); + BUG_ON(info == NULL); if (IS_ENABLED(CONFIG_SMP) && force_affinity) { - struct irq_data *data = irq_get_irq_data(irq); + struct irq_data *data = irq_get_irq_data(info->irq); irq_data_update_affinity(data, cpumask_of(cpu)); irq_data_update_effective_affinity(data, cpumask_of(cpu)); @@ -979,13 +970,13 @@ static void __unbind_from_irq(unsigned int irq) } if (VALID_EVTCHN(evtchn)) { - unsigned int cpu = cpu_from_irq(irq); + unsigned int cpu = info->cpu; struct xenbus_device *dev; if (!info->is_static) xen_evtchn_close(evtchn); - switch (type_from_irq(irq)) { + switch (info->type) { case IRQT_VIRQ: per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; break; @@ -1208,15 +1199,16 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, { int irq; int ret; + struct irq_info *info; if (evtchn >= xen_evtchn_max_channels()) return -ENOMEM; mutex_lock(&irq_mapping_update_lock); - irq = get_evtchn_to_irq(evtchn); + info = evtchn_to_info(evtchn); - if (irq == -1) { + if (!info) { irq = xen_allocate_irq_dynamic(); if (irq < 0) goto out; @@ -1239,9 +1231,9 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, */ bind_evtchn_to_cpu(evtchn, 0, false); } else { - struct irq_info *info = info_for_irq(irq); - if (!WARN_ON(!info || info->type != IRQT_EVTCHN)) + if (!WARN_ON(info->type != IRQT_EVTCHN)) info->refcnt++; + irq = info->irq; } out: @@ -1579,13 +1571,7 @@ EXPORT_SYMBOL_GPL(xen_set_irq_priority); int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static) { - int irq = get_evtchn_to_irq(evtchn); - struct irq_info *info; - - if (irq == -1) - return -ENOENT; - - info = info_for_irq(irq); + struct irq_info *info = evtchn_to_info(evtchn); if (!info) return -ENOENT; @@ -1601,7 +1587,6 @@ EXPORT_SYMBOL_GPL(evtchn_make_refcounted); int evtchn_get(evtchn_port_t evtchn) { - int irq; struct irq_info *info; int err = -ENOENT; @@ -1610,11 +1595,7 @@ int evtchn_get(evtchn_port_t evtchn) mutex_lock(&irq_mapping_update_lock); - irq = get_evtchn_to_irq(evtchn); - if (irq == -1) - goto done; - - info = info_for_irq(irq); + info = evtchn_to_info(evtchn); if (!info) goto done; @@ -1634,10 +1615,11 @@ EXPORT_SYMBOL_GPL(evtchn_get); void evtchn_put(evtchn_port_t evtchn) { - int irq = get_evtchn_to_irq(evtchn); - if (WARN_ON(irq == -1)) + struct irq_info *info = evtchn_to_info(evtchn); + + if (WARN_ON(!info)) return; - unbind_from_irq(irq); + unbind_from_irq(info->irq); } EXPORT_SYMBOL_GPL(evtchn_put); @@ -1667,12 +1649,10 @@ struct evtchn_loop_ctrl { void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) { - int irq; - struct irq_info *info; + struct irq_info *info = evtchn_to_info(port); struct xenbus_device *dev; - irq = get_evtchn_to_irq(port); - if (irq == -1) + if (!info) return; /* @@ -1697,7 +1677,6 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) } } - info = info_for_irq(irq); if (xchg_acquire(&info->is_active, 1)) return; @@ -1711,7 +1690,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) info->eoi_time = get_jiffies_64() + event_eoi_delay; } - generic_handle_irq(irq); + generic_handle_irq(info->irq); } int xen_evtchn_do_upcall(void) @@ -1769,7 +1748,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) mutex_lock(&irq_mapping_update_lock); /* After resume the irq<->evtchn mappings are all cleared out */ - BUG_ON(get_evtchn_to_irq(evtchn) != -1); + BUG_ON(evtchn_to_info(evtchn)); /* Expect irq to have been bound before, so there should be a proper type */ BUG_ON(info->type == IRQT_UNBOUND); From 40f14760da839620d5a1d257205a514aac0ac2b1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 27 Sep 2023 10:29:02 +0200 Subject: [PATCH 130/299] xen/events: drop xen_allocate_irqs_dynamic() [ Upstream commit 5dd9ad32d7758b1a76742f394acf0eb3ac8a636a ] Instead of having a common function for allocating a single IRQ or a consecutive number of IRQs, split up the functionality into the callers of xen_allocate_irqs_dynamic(). This allows to handle any allocation error in xen_irq_init() gracefully instead of panicing the system. Let xen_irq_init() return the irq_info pointer or NULL in case of an allocation error. Additionally set the IRQ into irq_info already at allocation time, as otherwise the IRQ would be '0' (which is a valid IRQ number) until being set. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup") Signed-off-by: Sasha Levin --- drivers/xen/events/events_base.c | 78 +++++++++++++++++++------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index d3d750162838..4dfd68382465 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -304,6 +304,13 @@ static void channels_on_cpu_inc(struct irq_info *info) info->is_accounted = 1; } +static void xen_irq_free_desc(unsigned int irq) +{ + /* Legacy IRQ descriptors are managed by the arch. */ + if (irq >= nr_legacy_irqs()) + irq_free_desc(irq); +} + static void delayed_free_irq(struct work_struct *work) { struct irq_info *info = container_of(to_rcu_work(work), struct irq_info, @@ -315,9 +322,7 @@ static void delayed_free_irq(struct work_struct *work) kfree(info); - /* Legacy IRQ descriptors are managed by the arch. */ - if (irq >= nr_legacy_irqs()) - irq_free_desc(irq); + xen_irq_free_desc(irq); } /* Constructors for packed IRQ information. */ @@ -332,7 +337,6 @@ static int xen_irq_info_common_setup(struct irq_info *info, BUG_ON(info->type != IRQT_UNBOUND && info->type != type); info->type = type; - info->irq = irq; info->evtchn = evtchn; info->cpu = cpu; info->mask_reason = EVT_MASK_REASON_EXPLICIT; @@ -733,45 +737,43 @@ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) } EXPORT_SYMBOL_GPL(xen_irq_lateeoi); -static void xen_irq_init(unsigned irq) +static struct irq_info *xen_irq_init(unsigned int irq) { struct irq_info *info; info = kzalloc(sizeof(*info), GFP_KERNEL); - if (info == NULL) - panic("Unable to allocate metadata for IRQ%d\n", irq); + if (info) { + info->irq = irq; + info->type = IRQT_UNBOUND; + info->refcnt = -1; + INIT_RCU_WORK(&info->rwork, delayed_free_irq); - info->type = IRQT_UNBOUND; - info->refcnt = -1; - INIT_RCU_WORK(&info->rwork, delayed_free_irq); + set_info_for_irq(irq, info); + /* + * Interrupt affinity setting can be immediate. No point + * in delaying it until an interrupt is handled. + */ + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - set_info_for_irq(irq, info); - /* - * Interrupt affinity setting can be immediate. No point - * in delaying it until an interrupt is handled. - */ - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - - INIT_LIST_HEAD(&info->eoi_list); - list_add_tail(&info->list, &xen_irq_list_head); -} - -static int __must_check xen_allocate_irqs_dynamic(int nvec) -{ - int i, irq = irq_alloc_descs(-1, 0, nvec, -1); - - if (irq >= 0) { - for (i = 0; i < nvec; i++) - xen_irq_init(irq + i); + INIT_LIST_HEAD(&info->eoi_list); + list_add_tail(&info->list, &xen_irq_list_head); } - return irq; + return info; } static inline int __must_check xen_allocate_irq_dynamic(void) { + int irq = irq_alloc_desc_from(0, -1); - return xen_allocate_irqs_dynamic(1); + if (irq >= 0) { + if (!xen_irq_init(irq)) { + xen_irq_free_desc(irq); + irq = -1; + } + } + + return irq; } static int __must_check xen_allocate_irq_gsi(unsigned gsi) @@ -793,7 +795,10 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) else irq = irq_alloc_desc_at(gsi, -1); - xen_irq_init(irq); + if (!xen_irq_init(irq)) { + xen_irq_free_desc(irq); + irq = -1; + } return irq; } @@ -963,6 +968,11 @@ static void __unbind_from_irq(unsigned int irq) evtchn_port_t evtchn = evtchn_from_irq(irq); struct irq_info *info = info_for_irq(irq); + if (!info) { + xen_irq_free_desc(irq); + return; + } + if (info->refcnt > 0) { info->refcnt--; if (info->refcnt != 0) @@ -1101,11 +1111,14 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, mutex_lock(&irq_mapping_update_lock); - irq = xen_allocate_irqs_dynamic(nvec); + irq = irq_alloc_descs(-1, 0, nvec, -1); if (irq < 0) goto out; for (i = 0; i < nvec; i++) { + if (!xen_irq_init(irq + i)) + goto error_irq; + irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name); ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid, @@ -1753,6 +1766,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) so there should be a proper type */ BUG_ON(info->type == IRQT_UNBOUND); + info->irq = irq; (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL); mutex_unlock(&irq_mapping_update_lock); From 636ac94cee721c7030f54c9e8134bdaa74780fa3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 28 Sep 2023 09:09:52 +0200 Subject: [PATCH 131/299] xen/events: modify internal [un]bind interfaces [ Upstream commit 3fcdaf3d7634338c3f5cbfa7451eb0b6b0024844 ] Modify the internal bind- and unbind-interfaces to take a struct irq_info parameter. When allocating a new IRQ pass the pointer from the allocating function further up. This will reduce the number of info_for_irq() calls and make the code more efficient. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Signed-off-by: Juergen Gross Stable-dep-of: fa765c4b4aed ("xen/events: close evtchn after mapping cleanup") Signed-off-by: Sasha Levin --- drivers/xen/events/events_base.c | 259 +++++++++++++++---------------- 1 file changed, 124 insertions(+), 135 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 4dfd68382465..6f57ef78f550 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -327,7 +327,6 @@ static void delayed_free_irq(struct work_struct *work) /* Constructors for packed IRQ information. */ static int xen_irq_info_common_setup(struct irq_info *info, - unsigned irq, enum xen_irq_type type, evtchn_port_t evtchn, unsigned short cpu) @@ -342,23 +341,22 @@ static int xen_irq_info_common_setup(struct irq_info *info, info->mask_reason = EVT_MASK_REASON_EXPLICIT; raw_spin_lock_init(&info->lock); - ret = set_evtchn_to_irq(evtchn, irq); + ret = set_evtchn_to_irq(evtchn, info->irq); if (ret < 0) return ret; - irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); + irq_clear_status_flags(info->irq, IRQ_NOREQUEST | IRQ_NOAUTOEN); return xen_evtchn_port_setup(evtchn); } -static int xen_irq_info_evtchn_setup(unsigned irq, +static int xen_irq_info_evtchn_setup(struct irq_info *info, evtchn_port_t evtchn, struct xenbus_device *dev) { - struct irq_info *info = info_for_irq(irq); int ret; - ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0); + ret = xen_irq_info_common_setup(info, IRQT_EVTCHN, evtchn, 0); info->u.interdomain = dev; if (dev) atomic_inc(&dev->event_channels); @@ -366,50 +364,37 @@ static int xen_irq_info_evtchn_setup(unsigned irq, return ret; } -static int xen_irq_info_ipi_setup(unsigned cpu, - unsigned irq, - evtchn_port_t evtchn, - enum ipi_vector ipi) +static int xen_irq_info_ipi_setup(struct irq_info *info, unsigned int cpu, + evtchn_port_t evtchn, enum ipi_vector ipi) { - struct irq_info *info = info_for_irq(irq); - info->u.ipi = ipi; - per_cpu(ipi_to_irq, cpu)[ipi] = irq; + per_cpu(ipi_to_irq, cpu)[ipi] = info->irq; per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn; - return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0); + return xen_irq_info_common_setup(info, IRQT_IPI, evtchn, 0); } -static int xen_irq_info_virq_setup(unsigned cpu, - unsigned irq, - evtchn_port_t evtchn, - unsigned virq) +static int xen_irq_info_virq_setup(struct irq_info *info, unsigned int cpu, + evtchn_port_t evtchn, unsigned int virq) { - struct irq_info *info = info_for_irq(irq); - info->u.virq = virq; - per_cpu(virq_to_irq, cpu)[virq] = irq; + per_cpu(virq_to_irq, cpu)[virq] = info->irq; - return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0); + return xen_irq_info_common_setup(info, IRQT_VIRQ, evtchn, 0); } -static int xen_irq_info_pirq_setup(unsigned irq, - evtchn_port_t evtchn, - unsigned pirq, - unsigned gsi, - uint16_t domid, - unsigned char flags) +static int xen_irq_info_pirq_setup(struct irq_info *info, evtchn_port_t evtchn, + unsigned int pirq, unsigned int gsi, + uint16_t domid, unsigned char flags) { - struct irq_info *info = info_for_irq(irq); - info->u.pirq.pirq = pirq; info->u.pirq.gsi = gsi; info->u.pirq.domid = domid; info->u.pirq.flags = flags; - return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0); + return xen_irq_info_common_setup(info, IRQT_PIRQ, evtchn, 0); } static void xen_irq_info_cleanup(struct irq_info *info) @@ -453,20 +438,16 @@ int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq, return irq; } -static enum ipi_vector ipi_from_irq(unsigned irq) +static enum ipi_vector ipi_from_irq(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); - BUG_ON(info == NULL); BUG_ON(info->type != IRQT_IPI); return info->u.ipi; } -static unsigned virq_from_irq(unsigned irq) +static unsigned int virq_from_irq(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); - BUG_ON(info == NULL); BUG_ON(info->type != IRQT_VIRQ); @@ -533,13 +514,9 @@ static bool pirq_needs_eoi_flag(unsigned irq) return info->u.pirq.flags & PIRQ_NEEDS_EOI; } -static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, +static void bind_evtchn_to_cpu(struct irq_info *info, unsigned int cpu, bool force_affinity) { - struct irq_info *info = evtchn_to_info(evtchn); - - BUG_ON(info == NULL); - if (IS_ENABLED(CONFIG_SMP) && force_affinity) { struct irq_data *data = irq_get_irq_data(info->irq); @@ -547,7 +524,7 @@ static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, irq_data_update_effective_affinity(data, cpumask_of(cpu)); } - xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu); + xen_evtchn_port_bind_to_cpu(info->evtchn, cpu, info->cpu); channels_on_cpu_dec(info); info->cpu = cpu; @@ -762,23 +739,24 @@ static struct irq_info *xen_irq_init(unsigned int irq) return info; } -static inline int __must_check xen_allocate_irq_dynamic(void) +static struct irq_info *xen_allocate_irq_dynamic(void) { int irq = irq_alloc_desc_from(0, -1); + struct irq_info *info = NULL; if (irq >= 0) { - if (!xen_irq_init(irq)) { + info = xen_irq_init(irq); + if (!info) xen_irq_free_desc(irq); - irq = -1; - } } - return irq; + return info; } -static int __must_check xen_allocate_irq_gsi(unsigned gsi) +static struct irq_info *xen_allocate_irq_gsi(unsigned int gsi) { int irq; + struct irq_info *info; /* * A PV guest has no concept of a GSI (since it has no ACPI @@ -795,18 +773,15 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) else irq = irq_alloc_desc_at(gsi, -1); - if (!xen_irq_init(irq)) { + info = xen_irq_init(irq); + if (!info) xen_irq_free_desc(irq); - irq = -1; - } - return irq; + return info; } -static void xen_free_irq(unsigned irq) +static void xen_free_irq(struct irq_info *info) { - struct irq_info *info = info_for_irq(irq); - if (WARN_ON(!info)) return; @@ -897,7 +872,7 @@ static unsigned int __startup_pirq(unsigned int irq) goto err; info->evtchn = evtchn; - bind_evtchn_to_cpu(evtchn, 0, false); + bind_evtchn_to_cpu(info, 0, false); rc = xen_evtchn_port_setup(evtchn); if (rc) @@ -963,10 +938,9 @@ int xen_irq_from_gsi(unsigned gsi) } EXPORT_SYMBOL_GPL(xen_irq_from_gsi); -static void __unbind_from_irq(unsigned int irq) +static void __unbind_from_irq(struct irq_info *info, unsigned int irq) { - evtchn_port_t evtchn = evtchn_from_irq(irq); - struct irq_info *info = info_for_irq(irq); + evtchn_port_t evtchn; if (!info) { xen_irq_free_desc(irq); @@ -979,6 +953,8 @@ static void __unbind_from_irq(unsigned int irq) return; } + evtchn = info->evtchn; + if (VALID_EVTCHN(evtchn)) { unsigned int cpu = info->cpu; struct xenbus_device *dev; @@ -988,11 +964,11 @@ static void __unbind_from_irq(unsigned int irq) switch (info->type) { case IRQT_VIRQ: - per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; + per_cpu(virq_to_irq, cpu)[virq_from_irq(info)] = -1; break; case IRQT_IPI: - per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1; - per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(irq)] = 0; + per_cpu(ipi_to_irq, cpu)[ipi_from_irq(info)] = -1; + per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(info)] = 0; break; case IRQT_EVTCHN: dev = info->u.interdomain; @@ -1006,7 +982,7 @@ static void __unbind_from_irq(unsigned int irq) xen_irq_info_cleanup(info); } - xen_free_irq(irq); + xen_free_irq(info); } /* @@ -1022,24 +998,24 @@ static void __unbind_from_irq(unsigned int irq) int xen_bind_pirq_gsi_to_irq(unsigned gsi, unsigned pirq, int shareable, char *name) { - int irq; + struct irq_info *info; struct physdev_irq irq_op; int ret; mutex_lock(&irq_mapping_update_lock); - irq = xen_irq_from_gsi(gsi); - if (irq != -1) { + ret = xen_irq_from_gsi(gsi); + if (ret != -1) { pr_info("%s: returning irq %d for gsi %u\n", - __func__, irq, gsi); + __func__, ret, gsi); goto out; } - irq = xen_allocate_irq_gsi(gsi); - if (irq < 0) + info = xen_allocate_irq_gsi(gsi); + if (!info) goto out; - irq_op.irq = irq; + irq_op.irq = info->irq; irq_op.vector = 0; /* Only the privileged domain can do this. For non-priv, the pcifront @@ -1047,20 +1023,19 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, * this in the priv domain. */ if (xen_initial_domain() && HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { - xen_free_irq(irq); - irq = -ENOSPC; + xen_free_irq(info); + ret = -ENOSPC; goto out; } - ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF, + ret = xen_irq_info_pirq_setup(info, 0, pirq, gsi, DOMID_SELF, shareable ? PIRQ_SHAREABLE : 0); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } - pirq_query_unmask(irq); + pirq_query_unmask(info->irq); /* We try to use the handler with the appropriate semantic for the * type of interrupt: if the interrupt is an edge triggered * interrupt we use handle_edge_irq. @@ -1077,16 +1052,18 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, * is the right choice either way. */ if (shareable) - irq_set_chip_and_handler_name(irq, &xen_pirq_chip, + irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip, handle_fasteoi_irq, name); else - irq_set_chip_and_handler_name(irq, &xen_pirq_chip, + irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip, handle_edge_irq, name); + ret = info->irq; + out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } #ifdef CONFIG_PCI_MSI @@ -1108,6 +1085,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, int pirq, int nvec, const char *name, domid_t domid) { int i, irq, ret; + struct irq_info *info; mutex_lock(&irq_mapping_update_lock); @@ -1116,12 +1094,13 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, goto out; for (i = 0; i < nvec; i++) { - if (!xen_irq_init(irq + i)) + info = xen_irq_init(irq + i); + if (!info) goto error_irq; irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name); - ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid, + ret = xen_irq_info_pirq_setup(info, 0, pirq + i, 0, domid, i == 0 ? 0 : PIRQ_MSI_GROUP); if (ret < 0) goto error_irq; @@ -1133,9 +1112,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, out: mutex_unlock(&irq_mapping_update_lock); return irq; + error_irq: - while (nvec--) - __unbind_from_irq(irq + nvec); + while (nvec--) { + info = info_for_irq(irq + nvec); + __unbind_from_irq(info, irq + nvec); + } mutex_unlock(&irq_mapping_update_lock); return ret; } @@ -1171,7 +1153,7 @@ int xen_destroy_irq(int irq) } } - xen_free_irq(irq); + xen_free_irq(info); out: mutex_unlock(&irq_mapping_update_lock); @@ -1210,8 +1192,7 @@ EXPORT_SYMBOL_GPL(xen_pirq_from_irq); static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, struct xenbus_device *dev) { - int irq; - int ret; + int ret = -ENOMEM; struct irq_info *info; if (evtchn >= xen_evtchn_max_channels()) @@ -1222,17 +1203,16 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, info = evtchn_to_info(evtchn); if (!info) { - irq = xen_allocate_irq_dynamic(); - if (irq < 0) + info = xen_allocate_irq_dynamic(); + if (!info) goto out; - irq_set_chip_and_handler_name(irq, chip, + irq_set_chip_and_handler_name(info->irq, chip, handle_edge_irq, "event"); - ret = xen_irq_info_evtchn_setup(irq, evtchn, dev); + ret = xen_irq_info_evtchn_setup(info, evtchn, dev); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } /* @@ -1242,17 +1222,17 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, * affinity setting is not invoked on them so nothing would * bind the channel. */ - bind_evtchn_to_cpu(evtchn, 0, false); - } else { - if (!WARN_ON(info->type != IRQT_EVTCHN)) - info->refcnt++; - irq = info->irq; + bind_evtchn_to_cpu(info, 0, false); + } else if (!WARN_ON(info->type != IRQT_EVTCHN)) { + info->refcnt++; } + ret = info->irq; + out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } int bind_evtchn_to_irq(evtchn_port_t evtchn) @@ -1271,18 +1251,19 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; evtchn_port_t evtchn; - int ret, irq; + struct irq_info *info; + int ret; mutex_lock(&irq_mapping_update_lock); - irq = per_cpu(ipi_to_irq, cpu)[ipi]; + ret = per_cpu(ipi_to_irq, cpu)[ipi]; - if (irq == -1) { - irq = xen_allocate_irq_dynamic(); - if (irq < 0) + if (ret == -1) { + info = xen_allocate_irq_dynamic(); + if (!info) goto out; - irq_set_chip_and_handler_name(irq, &xen_percpu_chip, + irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip, handle_percpu_irq, "ipi"); bind_ipi.vcpu = xen_vcpu_nr(cpu); @@ -1291,25 +1272,25 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) BUG(); evtchn = bind_ipi.port; - ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); + ret = xen_irq_info_ipi_setup(info, cpu, evtchn, ipi); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } /* * Force the affinity mask to the target CPU so proc shows * the correct target. */ - bind_evtchn_to_cpu(evtchn, cpu, true); + bind_evtchn_to_cpu(info, cpu, true); + ret = info->irq; } else { - struct irq_info *info = info_for_irq(irq); + info = info_for_irq(ret); WARN_ON(info == NULL || info->type != IRQT_IPI); } out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev, @@ -1377,22 +1358,23 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) { struct evtchn_bind_virq bind_virq; evtchn_port_t evtchn = 0; - int irq, ret; + struct irq_info *info; + int ret; mutex_lock(&irq_mapping_update_lock); - irq = per_cpu(virq_to_irq, cpu)[virq]; + ret = per_cpu(virq_to_irq, cpu)[virq]; - if (irq == -1) { - irq = xen_allocate_irq_dynamic(); - if (irq < 0) + if (ret == -1) { + info = xen_allocate_irq_dynamic(); + if (!info) goto out; if (percpu) - irq_set_chip_and_handler_name(irq, &xen_percpu_chip, + irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip, handle_percpu_irq, "virq"); else - irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, + irq_set_chip_and_handler_name(info->irq, &xen_dynamic_chip, handle_edge_irq, "virq"); bind_virq.virq = virq; @@ -1407,10 +1389,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) BUG_ON(ret < 0); } - ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq); + ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq); if (ret < 0) { - __unbind_from_irq(irq); - irq = ret; + __unbind_from_irq(info, info->irq); goto out; } @@ -1418,22 +1399,26 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) * Force the affinity mask for percpu interrupts so proc * shows the correct target. */ - bind_evtchn_to_cpu(evtchn, cpu, percpu); + bind_evtchn_to_cpu(info, cpu, percpu); + ret = info->irq; } else { - struct irq_info *info = info_for_irq(irq); + info = info_for_irq(ret); WARN_ON(info == NULL || info->type != IRQT_VIRQ); } out: mutex_unlock(&irq_mapping_update_lock); - return irq; + return ret; } static void unbind_from_irq(unsigned int irq) { + struct irq_info *info; + mutex_lock(&irq_mapping_update_lock); - __unbind_from_irq(irq); + info = info_for_irq(irq); + __unbind_from_irq(info, irq); mutex_unlock(&irq_mapping_update_lock); } @@ -1767,11 +1752,11 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) BUG_ON(info->type == IRQT_UNBOUND); info->irq = irq; - (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL); + (void)xen_irq_info_evtchn_setup(info, evtchn, NULL); mutex_unlock(&irq_mapping_update_lock); - bind_evtchn_to_cpu(evtchn, info->cpu, false); + bind_evtchn_to_cpu(info, info->cpu, false); /* Unmask the event channel. */ enable_irq(irq); @@ -1805,7 +1790,7 @@ static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu) * it, but don't do the xenlinux-level rebind in that case. */ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) - bind_evtchn_to_cpu(evtchn, tcpu, false); + bind_evtchn_to_cpu(info, tcpu, false); do_unmask(info, EVT_MASK_REASON_TEMPORARY); @@ -1956,7 +1941,7 @@ static void restore_pirqs(void) if (rc) { pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n", gsi, irq, pirq, rc); - xen_free_irq(irq); + xen_free_irq(info); continue; } @@ -1970,13 +1955,15 @@ static void restore_cpu_virqs(unsigned int cpu) { struct evtchn_bind_virq bind_virq; evtchn_port_t evtchn; + struct irq_info *info; int virq, irq; for (virq = 0; virq < NR_VIRQS; virq++) { if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) continue; + info = info_for_irq(irq); - BUG_ON(virq_from_irq(irq) != virq); + BUG_ON(virq_from_irq(info) != virq); /* Get a new binding from Xen. */ bind_virq.virq = virq; @@ -1987,9 +1974,9 @@ static void restore_cpu_virqs(unsigned int cpu) evtchn = bind_virq.port; /* Record the new mapping. */ - (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq); + xen_irq_info_virq_setup(info, cpu, evtchn, virq); /* The affinity mask is still valid */ - bind_evtchn_to_cpu(evtchn, cpu, false); + bind_evtchn_to_cpu(info, cpu, false); } } @@ -1997,13 +1984,15 @@ static void restore_cpu_ipis(unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; evtchn_port_t evtchn; + struct irq_info *info; int ipi, irq; for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) continue; + info = info_for_irq(irq); - BUG_ON(ipi_from_irq(irq) != ipi); + BUG_ON(ipi_from_irq(info) != ipi); /* Get a new binding from Xen. */ bind_ipi.vcpu = xen_vcpu_nr(cpu); @@ -2013,9 +2002,9 @@ static void restore_cpu_ipis(unsigned int cpu) evtchn = bind_ipi.port; /* Record the new mapping. */ - (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); + xen_irq_info_ipi_setup(info, cpu, evtchn, ipi); /* The affinity mask is still valid */ - bind_evtchn_to_cpu(evtchn, cpu, false); + bind_evtchn_to_cpu(info, cpu, false); } } From 20980195ec8d2e41653800c45c8c367fa1b1f2b4 Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Wed, 24 Jan 2024 16:31:28 +0000 Subject: [PATCH 132/299] xen/events: close evtchn after mapping cleanup [ Upstream commit fa765c4b4aed2d64266b694520ecb025c862c5a9 ] shutdown_pirq and startup_pirq are not taking the irq_mapping_update_lock because they can't due to lock inversion. Both are called with the irq_desc->lock being taking. The lock order, however, is first irq_mapping_update_lock and then irq_desc->lock. This opens multiple races: - shutdown_pirq can be interrupted by a function that allocates an event channel: CPU0 CPU1 shutdown_pirq { xen_evtchn_close(e) __startup_pirq { EVTCHNOP_bind_pirq -> returns just freed evtchn e set_evtchn_to_irq(e, irq) } xen_irq_info_cleanup() { set_evtchn_to_irq(e, -1) } } Assume here event channel e refers here to the same event channel number. After this race the evtchn_to_irq mapping for e is invalid (-1). - __startup_pirq races with __unbind_from_irq in a similar way. Because __startup_pirq doesn't take irq_mapping_update_lock it can grab the evtchn that __unbind_from_irq is currently freeing and cleaning up. In this case even though the event channel is allocated, its mapping can be unset in evtchn_to_irq. The fix is to first cleanup the mappings and then close the event channel. In this way, when an event channel gets allocated it's potential previous evtchn_to_irq mappings are guaranteed to be unset already. This is also the reverse order of the allocation where first the event channel is allocated and then the mappings are setup. On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal [un]bind interfaces"), we hit a BUG like the following during probing of NVMe devices. The issue is that during nvme_setup_io_queues, pci_free_irq is called for every device which results in a call to shutdown_pirq. With many nvme devices it's therefore likely to hit this race during boot because there will be multiple calls to shutdown_pirq and startup_pirq are running potentially in parallel. ------------[ cut here ]------------ blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled kernel BUG at drivers/xen/events/events_base.c:499! invalid opcode: 0000 [#1] SMP PTI CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1 Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006 Workqueue: nvme-reset-wq nvme_reset_work RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0 Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006 RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002 FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_trace_log_lvl+0x1c1/0x2d9 ? show_trace_log_lvl+0x1c1/0x2d9 ? set_affinity_irq+0xdc/0x1c0 ? __die_body.cold+0x8/0xd ? die+0x2b/0x50 ? do_trap+0x90/0x110 ? bind_evtchn_to_cpu+0xdf/0xf0 ? do_error_trap+0x65/0x80 ? bind_evtchn_to_cpu+0xdf/0xf0 ? exc_invalid_op+0x4e/0x70 ? bind_evtchn_to_cpu+0xdf/0xf0 ? asm_exc_invalid_op+0x12/0x20 ? bind_evtchn_to_cpu+0xdf/0xf0 ? bind_evtchn_to_cpu+0xc5/0xf0 set_affinity_irq+0xdc/0x1c0 irq_do_set_affinity+0x1d7/0x1f0 irq_setup_affinity+0xd6/0x1a0 irq_startup+0x8a/0xf0 __setup_irq+0x639/0x6d0 ? nvme_suspend+0x150/0x150 request_threaded_irq+0x10c/0x180 ? nvme_suspend+0x150/0x150 pci_request_irq+0xa8/0xf0 ? __blk_mq_free_request+0x74/0xa0 queue_request_irq+0x6f/0x80 nvme_create_queue+0x1af/0x200 nvme_create_io_queues+0xbd/0xf0 nvme_setup_io_queues+0x246/0x320 ? nvme_irq_check+0x30/0x30 nvme_reset_work+0x1c8/0x400 process_one_work+0x1b0/0x350 worker_thread+0x49/0x310 ? process_one_work+0x350/0x350 kthread+0x11b/0x140 ? __kthread_bind_mask+0x60/0x60 ret_from_fork+0x22/0x30 Modules linked in: ---[ end trace a11715de1eee1873 ]--- Fixes: d46a78b05c0e ("xen: implement pirq type event channels") Cc: stable@vger.kernel.org Co-debugged-by: Andrew Panyakin Signed-off-by: Maximilian Heyne Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20240124163130.31324-1-mheyne@amazon.de Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/events/events_base.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6f57ef78f550..36ba3ef6ef01 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -908,8 +908,8 @@ static void shutdown_pirq(struct irq_data *data) return; do_mask(info, EVT_MASK_REASON_EXPLICIT); - xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } static void enable_pirq(struct irq_data *data) @@ -941,6 +941,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi); static void __unbind_from_irq(struct irq_info *info, unsigned int irq) { evtchn_port_t evtchn; + bool close_evtchn = false; if (!info) { xen_irq_free_desc(irq); @@ -960,7 +961,7 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq) struct xenbus_device *dev; if (!info->is_static) - xen_evtchn_close(evtchn); + close_evtchn = true; switch (info->type) { case IRQT_VIRQ: @@ -980,6 +981,9 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq) } xen_irq_info_cleanup(info); + + if (close_evtchn) + xen_evtchn_close(evtchn); } xen_free_irq(info); From a2fef1d81becf4ff60e1a249477464eae3c3bc2a Mon Sep 17 00:00:00 2001 From: Daniel Vacek Date: Thu, 1 Feb 2024 09:10:08 +0100 Subject: [PATCH 133/299] IB/hfi1: Fix sdma.h tx->num_descs off-by-one error commit e6f57c6881916df39db7d95981a8ad2b9c3458d6 upstream. Unfortunately the commit `fd8958efe877` introduced another error causing the `descs` array to overflow. This reults in further crashes easily reproducible by `sendmsg` system call. [ 1080.836473] general protection fault, probably for non-canonical address 0x400300015528b00a: 0000 [#1] PREEMPT SMP PTI [ 1080.869326] RIP: 0010:hfi1_ipoib_build_ib_tx_headers.constprop.0+0xe1/0x2b0 [hfi1] -- [ 1080.974535] Call Trace: [ 1080.976990] [ 1081.021929] hfi1_ipoib_send_dma_common+0x7a/0x2e0 [hfi1] [ 1081.027364] hfi1_ipoib_send_dma_list+0x62/0x270 [hfi1] [ 1081.032633] hfi1_ipoib_send+0x112/0x300 [hfi1] [ 1081.042001] ipoib_start_xmit+0x2a9/0x2d0 [ib_ipoib] [ 1081.046978] dev_hard_start_xmit+0xc4/0x210 -- [ 1081.148347] __sys_sendmsg+0x59/0xa0 crash> ipoib_txreq 0xffff9cfeba229f00 struct ipoib_txreq { txreq = { list = { next = 0xffff9cfeba229f00, prev = 0xffff9cfeba229f00 }, descp = 0xffff9cfeba229f40, coalesce_buf = 0x0, wait = 0xffff9cfea4e69a48, complete = 0xffffffffc0fe0760 , packet_len = 0x46d, tlen = 0x0, num_desc = 0x0, desc_limit = 0x6, next_descq_idx = 0x45c, coalesce_idx = 0x0, flags = 0x0, descs = {{ qw = {0x8024000120dffb00, 0x4} # SDMA_DESC0_FIRST_DESC_FLAG (bit 63) }, { qw = { 0x3800014231b108, 0x4} }, { qw = { 0x310000e4ee0fcf0, 0x8} }, { qw = { 0x3000012e9f8000, 0x8} }, { qw = { 0x59000dfb9d0000, 0x8} }, { qw = { 0x78000e02e40000, 0x8} }} }, sdma_hdr = 0x400300015528b000, <<< invalid pointer in the tx request structure sdma_status = 0x0, SDMA_DESC0_LAST_DESC_FLAG (bit 62) complete = 0x0, priv = 0x0, txq = 0xffff9cfea4e69880, skb = 0xffff9d099809f400 } If an SDMA send consists of exactly 6 descriptors and requires dword padding (in the 7th descriptor), the sdma_txreq descriptor array is not properly expanded and the packet will overflow into the container structure. This results in a panic when the send completion runs. The exact panic varies depending on what elements of the container structure get corrupted. The fix is to use the correct expression in _pad_sdma_tx_descs() to test the need to expand the descriptor array. With this patch the crashes are no longer reproducible and the machine is stable. Fixes: fd8958efe877 ("IB/hfi1: Fix sdma.h tx->num_descs off-by-one errors") Cc: stable@vger.kernel.org Reported-by: Mats Kronberg Tested-by: Mats Kronberg Signed-off-by: Daniel Vacek Link: https://lore.kernel.org/r/20240201081009.1109442-1-neelx@redhat.com Signed-off-by: Leon Romanovsky Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 26c62162759b..969c5c3ab859 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -3158,7 +3158,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int rval = 0; - if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) { + if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { __sdma_txclean(dd, tx); From 48985d64c4c8105aa3497219df063e1489792e59 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 13 Feb 2024 18:21:35 -0800 Subject: [PATCH 134/299] x86/bugs: Add asm helpers for executing VERW commit baf8361e54550a48a7087b603313ad013cc13386 upstream. MDS mitigation requires clearing the CPU buffers before returning to user. This needs to be done late in the exit-to-user path. Current location of VERW leaves a possibility of kernel data ending up in CPU buffers for memory accesses done after VERW such as: 1. Kernel data accessed by an NMI between VERW and return-to-user can remain in CPU buffers since NMI returning to kernel does not execute VERW to clear CPU buffers. 2. Alyssa reported that after VERW is executed, CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system call. Memory accesses during stack scrubbing can move kernel stack contents into CPU buffers. 3. When caller saved registers are restored after a return from function executing VERW, the kernel stack accesses can remain in CPU buffers(since they occur after VERW). To fix this VERW needs to be moved very late in exit-to-user path. In preparation for moving VERW to entry/exit asm code, create macros that can be used in asm. Also make VERW patching depend on a new feature flag X86_FEATURE_CLEAR_CPU_BUF. Reported-by: Alyssa Milburn Suggested-by: Andrew Cooper Suggested-by: Peter Zijlstra Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry.S | 23 +++++++++++++++++++++++ arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index bfb7bcb362bc..718c00367f9a 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include .pushsection .noinstr.text, "ax" @@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb) EXPORT_SYMBOL_GPL(entry_ibpb); .popsection + +/* + * Define the VERW operand that is disguised as entry code so that + * it can be referenced with KPTI enabled. This ensure VERW can be + * used late in exit-to-user path after page tables are switched. + */ +.pushsection .entry.text, "ax" + +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_START_NOALIGN(mds_verw_sel) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + .word __KERNEL_DS +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_END(mds_verw_sel); +/* For KVM */ +EXPORT_SYMBOL_GPL(mds_verw_sel); + +.popsection + diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0091f1008314..e7b0554be04f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -97,7 +97,7 @@ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 0396458c201f..a3db9647428b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -329,6 +329,17 @@ #endif .endm +/* + * Macro to execute VERW instruction that mitigate transient data sampling + * attacks such as MDS. On affected systems a microcode update overloaded VERW + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. + * + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +.macro CLEAR_CPU_BUFFERS + ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF +.endm + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -545,6 +556,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +extern u16 mds_verw_sel; + #include /** From 50297906f81c156674af46d05ad889f0a567eb7a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 19 Feb 2024 09:05:38 -0700 Subject: [PATCH 135/299] docs: Instruct LaTeX to cope with deeper nesting commit 0df8669f69a8638f04c6a3d1f3b7056c2c18f62c upstream. The addition of the XFS online fsck documentation starting with commit a8f6c2e54ddc ("xfs: document the motivation for online fsck design") added a deeper level of nesting than LaTeX is prepared to deal with. That caused a pdfdocs build failure with the helpful "Too deeply nested" error message buried deeply in Documentation/output/filesystems.log. Increase the "maxlistdepth" parameter to instruct LaTeX that it needs to deal with the deeper nesting whether it wants to or not. Suggested-by: Akira Yokosawa Tested-by: Akira Yokosawa Cc: stable@vger.kernel.org # v6.4+ Link: https://lore.kernel.org/linux-doc/67f6ac60-7957-4b92-9d72-a08fbad0e028@gmail.com/ Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- Documentation/conf.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/conf.py b/Documentation/conf.py index d4fdf6a3875a..dfc19c915d5c 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -383,6 +383,12 @@ latex_elements = { verbatimhintsturnover=false, ''', + # + # Some of our authors are fond of deep nesting; tell latex to + # cope. + # + 'maxlistdepth': '10', + # For CJK One-half spacing, need to be in front of hyperref 'extrapackages': r'\usepackage{setspace}', From ea459e6926f00418dd2967c091c4c844e7b2a8ee Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 136/299] LoongArch: Call early_init_fdt_scan_reserved_mem() earlier commit 9fa304b9f8ec440e614af6d35826110c633c4074 upstream. The unflatten_and_copy_device_tree() function contains a call to memblock_alloc(). This means that memblock is allocating memory before any of the reserved memory regions are set aside in the arch_mem_init() function which calls early_init_fdt_scan_reserved_mem(). Therefore, there is a possibility for memblock to allocate from any of the reserved memory regions. Hence, move the call to early_init_fdt_scan_reserved_mem() to be earlier in the init sequence, so that the reserved memory regions are set aside before any allocations are done using memblock. Cc: stable@vger.kernel.org Fixes: 88d4d957edc707e ("LoongArch: Add FDT booting support from efi system table") Signed-off-by: Oreoluwa Babatunde Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index aed65915e932..d7409a3e67a5 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -367,6 +367,8 @@ void __init platform_init(void) acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); #endif + + early_init_fdt_scan_reserved_mem(); unflatten_and_copy_device_tree(); #ifdef CONFIG_NUMA @@ -400,8 +402,6 @@ static void __init arch_mem_init(char **cmdline_p) check_kernel_sections_mem(); - early_init_fdt_scan_reserved_mem(); - /* * In order to reduce the possibility of kernel panic when failed to * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate From dffdf7c783ef291eef38a5a0037584fd1a7fa464 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 137/299] LoongArch: Disable IRQ before init_fn() for nonboot CPUs commit 1001db6c42e4012b55e5ee19405490f23e033b5a upstream. Disable IRQ before init_fn() for nonboot CPUs when hotplug, in order to silence such warnings (and also avoid potential errors due to unexpected interrupts): WARNING: CPU: 1 PID: 0 at kernel/rcu/tree.c:4503 rcu_cpu_starting+0x214/0x280 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 pc 90000000048e3334 ra 90000000047bd56c tp 900000010039c000 sp 900000010039fdd0 a0 0000000000000001 a1 0000000000000006 a2 900000000802c040 a3 0000000000000000 a4 0000000000000001 a5 0000000000000004 a6 0000000000000000 a7 90000000048e3f4c t0 0000000000000001 t1 9000000005c70968 t2 0000000004000000 t3 000000000005e56e t4 00000000000002e4 t5 0000000000001000 t6 ffffffff80000000 t7 0000000000040000 t8 9000000007931638 u0 0000000000000006 s9 0000000000000004 s0 0000000000000001 s1 9000000006356ac0 s2 9000000007244000 s3 0000000000000001 s4 0000000000000001 s5 900000000636f000 s6 7fffffffffffffff s7 9000000002123940 s8 9000000001ca55f8 ra: 90000000047bd56c tlb_init+0x24c/0x528 ERA: 90000000048e3334 rcu_cpu_starting+0x214/0x280 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000000 (PPLV0 -PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071000 (LIE=12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 Stack : 0000000000000000 9000000006375000 9000000005b61878 900000010039c000 900000010039fa30 0000000000000000 900000010039fa38 900000000619a140 9000000006456888 9000000006456880 900000010039f950 0000000000000001 0000000000000001 cb0cb028ec7e52e1 0000000002b90000 9000000100348700 0000000000000000 0000000000000001 ffffffff916d12f1 0000000000000003 0000000000040000 9000000007930370 0000000002b90000 0000000000000004 9000000006366000 900000000619a140 0000000000000000 0000000000000004 0000000000000000 0000000000000009 ffffffffffc681f2 9000000002123940 9000000001ca55f8 9000000006366000 90000000047a4828 00007ffff057ded8 00000000000000b0 0000000000000000 0000000000000000 0000000000071000 ... Call Trace: [<90000000047a4828>] show_stack+0x48/0x1a0 [<9000000005b61874>] dump_stack_lvl+0x84/0xcc [<90000000047f60ac>] __warn+0x8c/0x1e0 [<9000000005b0ab34>] report_bug+0x1b4/0x280 [<9000000005b63110>] do_bp+0x2d0/0x480 [<90000000047a2e20>] handle_bp+0x120/0x1c0 [<90000000048e3334>] rcu_cpu_starting+0x214/0x280 [<90000000047bd568>] tlb_init+0x248/0x528 [<90000000047a4c44>] per_cpu_trap_init+0x124/0x160 [<90000000047a19f4>] cpu_probe+0x494/0xa00 [<90000000047b551c>] start_secondary+0x3c/0xc0 [<9000000005b66134>] smpboot_entry+0x50/0x58 Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 2fbf541d7b4f..41dc36695f3e 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -334,6 +334,7 @@ void __noreturn arch_cpu_idle_dead(void) addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); } while (addr == 0); + local_irq_disable(); init_fn = (void *)TO_CACHE(addr); iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); From b1ec3d6b86fdd057559a5908e6668279bf770e0e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 138/299] LoongArch: Update cpu_sibling_map when disabling nonboot CPUs commit 752cd08da320a667a833803a8fd6bb266114cce5 upstream. Update cpu_sibling_map when disabling nonboot CPUs by defining & calling clear_cpu_sibling_map(), otherwise we get such errors on SMT systems: jump label: negative count! WARNING: CPU: 6 PID: 45 at kernel/jump_label.c:263 __static_key_slow_dec_cpuslocked+0xec/0x100 CPU: 6 PID: 45 Comm: cpuhp/6 Not tainted 6.8.0-rc5+ #1340 pc 90000000004c302c ra 90000000004c302c tp 90000001005bc000 sp 90000001005bfd20 a0 000000000000001b a1 900000000224c278 a2 90000001005bfb58 a3 900000000224c280 a4 900000000224c278 a5 90000001005bfb50 a6 0000000000000001 a7 0000000000000001 t0 ce87a4763eb5234a t1 ce87a4763eb5234a t2 0000000000000000 t3 0000000000000000 t4 0000000000000006 t5 0000000000000000 t6 0000000000000064 t7 0000000000001964 t8 000000000009ebf6 u0 9000000001f2a068 s9 0000000000000000 s0 900000000246a2d8 s1 ffffffffffffffff s2 ffffffffffffffff s3 90000000021518c0 s4 0000000000000040 s5 9000000002151058 s6 9000000009828e40 s7 00000000000000b4 s8 0000000000000006 ra: 90000000004c302c __static_key_slow_dec_cpuslocked+0xec/0x100 ERA: 90000000004c302c __static_key_slow_dec_cpuslocked+0xec/0x100 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1c (LIE=2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014d000 (Loongson-64bit, Loongson-3A6000-HV) CPU: 6 PID: 45 Comm: cpuhp/6 Not tainted 6.8.0-rc5+ #1340 Stack : 0000000000000000 900000000203f258 900000000179afc8 90000001005bc000 90000001005bf980 0000000000000000 90000001005bf988 9000000001fe0be0 900000000224c280 900000000224c278 90000001005bf8c0 0000000000000001 0000000000000001 ce87a4763eb5234a 0000000007f38000 90000001003f8cc0 0000000000000000 0000000000000006 0000000000000000 4c206e6f73676e6f 6f4c203a656d616e 000000000009ec99 0000000007f38000 0000000000000000 900000000214b000 9000000001fe0be0 0000000000000004 0000000000000000 0000000000000107 0000000000000009 ffffffffffafdabe 00000000000000b4 0000000000000006 90000000004c302c 9000000000224528 00005555939a0c7c 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1c ... Call Trace: [<9000000000224528>] show_stack+0x48/0x1a0 [<900000000179afc8>] dump_stack_lvl+0x78/0xa0 [<9000000000263ed0>] __warn+0x90/0x1a0 [<90000000017419b8>] report_bug+0x1b8/0x280 [<900000000179c564>] do_bp+0x264/0x420 [<90000000004c302c>] __static_key_slow_dec_cpuslocked+0xec/0x100 [<90000000002b4d7c>] sched_cpu_deactivate+0x2fc/0x300 [<9000000000266498>] cpuhp_invoke_callback+0x178/0x8a0 [<9000000000267f70>] cpuhp_thread_fun+0xf0/0x240 [<90000000002a117c>] smpboot_thread_fn+0x1dc/0x2e0 [<900000000029a720>] kthread+0x140/0x160 [<9000000000222288>] ret_from_kernel_thread+0xc/0xa4 Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/smp.c | 121 ++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 53 deletions(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 41dc36695f3e..378ffa78ffeb 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -88,6 +88,73 @@ void show_ipi_list(struct seq_file *p, int prec) } } +static inline void set_cpu_core_map(int cpu) +{ + int i; + + cpumask_set_cpu(cpu, &cpu_core_setup_map); + + for_each_cpu(i, &cpu_core_setup_map) { + if (cpu_data[cpu].package == cpu_data[i].package) { + cpumask_set_cpu(i, &cpu_core_map[cpu]); + cpumask_set_cpu(cpu, &cpu_core_map[i]); + } + } +} + +static inline void set_cpu_sibling_map(int cpu) +{ + int i; + + cpumask_set_cpu(cpu, &cpu_sibling_setup_map); + + for_each_cpu(i, &cpu_sibling_setup_map) { + if (cpus_are_siblings(cpu, i)) { + cpumask_set_cpu(i, &cpu_sibling_map[cpu]); + cpumask_set_cpu(cpu, &cpu_sibling_map[i]); + } + } +} + +static inline void clear_cpu_sibling_map(int cpu) +{ + int i; + + for_each_cpu(i, &cpu_sibling_setup_map) { + if (cpus_are_siblings(cpu, i)) { + cpumask_clear_cpu(i, &cpu_sibling_map[cpu]); + cpumask_clear_cpu(cpu, &cpu_sibling_map[i]); + } + } + + cpumask_clear_cpu(cpu, &cpu_sibling_setup_map); +} + +/* + * Calculate a new cpu_foreign_map mask whenever a + * new cpu appears or disappears. + */ +void calculate_cpu_foreign_map(void) +{ + int i, k, core_present; + cpumask_t temp_foreign_map; + + /* Re-calculate the mask */ + cpumask_clear(&temp_foreign_map); + for_each_online_cpu(i) { + core_present = 0; + for_each_cpu(k, &temp_foreign_map) + if (cpus_are_siblings(i, k)) + core_present = 1; + if (!core_present) + cpumask_set_cpu(i, &temp_foreign_map); + } + + for_each_online_cpu(i) + cpumask_andnot(&cpu_foreign_map[i], + &temp_foreign_map, &cpu_sibling_map[i]); +} + /* Send mailbox buffer via Mail_Send */ static void csr_mail_send(uint64_t data, int cpu, int mailbox) { @@ -300,6 +367,7 @@ int loongson_cpu_disable(void) numa_remove_cpu(cpu); #endif set_cpu_online(cpu, false); + clear_cpu_sibling_map(cpu); calculate_cpu_foreign_map(); local_irq_save(flags); irq_migrate_all_off_this_cpu(); @@ -377,59 +445,6 @@ static int __init ipi_pm_init(void) core_initcall(ipi_pm_init); #endif -static inline void set_cpu_sibling_map(int cpu) -{ - int i; - - cpumask_set_cpu(cpu, &cpu_sibling_setup_map); - - for_each_cpu(i, &cpu_sibling_setup_map) { - if (cpus_are_siblings(cpu, i)) { - cpumask_set_cpu(i, &cpu_sibling_map[cpu]); - cpumask_set_cpu(cpu, &cpu_sibling_map[i]); - } - } -} - -static inline void set_cpu_core_map(int cpu) -{ - int i; - - cpumask_set_cpu(cpu, &cpu_core_setup_map); - - for_each_cpu(i, &cpu_core_setup_map) { - if (cpu_data[cpu].package == cpu_data[i].package) { - cpumask_set_cpu(i, &cpu_core_map[cpu]); - cpumask_set_cpu(cpu, &cpu_core_map[i]); - } - } -} - -/* - * Calculate a new cpu_foreign_map mask whenever a - * new cpu appears or disappears. - */ -void calculate_cpu_foreign_map(void) -{ - int i, k, core_present; - cpumask_t temp_foreign_map; - - /* Re-calculate the mask */ - cpumask_clear(&temp_foreign_map); - for_each_online_cpu(i) { - core_present = 0; - for_each_cpu(k, &temp_foreign_map) - if (cpus_are_siblings(i, k)) - core_present = 1; - if (!core_present) - cpumask_set_cpu(i, &temp_foreign_map); - } - - for_each_online_cpu(i) - cpumask_andnot(&cpu_foreign_map[i], - &temp_foreign_map, &cpu_sibling_map[i]); -} - /* Preload SMP state for boot cpu */ void smp_prepare_boot_cpu(void) { From 0bb020dca6d8f195b100fcd216b543f922f74e5d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 7 Feb 2024 10:00:42 +1030 Subject: [PATCH 139/299] btrfs: defrag: avoid unnecessary defrag caused by incorrect extent size commit e42b9d8b9ea2672811285e6a7654887ff64d23f3 upstream. [BUG] With the following file extent layout, defrag would do unnecessary IO and result more on-disk space usage. # mkfs.btrfs -f $dev # mount $dev $mnt # xfs_io -f -c "pwrite 0 40m" $mnt/foobar # sync # xfs_io -f -c "pwrite 40m 16k" $mnt/foobar # sync Above command would lead to the following file extent layout: item 6 key (257 EXTENT_DATA 0) itemoff 15816 itemsize 53 generation 7 type 1 (regular) extent data disk byte 298844160 nr 41943040 extent data offset 0 nr 41943040 ram 41943040 extent compression 0 (none) item 7 key (257 EXTENT_DATA 41943040) itemoff 15763 itemsize 53 generation 8 type 1 (regular) extent data disk byte 13631488 nr 16384 extent data offset 0 nr 16384 ram 16384 extent compression 0 (none) Which is mostly fine. We can allow the final 16K to be merged with the previous 40M, but it's upon the end users' preference. But if we defrag the file using the default parameters, it would result worse file layout: # btrfs filesystem defrag $mnt/foobar # sync item 6 key (257 EXTENT_DATA 0) itemoff 15816 itemsize 53 generation 7 type 1 (regular) extent data disk byte 298844160 nr 41943040 extent data offset 0 nr 8650752 ram 41943040 extent compression 0 (none) item 7 key (257 EXTENT_DATA 8650752) itemoff 15763 itemsize 53 generation 9 type 1 (regular) extent data disk byte 340787200 nr 33292288 extent data offset 0 nr 33292288 ram 33292288 extent compression 0 (none) item 8 key (257 EXTENT_DATA 41943040) itemoff 15710 itemsize 53 generation 8 type 1 (regular) extent data disk byte 13631488 nr 16384 extent data offset 0 nr 16384 ram 16384 extent compression 0 (none) Note the original 40M extent is still there, but a new 32M extent is created for no benefit at all. [CAUSE] There is an existing check to make sure we won't defrag a large enough extent (the threshold is by default 32M). But the check is using the length to the end of the extent: range_len = em->len - (cur - em->start); /* Skip too large extent */ if (range_len >= extent_thresh) goto next; This means, for the first 8MiB of the extent, the range_len is always smaller than the default threshold, and would not be defragged. But after the first 8MiB, the remaining part would fit the requirement, and be defragged. Such different behavior inside the same extent caused the above problem, and we should avoid different defrag decision inside the same extent. [FIX] Instead of using @range_len, just use @em->len, so that we have a consistent decision among the same file extent. Now with this fix, we won't touch the extent, thus not making it any worse. Reported-by: Filipe Manana Fixes: 0cb5950f3f3b ("btrfs: fix deadlock when reserving space during defrag") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Boris Burkov Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/defrag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index f2ff4cbe8656..a2e614cf3c19 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -903,7 +903,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode, goto add; /* Skip too large extent */ - if (range_len >= extent_thresh) + if (em->len >= extent_thresh) goto next; /* From 47bacc3c7fbbf573415c7ce1e7223bed3d691bfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 21 Feb 2024 08:33:24 +0100 Subject: [PATCH 140/299] drm/ttm: Fix an invalid freeing on already freed page in error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 40510a941d27d405a82dc3320823d875f94625df upstream. If caching mode change fails due to, for example, OOM we free the allocated pages in a two-step process. First the pages for which the caching change has already succeeded. Secondly the pages for which a caching change did not succeed. However the second step was incorrectly freeing the pages already freed in the first step. Fix. Signed-off-by: Thomas Hellström Fixes: 379989e7cbdc ("drm/ttm/pool: Fix ttm_pool_alloc error path") Cc: Christian König Cc: Dave Airlie Cc: Christian Koenig Cc: Huang Rui Cc: dri-devel@lists.freedesktop.org Cc: # v6.4+ Reviewed-by: Matthew Auld Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240221073324.3303-1-thomas.hellstrom@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index cddb9151d20f..9b60222511d6 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -384,7 +384,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, enum ttm_caching caching, pgoff_t start_page, pgoff_t end_page) { - struct page **pages = tt->pages; + struct page **pages = &tt->pages[start_page]; unsigned int order; pgoff_t i, nr; From d715ee6cbe7ca795a4297feebd0160b03699fde0 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Thu, 15 Feb 2024 23:04:42 +0100 Subject: [PATCH 141/299] drm/meson: Don't remove bridges which are created by other drivers commit bd915ae73a2d78559b376ad2caf5e4ef51de2455 upstream. Stop calling drm_bridge_remove() for bridges allocated/managed by other drivers in the remove paths of meson_encoder_{cvbs,dsi,hdmi}. drm_bridge_remove() unregisters the bridge so it cannot be used anymore. Doing so for bridges we don't own can lead to the video pipeline not being able to come up after -EPROBE_DEFER of the VPU because we're unregistering a bridge that's managed by another driver. The other driver doesn't know that we have unregistered it's bridge and on subsequent .probe() we're not able to find those bridges anymore (since nobody re-creates them). This fixes probe errors on Meson8b boards with the CVBS outputs enabled. Fixes: 09847723c12f ("drm/meson: remove drm bridges at aggregate driver unbind time") Fixes: 42dcf15f901c ("drm/meson: add DSI encoder") Cc: Reported-by: Steve Morvai Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Tested-by: Steve Morvai Link: https://lore.kernel.org/r/20240215220442.1343152-1-martin.blumenstingl@googlemail.com Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240215220442.1343152-1-martin.blumenstingl@googlemail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/meson/meson_encoder_cvbs.c | 1 - drivers/gpu/drm/meson/meson_encoder_dsi.c | 1 - drivers/gpu/drm/meson/meson_encoder_hdmi.c | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c index 3f73b211fa8e..3407450435e2 100644 --- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c +++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c @@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_CVBS]) { meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS]; drm_bridge_remove(&meson_encoder_cvbs->bridge); - drm_bridge_remove(meson_encoder_cvbs->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_dsi.c b/drivers/gpu/drm/meson/meson_encoder_dsi.c index 3f93c70488ca..311b91630fbe 100644 --- a/drivers/gpu/drm/meson/meson_encoder_dsi.c +++ b/drivers/gpu/drm/meson/meson_encoder_dsi.c @@ -168,6 +168,5 @@ void meson_encoder_dsi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_DSI]) { meson_encoder_dsi = priv->encoders[MESON_ENC_DSI]; drm_bridge_remove(&meson_encoder_dsi->bridge); - drm_bridge_remove(meson_encoder_dsi->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 25ea76558690..c4686568c9ca 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -474,6 +474,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_HDMI]) { meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI]; drm_bridge_remove(&meson_encoder_hdmi->bridge); - drm_bridge_remove(meson_encoder_hdmi->next_bridge); } } From fec5aea66916069d67b27e70ea88c99f15dc72fa Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 2 Feb 2024 17:34:11 +0800 Subject: [PATCH 142/299] drm/amd/display: adjust few initialization order in dm commit 22e1dc4b2fec17af70f297a4295c5f19a0f3fbeb upstream. [Why] Observe error message "Can't retrieve aconnector in hpd_rx_irq_offload_work" when boot up with a mst tbt4 dock connected. After analyzing, there are few parts needed to be adjusted: 1. hpd_rx_offload_wq[].aconnector is not initialzed before the dmub outbox hpd_irq handler get registered which causes the error message. 2. registeration of hpd and hpd_rx_irq event for usb4 dp tunneling is not aligned with legacy interface sequence [How] Put DMUB_NOTIFICATION_HPD and DMUB_NOTIFICATION_HPD_IRQ handler registration into register_hpd_handlers() to align other interfaces and get hpd_rx_offload_wq[].aconnector initialized earlier than that. Leave DMUB_NOTIFICATION_AUX_REPLY registered as it was since we need that while calling dc_link_detect(). USB4 connection status will be proactively detected by dc_link_detect_connection_type() in amdgpu_dm_initialize_drm_device() Cc: Stable Reviewed-by: Aurabindo Pillai Acked-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 83c263e2d717..3194c10f345f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1816,21 +1816,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) DRM_ERROR("amdgpu: fail to register dmub aux callback"); goto error; } - if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); - goto error; - } - if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); - goto error; - } - } - - /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. - * It is expected that DMUB will resend any pending notifications at this point, for - * example HPD from DPIA. - */ - if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. + * It is expected that DMUB will resend any pending notifications at this point. Note + * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to + * align legacy interface initialization sequence. Connection status will be proactivly + * detected once in the amdgpu_dm_initialize_drm_device. + */ dc_enable_dmub_outbox(adev->dm.dc); /* DPIA trace goes to dmesg logs only if outbox is enabled */ @@ -3484,6 +3475,14 @@ static void register_hpd_handlers(struct amdgpu_device *adev) int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT; int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT; + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) + DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) + DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + } + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { @@ -3509,10 +3508,6 @@ static void register_hpd_handlers(struct amdgpu_device *adev) handle_hpd_rx_irq, (void *) aconnector); } - - if (adev->dm.hpd_rx_offload_wq) - adev->dm.hpd_rx_offload_wq[connector->index].aconnector = - aconnector; } } @@ -4481,6 +4476,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) link = dc_get_link_at_index(dm->dc, i); + if (dm->hpd_rx_offload_wq) + dm->hpd_rx_offload_wq[aconnector->base.index].aconnector = + aconnector; + if (!dc_link_detect_connection_type(link, &new_connection_type)) DRM_ERROR("KMS: Failed to detect connector\n"); From cf245e831afc725428cd4482788b75cd103fb236 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 14 Feb 2024 16:06:28 +0100 Subject: [PATCH 143/299] s390/cio: fix invalid -EBUSY on ccw_device_start commit 5ef1dc40ffa6a6cb968b0fdc43c3a61727a9e950 upstream. The s390 common I/O layer (CIO) returns an unexpected -EBUSY return code when drivers try to start I/O while a path-verification (PV) process is pending. This can lead to failed device initialization attempts with symptoms like broken network connectivity after boot. Fix this by replacing the -EBUSY return code with a deferred condition code 1 reply to make path-verification handling consistent from a driver's point of view. The problem can be reproduced semi-regularly using the following process, while repeating steps 2-3 as necessary (example assumes an OSA device with bus-IDs 0.0.a000-0.0.a002 on CHPID 0.02): 1. echo 0.0.a000,0.0.a001,0.0.a002 >/sys/bus/ccwgroup/drivers/qeth/group 2. echo 0 > /sys/bus/ccwgroup/devices/0.0.a000/online 3. echo 1 > /sys/bus/ccwgroup/devices/0.0.a000/online ; \ echo on > /sys/devices/css0/chp0.02/status Background information: The common I/O layer starts path-verification I/Os when it receives indications about changes in a device path's availability. This occurs for example when hardware events indicate a change in channel-path status, or when a manual operation such as a CHPID vary or configure operation is performed. If a driver attempts to start I/O while a PV is running, CIO reports a successful I/O start (ccw_device_start() return code 0). Then, after completion of PV, CIO synthesizes an interrupt response that indicates an asynchronous status condition that prevented the start of the I/O (deferred condition code 1). If a PV indication arrives while a device is busy with driver-owned I/O, PV is delayed until after I/O completion was reported to the driver's interrupt handler. To ensure that PV can be started eventually, CIO reports a device busy condition (ccw_device_start() return code -EBUSY) if a driver tries to start another I/O while PV is pending. In some cases this -EBUSY return code causes device drivers to consider a device not operational, resulting in failed device initialization. Note: The code that introduced the problem was added in 2003. Symptoms started appearing with the following CIO commit that causes a PV indication when a device is removed from the cio_ignore list after the associated parent subchannel device was probed, but before online processing of the CCW device has started: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") During boot, the cio_ignore list is modified by the cio_ignore dracut module [1] as well as Linux vendor-specific systemd service scripts[2]. When combined, this commit and boot scripts cause a frequent occurrence of the problem during boot. [1] https://github.com/dracutdevs/dracut/tree/master/modules.d/81cio_ignore [2] https://github.com/SUSE/s390-tools/blob/master/cio_ignore.service Cc: stable@vger.kernel.org # v5.15+ Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") Tested-By: Thorsten Winkler Reviewed-by: Thorsten Winkler Signed-off-by: Peter Oberparleiter Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- drivers/s390/cio/device_ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index c533d1dadc6b..a5dba3829769 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -202,7 +202,8 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, return -EINVAL; if (cdev->private->state == DEV_STATE_NOT_OPER) return -ENODEV; - if (cdev->private->state == DEV_STATE_VERIFY) { + if (cdev->private->state == DEV_STATE_VERIFY || + cdev->private->flags.doverify) { /* Remember to fake irb when finished. */ if (!cdev->private->flags.fake_irb) { cdev->private->flags.fake_irb = FAKE_CMD_IRB; @@ -214,8 +215,7 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, } if (cdev->private->state != DEV_STATE_ONLINE || ((sch->schib.scsw.cmd.stctl & SCSW_STCTL_PRIM_STATUS) && - !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)) || - cdev->private->flags.doverify) + !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS))) return -EBUSY; ret = cio_set_options (sch, flags); if (ret) From 699e2648e1957caba979f75d0095b3464e880513 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 Jan 2024 20:51:22 +0900 Subject: [PATCH 144/299] ata: libata-core: Do not try to set sleeping devices to standby commit 4b085736e44dbbe69b5eea1a8a294f404678a1f4 upstream. In ata ata_dev_power_set_standby(), check that the target device is not sleeping. If it is, there is no need to do anything. Fixes: aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index d8cc1e27a125..f12beeb96629 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2034,6 +2034,10 @@ void ata_dev_power_set_active(struct ata_device *dev) struct ata_taskfile tf; unsigned int err_mask; + /* If the device is already sleeping, do nothing. */ + if (dev->flags & ATA_DFLAG_SLEEPING) + return; + /* * Issue READ VERIFY SECTORS command for 1 sector at lba=0 only * if supported by the device. From e7e23fc5d5fe422827c9a43ecb579448f73876c7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 15 Feb 2024 12:47:38 -0800 Subject: [PATCH 145/299] fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio commit b820de741ae48ccf50dd95e297889c286ff4f760 upstream. If kiocb_set_cancel_fn() is called for I/O submitted via io_uring, the following kernel warning appears: WARNING: CPU: 3 PID: 368 at fs/aio.c:598 kiocb_set_cancel_fn+0x9c/0xa8 Call trace: kiocb_set_cancel_fn+0x9c/0xa8 ffs_epfile_read_iter+0x144/0x1d0 io_read+0x19c/0x498 io_issue_sqe+0x118/0x27c io_submit_sqes+0x25c/0x5fc __arm64_sys_io_uring_enter+0x104/0xab0 invoke_syscall+0x58/0x11c el0_svc_common+0xb4/0xf4 do_el0_svc+0x2c/0xb0 el0_svc+0x2c/0xa4 el0t_64_sync_handler+0x68/0xb4 el0t_64_sync+0x1a4/0x1a8 Fix this by setting the IOCB_AIO_RW flag for read and write I/O that is submitted by libaio. Suggested-by: Jens Axboe Cc: Christoph Hellwig Cc: Avi Kivity Cc: Sandeep Dhavale Cc: Jens Axboe Cc: Greg Kroah-Hartman Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240215204739.2677806-2-bvanassche@acm.org Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 9 ++++++++- include/linux/fs.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index f8589caef9c1..3235d4e6cc62 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -594,6 +594,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) struct kioctx *ctx = req->ki_ctx; unsigned long flags; + /* + * kiocb didn't come from aio or is neither a read nor a write, hence + * ignore it. + */ + if (!(iocb->ki_flags & IOCB_AIO_RW)) + return; + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) return; @@ -1463,7 +1470,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) req->ki_complete = aio_complete_rw; req->private = NULL; req->ki_pos = iocb->aio_offset; - req->ki_flags = req->ki_filp->f_iocb_flags; + req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW; if (iocb->aio_flags & IOCB_FLAG_RESFD) req->ki_flags |= IOCB_EVENTFD; if (iocb->aio_flags & IOCB_FLAG_IOPRIO) { diff --git a/include/linux/fs.h b/include/linux/fs.h index d08b97dacd2d..6e558264b4ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -352,6 +352,8 @@ enum rw_hint { * unrelated IO (like cache flushing, new IO generation, etc). */ #define IOCB_DIO_CALLER_COMP (1 << 22) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ From e6316749d603fe9c4c91f6ec3694e06e4de632a3 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 8 Feb 2024 07:30:10 -0800 Subject: [PATCH 146/299] lib/Kconfig.debug: TEST_IOV_ITER depends on MMU commit 1eb1e984379e2da04361763f66eec90dd75cf63e upstream. Trying to run the iov_iter unit test on a nommu system such as the qemu kc705-nommu emulation results in a crash. KTAP version 1 # Subtest: iov_iter # module: kunit_iov_iter 1..9 BUG: failure at mm/nommu.c:318/vmap()! Kernel panic - not syncing: BUG! The test calls vmap() directly, but vmap() is not supported on nommu systems, causing the crash. TEST_IOV_ITER therefore needs to depend on MMU. Link: https://lkml.kernel.org/r/20240208153010.1439753-1-linux@roeck-us.net Fixes: 2d71340ff1d4 ("iov_iter: Kunit tests for copying to/from an iterator") Signed-off-by: Guenter Roeck Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1331f3186f2a..d2f73bb4121b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2225,6 +2225,7 @@ config TEST_DIV64 config TEST_IOV_ITER tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS depends on KUNIT + depends on MMU default KUNIT_ALL_TESTS help Enable this to turn on testing of the operation of the I/O iterator From 0f6cf136974a2493e347fa62b8cb0b1782a8e8fe Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:31:11 +0100 Subject: [PATCH 147/299] dm-crypt: recheck the integrity tag after a failure commit 42e15d12070b4ff9af2b980f1b65774c2dab0507 upstream. If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-crypt reports an authentication error [1]. The error is reported in a log and it may cause RAID leg being kicked out of the array. This commit fixes dm-crypt, so that if integrity verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the integrity tag is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an integrity error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 89 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 16 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 1a539ec81bac..1777198a24b3 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -62,6 +62,8 @@ struct convert_context { struct skcipher_request *req; struct aead_request *req_aead; } r; + bool aead_recheck; + bool aead_failed; }; @@ -82,6 +84,8 @@ struct dm_crypt_io { blk_status_t error; sector_t sector; + struct bvec_iter saved_bi_iter; + struct rb_node rb_node; } CRYPTO_MINALIGN_ATTR; @@ -1376,10 +1380,13 @@ static int crypt_convert_block_aead(struct crypt_config *cc, if (r == -EBADMSG) { sector_t s = le64_to_cpu(*sector); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } } if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) @@ -1763,6 +1770,8 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->base_bio = bio; io->sector = sector; io->error = 0; + io->ctx.aead_recheck = false; + io->ctx.aead_failed = false; io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; @@ -1774,6 +1783,8 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } +static void kcryptd_queue_read(struct dm_crypt_io *io); + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1787,6 +1798,15 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (!atomic_dec_and_test(&io->io_pending)) return; + if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) && + cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) { + io->ctx.aead_recheck = true; + io->ctx.aead_failed = false; + io->error = 0; + kcryptd_queue_read(io); + return; + } + if (io->ctx.r.req) crypt_free_req(cc, io->ctx.r.req, base_bio); @@ -1822,15 +1842,19 @@ static void crypt_endio(struct bio *clone) struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; unsigned int rw = bio_data_dir(clone); - blk_status_t error; + blk_status_t error = clone->bi_status; + + if (io->ctx.aead_recheck && !error) { + kcryptd_queue_crypt(io); + return; + } /* * free the processed pages */ - if (rw == WRITE) + if (rw == WRITE || io->ctx.aead_recheck) crypt_free_buffer_pages(cc, clone); - error = clone->bi_status; bio_put(clone); if (rw == READ && !error) { @@ -1851,6 +1875,22 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) struct crypt_config *cc = io->cc; struct bio *clone; + if (io->ctx.aead_recheck) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return 1; + crypt_inc_pending(io); + clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); + if (unlikely(!clone)) { + crypt_dec_pending(io); + return 1; + } + clone->bi_iter.bi_sector = cc->start + io->sector; + crypt_convert_init(cc, &io->ctx, clone, clone, io->sector); + io->saved_bi_iter = clone->bi_iter; + dm_submit_bio_remap(io->base_bio, clone); + return 0; + } + /* * We need the original biovec array in order to decrypt the whole bio * data *afterwards* -- thanks to immutable biovecs we don't need to @@ -2113,6 +2153,14 @@ dec: static void kcryptd_crypt_read_done(struct dm_crypt_io *io) { + if (io->ctx.aead_recheck) { + if (!io->error) { + io->ctx.bio_in->bi_iter = io->saved_bi_iter; + bio_copy_data(io->base_bio, io->ctx.bio_in); + } + crypt_free_buffer_pages(io->cc, io->ctx.bio_in); + bio_put(io->ctx.bio_in); + } crypt_dec_pending(io); } @@ -2142,11 +2190,17 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) crypt_inc_pending(io); - crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, - io->sector); + if (io->ctx.aead_recheck) { + io->ctx.cc_sector = io->sector + cc->iv_offset; + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } else { + crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, + io->sector); - r = crypt_convert(cc, &io->ctx, - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } /* * Crypto API backlogged the request, because its queue was full * and we're in softirq context, so continue from a workqueue @@ -2188,10 +2242,13 @@ static void kcryptd_async_done(void *data, int error) if (error == -EBADMSG) { sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } io->error = BLK_STS_PROTECTION; } else if (error < 0) io->error = BLK_STS_IOERR; @@ -3117,7 +3174,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar sval = strchr(opt_string + strlen("integrity:"), ':') + 1; if (!strcasecmp(sval, "aead")) { set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); - } else if (strcasecmp(sval, "none")) { + } else if (strcasecmp(sval, "none")) { ti->error = "Unknown integrity profile"; return -EINVAL; } From 78d41d9ba0b1fe60fc72e8e0835a2252692bfe9f Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 5 Feb 2024 10:39:20 +0100 Subject: [PATCH 148/299] Revert "parisc: Only list existing CPUs in cpu_possible_mask" commit 82b143aeb169b8b55798d7d2063032e1a6ceeeb0 upstream. This reverts commit 0921244f6f4f0d05698b953fe632a99b38907226. It broke CPU hotplugging because it modifies the __cpu_possible_mask after bootup, so that it will be different than nr_cpu_ids, which then effictively breaks the workqueue setup code and triggers crashes when shutting down CPUs at runtime. Guenter was the first who noticed the wrong values in __cpu_possible_mask, since the cpumask Kunit tests were failig. Reverting this commit fixes both issues, but sadly brings back this uncritical runtime warning: register_cpu_capacity_sysctl: too early to get CPU4 device! Signed-off-by: Helge Deller Reported-by: Guenter Roeck Link: https://lkml.org/lkml/2024/2/4/146 Link: https://lore.kernel.org/lkml/Zb0mbHlIud_bqftx@slm.duckdns.org/t/ Cc: stable@vger.kernel.org # 6.0+ Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/processor.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 1fc89fa2c2d2..e37c48770b58 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -172,7 +172,6 @@ static int __init processor_probe(struct parisc_device *dev) p->cpu_num = cpu_info.cpu_num; p->cpu_loc = cpu_info.cpu_loc; - set_cpu_possible(cpuid, true); store_cpu_topology(cpuid); #ifdef CONFIG_SMP @@ -474,13 +473,6 @@ static struct parisc_driver cpu_driver __refdata = { */ void __init processor_init(void) { - unsigned int cpu; - reset_cpu_topology(); - - /* reset possible mask. We will mark those which are possible. */ - for_each_possible_cpu(cpu) - set_cpu_possible(cpu, false); - register_parisc_driver(&cpu_driver); } From d6824a28b244e8a750952848e4bd2167e1e9a17e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:27:39 +0100 Subject: [PATCH 149/299] dm-integrity: recheck the integrity tag after a failure commit c88f5e553fe38b2ffc4c33d08654e5281b297677 upstream. If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-integrity reports an error [1]. The error is reported in a log and it may cause RAID leg being kicked out of the array. This commit fixes dm-integrity, so that if integrity verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the integrity tag is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an integrity error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 93 +++++++++++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 9261bbebd662..1ef6ca680f8f 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -278,6 +278,8 @@ struct dm_integrity_c { atomic64_t number_of_mismatches; + mempool_t recheck_pool; + struct notifier_block reboot_notifier; }; @@ -1699,6 +1701,79 @@ failed: get_random_bytes(result, ic->tag_size); } +static void integrity_recheck(struct dm_integrity_io *dio) +{ + struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); + struct dm_integrity_c *ic = dio->ic; + struct bvec_iter iter; + struct bio_vec bv; + sector_t sector, logical_sector, area, offset; + char checksum_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; + struct page *page; + void *buffer; + + get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); + dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, + &dio->metadata_offset); + sector = get_data_sector(ic, area, offset); + logical_sector = dio->range.logical_sector; + + page = mempool_alloc(&ic->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { + unsigned pos = 0; + + do { + char *mem; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = ic->io; + io_loc.bdev = ic->dev->bdev; + io_loc.sector = sector; + io_loc.count = ic->sectors_per_block; + + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) { + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + integrity_sector_checksum(ic, logical_sector, buffer, + checksum_onstack); + r = dm_integrity_rw_tag(ic, checksum_onstack, &dio->metadata_block, + &dio->metadata_offset, ic->tag_size, TAG_CMP); + if (r) { + if (r > 0) { + DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", + bio->bi_bdev, logical_sector); + atomic64_inc(&ic->number_of_mismatches); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", + bio, logical_sector, 0); + r = -EILSEQ; + } + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + mem = bvec_kmap_local(&bv); + memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT); + kunmap_local(mem); + + pos += ic->sectors_per_block << SECTOR_SHIFT; + sector += ic->sectors_per_block; + logical_sector += ic->sectors_per_block; + } while (pos < bv.bv_len); + } +free_ret: + mempool_free(page, &ic->recheck_pool); +} + static void integrity_metadata(struct work_struct *w) { struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); @@ -1786,15 +1861,8 @@ again: checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - sector_t s; - - s = sector - ((r + ic->tag_size - 1) / ic->tag_size); - DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", - bio->bi_bdev, s); - r = -EILSEQ; - atomic64_inc(&ic->number_of_mismatches); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", - bio, s, 0); + integrity_recheck(dio); + goto skip_io; } if (likely(checksums != checksums_onstack)) kfree(checksums); @@ -4271,6 +4339,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv goto bad; } + r = mempool_init_page_pool(&ic->recheck_pool, 1, 0); + if (r) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); if (!ic->metadata_wq) { @@ -4619,6 +4693,7 @@ static void dm_integrity_dtr(struct dm_target *ti) kvfree(ic->bbs); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); + mempool_exit(&ic->recheck_pool); mempool_exit(&ic->journal_io_mempool); if (ic->io) dm_io_client_destroy(ic->io); From 64ba01a365980755732972523600a961c4266b75 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:30:10 +0100 Subject: [PATCH 150/299] dm-crypt: don't modify the data when using authenticated encryption commit 50c70240097ce41fe6bce6478b80478281e4d0f7 upstream. It was said that authenticated encryption could produce invalid tag when the data that is being encrypted is modified [1]. So, fix this problem by copying the data into the clone bio first and then encrypt them inside the clone bio. This may reduce performance, but it is needed to prevent the user from corrupting the device by writing data with O_DIRECT and modifying them at the same time. [1] https://lore.kernel.org/all/20240207004723.GA35324@sol.localdomain/T/ Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-crypt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 1777198a24b3..5a296db23feb 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2117,6 +2117,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) io->ctx.bio_out = clone; io->ctx.iter_out = clone->bi_iter; + if (crypt_integrity_aead(cc)) { + bio_copy_data(clone, io->base_bio); + io->ctx.bio_in = clone; + io->ctx.iter_in = clone->bi_iter; + } + sector += bio_sectors(clone); crypt_inc_pending(io); From e5cc2309f6b328716ae86924a321d5622906ed78 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:28:09 +0100 Subject: [PATCH 151/299] dm-verity: recheck the hash after a failure commit 9177f3c0dea6143d05cac1bbd28668fd0e216d11 upstream. If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-verity reports an error [1]. This commit fixes dm-verity, so that if hash verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the hash is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-target.c | 86 ++++++++++++++++++++++++++++++++--- drivers/md/dm-verity.h | 6 +++ 2 files changed, 86 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 82662f5769c4..224469e1efbc 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -482,6 +482,63 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, return 0; } +static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) +{ + memcpy(data, io->recheck_buffer, len); + io->recheck_buffer += len; + + return 0; +} + +static int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter start, sector_t cur_block) +{ + struct page *page; + void *buffer; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + + page = mempool_alloc(&v->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = v->io; + io_loc.bdev = v->data_dev->bdev; + io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT); + io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT); + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) + goto free_ret; + + r = verity_hash(v, verity_io_hash_req(v, io), buffer, + 1 << v->data_dev_block_bits, + verity_io_real_digest(v, io), true); + if (unlikely(r)) + goto free_ret; + + if (memcmp(verity_io_real_digest(v, io), + verity_io_want_digest(v, io), v->digest_size)) { + r = -EIO; + goto free_ret; + } + + io->recheck_buffer = buffer; + r = verity_for_bv_block(v, io, &start, verity_recheck_copy); + if (unlikely(r)) + goto free_ret; + + r = 0; +free_ret: + mempool_free(page, &v->recheck_pool); + + return r; +} + static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, u8 *data, size_t len) { @@ -508,9 +565,7 @@ static int verity_verify_io(struct dm_verity_io *io) { bool is_zero; struct dm_verity *v = io->v; -#if defined(CONFIG_DM_VERITY_FEC) struct bvec_iter start; -#endif struct bvec_iter iter_copy; struct bvec_iter *iter; struct crypto_wait wait; @@ -561,10 +616,7 @@ static int verity_verify_io(struct dm_verity_io *io) if (unlikely(r < 0)) return r; -#if defined(CONFIG_DM_VERITY_FEC) - if (verity_fec_is_enabled(v)) - start = *iter; -#endif + start = *iter; r = verity_for_io_block(v, io, iter, &wait); if (unlikely(r < 0)) return r; @@ -586,6 +638,10 @@ static int verity_verify_io(struct dm_verity_io *io) * tasklet since it may sleep, so fallback to work-queue. */ return -EAGAIN; + } else if (verity_recheck(v, io, start, cur_block) == 0) { + if (v->validated_blocks) + set_bit(cur_block, v->validated_blocks); + continue; #if defined(CONFIG_DM_VERITY_FEC) } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, cur_block, NULL, &start) == 0) { @@ -941,6 +997,10 @@ static void verity_dtr(struct dm_target *ti) if (v->verify_wq) destroy_workqueue(v->verify_wq); + mempool_exit(&v->recheck_pool); + if (v->io) + dm_io_client_destroy(v->io); + if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -1379,6 +1439,20 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) } v->hash_blocks = hash_position; + r = mempool_init_page_pool(&v->recheck_pool, 1, 0); + if (unlikely(r)) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + + v->io = dm_io_client_create(); + if (IS_ERR(v->io)) { + r = PTR_ERR(v->io); + v->io = NULL; + ti->error = "Cannot allocate dm io"; + goto bad; + } + v->bufio = dm_bufio_client_create(v->hash_dev->bdev, 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux), dm_bufio_alloc_callback, NULL, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f3f607008419..4620a98c9956 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -11,6 +11,7 @@ #ifndef DM_VERITY_H #define DM_VERITY_H +#include #include #include #include @@ -68,6 +69,9 @@ struct dm_verity { unsigned long *validated_blocks; /* bitset blocks validated */ char *signature_key_desc; /* signature keyring reference */ + + struct dm_io_client *io; + mempool_t recheck_pool; }; struct dm_verity_io { @@ -84,6 +88,8 @@ struct dm_verity_io { struct work_struct work; + char *recheck_buffer; + /* * Three variably-size fields follow this struct: * From 8d584cc8e71e30d3bfd4e11a0664ad43bf0acf03 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Feb 2024 19:11:34 -0800 Subject: [PATCH 152/299] cxl/acpi: Fix load failures due to single window creation failure commit 5c6224bfabbf7f3e491c51ab50fd2c6f92ba1141 upstream. The expectation is that cxl_parse_cfwms() continues in the face the of failure as evidenced by code like: cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); if (IS_ERR(cxlrd)) return 0; There are other error paths in that function which mistakenly follow idiomatic expectations and return an error when they should not. Most of those mistakes are innocuous checks that hardly ever fail in practice. However, a recent change succeed in making the implementation more fragile by applying an idiomatic, but still wrong "fix" [1]. In this failure case the kernel reports: cxl root0: Failed to populate active decoder targets cxl_acpi ACPI0017:00: Failed to add decode range: [mem 0x00000000-0x7fffffff flags 0x200] ...which is a real issue with that one window (to be fixed separately), but ends up failing the entirety of cxl_acpi_probe(). Undo that recent breakage while also removing the confusion about ignoring errors. Update all exits paths to return an error per typical expectations and let an outer wrapper function handle dropping the error. Fixes: 91019b5bc7c2 ("cxl/acpi: Return 'rc' instead of '0' in cxl_parse_cfmws()") [1] Cc: Cc: Breno Leitao Cc: Alison Schofield Cc: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/acpi.c | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 40d055560e52..431953455830 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -194,31 +194,27 @@ struct cxl_cfmws_context { int id; }; -static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, - const unsigned long end) +static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, + struct cxl_cfmws_context *ctx) { int target_map[CXL_DECODER_MAX_INTERLEAVE]; - struct cxl_cfmws_context *ctx = arg; struct cxl_port *root_port = ctx->root_port; struct resource *cxl_res = ctx->cxl_res; struct cxl_cxims_context cxims_ctx; struct cxl_root_decoder *cxlrd; struct device *dev = ctx->dev; - struct acpi_cedt_cfmws *cfmws; cxl_calc_hb_fn cxl_calc_hb; struct cxl_decoder *cxld; unsigned int ways, i, ig; struct resource *res; int rc; - cfmws = (struct acpi_cedt_cfmws *) header; - rc = cxl_acpi_cfmws_verify(dev, cfmws); if (rc) { dev_err(dev, "CFMWS range %#llx-%#llx not registered\n", cfmws->base_hpa, cfmws->base_hpa + cfmws->window_size - 1); - return 0; + return rc; } rc = eiw_to_ways(cfmws->interleave_ways, &ways); @@ -254,7 +250,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); if (IS_ERR(cxlrd)) - return 0; + return PTR_ERR(cxlrd); cxld = &cxlrd->cxlsd.cxld; cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); @@ -295,16 +291,7 @@ err_xormap: put_device(&cxld->dev); else rc = cxl_decoder_autoremove(dev, cxld); - if (rc) { - dev_err(dev, "Failed to add decode range: %pr", res); - return rc; - } - dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n", - dev_name(&cxld->dev), - phys_to_target_node(cxld->hpa_range.start), - cxld->hpa_range.start, cxld->hpa_range.end); - - return 0; + return rc; err_insert: kfree(res->name); @@ -313,6 +300,29 @@ err_name: return -ENOMEM; } +static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cedt_cfmws *cfmws = (struct acpi_cedt_cfmws *)header; + struct cxl_cfmws_context *ctx = arg; + struct device *dev = ctx->dev; + int rc; + + rc = __cxl_parse_cfmws(cfmws, ctx); + if (rc) + dev_err(dev, + "Failed to add decode range: [%#llx - %#llx] (%d)\n", + cfmws->base_hpa, + cfmws->base_hpa + cfmws->window_size - 1, rc); + else + dev_dbg(dev, "decode range: node: %d range [%#llx - %#llx]\n", + phys_to_target_node(cfmws->base_hpa), cfmws->base_hpa, + cfmws->base_hpa + cfmws->window_size - 1); + + /* never fail cxl_acpi load for a single window failure */ + return 0; +} + __mock struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev) { From 2cc1a530ab31c65b52daf3cb5d0883c8b614ea69 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 16 Feb 2024 17:01:13 +0100 Subject: [PATCH 153/299] cxl/pci: Fix disabling memory if DVSEC CXL Range does not match a CFMWS window commit 0cab687205986491302cd2e440ef1d253031c221 upstream. The Linux CXL subsystem is built on the assumption that HPA == SPA. That is, the host physical address (HPA) the HDM decoder registers are programmed with are system physical addresses (SPA). During HDM decoder setup, the DVSEC CXL range registers (cxl-3.1, 8.1.3.8) are checked if the memory is enabled and the CXL range is in a HPA window that is described in a CFMWS structure of the CXL host bridge (cxl-3.1, 9.18.1.3). Now, if the HPA is not an SPA, the CXL range does not match a CFMWS window and the CXL memory range will be disabled then. The HDM decoder stops working which causes system memory being disabled and further a system hang during HDM decoder initialization, typically when a CXL enabled kernel boots. Prevent a system hang and do not disable the HDM decoder if the decoder's CXL range is not found in a CFMWS window. Note the change only fixes a hardware hang, but does not implement HPA/SPA translation. Support for this can be added in a follow on patch series. Signed-off-by: Robert Richter Fixes: 34e37b4c432c ("cxl/port: Enable HDM Capability after validating DVSEC Ranges") Cc: Link: https://lore.kernel.org/r/20240216160113.407141-1-rrichter@amd.com Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/cxl/core/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c7a7887ebdcf..c963cd9e88d1 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -475,9 +475,9 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, allowed++; } - if (!allowed) { - cxl_set_mem_enable(cxlds, 0); - info->mem_enabled = 0; + if (!allowed && info->mem_enabled) { + dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n"); + return -ENXIO; } /* From 9eb04add2a26ce692a4003773beaa5ad8e486929 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 13 Feb 2024 09:33:06 -0500 Subject: [PATCH 154/299] scsi: sd: usb_storage: uas: Access media prior to querying device properties commit 321da3dc1f3c92a12e3c5da934090d2992a8814c upstream. It has been observed that some USB/UAS devices return generic properties hardcoded in firmware for mode pages for a period of time after a device has been discovered. The reported properties are either garbage or they do not accurately reflect the characteristics of the physical storage device attached in the case of a bridge. Prior to commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") we would call revalidate several times during device discovery. As a result, incorrect values would eventually get replaced with ones accurately describing the attached storage. When we did away with the redundant revalidate pass, several cases were reported where devices reported nonsensical values or would end up in write-protected state. An initial attempt at addressing this issue involved introducing a delayed second revalidate invocation. However, this approach still left some devices reporting incorrect characteristics. Tasos Sahanidis debugged the problem further and identified that introducing a READ operation prior to MODE SENSE fixed the problem and that it wasn't a timing issue. Issuing a READ appears to cause the devices to update their state to reflect the actual properties of the storage media. Device properties like vendor, model, and storage capacity appear to be correctly reported from the get-go. It is unclear why these devices defer populating the remaining characteristics. Match the behavior of a well known commercial operating system and trigger a READ operation prior to querying device characteristics to force the device to populate the mode pages. The additional READ is triggered by a flag set in the USB storage and UAS drivers. We avoid issuing the READ for other transport classes since some storage devices identify Linux through our particular discovery command sequence. Link: https://lore.kernel.org/r/20240213143306.2194237-1-martin.petersen@oracle.com Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") Cc: stable@vger.kernel.org Reported-by: Tasos Sahanidis Reviewed-by: Ewan D. Milne Reviewed-by: Bart Van Assche Tested-by: Tasos Sahanidis Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 26 +++++++++++++++++++++++++- drivers/usb/storage/scsiglue.c | 7 +++++++ drivers/usb/storage/uas.c | 7 +++++++ include/scsi/scsi_device.h | 1 + 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c2e8d9e27749..1335119a4993 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3404,6 +3404,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } +static void sd_read_block_zero(struct scsi_disk *sdkp) +{ + unsigned int buf_len = sdkp->device->sector_size; + char *buffer, cmd[10] = { }; + + buffer = kmalloc(buf_len, GFP_KERNEL); + if (!buffer) + return; + + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + + scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, + SD_TIMEOUT, sdkp->max_retries, NULL); + kfree(buffer); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3443,7 +3461,13 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - + /* + * Some USB/UAS devices return generic values for mode pages + * until the media has been accessed. Trigger a READ operation + * to force the device to populate mode pages. + */ + if (sdp->read_before_ms) + sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index c54e9805da53..12cf9940e5b6 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev) */ sdev->use_192_bytes_for_3f = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 2583ee9815c5..18d6d6e2b44b 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -878,6 +878,13 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) sdev->guess_capacity = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 8fa1153f37cb..3e278fc88150 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -208,6 +208,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ + unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ From 4ebc079f0c7dcda1270843ab0f38ab4edb8f7921 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 14 Feb 2024 23:43:56 +0900 Subject: [PATCH 155/299] scsi: target: pscsi: Fix bio_put() for error case commit de959094eb2197636f7c803af0943cb9d3b35804 upstream. As of commit 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc wrapper"), a bio allocated by bio_kmalloc() must be freed by bio_uninit() and kfree(). That is not done properly for the error case, hitting WARN and NULL pointer dereference in bio_free(). Fixes: 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc wrapper") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Naohiro Aota Link: https://lore.kernel.org/r/20240214144356.101814-1-naohiro.aota@wdc.com Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_pscsi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 0d4f09693ef4..da59c1ac2f2e 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -907,12 +907,15 @@ new_bio: return 0; fail: - if (bio) - bio_put(bio); + if (bio) { + bio_uninit(bio); + kfree(bio); + } while (req->bio) { bio = req->bio; req->bio = bio->bi_next; - bio_put(bio); + bio_uninit(bio); + kfree(bio); } req->biotail = NULL; return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; From 3c884ee7c5d4715b09062a0b713b76cd8dd83f1c Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 14 Feb 2024 17:14:11 -0500 Subject: [PATCH 156/299] scsi: core: Consult supported VPD page list prior to fetching page commit b5fc07a5fb56216a49e6c1d0b172d5464d99a89b upstream. Commit c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") removed the logic which checks whether a VPD page is present on the supported pages list before asking for the page itself. That was done because SPC helpfully states "The Supported VPD Pages VPD page list may or may not include all the VPD pages that are able to be returned by the device server". Testing had revealed a few devices that supported some of the 0xBn pages but didn't actually list them in page 0. Julian Sikorski bisected a problem with his drive resetting during discovery to the commit above. As it turns out, this particular drive firmware will crash if we attempt to fetch page 0xB9. Various approaches were attempted to work around this. In the end, reinstating the logic that consults VPD page 0 before fetching any other page was the path of least resistance. A firmware update for the devices which originally compelled us to remove the check has since been released. Link: https://lore.kernel.org/r/20240214221411.2888112-1-martin.petersen@oracle.com Fixes: c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") Cc: stable@vger.kernel.org Cc: Bart Van Assche Reported-by: Julian Sikorski Tested-by: Julian Sikorski Reviewed-by: Lee Duncan Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi.c | 22 ++++++++++++++++++++-- include/scsi/scsi_device.h | 4 ---- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 89367c4bf0ef..bd66612c0a50 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer, return result + 4; } +enum scsi_vpd_parameters { + SCSI_VPD_HEADER_SIZE = 4, + SCSI_VPD_LIST_SIZE = 36, +}; + static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) { - unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4); + unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4); int result; if (sdev->no_vpd_size) return SCSI_DEFAULT_VPD_LEN; + /* + * Fetch the supported pages VPD and validate that the requested page + * number is present. + */ + if (page != 0) { + result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd)); + if (result < SCSI_VPD_HEADER_SIZE) + return 0; + + result -= SCSI_VPD_HEADER_SIZE; + if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result)) + return 0; + } /* * Fetch the VPD page header to find out how big the page * is. This is done to prevent problems on legacy devices * which can not handle allocation lengths as large as * potentially requested by the caller. */ - result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header)); + result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE); if (result < 0) return 0; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 3e278fc88150..9c8b6f611330 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -100,10 +100,6 @@ struct scsi_vpd { unsigned char data[]; }; -enum scsi_vpd_parameters { - SCSI_VPD_HEADER_SIZE = 4, -}; - struct scsi_device { struct Scsi_Host *host; struct request_queue *request_queue; From 0b34dca1bfd5eb98e8a2e84aa5ed0d137450562a Mon Sep 17 00:00:00 2001 From: Terry Tritton Date: Mon, 5 Feb 2024 14:50:56 +0000 Subject: [PATCH 157/299] selftests/mm: uffd-unit-test check if huge page size is 0 commit 7efa6f2c803366f84c3c362f01e822490669d72b upstream. If HUGETLBFS is not enabled then the default_huge_page_size function will return 0 and cause a divide by 0 error. Add a check to see if the huge page size is 0 and skip the hugetlb tests if it is. Link: https://lkml.kernel.org/r/20240205145055.3545806-2-terry.tritton@linaro.org Fixes: 16a45b57cbf2 ("selftests/mm: add framework for uffd-unit-test") Signed-off-by: Terry Tritton Cc: Peter Griffin Cc: Shuah Khan Cc: Peter Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/mm/uffd-unit-tests.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 2709a34a39c5..2c68709062da 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1309,6 +1309,12 @@ int main(int argc, char *argv[]) continue; uffd_test_start("%s on %s", test->name, mem_type->name); + if ((mem_type->mem_flag == MEM_HUGETLB || + mem_type->mem_flag == MEM_HUGETLB_PRIVATE) && + (default_huge_page_size() == 0)) { + uffd_test_skip("huge page size is 0, feature missing?"); + continue; + } if (!uffd_feature_supported(test)) { uffd_test_skip("feature missing"); continue; From 305152314df82b22cf9b181f3dc5fc411002079a Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 7 Feb 2024 02:25:59 +0800 Subject: [PATCH 158/299] mm/swap: fix race when skipping swapcache commit 13ddaf26be324a7f951891ecd9ccd04466d27458 upstream. When skipping swapcache for SWP_SYNCHRONOUS_IO, if two or more threads swapin the same entry at the same time, they get different pages (A, B). Before one thread (T0) finishes the swapin and installs page (A) to the PTE, another thread (T1) could finish swapin of page (B), swap_free the entry, then swap out the possibly modified page reusing the same entry. It breaks the pte_same check in (T0) because PTE value is unchanged, causing ABA problem. Thread (T0) will install a stalled page (A) into the PTE and cause data corruption. One possible callstack is like this: CPU0 CPU1 ---- ---- do_swap_page() do_swap_page() with same entry swap_read_folio() <- read to page A swap_read_folio() <- read to page B ... set_pte_at() swap_free() <- entry is free pte_same() <- Check pass, PTE seems unchanged, but page A is stalled! swap_free() <- page B content lost! set_pte_at() <- staled page A installed! And besides, for ZRAM, swap_free() allows the swap device to discard the entry content, so even if page (B) is not modified, if swap_read_folio() on CPU0 happens later than swap_free() on CPU1, it may also cause data loss. To fix this, reuse swapcache_prepare which will pin the swap entry using the cache flag, and allow only one thread to swap it in, also prevent any parallel code from putting the entry in the cache. Release the pin after PT unlocked. Racers just loop and wait since it's a rare and very short event. A schedule_timeout_uninterruptible(1) call is added to avoid repeated page faults wasting too much CPU, causing livelock or adding too much noise to perf statistics. A similar livelock issue was described in commit 029c4628b2eb ("mm: swap: get rid of livelock in swapin readahead") Reproducer: This race issue can be triggered easily using a well constructed reproducer and patched brd (with a delay in read path) [1]: With latest 6.8 mainline, race caused data loss can be observed easily: $ gcc -g -lpthread test-thread-swap-race.c && ./a.out Polulating 32MB of memory region... Keep swapping out... Starting round 0... Spawning 65536 workers... 32746 workers spawned, wait for done... Round 0: Error on 0x5aa00, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x395200, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x3fd000, expected 32746, got 32737, 9 data loss! Round 0 Failed, 15 data loss! This reproducer spawns multiple threads sharing the same memory region using a small swap device. Every two threads updates mapped pages one by one in opposite direction trying to create a race, with one dedicated thread keep swapping out the data out using madvise. The reproducer created a reproduce rate of about once every 5 minutes, so the race should be totally possible in production. After this patch, I ran the reproducer for over a few hundred rounds and no data loss observed. Performance overhead is minimal, microbenchmark swapin 10G from 32G zram: Before: 10934698 us After: 11157121 us Cached: 13155355 us (Dropping SWP_SYNCHRONOUS_IO flag) [kasong@tencent.com: v4] Link: https://lkml.kernel.org/r/20240219082040.7495-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20240206182559.32264-1-ryncsn@gmail.com Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device") Reported-by: "Huang, Ying" Closes: https://lore.kernel.org/lkml/87bk92gqpx.fsf_-_@yhuang6-desk2.ccr.corp.intel.com/ Link: https://github.com/ryncsn/emm-test-project/tree/master/swap-stress-race [1] Signed-off-by: Kairui Song Reviewed-by: "Huang, Ying" Acked-by: Yu Zhao Acked-by: David Hildenbrand Acked-by: Chris Li Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Yosry Ahmed Cc: Yu Zhao Cc: Barry Song <21cnbao@gmail.com> Cc: SeongJae Park Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/swap.h | 5 +++++ mm/memory.c | 20 ++++++++++++++++++++ mm/swap.h | 5 +++++ mm/swapfile.c | 13 +++++++++++++ 4 files changed, 43 insertions(+) diff --git a/include/linux/swap.h b/include/linux/swap.h index 493487ed7c38..cb25db2a93dd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -552,6 +552,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } diff --git a/mm/memory.c b/mm/memory.c index b3be18f1f120..78e05d3e9e4a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3726,6 +3726,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) struct page *page; struct swap_info_struct *si = NULL; rmap_t rmap_flags = RMAP_NONE; + bool need_clear_cache = false; bool exclusive = false; swp_entry_t entry; pte_t pte; @@ -3794,6 +3795,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!folio) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + goto out; + } + need_clear_cache = true; + /* skip swapcache */ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address, false); @@ -4040,6 +4055,9 @@ unlock: if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); out: + /* Clear the swap cache pin for direct swapin after PTL unlock */ + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; @@ -4054,6 +4072,8 @@ out_release: folio_unlock(swapcache); folio_put(swapcache); } + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; diff --git a/mm/swap.h b/mm/swap.h index 8a3c7a0ace4f..693d1b281559 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -38,6 +38,7 @@ void __delete_from_swap_cache(struct folio *folio, void delete_from_swap_cache(struct folio *folio); void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct folio *filemap_get_incore_folio(struct address_space *mapping, @@ -96,6 +97,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc) return 0; } +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ +} + static inline struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr) { diff --git a/mm/swapfile.c b/mm/swapfile.c index e52f486834eb..750314fff0c4 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3362,6 +3362,19 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char usage; + + ci = lock_cluster_or_swap_info(si, offset); + usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); + unlock_cluster_or_swap_info(si, ci); + if (!usage) + free_swap_slot(entry); +} + struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); From 3c4441b23bf7bed257eddf7920e39b297631fab1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 16 Feb 2024 11:40:25 -0800 Subject: [PATCH 159/299] mm/damon/lru_sort: fix quota status loss due to online tunings commit 13d0599ab3b2ff17f798353f24bcbef1659d3cfc upstream. For online parameters change, DAMON_LRU_SORT creates new schemes based on latest values of the parameters and replaces the old schemes with the new one. When creating it, the internal status of the quotas of the old schemes is not preserved. As a result, charging of the quota starts from zero after the online tuning. The data that collected to estimate the throughput of the scheme's action is also reset, and therefore the estimation should start from the scratch again. Because the throughput estimation is being used to convert the time quota to the effective size quota, this could result in temporal time quota inaccuracy. It would be recovered over time, though. In short, the quota accuracy could be temporarily degraded after online parameters update. Fix the problem by checking the case and copying the internal fields for the status. Link: https://lkml.kernel.org/r/20240216194025.9207-3-sj@kernel.org Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting") Signed-off-by: SeongJae Park Cc: [6.0+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/lru_sort.c | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 3071e08e8b8f..e84495ab92cf 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -183,9 +183,21 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres) return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO); } +static void damon_lru_sort_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_lru_sort_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *hot_scheme, *cold_scheme; + struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL; unsigned int hot_thres, cold_thres; int err = 0; @@ -193,18 +205,35 @@ static int damon_lru_sort_apply_parameters(void) if (err) return err; + damon_for_each_scheme(scheme, ctx) { + if (!old_hot_scheme) { + old_hot_scheme = scheme; + continue; + } + old_cold_scheme = scheme; + } + hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) * hot_thres_access_freq / 1000; - scheme = damon_lru_sort_new_hot_scheme(hot_thres); - if (!scheme) + hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres); + if (!hot_scheme) return -ENOMEM; - damon_set_schemes(ctx, &scheme, 1); + if (old_hot_scheme) + damon_lru_sort_copy_quota_status(&hot_scheme->quota, + &old_hot_scheme->quota); cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval; - scheme = damon_lru_sort_new_cold_scheme(cold_thres); - if (!scheme) + cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres); + if (!cold_scheme) { + damon_destroy_scheme(hot_scheme); return -ENOMEM; - damon_add_scheme(ctx, scheme); + } + if (old_cold_scheme) + damon_lru_sort_copy_quota_status(&cold_scheme->quota, + &old_cold_scheme->quota); + + damon_set_schemes(ctx, &hot_scheme, 1); + damon_add_scheme(ctx, cold_scheme); return damon_set_region_biggest_system_ram_default(target, &monitor_region_start, From 8350888b02266f620205190c73894a88d9c188ee Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 13 Feb 2024 03:16:34 -0500 Subject: [PATCH 160/299] mm: memcontrol: clarify swapaccount=0 deprecation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 118642d7f606fc9b9c92ee611275420320290ffb upstream. The swapaccount deprecation warning is throwing false positives. Since we deprecated the knob and defaulted to enabling, the only reports we've been getting are from folks that set swapaccount=1. While this is a nice affirmation that always-enabling was the right choice, we certainly don't want to warn when users request the supported mode. Only warn when disabling is requested, and clarify the warning. [colin.i.king@gmail.com: spelling: "commdandline" -> "commandline"] Link: https://lkml.kernel.org/r/20240215090544.1649201-1-colin.i.king@gmail.com Link: https://lkml.kernel.org/r/20240213081634.3652326-1-hannes@cmpxchg.org Fixes: b25806dcd3d5 ("mm: memcontrol: deprecate swapaccounting=0 mode") Signed-off-by: Colin Ian King Reported-by: "Jonas Schäfer" Reported-by: Narcis Garcia Suggested-by: Yosry Ahmed Signed-off-by: Johannes Weiner Reviewed-by: Yosry Ahmed Acked-by: Michal Hocko Acked-by: Shakeel Butt Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8a881ab21f6c..dd854cc65fd9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7613,9 +7613,13 @@ bool mem_cgroup_swap_full(struct folio *folio) static int __init setup_swap_account(char *s) { - pr_warn_once("The swapaccount= commandline option is deprecated. " - "Please report your usecase to linux-mm@kvack.org if you " - "depend on this functionality.\n"); + bool res; + + if (!kstrtobool(s, &res) && !res) + pr_warn_once("The swapaccount=0 commandline option is deprecated " + "in favor of configuring swap control via cgroupfs. " + "Please report your usecase to linux-mm@kvack.org if you " + "depend on this functionality.\n"); return 1; } __setup("swapaccount=", setup_swap_account); From 9cad9a2e896c952e48d2d5a6acceb4fc401e679e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 16 Feb 2024 11:40:24 -0800 Subject: [PATCH 161/299] mm/damon/reclaim: fix quota stauts loss due to online tunings commit 1b0ca4e4ff10a2c8402e2cf70132c683e1c772e4 upstream. Patch series "mm/damon: fix quota status loss due to online tunings". DAMON_RECLAIM and DAMON_LRU_SORT is not preserving internal quota status when applying new user parameters, and hence could cause temporal quota accuracy degradation. Fix it by preserving the status. This patch (of 2): For online parameters change, DAMON_RECLAIM creates new scheme based on latest values of the parameters and replaces the old scheme with the new one. When creating it, the internal status of the quota of the old scheme is not preserved. As a result, charging of the quota starts from zero after the online tuning. The data that collected to estimate the throughput of the scheme's action is also reset, and therefore the estimation should start from the scratch again. Because the throughput estimation is being used to convert the time quota to the effective size quota, this could result in temporal time quota inaccuracy. It would be recovered over time, though. In short, the quota accuracy could be temporarily degraded after online parameters update. Fix the problem by checking the case and copying the internal fields for the status. Link: https://lkml.kernel.org/r/20240216194025.9207-1-sj@kernel.org Link: https://lkml.kernel.org/r/20240216194025.9207-2-sj@kernel.org Fixes: e035c280f6df ("mm/damon/reclaim: support online inputs update") Signed-off-by: SeongJae Park Cc: [5.19+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/damon/reclaim.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 648d2a85523a..eca9d000ecc5 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -148,9 +148,20 @@ static struct damos *damon_reclaim_new_scheme(void) &damon_reclaim_wmarks); } +static void damon_reclaim_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_reclaim_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *old_scheme; struct damos_filter *filter; int err = 0; @@ -162,6 +173,11 @@ static int damon_reclaim_apply_parameters(void) scheme = damon_reclaim_new_scheme(); if (!scheme) return -ENOMEM; + if (!list_empty(&ctx->schemes)) { + damon_for_each_scheme(old_scheme, ctx) + damon_reclaim_copy_quota_status(&scheme->quota, + &old_scheme->quota); + } if (skip_anon) { filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true); if (!filter) { From e78a4e221ebf8711cc70d965b4083c51a90feb44 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2024 21:33:00 +0100 Subject: [PATCH 162/299] platform/x86: intel-vbtn: Stop calling "VBDL" from notify_handler commit 84c16d01ff219bc0a5dca5219db6b8b86a6854fb upstream. Commit 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing tablet-mode-switch events") causes 2 issues on the ThinkPad X1 Tablet Gen2: 1. The ThinkPad will wake up immediately from suspend 2. When put in tablet mode SW_TABLET_MODE reverts to 0 after about 1 second Both these issues are caused by the "VBDL" ACPI method call added at the end of the notify_handler. And it never became entirely clear if this call is even necessary to fix the issue of missing tablet-mode-switch events on the Dell Inspiron 7352. Drop the "VBDL" ACPI method call again to fix the 2 issues this is causing on the ThinkPad X1 Tablet Gen2. Fixes: 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing tablet-mode-switch events") Reported-by: Alexander Kobel Closes: https://lore.kernel.org/platform-driver-x86/295984ce-bd4b-49bd-adc5-ffe7c898d7f0@a-kobel.de/ Cc: regressions@lists.linux.dev Cc: Arnold Gozum Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Tested-by: Alexander Kobel Link: https://lore.kernel.org/r/20240216203300.245826-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel/vbtn.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 210b0a81b7ec..084c355c86f5 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -200,9 +200,6 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE); sparse_keymap_report_event(input_dev, event, val, autorelease); - - /* Some devices need this to report further events */ - acpi_evaluate_object(handle, "VBDL", NULL, NULL); } /* From 557cac23bee3d43a74b823be9df1c27dbe8345db Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 12 Feb 2024 13:06:07 +0100 Subject: [PATCH 163/299] platform/x86: touchscreen_dmi: Allow partial (prefix) matches for ACPI names commit dbcbfd662a725641d118fb3ae5ffb7be4e3d0fb0 upstream. On some devices the ACPI name of the touchscreen is e.g. either MSSL1680:00 or MSSL1680:01 depending on the BIOS version. This happens for example on the "Chuwi Hi8 Air" tablet where the initial commit's ts_data uses "MSSL1680:00" but the tablets from the github issue and linux-hardware.org probe linked below both use "MSSL1680:01". Replace the strcmp() match on ts_data->acpi_name with a strstarts() check to allow using a partial match on just the ACPI HID of "MSSL1680" and change the ts_data->acpi_name for the "Chuwi Hi8 Air" accordingly to fix the touchscreen not working on models where it is "MSSL1680:01". Note this drops the length check for I2C_NAME_SIZE. This never was necessary since the ACPI names used are never more then 11 chars and I2C_NAME_SIZE is 20 so the replaced strncmp() would always stop long before reaching I2C_NAME_SIZE. Link: https://linux-hardware.org/?computer=AC4301C0542A Fixes: bbb97d728f77 ("platform/x86: touchscreen_dmi: Add info for the Chuwi Hi8 Air tablet") Closes: https://github.com/onitake/gsl-firmware/issues/91 Cc: stable@vger.kernel.org Reviewed-by: Kuppuswamy Sathyanarayanan Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240212120608.30469-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/touchscreen_dmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 7aee5e9ff2b8..969477c83e56 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -81,7 +81,7 @@ static const struct property_entry chuwi_hi8_air_props[] = { }; static const struct ts_dmi_data chuwi_hi8_air_data = { - .acpi_name = "MSSL1680:00", + .acpi_name = "MSSL1680", .properties = chuwi_hi8_air_props, }; @@ -1821,7 +1821,7 @@ static void ts_dmi_add_props(struct i2c_client *client) int error; if (has_acpi_companion(dev) && - !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) { + strstarts(client->name, ts_data->acpi_name)) { error = device_create_managed_software_node(dev, ts_data->properties, NULL); if (error) dev_err(dev, "failed to add properties: %d\n", error); From 38e921616320d159336b0ffadb09e9fb4945c7c3 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Sat, 17 Feb 2024 16:14:31 +0800 Subject: [PATCH 164/299] cachefiles: fix memory leak in cachefiles_add_cache() commit e21a2f17566cbd64926fb8f16323972f7a064444 upstream. The following memory leak was reported after unbinding /dev/cachefiles: ================================================================== unreferenced object 0xffff9b674176e3c0 (size 192): comm "cachefilesd2", pid 680, jiffies 4294881224 hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc ea38a44b): [] kmem_cache_alloc+0x2d5/0x370 [] prepare_creds+0x26/0x2e0 [] cachefiles_determine_cache_security+0x1f/0x120 [] cachefiles_add_cache+0x13c/0x3a0 [] cachefiles_daemon_write+0x146/0x1c0 [] vfs_write+0xcb/0x520 [] ksys_write+0x69/0xf0 [] do_syscall_64+0x72/0x140 [] entry_SYSCALL_64_after_hwframe+0x6e/0x76 ================================================================== Put the reference count of cache_cred in cachefiles_daemon_unbind() to fix the problem. And also put cache_cred in cachefiles_add_cache() error branch to avoid memory leaks. Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") CC: stable@vger.kernel.org Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240217081431.796809-1-libaokun1@huawei.com Acked-by: David Howells Reviewed-by: Jingbo Xu Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/cache.c | 2 ++ fs/cachefiles/daemon.c | 1 + 2 files changed, 3 insertions(+) diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c index 7077f72e6f47..f449f7340aad 100644 --- a/fs/cachefiles/cache.c +++ b/fs/cachefiles/cache.c @@ -168,6 +168,8 @@ error_unsupported: dput(root); error_open_root: cachefiles_end_secure(cache, saved_cred); + put_cred(cache->cache_cred); + cache->cache_cred = NULL; error_getsec: fscache_relinquish_cache(cache_cookie); cache->cache = NULL; diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index aa4efcabb5e3..5f4df9588620 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -805,6 +805,7 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache) cachefiles_put_directory(cache->graveyard); cachefiles_put_directory(cache->store); mntput(cache->mnt); + put_cred(cache->cache_cred); kfree(cache->rootdirname); kfree(cache->secctx); From 8b004583dbc9da62ad2aebf63a9e20815b3b433c Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 20 Feb 2024 10:54:12 +0100 Subject: [PATCH 165/299] sparc: Fix undefined reference to fb_is_primary_device commit ed683b9bb91fc274383e222ba5873a9ee9033462 upstream. Commit 55bffc8170bb ("fbdev: Split frame buffer support in FB and FB_CORE symbols") added a new FB_CORE Kconfig symbol, that can be enabled to only have fbcon/VT and DRM fbdev emulation, but without support for any legacy fbdev driver. Unfortunately, it missed to change the CONFIG_FB in arch/sparc makefiles, which leads to the following linking error in some sparc64 configurations: sparc64-linux-ld: drivers/video/fbdev/core/fbcon.o: in function `fbcon_fb_registered': >> fbcon.c:(.text+0x4f60): undefined reference to `fb_is_primary_device' Fixes: 55bffc8170bb ("fbdev: Split frame buffer support in FB and FB_CORE symbols") Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202401290306.IV8rhJ02-lkp@intel.com/ Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Acked-by: Arnd Bergmann Cc: # v6.6+ Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240220095428.3341195-1-javierm@redhat.com Signed-off-by: Greg Kroah-Hartman --- arch/sparc/Makefile | 2 +- arch/sparc/video/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 7417345c6639..60da865c079a 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -60,7 +60,7 @@ libs-y += arch/sparc/prom/ libs-y += arch/sparc/lib/ drivers-$(CONFIG_PM) += arch/sparc/power/ -drivers-$(CONFIG_FB) += arch/sparc/video/ +drivers-$(CONFIG_FB_CORE) += arch/sparc/video/ boot := arch/sparc/boot diff --git a/arch/sparc/video/Makefile b/arch/sparc/video/Makefile index 6baddbd58e4d..d4d83f1702c6 100644 --- a/arch/sparc/video/Makefile +++ b/arch/sparc/video/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_FB) += fbdev.o +obj-$(CONFIG_FB_CORE) += fbdev.o From 02dad157ba11064d073f5499dc33552b227d5d3a Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 1 Feb 2024 17:25:51 +0800 Subject: [PATCH 166/299] md: Fix missing release of 'active_io' for flush commit 855678ed8534518e2b428bcbcec695de9ba248e8 upstream. submit_flushes atomic_set(&mddev->flush_pending, 1); rdev_for_each_rcu(rdev, mddev) atomic_inc(&mddev->flush_pending); bi->bi_end_io = md_end_flush submit_bio(bi); /* flush io is done first */ md_end_flush if (atomic_dec_and_test(&mddev->flush_pending)) percpu_ref_put(&mddev->active_io) -> active_io is not released if (atomic_dec_and_test(&mddev->flush_pending)) -> missing release of active_io For consequence, mddev_suspend() will wait for 'active_io' to be zero forever. Fix this problem by releasing 'active_io' in submit_flushes() if 'flush_pending' is decreased to zero. Fixes: fa2bbff7b0b4 ("md: synchronize flush io with array reconfiguration") Cc: stable@vger.kernel.org # v6.1+ Reported-by: Blazej Kucman Closes: https://lore.kernel.org/lkml/20240130172524.0000417b@linux.intel.com/ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240201092559.910982-7-yukuai1@huaweicloud.com Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 108590041db6..ce0516bdf8fa 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -530,8 +530,12 @@ static void submit_flushes(struct work_struct *ws) rcu_read_lock(); } rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pair is percpu_ref_get() from md_flush_request() */ + percpu_ref_put(&mddev->active_io); + queue_work(md_wq, &mddev->flush_work); + } } static void md_submit_flush_data(struct work_struct *ws) From fcf90b4703bd1d1dd69608aa16b85ae64fea34fb Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 21 Feb 2024 09:27:32 +0000 Subject: [PATCH 167/299] KVM: arm64: vgic-its: Test for valid IRQ in MOVALL handler commit 85a71ee9a0700f6c18862ef3b0011ed9dad99aca upstream. It is possible that an LPI mapped in a different ITS gets unmapped while handling the MOVALL command. If that is the case, there is no state that can be migrated to the destination. Silently ignore it and continue migrating other LPIs. Cc: stable@vger.kernel.org Fixes: ff9c114394aa ("KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE") Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20240221092732.4126848-3-oliver.upton@linux.dev Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/vgic/vgic-its.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index c420723548d8..fe458e556d20 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -1427,6 +1427,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, for (i = 0; i < irq_count; i++) { irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; update_affinity(irq, vcpu2); From 3f70ed98f77619dd2ad5c470c6d4083c0b6885ab Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 21 Feb 2024 09:27:31 +0000 Subject: [PATCH 168/299] KVM: arm64: vgic-its: Test for valid IRQ in its_sync_lpi_pending_table() commit 8d3a7dfb801d157ac423261d7cd62c33e95375f8 upstream. vgic_get_irq() may not return a valid descriptor if there is no ITS that holds a valid translation for the specified INTID. If that is the case, it is safe to silently ignore it and continue processing the LPI pending table. Cc: stable@vger.kernel.org Fixes: 33d3bc9556a7 ("KVM: arm64: vgic-its: Read initial LPI pending table") Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20240221092732.4126848-2-oliver.upton@linux.dev Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kvm/vgic/vgic-its.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index fe458e556d20..4f9084ba7949 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -462,6 +462,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) } irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + if (!irq) + continue; + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); From cca20208515e23667a19d7be52207c782fd10922 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Tue, 20 Feb 2024 14:16:24 +0100 Subject: [PATCH 169/299] accel/ivpu: Don't enable any tiles by default on VPU40xx commit eb0d253ff9c74dee30aa92fe460b825eb28acd73 upstream. There is no point in requesting 1 tile on VPU40xx as the FW will probably need more tiles to run workloads, so it will have to reconfigure PLL anyway. Don't enable any tiles and allow the FW to perform initial tile configuration. This improves NPU boot stability as the tiles are always enabled only by the FW from the same initial state. Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4") Cc: stable@vger.kernel.org Signed-off-by: Andrzej Kacprowski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240220131624.1447813-1-jacek.lawrynowicz@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/accel/ivpu/ivpu_hw_40xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index ae6be0d5aacc..cced6278c4f8 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -24,7 +24,7 @@ #define SKU_HW_ID_SHIFT 16u #define SKU_HW_ID_MASK 0xffff0000u -#define PLL_CONFIG_DEFAULT 0x1 +#define PLL_CONFIG_DEFAULT 0x0 #define PLL_CDYN_DEFAULT 0x80 #define PLL_EPP_DEFAULT 0x80 #define PLL_REF_CLK_FREQ (50 * 1000000) From ba6b8b02a3314e62571a540efa96560888c5f03e Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Wed, 14 Feb 2024 19:27:33 +0300 Subject: [PATCH 170/299] gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp() commit 136cfaca22567a03bbb3bf53a43d8cb5748b80ec upstream. The gtp_net_ops pernet operations structure for the subsystem must be registered before registering the generic netlink family. Syzkaller hit 'general protection fault in gtp_genl_dump_pdp' bug: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 1 PID: 5826 Comm: gtp Not tainted 6.8.0-rc3-std-def-alt1 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-alt1 04/01/2014 RIP: 0010:gtp_genl_dump_pdp+0x1be/0x800 [gtp] Code: c6 89 c6 e8 64 e9 86 df 58 45 85 f6 0f 85 4e 04 00 00 e8 c5 ee 86 df 48 8b 54 24 18 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 de 05 00 00 48 8b 44 24 18 4c 8b 30 4c 39 f0 74 RSP: 0018:ffff888014107220 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff88800fcda588 R14: 0000000000000001 R15: 0000000000000000 FS: 00007f1be4eb05c0(0000) GS:ffff88806ce80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1be4e766cf CR3: 000000000c33e000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? show_regs+0x90/0xa0 ? die_addr+0x50/0xd0 ? exc_general_protection+0x148/0x220 ? asm_exc_general_protection+0x22/0x30 ? gtp_genl_dump_pdp+0x1be/0x800 [gtp] ? __alloc_skb+0x1dd/0x350 ? __pfx___alloc_skb+0x10/0x10 genl_dumpit+0x11d/0x230 netlink_dump+0x5b9/0xce0 ? lockdep_hardirqs_on_prepare+0x253/0x430 ? __pfx_netlink_dump+0x10/0x10 ? kasan_save_track+0x10/0x40 ? __kasan_kmalloc+0x9b/0xa0 ? genl_start+0x675/0x970 __netlink_dump_start+0x6fc/0x9f0 genl_family_rcv_msg_dumpit+0x1bb/0x2d0 ? __pfx_genl_family_rcv_msg_dumpit+0x10/0x10 ? genl_op_from_small+0x2a/0x440 ? cap_capable+0x1d0/0x240 ? __pfx_genl_start+0x10/0x10 ? __pfx_genl_dumpit+0x10/0x10 ? __pfx_genl_done+0x10/0x10 ? security_capable+0x9d/0xe0 Cc: stable@vger.kernel.org Signed-off-by: Vasiliy Kovalev Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Link: https://lore.kernel.org/r/20240214162733.34214-1-kovalev@altlinux.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index b1919278e931..2129ae42c703 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1907,20 +1907,20 @@ static int __init gtp_init(void) if (err < 0) goto error_out; - err = genl_register_family(>p_genl_family); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto unreg_rtnl_link; - err = register_pernet_subsys(>p_net_ops); + err = genl_register_family(>p_genl_family); if (err < 0) - goto unreg_genl_family; + goto unreg_pernet_subsys; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_genl_family: - genl_unregister_family(>p_genl_family); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); error_out: From b0365460e945e1117b47cf7329d86de752daff63 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 30 Jan 2024 19:27:40 +0800 Subject: [PATCH 171/299] crypto: virtio/akcipher - Fix stack overflow on memcpy commit c0ec2a712daf133d9996a8a1b7ee2d4996080363 upstream. sizeof(struct virtio_crypto_akcipher_session_para) is less than sizeof(struct virtio_crypto_op_ctrl_req::u), copying more bytes from stack variable leads stack overflow. Clang reports this issue by commands: make -j CC=clang-14 mrproper >/dev/null 2>&1 make -j O=/tmp/crypto-build CC=clang-14 allmodconfig >/dev/null 2>&1 make -j O=/tmp/crypto-build W=1 CC=clang-14 drivers/crypto/virtio/ virtio_crypto_akcipher_algs.o Fixes: 59ca6c93387d ("virtio-crypto: implement RSA algorithm") Link: https://lore.kernel.org/all/0a194a79-e3a3-45e7-be98-83abd3e1cb7e@roeck-us.net/ Cc: Signed-off-by: zhenwei pi Tested-by: Nathan Chancellor # build Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 2621ff8a9376..de53eddf6796 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -104,7 +104,8 @@ static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request * } static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx, - struct virtio_crypto_ctrl_header *header, void *para, + struct virtio_crypto_ctrl_header *header, + struct virtio_crypto_akcipher_session_para *para, const uint8_t *key, unsigned int keylen) { struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; @@ -128,7 +129,7 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher ctrl = &vc_ctrl_req->ctrl; memcpy(&ctrl->header, header, sizeof(ctrl->header)); - memcpy(&ctrl->u, para, sizeof(ctrl->u)); + memcpy(&ctrl->u.akcipher_create_session.para, para, sizeof(*para)); input = &vc_ctrl_req->input; input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); From a9ab338683a243f93ff467ead1c02cf0c57600f3 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Tue, 20 Feb 2024 19:14:29 +0800 Subject: [PATCH 172/299] irqchip/mbigen: Don't use bus_get_dev_root() to find the parent commit fb33a46cd75e18773dd5a414744507d84ae90870 upstream. bus_get_dev_root() returns sp->dev_root which is set in subsys_register(), but subsys_register() is not called by platform_bus_init(). Therefor for the platform_bus_type, bus_get_dev_root() always returns NULL. This makes mbigen_of_create_domain() always return -ENODEV. Don't try to retrieve the parent via bus_get_dev_root() and unconditionally hand a NULL pointer to of_platform_device_create() to fix this. Fixes: fea087fc291b ("irqchip/mbigen: move to use bus_get_dev_root()") Signed-off-by: Chen Jun Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240220111429.110666-1-chenjun102@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-mbigen.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 5101a3fb11df..58881d313979 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -235,22 +235,17 @@ static const struct irq_domain_ops mbigen_domain_ops = { static int mbigen_of_create_domain(struct platform_device *pdev, struct mbigen_device *mgn_chip) { - struct device *parent; struct platform_device *child; struct irq_domain *domain; struct device_node *np; u32 num_pins; int ret = 0; - parent = bus_get_dev_root(&platform_bus_type); - if (!parent) - return -ENODEV; - for_each_child_of_node(pdev->dev.of_node, np) { if (!of_property_read_bool(np, "interrupt-controller")) continue; - child = of_platform_device_create(np, NULL, parent); + child = of_platform_device_create(np, NULL, NULL); if (!child) { ret = -ENOMEM; break; @@ -273,7 +268,6 @@ static int mbigen_of_create_domain(struct platform_device *pdev, } } - put_device(parent); if (ret) of_node_put(np); From 4332f54153de06d75a8d122ead32bc0b43049be8 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 19 Feb 2024 18:58:06 +0000 Subject: [PATCH 173/299] irqchip/gic-v3-its: Do not assume vPE tables are preallocated commit ec4308ecfc887128a468f03fb66b767559c57c23 upstream. The GIC/ITS code is designed to ensure to pick up any preallocated LPI tables on the redistributors, as enabling LPIs is a one-way switch. There is no such restriction for vLPIs, and for GICv4.1 it is expected to allocate a new vPE table at boot. This works as intended when initializing an ITS, however when setting up a redistributor in cpu_init_lpis() the early return for preallocated RD tables skips straight past the GICv4 setup. This all comes to a head when trying to kexec() into a new kernel, as the new kernel silently fails to set up GICv4, leading to a complete loss of SGIs and LPIs for KVM VMs. Slap a band-aid on the problem by ensuring its_cpu_init_lpis() always initializes GICv4 on the way out, even if the other RD tables were preallocated. Fixes: 6479450f72c1 ("irqchip/gic-v4: Fix occasional VLPI drop") Reported-by: George Cherian Co-developed-by: Marc Zyngier Signed-off-by: Marc Zyngier Signed-off-by: Oliver Upton Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240219185809.286724-2-oliver.upton@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 3632c92cd183..676c9250d3f2 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3181,6 +3181,7 @@ static void its_cpu_init_lpis(void) val |= GICR_CTLR_ENABLE_LPIS; writel_relaxed(val, rbase + GICR_CTLR); +out: if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); @@ -3216,7 +3217,6 @@ static void its_cpu_init_lpis(void) /* Make sure the GIC has seen the above */ dsb(sy); -out: gic_data_rdist()->flags |= RD_LOCAL_LPI_ENABLED; pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n", smp_processor_id(), From fbe1049a4d484a80ee2b370bc0a8814cb4e24686 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 31 Jan 2024 09:19:33 +0100 Subject: [PATCH 174/299] irqchip/sifive-plic: Enable interrupt if needed before EOI commit 9c92006b896c767218aabe8947b62026a571cfd0 upstream. RISC-V PLIC cannot "end-of-interrupt" (EOI) disabled interrupts, as explained in the description of Interrupt Completion in the PLIC spec: "The PLIC signals it has completed executing an interrupt handler by writing the interrupt ID it received from the claim to the claim/complete register. The PLIC does not check whether the completion ID is the same as the last claim ID for that target. If the completion ID does not match an interrupt source that *is currently enabled* for the target, the completion is silently ignored." Commit 69ea463021be ("irqchip/sifive-plic: Fixup EOI failed when masked") ensured that EOI is successful by enabling interrupt first, before EOI. Commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask operations") removed the interrupt enabling code from the previous commit, because it assumes that interrupt should already be enabled at the point of EOI. However, this is incorrect: there is a window after a hart claiming an interrupt and before irq_desc->lock getting acquired, interrupt can be disabled during this window. Thus, EOI can be invoked while the interrupt is disabled, effectively nullify this EOI. This results in the interrupt never gets asserted again, and the device who uses this interrupt appears frozen. Make sure that interrupt is really enabled before EOI. Fixes: a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask operations") Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Samuel Holland Cc: Marc Zyngier Cc: Guo Ren Cc: linux-riscv@lists.infradead.org Cc: Link: https://lore.kernel.org/r/20240131081933.144512-1-namcao@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-sifive-plic.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 5b7bc4fd9517..bf0b40b0fad4 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -148,7 +148,13 @@ static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); - writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + if (unlikely(irqd_irq_disabled(d))) { + plic_toggle(handler, d->hwirq, 1); + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + plic_toggle(handler, d->hwirq, 0); + } else { + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + } } #ifdef CONFIG_SMP From 5ef293c3e0b29b2065d4f144dbecf631e44ee3e5 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Mon, 15 Jan 2024 19:26:49 +0530 Subject: [PATCH 175/299] PCI/MSI: Prevent MSI hardware interrupt number truncation commit db744ddd59be798c2627efbfc71f707f5a935a40 upstream. While calculating the hardware interrupt number for a MSI interrupt, the higher bits (i.e. from bit-5 onwards a.k.a domain_nr >= 32) of the PCI domain number gets truncated because of the shifted value casting to return type of pci_domain_nr() which is 'int'. This for example is resulting in same hardware interrupt number for devices 0019:00:00.0 and 0039:00:00.0. To address this cast the PCI domain number to 'irq_hw_number_t' before left shifting it to calculate the hardware interrupt number. Please note that this fixes the issue only on 64-bit systems and doesn't change the behavior for 32-bit systems i.e. the 32-bit systems continue to have the issue. Since the issue surfaces only if there are too many PCIe controllers in the system which usually is the case in modern server systems and they don't tend to run 32-bit kernels. Fixes: 3878eaefb89a ("PCI/MSI: Enhance core to support hierarchy irqdomain") Signed-off-by: Vidya Sagar Signed-off-by: Thomas Gleixner Tested-by: Shanker Donthineni Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240115135649.708536-1-vidyas@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/pci/msi/irqdomain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index c8be056c248d..cfd84a899c82 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -61,7 +61,7 @@ static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc) return (irq_hw_number_t)desc->msi_index | pci_dev_id(dev) << 11 | - (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27; + ((irq_hw_number_t)(pci_domain_nr(dev->bus) & 0xFFFFFFFF)) << 27; } static void pci_msi_domain_set_desc(msi_alloc_info_t *arg, From 804bd8650a3a2bf3432375f8c97d5049d845ce56 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 20 Feb 2024 12:21:56 +0000 Subject: [PATCH 176/299] l2tp: pass correct message length to ip6_append_data commit 359e54a93ab43d32ee1bff3c2f9f10cb9f6b6e79 upstream. l2tp_ip6_sendmsg needs to avoid accounting for the transport header twice when splicing more data into an already partially-occupied skbuff. To manage this, we check whether the skbuff contains data using skb_queue_empty when deciding how much data to append using ip6_append_data. However, the code which performed the calculation was incorrect: ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; ...due to C operator precedence, this ends up setting ulen to transhdrlen for messages with a non-zero length, which results in corrupted packets on the wire. Add parentheses to correct the calculation in line with the original intent. Fixes: 9d4c75800f61 ("ipv4, ipv6: Fix handling of transhdrlen in __ip{,6}_append_data()") Cc: David Howells Cc: stable@vger.kernel.org Signed-off-by: Tom Parkin Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220122156.43131-1-tparkin@katalix.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 11f3d375cec0..db4971d52802 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -627,7 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) back_from_confirm: lock_sock(sk); - ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; + ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0); err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst, From 97ba7c1f9c0a2401e644760d857b2386aa895997 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 5 Feb 2024 11:23:34 +0100 Subject: [PATCH 177/299] ARM: ep93xx: Add terminator to gpiod_lookup_table commit fdf87a0dc26d0550c60edc911cda42f9afec3557 upstream. Without the terminator, if a con_id is passed to gpio_find() that does not exist in the lookup table the function will not stop looping correctly, and eventually cause an oops. Cc: stable@vger.kernel.org Fixes: b2e63555592f ("i2c: gpio: Convert to use descriptors") Reported-by: Andy Shevchenko Signed-off-by: Nikita Shubin Reviewed-by: Linus Walleij Acked-by: Alexander Sverdlin Signed-off-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240205102337.439002-1-alexander.sverdlin@gmail.com Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-ep93xx/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 71b113976420..8b1ec60a9a46 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -339,6 +339,7 @@ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), GPIO_LOOKUP_IDX("G", 0, NULL, 1, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), + { } }, }; From 763f1f13d856c6c3bc1adaec0758348633bc99cc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 24 Feb 2024 14:48:03 +0100 Subject: [PATCH 178/299] dm-integrity, dm-verity: reduce stack usage for recheck commit 66ad2fbcdbeab0edfd40c5d94f32f053b98c2320 upstream. The newly added integrity_recheck() function has another larger stack allocation, just like its caller integrity_metadata(). When it gets inlined, the combination of the two exceeds the warning limit for 32-bit architectures and possibly risks an overflow when this is called from a deep call chain through a file system: drivers/md/dm-integrity.c:1767:13: error: stack frame size (1048) exceeds limit (1024) in 'integrity_metadata' [-Werror,-Wframe-larger-than] 1767 | static void integrity_metadata(struct work_struct *w) Since the caller at this point is done using its checksum buffer, just reuse the same buffer in the new function to avoid the double allocation. [Mikulas: add "noinline" to integrity_recheck and verity_recheck. These functions are only called on error, so they shouldn't bloat the stack frame or code size of the caller.] Fixes: c88f5e553fe3 ("dm-integrity: recheck the integrity tag after a failure") Fixes: 9177f3c0dea6 ("dm-verity: recheck the hash after a failure") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 10 ++++------ drivers/md/dm-verity-target.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 1ef6ca680f8f..68923c36b6d4 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1701,14 +1701,13 @@ failed: get_random_bytes(result, ic->tag_size); } -static void integrity_recheck(struct dm_integrity_io *dio) +static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum) { struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); struct dm_integrity_c *ic = dio->ic; struct bvec_iter iter; struct bio_vec bv; sector_t sector, logical_sector, area, offset; - char checksum_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; struct page *page; void *buffer; @@ -1744,9 +1743,8 @@ static void integrity_recheck(struct dm_integrity_io *dio) goto free_ret; } - integrity_sector_checksum(ic, logical_sector, buffer, - checksum_onstack); - r = dm_integrity_rw_tag(ic, checksum_onstack, &dio->metadata_block, + integrity_sector_checksum(ic, logical_sector, buffer, checksum); + r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block, &dio->metadata_offset, ic->tag_size, TAG_CMP); if (r) { if (r > 0) { @@ -1861,7 +1859,7 @@ again: checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - integrity_recheck(dio); + integrity_recheck(dio, checksums); goto skip_io; } if (likely(checksums != checksums_onstack)) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 224469e1efbc..7b620b187da9 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -491,8 +491,8 @@ static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, return 0; } -static int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, - struct bvec_iter start, sector_t cur_block) +static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter start, sector_t cur_block) { struct page *page; void *buffer; From ba84bbbcd5b890ffcb8c5c544df76f373b36a53e Mon Sep 17 00:00:00 2001 From: Sandeep Dhavale Date: Wed, 21 Feb 2024 13:03:47 -0800 Subject: [PATCH 179/299] erofs: fix refcount on the metabuf used for inode lookup commit 56ee7db31187dc36d501622cb5f1415e88e01c2a upstream. In erofs_find_target_block() when erofs_dirnamecmp() returns 0, we do not assign the target metabuf. This causes the caller erofs_namei()'s erofs_put_metabuf() at the end to be not effective leaving the refcount on the page. As the page from metabuf (buf->page) is never put, such page cannot be migrated or reclaimed. Fix it now by putting the metabuf from previous loop and assigning the current metabuf to target before returning so caller erofs_namei() can do the final put as it was intended. Fixes: 500edd095648 ("erofs: use meta buffers for inode lookup") Cc: # 5.18+ Signed-off-by: Sandeep Dhavale Reviewed-by: Gao Xiang Reviewed-by: Jingbo Xu Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20240221210348.3667795-1-dhavale@google.com Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- fs/erofs/namei.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index d4f631d39f0f..f0110a78acb2 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -130,24 +130,24 @@ static void *erofs_find_target_block(struct erofs_buf *target, /* string comparison without already matched prefix */ diff = erofs_dirnamecmp(name, &dname, &matched); - if (!diff) { - *_ndirents = 0; - goto out; - } else if (diff > 0) { - head = mid + 1; - startprfx = matched; - - if (!IS_ERR(candidate)) - erofs_put_metabuf(target); - *target = buf; - candidate = de; - *_ndirents = ndirents; - } else { + if (diff < 0) { erofs_put_metabuf(&buf); - back = mid - 1; endprfx = matched; + continue; } + + if (!IS_ERR(candidate)) + erofs_put_metabuf(target); + *target = buf; + if (!diff) { + *_ndirents = 0; + return de; + } + head = mid + 1; + startprfx = matched; + candidate = de; + *_ndirents = ndirents; continue; } out: /* free if the candidate is valid */ From 6cf046b3acea853a719c1e8a106144c89323b166 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sat, 17 Feb 2024 17:20:21 +0100 Subject: [PATCH 180/299] Revert "usb: typec: tcpm: reset counter when enter into unattached state after try role" commit 23b1d2d99b0f55326f05e7d757fa197c4a95dc5c upstream. The reverted commit makes the state machine only ever go from SRC_ATTACH_WAIT to SNK_TRY in endless loop when toggling. After revert it goes to SRC_ATTACHED after initially trying SNK_TRY earlier, as it should for toggling to ever detect the power source mode and the port is again able to provide power to attached power sinks. This reverts commit 2d6d80127006ae3da26b1f21a65eccf957f2d1e5. Cc: stable@vger.kernel.org Fixes: 2d6d80127006 ("usb: typec: tcpm: reset counter when enter into unattached state after try role") Signed-off-by: Ondrej Jirman Link: https://lore.kernel.org/r/20240217162023.1719738-1-megi@xff.cz Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 6d455ca76125..53c60d93bbeb 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -3730,9 +3730,6 @@ static void tcpm_detach(struct tcpm_port *port) if (tcpm_port_is_disconnected(port)) port->hard_reset_count = 0; - port->try_src_count = 0; - port->try_snk_count = 0; - if (!port->attached) return; From 3e3578ca1b877a49a7521e600d51658d6d794267 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 16 Feb 2024 23:47:07 +0100 Subject: [PATCH 181/299] serial: stm32: do not always set SER_RS485_RX_DURING_TX if RS485 is enabled commit f418ae73311deb901c0110b08d1bbafc20c1820e upstream. Before commit 07c30ea5861f ("serial: Do not hold the port lock when setting rx-during-tx GPIO") the SER_RS485_RX_DURING_TX flag was only set if the rx-during-tx mode was not controlled by a GPIO. Now the flag is set unconditionally when RS485 is enabled. This results in an incorrect setting if the rx-during-tx GPIO is not asserted. Fix this by setting the flag only if the rx-during-tx mode is not controlled by a GPIO and thus restore the correct behaviour. Cc: stable@vger.kernel.org # 6.6+ Fixes: 07c30ea5861f ("serial: Do not hold the port lock when setting rx-during-tx GPIO") Signed-off-by: Lino Sanfilippo Link: https://lore.kernel.org/r/20240216224709.9928-1-l.sanfilippo@kunbus.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index b6f4f436a565..e5f933beb6c0 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -251,7 +251,9 @@ static int stm32_usart_config_rs485(struct uart_port *port, struct ktermios *ter writel_relaxed(cr3, port->membase + ofs->cr3); writel_relaxed(cr1, port->membase + ofs->cr1); - rs485conf->flags |= SER_RS485_RX_DURING_TX; + if (!port->rs485_rx_during_tx_gpio) + rs485conf->flags |= SER_RS485_RX_DURING_TX; + } else { stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DEM | USART_CR3_DEP); From 9319ecb8380ffee62a918b08f6146c0e43766463 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 16 Feb 2024 23:47:08 +0100 Subject: [PATCH 182/299] serial: amba-pl011: Fix DMA transmission in RS485 mode commit 3b69e32e151bc4a4e3c785cbdb1f918d5ee337ed upstream. When DMA is used in RS485 mode make sure that the UARTs tx section is enabled before the DMA buffers are queued for transmission. Cc: stable@vger.kernel.org Fixes: 8d479237727c ("serial: amba-pl011: add RS485 support") Signed-off-by: Lino Sanfilippo Link: https://lore.kernel.org/r/20240216224709.9928-2-l.sanfilippo@kunbus.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 60 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index cd3913b933c7..362bbcdece0d 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1345,11 +1345,41 @@ static void pl011_start_tx_pio(struct uart_amba_port *uap) } } +static void pl011_rs485_tx_start(struct uart_amba_port *uap) +{ + struct uart_port *port = &uap->port; + u32 cr; + + /* Enable transmitter */ + cr = pl011_read(uap, REG_CR); + cr |= UART011_CR_TXE; + + /* Disable receiver if half-duplex */ + if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) + cr &= ~UART011_CR_RXE; + + if (port->rs485.flags & SER_RS485_RTS_ON_SEND) + cr &= ~UART011_CR_RTS; + else + cr |= UART011_CR_RTS; + + pl011_write(cr, uap, REG_CR); + + if (port->rs485.delay_rts_before_send) + mdelay(port->rs485.delay_rts_before_send); + + uap->rs485_tx_started = true; +} + static void pl011_start_tx(struct uart_port *port) { struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + if ((uap->port.rs485.flags & SER_RS485_ENABLED) && + !uap->rs485_tx_started) + pl011_rs485_tx_start(uap); + if (!pl011_dma_tx_start(uap)) pl011_start_tx_pio(uap); } @@ -1431,42 +1461,12 @@ static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c, return true; } -static void pl011_rs485_tx_start(struct uart_amba_port *uap) -{ - struct uart_port *port = &uap->port; - u32 cr; - - /* Enable transmitter */ - cr = pl011_read(uap, REG_CR); - cr |= UART011_CR_TXE; - - /* Disable receiver if half-duplex */ - if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - cr &= ~UART011_CR_RXE; - - if (port->rs485.flags & SER_RS485_RTS_ON_SEND) - cr &= ~UART011_CR_RTS; - else - cr |= UART011_CR_RTS; - - pl011_write(cr, uap, REG_CR); - - if (port->rs485.delay_rts_before_send) - mdelay(port->rs485.delay_rts_before_send); - - uap->rs485_tx_started = true; -} - /* Returns true if tx interrupts have to be (kept) enabled */ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) { struct circ_buf *xmit = &uap->port.state->xmit; int count = uap->fifosize >> 1; - if ((uap->port.rs485.flags & SER_RS485_ENABLED) && - !uap->rs485_tx_started) - pl011_rs485_tx_start(uap); - if (uap->port.x_char) { if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) return true; From d3999e342099c08716514f4284e4e74ce0b374e1 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 16 Feb 2024 00:41:02 +0000 Subject: [PATCH 183/299] usb: dwc3: gadget: Don't disconnect if not started commit b191a18cb5c47109ca696370a74a5062a70adfd0 upstream. Don't go through soft-disconnection sequence if the controller hasn't started. Otherwise, there will be timeout and warning reports from the soft-disconnection flow. Cc: stable@vger.kernel.org Fixes: 61a348857e86 ("usb: dwc3: gadget: Fix NULL pointer dereference in dwc3_gadget_suspend") Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/linux-usb/20240215233536.7yejlj3zzkl23vjd@synopsys.com/T/#mb0661cd5f9272602af390c18392b9a36da4f96e6 Tested-by: Marek Szyprowski Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/e3be9b929934e0680a6f4b8f6eb11b18ae9c7e07.1708043922.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 4c8dd6724678..28f49400f3e8 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2650,6 +2650,11 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc) int ret; spin_lock_irqsave(&dwc->lock, flags); + if (!dwc->pullups_connected) { + spin_unlock_irqrestore(&dwc->lock, flags); + return 0; + } + dwc->connected = false; /* From a92de02692b4461db2944f08a5005cbb8ab19de1 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 6 Feb 2024 11:40:18 +0100 Subject: [PATCH 184/299] usb: cdnsp: blocked some cdns3 specific code commit 18a6be674306c9acb05c08e5c3fd376ef50a917c upstream. host.c file has some parts of code that were introduced for CDNS3 driver and should not be used with CDNSP driver. This patch blocks using these parts of codes by CDNSP driver. These elements include: - xhci_plat_cdns3_xhci object - cdns3 specific XECP_PORT_CAP_REG register - cdns3 specific XECP_AUX_CTRL_REG1 register cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20240206104018.48272-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/host.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 6164fc4c96a4..ceca4d839dfd 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -18,6 +18,11 @@ #include "../host/xhci.h" #include "../host/xhci-plat.h" +/* + * The XECP_PORT_CAP_REG and XECP_AUX_CTRL_REG1 exist only + * in Cadence USB3 dual-role controller, so it can't be used + * with Cadence CDNSP dual-role controller. + */ #define XECP_PORT_CAP_REG 0x8000 #define XECP_AUX_CTRL_REG1 0x8120 @@ -57,6 +62,8 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { .resume_quirk = xhci_cdns3_resume_quirk, }; +static const struct xhci_plat_priv xhci_plat_cdnsp_xhci; + static int __cdns_host_init(struct cdns *cdns) { struct platform_device *xhci; @@ -81,8 +88,13 @@ static int __cdns_host_init(struct cdns *cdns) goto err1; } - cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, - sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (cdns->version < CDNSP_CONTROLLER_V2) + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + else + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdnsp_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (!cdns->xhci_plat_data) { ret = -ENOMEM; goto err1; From 11f656fc0a56edaf1127bcd0442f3b7854892c1d Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 15 Feb 2024 13:16:09 +0100 Subject: [PATCH 185/299] usb: cdnsp: fixed issue with incorrect detecting CDNSP family controllers commit 47625b018c6bc788bc10dd654c82696eb0a5ef11 upstream. Cadence have several controllers from 0x000403xx family but current driver suuport detecting only one with DID equal 0x0004034E. It causes that if someone uses different CDNSP controller then driver will use incorrect version and register space. Patch fix this issue. cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20240215121609.259772-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/core.c | 1 - drivers/usb/cdns3/drd.c | 13 +++++++++---- drivers/usb/cdns3/drd.h | 6 +++++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 33548771a0d3..465e9267b49c 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -395,7 +395,6 @@ pm_put: return ret; } - /** * cdns_wakeup_irq - interrupt handler for wakeup events * @irq: irq number for cdns3/cdnsp core device diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c index 04b6d12f2b9a..ee917f1b091c 100644 --- a/drivers/usb/cdns3/drd.c +++ b/drivers/usb/cdns3/drd.c @@ -156,7 +156,8 @@ bool cdns_is_device(struct cdns *cdns) */ static void cdns_otg_disable_irq(struct cdns *cdns) { - writel(0, &cdns->otg_irq_regs->ien); + if (cdns->version) + writel(0, &cdns->otg_irq_regs->ien); } /** @@ -422,15 +423,20 @@ int cdns_drd_init(struct cdns *cdns) cdns->otg_regs = (void __iomem *)&cdns->otg_v1_regs->cmd; - if (readl(&cdns->otg_cdnsp_regs->did) == OTG_CDNSP_DID) { + state = readl(&cdns->otg_cdnsp_regs->did); + + if (OTG_CDNSP_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_cdnsp_regs->ien; cdns->version = CDNSP_CONTROLLER_V2; - } else { + } else if (OTG_CDNS3_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_v1_regs->ien; writel(1, &cdns->otg_v1_regs->simulate); cdns->version = CDNS3_CONTROLLER_V1; + } else { + dev_err(cdns->dev, "not supporte DID=0x%08x\n", state); + return -EINVAL; } dev_dbg(cdns->dev, "DRD version v1 (ID: %08x, rev: %08x)\n", @@ -483,7 +489,6 @@ int cdns_drd_exit(struct cdns *cdns) return 0; } - /* Indicate the cdns3 core was power lost before */ bool cdns_power_is_lost(struct cdns *cdns) { diff --git a/drivers/usb/cdns3/drd.h b/drivers/usb/cdns3/drd.h index cbdf94f73ed9..d72370c321d3 100644 --- a/drivers/usb/cdns3/drd.h +++ b/drivers/usb/cdns3/drd.h @@ -79,7 +79,11 @@ struct cdnsp_otg_regs { __le32 susp_timing_ctrl; }; -#define OTG_CDNSP_DID 0x0004034E +/* CDNSP driver supports 0x000403xx Cadence USB controller family. */ +#define OTG_CDNSP_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040300) + +/* CDNS3 driver supports 0x000402xx Cadence USB controller family. */ +#define OTG_CDNS3_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040200) /* * Common registers interface for both CDNS3 and CDNSP version of DRD. From 29e42e1578a10c611b3f1a38f3229b2d664b5d16 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 2 Feb 2024 10:42:16 -0500 Subject: [PATCH 186/299] usb: cdns3: fixed memory use after free at cdns3_gadget_ep_disable() commit cd45f99034b0c8c9cb346dd0d6407a95ca3d36f6 upstream. ... cdns3_gadget_ep_free_request(&priv_ep->endpoint, &priv_req->request); list_del_init(&priv_req->list); ... 'priv_req' actually free at cdns3_gadget_ep_free_request(). But list_del_init() use priv_req->list after it. [ 1542.642868][ T534] BUG: KFENCE: use-after-free read in __list_del_entry_valid+0x10/0xd4 [ 1542.642868][ T534] [ 1542.653162][ T534] Use-after-free read at 0x000000009ed0ba99 (in kfence-#3): [ 1542.660311][ T534] __list_del_entry_valid+0x10/0xd4 [ 1542.665375][ T534] cdns3_gadget_ep_disable+0x1f8/0x388 [cdns3] [ 1542.671571][ T534] usb_ep_disable+0x44/0xe4 [ 1542.675948][ T534] ffs_func_eps_disable+0x64/0xc8 [ 1542.680839][ T534] ffs_func_set_alt+0x74/0x368 [ 1542.685478][ T534] ffs_func_disable+0x18/0x28 Move list_del_init() before cdns3_gadget_ep_free_request() to resolve this problem. Cc: stable@vger.kernel.org Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Frank Li Reviewed-by: Roger Quadros Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240202154217.661867-1-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index d14001025700..2ae912921450 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -2539,11 +2539,11 @@ static int cdns3_gadget_ep_disable(struct usb_ep *ep) while (!list_empty(&priv_ep->wa2_descmiss_req_list)) { priv_req = cdns3_next_priv_request(&priv_ep->wa2_descmiss_req_list); + list_del_init(&priv_req->list); kfree(priv_req->request.buf); cdns3_gadget_ep_free_request(&priv_ep->endpoint, &priv_req->request); - list_del_init(&priv_req->list); --priv_ep->wa2_counter; } From 70e8038813f9d3e72df966748ebbc40efe466019 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 2 Feb 2024 10:42:17 -0500 Subject: [PATCH 187/299] usb: cdns3: fix memory double free when handle zero packet commit 5fd9e45f1ebcd57181358af28506e8a661a260b3 upstream. 829 if (request->complete) { 830 spin_unlock(&priv_dev->lock); 831 usb_gadget_giveback_request(&priv_ep->endpoint, 832 request); 833 spin_lock(&priv_dev->lock); 834 } 835 836 if (request->buf == priv_dev->zlp_buf) 837 cdns3_gadget_ep_free_request(&priv_ep->endpoint, request); Driver append an additional zero packet request when queue a packet, which length mod max packet size is 0. When transfer complete, run to line 831, usb_gadget_giveback_request() will free this requestion. 836 condition is true, so cdns3_gadget_ep_free_request() free this request again. Log: [ 1920.140696][ T150] BUG: KFENCE: use-after-free read in cdns3_gadget_giveback+0x134/0x2c0 [cdns3] [ 1920.140696][ T150] [ 1920.151837][ T150] Use-after-free read at 0x000000003d1cd10b (in kfence-#36): [ 1920.159082][ T150] cdns3_gadget_giveback+0x134/0x2c0 [cdns3] [ 1920.164988][ T150] cdns3_transfer_completed+0x438/0x5f8 [cdns3] Add check at line 829, skip call usb_gadget_giveback_request() if it is additional zero length packet request. Needn't call usb_gadget_giveback_request() because it is allocated in this driver. Cc: stable@vger.kernel.org Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Frank Li Reviewed-by: Roger Quadros Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240202154217.661867-2-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 2ae912921450..b1b46c7c63f8 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -828,7 +828,11 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep, return; } - if (request->complete) { + /* + * zlp request is appended by driver, needn't call usb_gadget_giveback_request() to notify + * gadget composite driver. + */ + if (request->complete && request->buf != priv_dev->zlp_buf) { spin_unlock(&priv_dev->lock); usb_gadget_giveback_request(&priv_ep->endpoint, request); From 2b7ec68869d50ea998908af43b643bca7e54577e Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Mon, 5 Feb 2024 13:16:50 +0530 Subject: [PATCH 188/299] usb: gadget: ncm: Avoid dropping datagrams of properly parsed NTBs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 76c51146820c5dac629f21deafab0a7039bc3ccd upstream. It is observed sometimes when tethering is used over NCM with Windows 11 as host, at some instances, the gadget_giveback has one byte appended at the end of a proper NTB. When the NTB is parsed, unwrap call looks for any leftover bytes in SKB provided by u_ether and if there are any pending bytes, it treats them as a separate NTB and parses it. But in case the second NTB (as per unwrap call) is faulty/corrupt, all the datagrams that were parsed properly in the first NTB and saved in rx_list are dropped. Adding a few custom traces showed the following: [002] d..1 7828.532866: dwc3_gadget_giveback: ep1out: req 000000003868811a length 1025/16384 zsI ==> 0 [002] d..1 7828.532867: ncm_unwrap_ntb: K: ncm_unwrap_ntb toprocess: 1025 [002] d..1 7828.532867: ncm_unwrap_ntb: K: ncm_unwrap_ntb nth: 1751999342 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb seq: 0xce67 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb blk_len: 0x400 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb ndp_len: 0x10 [002] d..1 7828.532869: ncm_unwrap_ntb: K: Parsed NTB with 1 frames In this case, the giveback is of 1025 bytes and block length is 1024. The rest 1 byte (which is 0x00) won't be parsed resulting in drop of all datagrams in rx_list. Same is case with packets of size 2048: [002] d..1 7828.557948: dwc3_gadget_giveback: ep1out: req 0000000011dfd96e length 2049/16384 zsI ==> 0 [002] d..1 7828.557949: ncm_unwrap_ntb: K: ncm_unwrap_ntb nth: 1751999342 [002] d..1 7828.557950: ncm_unwrap_ntb: K: ncm_unwrap_ntb blk_len: 0x800 Lecroy shows one byte coming in extra confirming that the byte is coming in from PC: Transfer 2959 - Bytes Transferred(1025) Timestamp((18.524 843 590) - Transaction 8391 - Data(1025 bytes) Timestamp(18.524 843 590) --- Packet 4063861 Data(1024 bytes) Duration(2.117us) Idle(14.700ns) Timestamp(18.524 843 590) --- Packet 4063863 Data(1 byte) Duration(66.160ns) Time(282.000ns) Timestamp(18.524 845 722) According to Windows driver, no ZLP is needed if wBlockLength is non-zero, because the non-zero wBlockLength has already told the function side the size of transfer to be expected. However, there are in-market NCM devices that rely on ZLP as long as the wBlockLength is multiple of wMaxPacketSize. To deal with such devices, it pads an extra 0 at end so the transfer is no longer multiple of wMaxPacketSize. Cc: Fixes: 9f6ce4240a2b ("usb: gadget: f_ncm.c added") Signed-off-by: Krishna Kurapati Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20240205074650.200304-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index cc0ed29a4adc..5712883a7527 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1325,7 +1325,15 @@ parse_ntb: "Parsed NTB with %d frames\n", dgram_counter); to_process -= block_len; - if (to_process != 0) { + + /* + * Windows NCM driver avoids USB ZLPs by adding a 1-byte + * zero pad as needed. + */ + if (to_process == 1 && + (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) { + to_process--; + } else if (to_process > 0) { ntb_ptr = (unsigned char *)(ntb_ptr + block_len); goto parse_ntb; } From da7fc10bc4714902c26d07261391f57cfa7c406d Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sat, 17 Feb 2024 21:20:42 +0200 Subject: [PATCH 189/299] usb: gadget: omap_udc: fix USB gadget regression on Palm TE commit 858a74cb512833e276d96a72acb560ce8c138bec upstream. When upgrading from 6.1 LTS to 6.6 LTS, I noticed the ethernet gadget stopped working on Palm TE. Commit 8825acd7cc8a ("ARM: omap1: remove dead code") deleted Palm TE from machine_without_vbus_sense(), although the board is still used. Fix that. Fixes: 8825acd7cc8a ("ARM: omap1: remove dead code") Cc: stable Signed-off-by: Aaro Koskinen Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240217192042.GA372205@darkstar.musicnaut.iki.fi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/omap_udc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index 10c5d7f726a1..f90eeecf27de 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2036,7 +2036,8 @@ static irqreturn_t omap_udc_iso_irq(int irq, void *_dev) static inline int machine_without_vbus_sense(void) { - return machine_is_omap_osk() || machine_is_sx1(); + return machine_is_omap_osk() || machine_is_omap_palmte() || + machine_is_sx1(); } static int omap_udc_start(struct usb_gadget *g, From 4b45829440b1b208948b39cc71f77a37a2536734 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 29 Jan 2024 17:37:38 +0800 Subject: [PATCH 190/299] usb: roles: fix NULL pointer issue when put module's reference commit 1c9be13846c0b2abc2480602f8ef421360e1ad9e upstream. In current design, usb role class driver will get usb_role_switch parent's module reference after the user get usb_role_switch device and put the reference after the user put the usb_role_switch device. However, the parent device of usb_role_switch may be removed before the user put the usb_role_switch. If so, then, NULL pointer issue will be met when the user put the parent module's reference. This will save the module pointer in structure of usb_role_switch. Then, we don't need to find module by iterating long relations. Fixes: 5c54fcac9a9d ("usb: roles: Take care of driver module reference counting") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240129093739.2371530-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index ae41578bd014..2bad038fb9ad 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -21,6 +21,7 @@ static const struct class role_class = { struct usb_role_switch { struct device dev; struct mutex lock; /* device lock*/ + struct module *module; /* the module this device depends on */ enum usb_role role; /* From descriptor */ @@ -135,7 +136,7 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev) usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -157,7 +158,7 @@ struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode) sw = fwnode_connection_find_match(fwnode, "usb-role-switch", NULL, usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -172,7 +173,7 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get); void usb_role_switch_put(struct usb_role_switch *sw) { if (!IS_ERR_OR_NULL(sw)) { - module_put(sw->dev.parent->driver->owner); + module_put(sw->module); put_device(&sw->dev); } } @@ -189,15 +190,18 @@ struct usb_role_switch * usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) { struct device *dev; + struct usb_role_switch *sw = NULL; if (!fwnode) return NULL; dev = class_find_device_by_fwnode(&role_class, fwnode); - if (dev) - WARN_ON(!try_module_get(dev->parent->driver->owner)); + if (dev) { + sw = to_role_switch(dev); + WARN_ON(!try_module_get(sw->module)); + } - return dev ? to_role_switch(dev) : NULL; + return sw; } EXPORT_SYMBOL_GPL(usb_role_switch_find_by_fwnode); @@ -338,6 +342,7 @@ usb_role_switch_register(struct device *parent, sw->set = desc->set; sw->get = desc->get; + sw->module = parent->driver->owner; sw->dev.parent = parent; sw->dev.fwnode = desc->fwnode; sw->dev.class = &role_class; From 6aba8cf676c0073ac6d1be3bd70886b590af5637 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 29 Jan 2024 17:37:39 +0800 Subject: [PATCH 191/299] usb: roles: don't get/set_role() when usb_role_switch is unregistered commit b787a3e781759026a6212736ef8e52cf83d1821a upstream. There is a possibility that usb_role_switch device is unregistered before the user put usb_role_switch. In this case, the user may still want to get/set_role() since the user can't sense the changes of usb_role_switch. This will add a flag to show if usb_role_switch is already registered and avoid unwanted behaviors. Fixes: fde0aa6c175a ("usb: common: Small class for USB role switches") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240129093739.2371530-2-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index 2bad038fb9ad..70165dd86b5d 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -23,6 +23,7 @@ struct usb_role_switch { struct mutex lock; /* device lock*/ struct module *module; /* the module this device depends on */ enum usb_role role; + bool registered; /* From descriptor */ struct device *usb2_port; @@ -49,6 +50,9 @@ int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role) if (IS_ERR_OR_NULL(sw)) return 0; + if (!sw->registered) + return -EOPNOTSUPP; + mutex_lock(&sw->lock); ret = sw->set(sw, role); @@ -74,7 +78,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw) { enum usb_role role; - if (IS_ERR_OR_NULL(sw)) + if (IS_ERR_OR_NULL(sw) || !sw->registered) return USB_ROLE_NONE; mutex_lock(&sw->lock); @@ -357,6 +361,8 @@ usb_role_switch_register(struct device *parent, return ERR_PTR(ret); } + sw->registered = true; + /* TODO: Symlinks for the host port and the device controller. */ return sw; @@ -371,8 +377,10 @@ EXPORT_SYMBOL_GPL(usb_role_switch_register); */ void usb_role_switch_unregister(struct usb_role_switch *sw) { - if (!IS_ERR_OR_NULL(sw)) + if (!IS_ERR_OR_NULL(sw)) { + sw->registered = false; device_unregister(&sw->dev); + } } EXPORT_SYMBOL_GPL(usb_role_switch_unregister); From 176421d7afba21823d63402c856a72fd14836493 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 15 Feb 2024 19:25:28 +0100 Subject: [PATCH 192/299] mptcp: add needs_id for userspace appending addr commit 6c347be62ae963b301ead8e7fa7b9973e6e0d6e1 upstream. When userspace PM requires to create an ID 0 subflow in "userspace pm create id 0 subflow" test like this: userspace_pm_add_sf $ns2 10.0.3.2 0 An ID 1 subflow, in fact, is created. Since in mptcp_pm_nl_append_new_local_addr(), 'id 0' will be treated as no ID is set by userspace, and will allocate a new ID immediately: if (!e->addr.id) e->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); To solve this issue, a new parameter needs_id is added for mptcp_userspace_pm_append_new_local_addr() to distinguish between whether userspace PM has set an ID 0 or whether userspace PM has not set any address. needs_id is true in mptcp_userspace_pm_get_local_id(), but false in mptcp_pm_nl_announce_doit() and mptcp_pm_nl_subflow_create_doit(). Fixes: e5ed101a6028 ("mptcp: userspace pm allow creating id 0 subflow") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index c1717322c892..3b34b7cf56c9 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk) } static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_pm_addr_entry *match = NULL; @@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); - if (addr_match && entry->addr.id == 0) + if (addr_match && entry->addr.id == 0 && needs_id) entry->addr.id = e->addr.id; id_match = (e->addr.id == entry->addr.id); if (addr_match && id_match) { @@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, } *e = *entry; - if (!e->addr.id) + if (!e->addr.id && needs_id) e->addr.id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); @@ -153,7 +154,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, if (new_entry.addr.port == msk_sport) new_entry.addr.port = 0; - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); + return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) @@ -195,7 +196,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) goto announce_err; } - err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto announce_err; @@ -333,7 +334,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) } local.addr = addr_l; - err = mptcp_userspace_pm_append_new_local_addr(msk, &local); + err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto create_err; From e074c8297ee421e7ff352404262fe0e042954ab7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:30 +0100 Subject: [PATCH 193/299] mptcp: fix lockless access in subflow ULP diag commit b8adb69a7d29c2d33eb327bca66476fb6066516b upstream. Since the introduction of the subflow ULP diag interface, the dump callback accessed all the subflow data with lockless. We need either to annotate all the read and write operation accordingly, or acquire the subflow socket lock. Let's do latter, even if slower, to avoid a diffstat havoc. Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 2 +- net/mptcp/diag.c | 6 +++++- net/tls/tls_main.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 9cbcfb3c95da..a3840a2749c1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2343,7 +2343,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ - int (*get_info)(const struct sock *sk, struct sk_buff *skb); + int (*get_info)(struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index a536586742f2..e57c5f47f035 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -13,17 +13,19 @@ #include #include "protocol.h" -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *sf; struct nlattr *start; u32 flags = 0; + bool slow; int err; start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; + slow = lock_sock_fast(sk); rcu_read_lock(); sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data); if (!sf) { @@ -69,11 +71,13 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) } rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_end(skb, start); return 0; nla_failure: rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_cancel(skb, start); return err; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 002483e60c19..e97fcb502115 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1001,7 +1001,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb) { u16 version, cipher_type; struct tls_context *ctx; From ba2cf922502c557cd5f2f15d3004f1a168ef3764 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:31 +0100 Subject: [PATCH 194/299] mptcp: fix data races on local_id commit a7cfe776637004a4c938fde78be4bd608c32c3ef upstream. The local address id is accessed lockless by the NL PM, add all the required ONCE annotation. There is a caveat: the local id can be initialized late in the subflow life-cycle, and its validity is controlled by the local_id_valid flag. Remove such flag and encode the validity in the local_id field itself with negative value before initialization. That allows accessing the field consistently with a single read operation. Fixes: 0ee4261a3681 ("mptcp: implement mptcp_pm_remove_subflow") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/diag.c | 2 +- net/mptcp/pm_netlink.c | 6 +++--- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 15 ++++++++++++--- net/mptcp/subflow.c | 9 +++++---- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index e57c5f47f035..6ff6f14674aa 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -65,7 +65,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) sf->map_data_len) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || - nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) { + nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { err = -EMSGSIZE; goto nla_failure; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 44c0e96210a4..90573ef90b92 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -800,7 +800,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - u8 id = subflow->local_id; + u8 id = subflow_get_local_id(subflow); if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) continue; @@ -809,7 +809,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, subflow->local_id, subflow->remote_id, + i, rm_id, id, subflow->remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); @@ -2005,7 +2005,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) if (WARN_ON_ONCE(!sf)) return -EINVAL; - if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) return -EMSGSIZE; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d36927411310..d4ee0a6bdc86 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -99,7 +99,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) subflow->subflow_id = msk->subflow_id++; /* This is the first subflow, always with id 0 */ - subflow->local_id_valid = 1; + WRITE_ONCE(subflow->local_id, 0); mptcp_sock_graft(msk->first, sk->sk_socket); iput(SOCK_INODE(ssock)); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index c9516882cdd4..55536ac835d9 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -493,10 +493,9 @@ struct mptcp_subflow_context { remote_key_valid : 1, /* received the peer key from */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ - local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ - __unused : 9; + __unused : 10; enum mptcp_data_avail data_avail; bool scheduled; u32 remote_nonce; @@ -507,7 +506,7 @@ struct mptcp_subflow_context { u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */ u64 iasn; /* initial ack sequence number, MPC subflows only */ }; - u8 local_id; + s16 local_id; /* if negative not initialized yet */ u8 remote_id; u8 reset_seen:1; u8 reset_transient:1; @@ -558,6 +557,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; + WRITE_ONCE(subflow->local_id, -1); } static inline u64 @@ -1008,6 +1008,15 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) +{ + int local_id = READ_ONCE(subflow->local_id); + + if (local_id < 0) + return 0; + return local_id; +} + void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 43d6ee432814..7968dde15793 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -577,8 +577,8 @@ do_reset: static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) { - subflow->local_id = local_id; - subflow->local_id_valid = 1; + WARN_ON_ONCE(local_id < 0 || local_id > 255); + WRITE_ONCE(subflow->local_id, local_id); } static int subflow_chk_local_id(struct sock *sk) @@ -587,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(subflow->conn); int err; - if (likely(subflow->local_id_valid)) + if (likely(subflow->local_id >= 0)) return 0; err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); @@ -1723,6 +1723,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, pr_debug("subflow=%p", ctx); ctx->tcp_sock = sk; + WRITE_ONCE(ctx->local_id, -1); return ctx; } @@ -1958,7 +1959,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->idsn = subflow_req->idsn; /* this is the first subflow, id is always 0 */ - new_ctx->local_id_valid = 1; + subflow_set_local_id(new_ctx, 0); } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; From 2dba5774e8ed326a78ad4339d921a4291281ea6e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:32 +0100 Subject: [PATCH 195/299] mptcp: fix data races on remote_id commit 967d3c27127e71a10ff5c083583a038606431b61 upstream. Similar to the previous patch, address the data race on remote_id, adding the suitable ONCE annotations. Fixes: bedee0b56113 ("mptcp: address lookup improvements") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 8 ++++---- net/mptcp/subflow.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 90573ef90b92..394a4174be7a 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -443,7 +443,7 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - addrs[i].id = subflow->remote_id; + addrs[i].id = READ_ONCE(subflow->remote_id); if (deny_id0 && !addrs[i].id) continue; @@ -799,18 +799,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u8 remote_id = READ_ONCE(subflow->remote_id); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow_get_local_id(subflow); - if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) continue; pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, id, subflow->remote_id, - msk->mpc_endpoint_id); + i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 7968dde15793..ab41700bee68 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -535,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; - subflow->remote_id = mp_opt.join_id; + WRITE_ONCE(subflow->remote_id, mp_opt.join_id); pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -1561,7 +1561,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; - subflow->remote_id = remote_id; + WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); subflow->subflow_id = msk->subflow_id++; @@ -1966,7 +1966,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->fully_established = 1; new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; - new_ctx->remote_id = subflow_req->remote_id; + WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; From 1ea7b252b47ff25acc12df02f2a37411d7a0b7d7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:33 +0100 Subject: [PATCH 196/299] mptcp: fix duplicate subflow creation commit 045e9d812868a2d80b7a57b224ce8009444b7bbc upstream. Fullmesh endpoints could end-up unexpectedly generating duplicate subflows - same local and remote addresses - when multiple incoming ADD_ADDR are processed before the PM creates the subflow for the local endpoints. Address the issue explicitly checking for duplicates at subflow creation time. To avoid a quadratic computational complexity, track the unavailable remote address ids in a temporary bitmap and initialize such bitmap with the remote ids of all the existing subflows matching the local address currently processed. The above allows additionally replacing the existing code checking for duplicate entry in the current set with a simple bit test operation. Fixes: 2843ff6f36db ("mptcp: remote addresses fullmesh") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/435 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 394a4174be7a..e504a1649da0 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -396,19 +396,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) } } -static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr, - const struct mptcp_addr_info *addr) -{ - int i; - - for (i = 0; i < nr; i++) { - if (addrs[i].id == addr->id) - return true; - } - - return false; -} - /* Fill all the remote addresses into the array addrs[], * and return the array size. */ @@ -440,6 +427,16 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, msk->pm.subflows++; addrs[i++] = remote; } else { + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + + /* Forbid creation of new subflows matching existing + * ones, possibly already created by incoming ADD_ADDR + */ + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + mptcp_for_each_subflow(msk, subflow) + if (READ_ONCE(subflow->local_id) == local->id) + __set_bit(subflow->remote_id, unavail_id); + mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); @@ -447,11 +444,17 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, if (deny_id0 && !addrs[i].id) continue; + if (test_bit(addrs[i].id, unavail_id)) + continue; + if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) continue; - if (!lookup_address_in_vec(addrs, i, &addrs[i]) && - msk->pm.subflows < subflows_max) { + if (msk->pm.subflows < subflows_max) { + /* forbid creating multiple address towards + * this id + */ + __set_bit(addrs[i].id, unavail_id); msk->pm.subflows++; i++; } From 5b9bc8e6275a4327741a431fbdec401890ef21f9 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:38 +0100 Subject: [PATCH 197/299] selftests: mptcp: userspace_pm: unique subtest names commit 2ef0d804c090658960c008446523863fd7e3541e upstream. It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated names. Some subtests from the userspace_pm selftest had the same names. That's because different subflows are created (and deleted) between the same pair of IP addresses. Simply adding the destination port in the name is then enough to have different names, because the destination port is always different. Note that adding such info takes a bit more space, so we need to increase a bit the width to print the name, simply to keep all the '[ OK ]' aligned as before. Fixes: f589234e1af0 ("selftests: mptcp: userspace_pm: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index c44bf5c7c6e0..0e573c6c393a 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -75,7 +75,7 @@ print_test() { test_name="${1}" - _printf "%-63s" "${test_name}" + _printf "%-68s" "${test_name}" } print_results() @@ -555,7 +555,7 @@ verify_subflow_events() local remid local info - info="${e_saddr} (${e_from}) => ${e_daddr} (${e_to})" + info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})" if [ "$e_type" = "$SUB_ESTABLISHED" ] then From db887e24f95fbdee4c8e77a5899ec290d4e33593 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:37 +0100 Subject: [PATCH 198/299] selftests: mptcp: simult flows: fix some subtest names commit 4d8e0dde0403b5a86aa83e243f020711a9c3e31f upstream. The selftest was correctly recording all the results, but the 'reverse direction' part was missing in the name when needed. It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated name. Fixes: 675d99338e7a ("selftests: mptcp: simult flows: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/simult_flows.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ce9203b817f8..9096bf579488 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -267,7 +267,8 @@ run_test() [ $bail -eq 0 ] || exit $ret fi - printf "%-60s" "$msg - reverse direction" + msg+=" - reverse direction" + printf "%-60s" "${msg}" do_transfer $large $small $time lret=$? mptcp_lib_result_code "${lret}" "${msg}" From 4f1aa3853b953b888f6b98b0a0135d014a073c26 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:34 +0100 Subject: [PATCH 199/299] selftests: mptcp: pm nl: also list skipped tests commit d2a2547565a9f1ad7989f7e21f97cbf065a9390d upstream. If the feature is not supported by older kernels, and instead of just ignoring some tests, we should mark them as skipped, so we can still track them. Fixes: d85555ac11f9 ("selftests: mptcp: pm_netlink: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 8f4ff123a7eb..79e83a2c95de 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -194,6 +194,12 @@ subflow 10.0.1.1" " (nofullmesh)" ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)" +else + for st in fullmesh nofullmesh backup,fullmesh; do + st=" (${st})" + printf "%-50s%s\n" "${st}" "[SKIP]" + mptcp_lib_result_skip "${st}" + done fi mptcp_lib_result_print_all_tap From 1b1ce669a1f04f993c7960c83d87bf75e46d34f0 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:35 +0100 Subject: [PATCH 200/299] selftests: mptcp: pm nl: avoid error msg on older kernels commit 662f084f3396d8a804d56cb53ac05c9e39902a7b upstream. Since the 'Fixes' commit mentioned below, and if the kernel being tested doesn't support the 'fullmesh' flag, this error will be printed: netlink error -22 (Invalid argument) ./pm_nl_ctl: bailing out due to netlink error[s] But that can be normal if the kernel doesn't support the feature, no need to print this worrying error message while everything else looks OK. So we can mute stderr. Failures will still be detected if any. Fixes: 1dc88d241f92 ("selftests: mptcp: pm_nl_ctl: always look for errors") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 79e83a2c95de..71899a3ffa7a 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -183,7 +183,7 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow 10.0.1.1" " (nobackup)" # fullmesh support has been added later -ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh +ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" || mptcp_lib_expect_all_features; then check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ From 1f24ba67ba49483bf03c23dd84e8e4399bb7da79 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 22 Dec 2023 13:47:25 +0100 Subject: [PATCH 201/299] selftests: mptcp: diag: check CURRESTAB counters commit 81ab772819da408977ac79c0a17d8be57283379f upstream. This patch adds a new helper chk_msk_cestab() to check the current established connections counter MIB_CURRESTAB in diag.sh. Invoke it to check the counter during the connection after every chk_msk_inuse(). Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/diag.sh | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 85a8ee9395b3..15bd06c9b1db 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -56,7 +56,7 @@ __chk_nr() local command="$1" local expected=$2 local msg="$3" - local skip="${4:-SKIP}" + local skip="${4-SKIP}" local nr nr=$(eval $command) @@ -199,6 +199,15 @@ wait_local_port_listen() done } +# $1: cestab nr +chk_msk_cestab() +{ + local cestab=$1 + + __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \ + "${cestab}" "....chk ${cestab} cestab" "" +} + wait_connected() { local listener_ns="${1}" @@ -236,9 +245,11 @@ chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" chk_msk_inuse 2 "....chk 2 msk in use" +chk_msk_cestab 2 flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 echo "a" | \ timeout ${timeout_test} \ @@ -254,9 +265,11 @@ echo "b" | \ wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" chk_msk_inuse 1 "....chk 1 msk in use" +chk_msk_cestab 1 flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do @@ -278,9 +291,11 @@ done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use" +chk_msk_cestab $((NR_CLIENTS*2)) flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 mptcp_lib_result_print_all_tap exit $ret From 509bf4e553eb494b472a61981823fbcdc306063d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:36 +0100 Subject: [PATCH 202/299] selftests: mptcp: diag: fix bash warnings on older kernels commit 694bd45980a61045eb5ec07799e3b94c76db830e upstream. Since the 'Fixes' commit mentioned below, the command that is executed in __chk_nr() helper can return nothing if the feature is not supported. This is the case when the MPTCP CURRESTAB counter is not supported. To avoid this warning ... ./diag.sh: line 65: [: !=: unary operator expected ... we just need to surround '$nr' with double quotes, to support an empty string when the feature is not supported. Fixes: 81ab772819da ("selftests: mptcp: diag: check CURRESTAB counters") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/diag.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 15bd06c9b1db..cd6f322e95fb 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -62,8 +62,8 @@ __chk_nr() nr=$(eval $command) printf "%-50s" "$msg" - if [ $nr != $expected ]; then - if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then + if [ "$nr" != "$expected" ]; then + if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then echo "[ skip ] Feature probably not supported" mptcp_lib_result_skip "${msg}" else From 6b51994e19948daaefec4b8f30d18d2aca688fd9 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:39 +0100 Subject: [PATCH 203/299] selftests: mptcp: diag: unique 'in use' subtest names commit 645c1dc965ef6b5554e5e69737bb179c7a0f872f upstream. It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated name. Some 'in use' subtests from the diag selftest had the same names, e.g.: chk 0 msk in use after flush Now the previous value is taken, to have different names, e.g.: chk 2->0 msk in use after flush While at it, avoid repeating the full message, declare it once in the helper. Fixes: ce9902573652 ("selftests: mptcp: diag: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/diag.sh | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index cd6f322e95fb..89aba2786180 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -166,9 +166,13 @@ chk_msk_listen() chk_msk_inuse() { local expected=$1 - local msg="$2" + local msg="....chk ${2:-${expected}} msk in use" local listen_nr + if [ "${expected}" -eq 0 ]; then + msg+=" after flush" + fi + listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN) expected=$((expected + listen_nr)) @@ -179,7 +183,7 @@ chk_msk_inuse() sleep 0.1 done - __chk_nr get_msk_inuse $expected "$msg" 0 + __chk_nr get_msk_inuse $expected "${msg}" 0 } # $1: ns, $2: port @@ -244,11 +248,11 @@ wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" -chk_msk_inuse 2 "....chk 2 msk in use" +chk_msk_inuse 2 chk_msk_cestab 2 flush_pids -chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_inuse 0 "2->0" chk_msk_cestab 0 echo "a" | \ @@ -264,11 +268,11 @@ echo "b" | \ 127.0.0.1 >/dev/null & wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" -chk_msk_inuse 1 "....chk 1 msk in use" +chk_msk_inuse 1 chk_msk_cestab 1 flush_pids -chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_inuse 0 "1->0" chk_msk_cestab 0 NR_CLIENTS=100 @@ -290,11 +294,11 @@ for I in `seq 1 $NR_CLIENTS`; do done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" -chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use" +chk_msk_inuse $((NR_CLIENTS*2)) "many" chk_msk_cestab $((NR_CLIENTS*2)) flush_pids -chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_inuse 0 "many->0" chk_msk_cestab 0 mptcp_lib_result_print_all_tap From a7f34a068467dbfc91d058013d8f5a56523dcf88 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:40 +0100 Subject: [PATCH 204/299] selftests: mptcp: diag: unique 'cestab' subtest names commit 4103d8480866fe5abb71ef0ed8af3a3b7b9625bf upstream. It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated name. Some 'cestab' subtests from the diag selftest had the same names, e.g.: ....chk 0 cestab Now the previous value is taken, to have different names, e.g.: ....chk 2->0 cestab after flush While at it, the 'after flush' info is added, similar to what is done with the 'in use' subtests. Also inspired by these 'in use' subtests, 'many' is displayed instead of a large number: many msk socket present [ ok ] ....chk many msk in use [ ok ] ....chk many cestab [ ok ] ....chk many->0 msk in use after flush [ ok ] ....chk many->0 cestab after flush [ ok ] Fixes: 81ab772819da ("selftests: mptcp: diag: check CURRESTAB counters") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/diag.sh | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 89aba2786180..4d8c59be1b30 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -206,10 +206,15 @@ wait_local_port_listen() # $1: cestab nr chk_msk_cestab() { - local cestab=$1 + local expected=$1 + local msg="....chk ${2:-${expected}} cestab" + + if [ "${expected}" -eq 0 ]; then + msg+=" after flush" + fi __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \ - "${cestab}" "....chk ${cestab} cestab" "" + "${expected}" "${msg}" "" } wait_connected() @@ -253,7 +258,7 @@ chk_msk_cestab 2 flush_pids chk_msk_inuse 0 "2->0" -chk_msk_cestab 0 +chk_msk_cestab 0 "2->0" echo "a" | \ timeout ${timeout_test} \ @@ -273,7 +278,7 @@ chk_msk_cestab 1 flush_pids chk_msk_inuse 0 "1->0" -chk_msk_cestab 0 +chk_msk_cestab 0 "1->0" NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do @@ -295,11 +300,11 @@ done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" chk_msk_inuse $((NR_CLIENTS*2)) "many" -chk_msk_cestab $((NR_CLIENTS*2)) +chk_msk_cestab $((NR_CLIENTS*2)) "many" flush_pids chk_msk_inuse 0 "many->0" -chk_msk_cestab 0 +chk_msk_cestab 0 "many->0" mptcp_lib_result_print_all_tap exit $ret From 824c15ad0910f3622bf2fc93993d6f4c7b4808ee Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 5 Feb 2024 14:43:17 -0600 Subject: [PATCH 205/299] smb3: add missing null server pointer check commit 45be0882c5f91e1b92e645001dd1a53b3bd58c97 upstream. Address static checker warning in cifs_ses_get_chan_index(): warn: variable dereferenced before check 'server' To be consistent, and reduce risk, we should add another check for null server pointer. Fixes: 88675b22d34e ("cifs: do not search for channel if server is terminating") Reported-by: Dan Carpenter Reviewed-by: Shyam Prasad N Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/sess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 07726b456a0a..2fc2fbb260bf 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -76,7 +76,7 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, unsigned int i; /* if the channel is waiting for termination */ - if (server->terminate) + if (server && server->terminate) return CIFS_INVAL_CHAN_INDEX; for (i = 0; i < ses->chan_count; i++) { From a1baf5734231dd3ee5d98022b2da474bc972f9c7 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 10 Jan 2024 13:46:47 -0700 Subject: [PATCH 206/299] drm/amd/display: Avoid enum conversion warning commit d7643fe6fb76edb1f2f1497bf5e8b8f4774b5129 upstream. Clang warns (or errors with CONFIG_WERROR=y) when performing arithmetic with different enumerated types, which is usually a bug: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_dpia_bw.c:548:24: error: arithmetic between different enumeration types ('const enum dc_link_rate' and 'const enum dc_lane_count') [-Werror,-Wenum-enum-conversion] 548 | link_cap->link_rate * link_cap->lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8; | ~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~~ 1 error generated. In this case, there is not a problem because the enumerated types are basically treated as '#define' values. Add an explicit cast to an integral type to silence the warning. Closes: https://github.com/ClangBuiltLinux/linux/issues/1976 Fixes: 5f3bce13266e ("drm/amd/display: Request usb4 bw for mst streams") Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index 4ef1a6a1d129..dd0d2b206462 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -544,8 +544,9 @@ int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link) */ const struct dc_link_settings *link_cap = dc_link_get_link_cap(link); - uint32_t link_bw_in_kbps = - link_cap->link_rate * link_cap->lane_count * LINK_RATE_REF_FREQ_IN_KHZ * 8; + uint32_t link_bw_in_kbps = (uint32_t)link_cap->link_rate * + (uint32_t)link_cap->lane_count * + LINK_RATE_REF_FREQ_IN_KHZ * 8; link_mst_overhead = (link_bw_in_kbps / 64) + ((link_bw_in_kbps % 64) ? 1 : 0); } From 7211800091a9e2d49ad34f59d47321ca09ae30a7 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jan 2024 21:17:09 +0530 Subject: [PATCH 207/299] drm/amd/display: Fix buffer overflow in 'get_host_router_total_dp_tunnel_bw()' commit 97cba232549b9fe7e491fb60a69cf93075015f29 upstream. The error message buffer overflow 'dc->links' 12 <= 12 suggests that the code is trying to access an element of the dc->links array that is beyond its bounds. In C, arrays are zero-indexed, so an array with 12 elements has valid indices from 0 to 11. Trying to access dc->links[12] would be an attempt to access the 13th element of a 12-element array, which is a buffer overflow. To fix this, ensure that the loop does not go beyond the last valid index when accessing dc->links[i + 1] by subtracting 1 from the loop condition. This would ensure that i + 1 is always a valid index in the array. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_dpia_bw.c:208 get_host_router_total_dp_tunnel_bw() error: buffer overflow 'dc->links' 12 <= 12 Fixes: 59f1622a5f05 ("drm/amd/display: Add dpia display mode validation logic") Cc: PeiChen Huang Cc: Aric Cyr Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Cc: Meenakshikumar Somasundaram Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index dd0d2b206462..5491b707cec8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -196,7 +196,7 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in struct dc_link *link_dpia_primary, *link_dpia_secondary; int total_bw = 0; - for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) { + for (uint8_t i = 0; i < (MAX_PIPES * 2) - 1; ++i) { if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) continue; From 658750e3d8ede6006deee99cb33948133d51fbf0 Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem Date: Mon, 29 Jan 2024 17:33:40 -0500 Subject: [PATCH 208/299] Revert "drm/amd/display: increased min_dcfclk_mhz and min_fclk_mhz" commit a538dabf772c169641e151834e161e241802ab33 upstream. [why]: This reverts commit 2ff33c759a4247c84ec0b7815f1f223e155ba82a. The commit caused corruption when running some applications in fullscreen Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Alvin Lee Acked-by: Aurabindo Pillai Signed-off-by: Sohaib Nadeem Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index ec09d5a8876b..cf3b400c8619 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2452,7 +2452,7 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk struct _vcs_dpi_voltage_scaling_st entry = {0}; struct clk_limit_table_entry max_clk_data = {0}; - unsigned int min_dcfclk_mhz = 399, min_fclk_mhz = 599; + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; static const unsigned int num_dcfclk_stas = 5; unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; From 980278aca1f82c4b80dfc3e72bdfacce8e739680 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 28 Nov 2023 09:52:41 +0300 Subject: [PATCH 209/299] xen/events: fix error code in xen_bind_pirq_msi_to_irq() commit 7f3da4b698bcc21a6df0e7f114af71d53a3e26ac upstream. Return -ENOMEM if xen_irq_init() fails. currently the code returns an uninitialized variable or zero. Fixes: 5dd9ad32d775 ("xen/events: drop xen_allocate_irqs_dynamic()") Signed-off-by: Dan Carpenter Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/3b9ab040-a92e-4e35-b687-3a95890a9ace@moroto.mountain Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 36ba3ef6ef01..0c5259c68ade 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1099,8 +1099,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, for (i = 0; i < nvec; i++) { info = xen_irq_init(irq + i); - if (!info) + if (!info) { + ret = -ENOMEM; goto error_irq; + } irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name); From b96f500dbbc38ef9065a11a5e390a9c3070c2e95 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Sat, 7 Oct 2023 10:14:14 +0200 Subject: [PATCH 210/299] bpf: Derive source IP addr via bpf_*_fib_lookup() commit dab4e1f06cabb6834de14264394ccab197007302 upstream. Extend the bpf_fib_lookup() helper by making it to return the source IPv4/IPv6 address if the BPF_FIB_LOOKUP_SRC flag is set. For example, the following snippet can be used to derive the desired source IP address: struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr }; ret = bpf_skb_fib_lookup(skb, p, sizeof(p), BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH); if (ret != BPF_FIB_LKUP_RET_SUCCESS) return TC_ACT_SHOT; /* the p.ipv4_src now contains the source address */ The inability to derive the proper source address may cause malfunctions in BPF-based dataplanes for hosts containing netdevs with more than one routable IP address or for multi-homed hosts. For example, Cilium implements packet masquerading in BPF. If an egressing netdev to which the Cilium's BPF prog is attached has multiple IP addresses, then only one [hardcoded] IP address can be used for masquerading. This breaks connectivity if any other IP address should have been selected instead, for example, when a public and private addresses are attached to the same egress interface. The change was tested with Cilium [1]. Nikolay Aleksandrov helped to figure out the IPv6 addr selection. [1]: https://github.com/cilium/cilium/pull/28283 Signed-off-by: Martynas Pumputis Link: https://lore.kernel.org/r/20231007081415.33502-2-m@lambda.lt Signed-off-by: Martin KaFai Lau Cc: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- include/net/ipv6_stubs.h | 5 +++++ include/uapi/linux/bpf.h | 10 ++++++++++ net/core/filter.c | 18 +++++++++++++++++- net/ipv6/af_inet6.c | 1 + tools/include/uapi/linux/bpf.h | 10 ++++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index c48186bf4737..21da31e1dff5 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -85,6 +85,11 @@ struct ipv6_bpf_stub { sockptr_t optval, unsigned int optlen); int (*ipv6_getsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); + int (*ipv6_dev_get_saddr)(struct net *net, + const struct net_device *dst_dev, + const struct in6_addr *daddr, + unsigned int prefs, + struct in6_addr *saddr); }; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ada7acb91a1b..366df8a1a5fc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3257,6 +3257,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6956,6 +6961,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6968,6 +6974,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -7002,6 +7009,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ diff --git a/net/core/filter.c b/net/core/filter.c index 01f2417deef2..24f23a30c945 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5903,6 +5903,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) + params->ipv4_src = fib_result_prefsrc(net, &res); + /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ @@ -6045,6 +6048,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) { + if (res.f6i->fib6_prefsrc.plen) { + *src = res.f6i->fib6_prefsrc.addr; + } else { + err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, + &fl6.daddr, 0, + src); + if (err) + return BPF_FIB_LKUP_RET_NO_SRC_ADDR; + } + } + if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; @@ -6063,7 +6078,8 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ + BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4375bfa4f608..b9c50cceba56 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1064,6 +1064,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ada7acb91a1b..366df8a1a5fc 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3257,6 +3257,11 @@ union bpf_attr { * and *params*->smac will not be set as output. A common * use case is to call **bpf_redirect_neigh**\ () after * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this + * case, *params*->dmac and *params*->smac are not set either. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -6956,6 +6961,7 @@ enum { BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), }; enum { @@ -6968,6 +6974,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -7002,6 +7009,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ From b41d0ade0398007fb746213f09903d52a920e896 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Fri, 12 Jan 2024 16:55:23 +0800 Subject: [PATCH 211/299] IB/hfi1: Fix a memleak in init_credit_return [ Upstream commit 809aa64ebff51eb170ee31a95f83b2d21efa32e2 ] When dma_alloc_coherent fails to allocate dd->cr_base[i].va, init_credit_return should deallocate dd->cr_base and dd->cr_base[i] that allocated before. Or those resources would be never freed and a memleak is triggered. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Signed-off-by: Zhipeng Lu Link: https://lore.kernel.org/r/20240112085523.3731720-1-alexious@zju.edu.cn Acked-by: Dennis Dalessandro Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/pio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index dfea53e0fdeb..5eb309ead707 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -2086,7 +2086,7 @@ int init_credit_return(struct hfi1_devdata *dd) "Unable to allocate credit return DMA range for NUMA %d\n", i); ret = -ENOMEM; - goto done; + goto free_cr_base; } } set_dev_node(&dd->pcidev->dev, dd->node); @@ -2094,6 +2094,10 @@ int init_credit_return(struct hfi1_devdata *dd) ret = 0; done: return ret; + +free_cr_base: + free_credit_return(dd); + goto done; } void free_credit_return(struct hfi1_devdata *dd) From 9abe693274071aa68944094587f665d212803bec Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 22 Jan 2024 20:54:36 -0800 Subject: [PATCH 212/299] RDMA/bnxt_re: Return error for SRQ resize [ Upstream commit 3687b450c5f32e80f179ce4b09e0454da1449eac ] SRQ resize is not supported in the driver. But driver is not returning error from bnxt_re_modify_srq() for SRQ resize. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1705985677-15551-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index faa88d12ee86..cc466dfd792b 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1809,7 +1809,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, switch (srq_attr_mask) { case IB_SRQ_MAX_WR: /* SRQ resize is not supported */ - break; + return -EINVAL; case IB_SRQ_LIMIT: /* Change the SRQ threshold */ if (srq_attr->srq_limit > srq->qplib_srq.max_wqe) @@ -1824,13 +1824,12 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, /* On success, update the shadow */ srq->srq_limit = srq_attr->srq_limit; /* No need to Build and send response back to udata */ - break; + return 0; default: ibdev_err(&rdev->ibdev, "Unsupported srq_attr_mask 0x%x", srq_attr_mask); return -EINVAL; } - return 0; } int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) From 43a6b52b7cf1fae3e0eb27fa57e08e92a9f279b7 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 22 Jan 2024 20:54:37 -0800 Subject: [PATCH 213/299] RDMA/bnxt_re: Add a missing check in bnxt_qplib_query_srq [ Upstream commit 80dde187f734cf9ccf988d5c2ef1a46b990660fd ] Before populating the response, driver has to check the status of HWRM command. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1705985677-15551-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index abbabea7f5fa..2a6223918762 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -748,7 +748,8 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); - srq->threshold = le16_to_cpu(sb->srq_limit); + if (!rc) + srq->threshold = le16_to_cpu(sb->srq_limit); dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); From 25f7f28142a2b8d2f6452eb4f88180456431b17e Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 28 Jan 2024 11:29:12 +0200 Subject: [PATCH 214/299] IB/mlx5: Don't expose debugfs entries for RRoCE general parameters if not supported [ Upstream commit 43fdbd140238d44e7e847232719fef7d20f9d326 ] debugfs entries for RRoCE general CC parameters must be exposed only when they are supported, otherwise when accessing them there may be a syndrome error in kernel log, for example: $ cat /sys/kernel/debug/mlx5/0000:08:00.1/cc_params/rtt_resp_dscp cat: '/sys/kernel/debug/mlx5/0000:08:00.1/cc_params/rtt_resp_dscp': Invalid argument $ dmesg mlx5_core 0000:08:00.1: mlx5_cmd_out_err:805:(pid 1253): QUERY_CONG_PARAMS(0x824) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x325a82), err(-22) Fixes: 66fb1d5df6ac ("IB/mlx5: Extend debug control for CC parameters") Reviewed-by: Edward Srouji Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/e7ade70bad52b7468bdb1de4d41d5fad70c8b71c.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/cong.c | 6 ++++++ include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index f87531318feb..a78a067e3ce7 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -458,6 +458,12 @@ void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num) dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev)); for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { + if ((i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID || + i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP)) + if (!MLX5_CAP_GEN(mdev, roce) || + !MLX5_CAP_ROCE(mdev, roce_cc_general)) + continue; + dbg_cc_params->params[i].offset = i; dbg_cc_params->params[i].dev = dev; dbg_cc_params->params[i].port_num = port_num; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 51eb83f77938..643e9ba4e64b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1102,7 +1102,7 @@ struct mlx5_ifc_roce_cap_bits { u8 sw_r_roce_src_udp_port[0x1]; u8 fl_rc_qp_when_roce_disabled[0x1]; u8 fl_rc_qp_when_roce_enabled[0x1]; - u8 reserved_at_7[0x1]; + u8 roce_cc_general[0x1]; u8 qp_ooo_transmit_default[0x1]; u8 reserved_at_9[0x15]; u8 qp_ts_format[0x2]; From a8ef9c7f4cfd0841007af8eddfc3b26e33e9ef91 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 20 Dec 2023 01:02:42 +0100 Subject: [PATCH 215/299] arm64: dts: imx8mp: Disable UART4 by default on Data Modul i.MX8M Plus eDM SBC [ Upstream commit f03869698bc3bd6d9d2d9f216b20da08a8c2508a ] UART4 is used as CM7 coprocessor debug UART and may not be accessible from Linux in case it is protected by RDC. The RDC protection is set up by the platform firmware. UART4 is not used on this platform by Linux. Disable UART4 by default to prevent boot hangs, which occur when the RDC protection is in place. Fixes: 562d222f23f0 ("arm64: dts: imx8mp: Add support for Data Modul i.MX8M Plus eDM SBC") Signed-off-by: Marek Vasut Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts index 13674dc64be9..116bf9738a8a 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts @@ -484,7 +484,7 @@ &uart4 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart4>; - status = "okay"; + status = "disabled"; }; &usb3_phy0 { From c6f1ca235f68b22b3e691b2ea87ac285e5946848 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 31 Jan 2024 17:38:46 -0600 Subject: [PATCH 216/299] RDMA/irdma: Fix KASAN issue with tasklet [ Upstream commit bd97cea7b18a0a553773af806dfbfac27a7c4acb ] KASAN testing revealed the following issue assocated with freeing an IRQ. [50006.466686] Call Trace: [50006.466691] [50006.489538] dump_stack+0x5c/0x80 [50006.493475] print_address_description.constprop.6+0x1a/0x150 [50006.499872] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.505742] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.511644] kasan_report.cold.11+0x7f/0x118 [50006.516572] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.522473] irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.528232] irdma_process_ceq+0xb2/0x400 [irdma] [50006.533601] ? irdma_hw_flush_wqes_callback+0x370/0x370 [irdma] [50006.540298] irdma_ceq_dpc+0x44/0x100 [irdma] [50006.545306] tasklet_action_common.isra.14+0x148/0x2c0 [50006.551096] __do_softirq+0x1d0/0xaf8 [50006.555396] irq_exit_rcu+0x219/0x260 [50006.559670] irq_exit+0xa/0x20 [50006.563320] smp_apic_timer_interrupt+0x1bf/0x690 [50006.568645] apic_timer_interrupt+0xf/0x20 [50006.573341] The issue is that a tasklet could be pending on another core racing the delete of the irq. Fix by insuring any scheduled tasklet is killed after deleting the irq. Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-2-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/hw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 564c9188e1f8..aff68aa8dfad 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -570,6 +570,13 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); irq_update_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); + if (rf == dev_id) { + tasklet_kill(&rf->dpc_tasklet); + } else { + struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id; + + tasklet_kill(&iwceq->dpc_tasklet); + } } /** From 9afa1e4354a11faef61703e9514fd01cb352d318 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 31 Jan 2024 17:38:47 -0600 Subject: [PATCH 217/299] RDMA/irdma: Validate max_send_wr and max_recv_wr [ Upstream commit ee107186bcfd25d7873258f3f75440e20f5e6416 ] Validate that max_send_wr and max_recv_wr is within the supported range. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Change-Id: I2fc8b10292b641fddd20b36986a9dae90a93f4be Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-3-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 2f1bedd3a520..d9750901c599 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -839,7 +839,9 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline || init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || - init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags) + init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || + init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta || + init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta) return -EINVAL; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { From 3907d842f291f17cb317cdc19cea506f225d19c8 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 31 Jan 2024 17:38:48 -0600 Subject: [PATCH 218/299] RDMA/irdma: Set the CQ read threshold for GEN 1 [ Upstream commit 666047f3ece9f991774c1fe9b223139a9ef8908d ] The CQ shadow read threshold is currently not set for GEN 2. This could cause an invalid CQ overflow condition, so remove the GEN check that exclused GEN 1. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-4-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index d9750901c599..60618b2046b9 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2186,9 +2186,8 @@ static int irdma_create_cq(struct ib_cq *ibcq, info.cq_base_pa = iwcq->kmem.pa; } - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) - info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, - (u32)IRDMA_MAX_CQ_READ_THRESH); + info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, + (u32)IRDMA_MAX_CQ_READ_THRESH); if (irdma_sc_cq_init(cq, &info)) { ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n"); From f562dbfd89dccac45281cb83d8ca390c150abb9b Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 31 Jan 2024 17:38:49 -0600 Subject: [PATCH 219/299] RDMA/irdma: Add AE for too many RNRS [ Upstream commit 630bdb6f28ca9e5ff79e244030170ac788478332 ] Add IRDMA_AE_LLP_TOO_MANY_RNRS to the list of AE's processed as an abnormal asyncronous event. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-5-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/irdma/defs.h | 1 + drivers/infiniband/hw/irdma/hw.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index d06e45d2c23f..9052e8932dc1 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -346,6 +346,7 @@ enum irdma_cqp_op_type { #define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b #define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c #define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e +#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f #define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 #define IRDMA_AE_RESET_SENT 0x0601 #define IRDMA_AE_TERMINATE_SENT 0x0602 diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index aff68aa8dfad..1745f40b075f 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -387,6 +387,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) case IRDMA_AE_LLP_TOO_MANY_RETRIES: case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: + case IRDMA_AE_LLP_TOO_MANY_RNRS: case IRDMA_AE_LCE_CQ_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: default: From fe2a73d57319feab4b3b175945671ce43492172f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 4 Feb 2024 16:42:07 -0800 Subject: [PATCH 220/299] RDMA/srpt: Support specifying the srpt_service_guid parameter [ Upstream commit fdfa083549de5d50ebf7f6811f33757781e838c0 ] Make loading ib_srpt with this parameter set work. The current behavior is that setting that parameter while loading the ib_srpt kernel module triggers the following kernel crash: BUG: kernel NULL pointer dereference, address: 0000000000000000 Call Trace: parse_one+0x18c/0x1d0 parse_args+0xe1/0x230 load_module+0x8de/0xa60 init_module_from_file+0x8b/0xd0 idempotent_init_module+0x181/0x240 __x64_sys_finit_module+0x5a/0xb0 do_syscall_64+0x5f/0xe0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Cc: LiHonggang Reported-by: LiHonggang Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240205004207.17031-1-bvanassche@acm.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srpt/ib_srpt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index c12005eab14c..38168d8c626f 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -79,12 +79,16 @@ module_param(srpt_srq_size, int, 0444); MODULE_PARM_DESC(srpt_srq_size, "Shared receive queue (SRQ) size."); +static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp) +{ + return kstrtou64(buffer, 16, (u64 *)kp->arg); +} static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg); } -module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, - 0444); +module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x, + &srpt_service_guid, 0444); MODULE_PARM_DESC(srpt_service_guid, "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); From 9c29933eae402b0e65144cdabda915c5aafadcb5 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 10 Jan 2024 10:08:49 +0100 Subject: [PATCH 221/299] arm64: dts: tqma8mpql: fix audio codec iov-supply [ Upstream commit a620a7f2ae8b08c5beea6369f61e87064ee222dc ] IOVDD is supplied by 1.8V, fix the referenced regulator. Fixes: d8f9d8126582d ("arm64: dts: imx8mp: Add analog audio output on i.MX8MP TQMa8MPxL/MBa8MPxL") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- .../boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts index 4240e20d38ac..258e90cc16ff 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts @@ -168,6 +168,13 @@ enable-active-high; }; + reg_vcc_1v8: regulator-1v8 { + compatible = "regulator-fixed"; + regulator-name = "VCC_1V8"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + reg_vcc_3v3: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "VCC_3V3"; @@ -464,7 +471,7 @@ clock-names = "mclk"; clocks = <&audio_blk_ctrl IMX8MP_CLK_AUDIOMIX_SAI3_MCLK1>; reset-gpios = <&gpio4 29 GPIO_ACTIVE_LOW>; - iov-supply = <®_vcc_3v3>; + iov-supply = <®_vcc_1v8>; ldoin-supply = <®_vcc_3v3>; }; From bc569f86f978133da740870bbb8bef678783788b Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 19 Jan 2024 19:50:26 +0100 Subject: [PATCH 222/299] bus: imx-weim: fix valid range check [ Upstream commit 7bca405c986075c99b9f729d3587b5c45db39d01 ] When the range parsing was open-coded the number of u32 entries to parse had to be a multiple of 4 and the driver checks this. With the range parsing converted to the range parser the counting changes from individual u32 entries to a complete range, so the check must not reject counts not divisible by 4. Fixes: 2a88e4792c6d ("bus: imx-weim: Remove open coded "ranges" parsing") Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- drivers/bus/imx-weim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 42c9386a7b42..f9fd1582f150 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -117,7 +117,7 @@ static int imx_weim_gpr_setup(struct platform_device *pdev) i++; } - if (i == 0 || i % 4) + if (i == 0) goto err; for (i = 0; i < ARRAY_SIZE(gprvals); i++) { From 634745054a521365f150e746258f25f6db91118d Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:07 +0000 Subject: [PATCH 223/299] iommufd/iova_bitmap: Bounds check mapped::pages access [ Upstream commit a4ab7dedaee0e39b15653c5fd0367e420739f7ef ] Dirty IOMMU hugepages reported on a base page page-size granularity can lead to an attempt to set dirty pages in the bitmap beyond the limits that are pinned. Bounds check the page index of the array we are trying to access is within the limits before we kmap() and return otherwise. While it is also a defensive check, this is also in preparation to defer setting bits (outside the mapped range) to the next iteration(s) when the pages become available. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-2-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/vfio/iova_bitmap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 0848f920efb7..997134a24c02 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -406,6 +406,7 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + unsigned long last_page_idx = mapped->npages - 1; do { unsigned int page_idx = cur_bit / BITS_PER_PAGE; @@ -414,6 +415,9 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, last_bit - cur_bit + 1); void *kaddr; + if (unlikely(page_idx > last_page_idx)) + break; + kaddr = kmap_local_page(mapped->pages[page_idx]); bitmap_set(kaddr, offset, nbits); kunmap_local(kaddr); From 929766dadbd27c1ca4d7c53008ccdee790e846f4 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:08 +0000 Subject: [PATCH 224/299] iommufd/iova_bitmap: Switch iova_bitmap::bitmap to an u8 array [ Upstream commit d18411ec305728c6371806c4fb09be07016aad0b ] iova_bitmap_mapped_length() don't deal correctly with the small bitmaps (< 2M bitmaps) when the starting address isn't u64 aligned, leading to skipping a tiny part of the IOVA range. This is materialized as not marking data dirty that should otherwise have been. Fix that by using a u8 * in the internal state of IOVA bitmap. Most of the data structures use the type of the bitmap to adjust its indexes, thus changing the type of the bitmap decreases the granularity of the bitmap indexes. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-3-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/vfio/iova_bitmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 997134a24c02..26ad0912cfea 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -100,7 +100,7 @@ struct iova_bitmap { struct iova_bitmap_map mapped; /* userspace address of the bitmap */ - u64 __user *bitmap; + u8 __user *bitmap; /* u64 index that @mapped points to */ unsigned long mapped_base_index; @@ -162,7 +162,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) { struct iova_bitmap_map *mapped = &bitmap->mapped; unsigned long npages; - u64 __user *addr; + u8 __user *addr; long ret; /* @@ -247,7 +247,7 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, mapped = &bitmap->mapped; mapped->pgshift = __ffs(page_size); - bitmap->bitmap = data; + bitmap->bitmap = (u8 __user *)data; bitmap->mapped_total_index = iova_bitmap_offset_to_index(bitmap, length - 1) + 1; bitmap->iova = iova; @@ -302,7 +302,7 @@ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; remaining = min_t(unsigned long, remaining, - bytes / sizeof(*bitmap->bitmap)); + DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap))); return remaining; } From c99e6b267d761bbd856b2d4c98b9dc801387e704 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:15 +0000 Subject: [PATCH 225/299] iommufd/iova_bitmap: Consider page offset for the pages to be pinned [ Upstream commit 4bbcbc6ea2fa379632a24c14cfb47aa603816ac6 ] For small bitmaps that aren't PAGE_SIZE aligned *and* that are less than 512 pages in bitmap length, use an extra page to be able to cover the entire range e.g. [1M..3G] which would be iterated more efficiently in a single iteration, rather than two. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-10-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/vfio/iova_bitmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 26ad0912cfea..7af5b204990b 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -175,18 +175,19 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) bitmap->mapped_base_index) * sizeof(*bitmap->bitmap), PAGE_SIZE); - /* - * We always cap at max number of 'struct page' a base page can fit. - * This is, for example, on x86 means 2M of bitmap data max. - */ - npages = min(npages, PAGE_SIZE / sizeof(struct page *)); - /* * Bitmap address to be pinned is calculated via pointer arithmetic * with bitmap u64 word index. */ addr = bitmap->bitmap + bitmap->mapped_base_index; + /* + * We always cap at max number of 'struct page' a base page can fit. + * This is, for example, on x86 means 2M of bitmap data max. + */ + npages = min(npages + !!offset_in_page(addr), + PAGE_SIZE / sizeof(struct page *)); + ret = pin_user_pages_fast((unsigned long)addr, npages, FOLL_WRITE, mapped->pages); if (ret <= 0) From 95175dda017cd4982cd47960536fa1de003d3298 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 8 Feb 2024 17:36:28 -0500 Subject: [PATCH 226/299] RDMA/qedr: Fix qedr_create_user_qp error flow [ Upstream commit 5ba4e6d5863c53e937f49932dee0ecb004c65928 ] Avoid the following warning by making sure to free the allocated resources in case that qedr_init_user_queue() fail. -----------[ cut here ]----------- WARNING: CPU: 0 PID: 143192 at drivers/infiniband/core/rdma_core.c:874 uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] Modules linked in: tls target_core_user uio target_core_pscsi target_core_file target_core_iblock ib_srpt ib_srp scsi_transport_srp nfsd nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs 8021q garp mrp stp llc ext4 mbcache jbd2 opa_vnic ib_umad ib_ipoib sunrpc rdma_ucm ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_cm hfi1 intel_rapl_msr intel_rapl_common mgag200 qedr sb_edac drm_shmem_helper rdmavt x86_pkg_temp_thermal drm_kms_helper intel_powerclamp ib_uverbs coretemp i2c_algo_bit kvm_intel dell_wmi_descriptor ipmi_ssif sparse_keymap kvm ib_core rfkill syscopyarea sysfillrect video sysimgblt irqbypass ipmi_si ipmi_devintf fb_sys_fops rapl iTCO_wdt mxm_wmi iTCO_vendor_support intel_cstate pcspkr dcdbas intel_uncore ipmi_msghandler lpc_ich acpi_power_meter mei_me mei fuse drm xfs libcrc32c qede sd_mod ahci libahci t10_pi sg crct10dif_pclmul crc32_pclmul crc32c_intel qed libata tg3 ghash_clmulni_intel megaraid_sas crc8 wmi [last unloaded: ib_srpt] CPU: 0 PID: 143192 Comm: fi_rdm_tagged_p Kdump: loaded Not tainted 5.14.0-408.el9.x86_64 #1 Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 2.14.0 01/25/2022 RIP: 0010:uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] Code: 5d 41 5c 41 5d 41 5e e9 0f 26 1b dd 48 89 df e8 67 6a ff ff 49 8b 86 10 01 00 00 48 85 c0 74 9c 4c 89 e7 e8 83 c0 cb dd eb 92 <0f> 0b eb be 0f 0b be 04 00 00 00 48 89 df e8 8e f5 ff ff e9 6d ff RSP: 0018:ffffb7c6cadfbc60 EFLAGS: 00010286 RAX: ffff8f0889ee3f60 RBX: ffff8f088c1a5200 RCX: 00000000802a0016 RDX: 00000000802a0017 RSI: 0000000000000001 RDI: ffff8f0880042600 RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8f11fffd5000 R11: 0000000000039000 R12: ffff8f0d5b36cd80 R13: ffff8f088c1a5250 R14: ffff8f1206d91000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8f11d7c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000147069200e20 CR3: 00000001c7210002 CR4: 00000000001706f0 Call Trace: ? show_trace_log_lvl+0x1c4/0x2df ? show_trace_log_lvl+0x1c4/0x2df ? ib_uverbs_close+0x1f/0xb0 [ib_uverbs] ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ? __warn+0x81/0x110 ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ? report_bug+0x10a/0x140 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ib_uverbs_close+0x1f/0xb0 [ib_uverbs] __fput+0x94/0x250 task_work_run+0x5c/0x90 do_exit+0x270/0x4a0 do_group_exit+0x2d/0x90 get_signal+0x87c/0x8c0 arch_do_signal_or_restart+0x25/0x100 ? ib_uverbs_ioctl+0xc2/0x110 [ib_uverbs] exit_to_user_mode_loop+0x9c/0x130 exit_to_user_mode_prepare+0xb6/0x100 syscall_exit_to_user_mode+0x12/0x40 do_syscall_64+0x69/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x22/0x40 ? do_syscall_64+0x69/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x22/0x40 ? do_syscall_64+0x69/0x90 ? do_syscall_64+0x69/0x90 ? common_interrupt+0x43/0xa0 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x1470abe3ec6b Code: Unable to access opcode bytes at RIP 0x1470abe3ec41. RSP: 002b:00007fff13ce9108 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: fffffffffffffffc RBX: 00007fff13ce9218 RCX: 00001470abe3ec6b RDX: 00007fff13ce9200 RSI: 00000000c0181b01 RDI: 0000000000000004 RBP: 00007fff13ce91e0 R08: 0000558d9655da10 R09: 0000558d9655dd00 R10: 00007fff13ce95c0 R11: 0000000000000246 R12: 00007fff13ce9358 R13: 0000000000000013 R14: 0000558d9655db50 R15: 00007fff13ce9470 --[ end trace 888a9b92e04c5c97 ]-- Fixes: df15856132bc ("RDMA/qedr: restructure functions that create/destroy QPs") Signed-off-by: Kamal Heib Link: https://lore.kernel.org/r/20240208223628.2040841-1-kheib@redhat.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/verbs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 7887a6786ed4..f118ce0a9a61 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1879,8 +1879,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, true, 0, alloc_and_init); - if (rc) + if (rc) { + ib_umem_release(qp->usq.umem); + qp->usq.umem = NULL; + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, + qp->usq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + } return rc; + } } memset(&in_params, 0, sizeof(in_params)); From 9ff254f14b1994ec9a2866b0c4abbb2bb202e0aa Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 19 Jan 2024 11:16:56 +0100 Subject: [PATCH 227/299] arm64: dts: rockchip: set num-cs property for spi on px30 [ Upstream commit 334bf0710c98d391f4067b72f535d6c4c84dfb6f ] The px30 has two spi controllers with two chip-selects each. The num-cs property is specified as the total number of chip selects a controllers has and is used since 2020 to find uses of chipselects outside that range in the Rockchip spi driver. Without the property set, the default is 1, so spi devices using the second chipselect will not be created. Fixes: eb1262e3cc8b ("spi: spi-rockchip: use num-cs property and ctlr->enable_gpiods") Signed-off-by: Heiko Stuebner Reviewed-by: Quentin Schulz Link: https://lore.kernel.org/r/20240119101656.965744-1-heiko@sntech.de Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/px30.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index 42ce78beb413..20955556b624 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -632,6 +632,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 12>, <&dmac 13>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi0_clk &spi0_csn &spi0_miso &spi0_mosi>; #address-cells = <1>; @@ -647,6 +648,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 14>, <&dmac 15>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_csn1 &spi1_miso &spi1_mosi>; #address-cells = <1>; From 0f7798768f21b41e4572f8d486fb197d3b622ede Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Thu, 25 Jan 2024 14:19:42 -0600 Subject: [PATCH 228/299] arm64: dts: rockchip: Correct Indiedroid Nova GPIO Names [ Upstream commit c22d03a95b0d815cd186302fdd93f74d99f1c914 ] Correct the names given to a few of the GPIO pins. The original names were unknowingly based on the header from a pre-production board. The production board has a slightly different pin assignment for the 40-pin GPIO header. Fixes: 3900160e164b ("arm64: dts: rockchip: Add Indiedroid Nova board") Signed-off-by: Chris Morgan Link: https://lore.kernel.org/r/20240125201943.90476-2-macroalpha82@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- .../boot/dts/rockchip/rk3588s-indiedroid-nova.dts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts index d1503a4b233a..9299fa7e3e21 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts @@ -163,13 +163,13 @@ &gpio1 { gpio-line-names = /* GPIO1 A0-A7 */ - "HEADER_27_3v3", "HEADER_28_3v3", "", "", + "HEADER_27_3v3", "", "", "", "HEADER_29_1v8", "", "HEADER_7_1v8", "", /* GPIO1 B0-B7 */ "", "HEADER_31_1v8", "HEADER_33_1v8", "", "HEADER_11_1v8", "HEADER_13_1v8", "", "", /* GPIO1 C0-C7 */ - "", "", "", "", + "", "HEADER_28_3v3", "", "", "", "", "", "", /* GPIO1 D0-D7 */ "", "", "", "", @@ -193,11 +193,11 @@ &gpio4 { gpio-line-names = /* GPIO4 A0-A7 */ - "", "", "HEADER_37_3v3", "HEADER_32_3v3", - "HEADER_36_3v3", "", "HEADER_35_3v3", "HEADER_38_3v3", + "", "", "HEADER_37_3v3", "HEADER_8_3v3", + "HEADER_10_3v3", "", "HEADER_32_3v3", "HEADER_35_3v3", /* GPIO4 B0-B7 */ "", "", "", "HEADER_40_3v3", - "HEADER_8_3v3", "HEADER_10_3v3", "", "", + "HEADER_38_3v3", "HEADER_36_3v3", "", "", /* GPIO4 C0-C7 */ "", "", "", "", "", "", "", "", From f053322543504014c06bd2276f4586b62edd01b4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 2 Feb 2024 17:32:20 +0100 Subject: [PATCH 229/299] xsk: Add truesize to skb_add_rx_frag(). [ Upstream commit 2127c604383666675789fd4a5fc2aead46c73aad ] xsk_build_skb() allocates a page and adds it to the skb via skb_add_rx_frag() and specifies 0 for truesize. This leads to a warning in skb_add_rx_frag() with CONFIG_DEBUG_NET enabled because size is larger than truesize. Increasing truesize requires to add the same amount to socket's sk_wmem_alloc counter in order not to underflow the counter during release in the destructor (sock_wfree()). Pass the size of the allocated page as truesize to skb_add_rx_frag(). Add this mount to socket's sk_wmem_alloc counter. Fixes: cf24f5a5feea ("xsk: add support for AF_XDP multi-buffer on Tx path") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20240202163221.2488589-1-bigeasy@linutronix.de Signed-off-by: Sasha Levin --- net/xdp/xsk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d849dc04a334..2c3ba42bfcdc 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -683,7 +683,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, memcpy(vaddr, buffer, len); kunmap_local(vaddr); - skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); + skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE); + refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc); } } From 3207671036440609f8f701974c398fa68a88d368 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:07:13 +0100 Subject: [PATCH 230/299] RDMA/srpt: fix function pointer cast warnings [ Upstream commit eb5c7465c3240151cd42a55c7ace9da0026308a1 ] clang-16 notices that srpt_qp_event() gets called through an incompatible pointer here: drivers/infiniband/ulp/srpt/ib_srpt.c:1815:5: error: cast from 'void (*)(struct ib_event *, struct srpt_rdma_ch *)' to 'void (*)(struct ib_event *, void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1815 | = (void(*)(struct ib_event *, void*))srpt_qp_event; Change srpt_qp_event() to use the correct prototype and adjust the argument inside of it. Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213100728.458348-1-arnd@kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srpt/ib_srpt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 38168d8c626f..015bfeede90e 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -214,10 +214,12 @@ static const char *get_ch_state_name(enum rdma_ch_state s) /** * srpt_qp_event - QP event callback function * @event: Description of the event that occurred. - * @ch: SRPT RDMA channel. + * @ptr: SRPT RDMA channel. */ -static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) +static void srpt_qp_event(struct ib_event *event, void *ptr) { + struct srpt_rdma_ch *ch = ptr; + pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n", event->event, ch, ch->sess_name, ch->qp->qp_num, get_ch_state_name(ch->state)); @@ -1811,8 +1813,7 @@ retry: ch->cq_size = ch->rq_size + sq_size; qp_init->qp_context = (void *)ch; - qp_init->event_handler - = (void(*)(struct ib_event *, void*))srpt_qp_event; + qp_init->event_handler = srpt_qp_event; qp_init->send_cq = ch->cq; qp_init->recv_cq = ch->cq; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; From 239b85a9a97729a4d8b17eed6b4ebec4005d76d2 Mon Sep 17 00:00:00 2001 From: Gianmarco Lusvardi Date: Tue, 13 Feb 2024 23:05:46 +0000 Subject: [PATCH 231/299] bpf, scripts: Correct GPL license name [ Upstream commit e37243b65d528a8a9f8b9a57a43885f8e8dfc15c ] The bpf_doc script refers to the GPL as the "GNU Privacy License". I strongly suspect that the author wanted to refer to the GNU General Public License, under which the Linux kernel is released, as, to the best of my knowledge, there is no license named "GNU Privacy License". This patch corrects the license name in the script accordingly. Fixes: 56a092c89505 ("bpf: add script and prepare bpf.h for new helpers documentation") Signed-off-by: Gianmarco Lusvardi Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240213230544.930018-3-glusvardi@posteo.net Signed-off-by: Sasha Levin --- scripts/bpf_doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 61b7dddedc46..0669bac5e900 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -513,7 +513,7 @@ eBPF programs can have an associated license, passed along with the bytecode instructions to the kernel when the programs are loaded. The format for that string is identical to the one in use for kernel modules (Dual licenses, such as "Dual BSD/GPL", may be used). Some helper functions are only accessible to -programs that are compatible with the GNU Privacy License (GPL). +programs that are compatible with the GNU General Public License (GNU GPL). In order to use such helpers, the eBPF program must be loaded with the correct license string passed (via **attr**) to the **bpf**\\ () system call, and this From 4f5b15c15e6016efb3e14582d02cc4ddf57227df Mon Sep 17 00:00:00 2001 From: Don Brace Date: Tue, 13 Feb 2024 10:22:00 -0600 Subject: [PATCH 232/299] scsi: smartpqi: Fix disable_managed_interrupts [ Upstream commit 5761eb9761d2d5fe8248a9b719efc4d8baf1f24a ] Correct blk-mq registration issue with module parameter disable_managed_interrupts enabled. When we turn off the default PCI_IRQ_AFFINITY flag, the driver needs to register with blk-mq using blk_mq_map_queues(). The driver is currently calling blk_mq_pci_map_queues() which results in a stack trace and possibly undefined behavior. Stack Trace: [ 7.860089] scsi host2: smartpqi [ 7.871934] WARNING: CPU: 0 PID: 238 at block/blk-mq-pci.c:52 blk_mq_pci_map_queues+0xca/0xd0 [ 7.889231] Modules linked in: sd_mod t10_pi sg uas smartpqi(+) crc32c_intel scsi_transport_sas usb_storage dm_mirror dm_region_hash dm_log dm_mod ipmi_devintf ipmi_msghandler fuse [ 7.924755] CPU: 0 PID: 238 Comm: kworker/0:3 Not tainted 4.18.0-372.88.1.el8_6_smartpqi_test.x86_64 #1 [ 7.944336] Hardware name: HPE ProLiant DL380 Gen10/ProLiant DL380 Gen10, BIOS U30 03/08/2022 [ 7.963026] Workqueue: events work_for_cpu_fn [ 7.978275] RIP: 0010:blk_mq_pci_map_queues+0xca/0xd0 [ 7.978278] Code: 48 89 de 89 c7 e8 f6 0f 4f 00 3b 05 c4 b7 8e 01 72 e1 5b 31 c0 5d 41 5c 41 5d 41 5e 41 5f e9 7d df 73 00 31 c0 e9 76 df 73 00 <0f> 0b eb bc 90 90 0f 1f 44 00 00 41 57 49 89 ff 41 56 41 55 41 54 [ 7.978280] RSP: 0018:ffffa95fc3707d50 EFLAGS: 00010216 [ 7.978283] RAX: 00000000ffffffff RBX: 0000000000000000 RCX: 0000000000000010 [ 7.978284] RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffff9190c32d4310 [ 7.978286] RBP: 0000000000000000 R08: ffffa95fc3707d38 R09: ffff91929b81ac00 [ 7.978287] R10: 0000000000000001 R11: ffffa95fc3707ac0 R12: 0000000000000000 [ 7.978288] R13: ffff9190c32d4000 R14: 00000000ffffffff R15: ffff9190c4c950a8 [ 7.978290] FS: 0000000000000000(0000) GS:ffff9193efc00000(0000) knlGS:0000000000000000 [ 7.978292] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8.172814] CR2: 000055d11166c000 CR3: 00000002dae10002 CR4: 00000000007706f0 [ 8.172816] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 8.172817] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 8.172818] PKRU: 55555554 [ 8.172819] Call Trace: [ 8.172823] blk_mq_alloc_tag_set+0x12e/0x310 [ 8.264339] scsi_add_host_with_dma.cold.9+0x30/0x245 [ 8.279302] pqi_ctrl_init+0xacf/0xc8e [smartpqi] [ 8.294085] ? pqi_pci_probe+0x480/0x4c8 [smartpqi] [ 8.309015] pqi_pci_probe+0x480/0x4c8 [smartpqi] [ 8.323286] local_pci_probe+0x42/0x80 [ 8.337855] work_for_cpu_fn+0x16/0x20 [ 8.351193] process_one_work+0x1a7/0x360 [ 8.364462] ? create_worker+0x1a0/0x1a0 [ 8.379252] worker_thread+0x1ce/0x390 [ 8.392623] ? create_worker+0x1a0/0x1a0 [ 8.406295] kthread+0x10a/0x120 [ 8.418428] ? set_kthread_struct+0x50/0x50 [ 8.431532] ret_from_fork+0x1f/0x40 [ 8.444137] ---[ end trace 1bf0173d39354506 ]--- Fixes: cf15c3e734e8 ("scsi: smartpqi: Add module param to disable managed ints") Tested-by: Yogesh Chandra Pandey Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Mahesh Rajashekhara Reviewed-by: Mike McGowen Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Link: https://lore.kernel.org/r/20240213162200.1875970-2-don.brace@microchip.com Reviewed-by: Tomas Henzl Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi_init.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 081bb2c09806..868453b18c9a 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -6533,8 +6533,11 @@ static void pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + if (!ctrl_info->disable_managed_interrupts) + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], ctrl_info->pci_dev, 0); + else + return blk_mq_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT]); } static inline bool pqi_is_tape_changer_device(struct pqi_scsi_dev *device) From 0706faf631d71d8ee26848bdf916a943952b9232 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 13 Feb 2024 21:59:53 -0800 Subject: [PATCH 233/299] scsi: jazz_esp: Only build if SCSI core is builtin [ Upstream commit 9ddf190a7df77b77817f955fdb9c2ae9d1c9c9a3 ] JAZZ_ESP is a bool kconfig symbol that selects SCSI_SPI_ATTRS. When CONFIG_SCSI=m, this results in SCSI_SPI_ATTRS=m while JAZZ_ESP=y, which causes many undefined symbol linker errors. Fix this by only offering to build this driver when CONFIG_SCSI=y. [mkp: JAZZ_ESP is unique in that it does not support being compiled as a module unlike the remaining SPI SCSI HBA drivers] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20240214055953.9612-1-rdunlap@infradead.org Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nicolas Schier Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: linux-scsi@vger.kernel.org Cc: Geert Uytterhoeven Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402112222.Gl0udKyU-lkp@intel.com/ Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 695a57d894cd..23bce8995a55 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1285,7 +1285,7 @@ source "drivers/scsi/arm/Kconfig" config JAZZ_ESP bool "MIPS JAZZ FAS216 SCSI support" - depends on MACH_JAZZ && SCSI + depends on MACH_JAZZ && SCSI=y select SCSI_SPI_ATTRS help This is the driver for the onboard SCSI host adapter of MIPS Magnum From 603be95437e7fd85ba694e75918067fb9e7754db Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 14 Feb 2024 22:40:03 +0100 Subject: [PATCH 234/299] net: bridge: switchdev: Skip MDB replays of deferred events on offload [ Upstream commit dc489f86257cab5056e747344f17a164f63bff4b ] Before this change, generation of the list of MDB events to replay would race against the creation of new group memberships, either from the IGMP/MLD snooping logic or from user configuration. While new memberships are immediately visible to walkers of br->mdb_list, the notification of their existence to switchdev event subscribers is deferred until a later point in time. So if a replay list was generated during a time that overlapped with such a window, it would also contain a replay of the not-yet-delivered event. The driver would thus receive two copies of what the bridge internally considered to be one single event. On destruction of the bridge, only a single membership deletion event was therefore sent. As a consequence of this, drivers which reference count memberships (at least DSA), would be left with orphan groups in their hardware database when the bridge was destroyed. This is only an issue when replaying additions. While deletion events may still be pending on the deferred queue, they will already have been removed from br->mdb_list, so no duplicates can be generated in that scenario. To a user this meant that old group memberships, from a bridge in which a port was previously attached, could be reanimated (in hardware) when the port joined a new bridge, without the new bridge's knowledge. For example, on an mv88e6xxx system, create a snooping bridge and immediately add a port to it: root@infix-06-0b-00:~$ ip link add dev br0 up type bridge mcast_snooping 1 && \ > ip link set dev x3 up master br0 And then destroy the bridge: root@infix-06-0b-00:~$ ip link del dev br0 root@infix-06-0b-00:~$ mvls atu ADDRESS FID STATE Q F 0 1 2 3 4 5 6 7 8 9 a DEV:0 Marvell 88E6393X 33:33:00:00:00:6a 1 static - - 0 . . . . . . . . . . 33:33:ff:87:e4:3f 1 static - - 0 . . . . . . . . . . ff:ff:ff:ff:ff:ff 1 static - - 0 1 2 3 4 5 6 7 8 9 a root@infix-06-0b-00:~$ The two IPv6 groups remain in the hardware database because the port (x3) is notified of the host's membership twice: once via the original event and once via a replay. Since only a single delete notification is sent, the count remains at 1 when the bridge is destroyed. Then add the same port (or another port belonging to the same hardware domain) to a new bridge, this time with snooping disabled: root@infix-06-0b-00:~$ ip link add dev br1 up type bridge mcast_snooping 0 && \ > ip link set dev x3 up master br1 All multicast, including the two IPv6 groups from br0, should now be flooded, according to the policy of br1. But instead the old memberships are still active in the hardware database, causing the switch to only forward traffic to those groups towards the CPU (port 0). Eliminate the race in two steps: 1. Grab the write-side lock of the MDB while generating the replay list. This prevents new memberships from showing up while we are generating the replay list. But it leaves the scenario in which a deferred event was already generated, but not delivered, before we grabbed the lock. Therefore: 2. Make sure that no deferred version of a replay event is already enqueued to the switchdev deferred queue, before adding it to the replay list, when replaying additions. Fixes: 4f2673b3a2b6 ("net: bridge: add helper to replay port and host-joined mdb entries") Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/switchdev.h | 3 ++ net/bridge/br_switchdev.c | 76 ++++++++++++++++++++++++--------------- net/switchdev/switchdev.c | 73 +++++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 29 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index a43062d4c734..8346b0d29542 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -308,6 +308,9 @@ void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct netlink_ext_ack *extack); +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj); int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct netlink_ext_ack *extack); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index ee84e783e1df..6a7cb01f121c 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -595,21 +595,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, } static int br_switchdev_mdb_queue_one(struct list_head *mdb_list, + struct net_device *dev, + unsigned long action, enum switchdev_obj_id id, const struct net_bridge_mdb_entry *mp, struct net_device *orig_dev) { - struct switchdev_obj_port_mdb *mdb; + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = id, + .orig_dev = orig_dev, + }, + }; + struct switchdev_obj_port_mdb *pmdb; - mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC); - if (!mdb) + br_switchdev_mdb_populate(&mdb, mp); + + if (action == SWITCHDEV_PORT_OBJ_ADD && + switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) { + /* This event is already in the deferred queue of + * events, so this replay must be elided, lest the + * driver receives duplicate events for it. This can + * only happen when replaying additions, since + * modifications are always immediately visible in + * br->mdb_list, whereas actual event delivery may be + * delayed. + */ + return 0; + } + + pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC); + if (!pmdb) return -ENOMEM; - mdb->obj.id = id; - mdb->obj.orig_dev = orig_dev; - br_switchdev_mdb_populate(mdb, mp); - list_add_tail(&mdb->obj.list, mdb_list); - + list_add_tail(&pmdb->obj.list, mdb_list); return 0; } @@ -677,51 +696,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev, if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) return 0; - /* We cannot walk over br->mdb_list protected just by the rtnl_mutex, - * because the write-side protection is br->multicast_lock. But we - * need to emulate the [ blocking ] calling context of a regular - * switchdev event, so since both br->multicast_lock and RCU read side - * critical sections are atomic, we have no choice but to pick the RCU - * read side lock, queue up all our events, leave the critical section - * and notify switchdev from blocking context. - */ - rcu_read_lock(); + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; - hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { + /* br_switchdev_mdb_queue_one() will take care to not queue a + * replay of an event that is already pending in the switchdev + * deferred queue. In order to safely determine that, there + * must be no new deferred MDB notifications enqueued for the + * duration of the MDB scan. Therefore, grab the write-side + * lock to avoid racing with any concurrent IGMP/MLD snooping. + */ + spin_lock_bh(&br->multicast_lock); + + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group __rcu * const *pp; const struct net_bridge_port_group *p; if (mp->host_joined) { - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_HOST_MDB, mp, br_dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } - for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->key.port->dev != dev) continue; - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_PORT_MDB, mp, dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } } - rcu_read_unlock(); - - if (adding) - action = SWITCHDEV_PORT_OBJ_ADD; - else - action = SWITCHDEV_PORT_OBJ_DEL; + spin_unlock_bh(&br->multicast_lock); list_for_each_entry(obj, &mdb_list, list) { err = br_switchdev_mdb_replay_one(nb, dev, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 5b045284849e..c9189a970eec 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -19,6 +19,35 @@ #include #include +static bool switchdev_obj_eq(const struct switchdev_obj *a, + const struct switchdev_obj *b) +{ + const struct switchdev_obj_port_vlan *va, *vb; + const struct switchdev_obj_port_mdb *ma, *mb; + + if (a->id != b->id || a->orig_dev != b->orig_dev) + return false; + + switch (a->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + va = SWITCHDEV_OBJ_PORT_VLAN(a); + vb = SWITCHDEV_OBJ_PORT_VLAN(b); + return va->flags == vb->flags && + va->vid == vb->vid && + va->changed == vb->changed; + case SWITCHDEV_OBJ_ID_PORT_MDB: + case SWITCHDEV_OBJ_ID_HOST_MDB: + ma = SWITCHDEV_OBJ_PORT_MDB(a); + mb = SWITCHDEV_OBJ_PORT_MDB(b); + return ma->vid == mb->vid && + ether_addr_equal(ma->addr, mb->addr); + default: + break; + } + + BUG(); +} + static LIST_HEAD(deferred); static DEFINE_SPINLOCK(deferred_lock); @@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_obj_del); +/** + * switchdev_port_obj_act_is_deferred - Is object action pending? + * + * @dev: port device + * @nt: type of action; add or delete + * @obj: object to test + * + * Returns true if a deferred item is pending, which is + * equivalent to the action @nt on an object @obj. + * + * rtnl_lock must be held. + */ +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj) +{ + struct switchdev_deferred_item *dfitem; + bool found = false; + + ASSERT_RTNL(); + + spin_lock_bh(&deferred_lock); + + list_for_each_entry(dfitem, &deferred, list) { + if (dfitem->dev != dev) + continue; + + if ((dfitem->func == switchdev_port_obj_add_deferred && + nt == SWITCHDEV_PORT_OBJ_ADD) || + (dfitem->func == switchdev_port_obj_del_deferred && + nt == SWITCHDEV_PORT_OBJ_DEL)) { + if (switchdev_obj_eq((const void *)dfitem->data, obj)) { + found = true; + break; + } + } + } + + spin_unlock_bh(&deferred_lock); + + return found; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); + static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); From a83856bd0c240267a86ce3388f3437d6ba5ac5ca Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 14 Feb 2024 22:40:04 +0100 Subject: [PATCH 235/299] net: bridge: switchdev: Ensure deferred event delivery on unoffload [ Upstream commit f7a70d650b0b6b0134ccba763d672c8439d9f09b ] When unoffloading a device, it is important to ensure that all relevant deferred events are delivered to it before it disassociates itself from the bridge. Before this change, this was true for the normal case when a device maps 1:1 to a net_bridge_port, i.e. br0 / swp0 When swp0 leaves br0, the call to switchdev_deferred_process() in del_nbp() makes sure to process any outstanding events while the device is still associated with the bridge. In the case when the association is indirect though, i.e. when the device is attached to the bridge via an intermediate device, like a LAG... br0 / lag0 / swp0 ...then detaching swp0 from lag0 does not cause any net_bridge_port to be deleted, so there was no guarantee that all events had been processed before the device disassociated itself from the bridge. Fix this by always synchronously processing all deferred events before signaling completion of unoffloading back to the driver. Fixes: 4e51bf44a03a ("net: bridge: move the switchdev object replay helpers to "push" mode") Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/bridge/br_switchdev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 6a7cb01f121c..7b41ee8740cb 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -804,6 +804,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL); + + /* Make sure that the device leaving this bridge has seen all + * relevant events before it is disassociated. In the normal + * case, when the device is directly attached to the bridge, + * this is covered by del_nbp(). If the association was indirect + * however, e.g. via a team or bond, and the device is leaving + * that intermediate device, then the bridge port remains in + * place. + */ + switchdev_deferred_process(); } /* Let the bridge know that this port is offloaded, so that it can assign a From 334a8348b2df26526f3298848ad6864285592caf Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 14 Feb 2024 11:13:08 -0800 Subject: [PATCH 236/299] dccp/tcp: Unhash sk from ehash for tb2 alloc failure after check_estalblished(). [ Upstream commit 66b60b0c8c4a163b022a9f0ad6769b0fd3dc662f ] syzkaller reported a warning [0] in inet_csk_destroy_sock() with no repro. WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); However, the syzkaller's log hinted that connect() failed just before the warning due to FAULT_INJECTION. [1] When connect() is called for an unbound socket, we search for an available ephemeral port. If a bhash bucket exists for the port, we call __inet_check_established() or __inet6_check_established() to check if the bucket is reusable. If reusable, we add the socket into ehash and set inet_sk(sk)->inet_num. Later, we look up the corresponding bhash2 bucket and try to allocate it if it does not exist. Although it rarely occurs in real use, if the allocation fails, we must revert the changes by check_established(). Otherwise, an unconnected socket could illegally occupy an ehash entry. Note that we do not put tw back into ehash because sk might have already responded to a packet for tw and it would be better to free tw earlier under such memory presure. [0]: WARNING: CPU: 0 PID: 350830 at net/ipv4/inet_connection_sock.c:1193 inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) Modules linked in: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) Code: 41 5c 41 5d 41 5e e9 2d 4a 3d fd e8 28 4a 3d fd 48 89 ef e8 f0 cd 7d ff 5b 5d 41 5c 41 5d 41 5e e9 13 4a 3d fd e8 0e 4a 3d fd <0f> 0b e9 61 fe ff ff e8 02 4a 3d fd 4c 89 e7 be 03 00 00 00 e8 05 RSP: 0018:ffffc9000b21fd38 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000009e78 RCX: ffffffff840bae40 RDX: ffff88806e46c600 RSI: ffffffff840bb012 RDI: ffff88811755cca8 RBP: ffff88811755c880 R08: 0000000000000003 R09: 0000000000000000 R10: 0000000000009e78 R11: 0000000000000000 R12: ffff88811755c8e0 R13: ffff88811755c892 R14: ffff88811755c918 R15: 0000000000000000 FS: 00007f03e5243800(0000) GS:ffff88811ae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32f21000 CR3: 0000000112ffe001 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: ? inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) dccp_close (net/dccp/proto.c:1078) inet_release (net/ipv4/af_inet.c:434) __sock_release (net/socket.c:660) sock_close (net/socket.c:1423) __fput (fs/file_table.c:377) __fput_sync (fs/file_table.c:462) __x64_sys_close (fs/open.c:1557 fs/open.c:1539 fs/open.c:1539) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f03e53852bb Code: 03 00 00 00 0f 05 48 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 43 c9 f5 ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 a1 c9 f5 ff 8b 44 RSP: 002b:00000000005dfba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f03e53852bb RDX: 0000000000000002 RSI: 0000000000000002 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000000 R09: 000000000000167c R10: 0000000008a79680 R11: 0000000000000293 R12: 00007f03e4e43000 R13: 00007f03e4e43170 R14: 00007f03e4e43178 R15: 00007f03e4e43170 [1]: FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 0, space 0, times 0 CPU: 0 PID: 350833 Comm: syz-executor.1 Not tainted 6.7.0-12272-g2121c43f88f5 #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) should_fail_ex (lib/fault-inject.c:52 lib/fault-inject.c:153) should_failslab (mm/slub.c:3748) kmem_cache_alloc (mm/slub.c:3763 mm/slub.c:3842 mm/slub.c:3867) inet_bind2_bucket_create (net/ipv4/inet_hashtables.c:135) __inet_hash_connect (net/ipv4/inet_hashtables.c:1100) dccp_v4_connect (net/dccp/ipv4.c:116) __inet_stream_connect (net/ipv4/af_inet.c:676) inet_stream_connect (net/ipv4/af_inet.c:747) __sys_connect_file (net/socket.c:2048 (discriminator 2)) __sys_connect (net/socket.c:2065) __x64_sys_connect (net/socket.c:2072) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f03e5284e5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007f03e4641cc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 00000000004bbf80 RCX: 00007f03e5284e5d RDX: 0000000000000010 RSI: 0000000020000000 RDI: 0000000000000003 RBP: 00000000004bbf80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 000000000000000b R14: 00007f03e52e5530 R15: 0000000000000000 Reported-by: syzkaller Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/inet_hashtables.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index a532f749e477..9456bf9e2705 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1131,10 +1131,33 @@ ok: return 0; error: + if (sk_hashed(sk)) { + spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash); + + sock_prot_inuse_add(net, sk->sk_prot, -1); + + spin_lock(lock); + sk_nulls_del_node_init_rcu(sk); + spin_unlock(lock); + + sk->sk_hash = 0; + inet_sk(sk)->inet_sport = 0; + inet_sk(sk)->inet_num = 0; + + if (tw) + inet_twsk_bind_unhash(tw, hinfo); + } + spin_unlock(&head2->lock); if (tb_created) inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); - spin_unlock_bh(&head->lock); + spin_unlock(&head->lock); + + if (tw) + inet_twsk_deschedule_put(tw); + + local_bh_enable(); + return -ENOMEM; } From 73db191dc30d42a84c27d01c85f6f0cd851f91d7 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Tue, 19 Dec 2023 15:16:21 -0300 Subject: [PATCH 237/299] net/sched: act_mirred: Create function tcf_mirred_to_dev and improve readability [ Upstream commit 16085e48cb48aeb50a1178dc276747749910b0f2 ] As a preparation for adding block ID to mirred, separate the part of mirred that redirect/mirrors to a dev into a specific function so that it can be called by blockcast for each dev. Also improve readability. Eg. rename use_reinsert to dont_clone and skb2 to skb_to_send. Co-developed-by: Jamal Hadi Salim Signed-off-by: Jamal Hadi Salim Co-developed-by: Pedro Tammela Signed-off-by: Pedro Tammela Signed-off-by: Victor Nogueira Signed-off-by: David S. Miller Stable-dep-of: 52f671db1882 ("net/sched: act_mirred: use the backlog for mirred ingress") Signed-off-by: Sasha Levin --- net/sched/act_mirred.c | 127 +++++++++++++++++++++++------------------ 1 file changed, 71 insertions(+), 56 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 0a711c184c29..6f2544c1e396 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -225,48 +225,26 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) return err; } -TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, - const struct tc_action *a, - struct tcf_result *res) +static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, + struct net_device *dev, + const bool m_mac_header_xmit, int m_eaction, + int retval) { - struct tcf_mirred *m = to_mirred(a); - struct sk_buff *skb2 = skb; - bool m_mac_header_xmit; - struct net_device *dev; - unsigned int nest_level; - int retval, err = 0; - bool use_reinsert; + struct sk_buff *skb_to_send = skb; bool want_ingress; bool is_redirect; bool expects_nh; bool at_ingress; - int m_eaction; + bool dont_clone; int mac_len; bool at_nh; + int err; - nest_level = __this_cpu_inc_return(mirred_nest_level); - if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { - net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", - netdev_name(skb->dev)); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_SHOT; - } - - tcf_lastuse_update(&m->tcf_tm); - tcf_action_update_bstats(&m->common, skb); - - m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); - m_eaction = READ_ONCE(m->tcfm_eaction); - retval = READ_ONCE(m->tcf_action); - dev = rcu_dereference_bh(m->tcfm_dev); - if (unlikely(!dev)) { - pr_notice_once("tc mirred: target device is gone\n"); - goto out; - } - + is_redirect = tcf_mirred_is_act_redirect(m_eaction); if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); + err = -ENODEV; goto out; } @@ -274,61 +252,98 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, * since we can't easily detect the clsact caller, skip clone only for * ingress - that covers the TC S/W datapath. */ - is_redirect = tcf_mirred_is_act_redirect(m_eaction); at_ingress = skb_at_tc_ingress(skb); - use_reinsert = at_ingress && is_redirect && - tcf_mirred_can_reinsert(retval); - if (!use_reinsert) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) + dont_clone = skb_at_tc_ingress(skb) && is_redirect && + tcf_mirred_can_reinsert(retval); + if (!dont_clone) { + skb_to_send = skb_clone(skb, GFP_ATOMIC); + if (!skb_to_send) { + err = -ENOMEM; goto out; + } } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); /* All mirred/redirected skbs should clear previous ct info */ - nf_reset_ct(skb2); + nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ - skb_dst_drop(skb2); + skb_dst_drop(skb_to_send); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); if (at_nh != expects_nh) { - mac_len = skb_at_tc_ingress(skb) ? skb->mac_len : + mac_len = at_ingress ? skb->mac_len : skb_network_offset(skb); if (expects_nh) { /* target device/action expect data at nh */ - skb_pull_rcsum(skb2, mac_len); + skb_pull_rcsum(skb_to_send, mac_len); } else { /* target device/action expect data at mac */ - skb_push_rcsum(skb2, mac_len); + skb_push_rcsum(skb_to_send, mac_len); } } - skb2->skb_iif = skb->dev->ifindex; - skb2->dev = dev; + skb_to_send->skb_iif = skb->dev->ifindex; + skb_to_send->dev = dev; - /* mirror is always swallowed */ if (is_redirect) { - skb_set_redirected(skb2, skb2->tc_at_ingress); + if (skb == skb_to_send) + retval = TC_ACT_CONSUMED; - /* let's the caller reinsert the packet, if possible */ - if (use_reinsert) { - err = tcf_mirred_forward(want_ingress, skb); - if (err) - tcf_action_inc_overlimit_qstats(&m->common); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_CONSUMED; - } + skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); + + err = tcf_mirred_forward(want_ingress, skb_to_send); + } else { + err = tcf_mirred_forward(want_ingress, skb_to_send); } - err = tcf_mirred_forward(want_ingress, skb2); if (err) { out: tcf_action_inc_overlimit_qstats(&m->common); - if (tcf_mirred_is_act_redirect(m_eaction)) + if (is_redirect) retval = TC_ACT_SHOT; } + + return retval; +} + +TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_mirred *m = to_mirred(a); + int retval = READ_ONCE(m->tcf_action); + unsigned int nest_level; + bool m_mac_header_xmit; + struct net_device *dev; + int m_eaction; + + nest_level = __this_cpu_inc_return(mirred_nest_level); + if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { + net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", + netdev_name(skb->dev)); + retval = TC_ACT_SHOT; + goto dec_nest_level; + } + + tcf_lastuse_update(&m->tcf_tm); + tcf_action_update_bstats(&m->common, skb); + + dev = rcu_dereference_bh(m->tcfm_dev); + if (unlikely(!dev)) { + pr_notice_once("tc mirred: target device is gone\n"); + tcf_action_inc_overlimit_qstats(&m->common); + goto dec_nest_level; + } + + m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); + m_eaction = READ_ONCE(m->tcfm_eaction); + + retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, + retval); + +dec_nest_level: __this_cpu_dec(mirred_nest_level); return retval; From 7c787888d164689da8b1b115f3ef562c1e843af4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 15 Feb 2024 06:33:45 -0800 Subject: [PATCH 238/299] net/sched: act_mirred: use the backlog for mirred ingress [ Upstream commit 52f671db18823089a02f07efc04efdb2272ddc17 ] The test Davide added in commit ca22da2fbd69 ("act_mirred: use the backlog for nested calls to mirred ingress") hangs our testing VMs every 10 or so runs, with the familiar tcp_v4_rcv -> tcp_v4_rcv deadlock reported by lockdep. The problem as previously described by Davide (see Link) is that if we reverse flow of traffic with the redirect (egress -> ingress) we may reach the same socket which generated the packet. And we may still be holding its socket lock. The common solution to such deadlocks is to put the packet in the Rx backlog, rather than run the Rx path inline. Do that for all egress -> ingress reversals, not just once we started to nest mirred calls. In the past there was a concern that the backlog indirection will lead to loss of error reporting / less accurate stats. But the current workaround does not seem to address the issue. Fixes: 53592b364001 ("net/sched: act_mirred: Implement ingress actions") Cc: Marcelo Ricardo Leitner Suggested-by: Davide Caratti Link: https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/ Signed-off-by: Jakub Kicinski Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/act_mirred.c | 14 +++++--------- .../testing/selftests/net/forwarding/tc_actions.sh | 3 --- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6f2544c1e396..bab090bb5e80 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -206,18 +206,14 @@ release_idr: return err; } -static bool is_mirred_nested(void) -{ - return unlikely(__this_cpu_read(mirred_nest_level) > 1); -} - -static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) +static int +tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) { int err; if (!want_ingress) err = tcf_dev_queue_xmit(skb, dev_queue_xmit); - else if (is_mirred_nested()) + else if (!at_ingress) err = netif_rx(skb); else err = netif_receive_skb(skb); @@ -293,9 +289,9 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); - err = tcf_mirred_forward(want_ingress, skb_to_send); + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } else { - err = tcf_mirred_forward(want_ingress, skb_to_send); + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } if (err) { diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index b0f5e55d2d0b..589629636502 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -235,9 +235,6 @@ mirred_egress_to_ingress_tcp_test() check_err $? "didn't mirred redirect ICMP" tc_check_packets "dev $h1 ingress" 102 10 check_err $? "didn't drop mirred ICMP" - local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits) - test ${overlimits} = 10 - check_err $? "wrong overlimits, expected 10 got ${overlimits}" tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower From 28cdbbd38a4413b8eff53399b3f872fd4e80db9d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 15 Feb 2024 06:33:46 -0800 Subject: [PATCH 239/299] net/sched: act_mirred: don't override retval if we already lost the skb [ Upstream commit 166c2c8a6a4dc2e4ceba9e10cfe81c3e469e3210 ] If we're redirecting the skb, and haven't called tcf_mirred_forward(), yet, we need to tell the core to drop the skb by setting the retcode to SHOT. If we have called tcf_mirred_forward(), however, the skb is out of our hands and returning SHOT will lead to UaF. Move the retval override to the error path which actually need it. Reviewed-by: Michal Swiatkowski Fixes: e5cf1baf92cb ("act_mirred: use TC_ACT_REINSERT when possible") Signed-off-by: Jakub Kicinski Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/act_mirred.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index bab090bb5e80..674f7ae356ca 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -240,8 +240,7 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); - err = -ENODEV; - goto out; + goto err_cant_do; } /* we could easily avoid the clone only if called by ingress and clsact; @@ -253,10 +252,8 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, tcf_mirred_can_reinsert(retval); if (!dont_clone) { skb_to_send = skb_clone(skb, GFP_ATOMIC); - if (!skb_to_send) { - err = -ENOMEM; - goto out; - } + if (!skb_to_send) + goto err_cant_do; } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); @@ -293,15 +290,16 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, } else { err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } - - if (err) { -out: + if (err) tcf_action_inc_overlimit_qstats(&m->common); - if (is_redirect) - retval = TC_ACT_SHOT; - } return retval; + +err_cant_do: + if (is_redirect) + retval = TC_ACT_SHOT; + tcf_action_inc_overlimit_qstats(&m->common); + return retval; } TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, From b6979032552be72b1f42c386d2c603e17c83c8aa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 10:57:37 +0100 Subject: [PATCH 240/299] nouveau: fix function cast warnings [ Upstream commit 0affdba22aca5573f9d989bcb1d71d32a6a03efe ] clang-16 warns about casting between incompatible function types: drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c:161:10: error: cast from 'void (*)(const struct firmware *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 161 | .fini = (void(*)(void *))release_firmware, This one was done to use the generic shadow_fw_release() function as a callback for struct nvbios_source. Change it to use the same prototype as the other five instances, with a trivial helper function that actually calls release_firmware. Fixes: 70c0f263cc2e ("drm/nouveau/bios: pull in basic vbios subdev, more to come later") Signed-off-by: Arnd Bergmann Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240213095753.455062-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 19188683c8fc..8c2bf1c16f2a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -154,11 +154,17 @@ shadow_fw_init(struct nvkm_bios *bios, const char *name) return (void *)fw; } +static void +shadow_fw_release(void *fw) +{ + release_firmware(fw); +} + static const struct nvbios_source shadow_fw = { .name = "firmware", .init = shadow_fw_init, - .fini = (void(*)(void *))release_firmware, + .fini = shadow_fw_release, .read = shadow_fw_read, .rw = false, }; From b5bf39cd08780fd435f027e699dbec83658f4cab Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 12 Jan 2024 12:09:50 -0800 Subject: [PATCH 241/299] x86/numa: Fix the address overlap check in numa_fill_memblks() [ Upstream commit 9b99c17f7510bed2adbe17751fb8abddba5620bc ] numa_fill_memblks() fills in the gaps in numa_meminfo memblks over a physical address range. To do so, it first creates a list of existing memblks that overlap that address range. The issue is that it is off by one when comparing to the end of the address range, so memblks that do not overlap are selected. The impact of selecting a memblk that does not actually overlap is that an existing memblk may be filled when the expected action is to do nothing and return NUMA_NO_MEMBLK to the caller. The caller can then add a new NUMA node and memblk. Replace the broken open-coded search for address overlap with the memblock helper memblock_addrs_overlap(). Update the kernel doc and in code comments. Suggested by: "Huang, Ying" Fixes: 8f012db27c95 ("x86/numa: Introduce numa_fill_memblks()") Signed-off-by: Alison Schofield Acked-by: Mike Rapoport (IBM) Acked-by: Dave Hansen Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/10a3e6109c34c21a8dd4c513cf63df63481a2b07.1705085543.git.alison.schofield@intel.com Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- arch/x86/mm/numa.c | 19 +++++++------------ include/linux/memblock.h | 2 ++ mm/memblock.c | 5 +++-- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index aa39d678fe81..e60c61b8bbc6 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -971,14 +971,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; * @start: address to begin fill * @end: address to end fill * - * Find and extend numa_meminfo memblks to cover the @start-@end - * physical address range, such that the first memblk includes - * @start, the last memblk includes @end, and any gaps in between - * are filled. + * Find and extend numa_meminfo memblks to cover the physical + * address range @start-@end * * RETURNS: * 0 : Success - * NUMA_NO_MEMBLK : No memblk exists in @start-@end range + * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end */ int __init numa_fill_memblks(u64 start, u64 end) @@ -990,17 +988,14 @@ int __init numa_fill_memblks(u64 start, u64 end) /* * Create a list of pointers to numa_meminfo memblks that - * overlap start, end. Exclude (start == bi->end) since - * end addresses in both a CFMWS range and a memblk range - * are exclusive. - * - * This list of pointers is used to make in-place changes - * that fill out the numa_meminfo memblks. + * overlap start, end. The list is used to make in-place + * changes that fill out the numa_meminfo memblks. */ for (int i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; - if (start < bi->end && end >= bi->start) { + if (memblock_addrs_overlap(start, end - start, bi->start, + bi->end - bi->start)) { blk[count] = &mi->blk[i]; count++; } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1c1072e3ca06..ed57c23f80ac 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -118,6 +118,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index 6d18485571b4..d630f5c2bdb9 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -180,8 +180,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) /* * Address comparison utilities */ -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, - phys_addr_t base2, phys_addr_t size2) +unsigned long __init_memblock +memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, + phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } From 3a0060d2ba7c30dafee30a178643507f5b1f5b19 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 12 Jan 2024 12:09:51 -0800 Subject: [PATCH 242/299] x86/numa: Fix the sort compare func used in numa_fill_memblks() [ Upstream commit b626070ffc14acca5b87a2aa5f581db98617584c ] The compare function used to sort memblks into starting address order fails when the result of its u64 address subtraction gets truncated to an int upon return. The impact of the bad sort is that memblks will be filled out incorrectly. Depending on the set of memblks, a user may see no errors at all but still have a bad fill, or see messages reporting a node overlap that leads to numa init failure: [] node 0 [mem: ] overlaps with node 1 [mem: ] [] No NUMA configuration found Replace with a comparison that can only result in: 1, 0, -1. Fixes: 8f012db27c95 ("x86/numa: Introduce numa_fill_memblks()") Signed-off-by: Alison Schofield Acked-by: Dave Hansen Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/99dcb3ae87e04995e9f293f6158dc8fa0749a487.1705085543.git.alison.schofield@intel.com Signed-off-by: Dan Williams Signed-off-by: Sasha Levin --- arch/x86/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index e60c61b8bbc6..dae5c952735c 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -961,7 +961,7 @@ static int __init cmp_memblk(const void *a, const void *b) const struct numa_memblk *ma = *(const struct numa_memblk **)a; const struct numa_memblk *mb = *(const struct numa_memblk **)b; - return ma->start - mb->start; + return (ma->start > mb->start) - (ma->start < mb->start); } static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; From 37067e6bc241280744b969b69e88f014934a31bb Mon Sep 17 00:00:00 2001 From: Pavel Sakharov Date: Wed, 14 Feb 2024 12:27:17 +0300 Subject: [PATCH 243/299] net: stmmac: Fix incorrect dereference in interrupt handlers [ Upstream commit 97dde84026339e4b4af9a6301f825d1828d7874b ] If 'dev' or 'data' is NULL, the 'priv' variable has an incorrect address when dereferencing calling netdev_err(). Since we get as 'dev_id' or 'data' what was passed as the 'dev' argument to request_irq() during interrupt initialization (that is, the net_device and rx/tx queue pointers initialized at the time of the call) and since there are usually no checks for the 'dev_id' argument in such handlers in other drivers, remove these checks from the handlers in stmmac driver. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 8532f613bc78 ("net: stmmac: introduce MSI Interrupt routines for mac, safety, RX & TX") Signed-off-by: Pavel Sakharov Reviewed-by: Serge Semin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f1614ad2daaa..5b3423d1af3f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5951,11 +5951,6 @@ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5971,11 +5966,6 @@ static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -5997,11 +5987,6 @@ static irqreturn_t stmmac_msi_intr_tx(int irq, void *data) dma_conf = container_of(tx_q, struct stmmac_dma_conf, tx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -6028,11 +6013,6 @@ static irqreturn_t stmmac_msi_intr_rx(int irq, void *data) dma_conf = container_of(rx_q, struct stmmac_dma_conf, rx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; From 6634a8ecacc6ceaf242c81baa77659611d3b95b6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Feb 2024 17:21:06 +0000 Subject: [PATCH 244/299] ipv4: properly combine dev_base_seq and ipv4.dev_addr_genid [ Upstream commit 081a0e3b0d4c061419d3f4679dec9f68725b17e4 ] net->dev_base_seq and ipv4.dev_addr_genid are monotonically increasing. If we XOR their values, we could miss to detect if both values were changed with the same amount. Fixes: 0465277f6b3f ("ipv4: provide addr and netconf dump consistency info") Signed-off-by: Eric Dumazet Cc: Nicolas Dichtel Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/devinet.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ca0ff15dc8fa..bc74f131fe4d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1825,6 +1825,21 @@ done: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv4.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; @@ -1876,8 +1891,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &tgt_net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^ - tgt_net->dev_base_seq; + cb->seq = inet_base_seq(tgt_net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -2278,8 +2292,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; From cf761c81e413c9ed1e92ac47951548108370a139 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Feb 2024 17:21:07 +0000 Subject: [PATCH 245/299] ipv6: properly combine dev_base_seq and ipv6.dev_addr_genid [ Upstream commit e898e4cd1aab271ca414f9ac6e08e4c761f6913c ] net->dev_base_seq and ipv6.dev_addr_genid are monotonically increasing. If we XOR their values, we could miss to detect if both values were changed with the same amount. Fixes: 63998ac24f83 ("ipv6: provide addr and netconf dump consistency info") Signed-off-by: Eric Dumazet Cc: Nicolas Dichtel Signed-off-by: Eric Dumazet Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b007d098ffe2..7881446a46c4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -706,6 +706,22 @@ errout: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet6_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv6.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + + static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { @@ -739,8 +755,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet6_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -5358,7 +5373,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq; + cb->seq = inet6_base_seq(tgt_net); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &tgt_net->dev_index_head[h]; From ae24a16a834324f9277c3162178c9a3fed9a8e44 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 15 Feb 2024 10:27:31 -0800 Subject: [PATCH 246/299] net: bcmasp: Indicate MAC is in charge of PHY PM [ Upstream commit 5b76d928f8b779a1b19c5842e7cabee4cbb610c3 ] Avoid the PHY library call unnecessarily into the suspend/resume functions by setting phydev->mac_managed_pm to true. The ASP driver essentially does exactly what mdio_bus_phy_resume() does. Fixes: 490cb412007d ("net: bcmasp: Add support for ASP2.0 Ethernet controller") Signed-off-by: Florian Fainelli Signed-off-by: Justin Chen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index 53e542881255..9cae5a309000 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -1048,6 +1048,9 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) netdev_err(dev, "could not attach to PHY\n"); goto err_phy_disable; } + + /* Indicate that the MAC is responsible for PHY PM */ + phydev->mac_managed_pm = true; } else if (!intf->wolopts) { ret = phy_resume(dev->phydev); if (ret) From 7bcb0a2510ce5b43de5c5bee62f6e28f2537713b Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Thu, 15 Feb 2024 10:27:32 -0800 Subject: [PATCH 247/299] net: bcmasp: Sanity check is off by one [ Upstream commit f120e62e37f0af4c4cbe08e5a88ea60a6a17c858 ] A sanity check for OOB write is off by one leading to a false positive when the array is full. Fixes: 9b90aca97f6d ("net: ethernet: bcmasp: fix possible OOB write in bcmasp_netfilt_get_all_active()") Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/asp2/bcmasp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index 41a6098eb0c2..4b6bf2764bef 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -535,9 +535,6 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs, int j = 0, i; for (i = 0; i < NUM_NET_FILTERS; i++) { - if (j == *rule_cnt) - return -EMSGSIZE; - if (!priv->net_filters[i].claimed || priv->net_filters[i].port != intf->port) continue; @@ -547,6 +544,9 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs, priv->net_filters[i - 1].wake_filter) continue; + if (j == *rule_cnt) + return -EMSGSIZE; + rule_locs[j++] = priv->net_filters[i].fs.location; } From b8315b2e25b4e68e42fcb74630f824b9a5067765 Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Thu, 15 Feb 2024 16:18:33 -0600 Subject: [PATCH 248/299] powerpc/pseries/iommu: DLPAR add doesn't completely initialize pci_controller [ Upstream commit a5c57fd2e9bd1c8ea8613a8f94fd0be5eccbf321 ] When a PCI device is dynamically added, the kernel oopses with a NULL pointer dereference: BUG: Kernel NULL pointer dereference on read at 0x00000030 Faulting instruction address: 0xc0000000006bbe5c Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: rpadlpar_io rpaphp rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs xsk_diag bonding nft_compat nf_tables nfnetlink rfkill binfmt_misc dm_multipath rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_umad ib_iser libiscsi scsi_transport_iscsi ib_ipoib rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core pseries_rng drm drm_panel_orientation_quirks xfs libcrc32c mlx5_core mlxfw sd_mod t10_pi sg tls ibmvscsi ibmveth scsi_transport_srp vmx_crypto pseries_wdt psample dm_mirror dm_region_hash dm_log dm_mod fuse CPU: 17 PID: 2685 Comm: drmgr Not tainted 6.7.0-203405+ #66 Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NH1060_008) hv:phyp pSeries NIP: c0000000006bbe5c LR: c000000000a13e68 CTR: c0000000000579f8 REGS: c00000009924f240 TRAP: 0300 Not tainted (6.7.0-203405+) MSR: 8000000000009033 CR: 24002220 XER: 20040006 CFAR: c000000000a13e64 DAR: 0000000000000030 DSISR: 40000000 IRQMASK: 0 ... NIP sysfs_add_link_to_group+0x34/0x94 LR iommu_device_link+0x5c/0x118 Call Trace: iommu_init_device+0x26c/0x318 (unreliable) iommu_device_link+0x5c/0x118 iommu_init_device+0xa8/0x318 iommu_probe_device+0xc0/0x134 iommu_bus_notifier+0x44/0x104 notifier_call_chain+0xb8/0x19c blocking_notifier_call_chain+0x64/0x98 bus_notify+0x50/0x7c device_add+0x640/0x918 pci_device_add+0x23c/0x298 of_create_pci_dev+0x400/0x884 of_scan_pci_dev+0x124/0x1b0 __of_scan_bus+0x78/0x18c pcibios_scan_phb+0x2a4/0x3b0 init_phb_dynamic+0xb8/0x110 dlpar_add_slot+0x170/0x3b8 [rpadlpar_io] add_slot_store.part.0+0xb4/0x130 [rpadlpar_io] kobj_attr_store+0x2c/0x48 sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x350/0x4a0 ksys_write+0x84/0x140 system_call_exception+0x124/0x330 system_call_vectored_common+0x15c/0x2ec Commit a940904443e4 ("powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains") broke DLPAR add of PCI devices. The above added iommu_device structure to pci_controller. During system boot, PCI devices are discovered and this newly added iommu_device structure is initialized by a call to iommu_device_register(). During DLPAR add of a PCI device, a new pci_controller structure is allocated but there are no calls made to iommu_device_register() interface. Fix is to register the iommu device during DLPAR add as well. Fixes: a940904443e4 ("powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains") Signed-off-by: Gaurav Batra Reviewed-by: Brian King Signed-off-by: Michael Ellerman Link: https://msgid.link/20240215221833.4817-1-gbatra@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/ppc-pci.h | 10 ++++++++++ arch/powerpc/kernel/iommu.c | 23 ++++++++++++++++------ arch/powerpc/platforms/pseries/pci_dlpar.c | 4 ++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index d9fcff575027..2689e7139b9e 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -30,6 +30,16 @@ void *pci_traverse_device_nodes(struct device_node *start, void *data); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); +#if defined(CONFIG_IOMMU_API) && (defined(CONFIG_PPC_PSERIES) || \ + defined(CONFIG_PPC_POWERNV)) +extern void ppc_iommu_register_device(struct pci_controller *phb); +extern void ppc_iommu_unregister_device(struct pci_controller *phb); +#else +static inline void ppc_iommu_register_device(struct pci_controller *phb) { } +static inline void ppc_iommu_unregister_device(struct pci_controller *phb) { } +#endif + + /* From rtas_pci.h */ extern void init_pci_config_tokens (void); extern unsigned long get_phb_buid (struct device_node *); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 14251bc5219e..efaca0c6eff9 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1344,7 +1344,7 @@ static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) struct pci_controller *hose; if (!dev_is_pci(dev)) - return ERR_PTR(-EPERM); + return ERR_PTR(-ENODEV); pdev = to_pci_dev(dev); hose = pdev->bus->sysdata; @@ -1393,6 +1393,21 @@ static const struct attribute_group *spapr_tce_iommu_groups[] = { NULL, }; +void ppc_iommu_register_device(struct pci_controller *phb) +{ + iommu_device_sysfs_add(&phb->iommu, phb->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + phb->global_number); + iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops, + phb->parent); +} + +void ppc_iommu_unregister_device(struct pci_controller *phb) +{ + iommu_device_unregister(&phb->iommu); + iommu_device_sysfs_remove(&phb->iommu); +} + /* * This registers IOMMU devices of PHBs. This needs to happen * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and @@ -1403,11 +1418,7 @@ static int __init spapr_tce_setup_phb_iommus_initcall(void) struct pci_controller *hose; list_for_each_entry(hose, &hose_list, list_node) { - iommu_device_sysfs_add(&hose->iommu, hose->parent, - spapr_tce_iommu_groups, "iommu-phb%04x", - hose->global_number); - iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, - hose->parent); + ppc_iommu_register_device(hose); } return 0; } diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 4ba824568119..4448386268d9 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -35,6 +35,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pseries_msi_allocate_domains(phb); + ppc_iommu_register_device(phb); + /* Create EEH devices for the PHB */ eeh_phb_pe_create(phb); @@ -76,6 +78,8 @@ int remove_phb_dynamic(struct pci_controller *phb) } } + ppc_iommu_unregister_device(phb); + pseries_msi_free_domains(phb); /* Keep a reference so phb isn't freed yet */ From 3e831970cf7f9731c55cfff9f775e40d4104b989 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 15 Feb 2024 10:33:25 +0800 Subject: [PATCH 249/299] selftests: bonding: set active slave to primary eth1 specifically [ Upstream commit cd65c48d66920457129584553f217005d09b1edb ] In bond priority testing, we set the primary interface to eth1 and add eth0,1,2 to bond in serial. This is OK in normal times. But when in debug kernel, the bridge port that eth0,1,2 connected would start slowly (enter blocking, forwarding state), which caused the primary interface down for a while after enslaving and active slave changed. Here is a test log from Jakub's debug test[1]. [ 400.399070][ T50] br0: port 1(s0) entered disabled state [ 400.400168][ T50] br0: port 4(s2) entered disabled state [ 400.941504][ T2791] bond0: (slave eth0): making interface the new active one [ 400.942603][ T2791] bond0: (slave eth0): Enslaving as an active interface with an up link [ 400.943633][ T2766] br0: port 1(s0) entered blocking state [ 400.944119][ T2766] br0: port 1(s0) entered forwarding state [ 401.128792][ T2792] bond0: (slave eth1): making interface the new active one [ 401.130771][ T2792] bond0: (slave eth1): Enslaving as an active interface with an up link [ 401.131643][ T69] br0: port 2(s1) entered blocking state [ 401.132067][ T69] br0: port 2(s1) entered forwarding state [ 401.346201][ T2793] bond0: (slave eth2): Enslaving as a backup interface with an up link [ 401.348414][ T50] br0: port 4(s2) entered blocking state [ 401.348857][ T50] br0: port 4(s2) entered forwarding state [ 401.519669][ T250] bond0: (slave eth0): link status definitely down, disabling slave [ 401.526522][ T250] bond0: (slave eth1): link status definitely down, disabling slave [ 401.526986][ T250] bond0: (slave eth2): making interface the new active one [ 401.629470][ T250] bond0: (slave eth0): link status definitely up [ 401.630089][ T250] bond0: (slave eth1): link status definitely up [...] # TEST: prio (active-backup ns_ip6_target primary_reselect 1) [FAIL] # Current active slave is eth2 but not eth1 Fix it by setting active slave to primary slave specifically before testing. [1] https://netdev-3.bots.linux.dev/vmksft-bonding-dbg/results/464301/1-bond-options-sh/stdout Fixes: 481b56e0391e ("selftests: bonding: re-format bond option tests") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/drivers/net/bonding/bond_options.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index d508486cc0bd..9a3d3c389dad 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -62,6 +62,8 @@ prio_test() # create bond bond_reset "${param}" + # set active_slave to primary eth1 specifically + ip -n ${s_ns} link set bond0 type bond active_slave eth1 # check bonding member prio value ip -n ${s_ns} link set eth0 type bond_slave prio 0 From d4c58764dab89d3f071e395c826dfd76f65978bb Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Fri, 16 Feb 2024 23:44:57 +0530 Subject: [PATCH 250/299] ata: ahci_ceva: fix error handling for Xilinx GT PHY support [ Upstream commit 26c8404e162b43dddcb037ba2d0cb58c0ed60aab ] Platform clock and phy error resources are not cleaned up in Xilinx GT PHY error path. To fix introduce the function ceva_ahci_platform_enable_resources() which is a customized version of ahci_platform_enable_resources() and inline with SATA IP programming sequence it does: - Assert SATA reset - Program PS GTR phy - Bring SATA by de-asserting the reset - Wait for GT lane PLL to be locked ceva_ahci_platform_enable_resources() is also used in the resume path as the same SATA programming sequence (as in probe) should be followed. Also cleanup the mixed usage of ahci_platform_enable_resources() and custom implementation in the probe function as both are not required. Fixes: 9a9d3abe24bb ("ata: ahci: ceva: Update the driver to support xilinx GT phy") Signed-off-by: Radhey Shyam Pandey Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/ahci_ceva.c | 125 +++++++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index 64f7f7d6ba84..11a2c199a7c2 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -88,7 +88,6 @@ struct ceva_ahci_priv { u32 axicc; bool is_cci_enabled; int flags; - struct reset_control *rst; }; static unsigned int ceva_ahci_read_id(struct ata_device *dev, @@ -189,6 +188,60 @@ static const struct scsi_host_template ahci_platform_sht = { AHCI_SHT(DRV_NAME), }; +static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv) +{ + int rc, i; + + rc = ahci_platform_enable_regulators(hpriv); + if (rc) + return rc; + + rc = ahci_platform_enable_clks(hpriv); + if (rc) + goto disable_regulator; + + /* Assert the controller reset */ + rc = ahci_platform_assert_rsts(hpriv); + if (rc) + goto disable_clks; + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_init(hpriv->phys[i]); + if (rc) + goto disable_rsts; + } + + /* De-assert the controller reset */ + ahci_platform_deassert_rsts(hpriv); + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_power_on(hpriv->phys[i]); + if (rc) { + phy_exit(hpriv->phys[i]); + goto disable_phys; + } + } + + return 0; + +disable_rsts: + ahci_platform_deassert_rsts(hpriv); + +disable_phys: + while (--i >= 0) { + phy_power_off(hpriv->phys[i]); + phy_exit(hpriv->phys[i]); + } + +disable_clks: + ahci_platform_disable_clks(hpriv); + +disable_regulator: + ahci_platform_disable_regulators(hpriv); + + return rc; +} + static int ceva_ahci_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -203,47 +256,19 @@ static int ceva_ahci_probe(struct platform_device *pdev) return -ENOMEM; cevapriv->ahci_pdev = pdev; - - cevapriv->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, - NULL); - if (IS_ERR(cevapriv->rst)) - dev_err_probe(&pdev->dev, PTR_ERR(cevapriv->rst), - "failed to get reset\n"); - hpriv = ahci_platform_get_resources(pdev, 0); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); - if (!cevapriv->rst) { - rc = ahci_platform_enable_resources(hpriv); - if (rc) - return rc; - } else { - int i; + hpriv->rsts = devm_reset_control_get_optional_exclusive(&pdev->dev, + NULL); + if (IS_ERR(hpriv->rsts)) + return dev_err_probe(&pdev->dev, PTR_ERR(hpriv->rsts), + "failed to get reset\n"); - rc = ahci_platform_enable_clks(hpriv); - if (rc) - return rc; - /* Assert the controller reset */ - reset_control_assert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_init(hpriv->phys[i]); - if (rc) - return rc; - } - - /* De-assert the controller reset */ - reset_control_deassert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_power_on(hpriv->phys[i]); - if (rc) { - phy_exit(hpriv->phys[i]); - return rc; - } - } - } + rc = ceva_ahci_platform_enable_resources(hpriv); + if (rc) + return rc; if (of_property_read_bool(np, "ceva,broken-gen2")) cevapriv->flags = CEVA_FLAG_BROKEN_GEN2; @@ -252,52 +277,60 @@ static int ceva_ahci_probe(struct platform_device *pdev) if (of_property_read_u8_array(np, "ceva,p0-cominit-params", (u8 *)&cevapriv->pp2c[0], 4) < 0) { dev_warn(dev, "ceva,p0-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-cominit-params", (u8 *)&cevapriv->pp2c[1], 4) < 0) { dev_warn(dev, "ceva,p1-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read OOB timing value for COMWAKE from device-tree*/ if (of_property_read_u8_array(np, "ceva,p0-comwake-params", (u8 *)&cevapriv->pp3c[0], 4) < 0) { dev_warn(dev, "ceva,p0-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-comwake-params", (u8 *)&cevapriv->pp3c[1], 4) < 0) { dev_warn(dev, "ceva,p1-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy BURST timing value from device-tree */ if (of_property_read_u8_array(np, "ceva,p0-burst-params", (u8 *)&cevapriv->pp4c[0], 4) < 0) { dev_warn(dev, "ceva,p0-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-burst-params", (u8 *)&cevapriv->pp4c[1], 4) < 0) { dev_warn(dev, "ceva,p1-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy RETRY interval timing value from device-tree */ if (of_property_read_u16_array(np, "ceva,p0-retry-params", (u16 *)&cevapriv->pp5c[0], 2) < 0) { dev_warn(dev, "ceva,p0-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u16_array(np, "ceva,p1-retry-params", (u16 *)&cevapriv->pp5c[1], 2) < 0) { dev_warn(dev, "ceva,p1-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* @@ -335,7 +368,7 @@ static int __maybe_unused ceva_ahci_resume(struct device *dev) struct ahci_host_priv *hpriv = host->private_data; int rc; - rc = ahci_platform_enable_resources(hpriv); + rc = ceva_ahci_platform_enable_resources(hpriv); if (rc) return rc; From 8327ed12e8ebc5436bfaa1786c49988894f9c8a6 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 15 Feb 2024 13:12:17 -0800 Subject: [PATCH 251/299] bpf: Fix racing between bpf_timer_cancel_and_free and bpf_timer_cancel [ Upstream commit 0281b919e175bb9c3128bd3872ac2903e9436e3f ] The following race is possible between bpf_timer_cancel_and_free and bpf_timer_cancel. It will lead a UAF on the timer->timer. bpf_timer_cancel(); spin_lock(); t = timer->time; spin_unlock(); bpf_timer_cancel_and_free(); spin_lock(); t = timer->timer; timer->timer = NULL; spin_unlock(); hrtimer_cancel(&t->timer); kfree(t); /* UAF on t */ hrtimer_cancel(&t->timer); In bpf_timer_cancel_and_free, this patch frees the timer->timer after a rcu grace period. This requires a rcu_head addition to the "struct bpf_hrtimer". Another kfree(t) happens in bpf_timer_init, this does not need a kfree_rcu because it is still under the spin_lock and timer->timer has not been visible by others yet. In bpf_timer_cancel, rcu_read_lock() is added because this helper can be used in a non rcu critical section context (e.g. from a sleepable bpf prog). Other timer->timer usages in helpers.c have been audited, bpf_timer_cancel() is the only place where timer->timer is used outside of the spin_lock. Another solution considered is to mark a t->flag in bpf_timer_cancel and clear it after hrtimer_cancel() is done. In bpf_timer_cancel_and_free, it busy waits for the flag to be cleared before kfree(t). This patch goes with a straight forward solution and frees timer->timer after a rcu grace period. Fixes: b00628b1c7d5 ("bpf: Introduce bpf timers.") Suggested-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20240215211218.990808-1-martin.lau@linux.dev Signed-off-by: Sasha Levin --- kernel/bpf/helpers.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e68ef39cda67..a5ce840f4fbe 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1100,6 +1100,7 @@ struct bpf_hrtimer { struct bpf_prog *prog; void __rcu *callback_fn; void *value; + struct rcu_head rcu; }; /* the actual struct hidden inside uapi struct bpf_timer */ @@ -1328,6 +1329,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) if (in_nmi()) return -EOPNOTSUPP; + rcu_read_lock(); __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (!t) { @@ -1349,6 +1351,7 @@ out: * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + rcu_read_unlock(); return ret; } @@ -1403,7 +1406,7 @@ out: */ if (this_cpu_read(hrtimer_running) != t) hrtimer_cancel(&t->timer); - kfree(t); + kfree_rcu(t, rcu); } BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) From 287a0e6d3a62ecedc930714e2b89e1fe1cba0e62 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 15 Feb 2024 13:51:45 -0800 Subject: [PATCH 252/299] parisc: Fix stack unwinder [ Upstream commit 882a2a724ee964c1ebe7268a91d5c8c8ddc796bf ] Debugging shows a large number of unaligned access traps in the unwinder code. Code analysis reveals a number of issues with this code: - handle_interruption is passed twice through dereference_kernel_function_descriptor() - ret_from_kernel_thread, syscall_exit, intr_return, _switch_to_ret, and _call_on_stack are passed through dereference_kernel_function_descriptor() even though they are not declared as function pointers. To fix the problems, drop one of the calls to dereference_kernel_function_descriptor() for handle_interruption, and compare the other pointers directly. Fixes: 6414b30b39f9 ("parisc: unwind: Avoid missing prototype warning for handle_interruption()") Fixes: 8e0ba125c2bf ("parisc/unwind: fix unwinder when CONFIG_64BIT is enabled") Cc: Helge Deller Cc: Sven Schnelle Cc: John David Anglin Cc: Charlie Jenkins Cc: David Laight Signed-off-by: Guenter Roeck Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- arch/parisc/kernel/unwind.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 27ae40a443b8..f7e0fee5ee55 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -228,10 +228,8 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int #ifdef CONFIG_IRQSTACKS extern void * const _call_on_stack; #endif /* CONFIG_IRQSTACKS */ - void *ptr; - ptr = dereference_kernel_function_descriptor(&handle_interruption); - if (pc_is_kernel_fn(pc, ptr)) { + if (pc_is_kernel_fn(pc, handle_interruption)) { struct pt_regs *regs = (struct pt_regs *)(info->sp - frame_size - PT_SZ_ALGN); dbg("Unwinding through handle_interruption()\n"); info->prev_sp = regs->gr[30]; @@ -239,13 +237,13 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int return 1; } - if (pc_is_kernel_fn(pc, ret_from_kernel_thread) || - pc_is_kernel_fn(pc, syscall_exit)) { + if (pc == (unsigned long)&ret_from_kernel_thread || + pc == (unsigned long)&syscall_exit) { info->prev_sp = info->prev_ip = 0; return 1; } - if (pc_is_kernel_fn(pc, intr_return)) { + if (pc == (unsigned long)&intr_return) { struct pt_regs *regs; dbg("Found intr_return()\n"); @@ -257,14 +255,14 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int } if (pc_is_kernel_fn(pc, _switch_to) || - pc_is_kernel_fn(pc, _switch_to_ret)) { + pc == (unsigned long)&_switch_to_ret) { info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE; info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); return 1; } #ifdef CONFIG_IRQSTACKS - if (pc_is_kernel_fn(pc, _call_on_stack)) { + if (pc == (unsigned long)&_call_on_stack) { info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); return 1; From 6e6065dd25b661420fac19c34282b6c626fcd35e Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Mon, 19 Feb 2024 14:39:03 +0000 Subject: [PATCH 253/299] afs: Increase buffer size in afs_update_volume_status() [ Upstream commit 6ea38e2aeb72349cad50e38899b0ba6fbcb2af3d ] The max length of volume->vid value is 20 characters. So increase idbuf[] size up to 24 to avoid overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. [DH: Actually, it's 20 + NUL, so increase it to 24 and use snprintf()] Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: Daniil Dulov Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240211150442.3416-1-d.dulov@aladdin.ru/ # v1 Link: https://lore.kernel.org/r/20240212083347.10742-1-d.dulov@aladdin.ru/ # v2 Link: https://lore.kernel.org/r/20240219143906.138346-3-dhowells@redhat.com Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/afs/volume.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 115c081a8e2c..c028598a903c 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -337,7 +337,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) { struct afs_server_list *new, *old, *discard; struct afs_vldb_entry *vldb; - char idbuf[16]; + char idbuf[24]; int ret, idsz; _enter(""); @@ -345,7 +345,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) /* We look up an ID by passing it as a decimal string in the * operation's name parameter. */ - idsz = sprintf(idbuf, "%llu", volume->vid); + idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid); vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); if (IS_ERR(vldb)) { From 9e02973dbc6a91e40aa4f5d87b8c47446fbfce44 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Thu, 15 Feb 2024 23:27:17 +0300 Subject: [PATCH 254/299] ipv6: sr: fix possible use-after-free and null-ptr-deref [ Upstream commit 5559cea2d5aa3018a5f00dd2aca3427ba09b386b ] The pernet operations structure for the subsystem must be registered before registering the generic netlink family. Fixes: 915d7e5e5930 ("ipv6: sr: add code base for control plane support of SR-IPv6") Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/r/20240215202717.29815-1-kovalev@altlinux.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/seg6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 29346a6eec9f..35508abd76f4 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -512,22 +512,24 @@ int __init seg6_init(void) { int err; - err = genl_register_family(&seg6_genl_family); + err = register_pernet_subsys(&ip6_segments_ops); if (err) goto out; - err = register_pernet_subsys(&ip6_segments_ops); + err = genl_register_family(&seg6_genl_family); if (err) - goto out_unregister_genl; + goto out_unregister_pernet; #ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) - goto out_unregister_pernet; + goto out_unregister_genl; err = seg6_local_init(); - if (err) - goto out_unregister_pernet; + if (err) { + seg6_iptunnel_exit(); + goto out_unregister_genl; + } #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -548,11 +550,11 @@ out_unregister_iptun: #endif #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL -out_unregister_pernet: - unregister_pernet_subsys(&ip6_segments_ops); -#endif out_unregister_genl: genl_unregister_family(&seg6_genl_family); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); goto out; } From 919092bd5482b7070ae66d1daef73b600738f3a2 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Thu, 15 Feb 2024 23:34:00 +0300 Subject: [PATCH 255/299] devlink: fix possible use-after-free and memory leaks in devlink_init() [ Upstream commit def689fc26b9a9622d2e2cb0c4933dd3b1c8071c ] The pernet operations structure for the subsystem must be registered before registering the generic netlink family. Make an unregister in case of unsuccessful registration. Fixes: 687125b5799c ("devlink: split out core code") Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/r/20240215203400.29976-1-kovalev@altlinux.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/devlink/core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/devlink/core.c b/net/devlink/core.c index 6cec4afb01fb..451f2bc141a0 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -308,14 +308,20 @@ static int __init devlink_init(void) { int err; - err = genl_register_family(&devlink_nl_family); - if (err) - goto out; err = register_pernet_subsys(&devlink_pernet_ops); if (err) goto out; + err = genl_register_family(&devlink_nl_family); + if (err) + goto out_unreg_pernet_subsys; err = register_netdevice_notifier(&devlink_port_netdevice_nb); + if (!err) + return 0; + genl_unregister_family(&devlink_nl_family); + +out_unreg_pernet_subsys: + unregister_pernet_subsys(&devlink_pernet_ops); out: WARN_ON(err); return err; From a3f2c083cb575d80a7627baf3339e78fedccbb91 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Feb 2024 15:05:16 -0800 Subject: [PATCH 256/299] arp: Prevent overflow in arp_req_get(). [ Upstream commit a7d6027790acea24446ddd6632d394096c0f4667 ] syzkaller reported an overflown write in arp_req_get(). [0] When ioctl(SIOCGARP) is issued, arp_req_get() looks up an neighbour entry and copies neigh->ha to struct arpreq.arp_ha.sa_data. The arp_ha here is struct sockaddr, not struct sockaddr_storage, so the sa_data buffer is just 14 bytes. In the splat below, 2 bytes are overflown to the next int field, arp_flags. We initialise the field just after the memcpy(), so it's not a problem. However, when dev->addr_len is greater than 22 (e.g. MAX_ADDR_LEN), arp_netmask is overwritten, which could be set as htonl(0xFFFFFFFFUL) in arp_ioctl() before calling arp_req_get(). To avoid the overflow, let's limit the max length of memcpy(). Note that commit b5f0de6df6dc ("net: dev: Convert sa_data to flexible array in struct sockaddr") just silenced syzkaller. [0]: memcpy: detected field-spanning write (size 16) of single field "r->arp_ha.sa_data" at net/ipv4/arp.c:1128 (size 14) WARNING: CPU: 0 PID: 144638 at net/ipv4/arp.c:1128 arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Modules linked in: CPU: 0 PID: 144638 Comm: syz-executor.4 Not tainted 6.1.74 #31 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014 RIP: 0010:arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Code: fd ff ff e8 41 42 de fb b9 0e 00 00 00 4c 89 fe 48 c7 c2 20 6d ab 87 48 c7 c7 80 6d ab 87 c6 05 25 af 72 04 01 e8 5f 8d ad fb <0f> 0b e9 6c fd ff ff e8 13 42 de fb be 03 00 00 00 4c 89 e7 e8 a6 RSP: 0018:ffffc900050b7998 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88803a815000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8641a44a RDI: 0000000000000001 RBP: ffffc900050b7a98 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 203a7970636d656d R12: ffff888039c54000 R13: 1ffff92000a16f37 R14: ffff88803a815084 R15: 0000000000000010 FS: 00007f172bf306c0(0000) GS:ffff88805aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f172b3569f0 CR3: 0000000057f12005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: arp_ioctl+0x33f/0x4b0 net/ipv4/arp.c:1261 inet_ioctl+0x314/0x3a0 net/ipv4/af_inet.c:981 sock_do_ioctl+0xdf/0x260 net/socket.c:1204 sock_ioctl+0x3ef/0x650 net/socket.c:1321 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x18e/0x220 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x37/0x90 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x64/0xce RIP: 0033:0x7f172b262b8d Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f172bf300b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f172b3abf80 RCX: 00007f172b262b8d RDX: 0000000020000000 RSI: 0000000000008954 RDI: 0000000000000003 RBP: 00007f172b2d3493 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f172b3abf80 R15: 00007f172bf10000 Reported-by: syzkaller Reported-by: Bjoern Doebel Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240215230516.31330-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/arp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9456f5bb35e5..0d0d725b46ad 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) if (neigh) { if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + memcpy(r->arp_ha.sa_data, neigh->ha, + min(dev->addr_len, sizeof(r->arp_ha.sa_data_min))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; From 7c892383227f4248794e05529aaaf325fe1e558c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Feb 2024 23:06:32 +0000 Subject: [PATCH 257/299] arm64/sme: Restore SME registers on exit from suspend [ Upstream commit 9533864816fb4a6207c63b7a98396351ce1a9fae ] The fields in SMCR_EL1 and SMPRI_EL1 reset to an architecturally UNKNOWN value. Since we do not otherwise manage the traps configured in this register at runtime we need to reconfigure them after a suspend in case nothing else was kind enough to preserve them for us. The vector length will be restored as part of restoring the SME state for the next SME using task. Fixes: a1f4ccd25cc2 ("arm64/sme: Provide Kconfig for SME") Reported-by: Jackson Cooper-Driver Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240213-arm64-sme-resume-v3-1-17e05e493471@kernel.org Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/kernel/fpsimd.c | 14 ++++++++++++++ arch/arm64/kernel/suspend.c | 3 +++ 3 files changed, 19 insertions(+) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 8df46f186c64..b54506d70738 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -360,6 +360,7 @@ extern void sme_alloc(struct task_struct *task, bool flush); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); +extern void sme_suspend_exit(void); /* * Return how many bytes of memory are required to store the full SME @@ -395,6 +396,7 @@ static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } +static inline void sme_suspend_exit(void) { } static inline size_t sme_state_size(struct task_struct const *task) { diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1e1e0511c008..ce0bc01b4208 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1406,6 +1406,20 @@ void __init sme_setup(void) get_sme_default_vl()); } +void sme_suspend_exit(void) +{ + u64 smcr = 0; + + if (!system_supports_sme()) + return; + + if (system_supports_fa64()) + smcr |= SMCR_ELx_FA64; + + write_sysreg_s(smcr, SYS_SMCR_EL1); + write_sysreg_s(0, SYS_SMPRI_EL1); +} + #endif /* CONFIG_ARM64_SME */ static void sve_init_regs(void) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 0fbdf5fe64d8..045af2bfd656 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +81,8 @@ void notrace __cpu_suspend_exit(void) */ spectre_v4_enable_mitigation(NULL); + sme_suspend_exit(); + /* Restore additional feature-specific configuration */ ptrauth_suspend_exit(); } From 79491ddfb42951f68eb727be60a1a6bd7803fbce Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Feb 2024 23:06:33 +0000 Subject: [PATCH 258/299] arm64/sme: Restore SMCR_EL1.EZT0 on exit from suspend [ Upstream commit d7b77a0d565b048cb0808fa8a4fb031352b22a01 ] The fields in SMCR_EL1 reset to an architecturally UNKNOWN value. Since we do not otherwise manage the traps configured in this register at runtime we need to reconfigure them after a suspend in case nothing else was kind enough to preserve them for us. Do so for SMCR_EL1.EZT0. Fixes: d4913eee152d ("arm64/sme: Add basic enumeration for SME2") Reported-by: Jackson Cooper-Driver Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240213-arm64-sme-resume-v3-2-17e05e493471@kernel.org Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/fpsimd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index ce0bc01b4208..5cdfcc9e3e54 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1415,6 +1415,8 @@ void sme_suspend_exit(void) if (system_supports_fa64()) smcr |= SMCR_ELx_FA64; + if (system_supports_sme2()) + smcr |= SMCR_ELx_EZT0; write_sysreg_s(smcr, SYS_SMCR_EL1); write_sysreg_s(0, SYS_SMPRI_EL1); From f9f8f23c5851f78ff1a32c4c503bcd6ff1f61fcd Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 16 Feb 2024 20:23:11 -0600 Subject: [PATCH 259/299] platform/x86: thinkpad_acpi: Only update profile if successfully converted [ Upstream commit 427c70dec738318b7f71e1b9d829ff0e9771d493 ] Randomly a Lenovo Z13 will trigger a kernel warning traceback from this condition: ``` if (WARN_ON((profile < 0) || (profile >= ARRAY_SIZE(profile_names)))) ``` This happens because thinkpad-acpi always assumes that convert_dytc_to_profile() successfully updated the profile. On the contrary a condition can occur that when dytc_profile_refresh() is called the profile doesn't get updated as there is a -EOPNOTSUPP branch. Catch this situation and avoid updating the profile. Also log this into dynamic debugging in case any other modes should be added in the future. Fixes: c3bfcd4c6762 ("platform/x86: thinkpad_acpi: Add platform profile support") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240217022311.113879-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/thinkpad_acpi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index d73cbae4aa21..89c37a83d7fc 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10308,6 +10308,7 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode, return 0; default: /* Unknown function */ + pr_debug("unknown function 0x%x\n", funcmode); return -EOPNOTSUPP; } return 0; @@ -10493,8 +10494,8 @@ static void dytc_profile_refresh(void) return; perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF; - convert_dytc_to_profile(funcmode, perfmode, &profile); - if (profile != dytc_current_profile) { + err = convert_dytc_to_profile(funcmode, perfmode, &profile); + if (!err && profile != dytc_current_profile) { dytc_current_profile = profile; platform_profile_notify(); } From 16bc939f224dfeed6cafbb78860179d4c2586e8c Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 20 Feb 2024 14:12:51 +0100 Subject: [PATCH 260/299] drm/i915/tv: Fix TV mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fb1e881273f432e593f8789f99e725b09304cc97 ] Commit 1fd4a5a36f9f ("drm/connector: Rename legacy TV property") failed to update all the users of the struct drm_tv_connector_state mode field, which resulted in a build failure in i915. However, a subsequent commit in the same series reintroduced a mode field in that structure, with a different semantic but the same type, with the assumption that all previous users were updated. Since that didn't happen, the i915 driver now compiles, but mixes accesses to the legacy_mode field and the newer mode field, but with the previous semantics. This obviously doesn't work very well, so we need to update the accesses that weren't in the legacy renaming commit. Fixes: 1fd4a5a36f9f ("drm/connector: Rename legacy TV property") Reported-by: Ville Syrjälä Signed-off-by: Maxime Ripard Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240220131251.453060-1-mripard@kernel.org (cherry picked from commit bf7626f19d6ff14b9722273e23700400cc4d78ba) Signed-off-by: Joonas Lahtinen Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/display/intel_sdvo.c | 10 +++++----- drivers/gpu/drm/i915/display/intel_tv.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 0ce935efe5df..18ae41d5f4f9 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1212,7 +1212,7 @@ static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo, struct intel_sdvo_tv_format format; u32 format_map; - format_map = 1 << conn_state->tv.mode; + format_map = 1 << conn_state->tv.legacy_mode; memset(&format, 0, sizeof(format)); memcpy(&format, &format_map, min(sizeof(format), sizeof(format_map))); @@ -2295,7 +2295,7 @@ static int intel_sdvo_get_tv_modes(struct drm_connector *connector) * Read the list of supported input resolutions for the selected TV * format. */ - format_map = 1 << conn_state->tv.mode; + format_map = 1 << conn_state->tv.legacy_mode; memcpy(&tv_res, &format_map, min(sizeof(format_map), sizeof(struct intel_sdvo_sdtv_resolution_request))); @@ -2360,7 +2360,7 @@ intel_sdvo_connector_atomic_get_property(struct drm_connector *connector, int i; for (i = 0; i < intel_sdvo_connector->format_supported_num; i++) - if (state->tv.mode == intel_sdvo_connector->tv_format_supported[i]) { + if (state->tv.legacy_mode == intel_sdvo_connector->tv_format_supported[i]) { *val = i; return 0; @@ -2416,7 +2416,7 @@ intel_sdvo_connector_atomic_set_property(struct drm_connector *connector, struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(state); if (property == intel_sdvo_connector->tv_format) { - state->tv.mode = intel_sdvo_connector->tv_format_supported[val]; + state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[val]; if (state->crtc) { struct drm_crtc_state *crtc_state = @@ -3071,7 +3071,7 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, drm_property_add_enum(intel_sdvo_connector->tv_format, i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); - intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0]; + intel_sdvo_connector->base.base.state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[0]; drm_object_attach_property(&intel_sdvo_connector->base.base.base, intel_sdvo_connector->tv_format, 0); return true; diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index d84a79491da2..042ed966807e 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -949,7 +949,7 @@ intel_disable_tv(struct intel_atomic_state *state, static const struct tv_mode *intel_tv_mode_find(const struct drm_connector_state *conn_state) { - int format = conn_state->tv.mode; + int format = conn_state->tv.legacy_mode; return &tv_modes[format]; } @@ -1710,7 +1710,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector) break; } - connector->state->tv.mode = i; + connector->state->tv.legacy_mode = i; } static int @@ -1865,7 +1865,7 @@ static int intel_tv_atomic_check(struct drm_connector *connector, old_state = drm_atomic_get_old_connector_state(state, connector); new_crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc); - if (old_state->tv.mode != new_state->tv.mode || + if (old_state->tv.legacy_mode != new_state->tv.legacy_mode || old_state->tv.margins.left != new_state->tv.margins.left || old_state->tv.margins.right != new_state->tv.margins.right || old_state->tv.margins.top != new_state->tv.margins.top || @@ -1902,7 +1902,7 @@ static void intel_tv_add_properties(struct drm_connector *connector) conn_state->tv.margins.right = 46; conn_state->tv.margins.bottom = 37; - conn_state->tv.mode = 0; + conn_state->tv.legacy_mode = 0; /* Create TV properties then attach current values */ for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { @@ -1916,7 +1916,7 @@ static void intel_tv_add_properties(struct drm_connector *connector) drm_object_attach_property(&connector->base, i915->drm.mode_config.legacy_tv_mode_property, - conn_state->tv.mode); + conn_state->tv.legacy_mode); drm_object_attach_property(&connector->base, i915->drm.mode_config.tv_left_margin_property, conn_state->tv.margins.left); From 734b494eac2f93cf878e083ee6655e095722f39b Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 19 Feb 2024 18:25:14 +0530 Subject: [PATCH 261/299] octeontx2-af: Consider the action set by PF [ Upstream commit 3b1ae9b71c2a97f848b00fb085a2bd29bddbe8d9 ] AF reserves MCAM entries for each PF, VF present in the system and populates the entry with DMAC and action with default RSS so that basic packet I/O works. Since PF/VF is not aware of the RSS action installed by AF, AF only fixup the actions of the rules installed by PF/VF with corresponding default RSS action. This worked well for rules installed by PF/VF for features like RX VLAN offload and DMAC filters but rules involving action like drop/forward to queue are also getting modified by AF. Hence fix it by setting the default RSS action only if requested by PF/VF. Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet replication feature") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 3784347b6fd8..55639c133dd0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -437,6 +437,10 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, return; } + /* AF modifies given action iff PF/VF has requested for it */ + if ((entry->action & 0xFULL) != NIX_RX_ACTION_DEFAULT) + return; + /* copy VF default entry action to the VF mcam entry */ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr, target_func); From 0a32395fd1e3f12e7bc6f0e21bead374d107b2a5 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 19 Feb 2024 08:40:15 -0600 Subject: [PATCH 262/299] net: ipa: don't overrun IPA suspend interrupt registers [ Upstream commit d80f8e96d47d7374794a30fbed69be43f3388afc ] In newer hardware, IPA supports more than 32 endpoints. Some registers--such as IPA interrupt registers--represent endpoints as bits in a 4-byte register, and such registers are repeated as needed to represent endpoints beyond the first 32. In ipa_interrupt_suspend_clear_all(), we clear all pending IPA suspend interrupts by reading all status register(s) and writing corresponding registers to clear interrupt conditions. Unfortunately the number of registers to read/write is calculated incorrectly, and as a result we access *many* more registers than intended. This bug occurs only when the IPA hardware signals a SUSPEND interrupt, which happens when a packet is received for an endpoint (or its underlying GSI channel) that is suspended. This situation is difficult to reproduce, but possible. Fix this by correctly computing the number of interrupt registers to read and write. This is the only place in the code where registers that map endpoints or channels this way perform this calculation. Fixes: f298ba785e2d ("net: ipa: add a parameter to suspend registers") Signed-off-by: Alex Elder Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ipa/ipa_interrupt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index 4bc05948f772..a78c692f2d3c 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -212,7 +212,7 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt) u32 unit_count; u32 unit; - unit_count = roundup(ipa->endpoint_count, 32); + unit_count = DIV_ROUND_UP(ipa->endpoint_count, 32); for (unit = 0; unit < unit_count; unit++) { const struct reg *reg; u32 val; From ef6566d10cf760190c705ea5c45c5a98c449fb85 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 16 Feb 2024 20:48:14 -0400 Subject: [PATCH 263/299] s390: use the correct count for __iowrite64_copy() [ Upstream commit 723a2cc8d69d4342b47dfddbfe6c19f1b135f09b ] The signature for __iowrite64_copy() requires the number of 64 bit quantities, not bytes. Multiple by 8 to get to a byte length before invoking zpci_memcpy_toio() Fixes: 87bc359b9822 ("s390/pci: speed up __iowrite64_copy by using pci store block insn") Acked-by: Niklas Schnelle Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-9223d11a7662+1d7785-s390_iowrite64_jgg@nvidia.com Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index d34d5813d006..777362cb4ea8 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -241,7 +241,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, /* combine single writes by using store-block insn */ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) { - zpci_memcpy_toio(to, from, count); + zpci_memcpy_toio(to, from, count * 8); } void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, From 9b099ed46dcaf1403c531ff02c3d7400fa37fa26 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Mon, 19 Feb 2024 00:09:33 +0900 Subject: [PATCH 264/299] bpf, sockmap: Fix NULL pointer dereference in sk_psock_verdict_data_ready() [ Upstream commit 4cd12c6065dfcdeba10f49949bffcf383b3952d8 ] syzbot reported the following NULL pointer dereference issue [1]: BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] RIP: 0010:0x0 [...] Call Trace: sk_psock_verdict_data_ready+0x232/0x340 net/core/skmsg.c:1230 unix_stream_sendmsg+0x9b4/0x1230 net/unix/af_unix.c:2293 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 If sk_psock_verdict_data_ready() and sk_psock_stop_verdict() are called concurrently, psock->saved_data_ready can be NULL, causing the above issue. This patch fixes this issue by calling the appropriate data ready function using the sk_psock_data_ready() helper and protecting it from concurrency with sk->sk_callback_lock. Fixes: 6df7f764cd3c ("bpf, sockmap: Wake up polling after data copy") Reported-by: syzbot+fd7b34375c1c8ce29c93@syzkaller.appspotmail.com Signed-off-by: Shigeru Yoshida Signed-off-by: Daniel Borkmann Tested-by: syzbot+fd7b34375c1c8ce29c93@syzkaller.appspotmail.com Acked-by: John Fastabend Closes: https://syzkaller.appspot.com/bug?extid=fd7b34375c1c8ce29c93 [1] Link: https://lore.kernel.org/bpf/20240218150933.6004-1-syoshida@redhat.com Signed-off-by: Sasha Levin --- net/core/skmsg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 93ecfceac1bc..4d75ef9d24bf 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) - psock->saved_data_ready(sk); + if (psock) { + read_lock_bh(&sk->sk_callback_lock); + sk_psock_data_ready(sk, psock); + read_unlock_bh(&sk->sk_callback_lock); + } rcu_read_unlock(); } } From 50b30655b2246ac21352d2ae5ea4690e75cecf37 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Sat, 3 Feb 2024 21:26:40 +0000 Subject: [PATCH 265/299] cache: ax45mp_cache: Align end size to cache boundary in ax45mp_dma_cache_wback() [ Upstream commit 9bd405c48b0ac4de087c0c4440fd79597201b8a7 ] Align the end size to cache boundary size in ax45mp_dma_cache_wback() callback likewise done in ax45mp_dma_cache_inv() callback. Additionally return early in case of start == end. Fixes: d34599bcd2e4 ("cache: Add L2 cache management for Andes AX45MP RISC-V core") Reported-by: Pavel Machek Link: https://lore.kernel.org/cip-dev/ZYsdKDiw7G+kxQ3m@duo.ucw.cz/ Signed-off-by: Lad Prabhakar Signed-off-by: Conor Dooley Signed-off-by: Sasha Levin --- drivers/cache/ax45mp_cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cache/ax45mp_cache.c b/drivers/cache/ax45mp_cache.c index 57186c58dc84..1d7dd3d2c101 100644 --- a/drivers/cache/ax45mp_cache.c +++ b/drivers/cache/ax45mp_cache.c @@ -129,8 +129,12 @@ static void ax45mp_dma_cache_wback(phys_addr_t paddr, size_t size) unsigned long line_size; unsigned long flags; + if (unlikely(start == end)) + return; + line_size = ax45mp_priv.ax45mp_cache_line_size; start = start & (~(line_size - 1)); + end = ((end + line_size - 1) & (~(line_size - 1))); local_irq_save(flags); ax45mp_cpu_dcache_wb_range(start, end); local_irq_restore(flags); From f006c45a3ea424f8f6c8e4b9283bc245ce2a4d0f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 21 Feb 2024 06:01:20 -0800 Subject: [PATCH 266/299] hwmon: (nct6775) Fix access to temperature configuration registers [ Upstream commit d56e460e19ea8382f813eb489730248ec8d7eb73 ] The number of temperature configuration registers does not always match the total number of temperature registers. This can result in access errors reported if KASAN is enabled. BUG: KASAN: global-out-of-bounds in nct6775_probe+0x5654/0x6fe9 nct6775_core Reported-by: Erhard Furtner Closes: https://lore.kernel.org/linux-hwmon/d51181d1-d26b-42b2-b002-3f5a4037721f@roeck-us.net/ Fixes: b7f1f7b2523a ("hwmon: (nct6775) Additional TEMP registers for nct6799") Cc: Ahmad Khalifa Tested-by: Ahmad Khalifa Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/nct6775-core.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c index 92a49fafe2c0..f3bf2e4701c3 100644 --- a/drivers/hwmon/nct6775-core.c +++ b/drivers/hwmon/nct6775-core.c @@ -3512,6 +3512,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit; const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL; int num_reg_temp, num_reg_temp_mon, num_reg_tsi_temp; + int num_reg_temp_config; struct device *hwmon_dev; struct sensor_template_group tsi_temp_tg; @@ -3594,6 +3595,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6106_REG_TEMP_CONFIG); reg_temp_alternate = NCT6106_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6106_REG_TEMP_CRIT; reg_temp_crit_l = NCT6106_REG_TEMP_CRIT_L; @@ -3669,6 +3671,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6106_REG_TEMP_CONFIG); reg_temp_alternate = NCT6106_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6106_REG_TEMP_CRIT; reg_temp_crit_l = NCT6106_REG_TEMP_CRIT_L; @@ -3746,6 +3749,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6775_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6775_REG_TEMP_CONFIG); reg_temp_alternate = NCT6775_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6775_REG_TEMP_CRIT; @@ -3821,6 +3825,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6776_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6776_REG_TEMP_CONFIG); reg_temp_alternate = NCT6776_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6776_REG_TEMP_CRIT; @@ -3900,6 +3905,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG); reg_temp_alternate = NCT6779_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6779_REG_TEMP_CRIT; @@ -4034,6 +4040,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG); reg_temp_alternate = NCT6779_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6779_REG_TEMP_CRIT; @@ -4123,6 +4130,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6798_REG_TEMP_OVER; reg_temp_hyst = NCT6798_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG); reg_temp_alternate = NCT6798_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6798_REG_TEMP_CRIT; @@ -4204,7 +4212,8 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, = reg_temp_crit[src - 1]; if (reg_temp_crit_l && reg_temp_crit_l[i]) data->reg_temp[4][src - 1] = reg_temp_crit_l[i]; - data->reg_temp_config[src - 1] = reg_temp_config[i]; + if (i < num_reg_temp_config) + data->reg_temp_config[src - 1] = reg_temp_config[i]; data->temp_src[src - 1] = src; continue; } @@ -4217,7 +4226,8 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, data->reg_temp[0][s] = reg_temp[i]; data->reg_temp[1][s] = reg_temp_over[i]; data->reg_temp[2][s] = reg_temp_hyst[i]; - data->reg_temp_config[s] = reg_temp_config[i]; + if (i < num_reg_temp_config) + data->reg_temp_config[s] = reg_temp_config[i]; if (reg_temp_crit_h && reg_temp_crit_h[i]) data->reg_temp[3][s] = reg_temp_crit_h[i]; else if (reg_temp_crit[src - 1]) From 80b1d6a0c0c06d664b01a8bc17c0ad1d10136688 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:29 +0100 Subject: [PATCH 267/299] tls: break out of main loop when PEEK gets a non-data record [ Upstream commit 10f41d0710fc81b7af93fa6106678d57b1ff24a7 ] PEEK needs to leave decrypted records on the rx_list so that we can receive them later on, so it jumps back into the async code that queues the skb. Unfortunately that makes us skip the TLS_RECORD_TYPE_DATA check at the bottom of the main loop, so if two records of the same (non-DATA) type are queued, we end up merging them. Add the same record type check, and make it unlikely to not penalize the async fastpath. Async decrypt only applies to data record, so this check is only needed for PEEK. process_rx_list also has similar issues. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/3df2eef4fdae720c55e69472b5bea668772b45a2.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e1f8ff6e9a73..67c8323b7cd1 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2064,6 +2064,8 @@ put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } From 3b952d8fdfcf6fd8ea0b8954bc9277642cf0977f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:30 +0100 Subject: [PATCH 268/299] tls: stop recv() if initial process_rx_list gave us non-DATA [ Upstream commit fdfbaec5923d9359698cbb286bc0deadbb717504 ] If we have a non-DATA record on the rx_list and another record of the same type still on the queue, we will end up merging them: - process_rx_list copies the non-DATA record - we start the loop and process the first available record since it's of the same type - we break out of the loop since the record was not DATA Just check the record type and jump to the end in case process_rx_list did some work. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/bd31449e43bd4b6ff546f5c51cf958c31c511deb.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 67c8323b7cd1..a83b6119f382 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1971,7 +1971,7 @@ int tls_sw_recvmsg(struct sock *sk, goto end; copied = err; - if (len <= copied) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); From 4f13a79ea3cf5c131464ce9130c60f0eeaf2abf6 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:31 +0100 Subject: [PATCH 269/299] tls: don't skip over different type records from the rx_list [ Upstream commit ec823bf3a479d42c589dc0f28ef4951c49cd2d2a ] If we queue 3 records: - record 1, type DATA - record 2, some other type - record 3, type DATA and do a recv(PEEK), the rx_list will contain the first two records. The next large recv will walk through the rx_list and copy data from record 1, then stop because record 2 is a different type. Since we haven't filled up our buffer, we will process the next available record. It's also DATA, so we can merge it with the current read. We shouldn't do that, since there was a record in between that we ignored. Add a flag to let process_rx_list inform tls_sw_recvmsg that it had more data available. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/f00c0c0afa080c60f016df1471158c1caf983c34.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index a83b6119f382..5238886e6186 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1772,7 +1772,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1785,7 +1786,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1803,12 +1804,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1844,6 +1845,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -1947,6 +1952,7 @@ int tls_sw_recvmsg(struct sock *sk, int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -1966,12 +1972,12 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; - if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2130,10 +2136,10 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + decrypted, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); + async_copy_bytes, is_peek, NULL); } copied += decrypted; From f2135bbf14949687e96cabb13d8a91ae3deb9069 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 19 Feb 2024 16:58:04 +0100 Subject: [PATCH 270/299] netfilter: nf_tables: set dormant flag on hook register failure [ Upstream commit bccebf64701735533c8db37773eeacc6566cc8ec ] We need to set the dormant flag again if we fail to register the hooks. During memory pressure hook registration can fail and we end up with a table marked as active but no registered hooks. On table/base chain deletion, nf_tables will attempt to unregister the hook again which yields a warn splat from the nftables core. Reported-and-tested-by: syzbot+de4025c006ec68ac56fc@syzkaller.appspotmail.com Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index cb7d42a3faab..40e8aa8343cc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1253,6 +1253,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return 0; err_register_hooks: + ctx->table->flags |= NFT_TABLE_F_DORMANT; nft_trans_destroy(trans); return ret; } From 558b00a30e05753a62ecc7e05e939ca8f0241148 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 21 Feb 2024 12:32:58 +0100 Subject: [PATCH 271/299] netfilter: nft_flow_offload: reset dst in route object after setting up flow [ Upstream commit 9e0f0430389be7696396c62f037be4bf72cf93e3 ] dst is transferred to the flow object, route object does not own it anymore. Reset dst in route object, otherwise if flow_offload_add() fails, error path releases dst twice, leading to a refcount underflow. Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_flow_table.h | 2 +- net/netfilter/nf_flow_table_core.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 692d5955911c..4a767b3d20b9 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -275,7 +275,7 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route); + struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 920a5a29ae1d..7502d6d73a60 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) return 0; } +static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, + enum flow_offload_tuple_dir dir) +{ + struct dst_entry *dst = route->tuple[dir].dst; + + route->tuple[dir].dst = NULL; + + return dst; +} + static int flow_offload_fill_route(struct flow_offload *flow, - const struct nf_flow_route *route, + struct nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; - struct dst_entry *dst = route->tuple[dir].dst; + struct dst_entry *dst = nft_route_dst_fetch(route, dir); int i, j = 0; switch (flow_tuple->l3proto) { @@ -146,7 +156,7 @@ static void nft_flow_dst_release(struct flow_offload *flow, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route) + struct nf_flow_route *route) { flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); From 9256ab9232e35a16af9c30fa4e522e6d1bd3605a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 20 Feb 2024 21:36:39 +0100 Subject: [PATCH 272/299] netfilter: nft_flow_offload: release dst in case direct xmit path is used [ Upstream commit 8762785f459be1cfe6fcf7285c123aad6a3703f0 ] Direct xmit does not use it since it calls dev_queue_xmit() to send packets, hence it calls dst_release(). kmemleak reports: unreferenced object 0xffff88814f440900 (size 184): comm "softirq", pid 0, jiffies 4294951896 hex dump (first 32 bytes): 00 60 5b 04 81 88 ff ff 00 e6 e8 82 ff ff ff ff .`[............. 21 0b 50 82 ff ff ff ff 00 00 00 00 00 00 00 00 !.P............. backtrace (crc cb2bf5d6): [<000000003ee17107>] kmem_cache_alloc+0x286/0x340 [<0000000021a5de2c>] dst_alloc+0x43/0xb0 [<00000000f0671159>] rt_dst_alloc+0x2e/0x190 [<00000000fe5092c9>] __mkroute_output+0x244/0x980 [<000000005fb96fb0>] ip_route_output_flow+0xc0/0x160 [<0000000045367433>] nf_ip_route+0xf/0x30 [<0000000085da1d8e>] nf_route+0x2d/0x60 [<00000000d1ecd1cb>] nft_flow_route+0x171/0x6a0 [nft_flow_offload] [<00000000d9b2fb60>] nft_flow_offload_eval+0x4e8/0x700 [nft_flow_offload] [<000000009f447dbb>] expr_call_ops_eval+0x53/0x330 [nf_tables] [<00000000072e1be6>] nft_do_chain+0x17c/0x840 [nf_tables] [<00000000d0551029>] nft_do_chain_inet+0xa1/0x210 [nf_tables] [<0000000097c9d5c6>] nf_hook_slow+0x5b/0x160 [<0000000005eccab1>] ip_forward+0x8b6/0x9b0 [<00000000553a269b>] ip_rcv+0x221/0x230 [<00000000412872e5>] __netif_receive_skb_one_core+0xfe/0x110 Fixes: fa502c865666 ("netfilter: flowtable: simplify route logic") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_flow_table_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 7502d6d73a60..a0571339239c 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -132,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, ETH_ALEN); flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; + dst_release(dst); break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: From fe9f4d1c531ae6271e8e0c12c6dc270bd9ce1b7e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Feb 2024 19:43:53 +0100 Subject: [PATCH 273/299] netfilter: nf_tables: register hooks last when adding new chain/flowtable [ Upstream commit d472e9853d7b46a6b094224d131d09ccd3a03daf ] Register hooks last when adding chain/flowtable to ensure that packets do not walk over datastructure that is being released in the error path without waiting for the rcu grace period. Fixes: 91c7b38dc9f0 ("netfilter: nf_tables: use new transaction infrastructure to handle chain") Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 78 ++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 40e8aa8343cc..36fdce00bdab 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -686,15 +686,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; } -static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, - struct nft_flowtable *flowtable) +static struct nft_trans * +nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, + struct nft_flowtable *flowtable) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_flowtable)); if (trans == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWFLOWTABLE) nft_activate_next(ctx->net, flowtable); @@ -703,22 +704,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, nft_trans_flowtable(trans) = flowtable; nft_trans_commit_list_add_tail(ctx->net, trans); - return 0; + return trans; } static int nft_delflowtable(struct nft_ctx *ctx, struct nft_flowtable *flowtable) { - int err; + struct nft_trans *trans; - err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); - if (err < 0) - return err; + trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); + if (IS_ERR(trans)) + return PTR_ERR(trans); nft_deactivate_next(ctx->net, flowtable); nft_use_dec(&ctx->table->use); - return err; + return 0; } static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) @@ -2506,19 +2507,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, RCU_INIT_POINTER(chain->blob_gen_0, blob); RCU_INIT_POINTER(chain->blob_gen_1, blob); - err = nf_tables_register_hook(net, table, chain); - if (err < 0) - goto err_destroy_chain; - if (!nft_use_inc(&table->use)) { err = -EMFILE; - goto err_use; + goto err_destroy_chain; } trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - goto err_unregister_hook; + goto err_trans; } nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; @@ -2526,17 +2523,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, nft_trans_chain_policy(trans) = policy; err = nft_chain_add(table, chain); - if (err < 0) { - nft_trans_destroy(trans); - goto err_unregister_hook; - } + if (err < 0) + goto err_chain_add; + + /* This must be LAST to ensure no packets are walking over this chain. */ + err = nf_tables_register_hook(net, table, chain); + if (err < 0) + goto err_register_hook; return 0; -err_unregister_hook: +err_register_hook: + nft_chain_del(chain); +err_chain_add: + nft_trans_destroy(trans); +err_trans: nft_use_dec_restore(&table->use); -err_use: - nf_tables_unregister_hook(net, table, chain); err_destroy_chain: nf_tables_chain_destroy(ctx); @@ -8334,9 +8336,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb, u8 family = info->nfmsg->nfgen_family; const struct nf_flowtable_type *type; struct nft_flowtable *flowtable; - struct nft_hook *hook, *next; struct net *net = info->net; struct nft_table *table; + struct nft_trans *trans; struct nft_ctx ctx; int err; @@ -8416,34 +8418,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb, err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable, extack, true); if (err < 0) - goto err4; + goto err_flowtable_parse_hooks; list_splice(&flowtable_hook.list, &flowtable->hook_list); flowtable->data.priority = flowtable_hook.priority; flowtable->hooknum = flowtable_hook.num; + trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto err_flowtable_trans; + } + + /* This must be LAST to ensure no packets are walking over this flowtable. */ err = nft_register_flowtable_net_hooks(ctx.net, table, &flowtable->hook_list, flowtable); - if (err < 0) { - nft_hooks_destroy(&flowtable->hook_list); - goto err4; - } - - err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) - goto err5; + goto err_flowtable_hooks; list_add_tail_rcu(&flowtable->list, &table->flowtables); return 0; -err5: - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - nft_unregister_flowtable_hook(net, flowtable, hook); - list_del_rcu(&hook->list); - kfree_rcu(hook, rcu); - } -err4: + +err_flowtable_hooks: + nft_trans_destroy(trans); +err_flowtable_trans: + nft_hooks_destroy(&flowtable->hook_list); +err_flowtable_parse_hooks: flowtable->data.type->free(&flowtable->data); err3: module_put(type->owner); From 73a7cdb487797f1b84822663c0fa3409bfb93b8a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Feb 2024 18:38:45 +0100 Subject: [PATCH 274/299] netfilter: nf_tables: use kzalloc for hook allocation [ Upstream commit 195e5f88c2e48330ba5483e0bad2de3b3fad484f ] KMSAN reports unitialized variable when registering the hook, reg->hook_ops_type == NF_HOOK_OP_BPF) ~~~~~~~~~~~ undefined This is a small structure, just use kzalloc to make sure this won't happen again when new fields get added to nf_hook_ops. Fixes: 7b4b2fa37587 ("netfilter: annotate nf_tables base hook ops") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 36fdce00bdab..8808d78d6523 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2084,7 +2084,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, struct nft_hook *hook; int err; - hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); + hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); if (!hook) { err = -ENOMEM; goto err_hook_alloc; From 18a3d49aee3173fa0152a044ce607e6e01632444 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 15 Feb 2024 15:53:08 +0800 Subject: [PATCH 275/299] net: mctp: put sock on tag allocation failure [ Upstream commit 9990889be14288d4f1743e4768222d5032a79c27 ] We may hold an extra reference on a socket if a tag allocation fails: we optimistically allocate the sk_key, and take a ref there, but do not drop if we end up not using the allocated key. Ensure we're dropping the sock on this failure by doing a proper unref rather than directly kfree()ing. Fixes: de8a6b15d965 ("net: mctp: add an explicit reference from a mctp_sk_key to sock") Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ce9b61e44d1cdae7797be0c5e3141baf582d23a0.1707983487.git.jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/mctp/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 7a47a58aa54b..6218dcd07e18 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, spin_unlock_irqrestore(&mns->keys_lock, flags); if (!tagbits) { - kfree(key); + mctp_key_unref(key); return ERR_PTR(-EBUSY); } From 91addaf5f02b47e82bfd01c4b5100742c5138580 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 Feb 2024 08:11:11 -0800 Subject: [PATCH 276/299] tools: ynl: make sure we always pass yarg to mnl_cb_run [ Upstream commit e4fe082c38cd74a8fa384bc7542cf3edf1cb7318 ] There is one common error handler in ynl - ynl_cb_error(). It expects priv to be a pointer to struct ynl_parse_arg AKA yarg. To avoid potential crashes if we encounter a stray NLMSG_ERROR always pass yarg as priv (or a struct which has it as the first member). ynl_cb_null() has a similar problem directly - it expects yarg but priv passed by the caller is ys. Found by code inspection. Fixes: 86878f14d71a ("tools: ynl: user space helpers") Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240220161112.2735195-2-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/net/ynl/lib/ynl.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 514e0d69e731..0a7fe6a13f7b 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -450,6 +450,8 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version) int ynl_recv_ack(struct ynl_sock *ys, int ret) { + struct ynl_parse_arg yarg = { .ys = ys, }; + if (!ret) { yerr(ys, YNL_ERROR_EXPECT_ACK, "Expecting an ACK but nothing received"); @@ -462,7 +464,7 @@ int ynl_recv_ack(struct ynl_sock *ys, int ret) return ret; } return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid, - ynl_cb_null, ys); + ynl_cb_null, &yarg); } int ynl_cb_null(const struct nlmsghdr *nlh, void *data) @@ -725,11 +727,14 @@ err_free: static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data) { - return ynl_ntf_parse((struct ynl_sock *)data, nlh); + struct ynl_parse_arg *yarg = data; + + return ynl_ntf_parse(yarg->ys, nlh); } int ynl_ntf_check(struct ynl_sock *ys) { + struct ynl_parse_arg yarg = { .ys = ys, }; ssize_t len; int err; @@ -751,7 +756,7 @@ int ynl_ntf_check(struct ynl_sock *ys) return len; err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, - ynl_ntf_trampoline, ys, + ynl_ntf_trampoline, &yarg, ynl_cb_array, NLMSG_MIN_TYPE); if (err < 0) return err; From a702e98833420f386cff1986f1517e8cf6585e25 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 Feb 2024 08:11:12 -0800 Subject: [PATCH 277/299] tools: ynl: don't leak mcast_groups on init error [ Upstream commit 5d78b73e851455d525a064f3b042b29fdc0c1a4a ] Make sure to free the already-parsed mcast_groups if we don't get an ack from the kernel when reading family info. This is part of the ynl_sock_create() error path, so we won't get a call to ynl_sock_destroy() to free them later. Fixes: 86878f14d71a ("tools: ynl: user space helpers") Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240220161112.2735195-3-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/net/ynl/lib/ynl.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 0a7fe6a13f7b..11a7a889d279 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -572,7 +572,13 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) return err; } - return ynl_recv_ack(ys, err); + err = ynl_recv_ack(ys, err); + if (err < 0) { + free(ys->mcast_groups); + return err; + } + + return 0; } struct ynl_sock * From 30d8d56aee704326e4d5282b556c49e239ee10be Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 20 Feb 2024 08:52:45 +0100 Subject: [PATCH 278/299] devlink: fix port dump cmd type [ Upstream commit 61c43780e9444123410cd48c2483e01d2b8f75e8 ] Unlike other commands, due to a c&p error, port dump fills-up cmd with wrong value, different from port-get request cmd, port-get doit reply and port notification. Fix it by filling cmd with value DEVLINK_CMD_PORT_NEW. Skimmed through devlink userspace implementations, none of them cares about this cmd value. Only ynl, for which, this is actually a fix, as it expects doit and dumpit ops rsp_value to be the same. Omit the fixes tag, even thought this is fix, better to target this for next release. Fixes: bfcd3a466172 ("Introduce devlink infrastructure") Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240220075245.75416-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/devlink/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/devlink/port.c b/net/devlink/port.c index 91ba1ca0f355..9b5ff0fccefd 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -574,7 +574,7 @@ devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink, xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) { err = devlink_nl_port_fill(msg, devlink_port, - DEVLINK_CMD_NEW, + DEVLINK_CMD_PORT_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, cb->extack); From 88895d42472099c3c9bf9e113366f7f289102ab8 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 20 Feb 2024 08:59:28 +0000 Subject: [PATCH 279/299] net/sched: flower: Add lock protection when remove filter handle [ Upstream commit 1fde0ca3a0de7e9f917668941156959dd5e9108b ] As IDR can't protect itself from the concurrent modification, place idr_remove() under the protection of tp->lock. Fixes: 08a0063df3ae ("net/sched: flower: Move filter handle initialization earlier") Signed-off-by: Jianbo Liu Reviewed-by: Cosmin Ratiu Reviewed-by: Gal Pressman Reviewed-by: Jiri Pirko Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20240220085928.9161-1-jianbol@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/cls_flower.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index efb9d2811b73..6ee7064c82fc 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2460,8 +2460,11 @@ unbind_filter: } errout_idr: - if (!fold) + if (!fold) { + spin_lock(&tp->lock); idr_remove(&head->handle_idr, fnew->handle); + spin_unlock(&tp->lock); + } __fl_put(fnew); errout_tb: kfree(tb); From 9adfd66b42d28a1763292bae5c5a163d7dce0cd6 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 19 Feb 2024 09:00:43 +0100 Subject: [PATCH 280/299] net: sparx5: Add spinlock for frame transmission from CPU [ Upstream commit 603ead96582d85903baec2d55f021b8dac5c25d2 ] Both registers used when doing manual injection or fdma injection are shared between all the net devices of the switch. It was noticed that when having two process which each of them trying to inject frames on different ethernet ports, that the HW started to behave strange, by sending out more frames then expected. When doing fdma injection it is required to set the frame in the DCB and then make sure that the next pointer of the last DCB is invalid. But because there is no locks for this, then easily this pointer between the DCB can be broken and then it would create a loop of DCBs. And that means that the HW will continuously transmit these frames in a loop. Until the SW will break this loop. Therefore to fix this issue, add a spin lock for when accessing the registers for manual or fdma injection. Signed-off-by: Horatiu Vultur Reviewed-by: Daniel Machon Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Link: https://lore.kernel.org/r/20240219080043.1561014-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_main.h | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_packet.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index dc9af480bfea..8f116982c08a 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -757,6 +757,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sparx5); sparx5->pdev = pdev; sparx5->dev = &pdev->dev; + spin_lock_init(&sparx5->tx_lock); /* Do switch core reset if available */ reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch"); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 6f565c0c0c3d..316fed5f2735 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -280,6 +280,7 @@ struct sparx5 { int xtr_irq; /* Frame DMA */ int fdma_irq; + spinlock_t tx_lock; /* lock for frame transmission */ struct sparx5_rx rx; struct sparx5_tx tx; /* PTP */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 6db6ac6a3bbc..ac7e1cffbcec 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -244,10 +244,12 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) } skb_tx_timestamp(skb); + spin_lock(&sparx5->tx_lock); if (sparx5->fdma_irq > 0) ret = sparx5_fdma_xmit(sparx5, ifh, skb); else ret = sparx5_inject(sparx5, ifh, skb, dev); + spin_unlock(&sparx5->tx_lock); if (ret == -EBUSY) goto busy; From 3ebd19073efde6ff4a54246cc68a0414a401d918 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 18 Feb 2024 10:12:13 +0200 Subject: [PATCH 281/299] phonet: take correct lock to peek at the RX queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3b2d9bc4d4acdf15a876eae2c0d83149250e85ba ] The receive queue is protected by its embedded spin-lock, not the socket lock, so we need the former lock here (and only that one). Fixes: 107d0d9b8d9a ("Phonet: Phonet datagram transport protocol") Reported-by: Luosili Signed-off-by: Rémi Denis-Courmont Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240218081214.4806-1-remi@remlab.net Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/phonet/datagram.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 3aa50dc7535b..976fe250b509 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -34,10 +34,10 @@ static int pn_ioctl(struct sock *sk, int cmd, int *karg) switch (cmd) { case SIOCINQ: - lock_sock(sk); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); *karg = skb ? skb->len : 0; - release_sock(sk); + spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; case SIOCPNADDRESOURCE: From 0a9f558c72c47472c38c05fcb72c70abb9104277 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 18 Feb 2024 10:12:14 +0200 Subject: [PATCH 282/299] phonet/pep: fix racy skb_queue_empty() use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7d2a894d7f487dcb894df023e9d3014cf5b93fe5 ] The receive queues are protected by their respective spin-lock, not the socket lock. This could lead to skb_peek() unexpectedly returning NULL or a pointer to an already dequeued socket buffer. Fixes: 9641458d3ec4 ("Phonet: Pipe End Point for Phonet Pipes protocol") Signed-off-by: Rémi Denis-Courmont Link: https://lore.kernel.org/r/20240218081214.4806-2-remi@remlab.net Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/phonet/pep.c | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index faba31f2eff2..3dd5f52bc1b5 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len) return 0; } +static unsigned int pep_first_packet_length(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff_head *q; + struct sk_buff *skb; + unsigned int len = 0; + bool found = false; + + if (sock_flag(sk, SOCK_URGINLINE)) { + q = &pn->ctrlreq_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) { + len = skb->len; + found = true; + } + spin_unlock_bh(&q->lock); + } + + if (likely(!found)) { + q = &sk->sk_receive_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) + len = skb->len; + spin_unlock_bh(&q->lock); + } + + return len; +} + static int pep_ioctl(struct sock *sk, int cmd, int *karg) { struct pep_sock *pn = pep_sk(sk); @@ -929,15 +960,7 @@ static int pep_ioctl(struct sock *sk, int cmd, int *karg) break; } - lock_sock(sk); - if (sock_flag(sk, SOCK_URGINLINE) && - !skb_queue_empty(&pn->ctrlreq_queue)) - *karg = skb_peek(&pn->ctrlreq_queue)->len; - else if (!skb_queue_empty(&sk->sk_receive_queue)) - *karg = skb_peek(&sk->sk_receive_queue)->len; - else - *karg = 0; - release_sock(sk); + *karg = pep_first_packet_length(sk); ret = 0; break; From 8fbc19196dbe710e2f54fad7fbb10d2fa8353832 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 19 Feb 2024 14:52:54 +0100 Subject: [PATCH 283/299] Fix write to cloned skb in ipv6_hop_ioam() [ Upstream commit f198d933c2e4f8f89e0620fbaf1ea7eac384a0eb ] ioam6_fill_trace_data() writes inside the skb payload without ensuring it's writeable (e.g., not cloned). This function is called both from the input and output path. The output path (ioam6_iptunnel) already does the check. This commit provides a fix for the input path, inside ipv6_hop_ioam(). It also updates ip6_parse_tlv() to refresh the network header pointer ("nh") when returning from ipv6_hop_ioam(). Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace") Reported-by: Paolo Abeni Signed-off-by: Justin Iurman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv6/exthdrs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4952ae792450..02e9ffb63af1 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -177,6 +177,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_IOAM: if (!ipv6_hop_ioam(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) @@ -943,6 +945,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); + /* About to mangle packet header */ + if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) + goto drop; + + /* Trace pointer may have changed */ + trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + + optoff + sizeof(*hdr)); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: From c7818378953d1522d68e9b9e84c8ad6761f5686f Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Tue, 20 Feb 2024 12:30:07 +0530 Subject: [PATCH 284/299] net: phy: realtek: Fix rtl8211f_config_init() for RTL8211F(D)(I)-VD-CG PHY [ Upstream commit 3489182b11d35f1944c1245fc9c4867cf622c50f ] Commit bb726b753f75 ("net: phy: realtek: add support for RTL8211F(D)(I)-VD-CG") extended support of the driver from the existing support for RTL8211F(D)(I)-CG PHY to the newer RTL8211F(D)(I)-VD-CG PHY. While that commit indicated that the RTL8211F_PHYCR2 register is not supported by the "VD-CG" PHY model and therefore updated the corresponding section in rtl8211f_config_init() to be invoked conditionally, the call to "genphy_soft_reset()" was left as-is, when it should have also been invoked conditionally. This is because the call to "genphy_soft_reset()" was first introduced by the commit 0a4355c2b7f8 ("net: phy: realtek: add dt property to disable CLKOUT clock") since the RTL8211F guide indicates that a PHY reset should be issued after setting bits in the PHYCR2 register. As the PHYCR2 register is not applicable to the "VD-CG" PHY model, fix the rtl8211f_config_init() function by invoking "genphy_soft_reset()" conditionally based on the presence of the "PHYCR2" register. Fixes: bb726b753f75 ("net: phy: realtek: add support for RTL8211F(D)(I)-VD-CG") Signed-off-by: Siddharth Vadapalli Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220070007.968762-1-s-vadapalli@ti.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 894172a3e15f..337899c69738 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -421,9 +421,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) ERR_PTR(ret)); return ret; } + + return genphy_soft_reset(phydev); } - return genphy_soft_reset(phydev); + return 0; } static int rtl821x_suspend(struct phy_device *phydev) From 716cfee8053e608b4ff8c698e91843bd985f4553 Mon Sep 17 00:00:00 2001 From: Erik Kurzinger Date: Fri, 19 Jan 2024 08:32:06 -0800 Subject: [PATCH 285/299] drm/syncobj: call drm_syncobj_fence_add_wait when WAIT_AVAILABLE flag is set [ Upstream commit 3c43177ffb54ea5be97505eb8e2690e99ac96bc9 ] When waiting for a syncobj timeline point whose fence has not yet been submitted with the WAIT_FOR_SUBMIT flag, a callback is registered using drm_syncobj_fence_add_wait and the thread is put to sleep until the timeout expires. If the fence is submitted before then, drm_syncobj_add_point will wake up the sleeping thread immediately which will proceed to wait for the fence to be signaled. However, if the WAIT_AVAILABLE flag is used instead, drm_syncobj_fence_add_wait won't get called, meaning the waiting thread will always sleep for the full timeout duration, even if the fence gets submitted earlier. If it turns out that the fence *has* been submitted by the time it eventually wakes up, it will still indicate to userspace that the wait completed successfully (it won't return -ETIME), but it will have taken much longer than it should have. To fix this, we must call drm_syncobj_fence_add_wait if *either* the WAIT_FOR_SUBMIT flag or the WAIT_AVAILABLE flag is set. The only difference being that with WAIT_FOR_SUBMIT we will also wait for the fence to be signaled after it has been submitted while with WAIT_AVAILABLE we will return immediately. IGT test patch: https://lists.freedesktop.org/archives/igt-dev/2024-January/067537.html v1 -> v2: adjust lockdep_assert_none_held_once condition (cherry picked from commit 8c44ea81634a4a337df70a32621a5f3791be23df) Fixes: 01d6c3578379 ("drm/syncobj: add support for timeline point wait v8") Signed-off-by: Erik Kurzinger Signed-off-by: Simon Ser Reviewed-by: Daniel Vetter Reviewed-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20240119163208.3723457-1-ekurzinger@nvidia.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_syncobj.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 01da6789d044..b3433265be6a 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1034,7 +1034,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, uint64_t *points; uint32_t signaled_count, i; - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) lockdep_assert_none_held_once(); points = kmalloc_array(count, sizeof(*points), GFP_KERNEL); @@ -1103,7 +1104,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, * fallthough and try a 0 timeout wait! */ - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) { for (i = 0; i < count; ++i) drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]); } From 20e1e1a2b8a4525301a76bd9afb856a7606a3a34 Mon Sep 17 00:00:00 2001 From: Erik Kurzinger Date: Wed, 21 Feb 2024 10:44:28 -0800 Subject: [PATCH 286/299] drm/syncobj: handle NULL fence in syncobj_eventfd_entry_func [ Upstream commit 2aa6f5b0fd052e363bb9d4b547189f0bf6b3d6d3 ] During syncobj_eventfd_entry_func, dma_fence_chain_find_seqno may set the fence to NULL if the given seqno is signaled and a later seqno has already been submitted. In that case, the eventfd should be signaled immediately which currently does not happen. This is a similar issue to the one addressed by commit b19926d4f3a6 ("drm/syncobj: Deal with signalled fences in drm_syncobj_find_fence."). As a fix, if the return value of dma_fence_chain_find_seqno indicates success but it sets the fence to NULL, we will assign a stub fence to ensure the following code still signals the eventfd. v1 -> v2: assign a stub fence instead of signaling the eventfd Signed-off-by: Erik Kurzinger Fixes: c7a472297169 ("drm/syncobj: add IOCTL to register an eventfd") Signed-off-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20240221184527.37667-1-ekurzinger@nvidia.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_syncobj.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index b3433265be6a..5860428da8de 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1380,10 +1380,21 @@ syncobj_eventfd_entry_func(struct drm_syncobj *syncobj, /* This happens inside the syncobj lock */ fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1)); + if (!fence) + return; + ret = dma_fence_chain_find_seqno(&fence, entry->point); - if (ret != 0 || !fence) { + if (ret != 0) { + /* The given seqno has not been submitted yet. */ dma_fence_put(fence); return; + } else if (!fence) { + /* If dma_fence_chain_find_seqno returns 0 but sets the fence + * to NULL, it implies that the given seqno is signaled and a + * later seqno has already been submitted. Assign a stub fence + * so that the eventfd still gets signaled below. + */ + fence = dma_fence_get_stub(); } list_del_init(&entry->node); From 7a8a8a6a4f1e1a0585b2bed983b0c71e35901821 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Thu, 22 Feb 2024 12:49:33 +0500 Subject: [PATCH 287/299] selftests/iommu: fix the config fragment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 510325e5ac5f45c1180189d3bfc108c54bf64544 ] The config fragment doesn't follow the correct format to enable those config options which make the config options getting missed while merging with other configs. ➜ merge_config.sh -m .config tools/testing/selftests/iommu/config Using .config as base Merging tools/testing/selftests/iommu/config ➜ make olddefconfig .config:5295:warning: unexpected data: CONFIG_IOMMUFD .config:5296:warning: unexpected data: CONFIG_IOMMUFD_TEST While at it, add CONFIG_FAULT_INJECTION as well which is needed for CONFIG_IOMMUFD_TEST. If CONFIG_FAULT_INJECTION isn't present in base config (such as x86 defconfig), CONFIG_IOMMUFD_TEST doesn't get enabled. Fixes: 57f0988706fe ("iommufd: Add a selftest") Link: https://lore.kernel.org/r/20240222074934.71380-1-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- tools/testing/selftests/iommu/config | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config index 6c4f901d6fed..110d73917615 100644 --- a/tools/testing/selftests/iommu/config +++ b/tools/testing/selftests/iommu/config @@ -1,2 +1,3 @@ -CONFIG_IOMMUFD -CONFIG_IOMMUFD_TEST +CONFIG_IOMMUFD=y +CONFIG_FAULT_INJECTION=y +CONFIG_IOMMUFD_TEST=y From 10c6b90e975358c17856a578419dc449887899c2 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 13 Feb 2024 01:50:50 +0100 Subject: [PATCH 288/299] drm/amd/display: Fix memory leak in dm_sw_fini() [ Upstream commit bae67893578d608e35691dcdfa90c4957debf1d3 ] After destroying dmub_srv, the memory associated with it is not freed, causing a memory leak: unreferenced object 0xffff896302b45800 (size 1024): comm "(udev-worker)", pid 222, jiffies 4294894636 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 6265fd77): [] kmalloc_trace+0x29d/0x340 [] dm_dmub_sw_init+0xb4/0x450 [amdgpu] [] dm_sw_init+0x15/0x2b0 [amdgpu] [] amdgpu_device_init+0x1417/0x24e0 [amdgpu] [] amdgpu_driver_load_kms+0x15/0x190 [amdgpu] [] amdgpu_pci_probe+0x187/0x4e0 [amdgpu] [] local_pci_probe+0x3e/0x90 [] pci_device_probe+0xc3/0x230 [] really_probe+0xe2/0x480 [] __driver_probe_device+0x78/0x160 [] driver_probe_device+0x1f/0x90 [] __driver_attach+0xce/0x1c0 [] bus_for_each_dev+0x70/0xc0 [] bus_add_driver+0x112/0x210 [] driver_register+0x55/0x100 [] do_one_initcall+0x41/0x300 Fix this by freeing dmub_srv after destroying it. Fixes: 743b9786b14a ("drm/amd/display: Hook up the DMUB service in DM") Signed-off-by: Armin Wolf Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3194c10f345f..50444ab7b3cc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2247,6 +2247,7 @@ static int dm_sw_fini(void *handle) if (adev->dm.dmub_srv) { dmub_srv_destroy(adev->dm.dmub_srv); + kfree(adev->dm.dmub_srv); adev->dm.dmub_srv = NULL; } From b81fc6c229ec6fc6f8d21ed68eb9dc2fc1cdd360 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 21 Feb 2024 20:27:13 +0100 Subject: [PATCH 289/299] i2c: imx: when being a target, mark the last read as processed [ Upstream commit 87aec499368d488c20292952d6d4be7cb9e49c5e ] When being a target, NAK from the controller means that all bytes have been transferred. So, the last byte needs also to be marked as 'processed'. Otherwise index registers of backends may not increase. Fixes: f7414cd6923f ("i2c: imx: support slave mode for imx I2C driver") Signed-off-by: Corey Minyard Tested-by: Andrew Manley Reviewed-by: Andrew Manley Reviewed-by: Oleksij Rempel [wsa: fixed comment and commit message to properly describe the case] Signed-off-by: Wolfram Sang Signed-off-by: Andi Shyti Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-imx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 1775a79aeba2..0951bfdc89cf 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -803,6 +803,11 @@ static irqreturn_t i2c_imx_slave_handle(struct imx_i2c_struct *i2c_imx, ctl &= ~I2CR_MTX; imx_i2c_write_reg(ctl, i2c_imx, IMX_I2C_I2CR); imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); + + /* flag the last byte as processed */ + i2c_imx_slave_event(i2c_imx, + I2C_SLAVE_READ_PROCESSED, &value); + i2c_imx_slave_finish_op(i2c_imx); return IRQ_HANDLED; } From a259173bf2659e933fcb2ac56681a91adda2777f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:53 +0100 Subject: [PATCH 290/299] selftests: mptcp: join: stop transfer when check is done (part 1) commit 31ee4ad86afd6ed6f4bb1b38c43011216080c42a upstream. Since the "Fixes" commit mentioned below, "userspace pm" subtests of mptcp_join selftests introduced in v6.5 are launching the whole transfer in the background, do the required checks, then wait for the end of transfer. There is no need to wait longer, especially because the checks at the end of the transfer are ignored (which is fine). This saves quite a few seconds in slow environments. Note that old versions will need commit bdbef0a6ff10 ("selftests: mptcp: add mptcp_lib_kill_wait") as well to get 'mptcp_lib_kill_wait()' helper. Fixes: 4369c198e599 ("selftests: mptcp: test userspace pm out of transfer") Cc: stable@vger.kernel.org # 6.5.x: bdbef0a6ff10: selftests: mptcp: add mptcp_lib_kill_wait Cc: stable@vger.kernel.org # 6.5.x Reviewed-and-tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-8-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 67ca22856d54..a0584417bcab 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3430,7 +3430,7 @@ userspace_tests() chk_rm_nr 1 1 invert chk_mptcp_info subflows 0 subflows 0 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm create destroy subflow @@ -3449,7 +3449,7 @@ userspace_tests() chk_rm_nr 1 1 chk_mptcp_info subflows 0 subflows 0 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi } From 7a3610956d3bddacca2fcc2259295d1ad56556cb Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Thu, 8 Feb 2024 02:32:54 +0000 Subject: [PATCH 291/299] mm/zswap: invalidate duplicate entry when !zswap_enabled commit 678e54d4bb9a4822f8ae99690ac131c5d490cdb1 upstream. We have to invalidate any duplicate entry even when !zswap_enabled since zswap can be disabled anytime. If the folio store success before, then got dirtied again but zswap disabled, we won't invalidate the old duplicate entry in the zswap_store(). So later lru writeback may overwrite the new data in swapfile. Link: https://lkml.kernel.org/r/20240208023254.3873823-1-chengming.zhou@linux.dev Fixes: 42c06a0e8ebe ("mm: kill frontswap") Signed-off-by: Chengming Zhou Acked-by: Johannes Weiner Cc: Nhat Pham Cc: Yosry Ahmed Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/zswap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/zswap.c b/mm/zswap.c index 37d2b1cb2ecb..63fb94d68e10 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1215,7 +1215,7 @@ bool zswap_store(struct folio *folio) if (folio_test_large(folio)) return false; - if (!zswap_enabled || !tree) + if (!tree) return false; /* @@ -1231,6 +1231,9 @@ bool zswap_store(struct folio *folio) } spin_unlock(&tree->lock); + if (!zswap_enabled) + return false; + /* * XXX: zswap reclaim does not work with cgroups yet. Without a * cgroup-aware entry LRU, we will push out entries system-wide based on From 6156277d1b26cb3fdb6fcbf0686ab78268571644 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Thu, 25 Jan 2024 08:51:27 +0000 Subject: [PATCH 292/299] mm: zswap: fix missing folio cleanup in writeback race path commit e3b63e966cac0bf78aaa1efede1827a252815a1d upstream. In zswap_writeback_entry(), after we get a folio from __read_swap_cache_async(), we grab the tree lock again to check that the swap entry was not invalidated and recycled. If it was, we delete the folio we just added to the swap cache and exit. However, __read_swap_cache_async() returns the folio locked when it is newly allocated, which is always true for this path, and the folio is ref'd. Make sure to unlock and put the folio before returning. This was discovered by code inspection, probably because this path handles a race condition that should not happen often, and the bug would not crash the system, it will only strand the folio indefinitely. Link: https://lkml.kernel.org/r/20240125085127.1327013-1-yosryahmed@google.com Fixes: 04fc7816089c ("mm: fix zswap writeback race condition") Signed-off-by: Yosry Ahmed Reviewed-by: Chengming Zhou Acked-by: Johannes Weiner Reviewed-by: Nhat Pham Cc: Domenico Cerasuolo Cc: Signed-off-by: Andrew Morton Signed-off-by: Yosry Ahmed Signed-off-by: Greg Kroah-Hartman --- mm/zswap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/zswap.c b/mm/zswap.c index 63fb94d68e10..69681b9173fd 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1100,6 +1100,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry, if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) { spin_unlock(&tree->lock); delete_from_swap_cache(page_folio(page)); + unlock_page(page); + put_page(page); ret = -ENOMEM; goto fail; } From 358f02b84fbb56c040c06904512b0e83ba7e23b3 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:54 +0100 Subject: [PATCH 293/299] selftests: mptcp: join: stop transfer when check is done (part 2) commit 04b57c9e096a9479fe0ad31e3956e336fa589cb2 upstream. Since the "Fixes" commits mentioned below, the newly added "userspace pm" subtests of mptcp_join selftests are launching the whole transfer in the background, do the required checks, then wait for the end of transfer. There is no need to wait longer, especially because the checks at the end of the transfer are ignored (which is fine). This saves quite a few seconds on slow environments. While at it, use 'mptcp_lib_kill_wait()' helper everywhere, instead of on a specific one with 'kill_tests_wait()'. Fixes: b2e2248f365a ("selftests: mptcp: userspace pm create id 0 subflow") Fixes: e3b47e460b4b ("selftests: mptcp: userspace pm remove initial subflow") Fixes: b9fb176081fb ("selftests: mptcp: userspace pm send RM_ADDR for ID 0") Cc: stable@vger.kernel.org Reviewed-and-tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-9-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a0584417bcab..bc85570a6b26 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -688,13 +688,6 @@ kill_events_pids() mptcp_lib_kill_wait $evts_ns2_pid } -kill_tests_wait() -{ - #shellcheck disable=SC2046 - kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1) - wait -} - pm_nl_set_limits() { local ns=$1 @@ -3463,7 +3456,8 @@ endpoint_tests() pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal speed=slow \ - run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! wait_mpj $ns1 pm_nl_check_endpoint "creation" \ @@ -3478,7 +3472,7 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - kill_tests_wait + mptcp_lib_kill_wait $tests_pid fi if reset "delete and re-add" && @@ -3487,7 +3481,8 @@ endpoint_tests() pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow test_linkfail=4 speed=20 \ - run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! wait_mpj $ns2 chk_subflow_nr "before delete" 2 @@ -3502,7 +3497,7 @@ endpoint_tests() wait_mpj $ns2 chk_subflow_nr "after re-add" 2 chk_mptcp_info subflows 1 subflows 1 - kill_tests_wait + mptcp_lib_kill_wait $tests_pid fi } From 697128a3e2e6e6612ae827cfd850cbb7e6bc02f9 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:55 -0800 Subject: [PATCH 294/299] selftests: mptcp: add mptcp_lib_get_counter commit 61c131f5d4d2b79904af2fdcb2839a9db8e7c55c upstream. To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. The helper get_counter() in mptcp_join.sh and get_mib_counter() in mptcp_connect.sh have the same functionality, export get_counter() into mptcp_lib.sh and rename it as mptcp_lib_get_counter(). Use this new helper instead of get_counter() and get_mib_counter(). Use this helper in test_prio() in userspace_pm.sh too instead of open-coding. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-11-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- .../selftests/net/mptcp/mptcp_connect.sh | 41 +++------ .../testing/selftests/net/mptcp/mptcp_join.sh | 88 ++++++++----------- .../testing/selftests/net/mptcp/mptcp_lib.sh | 16 ++++ .../selftests/net/mptcp/userspace_pm.sh | 14 +-- 4 files changed, 73 insertions(+), 86 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b1fc8afd072d..10cd322e05c4 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -341,21 +341,6 @@ do_ping() return 0 } -# $1: ns, $2: MIB counter -get_mib_counter() -{ - local listener_ns="${1}" - local mib="${2}" - - # strip the header - ip netns exec "${listener_ns}" \ - nstat -z -a "${mib}" | \ - tail -n+2 | \ - while read a count c rest; do - echo $count - done -} - # $1: ns, $2: port wait_local_port_listen() { @@ -441,12 +426,12 @@ do_transfer() nstat -n fi - local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -509,11 +494,11 @@ do_transfer() check_transfer $cin $sout "file received by server" rets=$? - local stat_synrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue") + local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -542,8 +527,8 @@ do_transfer() fi if $checksum; then - local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) if [ $csum_err_s_nr -gt 0 ]; then diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index bc85570a6b26..a72104dae2b9 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -611,25 +611,9 @@ wait_local_port_listen() done } -# $1: ns ; $2: counter -get_counter() -{ - local ns="${1}" - local counter="${2}" - local count - - count=$(ip netns exec ${ns} nstat -asz "${counter}" | awk 'NR==1 {next} {print $2}') - if [ -z "${count}" ]; then - mptcp_lib_fail_if_expected_feature "${counter} counter" - return 1 - fi - - echo "${count}" -} - rm_addr_count() { - get_counter "${1}" "MPTcpExtRmAddr" + mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr" } # $1: ns, $2: old rm_addr counter in $ns @@ -649,7 +633,7 @@ wait_rm_addr() rm_sf_count() { - get_counter "${1}" "MPTcpExtRmSubflow" + mptcp_lib_get_counter "${1}" "MPTcpExtRmSubflow" } # $1: ns, $2: old rm_sf counter in $ns @@ -672,11 +656,11 @@ wait_mpj() local ns="${1}" local cnt old_cnt - old_cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") + old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") local i for i in $(seq 10); do - cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") + cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") [ "$cnt" = "${old_cnt}" ] || break sleep 0.1 done @@ -1271,7 +1255,7 @@ chk_csum_nr() fi print_check "sum" - count=$(get_counter ${ns1} "MPTcpExtDataCsumErr") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns1" ]; then extra_msg="$extra_msg ns1=$count" fi @@ -1284,7 +1268,7 @@ chk_csum_nr() print_ok fi print_check "csum" - count=$(get_counter ${ns2} "MPTcpExtDataCsumErr") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns2" ]; then extra_msg="$extra_msg ns2=$count" fi @@ -1328,7 +1312,7 @@ chk_fail_nr() fi print_check "ftx" - count=$(get_counter ${ns_tx} "MPTcpExtMPFailTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") if [ "$count" != "$fail_tx" ]; then extra_msg="$extra_msg,tx=$count" fi @@ -1342,7 +1326,7 @@ chk_fail_nr() fi print_check "failrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPFailRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") if [ "$count" != "$fail_rx" ]; then extra_msg="$extra_msg,rx=$count" fi @@ -1375,7 +1359,7 @@ chk_fclose_nr() fi print_check "ctx" - count=$(get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_tx" ]; then @@ -1386,7 +1370,7 @@ chk_fclose_nr() fi print_check "fclzrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_rx" ]; then @@ -1416,7 +1400,7 @@ chk_rst_nr() fi print_check "rtx" - count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx") if [ -z "$count" ]; then print_skip # accept more rst than expected except if we don't expect any @@ -1428,7 +1412,7 @@ chk_rst_nr() fi print_check "rstrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx") if [ -z "$count" ]; then print_skip # accept more rst than expected except if we don't expect any @@ -1449,7 +1433,7 @@ chk_infi_nr() local count print_check "itx" - count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$infi_tx" ]; then @@ -1459,7 +1443,7 @@ chk_infi_nr() fi print_check "infirx" - count=$(get_counter ${ns1} "MPTcpExtInfiniteMapRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$infi_rx" ]; then @@ -1488,7 +1472,7 @@ chk_join_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1499,7 +1483,7 @@ chk_join_nr() print_check "synack" with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) - count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1516,7 +1500,7 @@ chk_join_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1549,8 +1533,8 @@ chk_stale_nr() print_check "stale" - stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale") - recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover") + stale_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowStale") + recover_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowRecover") if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then print_skip elif [ $stale_nr -lt $stale_min ] || @@ -1587,7 +1571,7 @@ chk_add_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) print_check "add" - count=$(get_counter ${ns2} "MPTcpExtAddAddr") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1599,7 +1583,7 @@ chk_add_nr() fi print_check "echo" - count=$(get_counter ${ns1} "MPTcpExtEchoAdd") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1610,7 +1594,7 @@ chk_add_nr() if [ $port_nr -gt 0 ]; then print_check "pt" - count=$(get_counter ${ns2} "MPTcpExtPortAdd") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1620,7 +1604,7 @@ chk_add_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1631,7 +1615,7 @@ chk_add_nr() fi print_check "synack" - count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1642,7 +1626,7 @@ chk_add_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1653,7 +1637,7 @@ chk_add_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1664,7 +1648,7 @@ chk_add_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1686,7 +1670,7 @@ chk_add_tx_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) print_check "add TX" - count=$(get_counter ${ns1} "MPTcpExtAddAddrTx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1698,7 +1682,7 @@ chk_add_tx_nr() fi print_check "echo TX" - count=$(get_counter ${ns2} "MPTcpExtEchoAddTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_tx_nr" ]; then @@ -1736,7 +1720,7 @@ chk_rm_nr() fi print_check "rm" - count=$(get_counter ${addr_ns} "MPTcpExtRmAddr") + count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr") if [ -z "$count" ]; then print_skip elif [ "$count" != "$rm_addr_nr" ]; then @@ -1746,13 +1730,13 @@ chk_rm_nr() fi print_check "rmsf" - count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow") + count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow") if [ -z "$count" ]; then print_skip elif [ -n "$simult" ]; then local cnt suffix - cnt=$(get_counter ${addr_ns} "MPTcpExtRmSubflow") + cnt=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmSubflow") # in case of simult flush, the subflow removal count on each side is # unreliable @@ -1778,7 +1762,7 @@ chk_rm_tx_nr() local rm_addr_tx_nr=$1 print_check "rm TX" - count=$(get_counter ${ns2} "MPTcpExtRmAddrTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$rm_addr_tx_nr" ]; then @@ -1795,7 +1779,7 @@ chk_prio_nr() local count print_check "ptx" - count=$(get_counter ${ns1} "MPTcpExtMPPrioTx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mp_prio_nr_tx" ]; then @@ -1805,7 +1789,7 @@ chk_prio_nr() fi print_check "prx" - count=$(get_counter ${ns1} "MPTcpExtMPPrioRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mp_prio_nr_rx" ]; then @@ -1905,7 +1889,7 @@ wait_attempt_fail() while [ $time -lt $timeout_ms ]; do local cnt - cnt=$(get_counter ${ns} "TcpAttemptFails") + cnt=$(mptcp_lib_get_counter ${ns} "TcpAttemptFails") [ "$cnt" = 1 ] && return 1 time=$((time + 100)) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 4cd4297ca86d..2b10f200de40 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -216,3 +216,19 @@ mptcp_lib_kill_wait() { kill "${1}" > /dev/null 2>&1 wait "${1}" 2>/dev/null } + +# $1: ns, $2: MIB counter +mptcp_lib_get_counter() { + local ns="${1}" + local counter="${2}" + local count + + count=$(ip netns exec "${ns}" nstat -asz "${counter}" | + awk 'NR==1 {next} {print $2}') + if [ -z "${count}" ]; then + mptcp_lib_fail_if_expected_feature "${counter} counter" + return 1 + fi + + echo "${count}" +} diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 0e573c6c393a..0e748068ee95 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -887,9 +887,10 @@ test_prio() # Check TX print_test "MP_PRIO TX" - count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ $count != 1 ]; then + count=$(mptcp_lib_get_counter "$ns2" "MPTcpExtMPPrioTx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then test_fail "Count != 1: ${count}" else test_pass @@ -897,9 +898,10 @@ test_prio() # Check RX print_test "MP_PRIO RX" - count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ $count != 1 ]; then + count=$(mptcp_lib_get_counter "$ns1" "MPTcpExtMPPrioRx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then test_fail "Count != 1: ${count}" else test_pass From 6931f9029aa6cd1c0cb07e625e54be4891a6ac5e Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 25 Oct 2023 16:37:04 -0700 Subject: [PATCH 295/299] mptcp: userspace pm send RM_ADDR for ID 0 commit 84c531f54ad9a124a924c9505d74e33d16965146 upstream. This patch adds the ability to send RM_ADDR for local ID 0. Check whether id 0 address is removed, if not, put id 0 into a removing list, pass it to mptcp_pm_remove_addr() to remove id 0 address. There is no reason not to allow the userspace to remove the initial address (ID 0). This special case was not taken into account not letting the userspace to delete all addresses as announced. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/379 Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231025-send-net-next-20231025-v1-3-db8f25f798eb@kernel.org Signed-off-by: Jakub Kicinski Fixes: d9a4594edabf ("mptcp: netlink: Add MPTCP_PM_CMD_REMOVE") Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_userspace.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 3b34b7cf56c9..ecd166ce047d 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -220,6 +220,40 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) return err; } +static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, + struct genl_info *info) +{ + struct mptcp_rm_list list = { .nr = 0 }; + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + bool has_id_0 = false; + int err = -EINVAL; + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + if (subflow->local_id == 0) { + has_id_0 = true; + break; + } + } + if (!has_id_0) { + GENL_SET_ERR_MSG(info, "address with id 0 not found"); + goto remove_err; + } + + list.ids[list.nr++] = 0; + + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &list); + spin_unlock_bh(&msk->pm.lock); + + err = 0; + +remove_err: + release_sock(sk); + return err; +} + int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; @@ -251,6 +285,11 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) goto remove_err; } + if (id_val == 0) { + err = mptcp_userspace_pm_remove_id_zero_address(msk, info); + goto remove_err; + } + lock_sock((struct sock *)msk); list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { From 7e7a81f9f2da273dfc4a1a6cddbb78024ba3cd59 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 15 Feb 2024 19:25:29 +0100 Subject: [PATCH 296/299] mptcp: add needs_id for netlink appending addr commit 584f3894262634596532cf43a5e782e34a0ce374 upstream. Just the same as userspace PM, a new parameter needs_id is added for in-kernel PM mptcp_pm_nl_append_new_local_addr() too. Add a new helper mptcp_pm_has_addr_attr_id() to check whether an address ID is set from PM or not. In mptcp_pm_nl_get_local_id(), needs_id is always true, but in mptcp_pm_nl_add_addr_doit(), pass mptcp_pm_has_addr_attr_id() to needs_it. Fixes: efd5a4c04e18 ("mptcp: add the address ID assignment bitmap") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm_netlink.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index e504a1649da0..4dd47a1fb9aa 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -904,7 +904,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) } static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -952,7 +953,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } } - if (!entry->addr.id) { + if (!entry->addr.id && needs_id) { find_next: entry->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, @@ -963,7 +964,7 @@ find_next: } } - if (!entry->addr.id) + if (!entry->addr.id && needs_id) goto out; __set_bit(entry->addr.id, pernet->id_bitmap); @@ -1095,7 +1096,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); if (ret < 0) kfree(entry); @@ -1311,6 +1312,18 @@ next: return 0; } +static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, + struct genl_info *info) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, + mptcp_pm_addr_policy, info->extack) && + tb[MPTCP_PM_ADDR_ATTR_ID]) + return true; + return false; +} + static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -1352,7 +1365,8 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) goto out_free; } } - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, + !mptcp_pm_has_addr_attr_id(attr, info)); if (ret < 0) { GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); goto out_free; From 1f9b7a5d023a94666dcdaf8235f3008d4c1710ba Mon Sep 17 00:00:00 2001 From: Szuying Chen Date: Thu, 21 Sep 2023 17:33:51 +0800 Subject: [PATCH 297/299] ata: ahci: add identifiers for ASM2116 series adapters commit 3bf6141060948e27b62b13beb216887f2e54591e upstream. Add support for PCIe SATA adapter cards based on Asmedia 2116 controllers. These cards can provide up to 10 SATA ports on PCIe card. Signed-off-by: Szuying Chen Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 20761eeea410..e017ec3847f2 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -612,6 +612,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ + { PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci }, /* ASM1062A */ + { PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci }, /* ASM1064 */ + { PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci }, /* ASM1164 */ + { PCI_VDEVICE(ASMEDIA, 0x1165), board_ahci }, /* ASM1165 */ + { PCI_VDEVICE(ASMEDIA, 0x1166), board_ahci }, /* ASM1166 */ /* * Samsung SSDs found on some macbooks. NCQ times out if MSI is From f6e4aca0dd8cd6dcffe8f5655f6459a38f5afa13 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 30 Jan 2024 15:21:51 +0200 Subject: [PATCH 298/299] ahci: Extend ASM1061 43-bit DMA address quirk to other ASM106x parts commit 51af8f255bdaca6d501afc0d085b808f67b44d91 upstream. ASMedia have confirmed that all ASM106x parts currently listed in ahci_pci_tbl[] suffer from the 43-bit DMA address limitation that we ran into on the ASM1061, and therefore, we need to apply the quirk added by commit 20730e9b2778 ("ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers") to the other supported ASM106x parts as well. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-ide/ZbopwKZJAKQRA4Xv@x1-carbon/ Signed-off-by: Lennert Buytenhek [cassel: add link to ASMedia confirmation email] Signed-off-by: Niklas Cassel Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e017ec3847f2..90d33c519f4c 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -605,13 +605,13 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ /* ASMedia */ - { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci_43bit_dma }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci_43bit_dma }, /* ASM1060 */ { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ - { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ - { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ - { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ + { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci_43bit_dma }, /* ASM1061R */ + { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci_43bit_dma }, /* ASM1062R */ + { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci_43bit_dma }, /* ASM1062+JMB575 */ { PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci }, /* ASM1062A */ { PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci }, /* ASM1064 */ { PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci }, /* ASM1164 */ From 0700f4e154f88d6ddf4913fa2fce5c6e0314960a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Mar 2024 13:35:11 +0100 Subject: [PATCH 299/299] Linux 6.6.19 Link: https://lore.kernel.org/r/20240227131625.847743063@linuxfoundation.org Tested-by: Luna Jernberg Tested-by: SeongJae Park Tested-by: Allen Pais Tested-by: Florian Fainelli Tested-by: Kelsey Steele Tested-by: Takeshi Ogasawara Tested-by: Jon Hunter Tested-by: Conor Dooley Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Tested-by: Harshit Mogalapalli Tested-by: Ron Economos Tested-by: kernelci.org bot Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b7198af9e59b..6130b6bd8d6c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 18 +SUBLEVEL = 19 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth